From f3b19aa5cab65f7e73613aa37f6851ce56b794d1 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 27 Feb 2015 17:54:14 +0200 Subject: ARM: OMAP2+: clock: export driver API to setup/get clock features As most of the clock driver support code is going to be moved under drivers/clk/ti, an API for setting / getting the SoC specific clock features is needed. This patch provides this API and changes the existing code to use it. Signed-off-by: Tero Kristo --- include/linux/clk/ti.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 79b76e13d904..1a7f86a68f62 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -338,6 +338,22 @@ int am43xx_dt_clk_init(void); int omap2420_dt_clk_init(void); int omap2430_dt_clk_init(void); +struct ti_clk_features { + u32 flags; + long fint_min; + long fint_max; + long fint_band1_max; + long fint_band2_min; + u8 dpll_bypass_vals; + u8 cm_idlest_val; +}; + +#define TI_CLK_DPLL_HAS_FREQSEL BIT(0) +#define TI_CLK_DPLL4_DENY_REPROGRAM BIT(1) + +void ti_clk_setup_features(struct ti_clk_features *features); +const struct ti_clk_features *ti_clk_get_features(void); + #ifdef CONFIG_OF void of_ti_clk_allow_autoidle_all(void); void of_ti_clk_deny_autoidle_all(void); -- cgit v1.2.3-70-g09d2 From b138b0283d35bed0cd3353d7e39add8ac493eb37 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 2 Mar 2015 09:57:28 +0200 Subject: clk: ti: move generic OMAP DPLL implementation under drivers/clk With the legacy clock data now gone, we can start moving OMAP clock type implementations under clock driver. Start this with moving the generic OMAP DPLL clock type under TI clock driver. 
Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 2 +- arch/arm/mach-omap2/clkt_dpll.c | 370 ---------------------------------------- drivers/clk/ti/Makefile | 3 +- drivers/clk/ti/clkt_dpll.c | 369 +++++++++++++++++++++++++++++++++++++++ drivers/clk/ti/clock.h | 2 + include/linux/clk/ti.h | 1 - 6 files changed, 374 insertions(+), 373 deletions(-) delete mode 100644 arch/arm/mach-omap2/clkt_dpll.c create mode 100644 drivers/clk/ti/clkt_dpll.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index ec002bd4af77..fcb5d47f88ca 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -13,7 +13,7 @@ obj-y := id.o io.o control.o mux.o devices.o fb.o serial.o timer.o pm.o \ hwmod-common = omap_hwmod.o omap_hwmod_reset.o \ omap_hwmod_common_data.o clock-common = clock.o clock_common_data.o \ - clkt_dpll.o clkt_clksel.o + clkt_clksel.o secure-common = omap-smc.o omap-secure.o obj-$(CONFIG_ARCH_OMAP2) += $(omap-2-3-common) $(hwmod-common) diff --git a/arch/arm/mach-omap2/clkt_dpll.c b/arch/arm/mach-omap2/clkt_dpll.c deleted file mode 100644 index 82f0600c35f4..000000000000 --- a/arch/arm/mach-omap2/clkt_dpll.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * OMAP2/3/4 DPLL clock functions - * - * Copyright (C) 2005-2008 Texas Instruments, Inc. - * Copyright (C) 2004-2010 Nokia Corporation - * - * Contacts: - * Richard Woodruff - * Paul Walmsley - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#undef DEBUG - -#include -#include -#include -#include - -#include - -#include "clock.h" - -/* DPLL rate rounding: minimum DPLL multiplier, divider values */ -#define DPLL_MIN_MULTIPLIER 2 -#define DPLL_MIN_DIVIDER 1 - -/* Possible error results from _dpll_test_mult */ -#define DPLL_MULT_UNDERFLOW -1 - -/* - * Scale factor to mitigate roundoff errors in DPLL rate rounding. - * The higher the scale factor, the greater the risk of arithmetic overflow, - * but the closer the rounded rate to the target rate. DPLL_SCALE_FACTOR - * must be a power of DPLL_SCALE_BASE. - */ -#define DPLL_SCALE_FACTOR 64 -#define DPLL_SCALE_BASE 2 -#define DPLL_ROUNDING_VAL ((DPLL_SCALE_BASE / 2) * \ - (DPLL_SCALE_FACTOR / DPLL_SCALE_BASE)) - -/* - * DPLL valid Fint frequency range for OMAP36xx and OMAP4xxx. - * From device data manual section 4.3 "DPLL and DLL Specifications". - */ -#define OMAP3PLUS_DPLL_FINT_JTYPE_MIN 500000 -#define OMAP3PLUS_DPLL_FINT_JTYPE_MAX 2500000 - -/* _dpll_test_fint() return codes */ -#define DPLL_FINT_UNDERFLOW -1 -#define DPLL_FINT_INVALID -2 - -/* Private functions */ - -/* - * _dpll_test_fint - test whether an Fint value is valid for the DPLL - * @clk: DPLL struct clk to test - * @n: divider value (N) to test - * - * Tests whether a particular divider @n will result in a valid DPLL - * internal clock frequency Fint. See the 34xx TRM 4.7.6.2 "DPLL Jitter - * Correction". Returns 0 if OK, -1 if the enclosing loop can terminate - * (assuming that it is counting N upwards), or -2 if the enclosing loop - * should skip to the next iteration (again assuming N is increasing). 
- */ -static int _dpll_test_fint(struct clk_hw_omap *clk, unsigned int n) -{ - struct dpll_data *dd; - long fint, fint_min, fint_max; - int ret = 0; - - dd = clk->dpll_data; - - /* DPLL divider must result in a valid jitter correction val */ - fint = __clk_get_rate(__clk_get_parent(clk->hw.clk)) / n; - - if (dd->flags & DPLL_J_TYPE) { - fint_min = OMAP3PLUS_DPLL_FINT_JTYPE_MIN; - fint_max = OMAP3PLUS_DPLL_FINT_JTYPE_MAX; - } else { - fint_min = ti_clk_get_features()->fint_min; - fint_max = ti_clk_get_features()->fint_max; - } - - if (!fint_min || !fint_max) { - WARN(1, "No fint limits available!\n"); - return DPLL_FINT_INVALID; - } - - if (fint < ti_clk_get_features()->fint_min) { - pr_debug("rejecting n=%d due to Fint failure, lowering max_divider\n", - n); - dd->max_divider = n; - ret = DPLL_FINT_UNDERFLOW; - } else if (fint > ti_clk_get_features()->fint_max) { - pr_debug("rejecting n=%d due to Fint failure, boosting min_divider\n", - n); - dd->min_divider = n; - ret = DPLL_FINT_INVALID; - } else if (fint > ti_clk_get_features()->fint_band1_max && - fint < ti_clk_get_features()->fint_band2_min) { - pr_debug("rejecting n=%d due to Fint failure\n", n); - ret = DPLL_FINT_INVALID; - } - - return ret; -} - -static unsigned long _dpll_compute_new_rate(unsigned long parent_rate, - unsigned int m, unsigned int n) -{ - unsigned long long num; - - num = (unsigned long long)parent_rate * m; - do_div(num, n); - return num; -} - -/* - * _dpll_test_mult - test a DPLL multiplier value - * @m: pointer to the DPLL m (multiplier) value under test - * @n: current DPLL n (divider) value under test - * @new_rate: pointer to storage for the resulting rounded rate - * @target_rate: the desired DPLL rate - * @parent_rate: the DPLL's parent clock rate - * - * This code tests a DPLL multiplier value, ensuring that the - * resulting rate will not be higher than the target_rate, and that - * the multiplier value itself is valid for the DPLL. 
Initially, the - * integer pointed to by the m argument should be prescaled by - * multiplying by DPLL_SCALE_FACTOR. The code will replace this with - * a non-scaled m upon return. This non-scaled m will result in a - * new_rate as close as possible to target_rate (but not greater than - * target_rate) given the current (parent_rate, n, prescaled m) - * triple. Returns DPLL_MULT_UNDERFLOW in the event that the - * non-scaled m attempted to underflow, which can allow the calling - * function to bail out early; or 0 upon success. - */ -static int _dpll_test_mult(int *m, int n, unsigned long *new_rate, - unsigned long target_rate, - unsigned long parent_rate) -{ - int r = 0, carry = 0; - - /* Unscale m and round if necessary */ - if (*m % DPLL_SCALE_FACTOR >= DPLL_ROUNDING_VAL) - carry = 1; - *m = (*m / DPLL_SCALE_FACTOR) + carry; - - /* - * The new rate must be <= the target rate to avoid programming - * a rate that is impossible for the hardware to handle - */ - *new_rate = _dpll_compute_new_rate(parent_rate, *m, n); - if (*new_rate > target_rate) { - (*m)--; - *new_rate = 0; - } - - /* Guard against m underflow */ - if (*m < DPLL_MIN_MULTIPLIER) { - *m = DPLL_MIN_MULTIPLIER; - *new_rate = 0; - r = DPLL_MULT_UNDERFLOW; - } - - if (*new_rate == 0) - *new_rate = _dpll_compute_new_rate(parent_rate, *m, n); - - return r; -} - -/** - * _omap2_dpll_is_in_bypass - check if DPLL is in bypass mode or not - * @v: bitfield value of the DPLL enable - * - * Checks given DPLL enable bitfield to see whether the DPLL is in bypass - * mode or not. Returns 1 if the DPLL is in bypass, 0 otherwise. - */ -static int _omap2_dpll_is_in_bypass(u32 v) -{ - u8 mask, val; - - mask = ti_clk_get_features()->dpll_bypass_vals; - - /* - * Each set bit in the mask corresponds to a bypass value equal - * to the bitshift. Go through each set-bit in the mask and - * compare against the given register value. 
- */ - while (mask) { - val = __ffs(mask); - mask ^= (1 << val); - if (v == val) - return 1; - } - - return 0; -} - -/* Public functions */ -u8 omap2_init_dpll_parent(struct clk_hw *hw) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - u32 v; - struct dpll_data *dd; - - dd = clk->dpll_data; - if (!dd) - return -EINVAL; - - v = omap2_clk_readl(clk, dd->control_reg); - v &= dd->enable_mask; - v >>= __ffs(dd->enable_mask); - - /* Reparent the struct clk in case the dpll is in bypass */ - if (_omap2_dpll_is_in_bypass(v)) - return 1; - - return 0; -} - -/** - * omap2_get_dpll_rate - returns the current DPLL CLKOUT rate - * @clk: struct clk * of a DPLL - * - * DPLLs can be locked or bypassed - basically, enabled or disabled. - * When locked, the DPLL output depends on the M and N values. When - * bypassed, on OMAP2xxx, the output rate is either the 32KiHz clock - * or sys_clk. Bypass rates on OMAP3 depend on the DPLL: DPLLs 1 and - * 2 are bypassed with dpll1_fclk and dpll2_fclk respectively - * (generated by DPLL3), while DPLL 3, 4, and 5 bypass rates are sys_clk. - * Returns the current DPLL CLKOUT rate (*not* CLKOUTX2) if the DPLL is - * locked, or the appropriate bypass rate if the DPLL is bypassed, or 0 - * if the clock @clk is not a DPLL. 
- */ -unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk) -{ - long long dpll_clk; - u32 dpll_mult, dpll_div, v; - struct dpll_data *dd; - - dd = clk->dpll_data; - if (!dd) - return 0; - - /* Return bypass rate if DPLL is bypassed */ - v = omap2_clk_readl(clk, dd->control_reg); - v &= dd->enable_mask; - v >>= __ffs(dd->enable_mask); - - if (_omap2_dpll_is_in_bypass(v)) - return __clk_get_rate(dd->clk_bypass); - - v = omap2_clk_readl(clk, dd->mult_div1_reg); - dpll_mult = v & dd->mult_mask; - dpll_mult >>= __ffs(dd->mult_mask); - dpll_div = v & dd->div1_mask; - dpll_div >>= __ffs(dd->div1_mask); - - dpll_clk = (long long) __clk_get_rate(dd->clk_ref) * dpll_mult; - do_div(dpll_clk, dpll_div + 1); - - return dpll_clk; -} - -/* DPLL rate rounding code */ - -/** - * omap2_dpll_round_rate - round a target rate for an OMAP DPLL - * @clk: struct clk * for a DPLL - * @target_rate: desired DPLL clock rate - * - * Given a DPLL and a desired target rate, round the target rate to a - * possible, programmable rate for this DPLL. Attempts to select the - * minimum possible n. Stores the computed (m, n) in the DPLL's - * dpll_data structure so set_rate() will not need to call this - * (expensive) function again. Returns ~0 if the target rate cannot - * be rounded, or the rounded rate upon success. 
- */ -long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, - unsigned long *parent_rate) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - int m, n, r, scaled_max_m; - int min_delta_m = INT_MAX, min_delta_n = INT_MAX; - unsigned long scaled_rt_rp; - unsigned long new_rate = 0; - struct dpll_data *dd; - unsigned long ref_rate; - long delta; - long prev_min_delta = LONG_MAX; - const char *clk_name; - - if (!clk || !clk->dpll_data) - return ~0; - - dd = clk->dpll_data; - - ref_rate = __clk_get_rate(dd->clk_ref); - clk_name = __clk_get_name(hw->clk); - pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n", - clk_name, target_rate); - - scaled_rt_rp = target_rate / (ref_rate / DPLL_SCALE_FACTOR); - scaled_max_m = dd->max_multiplier * DPLL_SCALE_FACTOR; - - dd->last_rounded_rate = 0; - - for (n = dd->min_divider; n <= dd->max_divider; n++) { - - /* Is the (input clk, divider) pair valid for the DPLL? */ - r = _dpll_test_fint(clk, n); - if (r == DPLL_FINT_UNDERFLOW) - break; - else if (r == DPLL_FINT_INVALID) - continue; - - /* Compute the scaled DPLL multiplier, based on the divider */ - m = scaled_rt_rp * n; - - /* - * Since we're counting n up, a m overflow means we - * can bail out completely (since as n increases in - * the next iteration, there's no way that m can - * increase beyond the current m) - */ - if (m > scaled_max_m) - break; - - r = _dpll_test_mult(&m, n, &new_rate, target_rate, - ref_rate); - - /* m can't be set low enough for this n - try with a larger n */ - if (r == DPLL_MULT_UNDERFLOW) - continue; - - /* skip rates above our target rate */ - delta = target_rate - new_rate; - if (delta < 0) - continue; - - if (delta < prev_min_delta) { - prev_min_delta = delta; - min_delta_m = m; - min_delta_n = n; - } - - pr_debug("clock: %s: m = %d: n = %d: new_rate = %lu\n", - clk_name, m, n, new_rate); - - if (delta == 0) - break; - } - - if (prev_min_delta == LONG_MAX) { - pr_debug("clock: %s: cannot round to rate %lu\n", - 
clk_name, target_rate); - return ~0; - } - - dd->last_rounded_m = min_delta_m; - dd->last_rounded_n = min_delta_n; - dd->last_rounded_rate = target_rate - prev_min_delta; - - return dd->last_rounded_rate; -} - diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 105ffd0f5e79..62dae2ad3c69 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -1,6 +1,7 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ - fixed-factor.o mux.o apll.o + fixed-factor.o mux.o apll.o \ + clkt_dpll.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-816x.o obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o diff --git a/drivers/clk/ti/clkt_dpll.c b/drivers/clk/ti/clkt_dpll.c new file mode 100644 index 000000000000..a01fc7f305c1 --- /dev/null +++ b/drivers/clk/ti/clkt_dpll.c @@ -0,0 +1,369 @@ +/* + * OMAP2/3/4 DPLL clock functions + * + * Copyright (C) 2005-2008 Texas Instruments, Inc. + * Copyright (C) 2004-2010 Nokia Corporation + * + * Contacts: + * Richard Woodruff + * Paul Walmsley + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG + +#include +#include +#include +#include +#include + +#include + +#include "clock.h" + +/* DPLL rate rounding: minimum DPLL multiplier, divider values */ +#define DPLL_MIN_MULTIPLIER 2 +#define DPLL_MIN_DIVIDER 1 + +/* Possible error results from _dpll_test_mult */ +#define DPLL_MULT_UNDERFLOW -1 + +/* + * Scale factor to mitigate roundoff errors in DPLL rate rounding. + * The higher the scale factor, the greater the risk of arithmetic overflow, + * but the closer the rounded rate to the target rate. DPLL_SCALE_FACTOR + * must be a power of DPLL_SCALE_BASE. 
+ */ +#define DPLL_SCALE_FACTOR 64 +#define DPLL_SCALE_BASE 2 +#define DPLL_ROUNDING_VAL ((DPLL_SCALE_BASE / 2) * \ + (DPLL_SCALE_FACTOR / DPLL_SCALE_BASE)) + +/* + * DPLL valid Fint frequency range for OMAP36xx and OMAP4xxx. + * From device data manual section 4.3 "DPLL and DLL Specifications". + */ +#define OMAP3PLUS_DPLL_FINT_JTYPE_MIN 500000 +#define OMAP3PLUS_DPLL_FINT_JTYPE_MAX 2500000 + +/* _dpll_test_fint() return codes */ +#define DPLL_FINT_UNDERFLOW -1 +#define DPLL_FINT_INVALID -2 + +/* Private functions */ + +/* + * _dpll_test_fint - test whether an Fint value is valid for the DPLL + * @clk: DPLL struct clk to test + * @n: divider value (N) to test + * + * Tests whether a particular divider @n will result in a valid DPLL + * internal clock frequency Fint. See the 34xx TRM 4.7.6.2 "DPLL Jitter + * Correction". Returns 0 if OK, -1 if the enclosing loop can terminate + * (assuming that it is counting N upwards), or -2 if the enclosing loop + * should skip to the next iteration (again assuming N is increasing). 
+ */ +static int _dpll_test_fint(struct clk_hw_omap *clk, unsigned int n) +{ + struct dpll_data *dd; + long fint, fint_min, fint_max; + int ret = 0; + + dd = clk->dpll_data; + + /* DPLL divider must result in a valid jitter correction val */ + fint = __clk_get_rate(__clk_get_parent(clk->hw.clk)) / n; + + if (dd->flags & DPLL_J_TYPE) { + fint_min = OMAP3PLUS_DPLL_FINT_JTYPE_MIN; + fint_max = OMAP3PLUS_DPLL_FINT_JTYPE_MAX; + } else { + fint_min = ti_clk_get_features()->fint_min; + fint_max = ti_clk_get_features()->fint_max; + } + + if (!fint_min || !fint_max) { + WARN(1, "No fint limits available!\n"); + return DPLL_FINT_INVALID; + } + + if (fint < ti_clk_get_features()->fint_min) { + pr_debug("rejecting n=%d due to Fint failure, lowering max_divider\n", + n); + dd->max_divider = n; + ret = DPLL_FINT_UNDERFLOW; + } else if (fint > ti_clk_get_features()->fint_max) { + pr_debug("rejecting n=%d due to Fint failure, boosting min_divider\n", + n); + dd->min_divider = n; + ret = DPLL_FINT_INVALID; + } else if (fint > ti_clk_get_features()->fint_band1_max && + fint < ti_clk_get_features()->fint_band2_min) { + pr_debug("rejecting n=%d due to Fint failure\n", n); + ret = DPLL_FINT_INVALID; + } + + return ret; +} + +static unsigned long _dpll_compute_new_rate(unsigned long parent_rate, + unsigned int m, unsigned int n) +{ + unsigned long long num; + + num = (unsigned long long)parent_rate * m; + do_div(num, n); + return num; +} + +/* + * _dpll_test_mult - test a DPLL multiplier value + * @m: pointer to the DPLL m (multiplier) value under test + * @n: current DPLL n (divider) value under test + * @new_rate: pointer to storage for the resulting rounded rate + * @target_rate: the desired DPLL rate + * @parent_rate: the DPLL's parent clock rate + * + * This code tests a DPLL multiplier value, ensuring that the + * resulting rate will not be higher than the target_rate, and that + * the multiplier value itself is valid for the DPLL. 
Initially, the + * integer pointed to by the m argument should be prescaled by + * multiplying by DPLL_SCALE_FACTOR. The code will replace this with + * a non-scaled m upon return. This non-scaled m will result in a + * new_rate as close as possible to target_rate (but not greater than + * target_rate) given the current (parent_rate, n, prescaled m) + * triple. Returns DPLL_MULT_UNDERFLOW in the event that the + * non-scaled m attempted to underflow, which can allow the calling + * function to bail out early; or 0 upon success. + */ +static int _dpll_test_mult(int *m, int n, unsigned long *new_rate, + unsigned long target_rate, + unsigned long parent_rate) +{ + int r = 0, carry = 0; + + /* Unscale m and round if necessary */ + if (*m % DPLL_SCALE_FACTOR >= DPLL_ROUNDING_VAL) + carry = 1; + *m = (*m / DPLL_SCALE_FACTOR) + carry; + + /* + * The new rate must be <= the target rate to avoid programming + * a rate that is impossible for the hardware to handle + */ + *new_rate = _dpll_compute_new_rate(parent_rate, *m, n); + if (*new_rate > target_rate) { + (*m)--; + *new_rate = 0; + } + + /* Guard against m underflow */ + if (*m < DPLL_MIN_MULTIPLIER) { + *m = DPLL_MIN_MULTIPLIER; + *new_rate = 0; + r = DPLL_MULT_UNDERFLOW; + } + + if (*new_rate == 0) + *new_rate = _dpll_compute_new_rate(parent_rate, *m, n); + + return r; +} + +/** + * _omap2_dpll_is_in_bypass - check if DPLL is in bypass mode or not + * @v: bitfield value of the DPLL enable + * + * Checks given DPLL enable bitfield to see whether the DPLL is in bypass + * mode or not. Returns 1 if the DPLL is in bypass, 0 otherwise. + */ +static int _omap2_dpll_is_in_bypass(u32 v) +{ + u8 mask, val; + + mask = ti_clk_get_features()->dpll_bypass_vals; + + /* + * Each set bit in the mask corresponds to a bypass value equal + * to the bitshift. Go through each set-bit in the mask and + * compare against the given register value. 
+ */ + while (mask) { + val = __ffs(mask); + mask ^= (1 << val); + if (v == val) + return 1; + } + + return 0; +} + +/* Public functions */ +u8 omap2_init_dpll_parent(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + u32 v; + struct dpll_data *dd; + + dd = clk->dpll_data; + if (!dd) + return -EINVAL; + + v = ti_clk_ll_ops->clk_readl(dd->control_reg); + v &= dd->enable_mask; + v >>= __ffs(dd->enable_mask); + + /* Reparent the struct clk in case the dpll is in bypass */ + if (_omap2_dpll_is_in_bypass(v)) + return 1; + + return 0; +} + +/** + * omap2_get_dpll_rate - returns the current DPLL CLKOUT rate + * @clk: struct clk * of a DPLL + * + * DPLLs can be locked or bypassed - basically, enabled or disabled. + * When locked, the DPLL output depends on the M and N values. When + * bypassed, on OMAP2xxx, the output rate is either the 32KiHz clock + * or sys_clk. Bypass rates on OMAP3 depend on the DPLL: DPLLs 1 and + * 2 are bypassed with dpll1_fclk and dpll2_fclk respectively + * (generated by DPLL3), while DPLL 3, 4, and 5 bypass rates are sys_clk. + * Returns the current DPLL CLKOUT rate (*not* CLKOUTX2) if the DPLL is + * locked, or the appropriate bypass rate if the DPLL is bypassed, or 0 + * if the clock @clk is not a DPLL. 
+ */ +unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk) +{ + long long dpll_clk; + u32 dpll_mult, dpll_div, v; + struct dpll_data *dd; + + dd = clk->dpll_data; + if (!dd) + return 0; + + /* Return bypass rate if DPLL is bypassed */ + v = ti_clk_ll_ops->clk_readl(dd->control_reg); + v &= dd->enable_mask; + v >>= __ffs(dd->enable_mask); + + if (_omap2_dpll_is_in_bypass(v)) + return __clk_get_rate(dd->clk_bypass); + + v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg); + dpll_mult = v & dd->mult_mask; + dpll_mult >>= __ffs(dd->mult_mask); + dpll_div = v & dd->div1_mask; + dpll_div >>= __ffs(dd->div1_mask); + + dpll_clk = (long long)__clk_get_rate(dd->clk_ref) * dpll_mult; + do_div(dpll_clk, dpll_div + 1); + + return dpll_clk; +} + +/* DPLL rate rounding code */ + +/** + * omap2_dpll_round_rate - round a target rate for an OMAP DPLL + * @clk: struct clk * for a DPLL + * @target_rate: desired DPLL clock rate + * + * Given a DPLL and a desired target rate, round the target rate to a + * possible, programmable rate for this DPLL. Attempts to select the + * minimum possible n. Stores the computed (m, n) in the DPLL's + * dpll_data structure so set_rate() will not need to call this + * (expensive) function again. Returns ~0 if the target rate cannot + * be rounded, or the rounded rate upon success. 
+ */ +long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, + unsigned long *parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + int m, n, r, scaled_max_m; + int min_delta_m = INT_MAX, min_delta_n = INT_MAX; + unsigned long scaled_rt_rp; + unsigned long new_rate = 0; + struct dpll_data *dd; + unsigned long ref_rate; + long delta; + long prev_min_delta = LONG_MAX; + const char *clk_name; + + if (!clk || !clk->dpll_data) + return ~0; + + dd = clk->dpll_data; + + ref_rate = __clk_get_rate(dd->clk_ref); + clk_name = __clk_get_name(hw->clk); + pr_debug("clock: %s: starting DPLL round_rate, target rate %lu\n", + clk_name, target_rate); + + scaled_rt_rp = target_rate / (ref_rate / DPLL_SCALE_FACTOR); + scaled_max_m = dd->max_multiplier * DPLL_SCALE_FACTOR; + + dd->last_rounded_rate = 0; + + for (n = dd->min_divider; n <= dd->max_divider; n++) { + /* Is the (input clk, divider) pair valid for the DPLL? */ + r = _dpll_test_fint(clk, n); + if (r == DPLL_FINT_UNDERFLOW) + break; + else if (r == DPLL_FINT_INVALID) + continue; + + /* Compute the scaled DPLL multiplier, based on the divider */ + m = scaled_rt_rp * n; + + /* + * Since we're counting n up, a m overflow means we + * can bail out completely (since as n increases in + * the next iteration, there's no way that m can + * increase beyond the current m) + */ + if (m > scaled_max_m) + break; + + r = _dpll_test_mult(&m, n, &new_rate, target_rate, + ref_rate); + + /* m can't be set low enough for this n - try with a larger n */ + if (r == DPLL_MULT_UNDERFLOW) + continue; + + /* skip rates above our target rate */ + delta = target_rate - new_rate; + if (delta < 0) + continue; + + if (delta < prev_min_delta) { + prev_min_delta = delta; + min_delta_m = m; + min_delta_n = n; + } + + pr_debug("clock: %s: m = %d: n = %d: new_rate = %lu\n", + clk_name, m, n, new_rate); + + if (delta == 0) + break; + } + + if (prev_min_delta == LONG_MAX) { + pr_debug("clock: %s: cannot round to rate %lu\n", + 
clk_name, target_rate); + return ~0; + } + + dd->last_rounded_m = min_delta_m; + dd->last_rounded_n = min_delta_n; + dd->last_rounded_rate = target_rate - prev_min_delta; + + return dd->last_rounded_rate; +} diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 404158d2d7f8..05ed10a81ace 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -169,4 +169,6 @@ void ti_clk_patch_legacy_clks(struct ti_clk **patch); struct clk *ti_clk_register_clk(struct ti_clk *setup); int ti_clk_register_legacy_clks(struct ti_clk_alias *clks); +u8 omap2_init_dpll_parent(struct clk_hw *hw); + #endif diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 1a7f86a68f62..886b2e9d2204 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -286,7 +286,6 @@ long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long max_rate, unsigned long *best_parent_rate, struct clk_hw **best_parent_clk); -u8 omap2_init_dpll_parent(struct clk_hw *hw); unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, unsigned long *parent_rate); -- cgit v1.2.3-70-g09d2 From 59245ce01a2e3ded836172266e3ac2e576a03333 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 2 Mar 2015 11:07:35 +0200 Subject: clk: ti: move OMAP4+ DPLL implementation under drivers/clk With the legacy clock support gone, the OMAP4 specific DPLL implementations can be moved under the clock driver. Change some of the function prototypes to be static at the same time, and remove some exports from the global TI clock driver header. 
Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 6 +- arch/arm/mach-omap2/clock.h | 4 - arch/arm/mach-omap2/dpll44xx.c | 232 ---------------------------------------- drivers/clk/ti/Makefile | 6 +- drivers/clk/ti/clock.h | 14 +++ drivers/clk/ti/dpll44xx.c | 233 +++++++++++++++++++++++++++++++++++++++++ include/linux/clk/ti.h | 13 +-- 7 files changed, 254 insertions(+), 254 deletions(-) delete mode 100644 arch/arm/mach-omap2/dpll44xx.c create mode 100644 drivers/clk/ti/dpll44xx.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index fcb5d47f88ca..5bcd282f04b3 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -193,12 +193,12 @@ obj-$(CONFIG_ARCH_OMAP3) += clock3517.o clock36xx.o obj-$(CONFIG_ARCH_OMAP3) += dpll3xxx.o obj-$(CONFIG_ARCH_OMAP3) += clkt_iclk.o obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) -obj-$(CONFIG_ARCH_OMAP4) += dpll3xxx.o dpll44xx.o +obj-$(CONFIG_ARCH_OMAP4) += dpll3xxx.o obj-$(CONFIG_SOC_AM33XX) += $(clock-common) dpll3xxx.o obj-$(CONFIG_SOC_OMAP5) += $(clock-common) -obj-$(CONFIG_SOC_OMAP5) += dpll3xxx.o dpll44xx.o +obj-$(CONFIG_SOC_OMAP5) += dpll3xxx.o obj-$(CONFIG_SOC_DRA7XX) += $(clock-common) -obj-$(CONFIG_SOC_DRA7XX) += dpll3xxx.o dpll44xx.o +obj-$(CONFIG_SOC_DRA7XX) += dpll3xxx.o obj-$(CONFIG_SOC_AM43XX) += $(clock-common) dpll3xxx.o # OMAP2 clock rate set data (old "OPP" data) diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index ac21856d245d..d7ed2446057c 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -183,8 +183,6 @@ struct clksel { u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk); void omap3_dpll_allow_idle(struct clk_hw_omap *clk); void omap3_dpll_deny_idle(struct clk_hw_omap *clk); -void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk); -void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk); void __init omap2_clk_disable_clkdm_control(void); @@ -204,8 +202,6 @@ int 
omap2_clksel_set_parent(struct clk_hw *hw, u8 field_val); extern void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk); extern void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk); -unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk); - void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, void __iomem **other_reg, u8 *other_bit); diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c deleted file mode 100644 index f231be05b9a6..000000000000 --- a/arch/arm/mach-omap2/dpll44xx.c +++ /dev/null @@ -1,232 +0,0 @@ -/* - * OMAP4-specific DPLL control functions - * - * Copyright (C) 2011 Texas Instruments, Inc. - * Rajendra Nayak - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include - -#include "clock.h" - -/* - * Maximum DPLL input frequency (FINT) and output frequency (FOUT) that - * can supported when using the DPLL low-power mode. Frequencies are - * defined in OMAP4430/60 Public TRM section 3.6.3.3.2 "Enable Control, - * Status, and Low-Power Operation Mode". - */ -#define OMAP4_DPLL_LP_FINT_MAX 1000000 -#define OMAP4_DPLL_LP_FOUT_MAX 100000000 - -/* - * Bitfield declarations - */ -#define OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK (1 << 8) -#define OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK (1 << 10) -#define OMAP4430_DPLL_REGM4XEN_MASK (1 << 11) - -/* Static rate multiplier for OMAP4 REGM4XEN clocks */ -#define OMAP4430_REGM4XEN_MULT 4 - -void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk) -{ - u32 v; - u32 mask; - - if (!clk || !clk->clksel_reg) - return; - - mask = clk->flags & CLOCK_CLKOUTX2 ? 
- OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK : - OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK; - - v = omap2_clk_readl(clk, clk->clksel_reg); - /* Clear the bit to allow gatectrl */ - v &= ~mask; - omap2_clk_writel(v, clk, clk->clksel_reg); -} - -void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk) -{ - u32 v; - u32 mask; - - if (!clk || !clk->clksel_reg) - return; - - mask = clk->flags & CLOCK_CLKOUTX2 ? - OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK : - OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK; - - v = omap2_clk_readl(clk, clk->clksel_reg); - /* Set the bit to deny gatectrl */ - v |= mask; - omap2_clk_writel(v, clk, clk->clksel_reg); -} - -const struct clk_hw_omap_ops clkhwops_omap4_dpllmx = { - .allow_idle = omap4_dpllmx_allow_gatectrl, - .deny_idle = omap4_dpllmx_deny_gatectrl, -}; - -/** - * omap4_dpll_lpmode_recalc - compute DPLL low-power setting - * @dd: pointer to the dpll data structure - * - * Calculates if low-power mode can be enabled based upon the last - * multiplier and divider values calculated. If low-power mode can be - * enabled, then the bit to enable low-power mode is stored in the - * last_rounded_lpmode variable. This implementation is based upon the - * criteria for enabling low-power mode as described in the OMAP4430/60 - * Public TRM section 3.6.3.3.2 "Enable Control, Status, and Low-Power - * Operation Mode". - */ -static void omap4_dpll_lpmode_recalc(struct dpll_data *dd) -{ - long fint, fout; - - fint = __clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1); - fout = fint * dd->last_rounded_m; - - if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX)) - dd->last_rounded_lpmode = 1; - else - dd->last_rounded_lpmode = 0; -} - -/** - * omap4_dpll_regm4xen_recalc - compute DPLL rate, considering REGM4XEN bit - * @clk: struct clk * of the DPLL to compute the rate for - * - * Compute the output rate for the OMAP4 DPLL represented by @clk. - * Takes the REGM4XEN bit into consideration, which is needed for the - * OMAP4 ABE DPLL. 
Returns the DPLL's output rate (before M-dividers) - * upon success, or 0 upon error. - */ -unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, - unsigned long parent_rate) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - u32 v; - unsigned long rate; - struct dpll_data *dd; - - if (!clk || !clk->dpll_data) - return 0; - - dd = clk->dpll_data; - - rate = omap2_get_dpll_rate(clk); - - /* regm4xen adds a multiplier of 4 to DPLL calculations */ - v = omap2_clk_readl(clk, dd->control_reg); - if (v & OMAP4430_DPLL_REGM4XEN_MASK) - rate *= OMAP4430_REGM4XEN_MULT; - - return rate; -} - -/** - * omap4_dpll_regm4xen_round_rate - round DPLL rate, considering REGM4XEN bit - * @clk: struct clk * of the DPLL to round a rate for - * @target_rate: the desired rate of the DPLL - * - * Compute the rate that would be programmed into the DPLL hardware - * for @clk if set_rate() were to be provided with the rate - * @target_rate. Takes the REGM4XEN bit into consideration, which is - * needed for the OMAP4 ABE DPLL. Returns the rounded rate (before - * M-dividers) upon success, -EINVAL if @clk is null or not a DPLL, or - * ~0 if an error occurred in omap2_dpll_round_rate(). - */ -long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, - unsigned long target_rate, - unsigned long *parent_rate) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - struct dpll_data *dd; - long r; - - if (!clk || !clk->dpll_data) - return -EINVAL; - - dd = clk->dpll_data; - - dd->last_rounded_m4xen = 0; - - /* - * First try to compute the DPLL configuration for - * target rate without using the 4X multiplier. - */ - r = omap2_dpll_round_rate(hw, target_rate, NULL); - if (r != ~0) - goto out; - - /* - * If we did not find a valid DPLL configuration, try again, but - * this time see if using the 4X multiplier can help. Enabling the - * 4X multiplier is equivalent to dividing the target rate by 4. 
- */ - r = omap2_dpll_round_rate(hw, target_rate / OMAP4430_REGM4XEN_MULT, - NULL); - if (r == ~0) - return r; - - dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT; - dd->last_rounded_m4xen = 1; - -out: - omap4_dpll_lpmode_recalc(dd); - - return dd->last_rounded_rate; -} - -/** - * omap4_dpll_regm4xen_determine_rate - determine rate for a DPLL - * @hw: pointer to the clock to determine rate for - * @rate: target rate for the DPLL - * @best_parent_rate: pointer for returning best parent rate - * @best_parent_clk: pointer for returning best parent clock - * - * Determines which DPLL mode to use for reaching a desired rate. - * Checks whether the DPLL shall be in bypass or locked mode, and if - * locked, calculates the M,N values for the DPLL via round-rate. - * Returns a positive clock rate with success, negative error value - * in failure. - */ -long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - struct dpll_data *dd; - - if (!hw || !rate) - return -EINVAL; - - dd = clk->dpll_data; - if (!dd) - return -EINVAL; - - if (__clk_get_rate(dd->clk_bypass) == rate && - (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - *best_parent_clk = __clk_get_hw(dd->clk_bypass); - } else { - rate = omap4_dpll_regm4xen_round_rate(hw, rate, - best_parent_rate); - *best_parent_clk = __clk_get_hw(dd->clk_ref); - } - - *best_parent_rate = rate; - - return rate; -} diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 62dae2ad3c69..c3ec3014fb2d 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -7,10 +7,10 @@ obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-816x.o obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o obj-$(CONFIG_ARCH_OMAP3) += $(clk-common) interface.o \ clk-3xxx.o -obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o 
-obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o +obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o dpll44xx.o +obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o dpll44xx.o obj-$(CONFIG_SOC_DRA7XX) += $(clk-common) clk-7xx.o \ - clk-dra7-atl.o + clk-dra7-atl.o dpll44xx.o obj-$(CONFIG_SOC_AM43XX) += $(clk-common) clk-43xx.o ifdef CONFIG_ATAGS diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 05ed10a81ace..c75d4b44cbef 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -169,6 +169,20 @@ void ti_clk_patch_legacy_clks(struct ti_clk **patch); struct clk *ti_clk_register_clk(struct ti_clk *setup); int ti_clk_register_legacy_clks(struct ti_clk_alias *clks); +extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; + u8 omap2_init_dpll_parent(struct clk_hw *hw); +unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, + unsigned long parent_rate); +long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, + unsigned long target_rate, + unsigned long *parent_rate); +long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long min_rate, + unsigned long max_rate, + unsigned long *best_parent_rate, + struct clk_hw **best_parent_clk); + #endif diff --git a/drivers/clk/ti/dpll44xx.c b/drivers/clk/ti/dpll44xx.c new file mode 100644 index 000000000000..ef1a5b43d01f --- /dev/null +++ b/drivers/clk/ti/dpll44xx.c @@ -0,0 +1,233 @@ +/* + * OMAP4-specific DPLL control functions + * + * Copyright (C) 2011 Texas Instruments, Inc. + * Rajendra Nayak + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include "clock.h" + +/* + * Maximum DPLL input frequency (FINT) and output frequency (FOUT) that + * can supported when using the DPLL low-power mode. 
Frequencies are + * defined in OMAP4430/60 Public TRM section 3.6.3.3.2 "Enable Control, + * Status, and Low-Power Operation Mode". + */ +#define OMAP4_DPLL_LP_FINT_MAX 1000000 +#define OMAP4_DPLL_LP_FOUT_MAX 100000000 + +/* + * Bitfield declarations + */ +#define OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK BIT(8) +#define OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK BIT(10) +#define OMAP4430_DPLL_REGM4XEN_MASK BIT(11) + +/* Static rate multiplier for OMAP4 REGM4XEN clocks */ +#define OMAP4430_REGM4XEN_MULT 4 + +static void omap4_dpllmx_allow_gatectrl(struct clk_hw_omap *clk) +{ + u32 v; + u32 mask; + + if (!clk || !clk->clksel_reg) + return; + + mask = clk->flags & CLOCK_CLKOUTX2 ? + OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK : + OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK; + + v = ti_clk_ll_ops->clk_readl(clk->clksel_reg); + /* Clear the bit to allow gatectrl */ + v &= ~mask; + ti_clk_ll_ops->clk_writel(v, clk->clksel_reg); +} + +static void omap4_dpllmx_deny_gatectrl(struct clk_hw_omap *clk) +{ + u32 v; + u32 mask; + + if (!clk || !clk->clksel_reg) + return; + + mask = clk->flags & CLOCK_CLKOUTX2 ? + OMAP4430_DPLL_CLKOUTX2_GATE_CTRL_MASK : + OMAP4430_DPLL_CLKOUT_GATE_CTRL_MASK; + + v = ti_clk_ll_ops->clk_readl(clk->clksel_reg); + /* Set the bit to deny gatectrl */ + v |= mask; + ti_clk_ll_ops->clk_writel(v, clk->clksel_reg); +} + +const struct clk_hw_omap_ops clkhwops_omap4_dpllmx = { + .allow_idle = omap4_dpllmx_allow_gatectrl, + .deny_idle = omap4_dpllmx_deny_gatectrl, +}; + +/** + * omap4_dpll_lpmode_recalc - compute DPLL low-power setting + * @dd: pointer to the dpll data structure + * + * Calculates if low-power mode can be enabled based upon the last + * multiplier and divider values calculated. If low-power mode can be + * enabled, then the bit to enable low-power mode is stored in the + * last_rounded_lpmode variable. 
This implementation is based upon the + * criteria for enabling low-power mode as described in the OMAP4430/60 + * Public TRM section 3.6.3.3.2 "Enable Control, Status, and Low-Power + * Operation Mode". + */ +static void omap4_dpll_lpmode_recalc(struct dpll_data *dd) +{ + long fint, fout; + + fint = __clk_get_rate(dd->clk_ref) / (dd->last_rounded_n + 1); + fout = fint * dd->last_rounded_m; + + if ((fint < OMAP4_DPLL_LP_FINT_MAX) && (fout < OMAP4_DPLL_LP_FOUT_MAX)) + dd->last_rounded_lpmode = 1; + else + dd->last_rounded_lpmode = 0; +} + +/** + * omap4_dpll_regm4xen_recalc - compute DPLL rate, considering REGM4XEN bit + * @clk: struct clk * of the DPLL to compute the rate for + * + * Compute the output rate for the OMAP4 DPLL represented by @clk. + * Takes the REGM4XEN bit into consideration, which is needed for the + * OMAP4 ABE DPLL. Returns the DPLL's output rate (before M-dividers) + * upon success, or 0 upon error. + */ +unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + u32 v; + unsigned long rate; + struct dpll_data *dd; + + if (!clk || !clk->dpll_data) + return 0; + + dd = clk->dpll_data; + + rate = omap2_get_dpll_rate(clk); + + /* regm4xen adds a multiplier of 4 to DPLL calculations */ + v = ti_clk_ll_ops->clk_readl(dd->control_reg); + if (v & OMAP4430_DPLL_REGM4XEN_MASK) + rate *= OMAP4430_REGM4XEN_MULT; + + return rate; +} + +/** + * omap4_dpll_regm4xen_round_rate - round DPLL rate, considering REGM4XEN bit + * @clk: struct clk * of the DPLL to round a rate for + * @target_rate: the desired rate of the DPLL + * + * Compute the rate that would be programmed into the DPLL hardware + * for @clk if set_rate() were to be provided with the rate + * @target_rate. Takes the REGM4XEN bit into consideration, which is + * needed for the OMAP4 ABE DPLL. 
Returns the rounded rate (before + * M-dividers) upon success, -EINVAL if @clk is null or not a DPLL, or + * ~0 if an error occurred in omap2_dpll_round_rate(). + */ +long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, + unsigned long target_rate, + unsigned long *parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + long r; + + if (!clk || !clk->dpll_data) + return -EINVAL; + + dd = clk->dpll_data; + + dd->last_rounded_m4xen = 0; + + /* + * First try to compute the DPLL configuration for + * target rate without using the 4X multiplier. + */ + r = omap2_dpll_round_rate(hw, target_rate, NULL); + if (r != ~0) + goto out; + + /* + * If we did not find a valid DPLL configuration, try again, but + * this time see if using the 4X multiplier can help. Enabling the + * 4X multiplier is equivalent to dividing the target rate by 4. + */ + r = omap2_dpll_round_rate(hw, target_rate / OMAP4430_REGM4XEN_MULT, + NULL); + if (r == ~0) + return r; + + dd->last_rounded_rate *= OMAP4430_REGM4XEN_MULT; + dd->last_rounded_m4xen = 1; + +out: + omap4_dpll_lpmode_recalc(dd); + + return dd->last_rounded_rate; +} + +/** + * omap4_dpll_regm4xen_determine_rate - determine rate for a DPLL + * @hw: pointer to the clock to determine rate for + * @rate: target rate for the DPLL + * @best_parent_rate: pointer for returning best parent rate + * @best_parent_clk: pointer for returning best parent clock + * + * Determines which DPLL mode to use for reaching a desired rate. + * Checks whether the DPLL shall be in bypass or locked mode, and if + * locked, calculates the M,N values for the DPLL via round-rate. + * Returns a positive clock rate with success, negative error value + * in failure. 
+ */ +long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate, + unsigned long min_rate, + unsigned long max_rate, + unsigned long *best_parent_rate, + struct clk_hw **best_parent_clk) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + + if (!hw || !rate) + return -EINVAL; + + dd = clk->dpll_data; + if (!dd) + return -EINVAL; + + if (__clk_get_rate(dd->clk_bypass) == rate && + (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { + *best_parent_clk = __clk_get_hw(dd->clk_bypass); + } else { + rate = omap4_dpll_regm4xen_round_rate(hw, rate, + best_parent_rate); + *best_parent_clk = __clk_get_hw(dd->clk_ref); + } + + *best_parent_rate = rate; + + return rate; +} diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 886b2e9d2204..ee59e076340f 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -275,17 +275,6 @@ long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long max_rate, unsigned long *best_parent_rate, struct clk_hw **best_parent_clk); -unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, - unsigned long parent_rate); -long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, - unsigned long target_rate, - unsigned long *parent_rate); -long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk); unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, unsigned long *parent_rate); @@ -314,6 +303,7 @@ int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); void omap2xxx_clkt_vps_init(void); +unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk); void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index); void 
ti_dt_clocks_register(struct ti_dt_clk *oclks); @@ -364,7 +354,6 @@ static inline void of_ti_clk_deny_autoidle_all(void) { } extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; -extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; -- cgit v1.2.3-70-g09d2 From ef14db0977547b1982d4f6eaa305e1a22eb95778 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 2 Mar 2015 14:33:54 +0200 Subject: clk: ti: move interface clock implementation under drivers/clk With the legacy clock support gone, the OMAP interface clock implementation can be moved under the clock driver. Some temporary header file tweaks are also needed to make this change work properly. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 3 +- arch/arm/mach-omap2/clkt_iclk.c | 68 ----------------------------------------- arch/arm/mach-omap2/clock.h | 11 ------- drivers/clk/ti/Makefile | 2 +- drivers/clk/ti/clkt_iclk.c | 66 +++++++++++++++++++++++++++++++++++++++ drivers/clk/ti/clock.h | 2 ++ include/linux/clk/ti.h | 10 ++++-- 7 files changed, 78 insertions(+), 84 deletions(-) delete mode 100644 arch/arm/mach-omap2/clkt_iclk.c create mode 100644 drivers/clk/ti/clkt_iclk.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 5bcd282f04b3..a2f51564e8d4 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -185,13 +185,12 @@ obj-$(CONFIG_SOC_DRA7XX) += clockdomains7xx_data.o obj-$(CONFIG_ARCH_OMAP2) += $(clock-common) clock2xxx.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpllcore.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_virt_prcm_set.o -obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpll.o clkt_iclk.o 
+obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpll.o obj-$(CONFIG_SOC_OMAP2430) += clock2430.o obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) clock3xxx.o obj-$(CONFIG_ARCH_OMAP3) += clock34xx.o clkt34xx_dpll3m2.o obj-$(CONFIG_ARCH_OMAP3) += clock3517.o clock36xx.o obj-$(CONFIG_ARCH_OMAP3) += dpll3xxx.o -obj-$(CONFIG_ARCH_OMAP3) += clkt_iclk.o obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) obj-$(CONFIG_ARCH_OMAP4) += dpll3xxx.o obj-$(CONFIG_SOC_AM33XX) += $(clock-common) dpll3xxx.o diff --git a/arch/arm/mach-omap2/clkt_iclk.c b/arch/arm/mach-omap2/clkt_iclk.c deleted file mode 100644 index 55eb579aeae1..000000000000 --- a/arch/arm/mach-omap2/clkt_iclk.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * OMAP2/3 interface clock control - * - * Copyright (C) 2011 Nokia Corporation - * Paul Walmsley - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#undef DEBUG - -#include -#include -#include - -#include "clock.h" - -/* Register offsets */ -#define CM_AUTOIDLE 0x30 -#define CM_ICLKEN 0x10 - -/* Private functions */ - -/* XXX */ -void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk) -{ - u32 v; - void __iomem *r; - - r = (__force void __iomem *) - ((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN)); - - v = omap2_clk_readl(clk, r); - v |= (1 << clk->enable_bit); - omap2_clk_writel(v, clk, r); -} - -/* XXX */ -void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk) -{ - u32 v; - void __iomem *r; - - r = (__force void __iomem *) - ((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN)); - - v = omap2_clk_readl(clk, r); - v &= ~(1 << clk->enable_bit); - omap2_clk_writel(v, clk, r); -} - -/* Public data */ - -const struct clk_hw_omap_ops clkhwops_iclk = { - .allow_idle = omap2_clkt_iclk_allow_idle, - .deny_idle = omap2_clkt_iclk_deny_idle, -}; - -const struct clk_hw_omap_ops clkhwops_iclk_wait = { - .allow_idle = 
omap2_clkt_iclk_allow_idle, - .deny_idle = omap2_clkt_iclk_deny_idle, - .find_idlest = omap2_clk_dflt_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - - - diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index d7ed2446057c..ca8c42c70db5 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -198,16 +198,6 @@ int omap2_clksel_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate); int omap2_clksel_set_parent(struct clk_hw *hw, u8 field_val); -/* clkt_iclk.c public functions */ -extern void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk); -extern void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk); - -void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, - void __iomem **other_reg, - u8 *other_bit); -void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, u8 *idlest_val); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); int omap2_clk_deny_idle(struct clk *clk); @@ -231,7 +221,6 @@ extern const struct clksel_rate gpt_sys_rates[]; extern const struct clksel_rate gfx_l3_rates[]; extern const struct clksel_rate dsp_ick_rates[]; -extern const struct clk_hw_omap_ops clkhwops_iclk_wait; extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_ssi_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index c3ec3014fb2d..23cd72638970 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -1,7 +1,7 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o \ - clkt_dpll.o + clkt_dpll.o clkt_iclk.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-816x.o obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o diff 
--git a/drivers/clk/ti/clkt_iclk.c b/drivers/clk/ti/clkt_iclk.c new file mode 100644 index 000000000000..a03919df00ef --- /dev/null +++ b/drivers/clk/ti/clkt_iclk.c @@ -0,0 +1,66 @@ +/* + * OMAP2/3 interface clock control + * + * Copyright (C) 2011 Nokia Corporation + * Paul Walmsley + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG + +#include +#include +#include +#include + +#include "clock.h" + +/* Register offsets */ +#define CM_AUTOIDLE 0x30 +#define CM_ICLKEN 0x10 + +/* Private functions */ + +/* XXX */ +void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk) +{ + u32 v; + void __iomem *r; + + r = (__force void __iomem *) + ((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN)); + + v = ti_clk_ll_ops->clk_readl(r); + v |= (1 << clk->enable_bit); + ti_clk_ll_ops->clk_writel(v, r); +} + +/* XXX */ +void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk) +{ + u32 v; + void __iomem *r; + + r = (__force void __iomem *) + ((__force u32)clk->enable_reg ^ (CM_AUTOIDLE ^ CM_ICLKEN)); + + v = ti_clk_ll_ops->clk_readl(r); + v &= ~(1 << clk->enable_bit); + ti_clk_ll_ops->clk_writel(v, r); +} + +/* Public data */ + +const struct clk_hw_omap_ops clkhwops_iclk = { + .allow_idle = omap2_clkt_iclk_allow_idle, + .deny_idle = omap2_clkt_iclk_deny_idle, +}; + +const struct clk_hw_omap_ops clkhwops_iclk_wait = { + .allow_idle = omap2_clkt_iclk_allow_idle, + .deny_idle = omap2_clkt_iclk_deny_idle, + .find_idlest = omap2_clk_dflt_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index c75d4b44cbef..a7256a98201d 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -170,6 +170,8 @@ struct clk *ti_clk_register_clk(struct ti_clk *setup); int ti_clk_register_legacy_clks(struct ti_clk_alias *clks); extern const struct 
clk_hw_omap_ops clkhwops_omap4_dpllmx; +extern const struct clk_hw_omap_ops clkhwops_iclk; +extern const struct clk_hw_omap_ops clkhwops_iclk_wait; u8 omap2_init_dpll_parent(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index ee59e076340f..79e143dfc793 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -296,6 +296,14 @@ int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); int omap2_dflt_clk_is_enabled(struct clk_hw *hw); +void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk); +void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk); +void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, + void __iomem **other_reg, + u8 *other_bit); +void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, u8 *idlest_val); void omap3_clk_lock_dpll5(void); unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, unsigned long parent_rate); @@ -358,8 +366,6 @@ extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; -extern const struct clk_hw_omap_ops clkhwops_iclk; -extern const struct clk_hw_omap_ops clkhwops_iclk_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait; -- cgit v1.2.3-70-g09d2 From bf22bae794d696e411acfcac39b415e160e93834 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 2 Mar 2015 19:06:54 +0200 Subject: clk: ti: autoidle: move generic autoidle handling code to clock driver This is no longer needed in platform directory, as the legacy clock data is gone, so move it 
under TI clock driver. Some static functions are renamed also. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 104 ------------------------------------ arch/arm/mach-omap2/clock.h | 3 -- drivers/clk/ti/autoidle.c | 119 +++++++++++++++++++++++++++++++++++++++--- drivers/clk/ti/clock.h | 3 ++ drivers/clk/ti/fixed-factor.c | 2 + include/linux/clk/ti.h | 13 ++--- 6 files changed, 119 insertions(+), 125 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index cbc65b3a3b62..42ce860e1d4c 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -69,8 +69,6 @@ u16 cpu_mask; */ static bool clkdm_control = true; -static LIST_HEAD(clk_hw_omap_clocks); - struct clk_iomap { struct regmap *regmap; void __iomem *mem; @@ -578,108 +576,6 @@ static int __init omap_clk_setup(char *str) } __setup("mpurate=", omap_clk_setup); -/** - * omap2_init_clk_hw_omap_clocks - initialize an OMAP clock - * @clk: struct clk * to initialize - * - * Add an OMAP clock @clk to the internal list of OMAP clocks. Used - * temporarily for autoidle handling, until this support can be - * integrated into the common clock framework code in some way. No - * return value. - */ -void omap2_init_clk_hw_omap_clocks(struct clk *clk) -{ - struct clk_hw_omap *c; - - if (__clk_get_flags(clk) & CLK_IS_BASIC) - return; - - c = to_clk_hw_omap(__clk_get_hw(clk)); - list_add(&c->node, &clk_hw_omap_clocks); -} - -/** - * omap2_clk_enable_autoidle_all - enable autoidle on all OMAP clocks that - * support it - * - * Enable clock autoidle on all OMAP clocks that have allow_idle - * function pointers associated with them. This function is intended - * to be temporary until support for this is added to the common clock - * code. Returns 0. 
- */ -int omap2_clk_enable_autoidle_all(void) -{ - struct clk_hw_omap *c; - - list_for_each_entry(c, &clk_hw_omap_clocks, node) - if (c->ops && c->ops->allow_idle) - c->ops->allow_idle(c); - - of_ti_clk_allow_autoidle_all(); - - return 0; -} - -/** - * omap2_clk_disable_autoidle_all - disable autoidle on all OMAP clocks that - * support it - * - * Disable clock autoidle on all OMAP clocks that have allow_idle - * function pointers associated with them. This function is intended - * to be temporary until support for this is added to the common clock - * code. Returns 0. - */ -int omap2_clk_disable_autoidle_all(void) -{ - struct clk_hw_omap *c; - - list_for_each_entry(c, &clk_hw_omap_clocks, node) - if (c->ops && c->ops->deny_idle) - c->ops->deny_idle(c); - - of_ti_clk_deny_autoidle_all(); - - return 0; -} - -/** - * omap2_clk_deny_idle - disable autoidle on an OMAP clock - * @clk: struct clk * to disable autoidle for - * - * Disable autoidle on an OMAP clock. - */ -int omap2_clk_deny_idle(struct clk *clk) -{ - struct clk_hw_omap *c; - - if (__clk_get_flags(clk) & CLK_IS_BASIC) - return -EINVAL; - - c = to_clk_hw_omap(__clk_get_hw(clk)); - if (c->ops && c->ops->deny_idle) - c->ops->deny_idle(c); - return 0; -} - -/** - * omap2_clk_allow_idle - enable autoidle on an OMAP clock - * @clk: struct clk * to enable autoidle for - * - * Enable autoidle on an OMAP clock. 
- */ -int omap2_clk_allow_idle(struct clk *clk) -{ - struct clk_hw_omap *c; - - if (__clk_get_flags(clk) & CLK_IS_BASIC) - return -EINVAL; - - c = to_clk_hw_omap(__clk_get_hw(clk)); - if (c->ops && c->ops->allow_idle) - c->ops->allow_idle(c); - return 0; -} - /** * omap2_clk_enable_init_clocks - prepare & enable a list of clocks * @clk_names: ptr to an array of strings of clock names to enable diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index b71d43051c26..950a17ae4f36 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -186,9 +186,6 @@ void omap3_dpll_deny_idle(struct clk_hw_omap *clk); void __init omap2_clk_disable_clkdm_control(void); -int omap2_clk_enable_autoidle_all(void); -int omap2_clk_allow_idle(struct clk *clk); -int omap2_clk_deny_idle(struct clk *clk); int omap2_clk_switch_mpurate_at_boot(const char *mpurate_ck_name); void omap2_clk_print_new_rates(const char *hfclkin_ck_name, const char *core_ck_name, diff --git a/drivers/clk/ti/autoidle.c b/drivers/clk/ti/autoidle.c index e75c64c9e81c..3dbcc3681058 100644 --- a/drivers/clk/ti/autoidle.c +++ b/drivers/clk/ti/autoidle.c @@ -33,8 +33,47 @@ struct clk_ti_autoidle { #define AUTOIDLE_LOW 0x1 static LIST_HEAD(autoidle_clks); +static LIST_HEAD(clk_hw_omap_clocks); -static void ti_allow_autoidle(struct clk_ti_autoidle *clk) +/** + * omap2_clk_deny_idle - disable autoidle on an OMAP clock + * @clk: struct clk * to disable autoidle for + * + * Disable autoidle on an OMAP clock. + */ +int omap2_clk_deny_idle(struct clk *clk) +{ + struct clk_hw_omap *c; + + if (__clk_get_flags(clk) & CLK_IS_BASIC) + return -EINVAL; + + c = to_clk_hw_omap(__clk_get_hw(clk)); + if (c->ops && c->ops->deny_idle) + c->ops->deny_idle(c); + return 0; +} + +/** + * omap2_clk_allow_idle - enable autoidle on an OMAP clock + * @clk: struct clk * to enable autoidle for + * + * Enable autoidle on an OMAP clock. 
+ */ +int omap2_clk_allow_idle(struct clk *clk) +{ + struct clk_hw_omap *c; + + if (__clk_get_flags(clk) & CLK_IS_BASIC) + return -EINVAL; + + c = to_clk_hw_omap(__clk_get_hw(clk)); + if (c->ops && c->ops->allow_idle) + c->ops->allow_idle(c); + return 0; +} + +static void _allow_autoidle(struct clk_ti_autoidle *clk) { u32 val; @@ -48,7 +87,7 @@ static void ti_allow_autoidle(struct clk_ti_autoidle *clk) ti_clk_ll_ops->clk_writel(val, clk->reg); } -static void ti_deny_autoidle(struct clk_ti_autoidle *clk) +static void _deny_autoidle(struct clk_ti_autoidle *clk) { u32 val; @@ -63,31 +102,31 @@ static void ti_deny_autoidle(struct clk_ti_autoidle *clk) } /** - * of_ti_clk_allow_autoidle_all - enable autoidle for all clocks + * _clk_generic_allow_autoidle_all - enable autoidle for all clocks * * Enables hardware autoidle for all registered DT clocks, which have * the feature. */ -void of_ti_clk_allow_autoidle_all(void) +static void _clk_generic_allow_autoidle_all(void) { struct clk_ti_autoidle *c; list_for_each_entry(c, &autoidle_clks, node) - ti_allow_autoidle(c); + _allow_autoidle(c); } /** - * of_ti_clk_deny_autoidle_all - disable autoidle for all clocks + * _clk_generic_deny_autoidle_all - disable autoidle for all clocks * * Disables hardware autoidle for all registered DT clocks, which have * the feature. */ -void of_ti_clk_deny_autoidle_all(void) +static void _clk_generic_deny_autoidle_all(void) { struct clk_ti_autoidle *c; list_for_each_entry(c, &autoidle_clks, node) - ti_deny_autoidle(c); + _deny_autoidle(c); } /** @@ -131,3 +170,67 @@ int __init of_ti_clk_autoidle_setup(struct device_node *node) return 0; } + +/** + * omap2_init_clk_hw_omap_clocks - initialize an OMAP clock + * @clk: struct clk * to initialize + * + * Add an OMAP clock @clk to the internal list of OMAP clocks. Used + * temporarily for autoidle handling, until this support can be + * integrated into the common clock framework code in some way. No + * return value. 
+ */ +void omap2_init_clk_hw_omap_clocks(struct clk *clk) +{ + struct clk_hw_omap *c; + + if (__clk_get_flags(clk) & CLK_IS_BASIC) + return; + + c = to_clk_hw_omap(__clk_get_hw(clk)); + list_add(&c->node, &clk_hw_omap_clocks); +} + +/** + * omap2_clk_enable_autoidle_all - enable autoidle on all OMAP clocks that + * support it + * + * Enable clock autoidle on all OMAP clocks that have allow_idle + * function pointers associated with them. This function is intended + * to be temporary until support for this is added to the common clock + * code. Returns 0. + */ +int omap2_clk_enable_autoidle_all(void) +{ + struct clk_hw_omap *c; + + list_for_each_entry(c, &clk_hw_omap_clocks, node) + if (c->ops && c->ops->allow_idle) + c->ops->allow_idle(c); + + _clk_generic_allow_autoidle_all(); + + return 0; +} + +/** + * omap2_clk_disable_autoidle_all - disable autoidle on all OMAP clocks that + * support it + * + * Disable clock autoidle on all OMAP clocks that have allow_idle + * function pointers associated with them. This function is intended + * to be temporary until support for this is added to the common clock + * code. Returns 0. 
+ */ +int omap2_clk_disable_autoidle_all(void) +{ + struct clk_hw_omap *c; + + list_for_each_entry(c, &clk_hw_omap_clocks, node) + if (c->ops && c->ops->deny_idle) + c->ops->deny_idle(c); + + _clk_generic_deny_autoidle_all(); + + return 0; +} diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index a7256a98201d..9b51021f509a 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -169,6 +169,9 @@ void ti_clk_patch_legacy_clks(struct ti_clk **patch); struct clk *ti_clk_register_clk(struct ti_clk *setup); int ti_clk_register_legacy_clks(struct ti_clk_alias *clks); +void omap2_init_clk_hw_omap_clocks(struct clk *clk); +int of_ti_clk_autoidle_setup(struct device_node *node); + extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_iclk; extern const struct clk_hw_omap_ops clkhwops_iclk_wait; diff --git a/drivers/clk/ti/fixed-factor.c b/drivers/clk/ti/fixed-factor.c index c2c8a287408c..3cd406768909 100644 --- a/drivers/clk/ti/fixed-factor.c +++ b/drivers/clk/ti/fixed-factor.c @@ -22,6 +22,8 @@ #include #include +#include "clock.h" + #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 79e143dfc793..320e107f9a7a 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -259,7 +259,6 @@ extern const struct clk_ops ti_clk_mux_ops; #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) -void omap2_init_clk_hw_omap_clocks(struct clk *clk); int omap3_noncore_dpll_enable(struct clk_hw *hw); void omap3_noncore_dpll_disable(struct clk_hw *hw); int omap3_noncore_dpll_set_parent(struct clk_hw *hw, u8 index); @@ -288,6 +287,9 @@ long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); +int omap2_clk_enable_autoidle_all(void); +int omap2_clk_allow_idle(struct clk 
*clk); +int omap2_clk_deny_idle(struct clk *clk); void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks); int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); @@ -320,7 +322,6 @@ void ti_dt_clk_init_retry_clks(void); void ti_dt_clockdomains_setup(void); int ti_clk_retry_init(struct device_node *node, struct clk_hw *hw, ti_of_clk_init_cb_t func); -int of_ti_clk_autoidle_setup(struct device_node *node); int ti_clk_add_component(struct device_node *node, struct clk_hw *hw, int type); int omap3430_dt_clk_init(void); @@ -351,14 +352,6 @@ struct ti_clk_features { void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); -#ifdef CONFIG_OF -void of_ti_clk_allow_autoidle_all(void); -void of_ti_clk_deny_autoidle_all(void); -#else -static inline void of_ti_clk_allow_autoidle_all(void) { } -static inline void of_ti_clk_deny_autoidle_all(void) { } -#endif - extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; -- cgit v1.2.3-70-g09d2 From a5aa8a603efa25dd41220bff990da025c93b632b Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 10:51:01 +0200 Subject: clk: ti: move omap2_clk_enable_init_clocks under clock driver This is no longer used outside clock driver, so move it under the driver and remove the export for it from the global header file. 
Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 24 ------------------------ drivers/clk/ti/clk-2xxx.c | 2 ++ drivers/clk/ti/clk-33xx.c | 2 ++ drivers/clk/ti/clk-3xxx.c | 1 + drivers/clk/ti/clk-816x.c | 2 ++ drivers/clk/ti/clk.c | 24 ++++++++++++++++++++++++ drivers/clk/ti/clock.h | 1 + include/linux/clk/ti.h | 1 - 8 files changed, 32 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 42ce860e1d4c..234cedf8967d 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -576,30 +576,6 @@ static int __init omap_clk_setup(char *str) } __setup("mpurate=", omap_clk_setup); -/** - * omap2_clk_enable_init_clocks - prepare & enable a list of clocks - * @clk_names: ptr to an array of strings of clock names to enable - * @num_clocks: number of clock names in @clk_names - * - * Prepare and enable a list of clocks, named by @clk_names. No - * return value. XXX Deprecated; only needed until these clocks are - * properly claimed and enabled by the drivers or core code that uses - * them. XXX What code disables & calls clk_put on these clocks? 
- */ -void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks) -{ - struct clk *init_clk; - int i; - - for (i = 0; i < num_clocks; i++) { - init_clk = clk_get(NULL, clk_names[i]); - if (WARN(IS_ERR(init_clk), "could not find init clock %s\n", - clk_names[i])) - continue; - clk_prepare_enable(init_clk); - } -} - const struct clk_hw_omap_ops clkhwops_wait = { .find_idlest = omap2_clk_dflt_find_idlest, .find_companion = omap2_clk_dflt_find_companion, diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c index c808ab3d2bb2..bd8790be2ab1 100644 --- a/drivers/clk/ti/clk-2xxx.c +++ b/drivers/clk/ti/clk-2xxx.c @@ -19,6 +19,8 @@ #include #include +#include "clock.h" + static struct ti_dt_clk omap2xxx_clks[] = { DT_CLK(NULL, "func_32k_ck", "func_32k_ck"), DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"), diff --git a/drivers/clk/ti/clk-33xx.c b/drivers/clk/ti/clk-33xx.c index 028b33783d38..733f9d374d0f 100644 --- a/drivers/clk/ti/clk-33xx.c +++ b/drivers/clk/ti/clk-33xx.c @@ -19,6 +19,8 @@ #include #include +#include "clock.h" + static struct ti_dt_clk am33xx_clks[] = { DT_CLK(NULL, "clk_32768_ck", "clk_32768_ck"), DT_CLK(NULL, "clk_rc32k_ck", "clk_rc32k_ck"), diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 757636d166cf..bb3b88359daf 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -19,6 +19,7 @@ #include #include +#include "clock.h" static struct ti_dt_clk omap3xxx_clks[] = { DT_CLK(NULL, "apb_pclk", "dummy_apb_pclk"), diff --git a/drivers/clk/ti/clk-816x.c b/drivers/clk/ti/clk-816x.c index 9451e651a1ff..c69352b24dba 100644 --- a/drivers/clk/ti/clk-816x.c +++ b/drivers/clk/ti/clk-816x.c @@ -14,6 +14,8 @@ #include #include +#include "clock.h" + static struct ti_dt_clk dm816x_clks[] = { DT_CLK(NULL, "sys_clkin", "sys_clkin_ck"), DT_CLK(NULL, "timer_sys_ck", "sys_clkin_ck"), diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index e65ae4acff9c..5baea03cfc92 100644 --- a/drivers/clk/ti/clk.c +++ 
b/drivers/clk/ti/clk.c @@ -336,3 +336,27 @@ const struct ti_clk_features *ti_clk_get_features(void) { return &ti_clk_features; } + +/** + * omap2_clk_enable_init_clocks - prepare & enable a list of clocks + * @clk_names: ptr to an array of strings of clock names to enable + * @num_clocks: number of clock names in @clk_names + * + * Prepare and enable a list of clocks, named by @clk_names. No + * return value. XXX Deprecated; only needed until these clocks are + * properly claimed and enabled by the drivers or core code that uses + * them. XXX What code disables & calls clk_put on these clocks? + */ +void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks) +{ + struct clk *init_clk; + int i; + + for (i = 0; i < num_clocks; i++) { + init_clk = clk_get(NULL, clk_names[i]); + if (WARN(IS_ERR(init_clk), "could not find init clock %s\n", + clk_names[i])) + continue; + clk_prepare_enable(init_clk); + } +} diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 9b51021f509a..4b26af8a273d 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -171,6 +171,7 @@ int ti_clk_register_legacy_clks(struct ti_clk_alias *clks); void omap2_init_clk_hw_omap_clocks(struct clk *clk); int of_ti_clk_autoidle_setup(struct device_node *node); +void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks); extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_iclk; diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 320e107f9a7a..61deace552ec 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -290,7 +290,6 @@ int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); int omap2_clk_deny_idle(struct clk *clk); -void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks); int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, unsigned long parent_rate); int 
omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, -- cgit v1.2.3-70-g09d2 From 9a356d622e8e559eff50b298e574bbc34e860aba Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 11:14:31 +0200 Subject: ARM: OMAP2+: clock: add support for clkdm ops to the low level clk ops Clock driver requires access to certain clockdomain handling ops once the code is being moved over under clock driver. Example of this is clk_enable / clk_disable under omap3 DPLL code. The required clkdm APIs are now exported through the ti_clk_ll_ops struct. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 2 ++ include/linux/clk/ti.h | 16 +++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 94a4949be9b0..d6afc1291fe9 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -120,6 +120,8 @@ u32 omap2_clk_readl(struct clk_hw_omap *clk, void __iomem *reg) static struct ti_clk_ll_ops omap_clk_ll_ops = { .clk_readl = clk_memmap_readl, .clk_writel = clk_memmap_writel, + .clkdm_clk_enable = clkdm_clk_enable, + .clkdm_clk_disable = clkdm_clk_disable, }; /** diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 61deace552ec..fcf91844e94b 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -238,18 +238,24 @@ struct clk_omap_reg { }; /** - * struct ti_clk_ll_ops - low-level register access ops for a clock + * struct ti_clk_ll_ops - low-level ops for clocks * @clk_readl: pointer to register read function * @clk_writel: pointer to register write function + * @clkdm_clk_enable: pointer to clockdomain enable function + * @clkdm_clk_disable: pointer to clockdomain disable function * - * Low-level register access ops are generally used by the basic clock types - * (clk-gate, clk-mux, clk-divider etc.) 
to provide support for various - * low-level hardware interfaces (direct MMIO, regmap etc.), but can also be - * used by other hardware-specific clock drivers if needed. + * Low-level ops are generally used by the basic clock types (clk-gate, + * clk-mux, clk-divider etc.) to provide support for various low-level + * hardware interfaces (direct MMIO, regmap etc.), and are initialized + * by board code. Low-level ops also contain some other platform specific + * operations not provided directly by clock drivers. */ struct ti_clk_ll_ops { u32 (*clk_readl)(void __iomem *reg); void (*clk_writel)(u32 val, void __iomem *reg); + int (*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk); + int (*clkdm_clk_disable)(struct clockdomain *clkdm, + struct clk *clk); }; extern struct ti_clk_ll_ops *ti_clk_ll_ops; -- cgit v1.2.3-70-g09d2 From 192383d87b876ea9879d8b598af593809a25b7d2 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 13:47:08 +0200 Subject: ARM: OMAP2+: clock: add support for specific CM ops to ti_clk_ll_ops Clock driver requires access to some CM API functions once the code is being moved under the clock driver from the platform directory. Gate type clock requires access to cm_wait_module_ready and cm_split_idlest_reg functions, which are both used for waiting until the module being clocked has been successfully activated. These CM APIs are now exported through the ti_clk_ll_ops struct. 
Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 2 ++ include/linux/clk/ti.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index d6afc1291fe9..7a5713df54b3 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -122,6 +122,8 @@ static struct ti_clk_ll_ops omap_clk_ll_ops = { .clk_writel = clk_memmap_writel, .clkdm_clk_enable = clkdm_clk_enable, .clkdm_clk_disable = clkdm_clk_disable, + .cm_wait_module_ready = omap_cm_wait_module_ready, + .cm_split_idlest_reg = cm_split_idlest_reg, }; /** diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index fcf91844e94b..25eea896627a 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -243,6 +243,8 @@ struct clk_omap_reg { * @clk_writel: pointer to register write function * @clkdm_clk_enable: pointer to clockdomain enable function * @clkdm_clk_disable: pointer to clockdomain disable function + * @cm_wait_module_ready: pointer to CM module wait ready function + * @cm_split_idlest_reg: pointer to CM module function to split idlest reg * * Low-level ops are generally used by the basic clock types (clk-gate, * clk-mux, clk-divider etc.) 
to provide support for various low-level @@ -256,6 +258,10 @@ struct ti_clk_ll_ops { int (*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk); int (*clkdm_clk_disable)(struct clockdomain *clkdm, struct clk *clk); + int (*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg, + u8 idlest_shift); + int (*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst, + u8 *idlest_reg_id); }; extern struct ti_clk_ll_ops *ti_clk_ll_ops; -- cgit v1.2.3-70-g09d2 From 0565fb168d63f89591ce7dcb85438cb19d939a92 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 13:27:48 +0200 Subject: clk: ti: dpll: move omap3 DPLL functionality to clock driver With the legacy clock support gone, OMAP3 generic DPLL code can now be moved over to the clock driver also. A few unused clkoutx2 functions are also removed at the same time. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 8 +- arch/arm/mach-omap2/clock.h | 4 - arch/arm/mach-omap2/clock3xxx.c | 77 ---- arch/arm/mach-omap2/dpll3xxx.c | 818 --------------------------------------- drivers/clk/ti/Makefile | 14 +- drivers/clk/ti/clk-3xxx.c | 31 ++ drivers/clk/ti/clock.h | 27 ++ drivers/clk/ti/dpll3xxx.c | 825 ++++++++++++++++++++++++++++++++++++++++ include/linux/clk/ti.h | 30 -- 9 files changed, 893 insertions(+), 941 deletions(-) delete mode 100644 arch/arm/mach-omap2/dpll3xxx.c create mode 100644 drivers/clk/ti/dpll3xxx.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index bf5d71d9fd2b..f9d4ccf39cea 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -189,15 +189,11 @@ obj-$(CONFIG_SOC_OMAP2430) += clock2430.o obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) clock3xxx.o obj-$(CONFIG_ARCH_OMAP3) += clock34xx.o clkt34xx_dpll3m2.o obj-$(CONFIG_ARCH_OMAP3) += clock3517.o -obj-$(CONFIG_ARCH_OMAP3) += dpll3xxx.o obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) -obj-$(CONFIG_ARCH_OMAP4) += dpll3xxx.o 
-obj-$(CONFIG_SOC_AM33XX) += $(clock-common) dpll3xxx.o +obj-$(CONFIG_SOC_AM33XX) += $(clock-common) obj-$(CONFIG_SOC_OMAP5) += $(clock-common) -obj-$(CONFIG_SOC_OMAP5) += dpll3xxx.o obj-$(CONFIG_SOC_DRA7XX) += $(clock-common) -obj-$(CONFIG_SOC_DRA7XX) += dpll3xxx.o -obj-$(CONFIG_SOC_AM43XX) += $(clock-common) dpll3xxx.o +obj-$(CONFIG_SOC_AM43XX) += $(clock-common) # OMAP2 clock rate set data (old "OPP" data) obj-$(CONFIG_SOC_OMAP2420) += opp2420_data.o diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index e2781b4aaeb4..d60691d5626a 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -180,10 +180,6 @@ struct clksel { #define OMAP4XXX_EN_DPLL_FRBYPASS 0x6 #define OMAP4XXX_EN_DPLL_LOCKED 0x7 -u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk); -void omap3_dpll_allow_idle(struct clk_hw_omap *clk); -void omap3_dpll_deny_idle(struct clk_hw_omap *clk); - void __init omap2_clk_disable_clkdm_control(void); void omap2_clk_print_new_rates(const char *hfclkin_ck_name, diff --git a/arch/arm/mach-omap2/clock3xxx.c b/arch/arm/mach-omap2/clock3xxx.c index 4bd61222aa33..0b0e3a8777d3 100644 --- a/arch/arm/mach-omap2/clock3xxx.c +++ b/arch/arm/mach-omap2/clock3xxx.c @@ -29,82 +29,5 @@ #include "cm2xxx_3xxx.h" #include "cm-regbits-34xx.h" -/* - * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks - * that are sourced by DPLL5, and both of these require this clock - * to be at 120 MHz for proper operation. - */ -#define DPLL5_FREQ_FOR_USBHOST 120000000 - /* needed by omap3_core_dpll_m2_set_rate() */ struct clk *sdrc_ick_p, *arm_fck_p; - -/** - * omap3_dpll4_set_rate - set rate for omap3 per-dpll - * @hw: clock to change - * @rate: target rate for clock - * @parent_rate: rate of the parent clock - * - * Check if the current SoC supports the per-dpll reprogram operation - * or not, and then do the rate change if supported. 
Returns -EINVAL - * if not supported, 0 for success, and potential error codes from the - * clock rate change. - */ -int omap3_dpll4_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - /* - * According to the 12-5 CDP code from TI, "Limitation 2.5" - * on 3430ES1 prevents us from changing DPLL multipliers or dividers - * on DPLL4. - */ - if (ti_clk_get_features()->flags & TI_CLK_DPLL4_DENY_REPROGRAM) { - pr_err("clock: DPLL4 cannot change rate due to silicon 'Limitation 2.5' on 3430ES1.\n"); - return -EINVAL; - } - - return omap3_noncore_dpll_set_rate(hw, rate, parent_rate); -} - -/** - * omap3_dpll4_set_rate_and_parent - set rate and parent for omap3 per-dpll - * @hw: clock to change - * @rate: target rate for clock - * @parent_rate: rate of the parent clock - * @index: parent index, 0 - reference clock, 1 - bypass clock - * - * Check if the current SoC support the per-dpll reprogram operation - * or not, and then do the rate + parent change if supported. Returns - * -EINVAL if not supported, 0 for success, and potential error codes - * from the clock rate change. 
- */ -int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate, u8 index) -{ - if (ti_clk_get_features()->flags & TI_CLK_DPLL4_DENY_REPROGRAM) { - pr_err("clock: DPLL4 cannot change rate due to silicon 'Limitation 2.5' on 3430ES1.\n"); - return -EINVAL; - } - - return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate, - index); -} - -void __init omap3_clk_lock_dpll5(void) -{ - struct clk *dpll5_clk; - struct clk *dpll5_m2_clk; - - dpll5_clk = clk_get(NULL, "dpll5_ck"); - clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST); - clk_prepare_enable(dpll5_clk); - - /* Program dpll5_m2_clk divider for no division */ - dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck"); - clk_prepare_enable(dpll5_m2_clk); - clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST); - - clk_disable_unprepare(dpll5_m2_clk); - clk_disable_unprepare(dpll5_clk); - return; -} diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c deleted file mode 100644 index 9a80f593ed15..000000000000 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ /dev/null @@ -1,818 +0,0 @@ -/* - * OMAP3/4 - specific DPLL control functions - * - * Copyright (C) 2009-2010 Texas Instruments, Inc. - * Copyright (C) 2009-2010 Nokia Corporation - * - * Written by Paul Walmsley - * Testing and integration fixes by Jouni Högander - * - * 36xx support added by Vishwanath BS, Richard Woodruff, and Nishanth - * Menon - * - * Parts of this code are based on code written by - * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "clockdomain.h" -#include "clock.h" - -/* CM_AUTOIDLE_PLL*.AUTO_* bit values */ -#define DPLL_AUTOIDLE_DISABLE 0x0 -#define DPLL_AUTOIDLE_LOW_POWER_STOP 0x1 - -#define MAX_DPLL_WAIT_TRIES 1000000 - -/* Private functions */ - -/* _omap3_dpll_write_clken - write clken_bits arg to a DPLL's enable bits */ -static void _omap3_dpll_write_clken(struct clk_hw_omap *clk, u8 clken_bits) -{ - const struct dpll_data *dd; - u32 v; - - dd = clk->dpll_data; - - v = omap2_clk_readl(clk, dd->control_reg); - v &= ~dd->enable_mask; - v |= clken_bits << __ffs(dd->enable_mask); - omap2_clk_writel(v, clk, dd->control_reg); -} - -/* _omap3_wait_dpll_status: wait for a DPLL to enter a specific state */ -static int _omap3_wait_dpll_status(struct clk_hw_omap *clk, u8 state) -{ - const struct dpll_data *dd; - int i = 0; - int ret = -EINVAL; - const char *clk_name; - - dd = clk->dpll_data; - clk_name = __clk_get_name(clk->hw.clk); - - state <<= __ffs(dd->idlest_mask); - - while (((omap2_clk_readl(clk, dd->idlest_reg) & dd->idlest_mask) - != state) && i < MAX_DPLL_WAIT_TRIES) { - i++; - udelay(1); - } - - if (i == MAX_DPLL_WAIT_TRIES) { - printk(KERN_ERR "clock: %s failed transition to '%s'\n", - clk_name, (state) ? "locked" : "bypassed"); - } else { - pr_debug("clock: %s transition to '%s' in %d loops\n", - clk_name, (state) ? 
"locked" : "bypassed", i); - - ret = 0; - } - - return ret; -} - -/* From 3430 TRM ES2 4.7.6.2 */ -static u16 _omap3_dpll_compute_freqsel(struct clk_hw_omap *clk, u8 n) -{ - unsigned long fint; - u16 f = 0; - - fint = __clk_get_rate(clk->dpll_data->clk_ref) / n; - - pr_debug("clock: fint is %lu\n", fint); - - if (fint >= 750000 && fint <= 1000000) - f = 0x3; - else if (fint > 1000000 && fint <= 1250000) - f = 0x4; - else if (fint > 1250000 && fint <= 1500000) - f = 0x5; - else if (fint > 1500000 && fint <= 1750000) - f = 0x6; - else if (fint > 1750000 && fint <= 2100000) - f = 0x7; - else if (fint > 7500000 && fint <= 10000000) - f = 0xB; - else if (fint > 10000000 && fint <= 12500000) - f = 0xC; - else if (fint > 12500000 && fint <= 15000000) - f = 0xD; - else if (fint > 15000000 && fint <= 17500000) - f = 0xE; - else if (fint > 17500000 && fint <= 21000000) - f = 0xF; - else - pr_debug("clock: unknown freqsel setting for %d\n", n); - - return f; -} - -/* - * _omap3_noncore_dpll_lock - instruct a DPLL to lock and wait for readiness - * @clk: pointer to a DPLL struct clk - * - * Instructs a non-CORE DPLL to lock. Waits for the DPLL to report - * readiness before returning. Will save and restore the DPLL's - * autoidle state across the enable, per the CDP code. If the DPLL - * locked successfully, return 0; if the DPLL did not lock in the time - * allotted, or DPLL3 was passed in, return -EINVAL. 
- */ -static int _omap3_noncore_dpll_lock(struct clk_hw_omap *clk) -{ - const struct dpll_data *dd; - u8 ai; - u8 state = 1; - int r = 0; - - pr_debug("clock: locking DPLL %s\n", __clk_get_name(clk->hw.clk)); - - dd = clk->dpll_data; - state <<= __ffs(dd->idlest_mask); - - /* Check if already locked */ - if ((omap2_clk_readl(clk, dd->idlest_reg) & dd->idlest_mask) == state) - goto done; - - ai = omap3_dpll_autoidle_read(clk); - - if (ai) - omap3_dpll_deny_idle(clk); - - _omap3_dpll_write_clken(clk, DPLL_LOCKED); - - r = _omap3_wait_dpll_status(clk, 1); - - if (ai) - omap3_dpll_allow_idle(clk); - -done: - return r; -} - -/* - * _omap3_noncore_dpll_bypass - instruct a DPLL to bypass and wait for readiness - * @clk: pointer to a DPLL struct clk - * - * Instructs a non-CORE DPLL to enter low-power bypass mode. In - * bypass mode, the DPLL's rate is set equal to its parent clock's - * rate. Waits for the DPLL to report readiness before returning. - * Will save and restore the DPLL's autoidle state across the enable, - * per the CDP code. If the DPLL entered bypass mode successfully, - * return 0; if the DPLL did not enter bypass in the time allotted, or - * DPLL3 was passed in, or the DPLL does not support low-power bypass, - * return -EINVAL. - */ -static int _omap3_noncore_dpll_bypass(struct clk_hw_omap *clk) -{ - int r; - u8 ai; - - if (!(clk->dpll_data->modes & (1 << DPLL_LOW_POWER_BYPASS))) - return -EINVAL; - - pr_debug("clock: configuring DPLL %s for low-power bypass\n", - __clk_get_name(clk->hw.clk)); - - ai = omap3_dpll_autoidle_read(clk); - - _omap3_dpll_write_clken(clk, DPLL_LOW_POWER_BYPASS); - - r = _omap3_wait_dpll_status(clk, 0); - - if (ai) - omap3_dpll_allow_idle(clk); - - return r; -} - -/* - * _omap3_noncore_dpll_stop - instruct a DPLL to stop - * @clk: pointer to a DPLL struct clk - * - * Instructs a non-CORE DPLL to enter low-power stop. Will save and - * restore the DPLL's autoidle state across the stop, per the CDP - * code. 
If DPLL3 was passed in, or the DPLL does not support - * low-power stop, return -EINVAL; otherwise, return 0. - */ -static int _omap3_noncore_dpll_stop(struct clk_hw_omap *clk) -{ - u8 ai; - - if (!(clk->dpll_data->modes & (1 << DPLL_LOW_POWER_STOP))) - return -EINVAL; - - pr_debug("clock: stopping DPLL %s\n", __clk_get_name(clk->hw.clk)); - - ai = omap3_dpll_autoidle_read(clk); - - _omap3_dpll_write_clken(clk, DPLL_LOW_POWER_STOP); - - if (ai) - omap3_dpll_allow_idle(clk); - - return 0; -} - -/** - * _lookup_dco - Lookup DCO used by j-type DPLL - * @clk: pointer to a DPLL struct clk - * @dco: digital control oscillator selector - * @m: DPLL multiplier to set - * @n: DPLL divider to set - * - * See 36xx TRM section 3.5.3.3.3.2 "Type B DPLL (Low-Jitter)" - * - * XXX This code is not needed for 3430/AM35xx; can it be optimized - * out in non-multi-OMAP builds for those chips? - */ -static void _lookup_dco(struct clk_hw_omap *clk, u8 *dco, u16 m, u8 n) -{ - unsigned long fint, clkinp; /* watch out for overflow */ - - clkinp = __clk_get_rate(__clk_get_parent(clk->hw.clk)); - fint = (clkinp / n) * m; - - if (fint < 1000000000) - *dco = 2; - else - *dco = 4; -} - -/** - * _lookup_sddiv - Calculate sigma delta divider for j-type DPLL - * @clk: pointer to a DPLL struct clk - * @sd_div: target sigma-delta divider - * @m: DPLL multiplier to set - * @n: DPLL divider to set - * - * See 36xx TRM section 3.5.3.3.3.2 "Type B DPLL (Low-Jitter)" - * - * XXX This code is not needed for 3430/AM35xx; can it be optimized - * out in non-multi-OMAP builds for those chips? 
- */ -static void _lookup_sddiv(struct clk_hw_omap *clk, u8 *sd_div, u16 m, u8 n) -{ - unsigned long clkinp, sd; /* watch out for overflow */ - int mod1, mod2; - - clkinp = __clk_get_rate(__clk_get_parent(clk->hw.clk)); - - /* - * target sigma-delta to near 250MHz - * sd = ceil[(m/(n+1)) * (clkinp_MHz / 250)] - */ - clkinp /= 100000; /* shift from MHz to 10*Hz for 38.4 and 19.2 */ - mod1 = (clkinp * m) % (250 * n); - sd = (clkinp * m) / (250 * n); - mod2 = sd % 10; - sd /= 10; - - if (mod1 || mod2) - sd++; - *sd_div = sd; -} - -/* - * _omap3_noncore_dpll_program - set non-core DPLL M,N values directly - * @clk: struct clk * of DPLL to set - * @freqsel: FREQSEL value to set - * - * Program the DPLL with the last M, N values calculated, and wait for - * the DPLL to lock. Returns -EINVAL upon error, or 0 upon success. - */ -static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel) -{ - struct dpll_data *dd = clk->dpll_data; - u8 dco, sd_div; - u32 v; - - /* 3430 ES2 TRM: 4.7.6.9 DPLL Programming Sequence */ - _omap3_noncore_dpll_bypass(clk); - - /* - * Set jitter correction. Jitter correction applicable for OMAP343X - * only since freqsel field is no longer present on other devices. 
- */ - if (ti_clk_get_features()->flags & TI_CLK_DPLL_HAS_FREQSEL) { - v = omap2_clk_readl(clk, dd->control_reg); - v &= ~dd->freqsel_mask; - v |= freqsel << __ffs(dd->freqsel_mask); - omap2_clk_writel(v, clk, dd->control_reg); - } - - /* Set DPLL multiplier, divider */ - v = omap2_clk_readl(clk, dd->mult_div1_reg); - - /* Handle Duty Cycle Correction */ - if (dd->dcc_mask) { - if (dd->last_rounded_rate >= dd->dcc_rate) - v |= dd->dcc_mask; /* Enable DCC */ - else - v &= ~dd->dcc_mask; /* Disable DCC */ - } - - v &= ~(dd->mult_mask | dd->div1_mask); - v |= dd->last_rounded_m << __ffs(dd->mult_mask); - v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); - - /* Configure dco and sd_div for dplls that have these fields */ - if (dd->dco_mask) { - _lookup_dco(clk, &dco, dd->last_rounded_m, dd->last_rounded_n); - v &= ~(dd->dco_mask); - v |= dco << __ffs(dd->dco_mask); - } - if (dd->sddiv_mask) { - _lookup_sddiv(clk, &sd_div, dd->last_rounded_m, - dd->last_rounded_n); - v &= ~(dd->sddiv_mask); - v |= sd_div << __ffs(dd->sddiv_mask); - } - - omap2_clk_writel(v, clk, dd->mult_div1_reg); - - /* Set 4X multiplier and low-power mode */ - if (dd->m4xen_mask || dd->lpmode_mask) { - v = omap2_clk_readl(clk, dd->control_reg); - - if (dd->m4xen_mask) { - if (dd->last_rounded_m4xen) - v |= dd->m4xen_mask; - else - v &= ~dd->m4xen_mask; - } - - if (dd->lpmode_mask) { - if (dd->last_rounded_lpmode) - v |= dd->lpmode_mask; - else - v &= ~dd->lpmode_mask; - } - - omap2_clk_writel(v, clk, dd->control_reg); - } - - /* We let the clock framework set the other output dividers later */ - - /* REVISIT: Set ramp-up delay? */ - - _omap3_noncore_dpll_lock(clk); - - return 0; -} - -/* Public functions */ - -/** - * omap3_dpll_recalc - recalculate DPLL rate - * @clk: DPLL struct clk - * - * Recalculate and propagate the DPLL rate. 
- */ -unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - - return omap2_get_dpll_rate(clk); -} - -/* Non-CORE DPLL (e.g., DPLLs that do not control SDRC) clock functions */ - -/** - * omap3_noncore_dpll_enable - instruct a DPLL to enter bypass or lock mode - * @clk: pointer to a DPLL struct clk - * - * Instructs a non-CORE DPLL to enable, e.g., to enter bypass or lock. - * The choice of modes depends on the DPLL's programmed rate: if it is - * the same as the DPLL's parent clock, it will enter bypass; - * otherwise, it will enter lock. This code will wait for the DPLL to - * indicate readiness before returning, unless the DPLL takes too long - * to enter the target state. Intended to be used as the struct clk's - * enable function. If DPLL3 was passed in, or the DPLL does not - * support low-power stop, or if the DPLL took too long to enter - * bypass or lock, return -EINVAL; otherwise, return 0. - */ -int omap3_noncore_dpll_enable(struct clk_hw *hw) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - int r; - struct dpll_data *dd; - struct clk_hw *parent; - - dd = clk->dpll_data; - if (!dd) - return -EINVAL; - - if (clk->clkdm) { - r = clkdm_clk_enable(clk->clkdm, hw->clk); - if (r) { - WARN(1, - "%s: could not enable %s's clockdomain %s: %d\n", - __func__, __clk_get_name(hw->clk), - clk->clkdm->name, r); - return r; - } - } - - parent = __clk_get_hw(__clk_get_parent(hw->clk)); - - if (__clk_get_rate(hw->clk) == __clk_get_rate(dd->clk_bypass)) { - WARN_ON(parent != __clk_get_hw(dd->clk_bypass)); - r = _omap3_noncore_dpll_bypass(clk); - } else { - WARN_ON(parent != __clk_get_hw(dd->clk_ref)); - r = _omap3_noncore_dpll_lock(clk); - } - - return r; -} - -/** - * omap3_noncore_dpll_disable - instruct a DPLL to enter low-power stop - * @clk: pointer to a DPLL struct clk - * - * Instructs a non-CORE DPLL to enter low-power stop. This function is - * intended for use in struct clkops. 
No return value. - */ -void omap3_noncore_dpll_disable(struct clk_hw *hw) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - - _omap3_noncore_dpll_stop(clk); - if (clk->clkdm) - clkdm_clk_disable(clk->clkdm, hw->clk); -} - - -/* Non-CORE DPLL rate set code */ - -/** - * omap3_noncore_dpll_determine_rate - determine rate for a DPLL - * @hw: pointer to the clock to determine rate for - * @rate: target rate for the DPLL - * @best_parent_rate: pointer for returning best parent rate - * @best_parent_clk: pointer for returning best parent clock - * - * Determines which DPLL mode to use for reaching a desired target rate. - * Checks whether the DPLL shall be in bypass or locked mode, and if - * locked, calculates the M,N values for the DPLL via round-rate. - * Returns a positive clock rate with success, negative error value - * in failure. - */ -long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - struct dpll_data *dd; - - if (!hw || !rate) - return -EINVAL; - - dd = clk->dpll_data; - if (!dd) - return -EINVAL; - - if (__clk_get_rate(dd->clk_bypass) == rate && - (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - *best_parent_clk = __clk_get_hw(dd->clk_bypass); - } else { - rate = omap2_dpll_round_rate(hw, rate, best_parent_rate); - *best_parent_clk = __clk_get_hw(dd->clk_ref); - } - - *best_parent_rate = rate; - - return rate; -} - -/** - * omap3_noncore_dpll_set_parent - set parent for a DPLL clock - * @hw: pointer to the clock to set parent for - * @index: parent index to select - * - * Sets parent for a DPLL clock. This sets the DPLL into bypass or - * locked mode. Returns 0 with success, negative error value otherwise. 
- */ -int omap3_noncore_dpll_set_parent(struct clk_hw *hw, u8 index) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - int ret; - - if (!hw) - return -EINVAL; - - if (index) - ret = _omap3_noncore_dpll_bypass(clk); - else - ret = _omap3_noncore_dpll_lock(clk); - - return ret; -} - -/** - * omap3_noncore_dpll_set_rate - set rate for a DPLL clock - * @hw: pointer to the clock to set parent for - * @rate: target rate for the clock - * @parent_rate: rate of the parent clock - * - * Sets rate for a DPLL clock. First checks if the clock parent is - * reference clock (in bypass mode, the rate of the clock can't be - * changed) and proceeds with the rate change operation. Returns 0 - * with success, negative error value otherwise. - */ -int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - struct dpll_data *dd; - u16 freqsel = 0; - int ret; - - if (!hw || !rate) - return -EINVAL; - - dd = clk->dpll_data; - if (!dd) - return -EINVAL; - - if (__clk_get_hw(__clk_get_parent(hw->clk)) != - __clk_get_hw(dd->clk_ref)) - return -EINVAL; - - if (dd->last_rounded_rate == 0) - return -EINVAL; - - /* Freqsel is available only on OMAP343X devices */ - if (ti_clk_get_features()->flags & TI_CLK_DPLL_HAS_FREQSEL) { - freqsel = _omap3_dpll_compute_freqsel(clk, dd->last_rounded_n); - WARN_ON(!freqsel); - } - - pr_debug("%s: %s: set rate: locking rate to %lu.\n", __func__, - __clk_get_name(hw->clk), rate); - - ret = omap3_noncore_dpll_program(clk, freqsel); - - return ret; -} - -/** - * omap3_noncore_dpll_set_rate_and_parent - set rate and parent for a DPLL clock - * @hw: pointer to the clock to set rate and parent for - * @rate: target rate for the DPLL - * @parent_rate: clock rate of the DPLL parent - * @index: new parent index for the DPLL, 0 - reference, 1 - bypass - * - * Sets rate and parent for a DPLL clock. If new parent is the bypass - * clock, only selects the parent. 
Otherwise proceeds with a rate - * change, as this will effectively also change the parent as the - * DPLL is put into locked mode. Returns 0 with success, negative error - * value otherwise. - */ -int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw, - unsigned long rate, - unsigned long parent_rate, - u8 index) -{ - int ret; - - if (!hw || !rate) - return -EINVAL; - - /* - * clk-ref at index[0], in which case we only need to set rate, - * the parent will be changed automatically with the lock sequence. - * With clk-bypass case we only need to change parent. - */ - if (index) - ret = omap3_noncore_dpll_set_parent(hw, index); - else - ret = omap3_noncore_dpll_set_rate(hw, rate, parent_rate); - - return ret; -} - -/* DPLL autoidle read/set code */ - -/** - * omap3_dpll_autoidle_read - read a DPLL's autoidle bits - * @clk: struct clk * of the DPLL to read - * - * Return the DPLL's autoidle bits, shifted down to bit 0. Returns - * -EINVAL if passed a null pointer or if the struct clk does not - * appear to refer to a DPLL. - */ -u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk) -{ - const struct dpll_data *dd; - u32 v; - - if (!clk || !clk->dpll_data) - return -EINVAL; - - dd = clk->dpll_data; - - if (!dd->autoidle_reg) - return -EINVAL; - - v = omap2_clk_readl(clk, dd->autoidle_reg); - v &= dd->autoidle_mask; - v >>= __ffs(dd->autoidle_mask); - - return v; -} - -/** - * omap3_dpll_allow_idle - enable DPLL autoidle bits - * @clk: struct clk * of the DPLL to operate on - * - * Enable DPLL automatic idle control. This automatic idle mode - * switching takes effect only when the DPLL is locked, at least on - * OMAP3430. The DPLL will enter low-power stop when its downstream - * clocks are gated. No return value. 
- */ -void omap3_dpll_allow_idle(struct clk_hw_omap *clk) -{ - const struct dpll_data *dd; - u32 v; - - if (!clk || !clk->dpll_data) - return; - - dd = clk->dpll_data; - - if (!dd->autoidle_reg) - return; - - /* - * REVISIT: CORE DPLL can optionally enter low-power bypass - * by writing 0x5 instead of 0x1. Add some mechanism to - * optionally enter this mode. - */ - v = omap2_clk_readl(clk, dd->autoidle_reg); - v &= ~dd->autoidle_mask; - v |= DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask); - omap2_clk_writel(v, clk, dd->autoidle_reg); - -} - -/** - * omap3_dpll_deny_idle - prevent DPLL from automatically idling - * @clk: struct clk * of the DPLL to operate on - * - * Disable DPLL automatic idle control. No return value. - */ -void omap3_dpll_deny_idle(struct clk_hw_omap *clk) -{ - const struct dpll_data *dd; - u32 v; - - if (!clk || !clk->dpll_data) - return; - - dd = clk->dpll_data; - - if (!dd->autoidle_reg) - return; - - v = omap2_clk_readl(clk, dd->autoidle_reg); - v &= ~dd->autoidle_mask; - v |= DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask); - omap2_clk_writel(v, clk, dd->autoidle_reg); - -} - -/* Clock control for DPLL outputs */ - -/* Find the parent DPLL for the given clkoutx2 clock */ -static struct clk_hw_omap *omap3_find_clkoutx2_dpll(struct clk_hw *hw) -{ - struct clk_hw_omap *pclk = NULL; - struct clk *parent; - - /* Walk up the parents of clk, looking for a DPLL */ - do { - do { - parent = __clk_get_parent(hw->clk); - hw = __clk_get_hw(parent); - } while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC)); - if (!hw) - break; - pclk = to_clk_hw_omap(hw); - } while (pclk && !pclk->dpll_data); - - /* clk does not have a DPLL as a parent? error in the clock data */ - if (!pclk) { - WARN_ON(1); - return NULL; - } - - return pclk; -} - -/** - * omap3_clkoutx2_recalc - recalculate DPLL X2 output virtual clock rate - * @clk: DPLL output struct clk - * - * Using parent clock DPLL data, look up DPLL state. 
If locked, set our - * rate to the dpll_clk * 2; otherwise, just use dpll_clk. - */ -unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, - unsigned long parent_rate) -{ - const struct dpll_data *dd; - unsigned long rate; - u32 v; - struct clk_hw_omap *pclk = NULL; - - if (!parent_rate) - return 0; - - pclk = omap3_find_clkoutx2_dpll(hw); - - if (!pclk) - return 0; - - dd = pclk->dpll_data; - - WARN_ON(!dd->enable_mask); - - v = omap2_clk_readl(pclk, dd->control_reg) & dd->enable_mask; - v >>= __ffs(dd->enable_mask); - if ((v != OMAP3XXX_EN_DPLL_LOCKED) || (dd->flags & DPLL_J_TYPE)) - rate = parent_rate; - else - rate = parent_rate * 2; - return rate; -} - -int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate) -{ - return 0; -} - -long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate) -{ - const struct dpll_data *dd; - u32 v; - struct clk_hw_omap *pclk = NULL; - - if (!*prate) - return 0; - - pclk = omap3_find_clkoutx2_dpll(hw); - - if (!pclk) - return 0; - - dd = pclk->dpll_data; - - /* TYPE J does not have a clkoutx2 */ - if (dd->flags & DPLL_J_TYPE) { - *prate = __clk_round_rate(__clk_get_parent(pclk->hw.clk), rate); - return *prate; - } - - WARN_ON(!dd->enable_mask); - - v = omap2_clk_readl(pclk, dd->control_reg) & dd->enable_mask; - v >>= __ffs(dd->enable_mask); - - /* If in bypass, the rate is fixed to the bypass rate*/ - if (v != OMAP3XXX_EN_DPLL_LOCKED) - return *prate; - - if (__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT) { - unsigned long best_parent; - - best_parent = (rate / 2); - *prate = __clk_round_rate(__clk_get_parent(hw->clk), - best_parent); - } - - return *prate * 2; -} - -/* OMAP3/4 non-CORE DPLL clkops */ -const struct clk_hw_omap_ops clkhwops_omap3_dpll = { - .allow_idle = omap3_dpll_allow_idle, - .deny_idle = omap3_dpll_deny_idle, -}; diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 23cd72638970..05a0294aba10 100644 --- 
a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -2,16 +2,18 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o \ clkt_dpll.o clkt_iclk.o -obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o +obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o dpll3xxx.o obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-816x.o obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o obj-$(CONFIG_ARCH_OMAP3) += $(clk-common) interface.o \ - clk-3xxx.o -obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o dpll44xx.o -obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o dpll44xx.o + clk-3xxx.o dpll3xxx.o +obj-$(CONFIG_ARCH_OMAP4) += $(clk-common) clk-44xx.o \ + dpll3xxx.o dpll44xx.o +obj-$(CONFIG_SOC_OMAP5) += $(clk-common) clk-54xx.o \ + dpll3xxx.o dpll44xx.o obj-$(CONFIG_SOC_DRA7XX) += $(clk-common) clk-7xx.o \ - clk-dra7-atl.o dpll44xx.o -obj-$(CONFIG_SOC_AM43XX) += $(clk-common) clk-43xx.o + clk-dra7-atl.o dpll3xxx.o dpll44xx.o +obj-$(CONFIG_SOC_AM43XX) += $(clk-common) dpll3xxx.o clk-43xx.o ifdef CONFIG_ATAGS obj-$(CONFIG_ARCH_OMAP3) += clk-3xxx-legacy.o diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index bb3b88359daf..5489ad8c07d4 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -21,6 +21,13 @@ #include "clock.h" +/* + * DPLL5_FREQ_FOR_USBHOST: USBHOST and USBTLL are the only clocks + * that are sourced by DPLL5, and both of these require this clock + * to be at 120 MHz for proper operation. + */ +#define DPLL5_FREQ_FOR_USBHOST 120000000 + static struct ti_dt_clk omap3xxx_clks[] = { DT_CLK(NULL, "apb_pclk", "dummy_apb_pclk"), DT_CLK(NULL, "omap_32k_fck", "omap_32k_fck"), @@ -325,6 +332,30 @@ enum { OMAP3_SOC_OMAP3630, }; +/** + * omap3_clk_lock_dpll5 - locks DPLL5 + * + * Locks DPLL5 to a pre-defined frequency. This is required for proper + * operation of USB. 
+ */ +void __init omap3_clk_lock_dpll5(void) +{ + struct clk *dpll5_clk; + struct clk *dpll5_m2_clk; + + dpll5_clk = clk_get(NULL, "dpll5_ck"); + clk_set_rate(dpll5_clk, DPLL5_FREQ_FOR_USBHOST); + clk_prepare_enable(dpll5_clk); + + /* Program dpll5_m2_clk divider for no division */ + dpll5_m2_clk = clk_get(NULL, "dpll5_m2_ck"); + clk_prepare_enable(dpll5_m2_clk); + clk_set_rate(dpll5_m2_clk, DPLL5_FREQ_FOR_USBHOST); + + clk_disable_unprepare(dpll5_m2_clk); + clk_disable_unprepare(dpll5_clk); +} + static int __init omap3xxx_dt_clk_init(int soc_type) { if (soc_type == OMAP3_SOC_AM35XX || soc_type == OMAP3_SOC_OMAP3630 || diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 4b26af8a273d..688d9e47b2c8 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -173,11 +173,38 @@ void omap2_init_clk_hw_omap_clocks(struct clk *clk); int of_ti_clk_autoidle_setup(struct device_node *node); void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks); +extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_iclk; extern const struct clk_hw_omap_ops clkhwops_iclk_wait; u8 omap2_init_dpll_parent(struct clk_hw *hw); +int omap3_noncore_dpll_enable(struct clk_hw *hw); +void omap3_noncore_dpll_disable(struct clk_hw *hw); +int omap3_noncore_dpll_set_parent(struct clk_hw *hw, u8 index); +int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate); +int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate, + u8 index); +long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, + unsigned long rate, + unsigned long min_rate, + unsigned long max_rate, + unsigned long *best_parent_rate, + struct clk_hw **best_parent_clk); +long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, + unsigned long *parent_rate); +unsigned long 
omap3_clkoutx2_recalc(struct clk_hw *hw, + unsigned long parent_rate); + +unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); +int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, + unsigned long parent_rate); +int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate, u8 index); +void omap3_clk_lock_dpll5(void); unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, unsigned long parent_rate); diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c new file mode 100644 index 000000000000..22d77a331287 --- /dev/null +++ b/drivers/clk/ti/dpll3xxx.c @@ -0,0 +1,825 @@ +/* + * OMAP3/4 - specific DPLL control functions + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * Copyright (C) 2009-2010 Nokia Corporation + * + * Written by Paul Walmsley + * Testing and integration fixes by Jouni Högander + * + * 36xx support added by Vishwanath BS, Richard Woodruff, and Nishanth + * Menon + * + * Parts of this code are based on code written by + * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "clock.h" + +/* CM_AUTOIDLE_PLL*.AUTO_* bit values */ +#define DPLL_AUTOIDLE_DISABLE 0x0 +#define DPLL_AUTOIDLE_LOW_POWER_STOP 0x1 + +#define MAX_DPLL_WAIT_TRIES 1000000 + +#define OMAP3XXX_EN_DPLL_LOCKED 0x7 + +/* Forward declarations */ +static u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk); +static void omap3_dpll_deny_idle(struct clk_hw_omap *clk); +static void omap3_dpll_allow_idle(struct clk_hw_omap *clk); + +/* Private functions */ + +/* _omap3_dpll_write_clken - write clken_bits arg to a DPLL's enable bits */ +static void _omap3_dpll_write_clken(struct clk_hw_omap *clk, u8 clken_bits) +{ + const struct dpll_data *dd; + u32 v; + + dd = clk->dpll_data; + + v = ti_clk_ll_ops->clk_readl(dd->control_reg); + v &= ~dd->enable_mask; + v |= clken_bits << __ffs(dd->enable_mask); + ti_clk_ll_ops->clk_writel(v, dd->control_reg); +} + +/* _omap3_wait_dpll_status: wait for a DPLL to enter a specific state */ +static int _omap3_wait_dpll_status(struct clk_hw_omap *clk, u8 state) +{ + const struct dpll_data *dd; + int i = 0; + int ret = -EINVAL; + const char *clk_name; + + dd = clk->dpll_data; + clk_name = __clk_get_name(clk->hw.clk); + + state <<= __ffs(dd->idlest_mask); + + while (((ti_clk_ll_ops->clk_readl(dd->idlest_reg) & dd->idlest_mask) + != state) && i < MAX_DPLL_WAIT_TRIES) { + i++; + udelay(1); + } + + if (i == MAX_DPLL_WAIT_TRIES) { + pr_err("clock: %s failed transition to '%s'\n", + clk_name, (state) ? "locked" : "bypassed"); + } else { + pr_debug("clock: %s transition to '%s' in %d loops\n", + clk_name, (state) ? 
"locked" : "bypassed", i); + + ret = 0; + } + + return ret; +} + +/* From 3430 TRM ES2 4.7.6.2 */ +static u16 _omap3_dpll_compute_freqsel(struct clk_hw_omap *clk, u8 n) +{ + unsigned long fint; + u16 f = 0; + + fint = __clk_get_rate(clk->dpll_data->clk_ref) / n; + + pr_debug("clock: fint is %lu\n", fint); + + if (fint >= 750000 && fint <= 1000000) + f = 0x3; + else if (fint > 1000000 && fint <= 1250000) + f = 0x4; + else if (fint > 1250000 && fint <= 1500000) + f = 0x5; + else if (fint > 1500000 && fint <= 1750000) + f = 0x6; + else if (fint > 1750000 && fint <= 2100000) + f = 0x7; + else if (fint > 7500000 && fint <= 10000000) + f = 0xB; + else if (fint > 10000000 && fint <= 12500000) + f = 0xC; + else if (fint > 12500000 && fint <= 15000000) + f = 0xD; + else if (fint > 15000000 && fint <= 17500000) + f = 0xE; + else if (fint > 17500000 && fint <= 21000000) + f = 0xF; + else + pr_debug("clock: unknown freqsel setting for %d\n", n); + + return f; +} + +/* + * _omap3_noncore_dpll_lock - instruct a DPLL to lock and wait for readiness + * @clk: pointer to a DPLL struct clk + * + * Instructs a non-CORE DPLL to lock. Waits for the DPLL to report + * readiness before returning. Will save and restore the DPLL's + * autoidle state across the enable, per the CDP code. If the DPLL + * locked successfully, return 0; if the DPLL did not lock in the time + * allotted, or DPLL3 was passed in, return -EINVAL. 
+ */ +static int _omap3_noncore_dpll_lock(struct clk_hw_omap *clk) +{ + const struct dpll_data *dd; + u8 ai; + u8 state = 1; + int r = 0; + + pr_debug("clock: locking DPLL %s\n", __clk_get_name(clk->hw.clk)); + + dd = clk->dpll_data; + state <<= __ffs(dd->idlest_mask); + + /* Check if already locked */ + if ((ti_clk_ll_ops->clk_readl(dd->idlest_reg) & dd->idlest_mask) == + state) + goto done; + + ai = omap3_dpll_autoidle_read(clk); + + if (ai) + omap3_dpll_deny_idle(clk); + + _omap3_dpll_write_clken(clk, DPLL_LOCKED); + + r = _omap3_wait_dpll_status(clk, 1); + + if (ai) + omap3_dpll_allow_idle(clk); + +done: + return r; +} + +/* + * _omap3_noncore_dpll_bypass - instruct a DPLL to bypass and wait for readiness + * @clk: pointer to a DPLL struct clk + * + * Instructs a non-CORE DPLL to enter low-power bypass mode. In + * bypass mode, the DPLL's rate is set equal to its parent clock's + * rate. Waits for the DPLL to report readiness before returning. + * Will save and restore the DPLL's autoidle state across the enable, + * per the CDP code. If the DPLL entered bypass mode successfully, + * return 0; if the DPLL did not enter bypass in the time allotted, or + * DPLL3 was passed in, or the DPLL does not support low-power bypass, + * return -EINVAL. + */ +static int _omap3_noncore_dpll_bypass(struct clk_hw_omap *clk) +{ + int r; + u8 ai; + + if (!(clk->dpll_data->modes & (1 << DPLL_LOW_POWER_BYPASS))) + return -EINVAL; + + pr_debug("clock: configuring DPLL %s for low-power bypass\n", + __clk_get_name(clk->hw.clk)); + + ai = omap3_dpll_autoidle_read(clk); + + _omap3_dpll_write_clken(clk, DPLL_LOW_POWER_BYPASS); + + r = _omap3_wait_dpll_status(clk, 0); + + if (ai) + omap3_dpll_allow_idle(clk); + + return r; +} + +/* + * _omap3_noncore_dpll_stop - instruct a DPLL to stop + * @clk: pointer to a DPLL struct clk + * + * Instructs a non-CORE DPLL to enter low-power stop. Will save and + * restore the DPLL's autoidle state across the stop, per the CDP + * code. 
If DPLL3 was passed in, or the DPLL does not support + * low-power stop, return -EINVAL; otherwise, return 0. + */ +static int _omap3_noncore_dpll_stop(struct clk_hw_omap *clk) +{ + u8 ai; + + if (!(clk->dpll_data->modes & (1 << DPLL_LOW_POWER_STOP))) + return -EINVAL; + + pr_debug("clock: stopping DPLL %s\n", __clk_get_name(clk->hw.clk)); + + ai = omap3_dpll_autoidle_read(clk); + + _omap3_dpll_write_clken(clk, DPLL_LOW_POWER_STOP); + + if (ai) + omap3_dpll_allow_idle(clk); + + return 0; +} + +/** + * _lookup_dco - Lookup DCO used by j-type DPLL + * @clk: pointer to a DPLL struct clk + * @dco: digital control oscillator selector + * @m: DPLL multiplier to set + * @n: DPLL divider to set + * + * See 36xx TRM section 3.5.3.3.3.2 "Type B DPLL (Low-Jitter)" + * + * XXX This code is not needed for 3430/AM35xx; can it be optimized + * out in non-multi-OMAP builds for those chips? + */ +static void _lookup_dco(struct clk_hw_omap *clk, u8 *dco, u16 m, u8 n) +{ + unsigned long fint, clkinp; /* watch out for overflow */ + + clkinp = __clk_get_rate(__clk_get_parent(clk->hw.clk)); + fint = (clkinp / n) * m; + + if (fint < 1000000000) + *dco = 2; + else + *dco = 4; +} + +/** + * _lookup_sddiv - Calculate sigma delta divider for j-type DPLL + * @clk: pointer to a DPLL struct clk + * @sd_div: target sigma-delta divider + * @m: DPLL multiplier to set + * @n: DPLL divider to set + * + * See 36xx TRM section 3.5.3.3.3.2 "Type B DPLL (Low-Jitter)" + * + * XXX This code is not needed for 3430/AM35xx; can it be optimized + * out in non-multi-OMAP builds for those chips? 
+ */ +static void _lookup_sddiv(struct clk_hw_omap *clk, u8 *sd_div, u16 m, u8 n) +{ + unsigned long clkinp, sd; /* watch out for overflow */ + int mod1, mod2; + + clkinp = __clk_get_rate(__clk_get_parent(clk->hw.clk)); + + /* + * target sigma-delta to near 250MHz + * sd = ceil[(m/(n+1)) * (clkinp_MHz / 250)] + */ + clkinp /= 100000; /* shift from MHz to 10*Hz for 38.4 and 19.2 */ + mod1 = (clkinp * m) % (250 * n); + sd = (clkinp * m) / (250 * n); + mod2 = sd % 10; + sd /= 10; + + if (mod1 || mod2) + sd++; + *sd_div = sd; +} + +/* + * _omap3_noncore_dpll_program - set non-core DPLL M,N values directly + * @clk: struct clk * of DPLL to set + * @freqsel: FREQSEL value to set + * + * Program the DPLL with the last M, N values calculated, and wait for + * the DPLL to lock. Returns -EINVAL upon error, or 0 upon success. + */ +static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel) +{ + struct dpll_data *dd = clk->dpll_data; + u8 dco, sd_div; + u32 v; + + /* 3430 ES2 TRM: 4.7.6.9 DPLL Programming Sequence */ + _omap3_noncore_dpll_bypass(clk); + + /* + * Set jitter correction. Jitter correction applicable for OMAP343X + * only since freqsel field is no longer present on other devices. 
+ */ + if (ti_clk_get_features()->flags & TI_CLK_DPLL_HAS_FREQSEL) { + v = ti_clk_ll_ops->clk_readl(dd->control_reg); + v &= ~dd->freqsel_mask; + v |= freqsel << __ffs(dd->freqsel_mask); + ti_clk_ll_ops->clk_writel(v, dd->control_reg); + } + + /* Set DPLL multiplier, divider */ + v = ti_clk_ll_ops->clk_readl(dd->mult_div1_reg); + + /* Handle Duty Cycle Correction */ + if (dd->dcc_mask) { + if (dd->last_rounded_rate >= dd->dcc_rate) + v |= dd->dcc_mask; /* Enable DCC */ + else + v &= ~dd->dcc_mask; /* Disable DCC */ + } + + v &= ~(dd->mult_mask | dd->div1_mask); + v |= dd->last_rounded_m << __ffs(dd->mult_mask); + v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask); + + /* Configure dco and sd_div for dplls that have these fields */ + if (dd->dco_mask) { + _lookup_dco(clk, &dco, dd->last_rounded_m, dd->last_rounded_n); + v &= ~(dd->dco_mask); + v |= dco << __ffs(dd->dco_mask); + } + if (dd->sddiv_mask) { + _lookup_sddiv(clk, &sd_div, dd->last_rounded_m, + dd->last_rounded_n); + v &= ~(dd->sddiv_mask); + v |= sd_div << __ffs(dd->sddiv_mask); + } + + ti_clk_ll_ops->clk_writel(v, dd->mult_div1_reg); + + /* Set 4X multiplier and low-power mode */ + if (dd->m4xen_mask || dd->lpmode_mask) { + v = ti_clk_ll_ops->clk_readl(dd->control_reg); + + if (dd->m4xen_mask) { + if (dd->last_rounded_m4xen) + v |= dd->m4xen_mask; + else + v &= ~dd->m4xen_mask; + } + + if (dd->lpmode_mask) { + if (dd->last_rounded_lpmode) + v |= dd->lpmode_mask; + else + v &= ~dd->lpmode_mask; + } + + ti_clk_ll_ops->clk_writel(v, dd->control_reg); + } + + /* We let the clock framework set the other output dividers later */ + + /* REVISIT: Set ramp-up delay? */ + + _omap3_noncore_dpll_lock(clk); + + return 0; +} + +/* Public functions */ + +/** + * omap3_dpll_recalc - recalculate DPLL rate + * @clk: DPLL struct clk + * + * Recalculate and propagate the DPLL rate. 
+ */ +unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + + return omap2_get_dpll_rate(clk); +} + +/* Non-CORE DPLL (e.g., DPLLs that do not control SDRC) clock functions */ + +/** + * omap3_noncore_dpll_enable - instruct a DPLL to enter bypass or lock mode + * @clk: pointer to a DPLL struct clk + * + * Instructs a non-CORE DPLL to enable, e.g., to enter bypass or lock. + * The choice of modes depends on the DPLL's programmed rate: if it is + * the same as the DPLL's parent clock, it will enter bypass; + * otherwise, it will enter lock. This code will wait for the DPLL to + * indicate readiness before returning, unless the DPLL takes too long + * to enter the target state. Intended to be used as the struct clk's + * enable function. If DPLL3 was passed in, or the DPLL does not + * support low-power stop, or if the DPLL took too long to enter + * bypass or lock, return -EINVAL; otherwise, return 0. + */ +int omap3_noncore_dpll_enable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + int r; + struct dpll_data *dd; + struct clk_hw *parent; + + dd = clk->dpll_data; + if (!dd) + return -EINVAL; + + if (clk->clkdm) { + r = ti_clk_ll_ops->clkdm_clk_enable(clk->clkdm, hw->clk); + if (r) { + WARN(1, + "%s: could not enable %s's clockdomain %s: %d\n", + __func__, __clk_get_name(hw->clk), + clk->clkdm_name, r); + return r; + } + } + + parent = __clk_get_hw(__clk_get_parent(hw->clk)); + + if (__clk_get_rate(hw->clk) == __clk_get_rate(dd->clk_bypass)) { + WARN_ON(parent != __clk_get_hw(dd->clk_bypass)); + r = _omap3_noncore_dpll_bypass(clk); + } else { + WARN_ON(parent != __clk_get_hw(dd->clk_ref)); + r = _omap3_noncore_dpll_lock(clk); + } + + return r; +} + +/** + * omap3_noncore_dpll_disable - instruct a DPLL to enter low-power stop + * @clk: pointer to a DPLL struct clk + * + * Instructs a non-CORE DPLL to enter low-power stop. 
This function is + * intended for use in struct clkops. No return value. + */ +void omap3_noncore_dpll_disable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + + _omap3_noncore_dpll_stop(clk); + if (clk->clkdm) + ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk); +} + +/* Non-CORE DPLL rate set code */ + +/** + * omap3_noncore_dpll_determine_rate - determine rate for a DPLL + * @hw: pointer to the clock to determine rate for + * @rate: target rate for the DPLL + * @best_parent_rate: pointer for returning best parent rate + * @best_parent_clk: pointer for returning best parent clock + * + * Determines which DPLL mode to use for reaching a desired target rate. + * Checks whether the DPLL shall be in bypass or locked mode, and if + * locked, calculates the M,N values for the DPLL via round-rate. + * Returns a positive clock rate with success, negative error value + * in failure. + */ +long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate, + unsigned long min_rate, + unsigned long max_rate, + unsigned long *best_parent_rate, + struct clk_hw **best_parent_clk) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + + if (!hw || !rate) + return -EINVAL; + + dd = clk->dpll_data; + if (!dd) + return -EINVAL; + + if (__clk_get_rate(dd->clk_bypass) == rate && + (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { + *best_parent_clk = __clk_get_hw(dd->clk_bypass); + } else { + rate = omap2_dpll_round_rate(hw, rate, best_parent_rate); + *best_parent_clk = __clk_get_hw(dd->clk_ref); + } + + *best_parent_rate = rate; + + return rate; +} + +/** + * omap3_noncore_dpll_set_parent - set parent for a DPLL clock + * @hw: pointer to the clock to set parent for + * @index: parent index to select + * + * Sets parent for a DPLL clock. This sets the DPLL into bypass or + * locked mode. Returns 0 with success, negative error value otherwise. 
+ */ +int omap3_noncore_dpll_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + int ret; + + if (!hw) + return -EINVAL; + + if (index) + ret = _omap3_noncore_dpll_bypass(clk); + else + ret = _omap3_noncore_dpll_lock(clk); + + return ret; +} + +/** + * omap3_noncore_dpll_set_rate - set rate for a DPLL clock + * @hw: pointer to the clock to set parent for + * @rate: target rate for the clock + * @parent_rate: rate of the parent clock + * + * Sets rate for a DPLL clock. First checks if the clock parent is + * reference clock (in bypass mode, the rate of the clock can't be + * changed) and proceeds with the rate change operation. Returns 0 + * with success, negative error value otherwise. + */ +int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + struct dpll_data *dd; + u16 freqsel = 0; + int ret; + + if (!hw || !rate) + return -EINVAL; + + dd = clk->dpll_data; + if (!dd) + return -EINVAL; + + if (__clk_get_hw(__clk_get_parent(hw->clk)) != + __clk_get_hw(dd->clk_ref)) + return -EINVAL; + + if (dd->last_rounded_rate == 0) + return -EINVAL; + + /* Freqsel is available only on OMAP343X devices */ + if (ti_clk_get_features()->flags & TI_CLK_DPLL_HAS_FREQSEL) { + freqsel = _omap3_dpll_compute_freqsel(clk, dd->last_rounded_n); + WARN_ON(!freqsel); + } + + pr_debug("%s: %s: set rate: locking rate to %lu.\n", __func__, + __clk_get_name(hw->clk), rate); + + ret = omap3_noncore_dpll_program(clk, freqsel); + + return ret; +} + +/** + * omap3_noncore_dpll_set_rate_and_parent - set rate and parent for a DPLL clock + * @hw: pointer to the clock to set rate and parent for + * @rate: target rate for the DPLL + * @parent_rate: clock rate of the DPLL parent + * @index: new parent index for the DPLL, 0 - reference, 1 - bypass + * + * Sets rate and parent for a DPLL clock. If new parent is the bypass + * clock, only selects the parent. 
Otherwise proceeds with a rate + * change, as this will effectively also change the parent as the + * DPLL is put into locked mode. Returns 0 with success, negative error + * value otherwise. + */ +int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw, + unsigned long rate, + unsigned long parent_rate, + u8 index) +{ + int ret; + + if (!hw || !rate) + return -EINVAL; + + /* + * clk-ref at index[0], in which case we only need to set rate, + * the parent will be changed automatically with the lock sequence. + * With clk-bypass case we only need to change parent. + */ + if (index) + ret = omap3_noncore_dpll_set_parent(hw, index); + else + ret = omap3_noncore_dpll_set_rate(hw, rate, parent_rate); + + return ret; +} + +/* DPLL autoidle read/set code */ + +/** + * omap3_dpll_autoidle_read - read a DPLL's autoidle bits + * @clk: struct clk * of the DPLL to read + * + * Return the DPLL's autoidle bits, shifted down to bit 0. Returns + * -EINVAL if passed a null pointer or if the struct clk does not + * appear to refer to a DPLL. + */ +static u32 omap3_dpll_autoidle_read(struct clk_hw_omap *clk) +{ + const struct dpll_data *dd; + u32 v; + + if (!clk || !clk->dpll_data) + return -EINVAL; + + dd = clk->dpll_data; + + if (!dd->autoidle_reg) + return -EINVAL; + + v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg); + v &= dd->autoidle_mask; + v >>= __ffs(dd->autoidle_mask); + + return v; +} + +/** + * omap3_dpll_allow_idle - enable DPLL autoidle bits + * @clk: struct clk * of the DPLL to operate on + * + * Enable DPLL automatic idle control. This automatic idle mode + * switching takes effect only when the DPLL is locked, at least on + * OMAP3430. The DPLL will enter low-power stop when its downstream + * clocks are gated. No return value. 
+ */ +static void omap3_dpll_allow_idle(struct clk_hw_omap *clk) +{ + const struct dpll_data *dd; + u32 v; + + if (!clk || !clk->dpll_data) + return; + + dd = clk->dpll_data; + + if (!dd->autoidle_reg) + return; + + /* + * REVISIT: CORE DPLL can optionally enter low-power bypass + * by writing 0x5 instead of 0x1. Add some mechanism to + * optionally enter this mode. + */ + v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg); + v &= ~dd->autoidle_mask; + v |= DPLL_AUTOIDLE_LOW_POWER_STOP << __ffs(dd->autoidle_mask); + ti_clk_ll_ops->clk_writel(v, dd->autoidle_reg); +} + +/** + * omap3_dpll_deny_idle - prevent DPLL from automatically idling + * @clk: struct clk * of the DPLL to operate on + * + * Disable DPLL automatic idle control. No return value. + */ +static void omap3_dpll_deny_idle(struct clk_hw_omap *clk) +{ + const struct dpll_data *dd; + u32 v; + + if (!clk || !clk->dpll_data) + return; + + dd = clk->dpll_data; + + if (!dd->autoidle_reg) + return; + + v = ti_clk_ll_ops->clk_readl(dd->autoidle_reg); + v &= ~dd->autoidle_mask; + v |= DPLL_AUTOIDLE_DISABLE << __ffs(dd->autoidle_mask); + ti_clk_ll_ops->clk_writel(v, dd->autoidle_reg); +} + +/* Clock control for DPLL outputs */ + +/* Find the parent DPLL for the given clkoutx2 clock */ +static struct clk_hw_omap *omap3_find_clkoutx2_dpll(struct clk_hw *hw) +{ + struct clk_hw_omap *pclk = NULL; + struct clk *parent; + + /* Walk up the parents of clk, looking for a DPLL */ + do { + do { + parent = __clk_get_parent(hw->clk); + hw = __clk_get_hw(parent); + } while (hw && (__clk_get_flags(hw->clk) & CLK_IS_BASIC)); + if (!hw) + break; + pclk = to_clk_hw_omap(hw); + } while (pclk && !pclk->dpll_data); + + /* clk does not have a DPLL as a parent? error in the clock data */ + if (!pclk) { + WARN_ON(1); + return NULL; + } + + return pclk; +} + +/** + * omap3_clkoutx2_recalc - recalculate DPLL X2 output virtual clock rate + * @clk: DPLL output struct clk + * + * Using parent clock DPLL data, look up DPLL state. 
If locked, set our + * rate to the dpll_clk * 2; otherwise, just use dpll_clk. + */ +unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, + unsigned long parent_rate) +{ + const struct dpll_data *dd; + unsigned long rate; + u32 v; + struct clk_hw_omap *pclk = NULL; + + if (!parent_rate) + return 0; + + pclk = omap3_find_clkoutx2_dpll(hw); + + if (!pclk) + return 0; + + dd = pclk->dpll_data; + + WARN_ON(!dd->enable_mask); + + v = ti_clk_ll_ops->clk_readl(dd->control_reg) & dd->enable_mask; + v >>= __ffs(dd->enable_mask); + if ((v != OMAP3XXX_EN_DPLL_LOCKED) || (dd->flags & DPLL_J_TYPE)) + rate = parent_rate; + else + rate = parent_rate * 2; + return rate; +} + +/* OMAP3/4 non-CORE DPLL clkops */ +const struct clk_hw_omap_ops clkhwops_omap3_dpll = { + .allow_idle = omap3_dpll_allow_idle, + .deny_idle = omap3_dpll_deny_idle, +}; + +/** + * omap3_dpll4_set_rate - set rate for omap3 per-dpll + * @hw: clock to change + * @rate: target rate for clock + * @parent_rate: rate of the parent clock + * + * Check if the current SoC supports the per-dpll reprogram operation + * or not, and then do the rate change if supported. Returns -EINVAL + * if not supported, 0 for success, and potential error codes from the + * clock rate change. + */ +int omap3_dpll4_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + /* + * According to the 12-5 CDP code from TI, "Limitation 2.5" + * on 3430ES1 prevents us from changing DPLL multipliers or dividers + * on DPLL4. 
+ */ + if (ti_clk_get_features()->flags & TI_CLK_DPLL4_DENY_REPROGRAM) { + pr_err("clock: DPLL4 cannot change rate due to silicon 'Limitation 2.5' on 3430ES1.\n"); + return -EINVAL; + } + + return omap3_noncore_dpll_set_rate(hw, rate, parent_rate); +} + +/** + * omap3_dpll4_set_rate_and_parent - set rate and parent for omap3 per-dpll + * @hw: clock to change + * @rate: target rate for clock + * @parent_rate: rate of the parent clock + * @index: parent index, 0 - reference clock, 1 - bypass clock + * + * Check if the current SoC supports the per-dpll reprogram operation + * or not, and then do the rate + parent change if supported. Returns + * -EINVAL if not supported, 0 for success, and potential error codes + * from the clock rate change. + */ +int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate, u8 index) +{ + if (ti_clk_get_features()->flags & TI_CLK_DPLL4_DENY_REPROGRAM) { + pr_err("clock: DPLL4 cannot change rate due to silicon 'Limitation 2.5' on 3430ES1.\n"); + return -EINVAL; + } + + return omap3_noncore_dpll_set_rate_and_parent(hw, rate, parent_rate, + index); +} diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 25eea896627a..f8e50271ec97 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -271,41 +271,13 @@ extern const struct clk_ops ti_clk_mux_ops; #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) -int omap3_noncore_dpll_enable(struct clk_hw *hw); -void omap3_noncore_dpll_disable(struct clk_hw *hw); -int omap3_noncore_dpll_set_parent(struct clk_hw *hw, u8 index); -int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate); -int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw, - unsigned long rate, - unsigned long parent_rate, - u8 index); -long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, 
- struct clk_hw **best_parent_clk); -unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); -long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, - unsigned long *parent_rate); void omap2_init_clk_clkdm(struct clk_hw *clk); -unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw, - unsigned long parent_rate); -int omap3_clkoutx2_set_rate(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate); -long omap3_clkoutx2_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *prate); int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); int omap2_clk_deny_idle(struct clk *clk); -int omap3_dpll4_set_rate(struct clk_hw *clk, unsigned long rate, - unsigned long parent_rate); -int omap3_dpll4_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate, u8 index); int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); int omap2_dflt_clk_is_enabled(struct clk_hw *hw); @@ -317,7 +289,6 @@ void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, void __iomem **idlest_reg, u8 *idlest_bit, u8 *idlest_val); -void omap3_clk_lock_dpll5(void); unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, unsigned long parent_rate); int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, @@ -365,7 +336,6 @@ const struct ti_clk_features *ti_clk_get_features(void); extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; -extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const struct clk_hw_omap_ops 
clkhwops_am35xx_ipss_module_wait; -- cgit v1.2.3-70-g09d2 From 046b7c31668311942a2e431e7983d8ab9874d845 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 15:13:50 +0200 Subject: ARM: OMAP2+: clock: remove clkdm_control static boolean from code clkdm_control is used to determine, whether clocks should trigger a clockdomain transition when they are enabled/disabled. Keep this functionality intact, but replace this with a clk_features flag which can be initialized during boot if needed. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 34 ++++++++++------------------------ arch/arm/mach-omap2/clock.h | 2 -- include/linux/clk/ti.h | 1 + 3 files changed, 11 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 7a5713df54b3..6c17adf40e6f 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -61,14 +61,6 @@ u16 cpu_mask; #define OMAP3PLUS_DPLL_FINT_MIN 32000 #define OMAP3PLUS_DPLL_FINT_MAX 52000000 -/* - * clkdm_control: if true, then when a clock is enabled in the - * hardware, its clockdomain will first be enabled; and when a clock - * is disabled in the hardware, its clockdomain will be disabled - * afterwards. - */ -static bool clkdm_control = true; - struct clk_iomap { struct regmap *regmap; void __iomem *mem; @@ -287,19 +279,6 @@ void omap2_init_clk_clkdm(struct clk_hw *hw) } } -/** - * omap2_clk_disable_clkdm_control - disable clkdm control on clk enable/disable - * - * Prevent the OMAP clock code from calling into the clockdomain code - * when a hardware clock in that clockdomain is enabled or disabled. - * Intended to be called at init time from omap*_clk_init(). No - * return value. 
- */ -void __init omap2_clk_disable_clkdm_control(void) -{ - clkdm_control = false; -} - /** * omap2_clk_dflt_find_companion - find companion clock to @clk * @clk: struct clk * to find the companion clock of @@ -384,6 +363,12 @@ int omap2_dflt_clk_enable(struct clk_hw *hw) struct clk_hw_omap *clk; u32 v; int ret = 0; + bool clkdm_control; + + if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) + clkdm_control = false; + else + clkdm_control = true; clk = to_clk_hw_omap(hw); @@ -457,7 +442,8 @@ void omap2_dflt_clk_disable(struct clk_hw *hw) omap2_clk_writel(v, clk, clk->enable_reg); /* No OCP barrier needed here since it is a disable operation */ - if (clkdm_control && clk->clkdm) + if (!(ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) && + clk->clkdm) clkdm_clk_disable(clk->clkdm, hw->clk); } @@ -490,7 +476,7 @@ int omap2_clkops_enable_clkdm(struct clk_hw *hw) pr_err("%s: %s: should use dflt_clk_enable ?!\n", __func__, __clk_get_name(hw->clk)); - if (!clkdm_control) { + if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) { pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n", __func__, __clk_get_name(hw->clk)); return 0; @@ -528,7 +514,7 @@ void omap2_clkops_disable_clkdm(struct clk_hw *hw) pr_err("%s: %s: should use dflt_clk_disable ?!\n", __func__, __clk_get_name(hw->clk)); - if (!clkdm_control) { + if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) { pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n", __func__, __clk_get_name(hw->clk)); return; diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index d60691d5626a..948065497472 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -180,8 +180,6 @@ struct clksel { #define OMAP4XXX_EN_DPLL_FRBYPASS 0x6 #define OMAP4XXX_EN_DPLL_LOCKED 0x7 -void __init omap2_clk_disable_clkdm_control(void); - void omap2_clk_print_new_rates(const char *hfclkin_ck_name, const char *core_ck_name, const char 
*mpu_ck_name); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index f8e50271ec97..fbb65e401d13 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -330,6 +330,7 @@ struct ti_clk_features { #define TI_CLK_DPLL_HAS_FREQSEL BIT(0) #define TI_CLK_DPLL4_DENY_REPROGRAM BIT(1) +#define TI_CLK_DISABLE_CLKDM_CONTROL BIT(2) void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); -- cgit v1.2.3-70-g09d2 From 9f37e90efaf0772b8f98bc347b9db77a3f0c27eb Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 15:28:53 +0200 Subject: clk: ti: dflt: move support for default gate clock to clock driver With the legacy support gone, OMAP2+ default gate clock can be moved under clock driver. Create a new file for the purpose, and clean-up the header exports a bit as some clock APIs are no longer needed outside clock driver itself. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 273 -------------------------------------- drivers/clk/ti/Makefile | 2 +- drivers/clk/ti/clkt_dflt.c | 316 ++++++++++++++++++++++++++++++++++++++++++++ drivers/clk/ti/clock.h | 5 + include/linux/clk/ti.h | 4 - 5 files changed, 322 insertions(+), 278 deletions(-) create mode 100644 drivers/clk/ti/clkt_dflt.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 6c17adf40e6f..38a336b4c42b 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -40,12 +40,6 @@ #include "cm-regbits-34xx.h" #include "common.h" -/* - * MAX_MODULE_ENABLE_WAIT: maximum of number of microseconds to wait - * for a module to indicate that it is no longer in idle - */ -#define MAX_MODULE_ENABLE_WAIT 100000 - u16 cpu_mask; /* DPLL valid Fint frequency band limits - from 34xx TRM Section 4.7.6.2 */ @@ -176,77 +170,6 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) /* Private functions */ - -/** - * _wait_idlest_generic - wait for a 
module to leave the idle state - * @clk: module clock to wait for (needed for register offsets) - * @reg: virtual address of module IDLEST register - * @mask: value to mask against to determine if the module is active - * @idlest: idle state indicator (0 or 1) for the clock - * @name: name of the clock (for printk) - * - * Wait for a module to leave idle, where its idle-status register is - * not inside the CM module. Returns 1 if the module left idle - * promptly, or 0 if the module did not leave idle before the timeout - * elapsed. XXX Deprecated - should be moved into drivers for the - * individual IP block that the IDLEST register exists in. - */ -static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg, - u32 mask, u8 idlest, const char *name) -{ - int i = 0, ena = 0; - - ena = (idlest) ? 0 : mask; - - omap_test_timeout(((omap2_clk_readl(clk, reg) & mask) == ena), - MAX_MODULE_ENABLE_WAIT, i); - - if (i < MAX_MODULE_ENABLE_WAIT) - pr_debug("omap clock: module associated with clock %s ready after %d loops\n", - name, i); - else - pr_err("omap clock: module associated with clock %s didn't enable in %d tries\n", - name, MAX_MODULE_ENABLE_WAIT); - - return (i < MAX_MODULE_ENABLE_WAIT) ? 1 : 0; -}; - -/** - * _omap2_module_wait_ready - wait for an OMAP module to leave IDLE - * @clk: struct clk * belonging to the module - * - * If the necessary clocks for the OMAP hardware IP block that - * corresponds to clock @clk are enabled, then wait for the module to - * indicate readiness (i.e., to leave IDLE). This code does not - * belong in the clock code and will be moved in the medium term to - * module-dependent code. No return value. 
- */ -static void _omap2_module_wait_ready(struct clk_hw_omap *clk) -{ - void __iomem *companion_reg, *idlest_reg; - u8 other_bit, idlest_bit, idlest_val, idlest_reg_id; - s16 prcm_mod; - int r; - - /* Not all modules have multiple clocks that their IDLEST depends on */ - if (clk->ops->find_companion) { - clk->ops->find_companion(clk, &companion_reg, &other_bit); - if (!(omap2_clk_readl(clk, companion_reg) & (1 << other_bit))) - return; - } - - clk->ops->find_idlest(clk, &idlest_reg, &idlest_bit, &idlest_val); - r = cm_split_idlest_reg(idlest_reg, &prcm_mod, &idlest_reg_id); - if (r) { - /* IDLEST register not in the CM module */ - _wait_idlest_generic(clk, idlest_reg, (1 << idlest_bit), - idlest_val, __clk_get_name(clk->hw.clk)); - } else { - omap_cm_wait_module_ready(0, prcm_mod, idlest_reg_id, - idlest_bit); - }; -} - /* Public functions */ /** @@ -279,174 +202,6 @@ void omap2_init_clk_clkdm(struct clk_hw *hw) } } -/** - * omap2_clk_dflt_find_companion - find companion clock to @clk - * @clk: struct clk * to find the companion clock of - * @other_reg: void __iomem ** to return the companion clock CM_*CLKEN va in - * @other_bit: u8 ** to return the companion clock bit shift in - * - * Note: We don't need special code here for INVERT_ENABLE for the - * time being since INVERT_ENABLE only applies to clocks enabled by - * CM_CLKEN_PLL - * - * Convert CM_ICLKEN* <-> CM_FCLKEN*. This conversion assumes it's - * just a matter of XORing the bits. - * - * Some clocks don't have companion clocks. For example, modules with - * only an interface clock (such as MAILBOXES) don't have a companion - * clock. Right now, this code relies on the hardware exporting a bit - * in the correct companion register that indicates that the - * nonexistent 'companion clock' is active. Future patches will - * associate this type of code with per-module data structures to - * avoid this issue, and remove the casts. No return value. 
- */ -void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, - void __iomem **other_reg, u8 *other_bit) -{ - u32 r; - - /* - * Convert CM_ICLKEN* <-> CM_FCLKEN*. This conversion assumes - * it's just a matter of XORing the bits. - */ - r = ((__force u32)clk->enable_reg ^ (CM_FCLKEN ^ CM_ICLKEN)); - - *other_reg = (__force void __iomem *)r; - *other_bit = clk->enable_bit; -} - -/** - * omap2_clk_dflt_find_idlest - find CM_IDLEST reg va, bit shift for @clk - * @clk: struct clk * to find IDLEST info for - * @idlest_reg: void __iomem ** to return the CM_IDLEST va in - * @idlest_bit: u8 * to return the CM_IDLEST bit shift in - * @idlest_val: u8 * to return the idle status indicator - * - * Return the CM_IDLEST register address and bit shift corresponding - * to the module that "owns" this clock. This default code assumes - * that the CM_IDLEST bit shift is the CM_*CLKEN bit shift, and that - * the IDLEST register address ID corresponds to the CM_*CLKEN - * register address ID (e.g., that CM_FCLKEN2 corresponds to - * CM_IDLEST2). This is not true for all modules. No return value. - */ -void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, u8 *idlest_bit, u8 *idlest_val) -{ - u32 r; - - r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); - *idlest_reg = (__force void __iomem *)r; - *idlest_bit = clk->enable_bit; - - /* - * 24xx uses 0 to indicate not ready, and 1 to indicate ready. - * 34xx reverses this, just to keep us on our toes - * AM35xx uses both, depending on the module. - */ - *idlest_val = ti_clk_get_features()->cm_idlest_val; -} - -/** - * omap2_dflt_clk_enable - enable a clock in the hardware - * @hw: struct clk_hw * of the clock to enable - * - * Enable the clock @hw in the hardware. We first call into the OMAP - * clockdomain code to "enable" the corresponding clockdomain if this - * is the first enabled user of the clockdomain. Then program the - * hardware to enable the clock. 
Then wait for the IP block that uses - * this clock to leave idle (if applicable). Returns the error value - * from clkdm_clk_enable() if it terminated with an error, or -EINVAL - * if @hw has a null clock enable_reg, or zero upon success. - */ -int omap2_dflt_clk_enable(struct clk_hw *hw) -{ - struct clk_hw_omap *clk; - u32 v; - int ret = 0; - bool clkdm_control; - - if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) - clkdm_control = false; - else - clkdm_control = true; - - clk = to_clk_hw_omap(hw); - - if (clkdm_control && clk->clkdm) { - ret = clkdm_clk_enable(clk->clkdm, hw->clk); - if (ret) { - WARN(1, "%s: could not enable %s's clockdomain %s: %d\n", - __func__, __clk_get_name(hw->clk), - clk->clkdm->name, ret); - return ret; - } - } - - if (unlikely(clk->enable_reg == NULL)) { - pr_err("%s: %s missing enable_reg\n", __func__, - __clk_get_name(hw->clk)); - ret = -EINVAL; - goto err; - } - - /* FIXME should not have INVERT_ENABLE bit here */ - v = omap2_clk_readl(clk, clk->enable_reg); - if (clk->flags & INVERT_ENABLE) - v &= ~(1 << clk->enable_bit); - else - v |= (1 << clk->enable_bit); - omap2_clk_writel(v, clk, clk->enable_reg); - v = omap2_clk_readl(clk, clk->enable_reg); /* OCP barrier */ - - if (clk->ops && clk->ops->find_idlest) - _omap2_module_wait_ready(clk); - - return 0; - -err: - if (clkdm_control && clk->clkdm) - clkdm_clk_disable(clk->clkdm, hw->clk); - return ret; -} - -/** - * omap2_dflt_clk_disable - disable a clock in the hardware - * @hw: struct clk_hw * of the clock to disable - * - * Disable the clock @hw in the hardware, and call into the OMAP - * clockdomain code to "disable" the corresponding clockdomain if all - * clocks/hwmods in that clockdomain are now disabled. No return - * value. - */ -void omap2_dflt_clk_disable(struct clk_hw *hw) -{ - struct clk_hw_omap *clk; - u32 v; - - clk = to_clk_hw_omap(hw); - if (!clk->enable_reg) { - /* - * 'independent' here refers to a clock which is not - * controlled by its parent. 
- */ - pr_err("%s: independent clock %s has no enable_reg\n", - __func__, __clk_get_name(hw->clk)); - return; - } - - v = omap2_clk_readl(clk, clk->enable_reg); - if (clk->flags & INVERT_ENABLE) - v |= (1 << clk->enable_bit); - else - v &= ~(1 << clk->enable_bit); - omap2_clk_writel(v, clk, clk->enable_reg); - /* No OCP barrier needed here since it is a disable operation */ - - if (!(ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) && - clk->clkdm) - clkdm_clk_disable(clk->clkdm, hw->clk); -} - /** * omap2_clkops_enable_clkdm - increment usecount on clkdm of @hw * @hw: struct clk_hw * of the clock being enabled @@ -523,29 +278,6 @@ void omap2_clkops_disable_clkdm(struct clk_hw *hw) clkdm_clk_disable(clk->clkdm, hw->clk); } -/** - * omap2_dflt_clk_is_enabled - is clock enabled in the hardware? - * @hw: struct clk_hw * to check - * - * Return 1 if the clock represented by @hw is enabled in the - * hardware, or 0 otherwise. Intended for use in the struct - * clk_ops.is_enabled function pointer. - */ -int omap2_dflt_clk_is_enabled(struct clk_hw *hw) -{ - struct clk_hw_omap *clk = to_clk_hw_omap(hw); - u32 v; - - v = omap2_clk_readl(clk, clk->enable_reg); - - if (clk->flags & INVERT_ENABLE) - v ^= BIT(clk->enable_bit); - - v &= BIT(clk->enable_bit); - - return v ? 
1 : 0; -} - static int __initdata mpurate; /* @@ -566,11 +298,6 @@ static int __init omap_clk_setup(char *str) } __setup("mpurate=", omap_clk_setup); -const struct clk_hw_omap_ops clkhwops_wait = { - .find_idlest = omap2_clk_dflt_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - /** * omap2_clk_print_new_rates - print summary of current clock tree rates * @hfclkin_ck_name: clk name for the off-chip HF oscillator diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 05a0294aba10..9b93e6904359 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -1,7 +1,7 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o \ - clkt_dpll.o clkt_iclk.o + clkt_dpll.o clkt_iclk.o clkt_dflt.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o dpll3xxx.o obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-816x.o obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o diff --git a/drivers/clk/ti/clkt_dflt.c b/drivers/clk/ti/clkt_dflt.c new file mode 100644 index 000000000000..a176b8ac8dd0 --- /dev/null +++ b/drivers/clk/ti/clkt_dflt.c @@ -0,0 +1,316 @@ +/* + * Default clock type + * + * Copyright (C) 2005-2008, 2015 Texas Instruments, Inc. + * Copyright (C) 2004-2010 Nokia Corporation + * + * Contacts: + * Richard Woodruff + * Paul Walmsley + * Tero Kristo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/clk-provider.h> +#include <linux/io.h> +#include <linux/clk/ti.h> +#include <linux/delay.h> + +#include "clock.h" + +/* + * MAX_MODULE_ENABLE_WAIT: maximum of number of microseconds to wait + * for a module to indicate that it is no longer in idle + */ +#define MAX_MODULE_ENABLE_WAIT 100000 + +/* + * CM module register offsets, used for calculating the companion + * register addresses. + */ +#define CM_FCLKEN 0x0000 +#define CM_ICLKEN 0x0010 + +/** + * _wait_idlest_generic - wait for a module to leave the idle state + * @clk: module clock to wait for (needed for register offsets) + * @reg: virtual address of module IDLEST register + * @mask: value to mask against to determine if the module is active + * @idlest: idle state indicator (0 or 1) for the clock + * @name: name of the clock (for printk) + * + * Wait for a module to leave idle, where its idle-status register is + * not inside the CM module. Returns 1 if the module left idle + * promptly, or 0 if the module did not leave idle before the timeout + * elapsed. XXX Deprecated - should be moved into drivers for the + * individual IP block that the IDLEST register exists in. + */ +static int _wait_idlest_generic(struct clk_hw_omap *clk, void __iomem *reg, + u32 mask, u8 idlest, const char *name) +{ + int i = 0, ena = 0; + + ena = (idlest) ? 0 : mask; + + /* Wait until module enters enabled state */ + for (i = 0; i < MAX_MODULE_ENABLE_WAIT; i++) { + if ((ti_clk_ll_ops->clk_readl(reg) & mask) == ena) + break; + udelay(1); + } + + if (i < MAX_MODULE_ENABLE_WAIT) + pr_debug("omap clock: module associated with clock %s ready after %d loops\n", + name, i); + else + pr_err("omap clock: module associated with clock %s didn't enable in %d tries\n", + name, MAX_MODULE_ENABLE_WAIT); + + return (i < MAX_MODULE_ENABLE_WAIT) ? 
1 : 0; +} + +/** + * _omap2_module_wait_ready - wait for an OMAP module to leave IDLE + * @clk: struct clk * belonging to the module + * + * If the necessary clocks for the OMAP hardware IP block that + * corresponds to clock @clk are enabled, then wait for the module to + * indicate readiness (i.e., to leave IDLE). This code does not + * belong in the clock code and will be moved in the medium term to + * module-dependent code. No return value. + */ +static void _omap2_module_wait_ready(struct clk_hw_omap *clk) +{ + void __iomem *companion_reg, *idlest_reg; + u8 other_bit, idlest_bit, idlest_val, idlest_reg_id; + s16 prcm_mod; + int r; + + /* Not all modules have multiple clocks that their IDLEST depends on */ + if (clk->ops->find_companion) { + clk->ops->find_companion(clk, &companion_reg, &other_bit); + if (!(ti_clk_ll_ops->clk_readl(companion_reg) & + (1 << other_bit))) + return; + } + + clk->ops->find_idlest(clk, &idlest_reg, &idlest_bit, &idlest_val); + r = ti_clk_ll_ops->cm_split_idlest_reg(idlest_reg, &prcm_mod, + &idlest_reg_id); + if (r) { + /* IDLEST register not in the CM module */ + _wait_idlest_generic(clk, idlest_reg, (1 << idlest_bit), + idlest_val, __clk_get_name(clk->hw.clk)); + } else { + ti_clk_ll_ops->cm_wait_module_ready(0, prcm_mod, idlest_reg_id, + idlest_bit); + } +} + +/** + * omap2_clk_dflt_find_companion - find companion clock to @clk + * @clk: struct clk * to find the companion clock of + * @other_reg: void __iomem ** to return the companion clock CM_*CLKEN va in + * @other_bit: u8 * to return the companion clock bit shift in + * + * Note: We don't need special code here for INVERT_ENABLE for the + * time being since INVERT_ENABLE only applies to clocks enabled by + * CM_CLKEN_PLL + * + * Convert CM_ICLKEN* <-> CM_FCLKEN*. This conversion assumes it's + * just a matter of XORing the bits. + * + * Some clocks don't have companion clocks. 
For example, modules with + * only an interface clock (such as MAILBOXES) don't have a companion + * clock. Right now, this code relies on the hardware exporting a bit + * in the correct companion register that indicates that the + * nonexistent 'companion clock' is active. Future patches will + * associate this type of code with per-module data structures to + * avoid this issue, and remove the casts. No return value. + */ +void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, + void __iomem **other_reg, u8 *other_bit) +{ + u32 r; + + /* + * Convert CM_ICLKEN* <-> CM_FCLKEN*. This conversion assumes + * it's just a matter of XORing the bits. + */ + r = ((__force u32)clk->enable_reg ^ (CM_FCLKEN ^ CM_ICLKEN)); + + *other_reg = (__force void __iomem *)r; + *other_bit = clk->enable_bit; +} + +/** + * omap2_clk_dflt_find_idlest - find CM_IDLEST reg va, bit shift for @clk + * @clk: struct clk * to find IDLEST info for + * @idlest_reg: void __iomem ** to return the CM_IDLEST va in + * @idlest_bit: u8 * to return the CM_IDLEST bit shift in + * @idlest_val: u8 * to return the idle status indicator + * + * Return the CM_IDLEST register address and bit shift corresponding + * to the module that "owns" this clock. This default code assumes + * that the CM_IDLEST bit shift is the CM_*CLKEN bit shift, and that + * the IDLEST register address ID corresponds to the CM_*CLKEN + * register address ID (e.g., that CM_FCLKEN2 corresponds to + * CM_IDLEST2). This is not true for all modules. No return value. + */ +void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, u8 *idlest_bit, + u8 *idlest_val) +{ + u32 r; + + r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); + *idlest_reg = (__force void __iomem *)r; + *idlest_bit = clk->enable_bit; + + /* + * 24xx uses 0 to indicate not ready, and 1 to indicate ready. + * 34xx reverses this, just to keep us on our toes + * AM35xx uses both, depending on the module. 
+ */ + *idlest_val = ti_clk_get_features()->cm_idlest_val; +} + +/** + * omap2_dflt_clk_enable - enable a clock in the hardware + * @hw: struct clk_hw * of the clock to enable + * + * Enable the clock @hw in the hardware. We first call into the OMAP + * clockdomain code to "enable" the corresponding clockdomain if this + * is the first enabled user of the clockdomain. Then program the + * hardware to enable the clock. Then wait for the IP block that uses + * this clock to leave idle (if applicable). Returns the error value + * from clkdm_clk_enable() if it terminated with an error, or -EINVAL + * if @hw has a null clock enable_reg, or zero upon success. + */ +int omap2_dflt_clk_enable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk; + u32 v; + int ret = 0; + bool clkdm_control; + + if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) + clkdm_control = false; + else + clkdm_control = true; + + clk = to_clk_hw_omap(hw); + + if (clkdm_control && clk->clkdm) { + ret = ti_clk_ll_ops->clkdm_clk_enable(clk->clkdm, hw->clk); + if (ret) { + WARN(1, + "%s: could not enable %s's clockdomain %s: %d\n", + __func__, __clk_get_name(hw->clk), + clk->clkdm_name, ret); + return ret; + } + } + + if (unlikely(!clk->enable_reg)) { + pr_err("%s: %s missing enable_reg\n", __func__, + __clk_get_name(hw->clk)); + ret = -EINVAL; + goto err; + } + + /* FIXME should not have INVERT_ENABLE bit here */ + v = ti_clk_ll_ops->clk_readl(clk->enable_reg); + if (clk->flags & INVERT_ENABLE) + v &= ~(1 << clk->enable_bit); + else + v |= (1 << clk->enable_bit); + ti_clk_ll_ops->clk_writel(v, clk->enable_reg); + v = ti_clk_ll_ops->clk_readl(clk->enable_reg); /* OCP barrier */ + + if (clk->ops && clk->ops->find_idlest) + _omap2_module_wait_ready(clk); + + return 0; + +err: + if (clkdm_control && clk->clkdm) + ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk); + return ret; +} + +/** + * omap2_dflt_clk_disable - disable a clock in the hardware + * @hw: struct clk_hw * of the clock to 
disable + * + * Disable the clock @hw in the hardware, and call into the OMAP + * clockdomain code to "disable" the corresponding clockdomain if all + * clocks/hwmods in that clockdomain are now disabled. No return + * value. + */ +void omap2_dflt_clk_disable(struct clk_hw *hw) +{ + struct clk_hw_omap *clk; + u32 v; + + clk = to_clk_hw_omap(hw); + if (!clk->enable_reg) { + /* + * 'independent' here refers to a clock which is not + * controlled by its parent. + */ + pr_err("%s: independent clock %s has no enable_reg\n", + __func__, __clk_get_name(hw->clk)); + return; + } + + v = ti_clk_ll_ops->clk_readl(clk->enable_reg); + if (clk->flags & INVERT_ENABLE) + v |= (1 << clk->enable_bit); + else + v &= ~(1 << clk->enable_bit); + ti_clk_ll_ops->clk_writel(v, clk->enable_reg); + /* No OCP barrier needed here since it is a disable operation */ + + if (!(ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) && + clk->clkdm) + ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk); +} + +/** + * omap2_dflt_clk_is_enabled - is clock enabled in the hardware? + * @hw: struct clk_hw * to check + * + * Return 1 if the clock represented by @hw is enabled in the + * hardware, or 0 otherwise. Intended for use in the struct + * clk_ops.is_enabled function pointer. + */ +int omap2_dflt_clk_is_enabled(struct clk_hw *hw) +{ + struct clk_hw_omap *clk = to_clk_hw_omap(hw); + u32 v; + + v = ti_clk_ll_ops->clk_readl(clk->enable_reg); + + if (clk->flags & INVERT_ENABLE) + v ^= BIT(clk->enable_bit); + + v &= BIT(clk->enable_bit); + + return v ? 
1 : 0; +} + +const struct clk_hw_omap_ops clkhwops_wait = { + .find_idlest = omap2_clk_dflt_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 688d9e47b2c8..f21538364588 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -175,9 +175,14 @@ void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks); extern const struct clk_hw_omap_ops clkhwops_omap3_dpll; extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; +extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_iclk; extern const struct clk_hw_omap_ops clkhwops_iclk_wait; +int omap2_dflt_clk_enable(struct clk_hw *hw); +void omap2_dflt_clk_disable(struct clk_hw *hw); +int omap2_dflt_clk_is_enabled(struct clk_hw *hw); + u8 omap2_init_dpll_parent(struct clk_hw *hw); int omap3_noncore_dpll_enable(struct clk_hw *hw); void omap3_noncore_dpll_disable(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index fbb65e401d13..81a913edffa7 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -278,9 +278,6 @@ int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); int omap2_clk_deny_idle(struct clk *clk); -int omap2_dflt_clk_enable(struct clk_hw *hw); -void omap2_dflt_clk_disable(struct clk_hw *hw); -int omap2_dflt_clk_is_enabled(struct clk_hw *hw); void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk); void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk); void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, @@ -337,7 +334,6 @@ const struct ti_clk_features *ti_clk_get_features(void); extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; -extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const 
struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; -- cgit v1.2.3-70-g09d2 From d5a04dddf51e234dc89f21e4e4b91e853cf49ff2 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 16:08:42 +0200 Subject: clk: ti: omap2430: move clock support code under clock driver With the legacy clock support gone, this is no longer needed under platform code-base. Thus, move this under the TI clock driver, and remove the exported API from the public header. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 1 - arch/arm/mach-omap2/clock2430.c | 57 ----------------------------------------- drivers/clk/ti/clkt_iclk.c | 35 +++++++++++++++++++++++++ drivers/clk/ti/clock.h | 1 + include/linux/clk/ti.h | 1 - 5 files changed, 36 insertions(+), 59 deletions(-) delete mode 100644 arch/arm/mach-omap2/clock2430.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 070526563698..695d58f81ff3 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -185,7 +185,6 @@ obj-$(CONFIG_ARCH_OMAP2) += $(clock-common) obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpllcore.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_virt_prcm_set.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpll.o -obj-$(CONFIG_SOC_OMAP2430) += clock2430.o obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) obj-$(CONFIG_ARCH_OMAP3) += clock34xx.o clkt34xx_dpll3m2.o obj-$(CONFIG_ARCH_OMAP3) += clock3517.o diff --git a/arch/arm/mach-omap2/clock2430.c b/arch/arm/mach-omap2/clock2430.c deleted file mode 100644 index cef0c8d1de52..000000000000 --- a/arch/arm/mach-omap2/clock2430.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * clock2430.c - OMAP2430-specific clock integration code - * - * Copyright (C) 2005-2008 Texas Instruments, Inc. 
- * Copyright (C) 2004-2010 Nokia Corporation - * - * Contacts: - * Richard Woodruff - * Paul Walmsley - * - * Based on earlier work by Tuukka Tikkanen, Tony Lindgren, - * Gordon McNutt and RidgeRun, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#undef DEBUG - -#include -#include -#include - -#include "soc.h" -#include "iomap.h" -#include "clock.h" -#include "clock2xxx.h" -#include "cm2xxx.h" -#include "cm-regbits-24xx.h" - -/** - * omap2430_clk_i2chs_find_idlest - return CM_IDLEST info for 2430 I2CHS - * @clk: struct clk * being enabled - * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into - * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into - * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator - * - * OMAP2430 I2CHS CM_IDLEST bits are in CM_IDLEST1_CORE, but the - * CM_*CLKEN bits are in CM_{I,F}CLKEN2_CORE. This custom function - * passes back the correct CM_IDLEST register address for I2CHS - * modules. No return value. 
- */ -static void omap2430_clk_i2chs_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, - u8 *idlest_val) -{ - *idlest_reg = OMAP2430_CM_REGADDR(CORE_MOD, CM_IDLEST); - *idlest_bit = clk->enable_bit; - *idlest_val = OMAP24XX_CM_IDLEST_VAL; -} - -/* 2430 I2CHS has non-standard IDLEST register */ -const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait = { - .find_idlest = omap2430_clk_i2chs_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; diff --git a/drivers/clk/ti/clkt_iclk.c b/drivers/clk/ti/clkt_iclk.c index a03919df00ef..38c36908cf88 100644 --- a/drivers/clk/ti/clkt_iclk.c +++ b/drivers/clk/ti/clkt_iclk.c @@ -18,8 +18,12 @@ #include "clock.h" /* Register offsets */ +#define OMAP24XX_CM_FCLKEN2 0x04 #define CM_AUTOIDLE 0x30 #define CM_ICLKEN 0x10 +#define CM_IDLEST 0x20 + +#define OMAP24XX_CM_IDLEST_VAL 0 /* Private functions */ @@ -51,6 +55,31 @@ void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk) ti_clk_ll_ops->clk_writel(v, r); } +/** + * omap2430_clk_i2chs_find_idlest - return CM_IDLEST info for 2430 I2CHS + * @clk: struct clk * being enabled + * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into + * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into + * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator + * + * OMAP2430 I2CHS CM_IDLEST bits are in CM_IDLEST1_CORE, but the + * CM_*CLKEN bits are in CM_{I,F}CLKEN2_CORE. This custom function + * passes back the correct CM_IDLEST register address for I2CHS + * modules. No return value. 
+ */ +static void omap2430_clk_i2chs_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, + u8 *idlest_val) +{ + u32 r; + + r = ((__force u32)clk->enable_reg ^ (OMAP24XX_CM_FCLKEN2 ^ CM_IDLEST)); + *idlest_reg = (__force void __iomem *)r; + *idlest_bit = clk->enable_bit; + *idlest_val = OMAP24XX_CM_IDLEST_VAL; +} + /* Public data */ const struct clk_hw_omap_ops clkhwops_iclk = { @@ -64,3 +93,9 @@ const struct clk_hw_omap_ops clkhwops_iclk_wait = { .find_idlest = omap2_clk_dflt_find_idlest, .find_companion = omap2_clk_dflt_find_companion, }; + +/* 2430 I2CHS has non-standard IDLEST register */ +const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait = { + .find_idlest = omap2430_clk_i2chs_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index f21538364588..3652c267cf81 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -178,6 +178,7 @@ extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx; extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_iclk; extern const struct clk_hw_omap_ops clkhwops_iclk_wait; +extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 81a913edffa7..440ace33ea35 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -333,7 +333,6 @@ void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; -- cgit 
v1.2.3-70-g09d2 From bd86cfdcbd827216fd682d62ffba2667bbe6fbc3 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 16:22:50 +0200 Subject: clk: ti: clkdm: move clkdm gate clock support code to clock driver With the legacy clock data gone, this is no longer needed under platform, so move it under the clock driver itself. Remove the exported clock driver APIs as well, as these are not needed outside clock driver anymore. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 76 -------------------------------------------- arch/arm/mach-omap2/clock.h | 3 -- drivers/clk/ti/clock.h | 3 ++ drivers/clk/ti/clockdomain.c | 76 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/clk/ti.h | 2 -- 5 files changed, 79 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 38a336b4c42b..99875dba803a 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -202,82 +202,6 @@ void omap2_init_clk_clkdm(struct clk_hw *hw) } } -/** - * omap2_clkops_enable_clkdm - increment usecount on clkdm of @hw - * @hw: struct clk_hw * of the clock being enabled - * - * Increment the usecount of the clockdomain of the clock pointed to - * by @hw; if the usecount is 1, the clockdomain will be "enabled." - * Only needed for clocks that don't use omap2_dflt_clk_enable() as - * their enable function pointer. Passes along the return value of - * clkdm_clk_enable(), -EINVAL if @hw is not associated with a - * clockdomain, or 0 if clock framework-based clockdomain control is - * not implemented. 
- */ -int omap2_clkops_enable_clkdm(struct clk_hw *hw) -{ - struct clk_hw_omap *clk; - int ret = 0; - - clk = to_clk_hw_omap(hw); - - if (unlikely(!clk->clkdm)) { - pr_err("%s: %s: no clkdm set ?!\n", __func__, - __clk_get_name(hw->clk)); - return -EINVAL; - } - - if (unlikely(clk->enable_reg)) - pr_err("%s: %s: should use dflt_clk_enable ?!\n", __func__, - __clk_get_name(hw->clk)); - - if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) { - pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n", - __func__, __clk_get_name(hw->clk)); - return 0; - } - - ret = clkdm_clk_enable(clk->clkdm, hw->clk); - WARN(ret, "%s: could not enable %s's clockdomain %s: %d\n", - __func__, __clk_get_name(hw->clk), clk->clkdm->name, ret); - - return ret; -} - -/** - * omap2_clkops_disable_clkdm - decrement usecount on clkdm of @hw - * @hw: struct clk_hw * of the clock being disabled - * - * Decrement the usecount of the clockdomain of the clock pointed to - * by @hw; if the usecount is 0, the clockdomain will be "disabled." - * Only needed for clocks that don't use omap2_dflt_clk_disable() as their - * disable function pointer. No return value. 
- */ -void omap2_clkops_disable_clkdm(struct clk_hw *hw) -{ - struct clk_hw_omap *clk; - - clk = to_clk_hw_omap(hw); - - if (unlikely(!clk->clkdm)) { - pr_err("%s: %s: no clkdm set ?!\n", __func__, - __clk_get_name(hw->clk)); - return; - } - - if (unlikely(clk->enable_reg)) - pr_err("%s: %s: should use dflt_clk_disable ?!\n", __func__, - __clk_get_name(hw->clk)); - - if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) { - pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n", - __func__, __clk_get_name(hw->clk)); - return; - } - - clkdm_clk_disable(clk->clkdm, hw->clk); -} - static int __initdata mpurate; /* diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index 948065497472..a7e951129ffb 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -202,9 +202,6 @@ extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_apll54; extern const struct clk_hw_omap_ops clkhwops_apll96; -extern int omap2_clkops_enable_clkdm(struct clk_hw *hw); -extern void omap2_clkops_disable_clkdm(struct clk_hw *hw); - struct regmap; int __init omap2_clk_provider_init(struct device_node *np, int index, diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 3652c267cf81..83476d12d561 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -180,6 +180,9 @@ extern const struct clk_hw_omap_ops clkhwops_iclk; extern const struct clk_hw_omap_ops clkhwops_iclk_wait; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; +int omap2_clkops_enable_clkdm(struct clk_hw *hw); +void omap2_clkops_disable_clkdm(struct clk_hw *hw); + int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); int omap2_dflt_clk_is_enabled(struct clk_hw *hw); diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c index 35fe1085480c..61ef87b1a688 100644 --- a/drivers/clk/ti/clockdomain.c +++ 
b/drivers/clk/ti/clockdomain.c @@ -24,6 +24,82 @@ #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ +/** + * omap2_clkops_enable_clkdm - increment usecount on clkdm of @hw + * @hw: struct clk_hw * of the clock being enabled + * + * Increment the usecount of the clockdomain of the clock pointed to + * by @hw; if the usecount is 1, the clockdomain will be "enabled." + * Only needed for clocks that don't use omap2_dflt_clk_enable() as + * their enable function pointer. Passes along the return value of + * clkdm_clk_enable(), -EINVAL if @hw is not associated with a + * clockdomain, or 0 if clock framework-based clockdomain control is + * not implemented. + */ +int omap2_clkops_enable_clkdm(struct clk_hw *hw) +{ + struct clk_hw_omap *clk; + int ret = 0; + + clk = to_clk_hw_omap(hw); + + if (unlikely(!clk->clkdm)) { + pr_err("%s: %s: no clkdm set ?!\n", __func__, + __clk_get_name(hw->clk)); + return -EINVAL; + } + + if (unlikely(clk->enable_reg)) + pr_err("%s: %s: should use dflt_clk_enable ?!\n", __func__, + __clk_get_name(hw->clk)); + + if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) { + pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n", + __func__, __clk_get_name(hw->clk)); + return 0; + } + + ret = ti_clk_ll_ops->clkdm_clk_enable(clk->clkdm, hw->clk); + WARN(ret, "%s: could not enable %s's clockdomain %s: %d\n", + __func__, __clk_get_name(hw->clk), clk->clkdm_name, ret); + + return ret; +} + +/** + * omap2_clkops_disable_clkdm - decrement usecount on clkdm of @hw + * @hw: struct clk_hw * of the clock being disabled + * + * Decrement the usecount of the clockdomain of the clock pointed to + * by @hw; if the usecount is 0, the clockdomain will be "disabled." + * Only needed for clocks that don't use omap2_dflt_clk_disable() as their + * disable function pointer. No return value. 
+ */ +void omap2_clkops_disable_clkdm(struct clk_hw *hw) +{ + struct clk_hw_omap *clk; + + clk = to_clk_hw_omap(hw); + + if (unlikely(!clk->clkdm)) { + pr_err("%s: %s: no clkdm set ?!\n", __func__, + __clk_get_name(hw->clk)); + return; + } + + if (unlikely(clk->enable_reg)) + pr_err("%s: %s: should use dflt_clk_disable ?!\n", __func__, + __clk_get_name(hw->clk)); + + if (ti_clk_get_features()->flags & TI_CLK_DISABLE_CLKDM_CONTROL) { + pr_err("%s: %s: clkfw-based clockdomain control disabled ?!\n", + __func__, __clk_get_name(hw->clk)); + return; + } + + ti_clk_ll_ops->clkdm_clk_disable(clk->clkdm, hw->clk); +} + static void __init of_ti_clockdomain_setup(struct device_node *node) { struct clk *clk; diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 440ace33ea35..27828422c9c5 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -272,8 +272,6 @@ extern const struct clk_ops ti_clk_mux_ops; #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) void omap2_init_clk_clkdm(struct clk_hw *clk); -int omap2_clkops_enable_clkdm(struct clk_hw *hw); -void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); -- cgit v1.2.3-70-g09d2 From f2671d5c6cb4abe4636014cd66fd0eeb8190b2ca Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 17:28:12 +0200 Subject: clk: ti: omap34xx: move omap34xx clock type support code to clock driver With the legacy clock data gone, this is no longer needed under platform, so move it under the clock driver itself. Remove unnecessary declarations from the TI clock header also. 
Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 2 +- arch/arm/mach-omap2/clock34xx.c | 138 ---------------------------------------- drivers/clk/ti/clk-3xxx.c | 118 ++++++++++++++++++++++++++++++++++ drivers/clk/ti/clock.h | 4 ++ include/linux/clk/ti.h | 4 -- 5 files changed, 123 insertions(+), 143 deletions(-) delete mode 100644 arch/arm/mach-omap2/clock34xx.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 695d58f81ff3..22d2e48dcff5 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -186,7 +186,7 @@ obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpllcore.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_virt_prcm_set.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpll.o obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) -obj-$(CONFIG_ARCH_OMAP3) += clock34xx.o clkt34xx_dpll3m2.o +obj-$(CONFIG_ARCH_OMAP3) += clkt34xx_dpll3m2.o obj-$(CONFIG_ARCH_OMAP3) += clock3517.o obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) obj-$(CONFIG_SOC_AM33XX) += $(clock-common) diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c deleted file mode 100644 index 4596468e50ab..000000000000 --- a/arch/arm/mach-omap2/clock34xx.c +++ /dev/null @@ -1,138 +0,0 @@ -/* - * OMAP3-specific clock framework functions - * - * Copyright (C) 2007-2008 Texas Instruments, Inc. - * Copyright (C) 2007-2011 Nokia Corporation - * - * Paul Walmsley - * Jouni Högander - * - * Parts of this code are based on code written by - * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu, - * Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#undef DEBUG - -#include -#include -#include - -#include "clock.h" -#include "clock34xx.h" -#include "cm3xxx.h" -#include "cm-regbits-34xx.h" - -/** - * omap3430es2_clk_ssi_find_idlest - return CM_IDLEST info for SSI - * @clk: struct clk * being enabled - * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into - * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into - * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator - * - * The OMAP3430ES2 SSI target CM_IDLEST bit is at a different shift - * from the CM_{I,F}CLKEN bit. Pass back the correct info via - * @idlest_reg and @idlest_bit. No return value. - */ -static void omap3430es2_clk_ssi_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, - u8 *idlest_val) -{ - u32 r; - - r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); - *idlest_reg = (__force void __iomem *)r; - *idlest_bit = OMAP3430ES2_ST_SSI_IDLE_SHIFT; - *idlest_val = OMAP34XX_CM_IDLEST_VAL; -} -const struct clk_hw_omap_ops clkhwops_omap3430es2_ssi_wait = { - .find_idlest = omap3430es2_clk_ssi_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - -const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait = { - .allow_idle = omap2_clkt_iclk_allow_idle, - .deny_idle = omap2_clkt_iclk_deny_idle, - .find_idlest = omap3430es2_clk_ssi_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - -/** - * omap3430es2_clk_dss_usbhost_find_idlest - CM_IDLEST info for DSS, USBHOST - * @clk: struct clk * being enabled - * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into - * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into - * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator - * - * Some OMAP modules on OMAP3 ES2+ chips have both initiator and - * target IDLEST bits. 
For our purposes, we are concerned with the - * target IDLEST bits, which exist at a different bit position than - * the *CLKEN bit position for these modules (DSS and USBHOST) (The - * default find_idlest code assumes that they are at the same - * position.) No return value. - */ -static void omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, - u8 *idlest_val) -{ - u32 r; - - r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); - *idlest_reg = (__force void __iomem *)r; - /* USBHOST_IDLE has same shift */ - *idlest_bit = OMAP3430ES2_ST_DSS_IDLE_SHIFT; - *idlest_val = OMAP34XX_CM_IDLEST_VAL; -} - -const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait = { - .find_idlest = omap3430es2_clk_dss_usbhost_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - -const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait = { - .allow_idle = omap2_clkt_iclk_allow_idle, - .deny_idle = omap2_clkt_iclk_deny_idle, - .find_idlest = omap3430es2_clk_dss_usbhost_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - -/** - * omap3430es2_clk_hsotgusb_find_idlest - return CM_IDLEST info for HSOTGUSB - * @clk: struct clk * being enabled - * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into - * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into - * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator - * - * The OMAP3430ES2 HSOTGUSB target CM_IDLEST bit is at a different - * shift from the CM_{I,F}CLKEN bit. Pass back the correct info via - * @idlest_reg and @idlest_bit. No return value. 
- */ -static void omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, - u8 *idlest_val) -{ - u32 r; - - r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); - *idlest_reg = (__force void __iomem *)r; - *idlest_bit = OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT; - *idlest_val = OMAP34XX_CM_IDLEST_VAL; -} - -const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait = { - .allow_idle = omap2_clkt_iclk_allow_idle, - .deny_idle = omap2_clkt_iclk_deny_idle, - .find_idlest = omap3430es2_clk_hsotgusb_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; - -const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait = { - .find_idlest = omap3430es2_clk_hsotgusb_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 5489ad8c07d4..58879f0b7949 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -28,6 +28,124 @@ */ #define DPLL5_FREQ_FOR_USBHOST 120000000 +#define OMAP3430ES2_ST_DSS_IDLE_SHIFT 1 +#define OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT 5 +#define OMAP3430ES2_ST_SSI_IDLE_SHIFT 8 + +#define OMAP34XX_CM_IDLEST_VAL 1 + +/** + * omap3430es2_clk_ssi_find_idlest - return CM_IDLEST info for SSI + * @clk: struct clk * being enabled + * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into + * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into + * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator + * + * The OMAP3430ES2 SSI target CM_IDLEST bit is at a different shift + * from the CM_{I,F}CLKEN bit. Pass back the correct info via + * @idlest_reg and @idlest_bit. No return value. 
+ */ +static void omap3430es2_clk_ssi_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, + u8 *idlest_val) +{ + u32 r; + + r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); + *idlest_reg = (__force void __iomem *)r; + *idlest_bit = OMAP3430ES2_ST_SSI_IDLE_SHIFT; + *idlest_val = OMAP34XX_CM_IDLEST_VAL; +} + +const struct clk_hw_omap_ops clkhwops_omap3430es2_ssi_wait = { + .find_idlest = omap3430es2_clk_ssi_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + +const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait = { + .allow_idle = omap2_clkt_iclk_allow_idle, + .deny_idle = omap2_clkt_iclk_deny_idle, + .find_idlest = omap3430es2_clk_ssi_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + +/** + * omap3430es2_clk_dss_usbhost_find_idlest - CM_IDLEST info for DSS, USBHOST + * @clk: struct clk * being enabled + * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into + * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into + * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator + * + * Some OMAP modules on OMAP3 ES2+ chips have both initiator and + * target IDLEST bits. For our purposes, we are concerned with the + * target IDLEST bits, which exist at a different bit position than + * the *CLKEN bit position for these modules (DSS and USBHOST) (The + * default find_idlest code assumes that they are at the same + * position.) No return value. 
+ */ +static void omap3430es2_clk_dss_usbhost_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, + u8 *idlest_val) +{ + u32 r; + + r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); + *idlest_reg = (__force void __iomem *)r; + /* USBHOST_IDLE has same shift */ + *idlest_bit = OMAP3430ES2_ST_DSS_IDLE_SHIFT; + *idlest_val = OMAP34XX_CM_IDLEST_VAL; +} + +const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait = { + .find_idlest = omap3430es2_clk_dss_usbhost_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + +const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait = { + .allow_idle = omap2_clkt_iclk_allow_idle, + .deny_idle = omap2_clkt_iclk_deny_idle, + .find_idlest = omap3430es2_clk_dss_usbhost_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + +/** + * omap3430es2_clk_hsotgusb_find_idlest - return CM_IDLEST info for HSOTGUSB + * @clk: struct clk * being enabled + * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into + * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into + * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator + * + * The OMAP3430ES2 HSOTGUSB target CM_IDLEST bit is at a different + * shift from the CM_{I,F}CLKEN bit. Pass back the correct info via + * @idlest_reg and @idlest_bit. No return value. 
+ */ +static void omap3430es2_clk_hsotgusb_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, + u8 *idlest_val) +{ + u32 r; + + r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); + *idlest_reg = (__force void __iomem *)r; + *idlest_bit = OMAP3430ES2_ST_HSOTGUSB_IDLE_SHIFT; + *idlest_val = OMAP34XX_CM_IDLEST_VAL; +} + +const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait = { + .allow_idle = omap2_clkt_iclk_allow_idle, + .deny_idle = omap2_clkt_iclk_deny_idle, + .find_idlest = omap3430es2_clk_hsotgusb_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + +const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait = { + .find_idlest = omap3430es2_clk_hsotgusb_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + static struct ti_dt_clk omap3xxx_clks[] = { DT_CLK(NULL, "apb_pclk", "dummy_apb_pclk"), DT_CLK(NULL, "omap_32k_fck", "omap_32k_fck"), diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 83476d12d561..c6fbd153b6d4 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -179,6 +179,10 @@ extern const struct clk_hw_omap_ops clkhwops_wait; extern const struct clk_hw_omap_ops clkhwops_iclk; extern const struct clk_hw_omap_ops clkhwops_iclk_wait; extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait; +extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; +extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait; +extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait; +extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait; int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 27828422c9c5..cd5b3eadc317 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -331,12 +331,8 @@ void ti_clk_setup_features(struct ti_clk_features *features); 
const struct ti_clk_features *ti_clk_get_features(void); extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; -extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait; -extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait; -extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait; #ifdef CONFIG_ATAGS int omap3430_clk_legacy_init(void); -- cgit v1.2.3-70-g09d2 From c9a58b0a848e4b88d2dd4690ef19bae8696649eb Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 3 Mar 2015 21:19:25 +0200 Subject: clk: ti: am3517: move remaining am3517 clock support code to clock driver With legacy clock support gone, this is no longer needed under platform, so move it under the clock driver itself. Make some exports be driver internal definitions at the same time. 
Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/Makefile | 1 - arch/arm/mach-omap2/clock3517.c | 118 ---------------------------------------- arch/arm/mach-omap2/clock3517.h | 14 ----- drivers/clk/ti/clk-3xxx.c | 94 ++++++++++++++++++++++++++++++++ drivers/clk/ti/clock.h | 2 + include/linux/clk/ti.h | 2 - 6 files changed, 96 insertions(+), 135 deletions(-) delete mode 100644 arch/arm/mach-omap2/clock3517.c delete mode 100644 arch/arm/mach-omap2/clock3517.h (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index 22d2e48dcff5..d424920a5e1c 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -187,7 +187,6 @@ obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_virt_prcm_set.o obj-$(CONFIG_ARCH_OMAP2) += clkt2xxx_dpll.o obj-$(CONFIG_ARCH_OMAP3) += $(clock-common) obj-$(CONFIG_ARCH_OMAP3) += clkt34xx_dpll3m2.o -obj-$(CONFIG_ARCH_OMAP3) += clock3517.o obj-$(CONFIG_ARCH_OMAP4) += $(clock-common) obj-$(CONFIG_SOC_AM33XX) += $(clock-common) obj-$(CONFIG_SOC_OMAP5) += $(clock-common) diff --git a/arch/arm/mach-omap2/clock3517.c b/arch/arm/mach-omap2/clock3517.c deleted file mode 100644 index 4d79ae2c0241..000000000000 --- a/arch/arm/mach-omap2/clock3517.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * OMAP3517/3505-specific clock framework functions - * - * Copyright (C) 2010 Texas Instruments, Inc. - * Copyright (C) 2011 Nokia Corporation - * - * Ranjith Lohithakshan - * Paul Walmsley - * - * Parts of this code are based on code written by - * Richard Woodruff, Tony Lindgren, Tuukka Tikkanen, Karthik Dasu, - * Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#undef DEBUG - -#include -#include -#include - -#include "clock.h" -#include "clock3517.h" -#include "cm3xxx.h" -#include "cm-regbits-34xx.h" - -/* - * In AM35xx IPSS, the {ICK,FCK} enable bits for modules are exported - * in the same register at a bit offset of 0x8. The EN_ACK for ICK is - * at an offset of 4 from ICK enable bit. - */ -#define AM35XX_IPSS_ICK_MASK 0xF -#define AM35XX_IPSS_ICK_EN_ACK_OFFSET 0x4 -#define AM35XX_IPSS_ICK_FCK_OFFSET 0x8 -#define AM35XX_IPSS_CLK_IDLEST_VAL 0 - -/** - * am35xx_clk_find_idlest - return clock ACK info for AM35XX IPSS - * @clk: struct clk * being enabled - * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into - * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into - * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator - * - * The interface clocks on AM35xx IPSS reflects the clock idle status - * in the enable register itsel at a bit offset of 4 from the enable - * bit. A value of 1 indicates that clock is enabled. - */ -static void am35xx_clk_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, - u8 *idlest_val) -{ - *idlest_reg = (__force void __iomem *)(clk->enable_reg); - *idlest_bit = clk->enable_bit + AM35XX_IPSS_ICK_EN_ACK_OFFSET; - *idlest_val = AM35XX_IPSS_CLK_IDLEST_VAL; -} - -/** - * am35xx_clk_find_companion - find companion clock to @clk - * @clk: struct clk * to find the companion clock of - * @other_reg: void __iomem ** to return the companion clock CM_*CLKEN va in - * @other_bit: u8 ** to return the companion clock bit shift in - * - * Some clocks don't have companion clocks. For example, modules with - * only an interface clock (such as HECC) don't have a companion - * clock. Right now, this code relies on the hardware exporting a bit - * in the correct companion register that indicates that the - * nonexistent 'companion clock' is active. 
Future patches will - * associate this type of code with per-module data structures to - * avoid this issue, and remove the casts. No return value. - */ -static void am35xx_clk_find_companion(struct clk_hw_omap *clk, - void __iomem **other_reg, - u8 *other_bit) -{ - *other_reg = (__force void __iomem *)(clk->enable_reg); - if (clk->enable_bit & AM35XX_IPSS_ICK_MASK) - *other_bit = clk->enable_bit + AM35XX_IPSS_ICK_FCK_OFFSET; - else - *other_bit = clk->enable_bit - AM35XX_IPSS_ICK_FCK_OFFSET; -} -const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait = { - .find_idlest = am35xx_clk_find_idlest, - .find_companion = am35xx_clk_find_companion, -}; - -/** - * am35xx_clk_ipss_find_idlest - return CM_IDLEST info for IPSS - * @clk: struct clk * being enabled - * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into - * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into - * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator - * - * The IPSS target CM_IDLEST bit is at a different shift from the - * CM_{I,F}CLKEN bit. Pass back the correct info via @idlest_reg - * and @idlest_bit. No return value. 
- */ -static void am35xx_clk_ipss_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, - u8 *idlest_val) -{ - u32 r; - - r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); - *idlest_reg = (__force void __iomem *)r; - *idlest_bit = AM35XX_ST_IPSS_SHIFT; - *idlest_val = OMAP34XX_CM_IDLEST_VAL; -} - -const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait = { - .allow_idle = omap2_clkt_iclk_allow_idle, - .deny_idle = omap2_clkt_iclk_deny_idle, - .find_idlest = am35xx_clk_ipss_find_idlest, - .find_companion = omap2_clk_dflt_find_companion, -}; diff --git a/arch/arm/mach-omap2/clock3517.h b/arch/arm/mach-omap2/clock3517.h deleted file mode 100644 index ca5e5a64c2e2..000000000000 --- a/arch/arm/mach-omap2/clock3517.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * OMAP3517/3505 clock function prototypes and macros - * - * Copyright (C) 2010 Texas Instruments, Inc. - * Copyright (C) 2010 Nokia Corporation - */ - -#ifndef __ARCH_ARM_MACH_OMAP2_CLOCK3517_H -#define __ARCH_ARM_MACH_OMAP2_CLOCK3517_H - -extern const struct clkops clkops_am35xx_ipss_module_wait; -extern const struct clkops clkops_am35xx_ipss_wait; - -#endif diff --git a/drivers/clk/ti/clk-3xxx.c b/drivers/clk/ti/clk-3xxx.c index 58879f0b7949..6e33332b6b34 100644 --- a/drivers/clk/ti/clk-3xxx.c +++ b/drivers/clk/ti/clk-3xxx.c @@ -34,6 +34,18 @@ #define OMAP34XX_CM_IDLEST_VAL 1 +/* + * In AM35xx IPSS, the {ICK,FCK} enable bits for modules are exported + * in the same register at a bit offset of 0x8. The EN_ACK for ICK is + * at an offset of 4 from ICK enable bit. 
+ */ +#define AM35XX_IPSS_ICK_MASK 0xF +#define AM35XX_IPSS_ICK_EN_ACK_OFFSET 0x4 +#define AM35XX_IPSS_ICK_FCK_OFFSET 0x8 +#define AM35XX_IPSS_CLK_IDLEST_VAL 0 + +#define AM35XX_ST_IPSS_SHIFT 5 + /** * omap3430es2_clk_ssi_find_idlest - return CM_IDLEST info for SSI * @clk: struct clk * being enabled @@ -146,6 +158,88 @@ const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait = { .find_companion = omap2_clk_dflt_find_companion, }; +/** + * am35xx_clk_find_idlest - return clock ACK info for AM35XX IPSS + * @clk: struct clk * being enabled + * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into + * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into + * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator + * + * The interface clocks on AM35xx IPSS reflects the clock idle status + * in the enable register itsel at a bit offset of 4 from the enable + * bit. A value of 1 indicates that clock is enabled. + */ +static void am35xx_clk_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, + u8 *idlest_val) +{ + *idlest_reg = (__force void __iomem *)(clk->enable_reg); + *idlest_bit = clk->enable_bit + AM35XX_IPSS_ICK_EN_ACK_OFFSET; + *idlest_val = AM35XX_IPSS_CLK_IDLEST_VAL; +} + +/** + * am35xx_clk_find_companion - find companion clock to @clk + * @clk: struct clk * to find the companion clock of + * @other_reg: void __iomem ** to return the companion clock CM_*CLKEN va in + * @other_bit: u8 ** to return the companion clock bit shift in + * + * Some clocks don't have companion clocks. For example, modules with + * only an interface clock (such as HECC) don't have a companion + * clock. Right now, this code relies on the hardware exporting a bit + * in the correct companion register that indicates that the + * nonexistent 'companion clock' is active. Future patches will + * associate this type of code with per-module data structures to + * avoid this issue, and remove the casts. No return value. 
+ */ +static void am35xx_clk_find_companion(struct clk_hw_omap *clk, + void __iomem **other_reg, + u8 *other_bit) +{ + *other_reg = (__force void __iomem *)(clk->enable_reg); + if (clk->enable_bit & AM35XX_IPSS_ICK_MASK) + *other_bit = clk->enable_bit + AM35XX_IPSS_ICK_FCK_OFFSET; + else + *other_bit = clk->enable_bit - AM35XX_IPSS_ICK_FCK_OFFSET; +} + +const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait = { + .find_idlest = am35xx_clk_find_idlest, + .find_companion = am35xx_clk_find_companion, +}; + +/** + * am35xx_clk_ipss_find_idlest - return CM_IDLEST info for IPSS + * @clk: struct clk * being enabled + * @idlest_reg: void __iomem ** to store CM_IDLEST reg address into + * @idlest_bit: pointer to a u8 to store the CM_IDLEST bit shift into + * @idlest_val: pointer to a u8 to store the CM_IDLEST indicator + * + * The IPSS target CM_IDLEST bit is at a different shift from the + * CM_{I,F}CLKEN bit. Pass back the correct info via @idlest_reg + * and @idlest_bit. No return value. 
+ */ +static void am35xx_clk_ipss_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, + u8 *idlest_val) +{ + u32 r; + + r = (((__force u32)clk->enable_reg & ~0xf0) | 0x20); + *idlest_reg = (__force void __iomem *)r; + *idlest_bit = AM35XX_ST_IPSS_SHIFT; + *idlest_val = OMAP34XX_CM_IDLEST_VAL; +} + +const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait = { + .allow_idle = omap2_clkt_iclk_allow_idle, + .deny_idle = omap2_clkt_iclk_deny_idle, + .find_idlest = am35xx_clk_ipss_find_idlest, + .find_companion = omap2_clk_dflt_find_companion, +}; + static struct ti_dt_clk omap3xxx_clks[] = { DT_CLK(NULL, "apb_pclk", "dummy_apb_pclk"), DT_CLK(NULL, "omap_32k_fck", "omap_32k_fck"), diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index c6fbd153b6d4..0ca5a36da999 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -183,6 +183,8 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_hsotgusb_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_dss_usbhost_wait; extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait; +extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; +extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index cd5b3eadc317..15f3c971ccab 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -331,8 +331,6 @@ void ti_clk_setup_features(struct ti_clk_features *features); const struct ti_clk_features *ti_clk_get_features(void); extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll; -extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; -extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; #ifdef CONFIG_ATAGS int omap3430_clk_legacy_init(void); -- cgit 
v1.2.3-70-g09d2 From a3314e9cf69c1d4052017e559ea69a042ccd83e2 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 4 Mar 2015 21:02:05 +0200 Subject: clk: ti: move some public definitions to private header Several exported TI clock driver features are no longer needed outside the clock driver itself, thus move all of these to the driver private header file. Also, update some of the driver files to actually include this header. Signed-off-by: Tero Kristo --- drivers/clk/ti/apll.c | 2 ++ drivers/clk/ti/autoidle.c | 2 ++ drivers/clk/ti/clk-43xx.c | 2 ++ drivers/clk/ti/clk-44xx.c | 2 ++ drivers/clk/ti/clk-54xx.c | 2 ++ drivers/clk/ti/clk-7xx.c | 3 ++- drivers/clk/ti/clock.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/clk/ti.h | 45 --------------------------------------------- 8 files changed, 59 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c index 49baf3831546..594b759f02ee 100644 --- a/drivers/clk/ti/apll.c +++ b/drivers/clk/ti/apll.c @@ -27,6 +27,8 @@ #include #include +#include "clock.h" + #define APLL_FORCE_LOCK 0x1 #define APLL_AUTO_IDLE 0x2 #define MAX_APLL_WAIT_TRIES 1000000 diff --git a/drivers/clk/ti/autoidle.c b/drivers/clk/ti/autoidle.c index 3dbcc3681058..94f0dcd94181 100644 --- a/drivers/clk/ti/autoidle.c +++ b/drivers/clk/ti/autoidle.c @@ -22,6 +22,8 @@ #include #include +#include "clock.h" + struct clk_ti_autoidle { void __iomem *reg; u8 shift; diff --git a/drivers/clk/ti/clk-43xx.c b/drivers/clk/ti/clk-43xx.c index 3795fce8a830..894316738459 100644 --- a/drivers/clk/ti/clk-43xx.c +++ b/drivers/clk/ti/clk-43xx.c @@ -19,6 +19,8 @@ #include #include +#include "clock.h" + static struct ti_dt_clk am43xx_clks[] = { DT_CLK(NULL, "clk_32768_ck", "clk_32768_ck"), DT_CLK(NULL, "clk_rc32k_ck", "clk_rc32k_ck"), diff --git a/drivers/clk/ti/clk-44xx.c b/drivers/clk/ti/clk-44xx.c index 581db7711f51..7a8b51b35f9f 100644 --- a/drivers/clk/ti/clk-44xx.c +++ 
b/drivers/clk/ti/clk-44xx.c @@ -16,6 +16,8 @@ #include #include +#include "clock.h" + /* * OMAP4 ABE DPLL default frequency. In OMAP4460 TRM version V, section * "3.6.3.2.3 CM1_ABE Clock Generator" states that the "DPLL_ABE_X2_CLK diff --git a/drivers/clk/ti/clk-54xx.c b/drivers/clk/ti/clk-54xx.c index 96c69a335975..59ce2fa2c104 100644 --- a/drivers/clk/ti/clk-54xx.c +++ b/drivers/clk/ti/clk-54xx.c @@ -17,6 +17,8 @@ #include #include +#include "clock.h" + #define OMAP5_DPLL_ABE_DEFFREQ 98304000 /* diff --git a/drivers/clk/ti/clk-7xx.c b/drivers/clk/ti/clk-7xx.c index 5d2217ae4478..8b827219d454 100644 --- a/drivers/clk/ti/clk-7xx.c +++ b/drivers/clk/ti/clk-7xx.c @@ -16,11 +16,12 @@ #include #include +#include "clock.h" + #define DRA7_DPLL_ABE_DEFFREQ 180633600 #define DRA7_DPLL_GMAC_DEFFREQ 1000000000 #define DRA7_DPLL_USB_DEFFREQ 960000000 - static struct ti_dt_clk dra7xx_clks[] = { DT_CLK(NULL, "atl_clkin0_ck", "atl_clkin0_ck"), DT_CLK(NULL, "atl_clkin1_ck", "atl_clkin1_ck"), diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 0ca5a36da999..3c43125b9cc9 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -154,6 +154,35 @@ struct ti_clk_dpll { u8 recal_st_bit; }; +/* Composite clock component types */ +enum { + CLK_COMPONENT_TYPE_GATE = 0, + CLK_COMPONENT_TYPE_DIVIDER, + CLK_COMPONENT_TYPE_MUX, + CLK_COMPONENT_TYPE_MAX, +}; + +/** + * struct ti_dt_clk - OMAP DT clock alias declarations + * @lk: clock lookup definition + * @node_name: clock DT node to map to + */ +struct ti_dt_clk { + struct clk_lookup lk; + char *node_name; +}; + +#define DT_CLK(dev, con, name) \ + { \ + .lk = { \ + .dev_id = dev, \ + .con_id = con, \ + }, \ + .node_name = name, \ + } + +typedef void (*ti_of_clk_init_cb_t)(struct clk_hw *, struct device_node *); + struct clk *ti_clk_register_gate(struct ti_clk *setup); struct clk *ti_clk_register_interface(struct ti_clk *setup); struct clk *ti_clk_register_mux(struct ti_clk *setup); @@ -169,6 +198,12 @@ void 
ti_clk_patch_legacy_clks(struct ti_clk **patch); struct clk *ti_clk_register_clk(struct ti_clk *setup); int ti_clk_register_legacy_clks(struct ti_clk_alias *clks); +void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index); +void ti_dt_clocks_register(struct ti_dt_clk *oclks); +int ti_clk_retry_init(struct device_node *node, struct clk_hw *hw, + ti_of_clk_init_cb_t func); +int ti_clk_add_component(struct device_node *node, struct clk_hw *hw, int type); + void omap2_init_clk_hw_omap_clocks(struct clk *clk); int of_ti_clk_autoidle_setup(struct device_node *node); void omap2_clk_enable_init_clocks(const char **clk_names, u8 num_clocks); @@ -186,12 +221,24 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_iclk_ssi_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait; extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_wait; +extern const struct clk_ops ti_clk_divider_ops; +extern const struct clk_ops ti_clk_mux_ops; + int omap2_clkops_enable_clkdm(struct clk_hw *hw); void omap2_clkops_disable_clkdm(struct clk_hw *hw); int omap2_dflt_clk_enable(struct clk_hw *hw); void omap2_dflt_clk_disable(struct clk_hw *hw); int omap2_dflt_clk_is_enabled(struct clk_hw *hw); +void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, + void __iomem **other_reg, + u8 *other_bit); +void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, + void __iomem **idlest_reg, + u8 *idlest_bit, u8 *idlest_val); + +void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk); +void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk); u8 omap2_init_dpll_parent(struct clk_hw *hw); int omap3_noncore_dpll_enable(struct clk_hw *hw); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 15f3c971ccab..5eccdf5c0e84 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -188,33 +188,6 @@ struct clk_hw_omap { /* DPLL Type and DCO Selection Flags */ #define DPLL_J_TYPE 0x1 -/* Composite clock component types */ -enum { - 
CLK_COMPONENT_TYPE_GATE = 0, - CLK_COMPONENT_TYPE_DIVIDER, - CLK_COMPONENT_TYPE_MUX, - CLK_COMPONENT_TYPE_MAX, -}; - -/** - * struct ti_dt_clk - OMAP DT clock alias declarations - * @lk: clock lookup definition - * @node_name: clock DT node to map to - */ -struct ti_dt_clk { - struct clk_lookup lk; - char *node_name; -}; - -#define DT_CLK(dev, con, name) \ - { \ - .lk = { \ - .dev_id = dev, \ - .con_id = con, \ - }, \ - .node_name = name, \ - } - /* Static memmap indices */ enum { TI_CLKM_CM = 0, @@ -225,8 +198,6 @@ enum { CLK_MAX_MEMMAPS }; -typedef void (*ti_of_clk_init_cb_t)(struct clk_hw *, struct device_node *); - /** * struct clk_omap_reg - OMAP register declaration * @offset: offset from the master IP module base address @@ -266,9 +237,6 @@ struct ti_clk_ll_ops { extern struct ti_clk_ll_ops *ti_clk_ll_ops; -extern const struct clk_ops ti_clk_divider_ops; -extern const struct clk_ops ti_clk_mux_ops; - #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) void omap2_init_clk_clkdm(struct clk_hw *clk); @@ -276,14 +244,6 @@ int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); int omap2_clk_deny_idle(struct clk *clk); -void omap2_clkt_iclk_allow_idle(struct clk_hw_omap *clk); -void omap2_clkt_iclk_deny_idle(struct clk_hw_omap *clk); -void omap2_clk_dflt_find_companion(struct clk_hw_omap *clk, - void __iomem **other_reg, - u8 *other_bit); -void omap2_clk_dflt_find_idlest(struct clk_hw_omap *clk, - void __iomem **idlest_reg, - u8 *idlest_bit, u8 *idlest_val); unsigned long omap2_dpllcore_recalc(struct clk_hw *hw, unsigned long parent_rate); int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate, @@ -292,14 +252,9 @@ void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); void omap2xxx_clkt_vps_init(void); unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk); -void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index); -void 
ti_dt_clocks_register(struct ti_dt_clk *oclks); void ti_dt_clk_init_provider(struct device_node *np, int index); void ti_dt_clk_init_retry_clks(void); void ti_dt_clockdomains_setup(void); -int ti_clk_retry_init(struct device_node *node, struct clk_hw *hw, - ti_of_clk_init_cb_t func); -int ti_clk_add_component(struct device_node *node, struct clk_hw *hw, int type); int omap3430_dt_clk_init(void); int omap3630_dt_clk_init(void); -- cgit v1.2.3-70-g09d2 From e9e63088e4f93cf4ed7999294c09905b7dcb4d32 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 27 Apr 2015 21:55:42 +0300 Subject: clk: ti: remove exported ll_ops struct, instead add an API for registration We should avoid exporting data from drivers, instead use an API for registering the clock low level operations. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 17 +++++++++++++---- arch/arm/mach-omap2/clock.h | 1 + arch/arm/mach-omap2/io.c | 2 ++ drivers/clk/ti/clk.c | 21 +++++++++++++++++++++ drivers/clk/ti/clock.h | 2 ++ drivers/clk/ti/clockdomain.c | 2 ++ include/linux/clk/ti.h | 3 +-- 7 files changed, 42 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 99875dba803a..40a88c2e4016 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -112,6 +112,19 @@ static struct ti_clk_ll_ops omap_clk_ll_ops = { .cm_split_idlest_reg = cm_split_idlest_reg, }; +/** + * omap2_clk_setup_ll_ops - setup clock driver low-level ops + * + * Sets up clock driver low-level platform ops. These are needed + * for register accesses and various other misc platform operations. + * Returns 0 on success, -EBUSY if low level ops have been registered + * already. 
+ */ +int __init omap2_clk_setup_ll_ops(void) +{ + return ti_clk_setup_ll_ops(&omap_clk_ll_ops); +} + /** * omap2_clk_provider_init - initialize a clock provider * @match_table: DT device table to match for devices to init @@ -130,8 +143,6 @@ int __init omap2_clk_provider_init(struct device_node *np, int index, { struct clk_iomap *io; - ti_clk_ll_ops = &omap_clk_ll_ops; - io = kzalloc(sizeof(*io), GFP_KERNEL); io->regmap = syscon; @@ -155,8 +166,6 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) { struct clk_iomap *io; - ti_clk_ll_ops = &omap_clk_ll_ops; - io = memblock_virt_alloc(sizeof(*io), 0); io->mem = mem; diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index 1986ab216b1a..a7051d6a05e9 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -83,6 +83,7 @@ struct regmap; int __init omap2_clk_provider_init(struct device_node *np, int index, struct regmap *syscon, void __iomem *mem); void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem); +int __init omap2_clk_setup_ll_ops(void); void __init ti_clk_init_features(void); #endif diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 74678565cd97..a253aafbb9a2 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -722,6 +722,8 @@ int __init omap_clk_init(void) ti_clk_init_features(); + omap2_clk_setup_ll_ops(); + if (of_have_populated_dt()) { ret = omap_control_init(); if (ret) diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 5baea03cfc92..58b83e0af90f 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -32,6 +32,27 @@ static struct device_node *clocks_node_ptr[CLK_MAX_MEMMAPS]; struct ti_clk_features ti_clk_features; +/** + * ti_clk_setup_ll_ops - setup low level clock operations + * @ops: low level clock ops descriptor + * + * Sets up low level clock operations for TI clock driver. 
This is used + * to provide various callbacks for the clock driver towards platform + * specific code. Returns 0 on success, -EBUSY if ll_ops have been + * registered already. + */ +int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops) +{ + if (ti_clk_ll_ops) { + pr_err("Attempt to register ll_ops multiple times.\n"); + return -EBUSY; + } + + ti_clk_ll_ops = ops; + + return 0; +} + /** * ti_dt_clocks_register - register DT alias clocks during boot * @oclks: list of clocks to register diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h index 3c43125b9cc9..d4d232fd89bc 100644 --- a/drivers/clk/ti/clock.h +++ b/drivers/clk/ti/clock.h @@ -280,4 +280,6 @@ long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long *best_parent_rate, struct clk_hw **best_parent_clk); +extern struct ti_clk_ll_ops *ti_clk_ll_ops; + #endif diff --git a/drivers/clk/ti/clockdomain.c b/drivers/clk/ti/clockdomain.c index 61ef87b1a688..80a7b6944d10 100644 --- a/drivers/clk/ti/clockdomain.c +++ b/drivers/clk/ti/clockdomain.c @@ -21,6 +21,8 @@ #include #include +#include "clock.h" + #undef pr_fmt #define pr_fmt(fmt) "%s: " fmt, __func__ diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 5eccdf5c0e84..5b644313e38a 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -235,8 +235,6 @@ struct ti_clk_ll_ops { u8 *idlest_reg_id); }; -extern struct ti_clk_ll_ops *ti_clk_ll_ops; - #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) void omap2_init_clk_clkdm(struct clk_hw *clk); @@ -255,6 +253,7 @@ unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk); void ti_dt_clk_init_provider(struct device_node *np, int index); void ti_dt_clk_init_retry_clks(void); void ti_dt_clockdomains_setup(void); +int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops); int omap3430_dt_clk_init(void); int omap3630_dt_clk_init(void); -- cgit v1.2.3-70-g09d2 From 989feafb84118a840ff21250a1e5f516f43e3dbb Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 27 Apr 
2015 22:23:06 +0300 Subject: clk: ti: move low-level access and init code under clock driver With most of the clock code under clock driver already, the low-level register access code, and the init code for the same, is no longer needed outside the clock driver. Thus, these can be moved under clock driver also. Signed-off-by: Tero Kristo --- arch/arm/mach-omap2/clock.c | 84 --------------------------------------------- arch/arm/mach-omap2/clock.h | 5 --- drivers/clk/ti/clk.c | 75 ++++++++++++++++++++++++++++++++++++++-- include/linux/clk/ti.h | 7 +++- 4 files changed, 78 insertions(+), 93 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c index 79cec5fbbe74..4340ba6524d1 100644 --- a/arch/arm/mach-omap2/clock.c +++ b/arch/arm/mach-omap2/clock.c @@ -23,9 +23,7 @@ #include #include #include -#include #include -#include #include #include @@ -55,41 +53,7 @@ u16 cpu_mask; #define OMAP3PLUS_DPLL_FINT_MIN 32000 #define OMAP3PLUS_DPLL_FINT_MAX 52000000 -struct clk_iomap { - struct regmap *regmap; - void __iomem *mem; -}; - -static struct clk_iomap *clk_memmaps[CLK_MAX_MEMMAPS]; - -static void clk_memmap_writel(u32 val, void __iomem *reg) -{ - struct clk_omap_reg *r = (struct clk_omap_reg *)&reg; - struct clk_iomap *io = clk_memmaps[r->index]; - - if (io->regmap) - regmap_write(io->regmap, r->offset, val); - else - writel_relaxed(val, io->mem + r->offset); -} - -static u32 clk_memmap_readl(void __iomem *reg) -{ - u32 val; - struct clk_omap_reg *r = (struct clk_omap_reg *)&reg; - struct clk_iomap *io = clk_memmaps[r->index]; - - if (io->regmap) - regmap_read(io->regmap, r->offset, &val); - else - val = readl_relaxed(io->mem + r->offset); - - return val; -} - static struct ti_clk_ll_ops omap_clk_ll_ops = { - .clk_readl = clk_memmap_readl, - .clk_writel = clk_memmap_writel, .clkdm_clk_enable = clkdm_clk_enable, .clkdm_clk_disable = clkdm_clk_disable, .cm_wait_module_ready = omap_cm_wait_module_ready, @@ -109,54 +73,6 @@ int
__init omap2_clk_setup_ll_ops(void) return ti_clk_setup_ll_ops(&omap_clk_ll_ops); } -/** - * omap2_clk_provider_init - initialize a clock provider - * @match_table: DT device table to match for devices to init - * @np: device node pointer for the this clock provider - * @index: index for the clock provider - + @syscon: syscon regmap pointer - * @mem: iomem pointer for the clock provider memory area, only used if - * syscon is not provided - * - * Initializes a clock provider module (CM/PRM etc.), registering - * the memory mapping at specified index and initializing the - * low level driver infrastructure. Returns 0 in success. - */ -int __init omap2_clk_provider_init(struct device_node *np, int index, - struct regmap *syscon, void __iomem *mem) -{ - struct clk_iomap *io; - - io = kzalloc(sizeof(*io), GFP_KERNEL); - - io->regmap = syscon; - io->mem = mem; - - clk_memmaps[index] = io; - - ti_dt_clk_init_provider(np, index); - - return 0; -} - -/** - * omap2_clk_legacy_provider_init - initialize a legacy clock provider - * @index: index for the clock provider - * @mem: iomem pointer for the clock provider memory area - * - * Initializes a legacy clock provider memory mapping. 
- */ -void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) -{ - struct clk_iomap *io; - - io = memblock_virt_alloc(sizeof(*io), 0); - - io->mem = mem; - - clk_memmaps[index] = io; -} - /* * OMAP2+ specific clock functions */ diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index f3dc04cd5538..67da640ba1c7 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -75,11 +75,6 @@ extern const struct clkops clkops_omap2_dflt; extern struct clk_functions omap2_clk_functions; -struct regmap; - -int __init omap2_clk_provider_init(struct device_node *np, int index, - struct regmap *syscon, void __iomem *mem); -void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem); int __init omap2_clk_setup_ll_ops(void); void __init ti_clk_init_features(void); diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c index 58b83e0af90f..07584e00677e 100644 --- a/drivers/clk/ti/clk.c +++ b/drivers/clk/ti/clk.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include "clock.h" @@ -32,6 +34,38 @@ static struct device_node *clocks_node_ptr[CLK_MAX_MEMMAPS]; struct ti_clk_features ti_clk_features; +struct clk_iomap { + struct regmap *regmap; + void __iomem *mem; +}; + +static struct clk_iomap *clk_memmaps[CLK_MAX_MEMMAPS]; + +static void clk_memmap_writel(u32 val, void __iomem *reg) +{ + struct clk_omap_reg *r = (struct clk_omap_reg *)&reg; + struct clk_iomap *io = clk_memmaps[r->index]; + + if (io->regmap) + regmap_write(io->regmap, r->offset, val); + else + writel_relaxed(val, io->mem + r->offset); +} + +static u32 clk_memmap_readl(void __iomem *reg) +{ + u32 val; + struct clk_omap_reg *r = (struct clk_omap_reg *)&reg; + struct clk_iomap *io = clk_memmaps[r->index]; + + if (io->regmap) + regmap_read(io->regmap, r->offset, &val); + else + val = readl_relaxed(io->mem + r->offset); + + return val; +} + /** * ti_clk_setup_ll_ops - setup low level clock operations * @ops: low level clock ops descriptor @@
-49,6 +83,8 @@ int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops) } ti_clk_ll_ops = ops; + ops->clk_readl = clk_memmap_readl; + ops->clk_writel = clk_memmap_writel; return 0; } @@ -161,28 +197,61 @@ void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index) } /** - * ti_dt_clk_init_provider - init master clock provider + * omap2_clk_provider_init - init master clock provider * @parent: master node * @index: internal index for clk_reg_ops + * @syscon: syscon regmap pointer for accessing clock registers + * @mem: iomem pointer for the clock provider memory area, only used if + * syscon is not provided * * Initializes a master clock IP block. This basically sets up the * mapping from clocks node to the memory map index. All the clocks * are then initialized through the common of_clk_init call, and the * clocks will access their memory maps based on the node layout. + * Returns 0 in success. */ -void ti_dt_clk_init_provider(struct device_node *parent, int index) +int __init omap2_clk_provider_init(struct device_node *parent, int index, + struct regmap *syscon, void __iomem *mem) { struct device_node *clocks; + struct clk_iomap *io; /* get clocks for this parent */ clocks = of_get_child_by_name(parent, "clocks"); if (!clocks) { pr_err("%s missing 'clocks' child node.\n", parent->name); - return; + return -EINVAL; } /* add clocks node info */ clocks_node_ptr[index] = clocks; + + io = kzalloc(sizeof(*io), GFP_KERNEL); + + io->regmap = syscon; + io->mem = mem; + + clk_memmaps[index] = io; + + return 0; +} + +/** + * omap2_clk_legacy_provider_init - initialize a legacy clock provider + * @index: index for the clock provider + * @mem: iomem pointer for the clock provider memory area + * + * Initializes a legacy clock provider memory mapping. 
+ */ +void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem) +{ + struct clk_iomap *io; + + io = memblock_virt_alloc(sizeof(*io), 0); + + io->mem = mem; + + clk_memmaps[index] = io; } /** diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 5b644313e38a..9299222d680d 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -250,11 +250,16 @@ void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw); void omap2xxx_clkt_vps_init(void); unsigned long omap2_get_dpll_rate(struct clk_hw_omap *clk); -void ti_dt_clk_init_provider(struct device_node *np, int index); void ti_dt_clk_init_retry_clks(void); void ti_dt_clockdomains_setup(void); int ti_clk_setup_ll_ops(struct ti_clk_ll_ops *ops); +struct regmap; + +int omap2_clk_provider_init(struct device_node *parent, int index, + struct regmap *syscon, void __iomem *mem); +void omap2_clk_legacy_provider_init(int index, void __iomem *mem); + int omap3430_dt_clk_init(void); int omap3630_dt_clk_init(void); int am35xx_dt_clk_init(void); -- cgit v1.2.3-70-g09d2 From 1e25aa9641e8f3fa39cd5e46b4afcafd7f12a44b Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 1 Jun 2015 16:36:27 -0700 Subject: hid-sensor: Fix suspend/resume delay By default all the sensors are in runtime suspended state (lowest power state). During Linux suspend process, all the run time suspended devices are resumed and then suspended. This caused all sensors to power up and introduced delay in suspend time, when we introduced runtime PM for HID sensors. The opposite process happens during resume process. To fix this, we do powerup process of the sensors only when the request is issued from user (raw or triggered). In this way, when runtime resume calls for powerup it will simply return as this will not match user requested state. Note this is a regression fix as the increase in suspend / resume times can be substantial (report of 8 seconds on Len's laptop!)
Signed-off-by: Srinivas Pandruvada Tested-by: Len Brown Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 11 ++++++++++- include/linux/hid-sensor-hub.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c index 610fc98f88ef..595511022795 100644 --- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c +++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c @@ -36,6 +36,8 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) s32 poll_value = 0; if (state) { + if (!atomic_read(&st->user_requested_state)) + return 0; if (sensor_hub_device_open(st->hsdev)) return -EIO; @@ -52,8 +54,12 @@ static int _hid_sensor_power_state(struct hid_sensor_common *st, bool state) poll_value = hid_sensor_read_poll_value(st); } else { - if (!atomic_dec_and_test(&st->data_ready)) + int val; + + val = atomic_dec_if_positive(&st->data_ready); + if (val < 0) return 0; + sensor_hub_device_close(st->hsdev); state_val = hid_sensor_get_usage_index(st->hsdev, st->power_state.report_id, @@ -92,9 +98,11 @@ EXPORT_SYMBOL(hid_sensor_power_state); int hid_sensor_power_state(struct hid_sensor_common *st, bool state) { + #ifdef CONFIG_PM int ret; + atomic_set(&st->user_requested_state, state); if (state) ret = pm_runtime_get_sync(&st->pdev->dev); else { @@ -109,6 +117,7 @@ int hid_sensor_power_state(struct hid_sensor_common *st, bool state) return 0; #else + atomic_set(&st->user_requested_state, state); return _hid_sensor_power_state(st, state); #endif } diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 0408421d885f..cd224dfd94d8 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -230,6 +230,7 @@ struct hid_sensor_common { struct platform_device *pdev; unsigned usage_id; atomic_t data_ready; + atomic_t 
user_requested_state; struct iio_trigger *trigger; struct hid_sensor_hub_attribute_info poll; struct hid_sensor_hub_attribute_info report_state; -- cgit v1.2.3-70-g09d2 From fcc577dd55db193926537e0e4de98492d665446b Mon Sep 17 00:00:00 2001 From: Cristina Opriceana Date: Tue, 23 Jun 2015 16:34:19 +0300 Subject: iio: Fix parameters in iio_triggered_buffer_setup This patch renames the top half handler and the bottom half handler of iio_triggered_buffer_setup() in accordance with their usage. The bottom half has been renamed to reflect the fact that it is a thread based call, compliant with iio_alloc_pollfunc(). The names of the parameters were swapped, thus creating confusion. Signed-off-by: Cristina Opriceana Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-triggered-buffer.c | 12 ++++++------ include/linux/iio/triggered_buffer.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/industrialio-triggered-buffer.c b/drivers/iio/industrialio-triggered-buffer.c index 15a5341b5e7b..4b2858ba1fd6 100644 --- a/drivers/iio/industrialio-triggered-buffer.c +++ b/drivers/iio/industrialio-triggered-buffer.c @@ -24,8 +24,8 @@ static const struct iio_buffer_setup_ops iio_triggered_buffer_setup_ops = { /** * iio_triggered_buffer_setup() - Setup triggered buffer and pollfunc * @indio_dev: IIO device structure - * @pollfunc_bh: Function which will be used as pollfunc bottom half - * @pollfunc_th: Function which will be used as pollfunc top half + * @h: Function which will be used as pollfunc top half + * @thread: Function which will be used as pollfunc bottom half * @setup_ops: Buffer setup functions to use for this device. * If NULL the default setup functions for triggered * buffers will be used. @@ -42,8 +42,8 @@ static const struct iio_buffer_setup_ops iio_triggered_buffer_setup_ops = { * iio_triggered_buffer_cleanup(). 
*/ int iio_triggered_buffer_setup(struct iio_dev *indio_dev, - irqreturn_t (*pollfunc_bh)(int irq, void *p), - irqreturn_t (*pollfunc_th)(int irq, void *p), + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p), const struct iio_buffer_setup_ops *setup_ops) { struct iio_buffer *buffer; @@ -57,8 +57,8 @@ int iio_triggered_buffer_setup(struct iio_dev *indio_dev, iio_device_attach_buffer(indio_dev, buffer); - indio_dev->pollfunc = iio_alloc_pollfunc(pollfunc_bh, - pollfunc_th, + indio_dev->pollfunc = iio_alloc_pollfunc(h, + thread, IRQF_ONESHOT, indio_dev, "%s_consumer%d", diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h index c378ebec605e..f72f70d5a97b 100644 --- a/include/linux/iio/triggered_buffer.h +++ b/include/linux/iio/triggered_buffer.h @@ -7,8 +7,8 @@ struct iio_dev; struct iio_buffer_setup_ops; int iio_triggered_buffer_setup(struct iio_dev *indio_dev, - irqreturn_t (*pollfunc_bh)(int irq, void *p), - irqreturn_t (*pollfunc_th)(int irq, void *p), + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p), const struct iio_buffer_setup_ops *setup_ops); void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev); -- cgit v1.2.3-70-g09d2 From 0fd972a7d91d6e15393c449492a04d94c0b89351 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 1 May 2015 20:13:42 -0400 Subject: module: relocate module_init from init.h to module.h Modular users will always be users of init functionality, but users of init functionality are not necessarily always modules. Hence any functionality like module_init and module_exit would be more at home in the module.h file. And module.h should explicitly include init.h to make the dependency clear. We've already done all the legwork needed to ensure that this move does not cause any build regressions due to implicit header file include assumptions about where module_init lives. 
Cc: Rusty Russell Acked-by: Rusty Russell Signed-off-by: Paul Gortmaker --- include/linux/init.h | 78 ---------------------------------------------- include/linux/module.h | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 78 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 7c68c36d3fd8..b449f378f995 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -282,68 +282,8 @@ void __init parse_early_param(void); void __init parse_early_options(char *cmdline); #endif /* __ASSEMBLY__ */ -/** - * module_init() - driver initialization entry point - * @x: function to be run at kernel boot time or module insertion - * - * module_init() will either be called during do_initcalls() (if - * builtin) or at module insertion time (if a module). There can only - * be one per module. - */ -#define module_init(x) __initcall(x); - -/** - * module_exit() - driver exit entry point - * @x: function to be run when driver is removed - * - * module_exit() will wrap the driver clean-up code - * with cleanup_module() when used with rmmod when - * the driver is a module. If the driver is statically - * compiled into the kernel, module_exit() has no effect. - * There can only be one per module. - */ -#define module_exit(x) __exitcall(x); - #else /* MODULE */ -/* - * In most cases loadable modules do not need custom - * initcall levels. There are still some valid cases where - * a driver may be needed early if built in, and does not - * matter when built as a loadable module. Like bus - * snooping debug drivers. 
- */ -#define early_initcall(fn) module_init(fn) -#define core_initcall(fn) module_init(fn) -#define core_initcall_sync(fn) module_init(fn) -#define postcore_initcall(fn) module_init(fn) -#define postcore_initcall_sync(fn) module_init(fn) -#define arch_initcall(fn) module_init(fn) -#define subsys_initcall(fn) module_init(fn) -#define subsys_initcall_sync(fn) module_init(fn) -#define fs_initcall(fn) module_init(fn) -#define fs_initcall_sync(fn) module_init(fn) -#define rootfs_initcall(fn) module_init(fn) -#define device_initcall(fn) module_init(fn) -#define device_initcall_sync(fn) module_init(fn) -#define late_initcall(fn) module_init(fn) -#define late_initcall_sync(fn) module_init(fn) - -#define console_initcall(fn) module_init(fn) -#define security_initcall(fn) module_init(fn) - -/* Each module must use one module_init(). */ -#define module_init(initfn) \ - static inline initcall_t __inittest(void) \ - { return initfn; } \ - int init_module(void) __attribute__((alias(#initfn))); - -/* This is only required if you want to be unloadable. */ -#define module_exit(exitfn) \ - static inline exitcall_t __exittest(void) \ - { return exitfn; } \ - void cleanup_module(void) __attribute__((alias(#exitfn))); - #define __setup_param(str, unique_id, fn) /* nothing */ #define __setup(str, func) /* nothing */ #endif @@ -351,24 +291,6 @@ void __init parse_early_options(char *cmdline); /* Data marked not to be saved by software suspend */ #define __nosavedata __section(.data..nosave) -/* This means "can be init if no module support, otherwise module load - may call it." 
*/ -#ifdef CONFIG_MODULES -#define __init_or_module -#define __initdata_or_module -#define __initconst_or_module -#define __INIT_OR_MODULE .text -#define __INITDATA_OR_MODULE .data -#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits -#else -#define __init_or_module __init -#define __initdata_or_module __initdata -#define __initconst_or_module __initconst -#define __INIT_OR_MODULE __INIT -#define __INITDATA_OR_MODULE __INITDATA -#define __INITRODATA_OR_MODULE __INITRODATA -#endif /*CONFIG_MODULES*/ - #ifdef MODULE #define __exit_p(x) x #else diff --git a/include/linux/module.h b/include/linux/module.h index d67b1932cc59..3a19c79918e0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,89 @@ extern struct module_attribute module_uevent; extern int init_module(void); extern void cleanup_module(void); +#ifndef MODULE +/** + * module_init() - driver initialization entry point + * @x: function to be run at kernel boot time or module insertion + * + * module_init() will either be called during do_initcalls() (if + * builtin) or at module insertion time (if a module). There can only + * be one per module. + */ +#define module_init(x) __initcall(x); + +/** + * module_exit() - driver exit entry point + * @x: function to be run when driver is removed + * + * module_exit() will wrap the driver clean-up code + * with cleanup_module() when used with rmmod when + * the driver is a module. If the driver is statically + * compiled into the kernel, module_exit() has no effect. + * There can only be one per module. + */ +#define module_exit(x) __exitcall(x); + +#else /* MODULE */ + +/* + * In most cases loadable modules do not need custom + * initcall levels. There are still some valid cases where + * a driver may be needed early if built in, and does not + * matter when built as a loadable module. Like bus + * snooping debug drivers. 
+ */ +#define early_initcall(fn) module_init(fn) +#define core_initcall(fn) module_init(fn) +#define core_initcall_sync(fn) module_init(fn) +#define postcore_initcall(fn) module_init(fn) +#define postcore_initcall_sync(fn) module_init(fn) +#define arch_initcall(fn) module_init(fn) +#define subsys_initcall(fn) module_init(fn) +#define subsys_initcall_sync(fn) module_init(fn) +#define fs_initcall(fn) module_init(fn) +#define fs_initcall_sync(fn) module_init(fn) +#define rootfs_initcall(fn) module_init(fn) +#define device_initcall(fn) module_init(fn) +#define device_initcall_sync(fn) module_init(fn) +#define late_initcall(fn) module_init(fn) +#define late_initcall_sync(fn) module_init(fn) + +#define console_initcall(fn) module_init(fn) +#define security_initcall(fn) module_init(fn) + +/* Each module must use one module_init(). */ +#define module_init(initfn) \ + static inline initcall_t __inittest(void) \ + { return initfn; } \ + int init_module(void) __attribute__((alias(#initfn))); + +/* This is only required if you want to be unloadable. */ +#define module_exit(exitfn) \ + static inline exitcall_t __exittest(void) \ + { return exitfn; } \ + void cleanup_module(void) __attribute__((alias(#exitfn))); + +#endif + +/* This means "can be init if no module support, otherwise module load + may call it." */ +#ifdef CONFIG_MODULES +#define __init_or_module +#define __initdata_or_module +#define __initconst_or_module +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits +#else +#define __init_or_module __init +#define __initdata_or_module __initdata +#define __initconst_or_module __initconst +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#define __INITRODATA_OR_MODULE __INITRODATA +#endif /*CONFIG_MODULES*/ + /* Archs provide a method of finding the correct exception table. 
*/ struct exception_table_entry; -- cgit v1.2.3-70-g09d2 From b17d1bf16cc72a374a48d748940f700009d40ff4 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Wed, 11 Feb 2015 11:52:37 +0100 Subject: gpio: make flags mandatory for gpiod_get functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all[1] users of the gpiod_get functions are converted to make use of the up to now optional flags parameter, make it mandatory which allows to remove some cpp magic. [1] all but etraxfs-uart which is broken anyhow and I'm allowed to ignore it by Jesper Nilsson :-) Acked-by: Alexandre Courbot Signed-off-by: Uwe Kleine-König --- drivers/gpio/devres.c | 18 +++++----- drivers/gpio/gpiolib.c | 16 ++++----- include/linux/gpio/consumer.h | 82 ++++++++++++------------------------------- 3 files changed, 40 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/devres.c b/drivers/gpio/devres.c index 07ba82317ece..903fcf4d04a0 100644 --- a/drivers/gpio/devres.c +++ b/drivers/gpio/devres.c @@ -59,13 +59,13 @@ static int devm_gpiod_match_array(struct device *dev, void *res, void *data) * automatically disposed on driver detach. See gpiod_get() for detailed * information about behavior and return values. */ -struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags) { return devm_gpiod_get_index(dev, con_id, 0, flags); } -EXPORT_SYMBOL(__devm_gpiod_get); +EXPORT_SYMBOL(devm_gpiod_get); /** * devm_gpiod_get_optional - Resource-managed gpiod_get_optional() @@ -77,13 +77,13 @@ EXPORT_SYMBOL(__devm_gpiod_get); * are automatically disposed on driver detach. See gpiod_get_optional() for * detailed information about behavior and return values. 
*/ -struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { return devm_gpiod_get_index_optional(dev, con_id, 0, flags); } -EXPORT_SYMBOL(__devm_gpiod_get_optional); +EXPORT_SYMBOL(devm_gpiod_get_optional); /** * devm_gpiod_get_index - Resource-managed gpiod_get_index() @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__devm_gpiod_get_optional); * automatically disposed on driver detach. See gpiod_get_index() for detailed * information about behavior and return values. */ -struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags) @@ -120,7 +120,7 @@ struct gpio_desc *__must_check __devm_gpiod_get_index(struct device *dev, return desc; } -EXPORT_SYMBOL(__devm_gpiod_get_index); +EXPORT_SYMBOL(devm_gpiod_get_index); /** * devm_get_gpiod_from_child - get a GPIO descriptor from a device's child node @@ -182,10 +182,10 @@ EXPORT_SYMBOL(devm_get_gpiod_from_child); * gpiod_get_index_optional() for detailed information about behavior and * return values. 
*/ -struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, - enum gpiod_flags flags) + enum gpiod_flags flags) { struct gpio_desc *desc; @@ -197,7 +197,7 @@ struct gpio_desc *__must_check __devm_gpiod_get_index_optional(struct device *de return desc; } -EXPORT_SYMBOL(__devm_gpiod_get_index_optional); +EXPORT_SYMBOL(devm_gpiod_get_index_optional); /** * devm_gpiod_get_array - Resource-managed gpiod_get_array() diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index bf4bd1d120c3..4b2f98168225 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1894,12 +1894,12 @@ EXPORT_SYMBOL_GPL(gpiod_count); * dev, -ENOENT if no GPIO has been assigned to the requested function, or * another IS_ERR() code if an error occurred while trying to acquire the GPIO. */ -struct gpio_desc *__must_check __gpiod_get(struct device *dev, const char *con_id, +struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags) { return gpiod_get_index(dev, con_id, 0, flags); } -EXPORT_SYMBOL_GPL(__gpiod_get); +EXPORT_SYMBOL_GPL(gpiod_get); /** * gpiod_get_optional - obtain an optional GPIO for a given GPIO function @@ -1911,13 +1911,13 @@ EXPORT_SYMBOL_GPL(__gpiod_get); * the requested function it will return NULL. This is convenient for drivers * that need to handle optional GPIOs. 
*/ -struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, +struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { return gpiod_get_index_optional(dev, con_id, 0, flags); } -EXPORT_SYMBOL_GPL(__gpiod_get_optional); +EXPORT_SYMBOL_GPL(gpiod_get_optional); /** @@ -1974,7 +1974,7 @@ static int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, * requested function and/or index, or another IS_ERR() code if an error * occurred while trying to acquire the GPIO. */ -struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags) @@ -2023,7 +2023,7 @@ struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, return desc; } -EXPORT_SYMBOL_GPL(__gpiod_get_index); +EXPORT_SYMBOL_GPL(gpiod_get_index); /** * fwnode_get_named_gpiod - obtain a GPIO from firmware node @@ -2092,7 +2092,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_named_gpiod); * specified index was assigned to the requested function it will return NULL. * This is convenient for drivers that need to handle optional GPIOs. 
*/ -struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) @@ -2107,7 +2107,7 @@ struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, return desc; } -EXPORT_SYMBOL_GPL(__gpiod_get_index_optional); +EXPORT_SYMBOL_GPL(gpiod_get_index_optional); /** * gpiod_hog - Hog the specified GPIO desc given the provided flags diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index adac255aee86..14cac67c2012 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -47,17 +47,17 @@ enum gpiod_flags { int gpiod_count(struct device *dev, const char *con_id); /* Acquire and dispose GPIOs */ -struct gpio_desc *__must_check __gpiod_get(struct device *dev, +struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags); -struct gpio_desc *__must_check __gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags); -struct gpio_desc *__must_check __gpiod_get_optional(struct device *dev, +struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags); -struct gpio_desc *__must_check __gpiod_get_index_optional(struct device *dev, +struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags); @@ -70,18 +70,18 @@ struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev, void gpiod_put(struct gpio_desc *desc); void gpiod_put_array(struct gpio_descs *descs); -struct gpio_desc *__must_check __devm_gpiod_get(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags); -struct gpio_desc *__must_check 
__devm_gpiod_get_index(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags); -struct gpio_desc *__must_check __devm_gpiod_get_optional(struct device *dev, +struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags); struct gpio_desc *__must_check -__devm_gpiod_get_index_optional(struct device *dev, const char *con_id, +devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags); struct gpio_descs *__must_check devm_gpiod_get_array(struct device *dev, const char *con_id, @@ -146,31 +146,31 @@ static inline int gpiod_count(struct device *dev, const char *con_id) return 0; } -static inline struct gpio_desc *__must_check __gpiod_get(struct device *dev, - const char *con_id, - enum gpiod_flags flags) +static inline struct gpio_desc *__must_check gpiod_get(struct device *dev, + const char *con_id, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -__gpiod_get_index(struct device *dev, - const char *con_id, - unsigned int idx, - enum gpiod_flags flags) +gpiod_get_index(struct device *dev, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -__gpiod_get_optional(struct device *dev, const char *con_id, - enum gpiod_flags flags) +gpiod_get_optional(struct device *dev, const char *con_id, + enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -__gpiod_get_index_optional(struct device *dev, const char *con_id, - unsigned int index, enum gpiod_flags flags) +gpiod_get_index_optional(struct device *dev, const char *con_id, + unsigned int index, enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } @@ -206,7 +206,7 @@ static inline void gpiod_put_array(struct gpio_descs *descs) } static 
inline struct gpio_desc *__must_check -__devm_gpiod_get(struct device *dev, +devm_gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags) { @@ -214,7 +214,7 @@ __devm_gpiod_get(struct device *dev, } static inline struct gpio_desc *__must_check -__devm_gpiod_get_index(struct device *dev, +devm_gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags) @@ -223,14 +223,14 @@ __devm_gpiod_get_index(struct device *dev, } static inline struct gpio_desc *__must_check -__devm_gpiod_get_optional(struct device *dev, const char *con_id, +devm_gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); } static inline struct gpio_desc *__must_check -__devm_gpiod_get_index_optional(struct device *dev, const char *con_id, +devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { return ERR_PTR(-ENOSYS); @@ -424,42 +424,6 @@ static inline struct gpio_desc *devm_get_gpiod_from_child( #endif /* CONFIG_GPIOLIB */ -/* - * Vararg-hacks! This is done to transition the kernel to always pass - * the options flags argument to the below functions. During a transition - * phase these vararg macros make both old-and-newstyle code compile, - * but when all calls to the elder API are removed, these should go away - * and the __gpiod_get() etc functions above be renamed just gpiod_get() - * etc. - */ -#define __gpiod_get(dev, con_id, flags, ...) __gpiod_get(dev, con_id, flags) -#define gpiod_get(varargs...) __gpiod_get(varargs, GPIOD_ASIS) -#define __gpiod_get_index(dev, con_id, index, flags, ...) \ - __gpiod_get_index(dev, con_id, index, flags) -#define gpiod_get_index(varargs...) __gpiod_get_index(varargs, GPIOD_ASIS) -#define __gpiod_get_optional(dev, con_id, flags, ...) \ - __gpiod_get_optional(dev, con_id, flags) -#define gpiod_get_optional(varargs...) 
__gpiod_get_optional(varargs, GPIOD_ASIS) -#define __gpiod_get_index_optional(dev, con_id, index, flags, ...) \ - __gpiod_get_index_optional(dev, con_id, index, flags) -#define gpiod_get_index_optional(varargs...) \ - __gpiod_get_index_optional(varargs, GPIOD_ASIS) -#define __devm_gpiod_get(dev, con_id, flags, ...) \ - __devm_gpiod_get(dev, con_id, flags) -#define devm_gpiod_get(varargs...) __devm_gpiod_get(varargs, GPIOD_ASIS) -#define __devm_gpiod_get_index(dev, con_id, index, flags, ...) \ - __devm_gpiod_get_index(dev, con_id, index, flags) -#define devm_gpiod_get_index(varargs...) \ - __devm_gpiod_get_index(varargs, GPIOD_ASIS) -#define __devm_gpiod_get_optional(dev, con_id, flags, ...) \ - __devm_gpiod_get_optional(dev, con_id, flags) -#define devm_gpiod_get_optional(varargs...) \ - __devm_gpiod_get_optional(varargs, GPIOD_ASIS) -#define __devm_gpiod_get_index_optional(dev, con_id, index, flags, ...) \ - __devm_gpiod_get_index_optional(dev, con_id, index, flags) -#define devm_gpiod_get_index_optional(varargs...) \ - __devm_gpiod_get_index_optional(varargs, GPIOD_ASIS) - #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_GPIO_SYSFS) int gpiod_export(struct gpio_desc *desc, bool direction_may_change); -- cgit v1.2.3-70-g09d2 From 16a624a9c81814cc2f1353eff2e502430c3fa79a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 24 Jun 2015 08:17:02 +0200 Subject: soc: mediatek: Add infracfg misc driver support This adds support for some miscellaneous bits of the infracfg controller. The mtk_infracfg_set/clear_bus_protection functions are necessary for the scpsys power domain driver to handle the bus protection bits which are contained in the infracfg register space.
Signed-off-by: Sascha Hauer Reviewed-by: Daniel Kurtz Signed-off-by: Matthias Brugger --- drivers/soc/mediatek/Kconfig | 9 ++++ drivers/soc/mediatek/Makefile | 1 + drivers/soc/mediatek/mtk-infracfg.c | 91 +++++++++++++++++++++++++++++++++++ include/linux/soc/mediatek/infracfg.h | 26 ++++++++++ 4 files changed, 127 insertions(+) create mode 100644 drivers/soc/mediatek/mtk-infracfg.c create mode 100644 include/linux/soc/mediatek/infracfg.h (limited to 'include/linux') diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig index 3c1850332a90..e609a6f5e2eb 100644 --- a/drivers/soc/mediatek/Kconfig +++ b/drivers/soc/mediatek/Kconfig @@ -1,6 +1,15 @@ # # MediaTek SoC drivers # +config MTK_INFRACFG + bool "MediaTek INFRACFG Support" + depends on ARCH_MEDIATEK || COMPILE_TEST + select REGMAP + help + Say yes here to add support for the MediaTek INFRACFG controller. The + INFRACFG controller contains various infrastructure registers not + directly associated to any device. + config MTK_PMIC_WRAP tristate "MediaTek PMIC Wrapper Support" depends on ARCH_MEDIATEK diff --git a/drivers/soc/mediatek/Makefile b/drivers/soc/mediatek/Makefile index ecaf4defd7f6..3fa940fb4eab 100644 --- a/drivers/soc/mediatek/Makefile +++ b/drivers/soc/mediatek/Makefile @@ -1 +1,2 @@ +obj-$(CONFIG_MTK_INFRACFG) += mtk-infracfg.o obj-$(CONFIG_MTK_PMIC_WRAP) += mtk-pmic-wrap.o diff --git a/drivers/soc/mediatek/mtk-infracfg.c b/drivers/soc/mediatek/mtk-infracfg.c new file mode 100644 index 000000000000..dba3055a9493 --- /dev/null +++ b/drivers/soc/mediatek/mtk-infracfg.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2015 Pengutronix, Sascha Hauer + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#define INFRA_TOPAXI_PROTECTEN 0x0220 +#define INFRA_TOPAXI_PROTECTSTA1 0x0228 + +/** + * mtk_infracfg_set_bus_protection - enable bus protection + * @regmap: The infracfg regmap + * @mask: The mask containing the protection bits to be enabled. + * + * This function enables the bus protection bits for disabled power + * domains so that the system does not hang when some unit accesses the + * bus while in power down. + */ +int mtk_infracfg_set_bus_protection(struct regmap *infracfg, u32 mask) +{ + unsigned long expired; + u32 val; + int ret; + + regmap_update_bits(infracfg, INFRA_TOPAXI_PROTECTEN, mask, mask); + + expired = jiffies + HZ; + + while (1) { + ret = regmap_read(infracfg, INFRA_TOPAXI_PROTECTSTA1, &val); + if (ret) + return ret; + + if ((val & mask) == mask) + break; + + cpu_relax(); + if (time_after(jiffies, expired)) + return -EIO; + } + + return 0; +} + +/** + * mtk_infracfg_clear_bus_protection - disable bus protection + * @regmap: The infracfg regmap + * @mask: The mask containing the protection bits to be disabled. + * + * This function disables the bus protection bits previously enabled with + * mtk_infracfg_set_bus_protection. 
+ */ +int mtk_infracfg_clear_bus_protection(struct regmap *infracfg, u32 mask) +{ + unsigned long expired; + int ret; + + regmap_update_bits(infracfg, INFRA_TOPAXI_PROTECTEN, mask, 0); + + expired = jiffies + HZ; + + while (1) { + u32 val; + + ret = regmap_read(infracfg, INFRA_TOPAXI_PROTECTSTA1, &val); + if (ret) + return ret; + + if (!(val & mask)) + break; + + cpu_relax(); + if (time_after(jiffies, expired)) + return -EIO; + } + + return 0; +} diff --git a/include/linux/soc/mediatek/infracfg.h b/include/linux/soc/mediatek/infracfg.h new file mode 100644 index 000000000000..a5714e93fb34 --- /dev/null +++ b/include/linux/soc/mediatek/infracfg.h @@ -0,0 +1,26 @@ +#ifndef __SOC_MEDIATEK_INFRACFG_H +#define __SOC_MEDIATEK_INFRACFG_H + +#define MT8173_TOP_AXI_PROT_EN_MCI_M2 BIT(0) +#define MT8173_TOP_AXI_PROT_EN_MM_M0 BIT(1) +#define MT8173_TOP_AXI_PROT_EN_MM_M1 BIT(2) +#define MT8173_TOP_AXI_PROT_EN_MMAPB_S BIT(6) +#define MT8173_TOP_AXI_PROT_EN_L2C_M2 BIT(9) +#define MT8173_TOP_AXI_PROT_EN_L2SS_SMI BIT(11) +#define MT8173_TOP_AXI_PROT_EN_L2SS_ADD BIT(12) +#define MT8173_TOP_AXI_PROT_EN_CCI_M2 BIT(13) +#define MT8173_TOP_AXI_PROT_EN_MFG_S BIT(14) +#define MT8173_TOP_AXI_PROT_EN_PERI_M0 BIT(15) +#define MT8173_TOP_AXI_PROT_EN_PERI_M1 BIT(16) +#define MT8173_TOP_AXI_PROT_EN_DEBUGSYS BIT(17) +#define MT8173_TOP_AXI_PROT_EN_CQ_DMA BIT(18) +#define MT8173_TOP_AXI_PROT_EN_GCPU BIT(19) +#define MT8173_TOP_AXI_PROT_EN_IOMMU BIT(20) +#define MT8173_TOP_AXI_PROT_EN_MFG_M0 BIT(21) +#define MT8173_TOP_AXI_PROT_EN_MFG_M1 BIT(22) +#define MT8173_TOP_AXI_PROT_EN_MFG_SNOOP_OUT BIT(23) + +int mtk_infracfg_set_bus_protection(struct regmap *infracfg, u32 mask); +int mtk_infracfg_clear_bus_protection(struct regmap *infracfg, u32 mask); + +#endif /* __SOC_MEDIATEK_INFRACFG_H */ -- cgit v1.2.3-70-g09d2 From d1ec4c34c7a9f328e43ea87522119258194f28f8 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 13 May 2015 10:41:58 -0700 Subject: rcu: Drop RCU_USER_QS in favor of NO_HZ_FULL The RCU_USER_QS Kconfig parameter is now just a synonym for NO_HZ_FULL, so this commit eliminates RCU_USER_QS, replacing all uses with NO_HZ_FULL. Reported-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney Acked-by: Frederic Weisbecker --- include/linux/rcupdate.h | 4 ++-- init/Kconfig | 9 --------- kernel/rcu/tree.c | 8 ++++---- kernel/time/Kconfig | 2 -- 4 files changed, 6 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 4cf5f51b4c9c..237f7b8d38ba 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -309,7 +309,7 @@ static inline void rcu_sysrq_end(void) } #endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ -#ifdef CONFIG_RCU_USER_QS +#ifdef CONFIG_NO_HZ_FULL void rcu_user_enter(void); void rcu_user_exit(void); #else @@ -317,7 +317,7 @@ static inline void rcu_user_enter(void) { } static inline void rcu_user_exit(void) { } static inline void rcu_user_hooks_switch(struct task_struct *prev, struct task_struct *next) { } -#endif /* CONFIG_RCU_USER_QS */ +#endif /* CONFIG_NO_HZ_FULL */ #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); diff --git a/init/Kconfig b/init/Kconfig index af09b4fb43d2..fdeff4ab5995 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -538,15 +538,6 @@ config RCU_STALL_COMMON config CONTEXT_TRACKING bool -config RCU_USER_QS - bool - help - This option sets hooks on kernel / userspace boundaries and - puts RCU in extended quiescent state when the CPU runs in - userspace. It means that when a CPU runs in userspace, it is - excluded from the global RCU state machine and thus doesn't - try to keep the timer tick on for RCU. 
- config CONTEXT_TRACKING_FORCE bool "Force context tracking" depends on CONTEXT_TRACKING diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 65137bc28b2b..8b5dd8ba9495 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -701,7 +701,7 @@ void rcu_idle_enter(void) } EXPORT_SYMBOL_GPL(rcu_idle_enter); -#ifdef CONFIG_RCU_USER_QS +#ifdef CONFIG_NO_HZ_FULL /** * rcu_user_enter - inform RCU that we are resuming userspace. * @@ -714,7 +714,7 @@ void rcu_user_enter(void) { rcu_eqs_enter(1); } -#endif /* CONFIG_RCU_USER_QS */ +#endif /* CONFIG_NO_HZ_FULL */ /** * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle @@ -828,7 +828,7 @@ void rcu_idle_exit(void) } EXPORT_SYMBOL_GPL(rcu_idle_exit); -#ifdef CONFIG_RCU_USER_QS +#ifdef CONFIG_NO_HZ_FULL /** * rcu_user_exit - inform RCU that we are exiting userspace. * @@ -839,7 +839,7 @@ void rcu_user_exit(void) { rcu_eqs_exit(1); } -#endif /* CONFIG_RCU_USER_QS */ +#endif /* CONFIG_NO_HZ_FULL */ /** * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig index 579ce1b929af..4008d9f95dd7 100644 --- a/kernel/time/Kconfig +++ b/kernel/time/Kconfig @@ -92,12 +92,10 @@ config NO_HZ_FULL depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS # We need at least one periodic CPU for timekeeping depends on SMP - # RCU_USER_QS dependency depends on HAVE_CONTEXT_TRACKING # VIRT_CPU_ACCOUNTING_GEN dependency depends on HAVE_VIRT_CPU_ACCOUNTING_GEN select NO_HZ_COMMON - select RCU_USER_QS select RCU_NOCB_CPU select VIRT_CPU_ACCOUNTING_GEN select IRQ_WORK -- cgit v1.2.3-70-g09d2 From 80eeb1f0f757c790b020d9f425bb0e824973d49c Mon Sep 17 00:00:00 2001 From: Sergej Sawazki Date: Sun, 28 Jun 2015 16:24:55 +0200 Subject: clk: add gpio controlled clock multiplexer Add a common clock driver for basic gpio controlled clock multiplexers. This driver can be used for devices like 5V41068A or 831721I from IDT or for discrete multiplexer circuits. 
The 'select' pin selects one of two parent clocks. Cc: Jyri Sarha Signed-off-by: Sergej Sawazki [sboyd@codeaurora.org: Fix error paths to free memory and do it in the correct order] Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/gpio-mux-clock.txt | 19 ++ drivers/clk/clk-gpio-gate.c | 242 +++++++++++++++------ include/linux/clk-provider.h | 17 ++ 3 files changed, 214 insertions(+), 64 deletions(-) create mode 100644 Documentation/devicetree/bindings/clock/gpio-mux-clock.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/clock/gpio-mux-clock.txt b/Documentation/devicetree/bindings/clock/gpio-mux-clock.txt new file mode 100644 index 000000000000..2be1e038ca62 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/gpio-mux-clock.txt @@ -0,0 +1,19 @@ +Binding for simple gpio clock multiplexer. + +This binding uses the common clock binding[1]. + +[1] Documentation/devicetree/bindings/clock/clock-bindings.txt + +Required properties: +- compatible : shall be "gpio-mux-clock". +- clocks: list of two references to parent clocks. +- #clock-cells : from common clock binding; shall be set to 0. +- select-gpios : GPIO reference for selecting the parent clock. + +Example: + clock { + compatible = "gpio-mux-clock"; + clocks = <&parentclk1>, <&parentclk2>; + #clock-cells = <0>; + select-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>; + }; diff --git a/drivers/clk/clk-gpio-gate.c b/drivers/clk/clk-gpio-gate.c index ef942daa955a..c0d202c24a97 100644 --- a/drivers/clk/clk-gpio-gate.c +++ b/drivers/clk/clk-gpio-gate.c @@ -1,12 +1,15 @@ /* * Copyright (C) 2013 - 2014 Texas Instruments Incorporated - http://www.ti.com - * Author: Jyri Sarha + * + * Authors: + * Jyri Sarha + * Sergej Sawazki * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
* - * Gpio gated clock implementation + * Gpio controlled clock implementation */ #include @@ -61,24 +64,55 @@ const struct clk_ops clk_gpio_gate_ops = { EXPORT_SYMBOL_GPL(clk_gpio_gate_ops); /** - * clk_register_gpio - register a gpip clock with the clock framework - * @dev: device that is registering this clock - * @name: name of this clock - * @parent_name: name of this clock's parent - * @gpio: gpio number to gate this clock - * @active_low: true if gpio should be set to 0 to enable clock - * @flags: clock flags + * DOC: basic clock multiplexer which can be controlled with a gpio output + * Traits of this clock: + * prepare - clk_prepare only ensures that parents are prepared + * rate - rate is only affected by parent switching. No clk_set_rate support + * parent - parent is adjustable through clk_set_parent */ -struct clk *clk_register_gpio_gate(struct device *dev, const char *name, - const char *parent_name, unsigned gpio, bool active_low, - unsigned long flags) + +static u8 clk_gpio_mux_get_parent(struct clk_hw *hw) { - struct clk_gpio *clk_gpio = NULL; - struct clk *clk = ERR_PTR(-EINVAL); - struct clk_init_data init = { NULL }; + struct clk_gpio *clk = to_clk_gpio(hw); + + return gpiod_get_value(clk->gpiod); +} + +static int clk_gpio_mux_set_parent(struct clk_hw *hw, u8 index) +{ + struct clk_gpio *clk = to_clk_gpio(hw); + + gpiod_set_value(clk->gpiod, index); + + return 0; +} + +const struct clk_ops clk_gpio_mux_ops = { + .get_parent = clk_gpio_mux_get_parent, + .set_parent = clk_gpio_mux_set_parent, + .determine_rate = __clk_mux_determine_rate, +}; +EXPORT_SYMBOL_GPL(clk_gpio_mux_ops); + +static struct clk *clk_register_gpio(struct device *dev, const char *name, + const char **parent_names, u8 num_parents, unsigned gpio, + bool active_low, unsigned long flags, + const struct clk_ops *clk_gpio_ops) +{ + struct clk_gpio *clk_gpio; + struct clk *clk; + struct clk_init_data init = {}; unsigned long gpio_flags; int err; + if (dev) + clk_gpio = 
devm_kzalloc(dev, sizeof(*clk_gpio), GFP_KERNEL); + else + clk_gpio = kzalloc(sizeof(*clk_gpio), GFP_KERNEL); + + if (!clk_gpio) + return ERR_PTR(-ENOMEM); + if (active_low) gpio_flags = GPIOF_ACTIVE_LOW | GPIOF_OUT_INIT_HIGH; else @@ -88,70 +122,108 @@ struct clk *clk_register_gpio_gate(struct device *dev, const char *name, err = devm_gpio_request_one(dev, gpio, gpio_flags, name); else err = gpio_request_one(gpio, gpio_flags, name); - if (err) { if (err != -EPROBE_DEFER) pr_err("%s: %s: Error requesting clock control gpio %u\n", __func__, name, gpio); - return ERR_PTR(err); - } - - if (dev) - clk_gpio = devm_kzalloc(dev, sizeof(struct clk_gpio), - GFP_KERNEL); - else - clk_gpio = kzalloc(sizeof(struct clk_gpio), GFP_KERNEL); + if (!dev) + kfree(clk_gpio); - if (!clk_gpio) { - clk = ERR_PTR(-ENOMEM); - goto clk_register_gpio_gate_err; + return ERR_PTR(err); } init.name = name; - init.ops = &clk_gpio_gate_ops; + init.ops = clk_gpio_ops; init.flags = flags | CLK_IS_BASIC; - init.parent_names = (parent_name ? &parent_name : NULL); - init.num_parents = (parent_name ? 
1 : 0); + init.parent_names = parent_names; + init.num_parents = num_parents; clk_gpio->gpiod = gpio_to_desc(gpio); clk_gpio->hw.init = &init; - clk = clk_register(dev, &clk_gpio->hw); + if (dev) + clk = devm_clk_register(dev, &clk_gpio->hw); + else + clk = clk_register(NULL, &clk_gpio->hw); if (!IS_ERR(clk)) return clk; - if (!dev) + if (!dev) { + gpiod_put(clk_gpio->gpiod); kfree(clk_gpio); - -clk_register_gpio_gate_err: - if (!dev) - gpio_free(gpio); + } return clk; } + +/** + * clk_register_gpio_gate - register a gpio clock gate with the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_name: name of this clock's parent + * @gpio: gpio number to gate this clock + * @active_low: true if gpio should be set to 0 to enable clock + * @flags: clock flags + */ +struct clk *clk_register_gpio_gate(struct device *dev, const char *name, + const char *parent_name, unsigned gpio, bool active_low, + unsigned long flags) +{ + return clk_register_gpio(dev, name, + (parent_name ? &parent_name : NULL), + (parent_name ? 
1 : 0), gpio, active_low, flags, + &clk_gpio_gate_ops); +} EXPORT_SYMBOL_GPL(clk_register_gpio_gate); +/** + * clk_register_gpio_mux - register a gpio clock mux with the clock framework + * @dev: device that is registering this clock + * @name: name of this clock + * @parent_names: names of this clock's parents + * @num_parents: number of parents listed in @parent_names + * @gpio: gpio number to select the parent of this clock + * @active_low: true if the select gpio is active low + * @flags: clock flags + */ +struct clk *clk_register_gpio_mux(struct device *dev, const char *name, + const char **parent_names, u8 num_parents, unsigned gpio, + bool active_low, unsigned long flags) +{ + if (num_parents != 2) { + pr_err("mux-clock %s must have 2 parents\n", name); + return ERR_PTR(-EINVAL); + } + + return clk_register_gpio(dev, name, parent_names, num_parents, + gpio, active_low, flags, &clk_gpio_mux_ops); +} +EXPORT_SYMBOL_GPL(clk_register_gpio_mux); + #ifdef CONFIG_OF /** - * The clk_register_gpio_gate has to be delayed, because the EPROBE_DEFER + * clk_register_get() has to be delayed, because -EPROBE_DEFER * can not be handled properly at of_clk_init() call time. 
*/ -struct clk_gpio_gate_delayed_register_data { +struct clk_gpio_delayed_register_data { + const char *gpio_name; struct device_node *node; struct mutex lock; struct clk *clk; + struct clk *(*clk_register_get)(const char *name, + const char **parent_names, u8 num_parents, + unsigned gpio, bool active_low); }; -static struct clk *of_clk_gpio_gate_delayed_register_get( - struct of_phandle_args *clkspec, - void *_data) +static struct clk *of_clk_gpio_delayed_register_get( + struct of_phandle_args *clkspec, void *_data) { - struct clk_gpio_gate_delayed_register_data *data = _data; + struct clk_gpio_delayed_register_data *data = _data; struct clk *clk; - const char *clk_name = data->node->name; - const char *parent_name; + const char **parent_names; + int i, num_parents; int gpio; enum of_gpio_flags of_flags; @@ -162,47 +234,89 @@ static struct clk *of_clk_gpio_gate_delayed_register_get( return data->clk; } - gpio = of_get_named_gpio_flags(data->node, "enable-gpios", 0, - &of_flags); + gpio = of_get_named_gpio_flags(data->node, data->gpio_name, 0, + &of_flags); if (gpio < 0) { mutex_unlock(&data->lock); - if (gpio != -EPROBE_DEFER) - pr_err("%s: %s: Can't get 'enable-gpios' DT property\n", - __func__, clk_name); + if (gpio == -EPROBE_DEFER) + pr_debug("%s: %s: GPIOs not yet available, retry later\n", + data->node->name, __func__); + else + pr_err("%s: %s: Can't get '%s' DT property\n", + data->node->name, __func__, + data->gpio_name); return ERR_PTR(gpio); } - parent_name = of_clk_get_parent_name(data->node, 0); + num_parents = of_clk_get_parent_count(data->node); - clk = clk_register_gpio_gate(NULL, clk_name, parent_name, gpio, - of_flags & OF_GPIO_ACTIVE_LOW, 0); - if (IS_ERR(clk)) { - mutex_unlock(&data->lock); - return clk; - } + parent_names = kcalloc(num_parents, sizeof(char *), GFP_KERNEL); + if (!parent_names) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < num_parents; i++) + parent_names[i] = of_clk_get_parent_name(data->node, i); + + clk = 
data->clk_register_get(data->node->name, parent_names, + num_parents, gpio, of_flags & OF_GPIO_ACTIVE_LOW); + if (IS_ERR(clk)) + goto out; data->clk = clk; +out: mutex_unlock(&data->lock); + kfree(parent_names); return clk; } -/** - * of_gpio_gate_clk_setup() - Setup function for gpio controlled clock - */ -static void __init of_gpio_gate_clk_setup(struct device_node *node) +static struct clk *of_clk_gpio_gate_delayed_register_get(const char *name, + const char **parent_names, u8 num_parents, + unsigned gpio, bool active_low) +{ + return clk_register_gpio_gate(NULL, name, parent_names[0], + gpio, active_low, 0); +} + +static struct clk *of_clk_gpio_mux_delayed_register_get(const char *name, + const char **parent_names, u8 num_parents, unsigned gpio, + bool active_low) +{ + return clk_register_gpio_mux(NULL, name, parent_names, num_parents, + gpio, active_low, 0); +} + +static void __init of_gpio_clk_setup(struct device_node *node, + const char *gpio_name, + struct clk *(*clk_register_get)(const char *name, + const char **parent_names, u8 num_parents, + unsigned gpio, bool active_low)) { - struct clk_gpio_gate_delayed_register_data *data; + struct clk_gpio_delayed_register_data *data; - data = kzalloc(sizeof(struct clk_gpio_gate_delayed_register_data), - GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return; data->node = node; + data->gpio_name = gpio_name; + data->clk_register_get = clk_register_get; mutex_init(&data->lock); - of_clk_add_provider(node, of_clk_gpio_gate_delayed_register_get, data); + of_clk_add_provider(node, of_clk_gpio_delayed_register_get, data); +} + +static void __init of_gpio_gate_clk_setup(struct device_node *node) +{ + of_gpio_clk_setup(node, "enable-gpios", + of_clk_gpio_gate_delayed_register_get); } CLK_OF_DECLARE(gpio_gate_clk, "gpio-gate-clock", of_gpio_gate_clk_setup); + +void __init of_gpio_mux_clk_setup(struct device_node *node) +{ + of_gpio_clk_setup(node, "select-gpios", + of_clk_gpio_mux_delayed_register_get); 
+} +CLK_OF_DECLARE(gpio_mux_clk, "gpio-mux-clock", of_gpio_mux_clk_setup); #endif diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 78842f46f152..823d7f70878e 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -549,6 +549,23 @@ struct clk *clk_register_gpio_gate(struct device *dev, const char *name, void of_gpio_clk_gate_setup(struct device_node *node); +/** + * struct clk_gpio_mux - gpio controlled clock multiplexer + * + * @hw: see struct clk_gpio + * @gpiod: gpio descriptor to select the parent of this clock multiplexer + * + * Clock with a gpio control for selecting the parent clock. + * Implements .get_parent, .set_parent and .determine_rate + */ + +extern const struct clk_ops clk_gpio_mux_ops; +struct clk *clk_register_gpio_mux(struct device *dev, const char *name, + const char **parent_names, u8 num_parents, unsigned gpio, + bool active_low, unsigned long flags); + +void of_gpio_mux_clk_setup(struct device_node *node); + /** * clk_register - allocate a new clock, register it and return an opaque cookie * @dev: device that is registering this clock -- cgit v1.2.3-70-g09d2 From f9281648ecd5081803bb2da84b9ccb0cf48436cd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 3 Jul 2015 12:44:21 -0700 Subject: context_tracking: Add ct_state() and CT_WARN_ON() This will let us sprinkle sanity checks around the kernel without making too much of a mess. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Denys Vlasenko Cc: Frederic Weisbecker Cc: H. 
Peter Anvin Cc: Kees Cook Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/5da41fb2ceb29eac671f427c67040401ba2a1fa0.1435952415.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/context_tracking.h | 15 +++++++++++++++ include/linux/context_tracking_state.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index b96bd299966f..008fc67d0d96 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -49,13 +49,28 @@ static inline void exception_exit(enum ctx_state prev_ctx) } } + +/** + * ct_state() - return the current context tracking state if known + * + * Returns the current cpu's context tracking state if context tracking + * is enabled. If context tracking is disabled, returns + * CONTEXT_DISABLED. This should be used primarily for debugging. + */ +static inline enum ctx_state ct_state(void) +{ + return context_tracking_is_enabled() ? 
+ this_cpu_read(context_tracking.state) : CONTEXT_DISABLED; +} #else static inline void user_enter(void) { } static inline void user_exit(void) { } static inline enum ctx_state exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } +static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; } #endif /* !CONFIG_CONTEXT_TRACKING */ +#define CT_WARN_ON(cond) WARN_ON(context_tracking_is_enabled() && (cond)) #ifdef CONFIG_CONTEXT_TRACKING_FORCE extern void context_tracking_init(void); diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 678ecdf90cf6..ee956c528fab 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -14,6 +14,7 @@ struct context_tracking { bool active; int recursion; enum ctx_state { + CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */ CONTEXT_KERNEL = 0, CONTEXT_USER, CONTEXT_GUEST, -- cgit v1.2.3-70-g09d2 From eca2ebc7e007c9e2b8f5ecfcfc74b53fbe68e42b Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 22 Jun 2015 13:00:36 +0000 Subject: spi: expose spi_master and spi_device statistics via sysfs per spi-master statistics accessible as: /sys/class/spi_master/spi*/statistics/* per spi-device statistics accessible via: /sys/class/spi_master/spi*/spi*.*/statistics/* The following statistics are exposed as separate "files" inside these directories: * messages number of spi_messages * transfers number of spi_transfers * bytes number of bytes transferred * bytes_rx number of bytes received * bytes_tx number of bytes transmitted * errors number of errors encountered * timedout number of messages that have timed out * spi_async number of spi_messages submitted using spi_async * spi_sync number of spi_messages submitted using spi_sync * spi_sync_immediate number of spi_messages submitted using spi_sync, that are handled immediately without a context switch to the spi_pump worker-thread Signed-off-by: 
Martin Sperl Signed-off-by: Mark Brown --- drivers/spi/spi.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/spi/spi.h | 64 ++++++++++++++++++ 2 files changed, 229 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index cf8b91b23a76..07476ca083a0 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -67,11 +67,141 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf) } static DEVICE_ATTR_RO(modalias); +#define SPI_STATISTICS_ATTRS(field, file) \ +static ssize_t spi_master_##field##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct spi_master *master = container_of(dev, \ + struct spi_master, dev); \ + return spi_statistics_##field##_show(&master->statistics, buf); \ +} \ +static struct device_attribute dev_attr_spi_master_##field = { \ + .attr = { .name = file, .mode = S_IRUGO }, \ + .show = spi_master_##field##_show, \ +}; \ +static ssize_t spi_device_##field##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct spi_device *spi = container_of(dev, \ + struct spi_device, dev); \ + return spi_statistics_##field##_show(&spi->statistics, buf); \ +} \ +static struct device_attribute dev_attr_spi_device_##field = { \ + .attr = { .name = file, .mode = S_IRUGO }, \ + .show = spi_device_##field##_show, \ +} + +#define SPI_STATISTICS_SHOW_NAME(name, file, field, format_string) \ +static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \ + char *buf) \ +{ \ + unsigned long flags; \ + ssize_t len; \ + spin_lock_irqsave(&stat->lock, flags); \ + len = sprintf(buf, format_string, stat->field); \ + spin_unlock_irqrestore(&stat->lock, flags); \ + return len; \ +} \ +SPI_STATISTICS_ATTRS(name, file) + +#define SPI_STATISTICS_SHOW(field, format_string) \ + SPI_STATISTICS_SHOW_NAME(field, __stringify(field), \ + field, format_string) + +SPI_STATISTICS_SHOW(messages, "%lu"); 
+SPI_STATISTICS_SHOW(transfers, "%lu"); +SPI_STATISTICS_SHOW(errors, "%lu"); +SPI_STATISTICS_SHOW(timedout, "%lu"); + +SPI_STATISTICS_SHOW(spi_sync, "%lu"); +SPI_STATISTICS_SHOW(spi_sync_immediate, "%lu"); +SPI_STATISTICS_SHOW(spi_async, "%lu"); + +SPI_STATISTICS_SHOW(bytes, "%llu"); +SPI_STATISTICS_SHOW(bytes_rx, "%llu"); +SPI_STATISTICS_SHOW(bytes_tx, "%llu"); + static struct attribute *spi_dev_attrs[] = { &dev_attr_modalias.attr, NULL, }; -ATTRIBUTE_GROUPS(spi_dev); + +static const struct attribute_group spi_dev_group = { + .attrs = spi_dev_attrs, +}; + +static struct attribute *spi_device_statistics_attrs[] = { + &dev_attr_spi_device_messages.attr, + &dev_attr_spi_device_transfers.attr, + &dev_attr_spi_device_errors.attr, + &dev_attr_spi_device_timedout.attr, + &dev_attr_spi_device_spi_sync.attr, + &dev_attr_spi_device_spi_sync_immediate.attr, + &dev_attr_spi_device_spi_async.attr, + &dev_attr_spi_device_bytes.attr, + &dev_attr_spi_device_bytes_rx.attr, + &dev_attr_spi_device_bytes_tx.attr, + NULL, +}; + +static const struct attribute_group spi_device_statistics_group = { + .name = "statistics", + .attrs = spi_device_statistics_attrs, +}; + +static const struct attribute_group *spi_dev_groups[] = { + &spi_dev_group, + &spi_device_statistics_group, + NULL, +}; + +static struct attribute *spi_master_statistics_attrs[] = { + &dev_attr_spi_master_messages.attr, + &dev_attr_spi_master_transfers.attr, + &dev_attr_spi_master_errors.attr, + &dev_attr_spi_master_timedout.attr, + &dev_attr_spi_master_spi_sync.attr, + &dev_attr_spi_master_spi_sync_immediate.attr, + &dev_attr_spi_master_spi_async.attr, + &dev_attr_spi_master_bytes.attr, + &dev_attr_spi_master_bytes_rx.attr, + &dev_attr_spi_master_bytes_tx.attr, + NULL, +}; + +static const struct attribute_group spi_master_statistics_group = { + .name = "statistics", + .attrs = spi_master_statistics_attrs, +}; + +static const struct attribute_group *spi_master_groups[] = { + &spi_master_statistics_group, + NULL, +}; + +void 
spi_statistics_add_transfer_stats(struct spi_statistics *stats, + struct spi_transfer *xfer, + struct spi_master *master) +{ + unsigned long flags; + + spin_lock_irqsave(&stats->lock, flags); + + stats->transfers++; + + stats->bytes += xfer->len; + if ((xfer->tx_buf) && + (xfer->tx_buf != master->dummy_tx)) + stats->bytes_tx += xfer->len; + if ((xfer->rx_buf) && + (xfer->rx_buf != master->dummy_rx)) + stats->bytes_rx += xfer->len; + + spin_unlock_irqrestore(&stats->lock, flags); +} +EXPORT_SYMBOL_GPL(spi_statistics_add_transfer_stats); /* modalias support makes "modprobe $MODALIAS" new-style hotplug work, * and the sysfs version makes coldplug work too. @@ -249,6 +379,9 @@ struct spi_device *spi_alloc_device(struct spi_master *master) spi->dev.bus = &spi_bus_type; spi->dev.release = spidev_release; spi->cs_gpio = -ENOENT; + + spin_lock_init(&spi->statistics.lock); + device_initialize(&spi->dev); return spi; } @@ -689,17 +822,29 @@ static int spi_transfer_one_message(struct spi_master *master, bool keep_cs = false; int ret = 0; unsigned long ms = 1; + struct spi_statistics *statm = &master->statistics; + struct spi_statistics *stats = &msg->spi->statistics; spi_set_cs(msg->spi, true); + SPI_STATISTICS_INCREMENT_FIELD(statm, messages); + SPI_STATISTICS_INCREMENT_FIELD(stats, messages); + list_for_each_entry(xfer, &msg->transfers, transfer_list) { trace_spi_transfer_start(msg, xfer); + spi_statistics_add_transfer_stats(statm, xfer, master); + spi_statistics_add_transfer_stats(stats, xfer, master); + if (xfer->tx_buf || xfer->rx_buf) { reinit_completion(&master->xfer_completion); ret = master->transfer_one(master, msg->spi, xfer); if (ret < 0) { + SPI_STATISTICS_INCREMENT_FIELD(statm, + errors); + SPI_STATISTICS_INCREMENT_FIELD(stats, + errors); dev_err(&msg->spi->dev, "SPI transfer failed: %d\n", ret); goto out; @@ -715,6 +860,10 @@ static int spi_transfer_one_message(struct spi_master *master, } if (ms == 0) { + SPI_STATISTICS_INCREMENT_FIELD(statm, + timedout); + 
SPI_STATISTICS_INCREMENT_FIELD(stats, + timedout); dev_err(&msg->spi->dev, "SPI transfer timed out\n"); msg->status = -ETIMEDOUT; @@ -1416,10 +1565,10 @@ static struct class spi_master_class = { .name = "spi_master", .owner = THIS_MODULE, .dev_release = spi_master_release, + .dev_groups = spi_master_groups, }; - /** * spi_alloc_master - allocate SPI master controller * @dev: the controller, possibly using the platform_bus @@ -1585,6 +1734,8 @@ int spi_register_master(struct spi_master *master) goto done; } } + /* add statistics */ + spin_lock_init(&master->statistics.lock); mutex_lock(&board_lock); list_add_tail(&master->list, &spi_master_list); @@ -1939,6 +2090,9 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message) message->spi = spi; + SPI_STATISTICS_INCREMENT_FIELD(&master->statistics, spi_async); + SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_async); + trace_spi_message_submit(message); return master->transfer(spi, message); @@ -2075,6 +2229,9 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message, message->context = &done; message->spi = spi; + SPI_STATISTICS_INCREMENT_FIELD(&master->statistics, spi_sync); + SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_sync); + if (!bus_locked) mutex_lock(&master->bus_lock_mutex); @@ -2102,8 +2259,13 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message, /* Push out the messages in the calling context if we * can. 
*/ - if (master->transfer == spi_queued_transfer) + if (master->transfer == spi_queued_transfer) { + SPI_STATISTICS_INCREMENT_FIELD(&master->statistics, + spi_sync_immediate); + SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, + spi_sync_immediate); __spi_pump_messages(master, false); + } wait_for_completion(&done); status = message->status; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index d673072346f2..269e8afd3e2a 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -23,6 +23,8 @@ #include struct dma_chan; +struct spi_master; +struct spi_transfer; /* * INTERFACES between SPI master-side drivers and SPI infrastructure. @@ -30,6 +32,59 @@ struct dma_chan; */ extern struct bus_type spi_bus_type; +/** + * struct spi_statistics - statistics for spi transfers + * @lock: lock protecting this structure + * + * @messages: number of spi-messages handled + * @transfers: number of spi_transfers handled + * @errors: number of errors during spi_transfer + * @timedout: number of timeouts during spi_transfer + * + * @spi_sync: number of times spi_sync is used + * @spi_sync_immediate: + * number of times spi_sync is executed immediately + * in calling context without queuing and scheduling + * @spi_async: number of times spi_async is used + * + * @bytes: number of bytes transferred to/from device + * @bytes_tx: number of bytes sent to device + * @bytes_rx: number of bytes received from device + * + */ +struct spi_statistics { + spinlock_t lock; /* lock for the whole structure */ + + unsigned long messages; + unsigned long transfers; + unsigned long errors; + unsigned long timedout; + + unsigned long spi_sync; + unsigned long spi_sync_immediate; + unsigned long spi_async; + + unsigned long long bytes; + unsigned long long bytes_rx; + unsigned long long bytes_tx; + +}; + +void spi_statistics_add_transfer_stats(struct spi_statistics *stats, + struct spi_transfer *xfer, + struct spi_master *master); + +#define SPI_STATISTICS_ADD_TO_FIELD(stats, 
field, count) \ + do { \ + unsigned long flags; \ + spin_lock_irqsave(&(stats)->lock, flags); \ + (stats)->field += count; \ + spin_unlock_irqrestore(&(stats)->lock, flags); \ + } while (0) + +#define SPI_STATISTICS_INCREMENT_FIELD(stats, field) \ + SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1) + /** * struct spi_device - Master side proxy for an SPI slave device * @dev: Driver model representation of the device. @@ -60,6 +115,8 @@ extern struct bus_type spi_bus_type; * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when * when not using a GPIO line) * + * @statistics: statistics for the spi_device + * * A @spi_device is used to interchange data between an SPI slave * (usually a discrete chip) and CPU memory. * @@ -98,6 +155,9 @@ struct spi_device { char modalias[SPI_NAME_SIZE]; int cs_gpio; /* chip select gpio */ + /* the statistics */ + struct spi_statistics statistics; + /* * likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: @@ -296,6 +356,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that * are not GPIOs (driven by the SPI controller itself). 
+ * @statistics: statistics for the spi_master * @dma_tx: DMA transmit channel * @dma_rx: DMA receive channel * @dummy_rx: dummy receive buffer for full-duplex devices @@ -452,6 +513,9 @@ struct spi_master { /* gpio chip select */ int *cs_gpios; + /* statistics */ + struct spi_statistics statistics; + /* DMA channels for use with core dmaengine helpers */ struct dma_chan *dma_tx; struct dma_chan *dma_rx; -- cgit v1.2.3-70-g09d2 From 5f867db63473f32cce1b868e281ebd42a41f8fad Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 26 Jun 2015 19:43:58 -0500 Subject: mtd: nand: Fix NAND_USE_BOUNCE_BUFFER flag conflict Commit 66507c7bc8895f0da6b ("mtd: nand: Add support to use nand_base poi databuf as bounce buffer") added a flag NAND_USE_BOUNCE_BUFFER using the same bit value as the existing NAND_BUSWIDTH_AUTO. Cc: Kamal Dasu Fixes: 66507c7bc8895f0da6b ("mtd: nand: Add support to use nand_base poi databuf as bounce buffer") Signed-off-by: Scott Wood Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f25e2bdd188c..272f42952f34 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -177,11 +177,6 @@ typedef enum { #define NAND_OWN_BUFFERS 0x00020000 /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV 0x00040000 -/* - * This option could be defined by controller drivers to protect against - * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers - */ -#define NAND_USE_BOUNCE_BUFFER 0x00080000 /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode @@ -189,6 +184,11 @@ typedef enum { * before calling nand_scan_tail. 
*/ #define NAND_BUSWIDTH_AUTO 0x00080000 +/* + * This option could be defined by controller drivers to protect against + * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers + */ +#define NAND_USE_BOUNCE_BUFFER 0x00100000 /* Options set by nand scan */ /* Nand scan has allocated controller struct */ -- cgit v1.2.3-70-g09d2 From 4c62dbbce902cf2afa88cac89ec67c828160f431 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 26 Jun 2015 11:27:41 +0300 Subject: ACPI: Remove FSF mailing addresses There is no need to carry potentially outdated Free Software Foundation mailing address in file headers since the COPYING file includes it. Signed-off-by: Jarkko Nikula Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ac.c | 4 ---- drivers/acpi/acpi_ipmi.c | 4 ---- drivers/acpi/acpi_memhotplug.c | 5 ----- drivers/acpi/acpi_pad.c | 4 ---- drivers/acpi/acpi_video.c | 4 ---- drivers/acpi/apei/apei-base.c | 4 ---- drivers/acpi/apei/einj.c | 4 ---- drivers/acpi/apei/erst-dbg.c | 4 ---- drivers/acpi/apei/erst.c | 4 ---- drivers/acpi/apei/ghes.c | 4 ---- drivers/acpi/apei/hest.c | 4 ---- drivers/acpi/battery.c | 4 ---- drivers/acpi/blacklist.c | 4 ---- drivers/acpi/bus.c | 4 ---- drivers/acpi/button.c | 4 ---- drivers/acpi/cm_sbs.c | 4 ---- drivers/acpi/container.c | 4 ---- drivers/acpi/device_pm.c | 4 ---- drivers/acpi/dock.c | 4 ---- drivers/acpi/ec.c | 4 ---- drivers/acpi/fan.c | 4 ---- drivers/acpi/hed.c | 4 ---- drivers/acpi/internal.h | 3 --- drivers/acpi/numa.c | 4 ---- drivers/acpi/osl.c | 4 ---- drivers/acpi/pci_irq.c | 4 ---- drivers/acpi/pci_link.c | 4 ---- drivers/acpi/pci_root.c | 4 ---- drivers/acpi/pci_slot.c | 4 ---- drivers/acpi/power.c | 4 ---- drivers/acpi/processor_driver.c | 4 ---- drivers/acpi/processor_idle.c | 4 ---- drivers/acpi/processor_perflib.c | 4 ---- drivers/acpi/processor_thermal.c | 4 ---- drivers/acpi/processor_throttling.c | 4 ---- drivers/acpi/resource.c | 4 ---- drivers/acpi/sbs.c | 4 ---- drivers/acpi/tables.c | 4 ---- 
drivers/acpi/thermal.c | 4 ---- drivers/acpi/utils.c | 4 ---- include/acpi/acpi_bus.h | 4 ---- include/acpi/acpi_drivers.h | 4 ---- include/linux/acpi.h | 4 ---- 43 files changed, 172 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 9b5354a2cd08..f71b756b05c4 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c index ac0f52f6df2b..f77956c3fd45 100644 --- a/drivers/acpi/acpi_ipmi.c +++ b/drivers/acpi/acpi_ipmi.c @@ -17,10 +17,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index ee28f4d15625..6b0d3ef7309c 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -16,11 +16,6 @@ * NON INFRINGEMENT. See the GNU General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * * ACPI based HotPlug driver that supports Memory Hotplug * This driver fields notifications from firmware for memory add * and remove operations and alerts the VM of the affected memory diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index 00b39802d7ec..ae307ff36acb 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -12,10 +12,6 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * */ #include diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 8c2fe2f2f9fd..5778e8e4313a 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -17,10 +17,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index a85ac07f3da3..a2c8d7adb6eb 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -24,10 +24,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index a095d4f858da..0431883653be 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -18,10 +18,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 04ab5c9d3ced..6330f557a2c8 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -17,10 +17,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 3670bbab57a3..6682c5daf742 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -18,10 +18,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 2bfd53cbfe80..23981ac1c6c2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -23,10 +23,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 06e9b411a0a2..20b3fcf4007c 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -21,10 +21,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index b3628cc01a53..b719ab3090bb 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -18,10 +18,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 278dc4be992a..96809cd99ace 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -20,10 +20,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 513e7230e3d0..c8356eb79911 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 6d5d1832a588..5c3b0918d5fd 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c index 6c9ee68e46fb..d0918d421f90 100644 --- a/drivers/acpi/cm_sbs.c +++ b/drivers/acpi/cm_sbs.c @@ -11,10 +11,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index c8ead9f97375..12c240903c18 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -20,10 +20,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 717afcdb5f4a..d06cd59b5906 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index a688aa243f6c..e8e128dede29 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -17,10 +17,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 9d4761d2f6b7..990446629935 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -22,10 +22,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index bea0bbaafa97..e297a480e135 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/hed.c b/drivers/acpi/hed.c index a322710b5ba4..5c67a6d8f803 100644 --- a/drivers/acpi/hed.c +++ b/drivers/acpi/hed.c @@ -15,10 +15,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4683a96932b9..8c71cb8335c0 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -13,9 +13,6 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #ifndef _ACPI_INTERNAL_H_ diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index acaa3b4ea504..72b6e9ef0ae9 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * */ diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c262e4acd68d..5e1f1bc5421e 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -19,10 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * */ diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 304eccb0ae5c..25fff35df82c 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -19,10 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index cfd7581cc19f..2f5f84ced85f 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -17,10 +17,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * TBD: diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 1b5569c092c6..393706a5261b 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. 
* - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index 139d9e479370..7188e53b6b7c 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -20,10 +20,6 @@ * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ #include diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 93eac53b5110..45b47f2c9f03 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index d9f71581b79b..3af8dc30f129 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -21,10 +21,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d540f42c9232..175c86bee3a9 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -21,10 +21,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index cfc8aba72f86..53cfe8ba9799 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -20,10 +20,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * */ #include diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index e003663b2f8e..1fed84a092c2 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -19,10 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 84243c32e29c..f170d746336d 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -19,10 +19,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed1..64ea0d10b788 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 01504c819e8f..cb3dedb1beae 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -17,10 +17,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 2e19189da0ee..17a6fa01a338 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * */ diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 6d4e44ea74ac..fc28b9f5aa84 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * This driver fully implements the ACPI thermal policy as described in the diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 67c548ad3764..475c9079bf85 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
- * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 83061cac719b..5ba8fb64f664 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index ea6428b7dacb..29c691265b49 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -16,10 +16,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..1c116ee53b1e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -15,10 +15,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -- cgit v1.2.3-70-g09d2 From f089d4d20fcbcf16a62ef3b4b57f41ecf59a5d83 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 7 Jul 2015 15:28:12 +0100 Subject: mfd: wm5110: Add registers for custom write sequence triggers This register will be needed as part of some additional support for the headphone path on wm5110, so this patch adds the register and sets up its regmap config. Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Mark Brown --- drivers/mfd/wm5110-tables.c | 2 ++ include/linux/mfd/arizona/registers.h | 37 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 12cad94b4035..62a4aa13cb98 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -676,6 +676,7 @@ static const struct reg_default wm5110_reg_default[] = { { 0x00000032, 0x0100 }, /* R50 - PWM Drive 3 */ { 0x00000040, 0x0000 }, /* R64 - Wake control */ { 0x00000041, 0x0000 }, /* R65 - Sequence control */ + { 0x00000042, 0x0000 }, /* R66 - Spare Triggers */ { 0x00000061, 0x01FF }, /* R97 - Sample Rate Sequence Select 1 */ { 0x00000062, 0x01FF }, /* R98 - Sample Rate Sequence Select 2 */ { 0x00000063, 0x01FF }, /* R99 - Sample Rate Sequence Select 3 */ @@ -1716,6 +1717,7 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg) case ARIZONA_PWM_DRIVE_3: case ARIZONA_WAKE_CONTROL: case ARIZONA_SEQUENCE_CONTROL: + case ARIZONA_SPARE_TRIGGERS: case ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_1: case ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_2: case ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_3: diff --git a/include/linux/mfd/arizona/registers.h 
b/include/linux/mfd/arizona/registers.h index 3499d36e6067..11affb3c2768 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -39,6 +39,7 @@ #define ARIZONA_PWM_DRIVE_3 0x32 #define ARIZONA_WAKE_CONTROL 0x40 #define ARIZONA_SEQUENCE_CONTROL 0x41 +#define ARIZONA_SPARE_TRIGGERS 0x42 #define ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_1 0x61 #define ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_2 0x62 #define ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_3 0x63 @@ -1430,6 +1431,42 @@ #define ARIZONA_WSEQ_ENA_JD2_RISE_SHIFT 0 /* WSEQ_ENA_JD2_RISE */ #define ARIZONA_WSEQ_ENA_JD2_RISE_WIDTH 1 /* WSEQ_ENA_JD2_RISE */ +/* + * R66 (0x42) - Spare Triggers + */ +#define ARIZONA_WS_TRG8 0x0080 /* WS_TRG8 */ +#define ARIZONA_WS_TRG8_MASK 0x0080 /* WS_TRG8 */ +#define ARIZONA_WS_TRG8_SHIFT 7 /* WS_TRG8 */ +#define ARIZONA_WS_TRG8_WIDTH 1 /* WS_TRG8 */ +#define ARIZONA_WS_TRG7 0x0040 /* WS_TRG7 */ +#define ARIZONA_WS_TRG7_MASK 0x0040 /* WS_TRG7 */ +#define ARIZONA_WS_TRG7_SHIFT 6 /* WS_TRG7 */ +#define ARIZONA_WS_TRG7_WIDTH 1 /* WS_TRG7 */ +#define ARIZONA_WS_TRG6 0x0020 /* WS_TRG6 */ +#define ARIZONA_WS_TRG6_MASK 0x0020 /* WS_TRG6 */ +#define ARIZONA_WS_TRG6_SHIFT 5 /* WS_TRG6 */ +#define ARIZONA_WS_TRG6_WIDTH 1 /* WS_TRG6 */ +#define ARIZONA_WS_TRG5 0x0010 /* WS_TRG5 */ +#define ARIZONA_WS_TRG5_MASK 0x0010 /* WS_TRG5 */ +#define ARIZONA_WS_TRG5_SHIFT 4 /* WS_TRG5 */ +#define ARIZONA_WS_TRG5_WIDTH 1 /* WS_TRG5 */ +#define ARIZONA_WS_TRG4 0x0008 /* WS_TRG4 */ +#define ARIZONA_WS_TRG4_MASK 0x0008 /* WS_TRG4 */ +#define ARIZONA_WS_TRG4_SHIFT 3 /* WS_TRG4 */ +#define ARIZONA_WS_TRG4_WIDTH 1 /* WS_TRG4 */ +#define ARIZONA_WS_TRG3 0x0004 /* WS_TRG3 */ +#define ARIZONA_WS_TRG3_MASK 0x0004 /* WS_TRG3 */ +#define ARIZONA_WS_TRG3_SHIFT 2 /* WS_TRG3 */ +#define ARIZONA_WS_TRG3_WIDTH 1 /* WS_TRG3 */ +#define ARIZONA_WS_TRG2 0x0002 /* WS_TRG2 */ +#define ARIZONA_WS_TRG2_MASK 0x0002 /* WS_TRG2 */ +#define ARIZONA_WS_TRG2_SHIFT 1 /* WS_TRG2 */ +#define ARIZONA_WS_TRG2_WIDTH 1 
/* WS_TRG2 */ +#define ARIZONA_WS_TRG1 0x0001 /* WS_TRG1 */ +#define ARIZONA_WS_TRG1_MASK 0x0001 /* WS_TRG1 */ +#define ARIZONA_WS_TRG1_SHIFT 0 /* WS_TRG1 */ +#define ARIZONA_WS_TRG1_WIDTH 1 /* WS_TRG1 */ + /* * R97 (0x61) - Sample Rate Sequence Select 1 */ -- cgit v1.2.3-70-g09d2 From 2d53809594afaf2ae66a90a3142c1b702fd3bcea Mon Sep 17 00:00:00 2001 From: Dirk Behme Date: Mon, 6 Jul 2015 15:57:44 -0700 Subject: Input: zforce_ts - convert to use the gpiod interface Use the new GPIO descriptor interface to handle the zForce GPIOs. This simplifies the code and allows transparently handle GPIO polarity, as specified in device tree data. Also switch to using gpio_{set|get}_value_cansleep() since none of the callers is in atomic context and cansleep variant allows more GPIO controllers to be used with the touchscreen. Signed-off-by: Dirk Behme Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zforce_ts.c | 58 +++++++++++++++++---------------- include/linux/platform_data/zforce_ts.h | 3 -- 2 files changed, 30 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/zforce_ts.c b/drivers/input/touchscreen/zforce_ts.c index f58a196521a9..c4cffcfb03d3 100644 --- a/drivers/input/touchscreen/zforce_ts.c +++ b/drivers/input/touchscreen/zforce_ts.c @@ -24,14 +24,13 @@ #include #include #include -#include +#include #include #include #include #include #include #include -#include #define WAIT_TIMEOUT msecs_to_jiffies(1000) @@ -120,6 +119,9 @@ struct zforce_ts { struct regulator *reg_vdd; + struct gpio_desc *gpio_int; + struct gpio_desc *gpio_rst; + bool suspending; bool suspended; bool boot_complete; @@ -161,6 +163,16 @@ static int zforce_command(struct zforce_ts *ts, u8 cmd) return 0; } +static void zforce_reset_assert(struct zforce_ts *ts) +{ + gpiod_set_value_cansleep(ts->gpio_rst, 1); +} + +static void zforce_reset_deassert(struct zforce_ts *ts) +{ + gpiod_set_value_cansleep(ts->gpio_rst, 0); +} + static int 
zforce_send_wait(struct zforce_ts *ts, const char *buf, int len) { struct i2c_client *client = ts->client; @@ -479,7 +491,6 @@ static irqreturn_t zforce_irq_thread(int irq, void *dev_id) { struct zforce_ts *ts = dev_id; struct i2c_client *client = ts->client; - const struct zforce_ts_platdata *pdata = ts->pdata; int ret; u8 payload_buffer[FRAME_MAXSIZE]; u8 *payload; @@ -499,7 +510,7 @@ static irqreturn_t zforce_irq_thread(int irq, void *dev_id) if (!ts->suspending && device_may_wakeup(&client->dev)) pm_stay_awake(&client->dev); - while (!gpio_get_value(pdata->gpio_int)) { + while (!gpiod_get_value_cansleep(ts->gpio_int)) { ret = zforce_read_packet(ts, payload_buffer); if (ret < 0) { dev_err(&client->dev, @@ -690,7 +701,7 @@ static void zforce_reset(void *data) { struct zforce_ts *ts = data; - gpio_set_value(ts->pdata->gpio_rst, 0); + zforce_reset_assert(ts); udelay(10); @@ -712,18 +723,6 @@ static struct zforce_ts_platdata *zforce_parse_dt(struct device *dev) return ERR_PTR(-ENOMEM); } - pdata->gpio_int = of_get_gpio(np, 0); - if (!gpio_is_valid(pdata->gpio_int)) { - dev_err(dev, "failed to get interrupt gpio\n"); - return ERR_PTR(-EINVAL); - } - - pdata->gpio_rst = of_get_gpio(np, 1); - if (!gpio_is_valid(pdata->gpio_rst)) { - dev_err(dev, "failed to get reset gpio\n"); - return ERR_PTR(-EINVAL); - } - if (of_property_read_u32(np, "x-size", &pdata->x_max)) { dev_err(dev, "failed to get x-size property\n"); return ERR_PTR(-EINVAL); @@ -755,19 +754,22 @@ static int zforce_probe(struct i2c_client *client, if (!ts) return -ENOMEM; - ret = devm_gpio_request_one(&client->dev, pdata->gpio_int, GPIOF_IN, - "zforce_ts_int"); - if (ret) { - dev_err(&client->dev, "request of gpio %d failed, %d\n", - pdata->gpio_int, ret); + /* INT GPIO */ + ts->gpio_int = devm_gpiod_get_index(&client->dev, NULL, 0, GPIOD_IN); + if (IS_ERR(ts->gpio_int)) { + ret = PTR_ERR(ts->gpio_int); + dev_err(&client->dev, + "failed to request interrupt GPIO: %d\n", ret); return ret; } - ret = 
devm_gpio_request_one(&client->dev, pdata->gpio_rst, - GPIOF_OUT_INIT_LOW, "zforce_ts_rst"); - if (ret) { - dev_err(&client->dev, "request of gpio %d failed, %d\n", - pdata->gpio_rst, ret); + /* RST GPIO */ + ts->gpio_rst = devm_gpiod_get_index(&client->dev, NULL, 1, + GPIOD_OUT_HIGH); + if (IS_ERR(ts->gpio_rst)) { + ret = PTR_ERR(ts->gpio_rst); + dev_err(&client->dev, + "failed to request reset GPIO: %d\n", ret); return ret; } @@ -863,7 +865,7 @@ static int zforce_probe(struct i2c_client *client, i2c_set_clientdata(client, ts); /* let the controller boot */ - gpio_set_value(pdata->gpio_rst, 1); + zforce_reset_deassert(ts); ts->command_waiting = NOTIFICATION_BOOTCOMPLETE; if (wait_for_completion_timeout(&ts->command_done, WAIT_TIMEOUT) == 0) diff --git a/include/linux/platform_data/zforce_ts.h b/include/linux/platform_data/zforce_ts.h index 0472ab2f6ede..7bdece8ef33e 100644 --- a/include/linux/platform_data/zforce_ts.h +++ b/include/linux/platform_data/zforce_ts.h @@ -16,9 +16,6 @@ #define _LINUX_INPUT_ZFORCE_TS_H struct zforce_ts_platdata { - int gpio_int; - int gpio_rst; - unsigned int x_max; unsigned int y_max; }; -- cgit v1.2.3-70-g09d2 From 7876f930d0e78addc6bbdbba0d6c196a0788d545 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:49 -0400 Subject: blkcg: implement all_blkcgs list Add all_blkcgs list, which goes through blkcg->all_blkcgs_node and is protected by blkcg_pol_mutex. This will be used to fix the blkcg_policy_data allocation bug.
Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 17 ++++++++++++----- include/linux/blk-cgroup.h | 1 + 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 05b893de516b..42ff436ffaf4 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -46,6 +46,8 @@ struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */ + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -817,6 +819,10 @@ static void blkcg_css_free(struct cgroup_subsys_state *css) { struct blkcg *blkcg = css_to_blkcg(css); + mutex_lock(&blkcg_pol_mutex); + list_del(&blkcg->all_blkcgs_node); + mutex_unlock(&blkcg_pol_mutex); + if (blkcg != &blkcg_root) { int i; @@ -833,6 +839,8 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) struct cgroup_subsys_state *ret; int i; + mutex_lock(&blkcg_pol_mutex); + if (!parent_css) { blkcg = &blkcg_root; goto done; @@ -844,8 +852,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) goto free_blkcg; } - mutex_lock(&blkcg_pol_mutex); - for (i = 0; i < BLKCG_MAX_POLS ; i++) { struct blkcg_policy *pol = blkcg_policy[i]; struct blkcg_policy_data *cpd; @@ -862,7 +868,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) BUG_ON(blkcg->pd[i]); cpd = kzalloc(pol->cpd_size, GFP_KERNEL); if (!cpd) { - mutex_unlock(&blkcg_pol_mutex); ret = ERR_PTR(-ENOMEM); goto free_pd_blkcg; } @@ -871,7 +876,6 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) pol->cpd_init_fn(blkcg); } - mutex_unlock(&blkcg_pol_mutex); done: spin_lock_init(&blkcg->lock); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); @@ -879,14 +883,17 @@ done: #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); #endif + list_add_tail(&blkcg->all_blkcgs_node, 
&all_blkcgs); + + mutex_unlock(&blkcg_pol_mutex); return &blkcg->css; free_pd_blkcg: for (i--; i >= 0; i--) kfree(blkcg->pd[i]); - free_blkcg: kfree(blkcg); + mutex_unlock(&blkcg_pol_mutex); return ret; } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 58cfab80dd70..cf3e7bc22ef3 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -47,6 +47,7 @@ struct blkcg { struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; + struct list_head all_blkcgs_node; #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif -- cgit v1.2.3-70-g09d2 From 06b285bd11257bccc5a1b85a835507e33656aff2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2015 16:39:50 -0400 Subject: blkcg: fix blkcg_policy_data allocation bug e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") updated per-blkcg policy data to be dynamically allocated. When a policy is registered, its policy data aren't created. Instead, when the policy is activated on a queue, the policy data are allocated if there are blkg's (blkcg_gq's) which are attached to a given blkcg. This is buggy. Consider the following scenario. 1. A blkcg is created. No blkg's attached yet. 2. The policy is registered. No policy data is allocated. 3. The policy is activated on a queue. As the above blkcg doesn't have any blkg's, it won't allocate the matching blkcg_policy_data. 4. An IO is issued from the blkcg and blkg is created and the blkcg still doesn't have the matching policy data allocated. With cfq-iosched, this leads to an oops. It also doesn't free policy data on policy unregistration assuming that freeing of all policy data on blkcg destruction should take care of it; however, this also is incorrect. 1. A blkcg has policy data. 2. The policy gets unregistered but the policy data remains. 3. Another policy gets registered on the same slot. 4. 
Later, the new policy tries to allocate policy data on the previous blkcg but the slot is already occupied and gets skipped. The policy ends up operating on the policy data of the previous policy. There's no reason to manage blkcg_policy_data lazily. The reason we do lazy allocation of blkg's is that the number of all possible blkg's is the product of cgroups and block devices which can reach a surprising level. blkcg_policy_data is constrained by the number of cgroups and shouldn't be a problem. This patch makes blkcg_policy_data be allocated for all existing blkcg's on policy registration and freed on unregistration and removes blkcg_policy_data handling from policy [de]activation paths. This ensures that blkcg_policy_data are created and removed with the policy they belong to and fixes the above described problems. Signed-off-by: Tejun Heo Fixes: e48453c386f3 ("block, cgroup: implement policy-specific per-blkcg data") Cc: Vivek Goyal Cc: Arianna Avanzini Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 78 +++++++++++++++++++++++++--------------------- include/linux/blk-cgroup.h | 10 ++---- 2 files changed, 44 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 42ff436ffaf4..9da02c021ebe 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1048,10 +1048,8 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - LIST_HEAD(cpds); struct blkcg_gq *blkg; struct blkg_policy_data *pd, *nd; - struct blkcg_policy_data *cpd, *cnd; int cnt = 0, ret; if (blkcg_policy_enabled(q, pol)) @@ -1064,10 +1062,7 @@ int blkcg_activate_policy(struct request_queue *q, cnt++; spin_unlock_irq(q->queue_lock); - /* - * Allocate per-blkg and per-blkcg policy data - * for all existing blkgs.
- */ + /* allocate per-blkg policy data for all existing blkgs */ while (cnt--) { pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); if (!pd) { @@ -1075,15 +1070,6 @@ int blkcg_activate_policy(struct request_queue *q, goto out_free; } list_add_tail(&pd->alloc_node, &pds); - - if (!pol->cpd_size) - continue; - cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node); - if (!cpd) { - ret = -ENOMEM; - goto out_free; - } - list_add_tail(&cpd->alloc_node, &cpds); } /* @@ -1093,32 +1079,17 @@ int blkcg_activate_policy(struct request_queue *q, spin_lock_irq(q->queue_lock); list_for_each_entry(blkg, &q->blkg_list, q_node) { - if (WARN_ON(list_empty(&pds)) || - WARN_ON(pol->cpd_size && list_empty(&cpds))) { + if (WARN_ON(list_empty(&pds))) { /* umm... this shouldn't happen, just abort */ ret = -ENOMEM; goto out_unlock; } - cpd = list_first_entry(&cpds, struct blkcg_policy_data, - alloc_node); - list_del_init(&cpd->alloc_node); pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node); list_del_init(&pd->alloc_node); /* grab blkcg lock too while installing @pd on @blkg */ spin_lock(&blkg->blkcg->lock); - if (!pol->cpd_size) - goto no_cpd; - if (!blkg->blkcg->pd[pol->plid]) { - /* Per-policy per-blkcg data */ - blkg->blkcg->pd[pol->plid] = cpd; - cpd->plid = pol->plid; - pol->cpd_init_fn(blkg->blkcg); - } else { /* must free it as it has already been extracted */ - kfree(cpd); - } -no_cpd: blkg->pd[pol->plid] = pd; pd->blkg = blkg; pd->plid = pol->plid; @@ -1135,8 +1106,6 @@ out_free: blk_queue_bypass_end(q); list_for_each_entry_safe(pd, nd, &pds, alloc_node) kfree(pd); - list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node) - kfree(cpd); return ret; } EXPORT_SYMBOL_GPL(blkcg_activate_policy); @@ -1191,6 +1160,7 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); */ int blkcg_policy_register(struct blkcg_policy *pol) { + struct blkcg *blkcg; int i, ret; if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data))) @@ -1207,9 +1177,27 @@ int blkcg_policy_register(struct 
blkcg_policy *pol) if (i >= BLKCG_MAX_POLS) goto err_unlock; - /* register and update blkgs */ + /* register @pol */ pol->plid = i; - blkcg_policy[i] = pol; + blkcg_policy[pol->plid] = pol; + + /* allocate and install cpd's */ + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + struct blkcg_policy_data *cpd; + + cpd = kzalloc(pol->cpd_size, GFP_KERNEL); + if (!cpd) { + mutex_unlock(&blkcg_pol_mutex); + goto err_free_cpds; + } + + blkcg->pd[pol->plid] = cpd; + cpd->plid = pol->plid; + pol->cpd_init_fn(blkcg); + } + } + mutex_unlock(&blkcg_pol_mutex); /* everything is in place, add intf files for the new policy */ @@ -1219,6 +1207,14 @@ int blkcg_policy_register(struct blkcg_policy *pol) mutex_unlock(&blkcg_pol_register_mutex); return 0; +err_free_cpds: + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } + blkcg_policy[pol->plid] = NULL; err_unlock: mutex_unlock(&blkcg_pol_mutex); mutex_unlock(&blkcg_pol_register_mutex); @@ -1234,6 +1230,8 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register); */ void blkcg_policy_unregister(struct blkcg_policy *pol) { + struct blkcg *blkcg; + mutex_lock(&blkcg_pol_register_mutex); if (WARN_ON(blkcg_policy[pol->plid] != pol)) @@ -1243,9 +1241,17 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) if (pol->cftypes) cgroup_rm_cftypes(pol->cftypes); - /* unregister and update blkgs */ + /* remove cpds and unregister */ mutex_lock(&blkcg_pol_mutex); + + if (pol->cpd_size) { + list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) { + kfree(blkcg->pd[pol->plid]); + blkcg->pd[pol->plid] = NULL; + } + } blkcg_policy[pol->plid] = NULL; + mutex_unlock(&blkcg_pol_mutex); out_unlock: mutex_unlock(&blkcg_pol_register_mutex); diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index cf3e7bc22ef3..1b62d768c7df 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -89,18 +89,12 
@@ struct blkg_policy_data { * Policies that need to keep per-blkcg data which is independent * from any request_queue associated to it must specify its size * with the cpd_size field of the blkcg_policy structure and - * embed a blkcg_policy_data in it. blkcg core allocates - * policy-specific per-blkcg structures lazily the first time - * they are actually needed, so it handles them together with - * blkgs. cpd_init() is invoked to let each policy handle - * per-blkcg data. + * embed a blkcg_policy_data in it. cpd_init() is invoked to let + * each policy handle per-blkcg data. */ struct blkcg_policy_data { /* the policy id this per-policy data belongs to */ int plid; - - /* used during policy activation */ - struct list_head alloc_node; }; /* association between a blk cgroup and a request queue */ -- cgit v1.2.3-70-g09d2 From 4a0e3e989d66bb7204b163d9cfaa7fa96d0f2023 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Wed, 8 Jul 2015 13:05:57 +0200 Subject: cdc_ncm: Add support for moving NDP to end of NCM frame NCM specs are not actually mandating a specific position in the frame for the NDP (Network Datagram Pointer). However, some Huawei devices will ignore our aggregates if it is not placed after the datagrams it points to. Add support for doing just this, in a per-device configurable way. While at it, update NCM subdrivers, disabling this functionality in all of them, except in huawei_cdc_ncm where it is enabled instead. We aren't making any distinction between different Huawei NCM devices, based on what the vendor driver does. Standard NCM devices are left unaffected: if they are compliant, they should be always usable, still stay on the safe side. This change has been tested and working with a Huawei E3131 device (which works regardless of NDP position), a Huawei E3531 (also working both ways) and an E3372 (which mandates NDP to be after indexed datagrams). 
V1->V2: - corrected wrong NDP acronym definition - fixed possible NULL pointer dereference - patch cleanup V2->V3: - Properly account for the NDP size when writing new packets to SKB Signed-off-by: Enrico Mioso Signed-off-by: David S. Miller --- drivers/net/usb/cdc_mbim.c | 2 +- drivers/net/usb/cdc_ncm.c | 61 ++++++++++++++++++++++++++++++++++++---- drivers/net/usb/huawei_cdc_ncm.c | 7 +++-- include/linux/usb/cdc_ncm.h | 7 ++++- 4 files changed, 67 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index e4b7a47a825c..efc18e05af0a 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -158,7 +158,7 @@ static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf) if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) goto err; - ret = cdc_ncm_bind_common(dev, intf, data_altsetting); + ret = cdc_ncm_bind_common(dev, intf, data_altsetting, 0); if (ret) goto err; diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 8067b8fbb0ee..1991e4a24657 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -684,10 +684,12 @@ static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) ctx->tx_curr_skb = NULL; } + kfree(ctx->delayed_ndp16); + kfree(ctx); } -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting) +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { const struct usb_cdc_union_desc *union_desc = NULL; struct cdc_ncm_ctx *ctx; @@ -855,6 +857,17 @@ advance: /* finish setting up the device specific data */ cdc_ncm_setup(dev); + /* Device-specific flags */ + ctx->drvflags = drvflags; + + /* Allocate the delayed NDP if needed. 
*/ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); + if (!ctx->delayed_ndp16) + goto error2; + dev_info(&intf->dev, "NDP will be placed at end of frame for this device."); + } + /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; @@ -954,8 +967,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; - /* The NCM data altsetting is fixed */ - ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM); + /* The NCM data altsetting is fixed, so we hard-coded it. + * Additionally, generic NCM devices are assumed to accept arbitrarily + * placed NDP. + */ + ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0); /* * We should get an event when network connection is "connected" or @@ -986,6 +1002,14 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ struct usb_cdc_ncm_nth16 *nth16 = (void *)skb->data; size_t ndpoffset = le16_to_cpu(nth16->wNdpIndex); + /* If NDP should be moved to the end of the NCM package, we can't follow the + * NTH16 header as we would normally do. NDP isn't written to the SKB yet, and + * the wNdpIndex field in the header is actually not consistent with reality. It will be later. 
+ */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + if (ctx->delayed_ndp16->dwSignature == sign) + return ctx->delayed_ndp16; + /* follow the chain of NDPs, looking for a match */ while (ndpoffset) { ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); @@ -995,7 +1019,8 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ } /* align new NDP */ - cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max); /* verify that there is room for the NDP and the datagram (reserve) */ if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size) @@ -1008,7 +1033,11 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_ nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ - ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) + ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size); + else + ndp16 = ctx->delayed_ndp16; + ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; @@ -1023,6 +1052,15 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) struct sk_buff *skb_out; u16 n = 0, index, ndplen; u8 ready2send = 0; + u32 delayed_ndp_size; + + /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated + * accordingly. Otherwise, we should check here. 
+ */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) + delayed_ndp_size = ctx->max_ndp_size; + else + delayed_ndp_size = 0; /* if there is a remaining skb, it gets priority */ if (skb != NULL) { @@ -1077,7 +1115,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_max); /* check if we had enough room left for both NDP and frame */ - if (!ndp16 || skb_out->len + skb->len > ctx->tx_max) { + if (!ndp16 || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_max) { if (n == 0) { /* won't fit, MTU problem? */ dev_kfree_skb_any(skb); @@ -1150,6 +1188,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) /* variables will be reset at next call */ } + /* If requested, put NDP at end of frame. */ + if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { + nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; + cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_max); + nth16->wNdpIndex = cpu_to_le16(skb_out->len); + memcpy(skb_put(skb_out, ctx->max_ndp_size), ctx->delayed_ndp16, ctx->max_ndp_size); + + /* Zero out delayed NDP - signature checking will naturally fail. */ + ndp16 = memset(ctx->delayed_ndp16, 0, ctx->max_ndp_size); + } + /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c index 735f7dadb9a0..2680a65cd5e4 100644 --- a/drivers/net/usb/huawei_cdc_ncm.c +++ b/drivers/net/usb/huawei_cdc_ncm.c @@ -73,11 +73,14 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev, struct usb_driver *subdriver = ERR_PTR(-ENODEV); int ret = -ENODEV; struct huawei_cdc_ncm_state *drvstate = (void *)&usbnet_dev->data; + int drvflags = 0; /* altsetting should always be 1 for NCM devices - so we hard-coded - * it here + * it here. 
Some huawei devices will need the NDP part of the NCM package to + * be at the end of the frame. */ - ret = cdc_ncm_bind_common(usbnet_dev, intf, 1); + drvflags |= CDC_NCM_FLAG_NDP_TO_END; + ret = cdc_ncm_bind_common(usbnet_dev, intf, 1, drvflags); if (ret) goto err; diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 7c9b484735c5..1f6526c76ee8 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -80,6 +80,9 @@ #define CDC_NCM_TIMER_INTERVAL_MIN 5UL #define CDC_NCM_TIMER_INTERVAL_MAX (U32_MAX / NSEC_PER_USEC) +/* Driver flags */ +#define CDC_NCM_FLAG_NDP_TO_END 0x02 /* NDP is placed at end of frame */ + #define cdc_ncm_comm_intf_is_mbim(x) ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \ (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE) #define cdc_ncm_data_intf_is_mbim(x) ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB) @@ -103,9 +106,11 @@ struct cdc_ncm_ctx { spinlock_t mtx; atomic_t stop; + int drvflags; u32 timer_interval; u32 max_ndp_size; + struct usb_cdc_ncm_ndp16 *delayed_ndp16; u32 tx_timer_pending; u32 tx_curr_frame_num; @@ -133,7 +138,7 @@ struct cdc_ncm_ctx { }; u8 cdc_ncm_select_altsetting(struct usb_interface *intf); -int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting); +int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf); struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign); int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in); -- cgit v1.2.3-70-g09d2 From 8b58a39846568dcd7d0c98b2fadc25018e59dedf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Jul 2015 23:32:12 +0200 Subject: ipv6: use flag instead of u16 for hop in inet6_skb_parm Hop was always either 0 or sizeof(struct ipv6hdr). Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- include/linux/ipv6.h | 2 +- net/ipv6/af_inet6.c | 4 ++-- net/ipv6/datagram.c | 8 ++++---- net/ipv6/exthdrs.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 82806c60aa42..1319a6bb6b82 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -94,7 +94,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb) struct inet6_skb_parm { int iif; __be16 ra; - __u16 hop; __u16 dst0; __u16 srcrt; __u16 dst1; @@ -111,6 +110,7 @@ struct inet6_skb_parm { #define IP6SKB_REROUTED 4 #define IP6SKB_ROUTERALERT 8 #define IP6SKB_FRAGMENTED 16 +#define IP6SKB_HOPBYHOP 32 }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7de52b65173f..39e670a91596 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -679,8 +679,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct ipv6_pinfo *np = inet6_sk(sk); if (np->rxopt.all) { - if ((opt->hop && (np->rxopt.bits.hopopts || - np->rxopt.bits.ohopopts)) || + if (((opt->flags & IP6SKB_HOPBYHOP) && + (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 62d908e64eeb..50115522e80f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -558,8 +558,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, } /* HbH is allowed only once */ - if (np->rxopt.bits.hopopts && opt->hop) { - u8 *ptr = nh + opt->hop; + if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) { + u8 *ptr = nh + sizeof(struct ipv6hdr); put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } @@ -620,8 +620,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, int hlim = 
ipv6_hdr(skb)->hop_limit; put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } - if (np->rxopt.bits.ohopopts && opt->hop) { - u8 *ptr = nh + opt->hop; + if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) { + u8 *ptr = nh + sizeof(struct ipv6hdr); put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); } if (np->rxopt.bits.odstopts && opt->dst0) { diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index a7bbbe45570b..ce203b0402be 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb) return -1; } - opt->hop = sizeof(struct ipv6hdr); + opt->flags |= IP6SKB_HOPBYHOP; if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); -- cgit v1.2.3-70-g09d2 From fd4b7286ccc469bf5dde22db6b8fcc455c3c4a66 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Jun 2015 08:52:39 +0000 Subject: regmap: add regmap_write_bits() regmap_write_bits() is similar to regmap_update_bits(), but regmap_write_bits() write data to register even though it is same value. Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 23 +++++++++++++++++++++++ include/linux/regmap.h | 9 +++++++++ 2 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 69ec411ce722..d93bb9a8ab98 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2374,6 +2374,29 @@ int regmap_update_bits(struct regmap *map, unsigned int reg, } EXPORT_SYMBOL_GPL(regmap_update_bits); +/** + * regmap_write_bits: Perform a read/modify/write cycle on the register map + * + * @map: Register map to update + * @reg: Register to update + * @mask: Bitmask to change + * @val: New value for bitmask + * + * Returns zero for success, a negative number on error. 
+ */ +int regmap_write_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + int ret; + + map->lock(map->lock_arg); + ret = _regmap_update_bits(map, reg, mask, val, NULL, true); + map->unlock(map->lock_arg); + + return ret; +} +EXPORT_SYMBOL_GPL(regmap_write_bits); + /** * regmap_update_bits_async: Perform a read/modify/write cycle on the register * map asynchronously diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 59c55ea0f0b5..e4b9ad4f05ef 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -424,6 +424,8 @@ int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); int regmap_update_bits(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); +int regmap_write_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val); int regmap_update_bits_async(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val); int regmap_update_bits_check(struct regmap *map, unsigned int reg, @@ -645,6 +647,13 @@ static inline int regmap_update_bits(struct regmap *map, unsigned int reg, return -EINVAL; } +static inline int regmap_write_bits(struct regmap *map, unsigned int reg, + unsigned int mask, unsigned int val) +{ + WARN_ONCE(1, "regmap API is disabled"); + return -EINVAL; +} + static inline int regmap_update_bits_async(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val) -- cgit v1.2.3-70-g09d2 From e874e6c7edc43436f73cf84157d9221f8b807c36 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 16 Jun 2015 08:52:55 +0000 Subject: regmap: add regmap_fields_force_write() regmap_fields_force_write() is similar to regmap_fields_write(), but regmap_fields_force_write() write data to register even though it is same value. 
Signed-off-by: Kuninori Morimoto Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 12 ++++++++++++ include/linux/regmap.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index d93bb9a8ab98..dd63bcbbf8a5 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1624,6 +1624,18 @@ int regmap_fields_write(struct regmap_field *field, unsigned int id, } EXPORT_SYMBOL_GPL(regmap_fields_write); +int regmap_fields_force_write(struct regmap_field *field, unsigned int id, + unsigned int val) +{ + if (id >= field->id_size) + return -EINVAL; + + return regmap_write_bits(field->regmap, + field->reg + (field->id_offset * id), + field->mask, val << field->shift); +} +EXPORT_SYMBOL_GPL(regmap_fields_force_write); + /** * regmap_fields_update_bits(): Perform a read/modify/write cycle * on the register field diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e4b9ad4f05ef..519c96231a91 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -505,6 +505,8 @@ int regmap_field_update_bits(struct regmap_field *field, int regmap_fields_write(struct regmap_field *field, unsigned int id, unsigned int val); +int regmap_fields_force_write(struct regmap_field *field, unsigned int id, + unsigned int val); int regmap_fields_read(struct regmap_field *field, unsigned int id, unsigned int *val); int regmap_fields_update_bits(struct regmap_field *field, unsigned int id, -- cgit v1.2.3-70-g09d2 From 5544eb9b81940647b8fad1f251b37cbe2819ce44 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 Jul 2015 15:41:58 +0200 Subject: KVM: count number of assigned devices If there are no assigned devices, the guest PAT are not providing any useful information and can be overridden to writeback; VMX always does this because it has the "IPAT" bit in its extended page table entries, but SVM does not have anything similar. 
Hook into VFIO and legacy device assignment so that they provide this information to KVM. Reviewed-by: Alex Williamson Tested-by: Joerg Roedel Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/iommu.c | 2 ++ arch/x86/kvm/x86.c | 18 ++++++++++++++++++ include/linux/kvm_host.h | 18 ++++++++++++++++++ virt/kvm/vfio.c | 5 +++++ 5 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2a7f5d782c33..49ec9038ec14 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -604,6 +604,8 @@ struct kvm_arch { bool iommu_noncoherent; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; +#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE + atomic_t assigned_device_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c index 7dbced309ddb..5c520ebf6343 100644 --- a/arch/x86/kvm/iommu.c +++ b/arch/x86/kvm/iommu.c @@ -200,6 +200,7 @@ int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev) goto out_unmap; } + kvm_arch_start_assignment(kvm); pci_set_dev_assigned(pdev); dev_info(&pdev->dev, "kvm assign device\n"); @@ -224,6 +225,7 @@ int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev) iommu_detach_device(domain, &pdev->dev); pci_clear_dev_assigned(pdev); + kvm_arch_end_assignment(kvm); dev_info(&pdev->dev, "kvm deassign device\n"); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6bd19c7abc65..0024968b342d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8213,6 +8213,24 @@ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) kvm_x86_ops->interrupt_allowed(vcpu); } +void kvm_arch_start_assignment(struct kvm *kvm) +{ + atomic_inc(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_start_assignment); + +void kvm_arch_end_assignment(struct kvm *kvm) +{ + 
atomic_dec(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return atomic_read(&kvm->arch.assigned_device_count); +} +EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { atomic_inc(&kvm->arch.noncoherent_dma_count); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9564fd78c547..05e99b8ef465 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -734,6 +734,24 @@ static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) return false; } #endif +#ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE +void kvm_arch_start_assignment(struct kvm *kvm); +void kvm_arch_end_assignment(struct kvm *kvm); +bool kvm_arch_has_assigned_device(struct kvm *kvm); +#else +static inline void kvm_arch_start_assignment(struct kvm *kvm) +{ +} + +static inline void kvm_arch_end_assignment(struct kvm *kvm) +{ +} + +static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) +{ + return false; +} +#endif static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) { diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 620e37f741b8..1dd087da6f31 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -155,6 +155,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) list_add_tail(&kvg->node, &kv->group_list); kvg->vfio_group = vfio_group; + kvm_arch_start_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_update_coherency(dev); @@ -190,6 +192,8 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) break; } + kvm_arch_end_assignment(dev->kvm); + mutex_unlock(&kv->lock); kvm_vfio_group_put_external_user(vfio_group); @@ -239,6 +243,7 @@ static void kvm_vfio_destroy(struct kvm_device *dev) kvm_vfio_group_put_external_user(kvg->vfio_group); list_del(&kvg->node); kfree(kvg); + kvm_arch_end_assignment(dev->kvm); } kvm_vfio_update_coherency(dev); -- cgit 
v1.2.3-70-g09d2 From 90f8572b0f021fdd1baa68e00a8c30482ee9e5f4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 29 Jun 2015 14:42:03 -0500 Subject: vfs: Commit to never having exectuables on proc and sysfs. Today proc and sysfs do not contain any executable files. Several applications today mount proc or sysfs without noexec and nosuid and then depend on there being no executable files on proc or sysfs. Having any executable files show up on proc or sysfs would cause a user space visible regression, and most likely security problems. Therefore commit to never allowing executables on proc and sysfs by adding a new flag to mark them as filesystems without executables and enforce that flag. Test the flag where MNT_NOEXEC is tested today, so that the only user visible effect will be that executables will be treated as if the execute bit is cleared. The filesystems proc and sysfs do not currently incorporate any executable files so this does not result in any user visible effects. This makes it unnecessary to vet changes to proc and sysfs tightly for adding executable files or changes to chattr that would modify existing files, as no matter what the individual files say they will not be treated as executable files by the vfs. Not having to vet changes too closely is important as without this we are only one proc_create call (or another goof up in the implementation of notify_change) from having problematic executables on proc. Those mistakes are all too easy to make and would create a situation where there are security issues or the assumptions of some program having to be broken (and cause userspace regressions). Signed-off-by: "Eric W.
Biederman" --- fs/exec.c | 10 ++++++++-- fs/open.c | 2 +- fs/proc/root.c | 2 ++ fs/sysfs/mount.c | 4 ++++ include/linux/fs.h | 3 +++ kernel/sys.c | 3 +-- mm/mmap.c | 4 ++-- mm/nommu.c | 2 +- security/security.c | 2 +- 9 files changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 1977c2a553ac..b06623a9347f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -98,6 +98,12 @@ static inline void put_binfmt(struct linux_binfmt * fmt) module_put(fmt->module); } +bool path_noexec(const struct path *path) +{ + return (path->mnt->mnt_flags & MNT_NOEXEC) || + (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC); +} + #ifdef CONFIG_USELIB /* * Note that a shared library must be both readable and executable due to @@ -132,7 +138,7 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto exit; error = -EACCES; - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (path_noexec(&file->f_path)) goto exit; fsnotify_open(file); @@ -777,7 +783,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) if (!S_ISREG(file_inode(file)->i_mode)) goto exit; - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (path_noexec(&file->f_path)) goto exit; err = deny_write_access(file); diff --git a/fs/open.c b/fs/open.c index e33dab287fa0..b6f1e96a7c0b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -377,7 +377,7 @@ retry: * with the "noexec" flag. 
*/ res = -EACCES; - if (path.mnt->mnt_flags & MNT_NOEXEC) + if (path_noexec(&path)) goto out_path_release; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 68feb0f70e63..361ab4ee42fc 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -134,6 +134,8 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } sb->s_flags |= MS_ACTIVE; + /* User space would break if executables appear on proc */ + sb->s_iflags |= SB_I_NOEXEC; } return dget(sb->s_root); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 1c6ac6fcee9f..f3db82071cfb 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -40,6 +40,10 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, SYSFS_MAGIC, &new_sb, ns); if (IS_ERR(root) || !new_sb) kobj_ns_drop(KOBJ_NS_TYPE_NET, ns); + else if (new_sb) + /* Userspace would break if executables appear on sysfs */ + root->d_sb->s_iflags |= SB_I_NOEXEC; + return root; } diff --git a/include/linux/fs.h b/include/linux/fs.h index a0653e560c26..42912f8d286e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1244,6 +1244,7 @@ struct mm_struct; /* sb->s_iflags */ #define SB_I_CGROUPWB 0x00000001 /* cgroup-aware writeback enabled */ +#define SB_I_NOEXEC 0x00000002 /* Ignore executables on this fs */ /* Possible states of 'frozen' field */ enum { @@ -3030,4 +3031,6 @@ static inline bool dir_relax(struct inode *inode) return !IS_DEADDIR(inode); } +extern bool path_noexec(const struct path *path); + #endif /* _LINUX_FS_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 259fda25eb6b..fa2f2f671a5c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1668,8 +1668,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) * overall picture. 
*/ err = -EACCES; - if (!S_ISREG(inode->i_mode) || - exe.file->f_path.mnt->mnt_flags & MNT_NOEXEC) + if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path)) goto exit; err = inode_permission(inode, MAY_EXEC); diff --git a/mm/mmap.c b/mm/mmap.c index aa632ade2be7..f126923ce683 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1268,7 +1268,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, * mounted, in which case we dont add PROT_EXEC.) */ if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC)) - if (!(file && (file->f_path.mnt->mnt_flags & MNT_NOEXEC))) + if (!(file && path_noexec(&file->f_path))) prot |= PROT_EXEC; if (!(flags & MAP_FIXED)) @@ -1337,7 +1337,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, case MAP_PRIVATE: if (!(file->f_mode & FMODE_READ)) return -EACCES; - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { + if (path_noexec(&file->f_path)) { if (vm_flags & VM_EXEC) return -EPERM; vm_flags &= ~VM_MAYEXEC; diff --git a/mm/nommu.c b/mm/nommu.c index 58ea3643b9e9..ce17abf087ff 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1035,7 +1035,7 @@ static int validate_mmap_request(struct file *file, /* handle executable mappings and implied executable * mappings */ - if (file->f_path.mnt->mnt_flags & MNT_NOEXEC) { + if (path_noexec(&file->f_path)) { if (prot & PROT_EXEC) return -EPERM; } else if ((prot & PROT_READ) && !(prot & PROT_EXEC)) { diff --git a/security/security.c b/security/security.c index 595fffab48b0..062f3c997fdc 100644 --- a/security/security.c +++ b/security/security.c @@ -776,7 +776,7 @@ static inline unsigned long mmap_prot(struct file *file, unsigned long prot) * ditto if it's not on noexec mount, except that on !MMU we need * NOMMU_MAP_EXEC (== VM_MAYEXEC) in this case */ - if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { + if (!path_noexec(&file->f_path)) { #ifndef CONFIG_MMU if (file->f_op->mmap_capabilities) { unsigned caps = file->f_op->mmap_capabilities(file); -- cgit 
v1.2.3-70-g09d2 From 5127e31a6ce04bd41a020c0ba28a1c0915ab6da1 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Fri, 10 Jul 2015 16:26:38 +0100 Subject: regulator: Add missing dummy definition for regulator_list_voltage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a build break when CONFIG_REGULATOR is not selected. e.g, on linux-next - 07102015: drivers/clk/tegra/clk-dfll.c: In function ‘find_lut_index_for_rate’: drivers/clk/tegra/clk-dfll.c:691:3: error: implicit declaration of function ‘regulator_list_voltage’ [-Werror=implicit-function-declaration] if (regulator_list_voltage(td->vdd_reg, td->i2c_lut[i]) == uv) ^ CC drivers/clocksource/mmio.o CC fs/proc/softirqs.o cc1: some warnings being treated as errors make[3]: *** [drivers/clk/tegra/clk-dfll.o] Error 1 make[2]: *** [drivers/clk/tegra] Error 2 make[1]: *** [drivers/clk] Error 2 make[1]: *** Waiting for unfinished jobs.... This should be pushed to 4.2 as we have the issue in 4.2-rc1, just that nobody uses it without the REGULATOR(yet). Signed-off-by: Suzuki K. 
Poulose Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index f8a689ed62a5..2ba4a40919c8 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -550,6 +550,12 @@ static inline int regulator_count_voltages(struct regulator *regulator) { return 0; } + +static inline int regulator_list_voltage(struct regulator *regulator, unsigned selector) +{ + return -EINVAL; +} + #endif static inline int regulator_set_voltage_tol(struct regulator *regulator, -- cgit v1.2.3-70-g09d2 From 634ec36cc0ab9d8dda0f2c101fa28d2e2a61b9eb Mon Sep 17 00:00:00 2001 From: David Thomson Date: Fri, 10 Jul 2015 13:56:54 +1200 Subject: net: phy: Pass mdix ethtool setting through to phy driver Pass the mdix setting from ethtool down to the phy driver, to allow driver specific implementations of manually setting the polarity. Signed-off-by: David Thomson Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 2 ++ include/linux/phy.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b2197b506acb..47693a9ebd3a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -353,6 +353,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) phydev->duplex = cmd->duplex; + phydev->mdix = cmd->eth_tp_mdix_ctrl; + /* Restart the PHY */ phy_start_aneg(phydev); diff --git a/include/linux/phy.h b/include/linux/phy.h index a26c3f84b8dd..e5fb1d415961 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -424,6 +424,8 @@ struct phy_device { struct net_device *attached_dev; + u8 mdix; + void (*adjust_link)(struct net_device *dev); }; #define to_phy_device(d) container_of(d, struct phy_device, dev) -- cgit v1.2.3-70-g09d2 From 0dcdbc97557fd8c297c4e38e9f66e304a64bae9d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 4 Jun 2015 12:13:28 +0800 Subject: genirq: Remove the irq argument from note_interrupt() Only required for the slow path. Retrieve it from irq descriptor if necessary. [ tglx: Split out from combo patch. 
Left [try_]misrouted_irq() untouched as there is no win in the slow path ] Signed-off-by: Jiang Liu Cc: Konrad Rzeszutek Wilk Cc: Tony Luck Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Randy Dunlap Cc: Yinghai Lu Cc: Borislav Petkov Cc: Jason Cooper Cc: Kevin Cernekee Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/1433391238-19471-19-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 3 +-- kernel/irq/chip.c | 2 +- kernel/irq/handle.c | 2 +- kernel/irq/spurious.c | 6 ++++-- 4 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 92188b0225bb..429ac266c7c6 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -487,8 +487,7 @@ extern int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, #endif /* Handling of unhandled and spurious interrupts: */ -extern void note_interrupt(unsigned int irq, struct irq_desc *desc, - irqreturn_t action_ret); +extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); /* Enable/disable irq debugging output: */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 310d65885440..76f199dc6a5e 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -328,7 +328,7 @@ void handle_nested_irq(unsigned int irq) action_ret = action->thread_fn(action->irq, action->dev_id); if (!noirqdebug) - note_interrupt(irq, desc, action_ret); + note_interrupt(desc, action_ret); raw_spin_lock_irq(&desc->lock); irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 4d37b96343e9..b6eeea8a80c5 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -176,7 +176,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action) add_interrupt_randomness(irq, flags); if (!noirqdebug) - note_interrupt(irq, desc, retval); + note_interrupt(desc, retval); return retval; } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c 
index 5378c529c1dc..32144175458d 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -270,9 +270,10 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc, #define SPURIOUS_DEFERRED 0x80000000 -void note_interrupt(unsigned int irq, struct irq_desc *desc, - irqreturn_t action_ret) +void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret) { + unsigned int irq; + if (desc->istate & IRQS_POLL_INPROGRESS || irq_settings_is_polled(desc)) return; @@ -396,6 +397,7 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc, desc->last_unhandled = jiffies; } + irq = irq_desc_get_irq(desc); if (unlikely(try_misrouted_irq(irq, desc, action_ret))) { int ok = misrouted_irq(irq); if (action_ret == IRQ_NONE) -- cgit v1.2.3-70-g09d2 From 4200e831e4a8fd09fa4e78de2e571ab270c12d06 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Jul 2015 15:18:24 -0700 Subject: Input: of_touchscreen - switch to using device properties Let's switch from OF to device properties so that common parsing code could work not only on device tree but also on ACPI-based platforms. 
Reviewed-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 4 +-- drivers/input/touchscreen/Makefile | 2 +- drivers/input/touchscreen/edt-ft5x06.c | 2 +- drivers/input/touchscreen/of_touchscreen.c | 56 ++++++++++++++++-------------- drivers/input/touchscreen/tsc2005.c | 2 +- include/linux/input/touchscreen.h | 11 ++---- 6 files changed, 37 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index aa2b5f21b89b..27035ecbd4f5 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -11,9 +11,9 @@ menuconfig INPUT_TOUCHSCREEN if INPUT_TOUCHSCREEN -config OF_TOUCHSCREEN +config TOUCHSCREEN_PROPERTIES def_tristate INPUT - depends on INPUT && OF + depends on INPUT config TOUCHSCREEN_88PM860X tristate "Marvell 88PM860x touchscreen" diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index fa3d33bac7fc..c85aae23e7f8 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -6,7 +6,7 @@ wm97xx-ts-y := wm97xx-core.o -obj-$(CONFIG_OF_TOUCHSCREEN) += of_touchscreen.o +obj-$(CONFIG_TOUCHSCREEN_PROPERTIES) += of_touchscreen.o obj-$(CONFIG_TOUCHSCREEN_88PM860X) += 88pm860x-ts.o obj-$(CONFIG_TOUCHSCREEN_AD7877) += ad7877.o obj-$(CONFIG_TOUCHSCREEN_AD7879) += ad7879.o diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 394b1de9a2a3..8f8f3199be39 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1041,7 +1041,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, 0, tsdata->num_y * 64 - 1, 0, 0); if (!pdata) - touchscreen_parse_of_params(input, true); + touchscreen_parse_properties(input, true); error = input_mt_init_slots(input, MAX_SUPPORT_POINTS, INPUT_MT_DIRECT); if (error) { diff --git a/drivers/input/touchscreen/of_touchscreen.c 
b/drivers/input/touchscreen/of_touchscreen.c index 50bc0f219547..bb6f2fe14667 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -9,12 +9,12 @@ * */ -#include +#include #include #include #include -static bool touchscreen_get_prop_u32(struct device_node *np, +static bool touchscreen_get_prop_u32(struct device *dev, const char *property, unsigned int default_value, unsigned int *value) @@ -22,7 +22,7 @@ static bool touchscreen_get_prop_u32(struct device_node *np, u32 val; int error; - error = of_property_read_u32(np, property, &val); + error = device_property_read_u32(dev, property, &val); if (error) { *value = default_value; return false; @@ -51,54 +51,58 @@ static void touchscreen_set_params(struct input_dev *dev, } /** - * touchscreen_parse_of_params - parse common touchscreen DT properties - * @dev: device that should be parsed + * touchscreen_parse_properties - parse common touchscreen DT properties + * @input: input device that should be parsed + * @multitouch: specifies whether parsed properties should be applied to + * single-touch or multi-touch axes * * This function parses common DT properties for touchscreens and setups the - * input device accordingly. The function keeps previously setuped default + * input device accordingly. The function keeps previously set up default * values if no value is specified via DT. */ -void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch) +void touchscreen_parse_properties(struct input_dev *input, bool multitouch) { - struct device_node *np = dev->dev.parent->of_node; + struct device *dev = input->dev.parent; unsigned int axis; unsigned int maximum, fuzz; bool data_present; - input_alloc_absinfo(dev); - if (!dev->absinfo) + input_alloc_absinfo(input); + if (!input->absinfo) return; axis = multitouch ? 
ABS_MT_POSITION_X : ABS_X; - data_present = touchscreen_get_prop_u32(np, "touchscreen-size-x", - input_abs_get_max(dev, + data_present = touchscreen_get_prop_u32(dev, "touchscreen-size-x", + input_abs_get_max(input, axis) + 1, &maximum) | - touchscreen_get_prop_u32(np, "touchscreen-fuzz-x", - input_abs_get_fuzz(dev, axis), + touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x", + input_abs_get_fuzz(input, axis), &fuzz); if (data_present) - touchscreen_set_params(dev, axis, maximum - 1, fuzz); + touchscreen_set_params(input, axis, maximum - 1, fuzz); axis = multitouch ? ABS_MT_POSITION_Y : ABS_Y; - data_present = touchscreen_get_prop_u32(np, "touchscreen-size-y", - input_abs_get_max(dev, + data_present = touchscreen_get_prop_u32(dev, "touchscreen-size-y", + input_abs_get_max(input, axis) + 1, &maximum) | - touchscreen_get_prop_u32(np, "touchscreen-fuzz-y", - input_abs_get_fuzz(dev, axis), + touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y", + input_abs_get_fuzz(input, axis), &fuzz); if (data_present) - touchscreen_set_params(dev, axis, maximum - 1, fuzz); + touchscreen_set_params(input, axis, maximum - 1, fuzz); axis = multitouch ? 
ABS_MT_PRESSURE : ABS_PRESSURE; - data_present = touchscreen_get_prop_u32(np, "touchscreen-max-pressure", - input_abs_get_max(dev, axis), + data_present = touchscreen_get_prop_u32(dev, + "touchscreen-max-pressure", + input_abs_get_max(input, axis), &maximum) | - touchscreen_get_prop_u32(np, "touchscreen-fuzz-pressure", - input_abs_get_fuzz(dev, axis), + touchscreen_get_prop_u32(dev, + "touchscreen-fuzz-pressure", + input_abs_get_fuzz(input, axis), &fuzz); if (data_present) - touchscreen_set_params(dev, axis, maximum, fuzz); + touchscreen_set_params(input, axis, maximum, fuzz); } -EXPORT_SYMBOL(touchscreen_parse_of_params); +EXPORT_SYMBOL(touchscreen_parse_properties); diff --git a/drivers/input/touchscreen/tsc2005.c b/drivers/input/touchscreen/tsc2005.c index d8c025b0f88c..aaf947525cd9 100644 --- a/drivers/input/touchscreen/tsc2005.c +++ b/drivers/input/touchscreen/tsc2005.c @@ -709,7 +709,7 @@ static int tsc2005_probe(struct spi_device *spi) input_set_abs_params(input_dev, ABS_PRESSURE, 0, max_p, fudge_p, 0); if (np) - touchscreen_parse_of_params(input_dev, false); + touchscreen_parse_properties(input_dev, false); input_dev->open = tsc2005_open; input_dev->close = tsc2005_close; diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h index eecc9ea6cd58..c91e1376132b 100644 --- a/include/linux/input/touchscreen.h +++ b/include/linux/input/touchscreen.h @@ -9,15 +9,8 @@ #ifndef _TOUCHSCREEN_H #define _TOUCHSCREEN_H -#include +struct input_dev; -#ifdef CONFIG_OF -void touchscreen_parse_of_params(struct input_dev *dev, bool multitouch); -#else -static inline void touchscreen_parse_of_params(struct input_dev *dev, - bool multitouch) -{ -} -#endif +void touchscreen_parse_properties(struct input_dev *dev, bool multitouch); #endif -- cgit v1.2.3-70-g09d2 From 28a74c050060c17b1edaee2d60470a33be476941 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Jul 2015 11:48:47 -0700 Subject: Input: pixcir_i2c_ts - move platform data Let's move 
driver's platform data definitions from include/linux/input/ into include/linux/platform_data/ so that it stays with the rest of platform data definitions. Acked-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/pixcir_i2c_ts.c | 2 +- include/linux/input/pixcir_ts.h | 64 ----------------------------- include/linux/platform_data/pixcir_i2c_ts.h | 64 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 65 deletions(-) delete mode 100644 include/linux/input/pixcir_ts.h create mode 100644 include/linux/platform_data/pixcir_i2c_ts.h (limited to 'include/linux') diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index 2c2107147319..f7d90997a786 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -24,11 +24,11 @@ #include #include #include -#include #include #include #include #include +#include #define PIXCIR_MAX_SLOTS 5 /* Max fingers supported by driver */ diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h deleted file mode 100644 index 7bae83b7c396..000000000000 --- a/include/linux/input/pixcir_ts.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _PIXCIR_I2C_TS_H -#define _PIXCIR_I2C_TS_H - -/* - * Register map - */ -#define PIXCIR_REG_POWER_MODE 51 -#define PIXCIR_REG_INT_MODE 52 - -/* - * Power modes: - * active: max scan speed - * idle: lower scan speed with automatic transition to active on touch - * halt: datasheet says sleep but this is more like halt as the chip - * clocks are cut and it can only be brought out of this mode - * using the RESET pin. 
- */ -enum pixcir_power_mode { - PIXCIR_POWER_ACTIVE, - PIXCIR_POWER_IDLE, - PIXCIR_POWER_HALT, -}; - -#define PIXCIR_POWER_MODE_MASK 0x03 -#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2) - -/* - * Interrupt modes: - * periodical: interrupt is asserted periodicaly - * diff coordinates: interrupt is asserted when coordinates change - * level on touch: interrupt level asserted during touch - * pulse on touch: interrupt pulse asserted druing touch - * - */ -enum pixcir_int_mode { - PIXCIR_INT_PERIODICAL, - PIXCIR_INT_DIFF_COORD, - PIXCIR_INT_LEVEL_TOUCH, - PIXCIR_INT_PULSE_TOUCH, -}; - -#define PIXCIR_INT_MODE_MASK 0x03 -#define PIXCIR_INT_ENABLE (1UL << 3) -#define PIXCIR_INT_POL_HIGH (1UL << 2) - -/** - * struct pixcir_irc_chip_data - chip related data - * @max_fingers: Max number of fingers reported simultaneously by h/w - * @has_hw_ids: Hardware supports finger tracking IDs - * - */ -struct pixcir_i2c_chip_data { - u8 max_fingers; - bool has_hw_ids; -}; - -struct pixcir_ts_platform_data { - int x_max; - int y_max; - int gpio_attb; /* GPIO connected to ATTB line */ - struct pixcir_i2c_chip_data chip; -}; - -#endif diff --git a/include/linux/platform_data/pixcir_i2c_ts.h b/include/linux/platform_data/pixcir_i2c_ts.h new file mode 100644 index 000000000000..7bae83b7c396 --- /dev/null +++ b/include/linux/platform_data/pixcir_i2c_ts.h @@ -0,0 +1,64 @@ +#ifndef _PIXCIR_I2C_TS_H +#define _PIXCIR_I2C_TS_H + +/* + * Register map + */ +#define PIXCIR_REG_POWER_MODE 51 +#define PIXCIR_REG_INT_MODE 52 + +/* + * Power modes: + * active: max scan speed + * idle: lower scan speed with automatic transition to active on touch + * halt: datasheet says sleep but this is more like halt as the chip + * clocks are cut and it can only be brought out of this mode + * using the RESET pin. 
+ */ +enum pixcir_power_mode { + PIXCIR_POWER_ACTIVE, + PIXCIR_POWER_IDLE, + PIXCIR_POWER_HALT, +}; + +#define PIXCIR_POWER_MODE_MASK 0x03 +#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2) + +/* + * Interrupt modes: + * periodical: interrupt is asserted periodicaly + * diff coordinates: interrupt is asserted when coordinates change + * level on touch: interrupt level asserted during touch + * pulse on touch: interrupt pulse asserted druing touch + * + */ +enum pixcir_int_mode { + PIXCIR_INT_PERIODICAL, + PIXCIR_INT_DIFF_COORD, + PIXCIR_INT_LEVEL_TOUCH, + PIXCIR_INT_PULSE_TOUCH, +}; + +#define PIXCIR_INT_MODE_MASK 0x03 +#define PIXCIR_INT_ENABLE (1UL << 3) +#define PIXCIR_INT_POL_HIGH (1UL << 2) + +/** + * struct pixcir_irc_chip_data - chip related data + * @max_fingers: Max number of fingers reported simultaneously by h/w + * @has_hw_ids: Hardware supports finger tracking IDs + * + */ +struct pixcir_i2c_chip_data { + u8 max_fingers; + bool has_hw_ids; +}; + +struct pixcir_ts_platform_data { + int x_max; + int y_max; + int gpio_attb; /* GPIO connected to ATTB line */ + struct pixcir_i2c_chip_data chip; +}; + +#endif -- cgit v1.2.3-70-g09d2 From cb4a5f068096c0cea954f363e70020aabb3555f4 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 6 Jul 2015 11:56:21 -0700 Subject: Input: pixcir_i2c_ts - switch the device over to gpiod This allows uniform parsing on legacy, DT and ACPI systems. 
Acked-by: Roger Quadros Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/pixcir_i2c_ts.c | 26 +++++++++----------------- include/linux/platform_data/pixcir_i2c_ts.h | 1 - 2 files changed, 9 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c index f7d90997a786..19732b573227 100644 --- a/drivers/input/touchscreen/pixcir_i2c_ts.c +++ b/drivers/input/touchscreen/pixcir_i2c_ts.c @@ -25,8 +25,8 @@ #include #include #include +#include #include -#include #include #include @@ -35,6 +35,7 @@ struct pixcir_i2c_ts_data { struct i2c_client *client; struct input_dev *input; + struct gpio_desc *gpio_attb; const struct pixcir_ts_platform_data *pdata; bool running; int max_fingers; /* Max fingers supported in this instance */ @@ -161,7 +162,6 @@ static void pixcir_ts_report(struct pixcir_i2c_ts_data *ts, static irqreturn_t pixcir_ts_isr(int irq, void *dev_id) { struct pixcir_i2c_ts_data *tsdata = dev_id; - const struct pixcir_ts_platform_data *pdata = tsdata->pdata; struct pixcir_report_data report; while (tsdata->running) { @@ -171,7 +171,7 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id) /* report it */ pixcir_ts_report(tsdata, &report); - if (gpio_get_value(pdata->gpio_attb)) { + if (gpiod_get_value(tsdata->gpio_attb)) { if (report.num_touches) { /* * Last report with no finger up? 
@@ -427,9 +427,6 @@ static struct pixcir_ts_platform_data *pixcir_parse_dt(struct device *dev) pdata->chip = *(const struct pixcir_i2c_chip_data *)match->data; - pdata->gpio_attb = of_get_named_gpio(np, "attb-gpio", 0); - /* gpio_attb validity is checked in probe */ - if (of_property_read_u32(np, "touchscreen-size-x", &pdata->x_max)) { dev_err(dev, "Failed to get touchscreen-size-x property\n"); return ERR_PTR(-EINVAL); @@ -442,8 +439,8 @@ static struct pixcir_ts_platform_data *pixcir_parse_dt(struct device *dev) } pdata->y_max -= 1; - dev_dbg(dev, "%s: x %d, y %d, gpio %d\n", __func__, - pdata->x_max + 1, pdata->y_max + 1, pdata->gpio_attb); + dev_dbg(dev, "%s: x %d, y %d\n", __func__, + pdata->x_max + 1, pdata->y_max + 1); return pdata; } @@ -476,11 +473,6 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, return -EINVAL; } - if (!gpio_is_valid(pdata->gpio_attb)) { - dev_err(dev, "Invalid gpio_attb in pdata\n"); - return -EINVAL; - } - if (!pdata->chip.max_fingers) { dev_err(dev, "Invalid max_fingers in pdata\n"); return -EINVAL; @@ -530,10 +522,10 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client, input_set_drvdata(input, tsdata); - error = devm_gpio_request_one(dev, pdata->gpio_attb, - GPIOF_DIR_IN, "pixcir_i2c_attb"); - if (error) { - dev_err(dev, "Failed to request ATTB gpio\n"); + tsdata->gpio_attb = devm_gpiod_get(dev, "attb", GPIOD_IN); + if (IS_ERR(tsdata->gpio_attb)) { + error = PTR_ERR(tsdata->gpio_attb); + dev_err(dev, "Failed to request ATTB gpio: %d\n", error); return error; } diff --git a/include/linux/platform_data/pixcir_i2c_ts.h b/include/linux/platform_data/pixcir_i2c_ts.h index 7bae83b7c396..646af6f8b838 100644 --- a/include/linux/platform_data/pixcir_i2c_ts.h +++ b/include/linux/platform_data/pixcir_i2c_ts.h @@ -57,7 +57,6 @@ struct pixcir_i2c_chip_data { struct pixcir_ts_platform_data { int x_max; int y_max; - int gpio_attb; /* GPIO connected to ATTB line */ struct pixcir_i2c_chip_data chip; }; -- cgit v1.2.3-70-g09d2 From 
d3b58c47d330de8c29898fe9746f7530408f8a59 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 26 Jun 2015 11:58:19 +0200 Subject: can: replace timestamp as unique skb attribute Commit 514ac99c64b "can: fix multiple delivery of a single CAN frame for overlapping CAN filters" requires the skb->tstamp to be set to check for identical CAN skbs. Without timestamping being required by user space applications, this timestamp was not generated, which led to commit 36c01245eb8 "can: fix loss of CAN frames in raw_rcv" - which forces the timestamp to be set in all CAN related skbuffs by introducing several __net_timestamp() calls. This forces e.g. out-of-tree drivers which are not using alloc_can{,fd}_skb() to add __net_timestamp() after skbuff creation to prevent the frame loss fixed in mainline Linux. This patch removes the timestamp dependency and uses an atomic counter to create a unique identifier together with the skbuff pointer. Btw: the new skbcnt element introduced in struct can_skb_priv has to be initialized with zero in out-of-tree drivers which are not using alloc_can{,fd}_skb() too. 
Signed-off-by: Oliver Hartkopp Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 7 ++----- drivers/net/can/slcan.c | 2 +- drivers/net/can/vcan.c | 3 --- include/linux/can/skb.h | 2 ++ net/can/af_can.c | 12 +++++++----- net/can/bcm.c | 2 ++ net/can/raw.c | 7 ++++--- 7 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index e9b1810d319f..aede704605c6 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -440,9 +440,6 @@ unsigned int can_get_echo_skb(struct net_device *dev, unsigned int idx) struct can_frame *cf = (struct can_frame *)skb->data; u8 dlc = cf->can_dlc; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx(priv->echo_skb[idx]); priv->echo_skb[idx] = NULL; @@ -578,7 +575,6 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -589,6 +585,7 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cf = (struct can_frame *)skb_put(skb, sizeof(struct can_frame)); memset(*cf, 0, sizeof(struct can_frame)); @@ -607,7 +604,6 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, if (unlikely(!skb)) return NULL; - __net_timestamp(skb); skb->protocol = htons(ETH_P_CANFD); skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -618,6 +614,7 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev, can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; *cfd = (struct canfd_frame *)skb_put(skb, sizeof(struct canfd_frame)); memset(*cfd, 0, sizeof(struct canfd_frame)); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index f64f5290d6f8..a23a7af8eb9a 
100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -207,7 +207,6 @@ static void slc_bump(struct slcan *sl) if (!skb) return; - __net_timestamp(skb); skb->dev = sl->dev; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -215,6 +214,7 @@ static void slc_bump(struct slcan *sl) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = sl->dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, sizeof(struct can_frame)), &cf, sizeof(struct can_frame)); diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 0ce868de855d..674f367087c5 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -78,9 +78,6 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) skb->dev = dev; skb->ip_summed = CHECKSUM_UNNECESSARY; - if (!(skb->tstamp.tv64)) - __net_timestamp(skb); - netif_rx_ni(skb); } diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index b6a52a4b457a..51bb6532785c 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -27,10 +27,12 @@ /** * struct can_skb_priv - private additional data inside CAN sk_buffs * @ifindex: ifindex of the first interface the CAN frame appeared on + * @skbcnt: atomic counter to have an unique id together with skb pointer * @cf: align to the following CAN frame at skb->data */ struct can_skb_priv { int ifindex; + int skbcnt; struct can_frame cf[0]; }; diff --git a/net/can/af_can.c b/net/can/af_can.c index 7933e62a7318..166d436196c1 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -89,6 +89,8 @@ struct timer_list can_stattimer; /* timer for statistics update */ struct s_stats can_stats; /* packet statistics */ struct s_pstats can_pstats; /* receive list statistics */ +static atomic_t skbcounter = ATOMIC_INIT(0); + /* * af_can socket functions */ @@ -310,12 +312,8 @@ int can_send(struct sk_buff *skb, int loop) return err; } - if (newskb) { - if (!(newskb->tstamp.tv64)) - __net_timestamp(newskb); - + if (newskb) netif_rx_ni(newskb); - } /* 
update statistics */ can_stats.tx_frames++; @@ -683,6 +681,10 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) can_stats.rx_frames++; can_stats.rx_frames_delta++; + /* create non-zero unique skb identifier together with *skb */ + while (!(can_skb_prv(skb)->skbcnt)) + can_skb_prv(skb)->skbcnt = atomic_inc_return(&skbcounter); + rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ diff --git a/net/can/bcm.c b/net/can/bcm.c index b523453585be..a1ba6875c2a2 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -261,6 +261,7 @@ static void bcm_can_tx(struct bcm_op *op) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; memcpy(skb_put(skb, CFSIZ), cf, CFSIZ); @@ -1217,6 +1218,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk) } can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ diff --git a/net/can/raw.c b/net/can/raw.c index 31b9748cbb4e..2e67b1423cd3 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -75,7 +75,7 @@ MODULE_ALIAS("can-proto-1"); */ struct uniqframe { - ktime_t tstamp; + int skbcnt; const struct sk_buff *skb; unsigned int join_rx_count; }; @@ -133,7 +133,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && - ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) { + this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { if (ro->join_filters) { this_cpu_inc(ro->uniq->join_rx_count); /* drop frame until all enabled filters matched */ @@ -144,7 +144,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) } } else { this_cpu_ptr(ro->uniq)->skb = oskb; - this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp; + this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; this_cpu_ptr(ro->uniq)->join_rx_count = 1; /* drop 
first frame to check all enabled filters? */ if (ro->join_filters && ro->count > 1) @@ -749,6 +749,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) -- cgit v1.2.3-70-g09d2 From 29d01b22eaa18d8b46091d3c98c6001c49f78e4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:02 -0400 Subject: locks: new helpers - flock_lock_inode_wait and posix_lock_inode_wait Allow callers to pass in an inode instead of a filp. Signed-off-by: Jeff Layton Reviewed-by: "J. Bruce Fields" Tested-by: "J. Bruce Fields" --- fs/locks.c | 50 ++++++++++++++++++++++++++++++++++++++------------ include/linux/fs.h | 14 ++++++++++++++ 2 files changed, 52 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index 4366b7c54e6d..ba268a503c1b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1163,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl, EXPORT_SYMBOL(posix_lock_file); /** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to + * posix_lock_inode_wait - Apply a POSIX-style lock to a file + * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. - * POSIX locks are sorted by owner task, then by starting address + * Variant of posix_lock_file_wait that does not take a filp, and so can be + * used after the filp has already been torn down. 
*/ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { - error = posix_lock_file(filp, fl, NULL); + error = __posix_lock_file(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1188,6 +1187,21 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(posix_lock_inode_wait); + +/** + * posix_lock_file_wait - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a POSIX style lock to a file. + * We merge adjacent & overlapping locks whenever possible. + * POSIX locks are sorted by owner task, then by starting address + */ +int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(posix_lock_file_wait); /** @@ -1850,18 +1864,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg) } /** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to + * flock_lock_inode_wait - Apply a FLOCK-style lock to a file + * @inode: inode of the file to apply to * @fl: The lock to be applied * - * Add a FLOCK style lock to a file. + * Apply a FLOCK style lock request to an inode. 
*/ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { - error = flock_lock_inode(file_inode(filp), fl); + error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); @@ -1873,7 +1887,19 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl) } return error; } +EXPORT_SYMBOL(flock_lock_inode_wait); +/** + * flock_lock_file_wait - Apply a FLOCK-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a FLOCK style lock to a file. + */ +int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} EXPORT_SYMBOL(flock_lock_file_wait); /** diff --git a/include/linux/fs.h b/include/linux/fs.h index a0653e560c26..4c990edd1377 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1046,11 +1046,13 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); +extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec 
*time); @@ -1137,6 +1139,12 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } +static inline int posix_lock_inode_wait(struct inode *inode, + struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) { return -ENOLCK; @@ -1163,6 +1171,12 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline int flock_lock_inode_wait(struct inode *inode, + struct file_lock *request) +{ + return -ENOLCK; +} + static inline int flock_lock_file_wait(struct file *filp, struct file_lock *request) { -- cgit v1.2.3-70-g09d2 From ee296d7c5709440f8abd36b5b65c6b3e388538d9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 11 Jul 2015 06:43:03 -0400 Subject: locks: inline posix_lock_file_wait and flock_lock_file_wait They just call file_inode and then the corresponding *_inode_file_wait function. Just make them static inlines instead. Signed-off-by: Jeff Layton --- fs/locks.c | 28 ---------------------------- include/linux/fs.h | 32 ++++++++++++++------------------ 2 files changed, 14 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index ba268a503c1b..d3d558ba4da7 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1189,21 +1189,6 @@ int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(posix_lock_inode_wait); -/** - * posix_lock_file_wait - Apply a POSIX-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a POSIX style lock to a file. - * We merge adjacent & overlapping locks whenever possible. 
- * POSIX locks are sorted by owner task, then by starting address - */ -int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return posix_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(posix_lock_file_wait); - /** * locks_mandatory_locked - Check for an active lock * @file: the file to check @@ -1889,19 +1874,6 @@ int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) } EXPORT_SYMBOL(flock_lock_inode_wait); -/** - * flock_lock_file_wait - Apply a FLOCK-style lock to a file - * @filp: The file to apply the lock to - * @fl: The lock to be applied - * - * Add a FLOCK style lock to a file. - */ -int flock_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return flock_lock_inode_wait(file_inode(filp), fl); -} -EXPORT_SYMBOL(flock_lock_file_wait); - /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. diff --git a/include/linux/fs.h b/include/linux/fs.h index 4c990edd1377..cc008c338f5a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1047,13 +1047,11 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_inode_wait(struct inode *, struct file_lock *); -extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); -extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int 
generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1145,11 +1143,6 @@ static inline int posix_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1177,12 +1170,6 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } -static inline int flock_lock_file_wait(struct file *filp, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1216,6 +1203,20 @@ static inline void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) {} #endif /* !CONFIG_FILE_LOCKING */ +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return posix_lock_inode_wait(file_inode(filp), fl); +} + +static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return flock_lock_inode_wait(file_inode(filp), fl); +} struct fasync_struct { spinlock_t fa_lock; @@ -2025,11 +2026,6 @@ extern void ihold(struct inode * inode); extern void iput(struct inode *); extern int generic_update_time(struct inode *, struct timespec *, int); -static inline struct inode *file_inode(const struct file *f) -{ - return f->f_inode; -} - /* /sys/fs */ extern struct kobject *fs_kobj; -- cgit v1.2.3-70-g09d2 From 671a2781ff01abf4fdc8904881fc3abd3a8279af Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Fri, 10 Jul 2015 17:19:55 -0400 Subject: security: add ioctl specific auditing to lsm_audit Add information about ioctl calls to the LSM audit data. Log the file path and command number. 
Signed-off-by: Jeff Vander Stoep Acked-by: Nick Kralevich [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 7 +++++++ security/lsm_audit.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 1cc89e9df480..ffb9c9da4f39 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -40,6 +40,11 @@ struct lsm_network_audit { } fam; }; +struct lsm_ioctlop_audit { + struct path path; + u16 cmd; +}; + /* Auxiliary data to use in generating the audit record. */ struct common_audit_data { char type; @@ -53,6 +58,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_KMOD 8 #define LSM_AUDIT_DATA_INODE 9 #define LSM_AUDIT_DATA_DENTRY 10 +#define LSM_AUDIT_DATA_IOCTL_OP 11 union { struct path path; struct dentry *dentry; @@ -68,6 +74,7 @@ struct common_audit_data { } key_struct; #endif char *kmod_name; + struct lsm_ioctlop_audit *op; } u; /* this union contains LSM specific data */ union { diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 1d34277dc402..9f6c649c65e9 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -245,6 +245,21 @@ static void dump_common_audit_data(struct audit_buffer *ab, } break; } + case LSM_AUDIT_DATA_IOCTL_OP: { + struct inode *inode; + + audit_log_d_path(ab, " path=", &a->u.op->path); + + inode = a->u.op->path.dentry->d_inode; + if (inode) { + audit_log_format(ab, " dev="); + audit_log_untrustedstring(ab, inode->i_sb->s_id); + audit_log_format(ab, " ino=%lu", inode->i_ino); + } + + audit_log_format(ab, " ioctlcmd=%hx", a->u.op->cmd); + break; + } case LSM_AUDIT_DATA_DENTRY: { struct inode *inode; -- cgit v1.2.3-70-g09d2 From 7bd393543287b921f964a350166bf2866527a1b5 Mon Sep 17 00:00:00 2001 From: James Ban Date: Tue, 30 Jun 2015 13:39:39 +0900 Subject: regulator: da9211: support da9215 This is a patch for supporting da9215 buck converter. 
Signed-off-by: James Ban Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/da9211.txt | 32 +++++++++++++++-- drivers/regulator/Kconfig | 6 ++-- drivers/regulator/da9211-regulator.c | 40 ++++++++++++++++------ drivers/regulator/da9211-regulator.h | 18 +++++----- include/linux/regulator/da9211.h | 19 +++++----- 5 files changed, 81 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt index eb618907c7de..c620493e8dbe 100644 --- a/Documentation/devicetree/bindings/regulator/da9211.txt +++ b/Documentation/devicetree/bindings/regulator/da9211.txt @@ -1,7 +1,7 @@ -* Dialog Semiconductor DA9211/DA9213 Voltage Regulator +* Dialog Semiconductor DA9211/DA9213/DA9215 Voltage Regulator Required properties: -- compatible: "dlg,da9211" or "dlg,da9213". +- compatible: "dlg,da9211" or "dlg,da9213" or "dlg,da9215" - reg: I2C slave address, usually 0x68. - interrupts: the interrupt outputs of the controller - regulators: A node that houses a sub-node for each regulator within the @@ -66,3 +66,31 @@ Example 2) DA9213 }; }; }; + + +Example 3) DA9215 + pmic: da9215@68 { + compatible = "dlg,da9215"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <4000000>; + regulator-max-microamp = <7000000>; + enable-gpios = <&gpio 27 0>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <4000000>; + regulator-max-microamp = <7000000>; + enable-gpios = <&gpio 17 0>; + }; + }; + }; + diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index bef3bde6971b..23496da101de 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -209,13 +209,13 @@ config REGULATOR_DA9210 interface. 
config REGULATOR_DA9211 - tristate "Dialog Semiconductor DA9211/DA9212/DA9213/DA9214 regulator" + tristate "Dialog Semiconductor DA9211/DA9212/DA9213/DA9214/DA9215 regulator" depends on I2C select REGMAP_I2C help Say y here to support for the Dialog Semiconductor DA9211/DA9212 - /DA9213/DA9214. - The DA9211/DA9212/DA9213/DA9214 is a multi-phase synchronous + /DA9213/DA9214/DA9215. + The DA9211/DA9212/DA9213/DA9214/DA9215 is a multi-phase synchronous step down converter 12A or 16A DC-DC Buck controlled through an I2C interface. diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index df79e4b1946e..0858100d2d03 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -1,6 +1,6 @@ /* - * da9211-regulator.c - Regulator device driver for DA9211/DA9213 - * Copyright (C) 2014 Dialog Semiconductor Ltd. + * da9211-regulator.c - Regulator device driver for DA9211/DA9213/DA9215 + * Copyright (C) 2015 Dialog Semiconductor Ltd. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -32,6 +32,7 @@ /* DEVICE IDs */ #define DA9211_DEVICE_ID 0x22 #define DA9213_DEVICE_ID 0x23 +#define DA9215_DEVICE_ID 0x24 #define DA9211_BUCK_MODE_SLEEP 1 #define DA9211_BUCK_MODE_SYNC 2 @@ -90,6 +91,13 @@ static const int da9213_current_limits[] = { 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, 4600000, 4800000, 5000000, 5200000, 5400000, 5600000, 5800000, 6000000 }; +/* Current limits for DA9215 buck (uA) indices + * corresponds with register values + */ +static const int da9215_current_limits[] = { + 4000000, 4200000, 4400000, 4600000, 4800000, 5000000, 5200000, 5400000, + 5600000, 5800000, 6000000, 6200000, 6400000, 6600000, 6800000, 7000000 +}; static unsigned int da9211_buck_get_mode(struct regulator_dev *rdev) { @@ -157,6 +165,10 @@ static int da9211_set_current_limit(struct regulator_dev *rdev, int min, current_limits = da9213_current_limits; max_size = ARRAY_SIZE(da9213_current_limits)-1; break; + case DA9215: + current_limits = da9215_current_limits; + max_size = ARRAY_SIZE(da9215_current_limits)-1; + break; default: return -EINVAL; } @@ -189,6 +201,9 @@ static int da9211_get_current_limit(struct regulator_dev *rdev) case DA9213: current_limits = da9213_current_limits; break; + case DA9215: + current_limits = da9215_current_limits; + break; default: return -EINVAL; } @@ -350,13 +365,11 @@ static int da9211_regulator_init(struct da9211 *chip) /* If configuration for 1/2 bucks is different between platform data * and the register, driver should exit. 
*/ - if ((chip->pdata->num_buck == 2 && data == 0x40) - || (chip->pdata->num_buck == 1 && data == 0x00)) { - if (data == 0) - chip->num_regulator = 1; - else - chip->num_regulator = 2; - } else { + if (chip->pdata->num_buck == 1 && data == 0x00) + chip->num_regulator = 1; + else if (chip->pdata->num_buck == 2 && data != 0x00) + chip->num_regulator = 2; + else { dev_err(chip->dev, "Configuration is mismatched\n"); return -EINVAL; } @@ -438,6 +451,9 @@ static int da9211_i2c_probe(struct i2c_client *i2c, case DA9213_DEVICE_ID: chip->chip_id = DA9213; break; + case DA9215_DEVICE_ID: + chip->chip_id = DA9215; + break; default: dev_err(chip->dev, "Unsupported device id = 0x%x.\n", data); return -ENODEV; @@ -478,6 +494,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, static const struct i2c_device_id da9211_i2c_id[] = { {"da9211", DA9211}, {"da9213", DA9213}, + {"da9215", DA9215}, {}, }; MODULE_DEVICE_TABLE(i2c, da9211_i2c_id); @@ -486,6 +503,7 @@ MODULE_DEVICE_TABLE(i2c, da9211_i2c_id); static const struct of_device_id da9211_dt_ids[] = { { .compatible = "dlg,da9211", .data = &da9211_i2c_id[0] }, { .compatible = "dlg,da9213", .data = &da9211_i2c_id[1] }, + { .compatible = "dlg,da9215", .data = &da9211_i2c_id[2] }, {}, }; MODULE_DEVICE_TABLE(of, da9211_dt_ids); @@ -504,5 +522,5 @@ static struct i2c_driver da9211_regulator_driver = { module_i2c_driver(da9211_regulator_driver); MODULE_AUTHOR("James Ban "); -MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211/DA9213"); -MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211/DA9213/DA9215"); +MODULE_LICENSE("GPL"); diff --git a/drivers/regulator/da9211-regulator.h b/drivers/regulator/da9211-regulator.h index 93fa9df2721c..d6ad96fc64d3 100644 --- a/drivers/regulator/da9211-regulator.h +++ b/drivers/regulator/da9211-regulator.h @@ -1,16 +1,16 @@ /* - * da9211-regulator.h - Regulator definitions for DA9211/DA9213 - * Copyright (C) 2014 Dialog Semiconductor Ltd. 
+ * da9211-regulator.h - Regulator definitions for DA9211/DA9213/DA9215 + * Copyright (C) 2015 Dialog Semiconductor Ltd. * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. */ #ifndef __DA9211_REGISTERS_H__ diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 5dd65acc2a69..a43a5ca1167b 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -1,16 +1,16 @@ /* - * da9211.h - Regulator device driver for DA9211/DA9213 - * Copyright (C) 2014 Dialog Semiconductor Ltd. + * da9211.h - Regulator device driver for DA9211/DA9213/DA9215 + * Copyright (C) 2015 Dialog Semiconductor Ltd. * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
+ * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. */ #ifndef __LINUX_REGULATOR_DA9211_H @@ -23,6 +23,7 @@ enum da9211_chip_id { DA9211, DA9213, + DA9215, }; struct da9211_pdata { -- cgit v1.2.3-70-g09d2 From 7e47682ea555e7c1edef1d8fd96e2aa4c12abe59 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:09 +1000 Subject: cgroup: allow a cgroup subsystem to reject a fork Add a new cgroup subsystem callback can_fork that conditionally states whether or not the fork is accepted or rejected by a cgroup policy. In addition, add a cancel_fork callback so that if an error occurs later in the forking process, any state modified by can_fork can be reverted. Allow for a private opaque pointer to be passed from cgroup_can_fork to cgroup_post_fork, allowing for the fork state to be stored by each subsystem separately. Also add a tagging system for cgroup_subsys.h to allow for CGROUP_ enumerations to be defined and used. In addition, explicitly add a CGROUP_CANFORK_COUNT macro to make arrays easier to define. This is in preparation for implementing the pids cgroup subsystem.
Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 10 +++++- include/linux/cgroup.h | 15 +++++++-- include/linux/cgroup_subsys.h | 23 ++++++++++++++ kernel/cgroup.c | 73 +++++++++++++++++++++++++++++++++++++++++-- kernel/cgroup_freezer.c | 2 +- kernel/fork.c | 17 ++++++++-- kernel/sched/core.c | 2 +- 7 files changed, 133 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93755a629299..83e37d8c4d80 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -34,12 +34,17 @@ struct seq_file; /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, +#define SUBSYS_TAG(_t) CGROUP_ ## _t, \ + __unused_tag_ ## _t = CGROUP_ ## _t - 1, enum cgroup_subsys_id { #include <linux/cgroup_subsys.h> CGROUP_SUBSYS_COUNT, }; +#undef SUBSYS_TAG #undef SUBSYS +#define CGROUP_CANFORK_COUNT (CGROUP_CANFORK_END - CGROUP_CANFORK_START) + /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ @@ -406,7 +411,9 @@ struct cgroup_subsys { struct cgroup_taskset *tset); void (*attach)(struct cgroup_subsys_state *css, struct cgroup_taskset *tset); - void (*fork)(struct task_struct *task); + int (*can_fork)(struct task_struct *task, void **priv_p); + void (*cancel_fork)(struct task_struct *task, void *priv); + void (*fork)(struct task_struct *task, void *priv); void (*exit)(struct cgroup_subsys_state *css, struct cgroup_subsys_state *old_css, struct task_struct *task); @@ -491,6 +498,7 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) #else /* CONFIG_CGROUPS */ +#define CGROUP_CANFORK_COUNT 0 #define CGROUP_SUBSYS_COUNT 0 static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) {} diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..a71fe2a3984e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@
-62,7 +62,12 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk); void cgroup_fork(struct task_struct *p); -void cgroup_post_fork(struct task_struct *p); +extern int cgroup_can_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]); +extern void cgroup_cancel_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]); +extern void cgroup_post_fork(struct task_struct *p, + void *old_ss_priv[CGROUP_CANFORK_COUNT]); void cgroup_exit(struct task_struct *p); int cgroup_init_early(void); @@ -524,7 +529,13 @@ static inline int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) { return -EINVAL; } static inline void cgroup_fork(struct task_struct *p) {} -static inline void cgroup_post_fork(struct task_struct *p) {} +static inline int cgroup_can_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ return 0; } +static inline void cgroup_cancel_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) {} +static inline void cgroup_post_fork(struct task_struct *p, + void *ss_priv[CGROUP_CANFORK_COUNT]) {} static inline void cgroup_exit(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index e4a96fb14403..ec43bce7e1ea 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -3,6 +3,17 @@ * * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ + +/* + * This file *must* be included with SUBSYS() defined. + * SUBSYS_TAG() is a noop if undefined. + */ + +#ifndef SUBSYS_TAG +#define __TMP_SUBSYS_TAG +#define SUBSYS_TAG(_x) +#endif + #if IS_ENABLED(CONFIG_CPUSETS) SUBSYS(cpuset) #endif @@ -47,12 +58,24 @@ SUBSYS(net_prio) SUBSYS(hugetlb) #endif +/* + * Subsystems that implement the can_fork() family of callbacks. 
+ */ +SUBSYS_TAG(CANFORK_START) +SUBSYS_TAG(CANFORK_END) + /* * The following subsystems are not supported on the default hierarchy. */ #if IS_ENABLED(CONFIG_CGROUP_DEBUG) SUBSYS(debug) #endif + +#ifdef __TMP_SUBSYS_TAG +#undef __TMP_SUBSYS_TAG +#undef SUBSYS_TAG +#endif + /* * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS. */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f89d9292eee6..a59dd1a6b74a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,6 +186,9 @@ static u64 css_serial_nr_next = 1; static unsigned long have_fork_callback __read_mostly; static unsigned long have_exit_callback __read_mostly; +/* Ditto for the can_fork callback. */ +static unsigned long have_canfork_callback __read_mostly; + static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; @@ -4955,6 +4958,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; + have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't @@ -5197,6 +5201,19 @@ static const struct file_operations proc_cgroupstats_operations = { .release = single_release, }; +static void **subsys_canfork_priv_p(void *ss_priv[CGROUP_CANFORK_COUNT], int i) +{ + if (CGROUP_CANFORK_START <= i && i < CGROUP_CANFORK_END) + return &ss_priv[i - CGROUP_CANFORK_START]; + return NULL; +} + +static void *subsys_canfork_priv(void *ss_priv[CGROUP_CANFORK_COUNT], int i) +{ + void **private = subsys_canfork_priv_p(ss_priv, i); + return private ? *private : NULL; +} + /** * cgroup_fork - initialize cgroup related fields during copy_process() * @child: pointer to task_struct of forking parent process. 
@@ -5211,6 +5228,57 @@ void cgroup_fork(struct task_struct *child) INIT_LIST_HEAD(&child->cg_list); } +/** + * cgroup_can_fork - called on a new task before the process is exposed + * @child: the task in question. + * + * This calls the subsystem can_fork() callbacks. If the can_fork() callback + * returns an error, the fork aborts with that error code. This allows for + * a cgroup subsystem to conditionally allow or deny new forks. + */ +int cgroup_can_fork(struct task_struct *child, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ + struct cgroup_subsys *ss; + int i, j, ret; + + for_each_subsys_which(ss, i, &have_canfork_callback) { + ret = ss->can_fork(child, subsys_canfork_priv_p(ss_priv, i)); + if (ret) + goto out_revert; + } + + return 0; + +out_revert: + for_each_subsys(ss, j) { + if (j >= i) + break; + if (ss->cancel_fork) + ss->cancel_fork(child, subsys_canfork_priv(ss_priv, j)); + } + + return ret; +} + +/** + * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork() + * @child: the task in question + * + * This calls the cancel_fork() callbacks if a fork failed *after* + * cgroup_can_fork() succeeded. + */ +void cgroup_cancel_fork(struct task_struct *child, + void *ss_priv[CGROUP_CANFORK_COUNT]) +{ + struct cgroup_subsys *ss; + int i; + + for_each_subsys(ss, i) + if (ss->cancel_fork) + ss->cancel_fork(child, subsys_canfork_priv(ss_priv, i)); +} + /** * cgroup_post_fork - called on a new task after adding it to the task list * @child: the task in question @@ -5221,7 +5289,8 @@ void cgroup_fork(struct task_struct *child) * cgroup_task_iter_start() - to guarantee that the new task ends up on its * list. */ -void cgroup_post_fork(struct task_struct *child) +void cgroup_post_fork(struct task_struct *child, + void *old_ss_priv[CGROUP_CANFORK_COUNT]) { struct cgroup_subsys *ss; int i; @@ -5266,7 +5335,7 @@ void cgroup_post_fork(struct task_struct *child) * and addition to css_set.
*/ for_each_subsys_which(ss, i, &have_fork_callback) - ss->fork(child); + ss->fork(child, subsys_canfork_priv(old_ss_priv, i)); } /** diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index 92b98cc0ee76..f1b30ad5dc6d 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -203,7 +203,7 @@ static void freezer_attach(struct cgroup_subsys_state *new_css, * to do anything as freezer_attach() will put @task into the appropriate * state. */ -static void freezer_fork(struct task_struct *task) +static void freezer_fork(struct task_struct *task, void *private) { struct freezer *freezer; diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..40e3af12c55e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1239,6 +1239,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, { int retval; struct task_struct *p; + void *cgrp_ss_priv[CGROUP_CANFORK_COUNT] = {}; if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -1512,6 +1513,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + /* + * Ensure that the cgroup subsystem policies allow the new process to be + * forked. It should be noted that the new process's css_set can be changed + * between here and cgroup_post_fork() if an organisation operation is in + * progress. + */ + retval = cgroup_can_fork(p, cgrp_ss_priv); + if (retval) + goto bad_fork_free_pid; + /* * Make it visible to the rest of the system, but dont wake it up yet. * Need tasklist lock for parent etc handling!
@@ -1548,7 +1559,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); retval = -ERESTARTNOINTR; - goto bad_fork_free_pid; + goto bad_fork_cancel_cgroup; } if (likely(p->pid)) { @@ -1590,7 +1601,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); - cgroup_post_fork(p); + cgroup_post_fork(p, cgrp_ss_priv); if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); perf_event_fork(p); @@ -1600,6 +1611,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, return p; +bad_fork_cancel_cgroup: + cgroup_cancel_fork(p, cgrp_ss_priv); bad_fork_free_pid: if (pid != &init_struct_pid) free_pid(pid); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..d811652fe6f5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8068,7 +8068,7 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css) sched_offline_group(tg); } -static void cpu_cgroup_fork(struct task_struct *task) +static void cpu_cgroup_fork(struct task_struct *task, void *private) { sched_move_task(task); } -- cgit v1.2.3-70-g09d2 From 49b786ea146f69c371df18e81ce0a2d5839f865c Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 9 Jun 2015 21:32:10 +1000 Subject: cgroup: implement the PIDs subsystem Adds a new single-purpose PIDs subsystem to limit the number of tasks that can be forked inside a cgroup. Essentially this is an implementation of RLIMIT_NPROC that applies to a cgroup rather than a process tree. However, it should be noted that organisational operations (adding and removing tasks from a PIDs hierarchy) will *not* be prevented. Rather, the number of tasks in the hierarchy cannot exceed the limit through forking. 
This is due to the fact that, in the unified hierarchy, attach cannot fail (and it is not possible for a task to overcome its PIDs cgroup policy limit by attaching to a child cgroup -- even if migrating mid-fork it must be able to fork in the parent first). PIDs are fundamentally a global resource, and it is possible to reach PID exhaustion inside a cgroup without hitting any reasonable kmemcg policy. Once you've hit PID exhaustion, you're only in a marginally better state than OOM. This subsystem allows PID exhaustion inside a cgroup to be prevented. Signed-off-by: Aleksa Sarai Signed-off-by: Tejun Heo --- CREDITS | 5 + include/linux/cgroup_subsys.h | 5 + init/Kconfig | 16 ++ kernel/Makefile | 1 + kernel/cgroup_pids.c | 366 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 393 insertions(+) create mode 100644 kernel/cgroup_pids.c (limited to 'include/linux') diff --git a/CREDITS b/CREDITS index 1d616640bbf6..4fcf9cd8544c 100644 --- a/CREDITS +++ b/CREDITS @@ -3219,6 +3219,11 @@ S: 69 rue Dunois S: 75013 Paris S: France +N: Aleksa Sarai +E: cyphar@cyphar.com +W: https://www.cyphar.com/ +D: `pids` cgroup subsystem + N: Dipankar Sarma E: dipankar@in.ibm.com D: RCU diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ec43bce7e1ea..1f36945fd23d 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -62,6 +62,11 @@ SUBSYS(hugetlb) * Subsystems that implement the can_fork() family of callbacks. */ SUBSYS_TAG(CANFORK_START) + +#if IS_ENABLED(CONFIG_CGROUP_PIDS) +SUBSYS(pids) +#endif + SUBSYS_TAG(CANFORK_END) /* diff --git a/init/Kconfig b/init/Kconfig index af09b4fb43d2..2184b34cbf73 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -955,6 +955,22 @@ config CGROUP_FREEZER Provides a way to freeze and unfreeze all tasks in a cgroup. +config CGROUP_PIDS + bool "PIDs cgroup subsystem" + help + Provides enforcement of process number limits in the scope of a + cgroup. 
Any attempt to fork more processes than is allowed in the + cgroup will fail. PIDs are fundamentally a global resource because it + is fairly trivial to reach PID exhaustion before you reach even a + conservative kmemcg limit. As a result, it is possible to grind a + system to halt without being limited by other cgroup policies. The + PIDs cgroup subsystem is designed to stop this from happening. + + It should be noted that organisational operations (such as attaching + to a cgroup hierarchy will *not* be blocked by the PIDs subsystem), + since the PIDs limit only affects a process's ability to fork, not to + attach to a cgroup. + config CGROUP_DEVICE bool "Device controller for cgroups" help diff --git a/kernel/Makefile b/kernel/Makefile index 43c4c920f30a..718fb8afab7a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -55,6 +55,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup.o obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o +obj-$(CONFIG_CGROUP_PIDS) += cgroup_pids.o obj-$(CONFIG_CPUSETS) += cpuset.o obj-$(CONFIG_UTS_NS) += utsname.o obj-$(CONFIG_USER_NS) += user_namespace.o diff --git a/kernel/cgroup_pids.c b/kernel/cgroup_pids.c new file mode 100644 index 000000000000..d75488824ae2 --- /dev/null +++ b/kernel/cgroup_pids.c @@ -0,0 +1,366 @@ +/* + * Process number limiting controller for cgroups. + * + * Used to allow a cgroup hierarchy to stop any new processes from fork()ing + * after a certain limit is reached. + * + * Since it is trivial to hit the task limit without hitting any kmemcg limits + * in place, PIDs are a fundamental resource. As such, PID exhaustion must be + * preventable in the scope of a cgroup hierarchy by allowing resource limiting + * of the number of tasks in a cgroup. + * + * In order to use the `pids` controller, set the maximum number of tasks in + * pids.max (this is not available in the root cgroup for obvious reasons). 
The + * number of processes currently in the cgroup is given by pids.current. + * Organisational operations are not blocked by cgroup policies, so it is + * possible to have pids.current > pids.max. However, it is not possible to + * violate a cgroup policy through fork(). fork() will return -EAGAIN if forking + * would cause a cgroup policy to be violated. + * + * To set a cgroup to have no limit, set pids.max to "max". This is the default + * for all new cgroups (N.B. that PID limits are hierarchical, so the most + * stringent limit in the hierarchy is followed). + * + * pids.current tracks all child cgroup hierarchies, so parent/pids.current is + * a superset of parent/child/pids.current. + * + * Copyright (C) 2015 Aleksa Sarai + * + * This file is subject to the terms and conditions of version 2 of the GNU + * General Public License. See the file COPYING in the main directory of the + * Linux distribution for more details. + */ + +#include +#include +#include +#include +#include + +#define PIDS_MAX (PID_MAX_LIMIT + 1ULL) +#define PIDS_MAX_STR "max" + +struct pids_cgroup { + struct cgroup_subsys_state css; + + /* + * Use 64-bit types so that we can safely represent "max" as + * %PIDS_MAX = (%PID_MAX_LIMIT + 1). 
+ */ + atomic64_t counter; + int64_t limit; +}; + +static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css) +{ + return container_of(css, struct pids_cgroup, css); +} + +static struct pids_cgroup *parent_pids(struct pids_cgroup *pids) +{ + return css_pids(pids->css.parent); +} + +static struct cgroup_subsys_state * +pids_css_alloc(struct cgroup_subsys_state *parent) +{ + struct pids_cgroup *pids; + + pids = kzalloc(sizeof(struct pids_cgroup), GFP_KERNEL); + if (!pids) + return ERR_PTR(-ENOMEM); + + pids->limit = PIDS_MAX; + atomic64_set(&pids->counter, 0); + return &pids->css; +} + +static void pids_css_free(struct cgroup_subsys_state *css) +{ + kfree(css_pids(css)); +} + +/** + * pids_cancel - uncharge the local pid count + * @pids: the pid cgroup state + * @num: the number of pids to cancel + * + * This function will WARN if the pid count goes under 0, because such a case is + * a bug in the pids controller proper. + */ +static void pids_cancel(struct pids_cgroup *pids, int num) +{ + /* + * A negative count (or overflow for that matter) is invalid, + * and indicates a bug in the `pids` controller proper. + */ + WARN_ON_ONCE(atomic64_add_negative(-num, &pids->counter)); +} + +/** + * pids_uncharge - hierarchically uncharge the pid count + * @pids: the pid cgroup state + * @num: the number of pids to uncharge + */ +static void pids_uncharge(struct pids_cgroup *pids, int num) +{ + struct pids_cgroup *p; + + for (p = pids; p; p = parent_pids(p)) + pids_cancel(p, num); +} + +/** + * pids_charge - hierarchically charge the pid count + * @pids: the pid cgroup state + * @num: the number of pids to charge + * + * This function does *not* follow the pid limit set. It cannot fail and the new + * pid count may exceed the limit. This is only used for reverting failed + * attaches, where there is no other way out than violating the limit. 
+ */ +static void pids_charge(struct pids_cgroup *pids, int num) +{ + struct pids_cgroup *p; + + for (p = pids; p; p = parent_pids(p)) + atomic64_add(num, &p->counter); +} + +/** + * pids_try_charge - hierarchically try to charge the pid count + * @pids: the pid cgroup state + * @num: the number of pids to charge + * + * This function follows the set limit. It will fail if the charge would cause + * the new value to exceed the hierarchical limit. Returns 0 if the charge + * succeded, otherwise -EAGAIN. + */ +static int pids_try_charge(struct pids_cgroup *pids, int num) +{ + struct pids_cgroup *p, *q; + + for (p = pids; p; p = parent_pids(p)) { + int64_t new = atomic64_add_return(num, &p->counter); + + /* + * Since new is capped to the maximum number of pid_t, if + * p->limit is %PIDS_MAX then we know that this test will never + * fail. + */ + if (new > p->limit) + goto revert; + } + + return 0; + +revert: + for (q = pids; q != p; q = parent_pids(q)) + pids_cancel(q, num); + pids_cancel(p, num); + + return -EAGAIN; +} + +static int pids_can_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct pids_cgroup *pids = css_pids(css); + struct task_struct *task; + + cgroup_taskset_for_each(task, tset) { + struct cgroup_subsys_state *old_css; + struct pids_cgroup *old_pids; + + /* + * Grab a ref to each task's css. We don't drop the ref until + * we either fail and hit ->cancel_attach() or succeed and hit + * ->attach(). 
+ */ + old_css = task_get_css(task, pids_cgrp_id); + old_pids = css_pids(old_css); + + pids_charge(pids, 1); + pids_uncharge(old_pids, 1); + } + + return 0; +} + +static void pids_cancel_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct pids_cgroup *pids = css_pids(css); + struct task_struct *task; + + cgroup_taskset_for_each(task, tset) { + struct cgroup_subsys_state *old_css; + struct pids_cgroup *old_pids; + + old_css = task_css(task, pids_cgrp_id); + old_pids = css_pids(old_css); + + pids_charge(old_pids, 1); + pids_uncharge(pids, 1); + css_put(old_css); + } +} + +static void pids_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) +{ + struct task_struct *task; + + cgroup_taskset_for_each(task, tset) + css_put(task_css(task, pids_cgrp_id)); +} + +static int pids_can_fork(struct task_struct *task, void **priv_p) +{ + struct cgroup_subsys_state *css; + struct pids_cgroup *pids; + int err; + + /* + * Use the "current" task_css for the pids subsystem as the tentative + * css. It is possible we will charge the wrong hierarchy, in which + * case we will forcefully revert/reapply the charge on the right + * hierarchy after it is committed to the task proper. 
+ */ + css = task_get_css(current, pids_cgrp_id); + pids = css_pids(css); + + err = pids_try_charge(pids, 1); + if (err) + goto err_css_put; + + *priv_p = css; + return 0; + +err_css_put: + css_put(css); + return err; +} + +static void pids_cancel_fork(struct task_struct *task, void *priv) +{ + struct cgroup_subsys_state *css = priv; + struct pids_cgroup *pids = css_pids(css); + + pids_uncharge(pids, 1); + css_put(css); +} + +static void pids_fork(struct task_struct *task, void *priv) +{ + struct cgroup_subsys_state *css; + struct cgroup_subsys_state *old_css = priv; + struct pids_cgroup *pids; + struct pids_cgroup *old_pids = css_pids(old_css); + + css = task_get_css(task, pids_cgrp_id); + pids = css_pids(css); + + /* + * If the association has changed, we have to revert and reapply the + * charge/uncharge on the wrong hierarchy to the current one. Since + * the association can only change due to an organisation event, its + * okay for us to ignore the limit in this case. + */ + if (pids != old_pids) { + pids_uncharge(old_pids, 1); + pids_charge(pids, 1); + } + + css_put(css); + css_put(old_css); +} + +static void pids_exit(struct cgroup_subsys_state *css, + struct cgroup_subsys_state *old_css, + struct task_struct *task) +{ + struct pids_cgroup *pids = css_pids(old_css); + + pids_uncharge(pids, 1); +} + +static ssize_t pids_max_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct cgroup_subsys_state *css = of_css(of); + struct pids_cgroup *pids = css_pids(css); + int64_t limit; + int err; + + buf = strstrip(buf); + if (!strcmp(buf, PIDS_MAX_STR)) { + limit = PIDS_MAX; + goto set_limit; + } + + err = kstrtoll(buf, 0, &limit); + if (err) + return err; + + if (limit < 0 || limit >= PIDS_MAX) + return -EINVAL; + +set_limit: + /* + * Limit updates don't need to be mutex'd, since it isn't + * critical that any racing fork()s follow the new limit. 
+ */ + pids->limit = limit; + return nbytes; +} + +static int pids_max_show(struct seq_file *sf, void *v) +{ + struct cgroup_subsys_state *css = seq_css(sf); + struct pids_cgroup *pids = css_pids(css); + int64_t limit = pids->limit; + + if (limit >= PIDS_MAX) + seq_printf(sf, "%s\n", PIDS_MAX_STR); + else + seq_printf(sf, "%lld\n", limit); + + return 0; +} + +static s64 pids_current_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct pids_cgroup *pids = css_pids(css); + + return atomic64_read(&pids->counter); +} + +static struct cftype pids_files[] = { + { + .name = "max", + .write = pids_max_write, + .seq_show = pids_max_show, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "current", + .read_s64 = pids_current_read, + }, + { } /* terminate */ +}; + +struct cgroup_subsys pids_cgrp_subsys = { + .css_alloc = pids_css_alloc, + .css_free = pids_css_free, + .attach = pids_attach, + .can_attach = pids_can_attach, + .cancel_attach = pids_cancel_attach, + .can_fork = pids_can_fork, + .cancel_fork = pids_cancel_fork, + .fork = pids_fork, + .exit = pids_exit, + .legacy_cftypes = pids_files, + .dfl_cftypes = pids_files, +}; -- cgit v1.2.3-70-g09d2 From 83cb8557e8d2c8e5eddc64840c437299343a7960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 26 Jun 2015 17:21:28 -0400 Subject: percpu: update incorrect comment for this_cpu_*() operations this_cpu_*() ops have been protected against both preemption and interrupts for quite a while now. We apparently forgot to update the comment. Fix it. 
Signed-off-by: Tejun Heo Cc: Christoph Lameter --- include/linux/percpu-defs.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 57f3a1c550dc..8f16299ca068 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -488,10 +488,8 @@ do { \ #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) /* - * Operations with implied preemption protection. These operations can be - * used without worrying about preemption. Note that interrupts may still - * occur while an operation is in progress and if the interrupt modifies - * the variable too then RMW actions may not be reliable. + * Operations with implied preemption/interrupt protection. These + * operations can be used without worrying about preemption or interrupt. */ #define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, pcp) #define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val) -- cgit v1.2.3-70-g09d2 From 4c62360d7562a20c996836d163259c87d9378120 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Tue, 30 Jun 2015 15:57:51 -0700 Subject: efi: Handle memory error structures produced based on old versions of standard The memory error record structure includes as its first field a bitmask of which subsequent fields are valid. The allows new fields to be added to the structure while keeping compatibility with older software that parses these records. This mechanism was used between versions 2.2 and 2.3 to add four new fields, growing the size of the structure from 73 bytes to 80. But Linux just added all the new fields so this test: if (gdata->error_data_length >= sizeof(*mem_err)) cper_print_mem(newpfx, mem_err); else goto err_section_too_small; now make Linux complain about old format records being too short. Add a definition for the old format of the structure and use that for the minimum size check. 
Pass the actual size to cper_print_mem() so it can sanity check the validation_bits field to ensure that if a BIOS using the old format sets bits as if it were new, we won't access fields beyond the end of the structure. Signed-off-by: Tony Luck Cc: Signed-off-by: Matt Fleming --- drivers/firmware/efi/cper.c | 15 ++++++++++++--- include/linux/cper.h | 22 +++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 4fd9961d552e..d42537425438 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -305,10 +305,17 @@ const char *cper_mem_err_unpack(struct trace_seq *p, return ret; } -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, + int len) { struct cper_mem_err_compact cmem; + /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ + if (len == sizeof(struct cper_sec_mem_err_old) && + (mem->validation_bits & ~(CPER_MEM_VALID_RANK_NUMBER - 1))) { + pr_err(FW_WARN "valid bits set for fields beyond structure\n"); + return; + } if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -405,8 +412,10 @@ static void cper_estatus_print_section( } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); printk("%s""section_type: memory error\n", newpfx); - if (gdata->error_data_length >= sizeof(*mem_err)) - cper_print_mem(newpfx, mem_err); + if (gdata->error_data_length >= + sizeof(struct cper_sec_mem_err_old)) + cper_print_mem(newpfx, mem_err, + gdata->error_data_length); else goto err_section_too_small; } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { diff --git a/include/linux/cper.h b/include/linux/cper.h index 76abba4b238e..dcacb1a72e26 
100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -340,7 +340,27 @@ struct cper_ia_proc_ctx { __u64 mm_reg_addr; }; -/* Memory Error Section */ +/* Old Memory Error Section UEFI 2.1, 2.2 */ +struct cper_sec_mem_err_old { + __u64 validation_bits; + __u64 error_status; + __u64 physical_addr; + __u64 physical_addr_mask; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u8 error_type; +}; + +/* Memory Error Section UEFI >= 2.3 */ struct cper_sec_mem_err { __u64 validation_bits; __u64 error_status; -- cgit v1.2.3-70-g09d2 From 71d126fd28de2d4d9b7b2088dbccd7ca62fad6e0 Mon Sep 17 00:00:00 2001 From: Arne Fitzenreiter Date: Wed, 15 Jul 2015 13:54:36 +0200 Subject: libata: add ATA_HORKAGE_NOTRIM Some devices lose data on TRIM whether queued or not. This patch adds a horkage to disable TRIM. tj: Collapsed unnecessary if() nesting. Signed-off-by: Arne Fitzenreiter Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org --- drivers/ata/libata-scsi.c | 3 ++- drivers/ata/libata-transport.c | 2 ++ include/linux/libata.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 3131adcc1f87..641a61a59e89 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2568,7 +2568,8 @@ static unsigned int ata_scsiop_read_cap(struct ata_scsi_args *args, u8 *rbuf) rbuf[14] = (lowest_aligned >> 8) & 0x3f; rbuf[15] = lowest_aligned; - if (ata_id_has_trim(args->id)) { + if (ata_id_has_trim(args->id) && + !(dev->horkage & ATA_HORKAGE_NOTRIM)) { rbuf[14] |= 0x80; /* LBPME */ if (ata_id_has_zero_after_trim(args->id) && diff --git a/drivers/ata/libata-transport.c b/drivers/ata/libata-transport.c index d6c37bcd416d..e2d94972962d 100644 --- a/drivers/ata/libata-transport.c +++ b/drivers/ata/libata-transport.c @@ -569,6 +569,8 @@ 
show_ata_dev_trim(struct device *dev, if (!ata_id_has_trim(ata_dev->id)) mode = "unsupported"; + else if (ata_dev->horkage & ATA_HORKAGE_NOTRIM) + mode = "forced_unsupported"; else if (ata_dev->horkage & ATA_HORKAGE_NO_NCQ_TRIM) mode = "forced_unqueued"; else if (ata_fpdma_dsm_supported(ata_dev)) diff --git a/include/linux/libata.h b/include/linux/libata.h index 36ce37bcc963..5c8bac6225a6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -431,6 +431,8 @@ enum { ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21), /* some WDs have broken LPM */ ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ + ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ + /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-70-g09d2 From af34d637637eabaf49406eb35c948cd51ba262a6 Mon Sep 17 00:00:00 2001 From: David Milburn Date: Mon, 13 Jul 2015 11:48:23 -0500 Subject: libata: add ATA_HORKAGE_MAX_SEC_1024 to revert back to previous max_sectors limit Since no longer limiting max_sectors to BLK_DEF_MAX_SECTORS (commit 34b48db66e08), data corruption may occur on ST380013AS drive configured on 82801JI (ICH10 Family) SATA controller. This patch will allow the driver to limit max_sectors as before # cat /sys/block/sdb/queue/max_sectors_kb 512 I was able to double the max_sectors_kb value up to 16384 on linux-4.2.0-rc2 before seeing corruption, but seems safer to use previous limit. Without this patch max_sectors_kb will be 32767. tj: Minor comment update. 
Reported-by: Jeff Moyer Signed-off-by: David Milburn Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v3.19 and later Fixes: 34b48db66e08 ("block: remove artifical max_hw_sectors cap") --- drivers/ata/libata-core.c | 10 ++++++++++ include/linux/ata.h | 1 + include/linux/libata.h | 2 +- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ed2b218ea64d..68202a8a3a0b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2478,6 +2478,10 @@ int ata_dev_configure(struct ata_device *dev) dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_128, dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_1024) + dev->max_sectors = min_t(unsigned int, ATA_MAX_SECTORS_1024, + dev->max_sectors); + if (dev->horkage & ATA_HORKAGE_MAX_SEC_LBA48) dev->max_sectors = ATA_MAX_SECTORS_LBA48; @@ -4146,6 +4150,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Slimtype DVD A DS8A8SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, { "Slimtype DVD A DS8A9SH", NULL, ATA_HORKAGE_MAX_SEC_LBA48 }, + /* + * Causes silent data corruption with higher max sects. + * http://lkml.kernel.org/g/x49wpy40ysk.fsf@segfault.boston.devel.redhat.com + */ + { "ST380013AS", "3.20", ATA_HORKAGE_MAX_SEC_1024 }, + /* Devices we expect to fail diagnostics */ /* Devices where NCQ should be avoided */ diff --git a/include/linux/ata.h b/include/linux/ata.h index fed36418dd1c..6c78956aa470 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -45,6 +45,7 @@ enum { ATA_SECT_SIZE = 512, ATA_MAX_SECTORS_128 = 128, ATA_MAX_SECTORS = 256, + ATA_MAX_SECTORS_1024 = 1024, ATA_MAX_SECTORS_LBA48 = 65535,/* TODO: 65536? 
*/ ATA_MAX_SECTORS_TAPE = 65535, diff --git a/include/linux/libata.h b/include/linux/libata.h index 5c8bac6225a6..c9cfbcdb8d14 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -432,7 +432,7 @@ enum { ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */ ATA_HORKAGE_NO_NCQ_LOG = (1 << 23), /* don't use NCQ for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ - + ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3-70-g09d2 From 70aa996601335ca3069190ebcdae8870828086a8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Jul 2015 18:13:20 -0500 Subject: netfilter: kill nf_hooks_active The function obscures what is going on in nf_hook_thresh and it's existence requires computing the hook list twice. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 00050dfd9f23..60e89348a91d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -150,11 +150,6 @@ static inline bool nf_hook_list_active(struct list_head *nf_hook_list, } #endif -static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook) -{ - return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook); -} - int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); /** @@ -172,10 +167,12 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - if (nf_hooks_active(pf, hook)) { + struct list_head *nf_hook_list = &nf_hooks[pf][hook]; + + if (nf_hook_list_active(nf_hook_list, pf, hook)) { struct nf_hook_state state; - nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh, + nf_hook_state_init(&state, nf_hook_list, hook, thresh, pf, 
indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } -- cgit v1.2.3-70-g09d2 From 085db2c04557d31db61541f361bd8b4de92c9939 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 10 Jul 2015 18:15:06 -0500 Subject: netfilter: Per network namespace netfilter hooks. - Add a new set of functions for registering and unregistering per network namespace hooks. - Modify the old global namespace hook functions to use the per network namespace hooks in their implementation, so there remains a single list that needs to be walked for any hook (this is important for keeping the hook priority working and for keeping the code walking the hooks simple). - Only allow registering the per netdevice hooks in the network namespace where the network device lives. - Dynamically allocate the structures in the per network namespace hook list in nf_register_net_hook, and unregister them in nf_unregister_net_hook. Dynamic allocation is required somewhere as the number of network namespaces is not fixed so we might as well allocate them in the registration function. The chain of registered hooks on any list is expected to be small so the cost of walking that list to find the entry we are unregistering should also be small. Performing the management of the dynamically allocated list entries in the registration and unregistration functions keeps the complexity from spreading. Signed-off-by: "Eric W. 
Biederman" --- include/linux/netfilter.h | 14 +++- include/net/netns/netfilter.h | 1 + net/netfilter/core.c | 182 +++++++++++++++++++++++++++++++++++++----- 3 files changed, 173 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 60e89348a91d..9bbd110ec81b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #ifdef CONFIG_NETFILTER static inline int NF_DROP_GETERR(int verdict) @@ -118,6 +120,13 @@ struct nf_sockopt_ops { }; /* Function to register/unregister hook points. */ +int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); +void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); +int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n); +void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n); + int nf_register_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); @@ -128,8 +137,6 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); int nf_register_sockopt(struct nf_sockopt_ops *reg); void nf_unregister_sockopt(struct nf_sockopt_ops *reg); -extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; - #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; @@ -167,7 +174,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - struct list_head *nf_hook_list = &nf_hooks[pf][hook]; + struct net *net = dev_net(indev ? 
indev : outdev); + struct list_head *nf_hook_list = &net->nf.hooks[pf][hook]; if (nf_hook_list_active(nf_hook_list, pf, hook)) { struct nf_hook_state state; diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 532e4ba64f49..38aa4983e2a9 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -14,5 +14,6 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif + struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; }; #endif diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fa4d3c111d3f..56ead1a1711c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -52,9 +52,6 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo) } EXPORT_SYMBOL_GPL(nf_unregister_afinfo); -struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; -EXPORT_SYMBOL(nf_hooks); - #ifdef HAVE_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); @@ -62,27 +59,40 @@ EXPORT_SYMBOL(nf_hooks_needed); static DEFINE_MUTEX(nf_hook_mutex); -static struct list_head *find_nf_hook_list(const struct nf_hook_ops *reg) +static struct list_head *find_nf_hook_list(struct net *net, + const struct nf_hook_ops *reg) { struct list_head *nf_hook_list = NULL; if (reg->pf != NFPROTO_NETDEV) - nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; + nf_hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS - if (reg->dev) + if (reg->dev && dev_net(reg->dev) == net) nf_hook_list = &reg->dev->nf_hooks_ingress; #endif } return nf_hook_list; } -int nf_register_hook(struct nf_hook_ops *reg) +int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { struct list_head *nf_hook_list; - struct nf_hook_ops *elem; + struct nf_hook_ops *elem, *new; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; - nf_hook_list = find_nf_hook_list(reg); + 
new->hook = reg->hook; + new->dev = reg->dev; + new->owner = reg->owner; + new->priv = reg->priv; + new->pf = reg->pf; + new->hooknum = reg->hooknum; + new->priority = reg->priority; + + nf_hook_list = find_nf_hook_list(net, reg); if (!nf_hook_list) return -ENOENT; @@ -91,7 +101,7 @@ int nf_register_hook(struct nf_hook_ops *reg) if (reg->priority < elem->priority) break; } - list_add_rcu(&reg->list, elem->list.prev); + list_add_rcu(&new->list, elem->list.prev); mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -102,13 +112,35 @@ int nf_register_hook(struct nf_hook_ops *reg) #endif return 0; } -EXPORT_SYMBOL(nf_register_hook); +EXPORT_SYMBOL(nf_register_net_hook); -void nf_unregister_hook(struct nf_hook_ops *reg) +void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { + struct list_head *nf_hook_list; + struct nf_hook_ops *elem; + + nf_hook_list = find_nf_hook_list(net, reg); + if (!nf_hook_list) + return; + mutex_lock(&nf_hook_mutex); - list_del_rcu(&reg->list); + list_for_each_entry(elem, nf_hook_list, list) { + if ((reg->hook == elem->hook) && + (reg->dev == elem->dev) && + (reg->owner == elem->owner) && + (reg->priv == elem->priv) && + (reg->pf == elem->pf) && + (reg->hooknum == elem->hooknum) && + (reg->priority == elem->priority)) { + list_del_rcu(&elem->list); + break; + } + } mutex_unlock(&nf_hook_mutex); + if (&elem->list == nf_hook_list) { + WARN(1, "nf_unregister_net_hook: hook not found!\n"); + return; + } #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_dec_ingress_queue(); @@ -117,7 +149,77 @@ void nf_unregister_hook(struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(reg); + nf_queue_nf_hook_drop(elem); + kfree(elem); +} +EXPORT_SYMBOL(nf_unregister_net_hook); + +int nf_register_net_hooks(struct net *net, const 
struct nf_hook_ops *reg, + unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = nf_register_net_hook(net, &reg[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + nf_unregister_net_hooks(net, reg, i); + return err; +} +EXPORT_SYMBOL(nf_register_net_hooks); + +void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, + unsigned int n) +{ + while (n-- > 0) + nf_unregister_net_hook(net, &reg[n]); +} +EXPORT_SYMBOL(nf_unregister_net_hooks); + +static LIST_HEAD(nf_hook_list); + +int nf_register_hook(struct nf_hook_ops *reg) +{ + struct net *net, *last; + int ret; + + rtnl_lock(); + for_each_net(net) { + ret = nf_register_net_hook(net, reg); + if (ret && ret != -ENOENT) + goto rollback; + } + list_add_tail(&reg->list, &nf_hook_list); + rtnl_unlock(); + + return 0; +rollback: + last = net; + for_each_net(net) { + if (net == last) + break; + nf_unregister_net_hook(net, reg); + } + rtnl_unlock(); + return ret; +} +EXPORT_SYMBOL(nf_register_hook); + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + struct net *net; + + rtnl_lock(); + list_del(&reg->list); + for_each_net(net) + nf_unregister_net_hook(net, reg); + rtnl_unlock(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -294,8 +396,46 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif +static int nf_register_hook_list(struct net *net) +{ + struct nf_hook_ops *elem; + int ret; + + rtnl_lock(); + list_for_each_entry(elem, &nf_hook_list, list) { + ret = nf_register_net_hook(net, elem); + if (ret && ret != -ENOENT) + goto out_undo; + } + rtnl_unlock(); + return 0; + +out_undo: + list_for_each_entry_continue_reverse(elem, &nf_hook_list, list) + nf_unregister_net_hook(net, elem); + rtnl_unlock(); + return ret; +} + +static void nf_unregister_hook_list(struct net *net) +{ + struct nf_hook_ops *elem; + + rtnl_lock(); + list_for_each_entry(elem, &nf_hook_list, list) + nf_unregister_net_hook(net, elem); 
+ rtnl_unlock(); +} + static int __net_init netfilter_net_init(struct net *net) { + int i, h, ret; + + for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { + for (h = 0; h < NF_MAX_HOOKS; h++) + INIT_LIST_HEAD(&net->nf.hooks[i][h]); + } + #ifdef CONFIG_PROC_FS net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->proc_net); @@ -306,11 +446,16 @@ static int __net_init netfilter_net_init(struct net *net) return -ENOMEM; } #endif - return 0; + ret = nf_register_hook_list(net); + if (ret) + remove_proc_entry("netfilter", net->proc_net); + + return ret; } static void __net_exit netfilter_net_exit(struct net *net) { + nf_unregister_hook_list(net); remove_proc_entry("netfilter", net->proc_net); } @@ -321,12 +466,7 @@ static struct pernet_operations netfilter_net_ops = { int __init netfilter_init(void) { - int i, h, ret; - - for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { - for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&nf_hooks[i][h]); - } + int ret; ret = register_pernet_subsys(&netfilter_net_ops); if (ret < 0) -- cgit v1.2.3-70-g09d2 From e7c8899f3e6f2830136cf6e115c4a55ce7a3920a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Jul 2015 17:51:07 +0200 Subject: netfilter: move tee_active to core This prepares for a TEE like expression in nftables. We want to ensure only one duplicate is sent, so both will use the same percpu variable to detect duplication. The other use case is detection of recursive call to xtables, but since we don't want dependency from nft to xtables core its put into core.c instead of the x_tables core. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 11 +++++++++++ net/netfilter/core.c | 3 +++ net/netfilter/xt_TEE.c | 13 ++++++------- 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9bbd110ec81b..e01da73ee6c4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -390,4 +390,15 @@ extern struct nfq_ct_hook __rcu *nfq_ct_hook; static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif +/** + * nf_skb_duplicated - TEE target has sent a packet + * + * When a xtables target sends a packet, the OUTPUT and POSTROUTING + * hooks are traversed again, i.e. nft and xtables are invoked recursively. + * + * This is used by xtables TEE target to prevent the duplicated skb from + * being duplicated again. + */ +DECLARE_PER_CPU(bool, nf_skb_duplicated); + #endif /*__LINUX_NETFILTER_H*/ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 56ead1a1711c..6896cee8b733 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -34,6 +34,9 @@ EXPORT_SYMBOL(nf_afinfo); const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; EXPORT_SYMBOL_GPL(nf_ipv6_ops); +DEFINE_PER_CPU(bool, nf_skb_duplicated); +EXPORT_SYMBOL_GPL(nf_skb_duplicated); + int nf_register_afinfo(const struct nf_afinfo *afinfo) { mutex_lock(&afinfo_mutex); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index a747eb475b68..8950e79c4dc9 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -37,7 +37,6 @@ struct xt_tee_priv { }; static const union nf_inet_addr tee_zero_address; -static DEFINE_PER_CPU(bool, tee_active); static struct net *pick_net(struct sk_buff *skb) { @@ -88,7 +87,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct xt_tee_tginfo *info = par->targinfo; struct iphdr *iph; - if (__this_cpu_read(tee_active)) + if 
(__this_cpu_read(nf_skb_duplicated)) return XT_CONTINUE; /* * Copy the skb, and route the copy. Will later return %XT_CONTINUE for @@ -125,9 +124,9 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) ip_send_check(iph); if (tee_tg_route4(skb, info)) { - __this_cpu_write(tee_active, true); + __this_cpu_write(nf_skb_duplicated, true); ip_local_out(skb); - __this_cpu_write(tee_active, false); + __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); } @@ -170,7 +169,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) { const struct xt_tee_tginfo *info = par->targinfo; - if (__this_cpu_read(tee_active)) + if (__this_cpu_read(nf_skb_duplicated)) return XT_CONTINUE; skb = pskb_copy(skb, GFP_ATOMIC); if (skb == NULL) @@ -188,9 +187,9 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) --iph->hop_limit; } if (tee_tg_route6(skb, info)) { - __this_cpu_write(tee_active, true); + __this_cpu_write(nf_skb_duplicated, true); ip6_local_out(skb); - __this_cpu_write(tee_active, false); + __this_cpu_write(nf_skb_duplicated, false); } else { kfree_skb(skb); } -- cgit v1.2.3-70-g09d2 From 7814b6ec6d0d63444abdb49554166c8cfcbd063e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Jul 2015 17:51:08 +0200 Subject: netfilter: xtables: don't save/restore jumpstack offset In most cases there is no reentrancy into ip/ip6tables. For skbs sent by REJECT or SYNPROXY targets, there is one level of reentrancy, but its not relevant as those targets issue an absolute verdict, i.e. the jumpstack can be clobbered since its not used after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc). So the only special case where it is relevant is the TEE target, which returns XT_CONTINUE. This patch changes ip(6)_do_table to always use the jump stack starting from 0. When we detect we're operating on an skb sent via TEE (percpu nf_skb_duplicated is 1) we switch to an alternate stack to leave the original one alone. 
Since there is no TEE support for arptables, it doesn't need to test if tee is active. The jump stack overflow tests are no longer needed as well -- since ->stacksize is the largest call depth we cannot exceed it. A much better alternative to the external jumpstack would be to just declare a jumps[32] stack on the local stack frame, but that would mean we'd have to reject iptables rulesets that used to work before. Another alternative would be to start rejecting rulesets with a larger call depth, e.g. 1000 -- in this case it would be feasible to allocate the entire stack in the percpu area which would avoid one dereference. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 1 - net/ipv4/netfilter/arp_tables.c | 11 +++-------- net/ipv4/netfilter/ip_tables.c | 37 ++++++++++++++++++++----------------- net/ipv6/netfilter/ip6_tables.c | 26 ++++++++++++++------------ net/netfilter/x_tables.c | 22 +++++++++++----------- 5 files changed, 48 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 286098a5667f..149284557ca7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -222,7 +222,6 @@ struct xt_table_info { * @stacksize jumps (number of user chains) can possibly be made. */ unsigned int stacksize; - unsigned int __percpu *stackptr; void ***jumpstack; unsigned char entries[0] __aligned(8); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index ae6d0a124213..969fdbe6fbb5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; + /* No TEE support for arptables, so no need to switch to alternate + * stack. All targets that reenter must return absolute verdicts. 
+ */ e = get_entry(table_base, private->hook_entry[hook]); acpar.in = state->in; @@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, } if (table_base + v != arpt_next_entry(e)) { - - if (stackidx >= private->stacksize) { - verdict = NF_DROP; - break; - } jumpstack[stackidx++] = e; } @@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, continue; } - /* Targets which reenter must return - * abs. verdicts - */ acpar.target = t->u.kernel.target; acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 5e44b35a8de8..a2e4b018a254 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb, const char *indev, *outdev; const void *table_base; struct ipt_entry *e, **jumpstack; - unsigned int *stackptr, origptr, cpu; + unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ + stackidx = 0; ip = ip_hdr(skb); indev = state->in ? state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; @@ -331,13 +332,20 @@ ipt_do_table(struct sk_buff *skb, smp_read_barrier_depends(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; - stackptr = per_cpu_ptr(private->stackptr, cpu); - origptr = *stackptr; + + /* Switch to alternate jumpstack if we're being invoked via TEE. + * TEE issues XT_CONTINUE verdict on original skb so we must not + * clobber the jumpstack. + * + * For recursion via REJECT or SYNPROXY the stack will be clobbered + * but it is no problem since absolute verdict is issued by these. 
+ */ + jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); - pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", - table->name, hook, origptr, + pr_debug("Entering %s(hook %u), UF %p\n", + table->name, hook, get_entry(table_base, private->underflow[hook])); do { @@ -383,28 +391,24 @@ ipt_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - if (*stackptr <= origptr) { + if (stackidx == 0) { e = get_entry(table_base, private->underflow[hook]); pr_debug("Underflow (this is normal) " "to %p\n", e); } else { - e = jumpstack[--*stackptr]; + e = jumpstack[--stackidx]; pr_debug("Pulled %p out from pos %u\n", - e, *stackptr); + e, stackidx); e = ipt_next_entry(e); } continue; } if (table_base + v != ipt_next_entry(e) && !(e->ip.flags & IPT_F_GOTO)) { - if (*stackptr >= private->stacksize) { - verdict = NF_DROP; - break; - } - jumpstack[(*stackptr)++] = e; + jumpstack[stackidx++] = e; pr_debug("Pushed %p into pos %u\n", - e, *stackptr - 1); + e, stackidx - 1); } e = get_entry(table_base, v); @@ -423,9 +427,8 @@ ipt_do_table(struct sk_buff *skb, /* Verdict */ break; } while (!acpar.hotdrop); - pr_debug("Exiting %s; resetting sp from %u to %u\n", - __func__, *stackptr, origptr); - *stackptr = origptr; + pr_debug("Exiting %s; sp at %u\n", __func__, stackidx); + xt_write_recseq_end(addend); local_bh_enable(); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index baf032179918..531281f0ff86 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb, const char *indev, *outdev; const void *table_base; struct ip6t_entry *e, **jumpstack; - unsigned int *stackptr, origptr, cpu; + unsigned int stackidx, cpu; const struct xt_table_info *private; struct xt_action_param acpar; unsigned int addend; /* Initialization */ + stackidx = 0; indev = state->in ? 
state->in->name : nulldevname; outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -357,8 +358,15 @@ ip6t_do_table(struct sk_buff *skb, cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; - stackptr = per_cpu_ptr(private->stackptr, cpu); - origptr = *stackptr; + + /* Switch to alternate jumpstack if we're being invoked via TEE. + * TEE issues XT_CONTINUE verdict on original skb so we must not + * clobber the jumpstack. + * + * For recursion via REJECT or SYNPROXY the stack will be clobbered + * but it is no problem since absolute verdict is issued by these. + */ + jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); @@ -406,20 +414,16 @@ ip6t_do_table(struct sk_buff *skb, verdict = (unsigned int)(-v) - 1; break; } - if (*stackptr <= origptr) + if (stackidx == 0) e = get_entry(table_base, private->underflow[hook]); else - e = ip6t_next_entry(jumpstack[--*stackptr]); + e = ip6t_next_entry(jumpstack[--stackidx]); continue; } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { - if (*stackptr >= private->stacksize) { - verdict = NF_DROP; - break; - } - jumpstack[(*stackptr)++] = e; + jumpstack[stackidx++] = e; } e = get_entry(table_base, v); @@ -437,8 +441,6 @@ ip6t_do_table(struct sk_buff *skb, break; } while (!acpar.hotdrop); - *stackptr = origptr; - xt_write_recseq_end(addend); local_bh_enable(); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 4db7d60d42fa..154447e519ab 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { [NFPROTO_IPV6] = "ip6", }; -/* Allow this many total (re)entries. */ -static const unsigned int xt_jumpstack_multiplier = 2; - /* Registration hooks for targets. 
*/ int xt_register_target(struct xt_target *target) { @@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info) kvfree(info->jumpstack); } - free_percpu(info->stackptr); - kvfree(info); } EXPORT_SYMBOL(xt_free_table_info); @@ -737,10 +732,6 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) unsigned int size; int cpu; - i->stackptr = alloc_percpu(unsigned int); - if (i->stackptr == NULL) - return -ENOMEM; - size = sizeof(void **) * nr_cpu_ids; if (size > PAGE_SIZE) i->jumpstack = vzalloc(size); @@ -753,8 +744,17 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) if (i->stacksize == 0) return 0; - i->stacksize *= xt_jumpstack_multiplier; - size = sizeof(void *) * i->stacksize; + /* Jumpstack needs to be able to record two full callchains, one + * from the first rule set traversal, plus one table reentrancy + * via -j TEE without clobbering the callchain that brought us to + * TEE target. + * + * This is done by allocating two jumpstacks per cpu, on reentry + * the upper half of the stack is used. + * + * see the jumpstack setup in ipt_do_table() for more details. + */ + size = sizeof(void *) * i->stacksize * 2u; for_each_possible_cpu(cpu) { if (size > PAGE_SIZE) i->jumpstack[cpu] = vmalloc_node(size, -- cgit v1.2.3-70-g09d2 From dcebd3153e0a7749bb054ab73fa4e1ca33e9d3f9 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Jul 2015 17:51:09 +0200 Subject: netfilter: add and use jump label for xt_tee Don't bother testing if we need to switch to alternate stack unless TEE target is used. 
Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 7 +++++++ net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/netfilter/x_tables.c | 3 +++ net/netfilter/xt_TEE.c | 2 ++ 5 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 149284557ca7..b006b719183f 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -3,6 +3,7 @@ #include +#include #include /** @@ -280,6 +281,12 @@ void xt_free_table_info(struct xt_table_info *info); */ DECLARE_PER_CPU(seqcount_t, xt_recseq); +/* xt_tee_enabled - true if x_tables needs to handle reentrancy + * + * Enabled if current ip(6)tables ruleset has at least one -j TEE rule. + */ +extern struct static_key xt_tee_enabled; + /** * xt_write_recseq_begin - start of a write section * diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a2e4b018a254..ff585bdbf850 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -340,7 +340,8 @@ ipt_do_table(struct sk_buff *skb, * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. */ - jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); + if (static_key_false(&xt_tee_enabled)) + jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 531281f0ff86..ea6d105063c2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -366,7 +366,8 @@ ip6t_do_table(struct sk_buff *skb, * For recursion via REJECT or SYNPROXY the stack will be clobbered * but it is no problem since absolute verdict is issued by these. 
*/ - jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); + if (static_key_false(&xt_tee_enabled)) + jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); e = get_entry(table_base, private->hook_entry[hook]); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 154447e519ab..9b42b5ea6dcd 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -727,6 +727,9 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock); DEFINE_PER_CPU(seqcount_t, xt_recseq); EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); +struct static_key xt_tee_enabled __read_mostly; +EXPORT_SYMBOL_GPL(xt_tee_enabled); + static int xt_jumpstack_alloc(struct xt_table_info *i) { unsigned int size; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 8950e79c4dc9..c5d6556dbc5e 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -251,6 +251,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par) } else info->priv = NULL; + static_key_slow_inc(&xt_tee_enabled); return 0; } @@ -262,6 +263,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par) unregister_netdevice_notifier(&info->priv->notifier); kfree(info->priv); } + static_key_slow_dec(&xt_tee_enabled); } static struct xt_target tee_tg_reg[] __read_mostly = { -- cgit v1.2.3-70-g09d2 From 13c4a90119d28cfcb6b5bdd820c233b86c2b0237 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Sat, 13 Jun 2015 09:02:48 -0600 Subject: seccomp: add ptrace options for suspend/resume This patch is the first step in enabling checkpoint/restore of processes with seccomp enabled. One of the things CRIU does while dumping tasks is inject code into them via ptrace to collect information that is only available to the process itself. However, if we are in a seccomp mode where these processes are prohibited from making these syscalls, then what CRIU does kills the task. 
This patch adds a new ptrace option, PTRACE_O_SUSPEND_SECCOMP, that enables a task from the init user namespace which has CAP_SYS_ADMIN and no seccomp filters to disable (and re-enable) seccomp filters for another task so that they can be successfully dumped (and restored). We restrict the set of processes that can disable seccomp through ptrace because although today ptrace can be used to bypass seccomp, there is some discussion of closing this loophole in the future and we would like this patch to not depend on that behavior and be future proofed for when it is removed. Note that seccomp can be suspended before any filters are actually installed; this behavior is useful on criu restore, so that we can suspend seccomp, restore the filters, unmap our restore code from the restored process' address space, and then resume the task by detaching and have the filters resumed as well. v2 changes: * require that the tracer have no seccomp filters installed * drop TIF_NOTSC manipulation from the patch * change from ptrace command to a ptrace option and use this ptrace option as the flag to check. This means that as soon as the tracer detaches/dies, seccomp is re-enabled and as a corollary that one can not disable seccomp across PTRACE_ATTACHs. v3 changes: * get rid of various #ifdefs everywhere * report more sensible errors when PTRACE_O_SUSPEND_SECCOMP is incorrectly used v4 changes: * get rid of may_suspend_seccomp() in favor of a capable() check in ptrace directly v5 changes: * check that seccomp is not enabled (or suspended) on the tracer Signed-off-by: Tycho Andersen CC: Will Drewry CC: Roland McGrath CC: Pavel Emelyanov CC: Serge E. 
Hallyn Acked-by: Oleg Nesterov Acked-by: Andy Lutomirski [kees: access seccomp.mode through seccomp_mode() instead] Signed-off-by: Kees Cook --- include/linux/ptrace.h | 1 + include/uapi/linux/ptrace.h | 6 ++++-- kernel/ptrace.c | 13 +++++++++++++ kernel/seccomp.c | 8 ++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 987a73a40ef8..061265f92876 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -34,6 +34,7 @@ #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) #define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) +#define PT_SUSPEND_SECCOMP (PTRACE_O_SUSPEND_SECCOMP << PT_OPT_FLAG_SHIFT) /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index cf1019e15f5b..a7a697986614 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -89,9 +89,11 @@ struct ptrace_peeksiginfo_args { #define PTRACE_O_TRACESECCOMP (1 << PTRACE_EVENT_SECCOMP) /* eventless options */ -#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_EXITKILL (1 << 20) +#define PTRACE_O_SUSPEND_SECCOMP (1 << 21) -#define PTRACE_O_MASK (0x000000ff | PTRACE_O_EXITKILL) +#define PTRACE_O_MASK (\ + 0x000000ff | PTRACE_O_EXITKILL | PTRACE_O_SUSPEND_SECCOMP) #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index c8e0e050a36a..787320de68e0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -556,6 +556,19 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data) if (data & ~(unsigned long)PTRACE_O_MASK) return -EINVAL; + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!config_enabled(CONFIG_CHECKPOINT_RESTORE) || + !config_enabled(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (seccomp_mode(&current->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + 
return -EPERM; + } + /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; flags &= ~(PTRACE_O_MASK << PT_OPT_FLAG_SHIFT); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 980fd26da22e..645e42d6fa4d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -590,6 +590,10 @@ void secure_computing_strict(int this_syscall) { int mode = current->seccomp.mode; + if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && + unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) + return; + if (mode == 0) return; else if (mode == SECCOMP_MODE_STRICT) @@ -691,6 +695,10 @@ u32 seccomp_phase1(struct seccomp_data *sd) int this_syscall = sd ? sd->nr : syscall_get_nr(current, task_pt_regs(current)); + if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && + unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) + return SECCOMP_PHASE1_OK; + switch (mode) { case SECCOMP_MODE_STRICT: __secure_computing_strict(this_syscall); /* may call do_exit */ -- cgit v1.2.3-70-g09d2 From 221272f97ca528048a577a3ff23d7774286ca5fd Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 15 Jun 2015 15:29:16 -0700 Subject: seccomp: swap hard-coded zeros to defined name For clarity, if CONFIG_SECCOMP isn't defined, seccomp_mode() is returning "disabled". This makes that more clear, along with another 0-use, and results in no operational change. 
Signed-off-by: Kees Cook --- include/linux/seccomp.h | 2 +- kernel/seccomp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index a19ddacdac30..f4265039a94c 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -78,7 +78,7 @@ static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) static inline int seccomp_mode(struct seccomp *s) { - return 0; + return SECCOMP_MODE_DISABLED; } #endif /* CONFIG_SECCOMP */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 645e42d6fa4d..383bd6caca81 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -594,7 +594,7 @@ void secure_computing_strict(int this_syscall) unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) return; - if (mode == 0) + if (mode == SECCOMP_MODE_DISABLED) return; else if (mode == SECCOMP_MODE_STRICT) __secure_computing_strict(this_syscall); -- cgit v1.2.3-70-g09d2 From d5671f6bf2a672cfa72ef2cbac5cc53a4539690d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 26 May 2015 17:48:34 +0200 Subject: rcu: Deinline rcu_read_lock_sched_held() if DEBUG_LOCK_ALLOC DEBUG_LOCK_ALLOC=y is not a production setting, but it is not very unusual either. Many developers routinely use kernels built with it enabled. Apart from being selected by hand, it is also auto-selected by PROVE_LOCKING "Lock debugging: prove locking correctness" and LOCK_STAT "Lock usage statistics" config options. LOCK STAT is necessary for "perf lock" to work. I wouldn't spend too much time optimizing it, but this particular function has a very large cost in code size: when it is deinlined, code size decreases by 830,000 bytes: text data bss dec hex filename 85674192 22294776 20627456 128596424 7aa39c8 vmlinux.before 84837612 22294424 20627456 127759492 79d7484 vmlinux (with this config: http://busybox.net/~vda/kernel_config) Signed-off-by: Denys Vlasenko CC: "Paul E. 
McKenney" CC: Josh Triplett CC: Mathieu Desnoyers CC: Lai Jiangshan CC: Tejun Heo CC: Oleg Nesterov CC: linux-kernel@vger.kernel.org Reviewed-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 40 ++------------------------------------- kernel/rcu/update.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 237f7b8d38ba..def6d45ad61c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -469,46 +469,10 @@ int rcu_read_lock_bh_held(void); * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side - * critical section unless it can prove otherwise. Note that disabling - * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. This is useful for debug checks in functions - * that required that they be called within an RCU-sched read-side - * critical section. - * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot - * and while lockdep is disabled. - * - * Note that if the CPU is in the idle loop from an RCU point of - * view (ie: that we are in the section between rcu_idle_enter() and - * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU - * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs - * that are in such a section, considering these as in extended quiescent - * state, so such a CPU is effectively never in an RCU read-side critical - * section regardless of what RCU primitives it invokes. This state of - * affairs is required --- we need to keep an RCU-free window in idle - * where the CPU may possibly enter into low power mode. 
This way we can - * notice an extended quiescent state to other CPUs that started a grace - * period. Otherwise we would delay any grace period as long as we run in - * the idle task. - * - * Similarly, we avoid claiming an SRCU read lock held if the current - * CPU is offline. + * critical section unless it can prove otherwise. */ #ifdef CONFIG_PREEMPT_COUNT -static inline int rcu_read_lock_sched_held(void) -{ - int lockdep_opinion = 0; - - if (!debug_lockdep_rcu_enabled()) - return 1; - if (!rcu_is_watching()) - return 0; - if (!rcu_lockdep_current_cpu_online()) - return 0; - if (debug_locks) - lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); -} +int rcu_read_lock_sched_held(void); #else /* #ifdef CONFIG_PREEMPT_COUNT */ static inline int rcu_read_lock_sched_held(void) { diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index afaecb7a799a..fec5f48b8860 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -62,6 +62,55 @@ MODULE_ALIAS("rcupdate"); module_param(rcu_expedited, int, 0); +#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_PREEMPT_COUNT) +/** + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? + * + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an + * RCU-sched read-side critical section. In absence of + * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side + * critical section unless it can prove otherwise. Note that disabling + * of preemption (including disabling irqs) counts as an RCU-sched + * read-side critical section. This is useful for debug checks in functions + * that required that they be called within an RCU-sched read-side + * critical section. + * + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. 
+ * + * Note that if the CPU is in the idle loop from an RCU point of + * view (ie: that we are in the section between rcu_idle_enter() and + * rcu_idle_exit()) then rcu_read_lock_held() returns false even if the CPU + * did an rcu_read_lock(). The reason for this is that RCU ignores CPUs + * that are in such a section, considering these as in extended quiescent + * state, so such a CPU is effectively never in an RCU read-side critical + * section regardless of what RCU primitives it invokes. This state of + * affairs is required --- we need to keep an RCU-free window in idle + * where the CPU may possibly enter into low power mode. This way we can + * notice an extended quiescent state to other CPUs that started a grace + * period. Otherwise we would delay any grace period as long as we run in + * the idle task. + * + * Similarly, we avoid claiming an SRCU read lock held if the current + * CPU is offline. + */ +int rcu_read_lock_sched_held(void) +{ + int lockdep_opinion = 0; + + if (!debug_lockdep_rcu_enabled()) + return 1; + if (!rcu_is_watching()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; + if (debug_locks) + lockdep_opinion = lock_is_held(&rcu_sched_lock_map); + return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); +} +EXPORT_SYMBOL(rcu_read_lock_sched_held); +#endif + #ifndef CONFIG_TINY_RCU static atomic_t rcu_expedited_nesting = -- cgit v1.2.3-70-g09d2 From 30bb6fb39e5c08b9db5bc592d6cbc9a5fc5e67a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 15 Jun 2015 13:31:33 +0200 Subject: gpio: Remove double "base" in comment Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index cc7ec129b329..c8393cd4d44f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -45,7 +45,7 @@ struct seq_file; * @base: 
identifies the first GPIO number handled by this chip; * or, if negative during registration, requests dynamic ID allocation. * DEPRECATION: providing anything non-negative and nailing the base - * base offset of GPIO chips is deprecated. Please pass -1 as base to + * offset of GPIO chips is deprecated. Please pass -1 as base to * let gpiolib select the chip base in all possible cases. We want to * get rid of the static GPIO number space in the long run. * @ngpio: the number of GPIOs handled by this controller; the last GPIO -- cgit v1.2.3-70-g09d2 From d746d707a8b1421a4ba46b497cb5d59e20161645 Mon Sep 17 00:00:00 2001 From: Anuradha Karuppiah Date: Tue, 14 Jul 2015 13:43:19 -0700 Subject: net core: Add protodown support. This patch introduces the proto_down flag that can be used by user space applications to notify switch drivers that errors have been detected on the device. The switch driver can react to protodown notification by doing a phys down on the associated switch port. Signed-off-by: Anuradha Karuppiah Signed-off-by: Andy Gospodarek Signed-off-by: Roopa Prabhu Signed-off-by: Wilson Kok Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 ++++++++++++++ net/core/dev.c | 20 ++++++++++++++++++++ net/core/net-sysfs.c | 14 ++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e20979dfd6a9..45cfd797eb77 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1041,6 +1041,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. + * int (*ndo_change_proto_down)(struct net_device *dev, + * bool proto_down); + * This function is used to pass protocol port error state information + * to the switch driver. The switch driver can react to the proto_down + * by doing a phys down on the associated switch port.
+ * */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); @@ -1211,6 +1217,8 @@ struct net_device_ops { int queue_index, u32 maxrate); int (*ndo_get_iflink)(const struct net_device *dev); + int (*ndo_change_proto_down)(struct net_device *dev, + bool proto_down); }; /** @@ -1502,6 +1510,10 @@ enum netdev_priv_flags { * * @qdisc_tx_busylock: XXX: need comments on this one * + * @proto_down: protocol port state information can be sent to the + * switch driver and used to set the phys state of the + * switch port. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -1762,6 +1774,7 @@ struct net_device { #endif struct phy_device *phydev; struct lock_class_key *qdisc_tx_busylock; + bool proto_down; }; #define to_net_dev(d) container_of(d, struct net_device, dev) @@ -2982,6 +2995,7 @@ int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int dev_change_proto_down(struct net_device *dev, bool proto_down); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/net/core/dev.c b/net/core/dev.c index 69445a33ace6..8810b6bbebfe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6074,6 +6074,26 @@ int dev_get_phys_port_name(struct net_device *dev, } EXPORT_SYMBOL(dev_get_phys_port_name); +/** + * dev_change_proto_down - update protocol port state information + * @dev: device + * @proto_down: new value + * + * This info can be used by switch drivers to set the phys state of the + * port. 
+ */ +int dev_change_proto_down(struct net_device *dev, bool proto_down) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!ops->ndo_change_proto_down) + return -EOPNOTSUPP; + if (!netif_device_present(dev)) + return -ENODEV; + return ops->ndo_change_proto_down(dev, proto_down); +} +EXPORT_SYMBOL(dev_change_proto_down); + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 18b34d771ed4..194c1d03b2b3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -404,6 +404,19 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr, NETDEVICE_SHOW(group, fmt_dec); static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store); +static int change_proto_down(struct net_device *dev, unsigned long proto_down) +{ + return dev_change_proto_down(dev, (bool) proto_down); +} + +static ssize_t proto_down_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_proto_down); +} +NETDEVICE_SHOW_RW(proto_down, fmt_dec); + static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -501,6 +514,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_phys_port_id.attr, &dev_attr_phys_port_name.attr, &dev_attr_phys_switch_id.attr, + &dev_attr_proto_down.attr, NULL, }; ATTRIBUTE_GROUPS(net_class); -- cgit v1.2.3-70-g09d2 From 527b397a7a3647b8ba2eae2e7a12b237bf411476 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 23 Jun 2015 15:48:02 +0200 Subject: gpio: em: Remove obsolete platform data support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 59032702ead90562 ("ARM: shmobile: Remove legacy platform devices from EMEV2 SoC code"), EMMA Mobile SoCs are only supported in generic DT-only ARM multi-platform builds. 
The driver doesn't need to use platform data anymore, hence remove platform data configuration. Signed-off-by: Geert Uytterhoeven Acked-by: Simon Horman Tested-by: Niklas Söderlund Signed-off-by: Linus Walleij --- drivers/gpio/gpio-em.c | 34 ++++++++-------------------------- include/linux/platform_data/gpio-em.h | 11 ----------- 2 files changed, 8 insertions(+), 37 deletions(-) delete mode 100644 include/linux/platform_data/gpio-em.h (limited to 'include/linux') diff --git a/drivers/gpio/gpio-em.c b/drivers/gpio/gpio-em.c index fbf287307c4c..a77f16c8d142 100644 --- a/drivers/gpio/gpio-em.c +++ b/drivers/gpio/gpio-em.c @@ -31,7 +31,6 @@ #include #include #include -#include struct em_gio_priv { void __iomem *base0; @@ -273,13 +272,12 @@ static const struct irq_domain_ops em_gio_irq_domain_ops = { static int em_gio_probe(struct platform_device *pdev) { - struct gpio_em_config pdata_dt; - struct gpio_em_config *pdata = dev_get_platdata(&pdev->dev); struct em_gio_priv *p; struct resource *io[2], *irq[2]; struct gpio_chip *gpio_chip; struct irq_chip *irq_chip; const char *name = dev_name(&pdev->dev); + unsigned int ngpios; int ret; p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL); @@ -319,18 +317,10 @@ static int em_gio_probe(struct platform_device *pdev) goto err0; } - if (!pdata) { - memset(&pdata_dt, 0, sizeof(pdata_dt)); - pdata = &pdata_dt; - - if (of_property_read_u32(pdev->dev.of_node, "ngpios", - &pdata->number_of_pins)) { - dev_err(&pdev->dev, "Missing ngpios OF property\n"); - ret = -EINVAL; - goto err0; - } - - pdata->gpio_base = -1; + if (of_property_read_u32(pdev->dev.of_node, "ngpios", &ngpios)) { + dev_err(&pdev->dev, "Missing ngpios OF property\n"); + ret = -EINVAL; + goto err0; } gpio_chip = &p->gpio_chip; @@ -345,8 +335,8 @@ static int em_gio_probe(struct platform_device *pdev) gpio_chip->label = name; gpio_chip->dev = &pdev->dev; gpio_chip->owner = THIS_MODULE; - gpio_chip->base = pdata->gpio_base; - gpio_chip->ngpio = pdata->number_of_pins; + 
gpio_chip->base = -1; + gpio_chip->ngpio = ngpios; irq_chip = &p->irq_chip; irq_chip->name = name; @@ -357,9 +347,7 @@ static int em_gio_probe(struct platform_device *pdev) irq_chip->irq_release_resources = em_gio_irq_relres; irq_chip->flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND; - p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, - pdata->number_of_pins, - pdata->irq_base, + p->irq_domain = irq_domain_add_simple(pdev->dev.of_node, ngpios, 0, &em_gio_irq_domain_ops, p); if (!p->irq_domain) { ret = -ENXIO; @@ -387,12 +375,6 @@ static int em_gio_probe(struct platform_device *pdev) goto err1; } - if (pdata->pctl_name) { - ret = gpiochip_add_pin_range(gpio_chip, pdata->pctl_name, 0, - gpio_chip->base, gpio_chip->ngpio); - if (ret < 0) - dev_warn(&pdev->dev, "failed to add pin range\n"); - } return 0; err1: diff --git a/include/linux/platform_data/gpio-em.h b/include/linux/platform_data/gpio-em.h deleted file mode 100644 index 7c5a519d2dcd..000000000000 --- a/include/linux/platform_data/gpio-em.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef __GPIO_EM_H__ -#define __GPIO_EM_H__ - -struct gpio_em_config { - unsigned int gpio_base; - unsigned int irq_base; - unsigned int number_of_pins; - const char *pctl_name; -}; - -#endif /* __GPIO_EM_H__ */ -- cgit v1.2.3-70-g09d2 From 9cf705de06a27cc99874626c9717b32e9874b3bb Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 16 Jul 2015 01:55:57 -0700 Subject: ARM: OMAP2+: Add support for initializing dm814x clocks Let's add minimal clocks for dm814x to get it booted. This is mostly a placeholder and relies on the PLLs being on from the bootloader. Note that the divider clocks work the same way as on dm816x and am335x.
Cc: Matthijs van Duin Cc: Mike Turquette Cc: Paul Walmsley Cc: Stephen Boyd Cc: Tero Kristo Acked-by: Stephen Boyd Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/io.c | 4 ++-- drivers/clk/ti/Makefile | 2 +- drivers/clk/ti/clk-814x.c | 31 +++++++++++++++++++++++++++++++ drivers/clk/ti/clk-816x.c | 2 +- include/linux/clk/ti.h | 3 ++- 5 files changed, 37 insertions(+), 5 deletions(-) create mode 100644 drivers/clk/ti/clk-814x.c (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 6779a9ff0d10..596af73c7549 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -558,7 +558,7 @@ void __init ti814x_init_early(void) ti81xx_hwmod_init(); omap_hwmod_init_postsetup(); if (of_have_populated_dt()) - omap_clk_soc_init = ti81xx_dt_clk_init; + omap_clk_soc_init = dm814x_dt_clk_init; } void __init ti816x_init_early(void) @@ -575,7 +575,7 @@ void __init ti816x_init_early(void) ti81xx_hwmod_init(); omap_hwmod_init_postsetup(); if (of_have_populated_dt()) - omap_clk_soc_init = ti81xx_dt_clk_init; + omap_clk_soc_init = dm816x_dt_clk_init; } #endif diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile index 105ffd0f5e79..80b42884a0e9 100644 --- a/drivers/clk/ti/Makefile +++ b/drivers/clk/ti/Makefile @@ -2,7 +2,7 @@ obj-y += clk.o autoidle.o clockdomain.o clk-common = dpll.o composite.o divider.o gate.o \ fixed-factor.o mux.o apll.o obj-$(CONFIG_SOC_AM33XX) += $(clk-common) clk-33xx.o -obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-816x.o +obj-$(CONFIG_SOC_TI81XX) += $(clk-common) fapll.o clk-814x.o clk-816x.o obj-$(CONFIG_ARCH_OMAP2) += $(clk-common) interface.o clk-2xxx.o obj-$(CONFIG_ARCH_OMAP3) += $(clk-common) interface.o \ clk-3xxx.o diff --git a/drivers/clk/ti/clk-814x.c b/drivers/clk/ti/clk-814x.c new file mode 100644 index 000000000000..d490d427cc20 --- /dev/null +++ b/drivers/clk/ti/clk-814x.c @@ -0,0 +1,31 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it 
under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + */ + +#include +#include +#include + +static struct ti_dt_clk dm814_clks[] = { + DT_CLK(NULL, "devosc_ck", "devosc_ck"), + DT_CLK(NULL, "mpu_ck", "mpu_ck"), + DT_CLK(NULL, "sysclk4_ck", "sysclk4_ck"), + DT_CLK(NULL, "sysclk6_ck", "sysclk6_ck"), + DT_CLK(NULL, "sysclk10_ck", "sysclk10_ck"), + DT_CLK(NULL, "sysclk18_ck", "sysclk18_ck"), + DT_CLK(NULL, "timer_sys_ck", "devosc_ck"), + DT_CLK(NULL, "cpsw_125mhz_gclk", "cpsw_125mhz_gclk"), + DT_CLK(NULL, "cpsw_cpts_rft_clk", "cpsw_cpts_rft_clk"), + { .node_name = NULL }, +}; + +int __init dm814x_dt_clk_init(void) +{ + ti_dt_clocks_register(dm814_clks); + omap2_clk_disable_autoidle_all(); + omap2_clk_enable_init_clocks(NULL, 0); + + return 0; +} diff --git a/drivers/clk/ti/clk-816x.c b/drivers/clk/ti/clk-816x.c index 9451e651a1ff..43d07456e78d 100644 --- a/drivers/clk/ti/clk-816x.c +++ b/drivers/clk/ti/clk-816x.c @@ -42,7 +42,7 @@ static const char *enable_init_clks[] = { "ddr_pll_clk3", }; -int __init ti81xx_dt_clk_init(void) +int __init dm816x_dt_clk_init(void) { ti_dt_clocks_register(dm816x_clks); omap2_clk_disable_autoidle_all(); diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 79b76e13d904..1736e29cee1b 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -329,7 +329,8 @@ int ti_clk_add_component(struct device_node *node, struct clk_hw *hw, int type); int omap3430_dt_clk_init(void); int omap3630_dt_clk_init(void); int am35xx_dt_clk_init(void); -int ti81xx_dt_clk_init(void); +int dm814x_dt_clk_init(void); +int dm816x_dt_clk_init(void); int omap4xxx_dt_clk_init(void); int omap5xxx_dt_clk_init(void); int dra7xx_dt_clk_init(void); -- cgit v1.2.3-70-g09d2 From e1443d2849b146be4ed8d4ef89ae7e215aafaa5b Mon Sep 17 00:00:00 2001 From: Stephen Chandler Paul Date: Wed, 15 Jul 2015 10:20:17 -0700 Subject: Input: i8042 - add unmask_kbd_data option A big problem with the current i8042 
debugging option is that it outputs data going to and from the keyboard by default. As a result, many dmesg logs uploaded by users will unintentionally contain sensitive information such as their password. As such, it's probably a good idea not to output data coming from the keyboard unless specifically enabled by the user. Signed-off-by: Stephen Chandler Paul Reviewed-by: Andreas Mohr Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- Documentation/kernel-parameters.txt | 4 ++++ drivers/input/serio/i8042.c | 43 +++++++++++++++++++++++++++++++++---- drivers/input/serio/i8042.h | 13 +++++++++++ drivers/input/serio/serio.c | 5 ++--- include/linux/serio.h | 2 ++ 5 files changed, 60 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index bfcb1a62a7b4..fd0f7cd8e496 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1274,6 +1274,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
, i8042.debug [HW] Toggle i8042 debug mode + i8042.unmask_kbd_data + [HW] Enable printing of interrupt data from the KBD port + (disabled by default, and as a pre-condition + requires that i8042.debug=1 be enabled) i8042.direct [HW] Put keyboard port into non-translated mode i8042.dumbkbd [HW] Pretend that controller can only read data from keyboard and cannot control its state diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index cb5ece77fd7d..c9c98f0ab284 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -88,6 +88,10 @@ MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings"); static bool i8042_debug; module_param_named(debug, i8042_debug, bool, 0600); MODULE_PARM_DESC(debug, "Turn i8042 debugging mode on and off"); + +static bool i8042_unmask_kbd_data; +module_param_named(unmask_kbd_data, i8042_unmask_kbd_data, bool, 0600); +MODULE_PARM_DESC(unmask_kbd_data, "Unconditional enable (may reveal sensitive data) of normally sanitize-filtered kbd data traffic debug log [pre-condition: i8042.debug=1 enabled]"); #endif static bool i8042_bypass_aux_irq_test; @@ -116,6 +120,7 @@ struct i8042_port { struct serio *serio; int irq; bool exists; + bool driver_bound; signed char mux; }; @@ -133,6 +138,7 @@ static bool i8042_kbd_irq_registered; static bool i8042_aux_irq_registered; static unsigned char i8042_suppress_kbd_ack; static struct platform_device *i8042_platform_device; +static struct notifier_block i8042_kbd_bind_notifier_block; static irqreturn_t i8042_interrupt(int irq, void *dev_id); static bool (*i8042_platform_filter)(unsigned char data, unsigned char str, @@ -528,10 +534,10 @@ static irqreturn_t i8042_interrupt(int irq, void *dev_id) port = &i8042_ports[port_no]; serio = port->exists ? port->serio : NULL; - dbg("%02x <- i8042 (interrupt, %d, %d%s%s)\n", - data, port_no, irq, - dfl & SERIO_PARITY ? ", bad parity" : "", - dfl & SERIO_TIMEOUT ? 
", timeout" : ""); + filter_dbg(port->driver_bound, data, "<- i8042 (interrupt, %d, %d%s%s)\n", + port_no, irq, + dfl & SERIO_PARITY ? ", bad parity" : "", + dfl & SERIO_TIMEOUT ? ", timeout" : ""); filtered = i8042_filter(data, str, serio); @@ -1438,6 +1444,29 @@ static int __init i8042_setup_kbd(void) return error; } +static int i8042_kbd_bind_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev = data; + struct serio *serio = to_serio_port(dev); + struct i8042_port *port = serio->port_data; + + if (serio != i8042_ports[I8042_KBD_PORT_NO].serio) + return 0; + + switch (action) { + case BUS_NOTIFY_BOUND_DRIVER: + port->driver_bound = true; + break; + + case BUS_NOTIFY_UNBIND_DRIVER: + port->driver_bound = false; + break; + } + + return 0; +} + static int __init i8042_probe(struct platform_device *dev) { int error; @@ -1507,6 +1536,10 @@ static struct platform_driver i8042_driver = { .shutdown = i8042_shutdown, }; +static struct notifier_block i8042_kbd_bind_notifier_block = { + .notifier_call = i8042_kbd_bind_notifier, +}; + static int __init i8042_init(void) { struct platform_device *pdev; @@ -1528,6 +1561,7 @@ static int __init i8042_init(void) goto err_platform_exit; } + bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; @@ -1543,6 +1577,7 @@ static void __exit i8042_exit(void) platform_driver_unregister(&i8042_driver); i8042_platform_exit(); + bus_unregister_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = NULL; } diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h index fc080beffedc..1db0a40c9bab 100644 --- a/drivers/input/serio/i8042.h +++ b/drivers/input/serio/i8042.h @@ -73,6 +73,17 @@ static unsigned long i8042_start_time; printk(KERN_DEBUG KBUILD_MODNAME ": [%d] " format, \ (int) (jiffies - i8042_start_time), ##arg); \ } while (0) + +#define filter_dbg(filter, data, format, args...) 
\ + do { \ + if (!i8042_debug) \ + break; \ + \ + if (!filter || i8042_unmask_kbd_data) \ + dbg("%02x " format, data, ##args); \ + else \ + dbg("** " format, ##args); \ + } while (0) #else #define dbg_init() do { } while (0) #define dbg(format, arg...) \ @@ -80,6 +91,8 @@ static unsigned long i8042_start_time; if (0) \ printk(KERN_DEBUG pr_fmt(format), ##arg); \ } while (0) + +#define filter_dbg(filter, data, format, args...) do { } while (0) #endif #endif /* _I8042_H */ diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index a05a5179da32..8f828975ab10 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -49,8 +49,6 @@ static DEFINE_MUTEX(serio_mutex); static LIST_HEAD(serio_list); -static struct bus_type serio_bus; - static void serio_add_port(struct serio *serio); static int serio_reconnect_port(struct serio *serio); static void serio_disconnect_port(struct serio *serio); @@ -1017,7 +1015,7 @@ irqreturn_t serio_interrupt(struct serio *serio, } EXPORT_SYMBOL(serio_interrupt); -static struct bus_type serio_bus = { +struct bus_type serio_bus = { .name = "serio", .drv_groups = serio_driver_groups, .match = serio_bus_match, @@ -1029,6 +1027,7 @@ static struct bus_type serio_bus = { .pm = &serio_pm_ops, #endif }; +EXPORT_SYMBOL(serio_bus); static int __init serio_init(void) { diff --git a/include/linux/serio.h b/include/linux/serio.h index 9f779c7a2da4..df4ab5de1586 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -18,6 +18,8 @@ #include #include +extern struct bus_type serio_bus; + struct serio { void *port_data; -- cgit v1.2.3-70-g09d2 From 0afab670bda6f3c9980be9e6de0effcc2c6d456c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jul 2015 21:59:47 +0900 Subject: mfd/extcon: max77693: Remove unused extern declarations and max77693_dev members Clean up the max77693 private header file by removing: 1. Left-overs from previous way of interrupt handling (driver uses regmap_irq_chip). 2. 
Unused members of struct 'max77693_dev' related to interrupts in extcon driver. Signed-off-by: Krzysztof Kozlowski Acked-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Mark Brown --- drivers/extcon/extcon-max77693.c | 19 ------------------- include/linux/mfd/max77693-private.h | 8 -------- 2 files changed, 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index f4f3b3d53928..770db3a72a6a 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -1164,28 +1164,9 @@ static int max77693_muic_probe(struct platform_device *pdev) } for (i = 0; i < num_init_data; i++) { - enum max77693_irq_source irq_src - = MAX77693_IRQ_GROUP_NR; - regmap_write(info->max77693->regmap_muic, init_data[i].addr, init_data[i].data); - - switch (init_data[i].addr) { - case MAX77693_MUIC_REG_INTMASK1: - irq_src = MUIC_INT1; - break; - case MAX77693_MUIC_REG_INTMASK2: - irq_src = MUIC_INT2; - break; - case MAX77693_MUIC_REG_INTMASK3: - irq_src = MUIC_INT3; - break; - } - - if (irq_src < MAX77693_IRQ_GROUP_NR) - info->max77693->irq_masks_cur[irq_src] - = init_data[i].data; } if (pdata && pdata->muic_data) { diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 51633ea6f910..ad67b8235a8d 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -547,18 +547,10 @@ struct max77693_dev { struct regmap_irq_chip_data *irq_data_muic; int irq; - int irq_gpio; - struct mutex irqlock; - int irq_masks_cur[MAX77693_IRQ_GROUP_NR]; - int irq_masks_cache[MAX77693_IRQ_GROUP_NR]; }; enum max77693_types { TYPE_MAX77693, }; -extern int max77693_irq_init(struct max77693_dev *max77686); -extern void max77693_irq_exit(struct max77693_dev *max77686); -extern int max77693_irq_resume(struct max77693_dev *max77686); - #endif /* __LINUX_MFD_MAX77693_PRIV_H */ -- cgit v1.2.3-70-g09d2 From b3b58cee8aced52e3d7fdb387f40c782a4511198 Mon Sep 
17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jul 2015 21:59:48 +0900 Subject: mfd: max77693: Store I2C device type as enum and add default unknown Store the device type (obtained from i2c_device_id) as an enum and add a default type of unknown to distinguish from case when this is not set at all. Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/max77693-private.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index ad67b8235a8d..e3c0afff38d3 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -529,13 +529,18 @@ enum max77693_irq_muic { MAX77693_MUIC_IRQ_NR, }; +enum max77693_types { + TYPE_MAX77693_UNKNOWN, + TYPE_MAX77693, +}; + struct max77693_dev { struct device *dev; struct i2c_client *i2c; /* 0xCC , PMIC, Charger, Flash LED */ struct i2c_client *muic; /* 0x4A , MUIC */ struct i2c_client *haptic; /* 0x90 , Haptic */ - int type; + enum max77693_types type; struct regmap *regmap; struct regmap *regmap_muic; @@ -549,8 +554,4 @@ struct max77693_dev { int irq; }; -enum max77693_types { - TYPE_MAX77693, -}; - #endif /* __LINUX_MFD_MAX77693_PRIV_H */ -- cgit v1.2.3-70-g09d2 From 61b305cd2ae747b8c9a2e4467dea2575a390162c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jul 2015 21:59:50 +0900 Subject: drivers: max77693: Move state container to common header This prepares for merging some of the drivers between max77693 and max77843 so the child MFD driver can be attached to any parent MFD main driver. Move the state container to common header file. Additionally add consistent 'i2c' prefixes to its members (of 'struct i2c_client' type). 
Signed-off-by: Krzysztof Kozlowski Acked-by: Sebastian Reichel Acked-by: Dmitry Torokhov Acked-by: Lee Jones Acked-by: Chanwoo Choi Acked-by: Jacek Anaszewski Signed-off-by: Mark Brown --- drivers/extcon/extcon-max77693.c | 3 ++- drivers/input/misc/max77693-haptic.c | 1 + drivers/leds/leds-max77693.c | 1 + drivers/mfd/max77693.c | 31 +++++++++++++------------ drivers/power/max77693_charger.c | 1 + drivers/regulator/max77693.c | 1 + include/linux/mfd/max77693-common.h | 44 ++++++++++++++++++++++++++++++++++++ include/linux/mfd/max77693-private.h | 25 -------------------- 8 files changed, 66 insertions(+), 41 deletions(-) create mode 100644 include/linux/mfd/max77693-common.h (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index 770db3a72a6a..c7bb180cfff4 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1077,7 +1078,7 @@ static int max77693_muic_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "allocate register map\n"); } else { info->max77693->regmap_muic = devm_regmap_init_i2c( - info->max77693->muic, + info->max77693->i2c_muic, &max77693_muic_regmap_config); if (IS_ERR(info->max77693->regmap_muic)) { ret = PTR_ERR(info->max77693->regmap_muic); diff --git a/drivers/input/misc/max77693-haptic.c b/drivers/input/misc/max77693-haptic.c index 39e930c10ebb..4524499ea72f 100644 --- a/drivers/input/misc/max77693-haptic.c +++ b/drivers/input/misc/max77693-haptic.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #define MAX_MAGNITUDE_SHIFT 16 diff --git a/drivers/leds/leds-max77693.c b/drivers/leds/leds-max77693.c index b8b0eec7b540..df348a06d8c7 100644 --- a/drivers/leds/leds-max77693.c +++ b/drivers/leds/leds-max77693.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c index 
cb14afa97e6f..67bc53fdc389 100644 --- a/drivers/mfd/max77693.c +++ b/drivers/mfd/max77693.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -193,22 +194,22 @@ static int max77693_i2c_probe(struct i2c_client *i2c, } else dev_info(max77693->dev, "device ID: 0x%x\n", reg_data); - max77693->muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC); - if (!max77693->muic) { + max77693->i2c_muic = i2c_new_dummy(i2c->adapter, I2C_ADDR_MUIC); + if (!max77693->i2c_muic) { dev_err(max77693->dev, "Failed to allocate I2C device for MUIC\n"); return -ENODEV; } - i2c_set_clientdata(max77693->muic, max77693); + i2c_set_clientdata(max77693->i2c_muic, max77693); - max77693->haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC); - if (!max77693->haptic) { + max77693->i2c_haptic = i2c_new_dummy(i2c->adapter, I2C_ADDR_HAPTIC); + if (!max77693->i2c_haptic) { dev_err(max77693->dev, "Failed to allocate I2C device for Haptic\n"); ret = -ENODEV; goto err_i2c_haptic; } - i2c_set_clientdata(max77693->haptic, max77693); + i2c_set_clientdata(max77693->i2c_haptic, max77693); - max77693->regmap_haptic = devm_regmap_init_i2c(max77693->haptic, + max77693->regmap_haptic = devm_regmap_init_i2c(max77693->i2c_haptic, &max77693_regmap_haptic_config); if (IS_ERR(max77693->regmap_haptic)) { ret = PTR_ERR(max77693->regmap_haptic); @@ -222,7 +223,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, * instance of MUIC device when irq of max77693 is initialized * before call max77693-muic probe() function. 
*/ - max77693->regmap_muic = devm_regmap_init_i2c(max77693->muic, + max77693->regmap_muic = devm_regmap_init_i2c(max77693->i2c_muic, &max77693_regmap_muic_config); if (IS_ERR(max77693->regmap_muic)) { ret = PTR_ERR(max77693->regmap_muic); @@ -255,7 +256,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_FALLING, 0, &max77693_charger_irq_chip, - &max77693->irq_data_charger); + &max77693->irq_data_chg); if (ret) { dev_err(max77693->dev, "failed to add irq chip: %d\n", ret); goto err_irq_charger; @@ -296,15 +297,15 @@ err_mfd: err_intsrc: regmap_del_irq_chip(max77693->irq, max77693->irq_data_muic); err_irq_muic: - regmap_del_irq_chip(max77693->irq, max77693->irq_data_charger); + regmap_del_irq_chip(max77693->irq, max77693->irq_data_chg); err_irq_charger: regmap_del_irq_chip(max77693->irq, max77693->irq_data_topsys); err_irq_topsys: regmap_del_irq_chip(max77693->irq, max77693->irq_data_led); err_regmap: - i2c_unregister_device(max77693->haptic); + i2c_unregister_device(max77693->i2c_haptic); err_i2c_haptic: - i2c_unregister_device(max77693->muic); + i2c_unregister_device(max77693->i2c_muic); return ret; } @@ -315,12 +316,12 @@ static int max77693_i2c_remove(struct i2c_client *i2c) mfd_remove_devices(max77693->dev); regmap_del_irq_chip(max77693->irq, max77693->irq_data_muic); - regmap_del_irq_chip(max77693->irq, max77693->irq_data_charger); + regmap_del_irq_chip(max77693->irq, max77693->irq_data_chg); regmap_del_irq_chip(max77693->irq, max77693->irq_data_topsys); regmap_del_irq_chip(max77693->irq, max77693->irq_data_led); - i2c_unregister_device(max77693->muic); - i2c_unregister_device(max77693->haptic); + i2c_unregister_device(max77693->i2c_muic); + i2c_unregister_device(max77693->i2c_haptic); return 0; } diff --git a/drivers/power/max77693_charger.c b/drivers/power/max77693_charger.c index 754879eb59f6..060cab5ae3aa 100644 --- a/drivers/power/max77693_charger.c +++ b/drivers/power/max77693_charger.c @@ -20,6 +20,7 @@ 
#include #include #include +#include #include #define MAX77693_CHARGER_NAME "max77693-charger" diff --git a/drivers/regulator/max77693.c b/drivers/regulator/max77693.c index 236851ab575a..c6ab440a74b7 100644 --- a/drivers/regulator/max77693.c +++ b/drivers/regulator/max77693.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mfd/max77693-common.h b/include/linux/mfd/max77693-common.h new file mode 100644 index 000000000000..7da4cc38e982 --- /dev/null +++ b/include/linux/mfd/max77693-common.h @@ -0,0 +1,44 @@ +/* + * Common data shared between Maxim 77693 and 77843 drivers + * + * Copyright (C) 2015 Samsung Electronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_MFD_MAX77693_COMMON_H +#define __LINUX_MFD_MAX77693_COMMON_H + +enum max77693_types { + TYPE_MAX77693_UNKNOWN, + TYPE_MAX77693, +}; + +/* + * Shared also with max77843. 
+ */ +struct max77693_dev { + struct device *dev; + struct i2c_client *i2c; /* 0xCC , PMIC, Charger, Flash LED */ + struct i2c_client *i2c_muic; /* 0x4A , MUIC */ + struct i2c_client *i2c_haptic; /* MAX77693: 0x90 , Haptic */ + + enum max77693_types type; + + struct regmap *regmap; + struct regmap *regmap_muic; + struct regmap *regmap_haptic; /* Only MAX77693 */ + + struct regmap_irq_chip_data *irq_data_led; + struct regmap_irq_chip_data *irq_data_topsys; + struct regmap_irq_chip_data *irq_data_chg; /* Only MAX77693 */ + struct regmap_irq_chip_data *irq_data_muic; + + int irq; +}; + + +#endif /* __LINUX_MFD_MAX77693_COMMON_H */ diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index e3c0afff38d3..8c4143c0c651 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -529,29 +529,4 @@ enum max77693_irq_muic { MAX77693_MUIC_IRQ_NR, }; -enum max77693_types { - TYPE_MAX77693_UNKNOWN, - TYPE_MAX77693, -}; - -struct max77693_dev { - struct device *dev; - struct i2c_client *i2c; /* 0xCC , PMIC, Charger, Flash LED */ - struct i2c_client *muic; /* 0x4A , MUIC */ - struct i2c_client *haptic; /* 0x90 , Haptic */ - - enum max77693_types type; - - struct regmap *regmap; - struct regmap *regmap_muic; - struct regmap *regmap_haptic; - - struct regmap_irq_chip_data *irq_data_led; - struct regmap_irq_chip_data *irq_data_topsys; - struct regmap_irq_chip_data *irq_data_charger; - struct regmap_irq_chip_data *irq_data_muic; - - int irq; -}; - #endif /* __LINUX_MFD_MAX77693_PRIV_H */ -- cgit v1.2.3-70-g09d2 From bc1aadc18621ccf93fb33ecbb847b422c354899d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jul 2015 21:59:51 +0900 Subject: drivers: max77843: Switch to common max77693 state container Switch to the same definition of state container as in MAX77693 drivers. This will allow usage of one regulator driver in both devices: MAX77693 and MAX77843. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Dmitry Torokhov Acked-by: Lee Jones Acked-by: Chanwoo Choi Signed-off-by: Mark Brown --- drivers/extcon/extcon-max77843.c | 17 +++++++++-------- drivers/input/misc/max77843-haptic.c | 3 ++- drivers/mfd/max77843.c | 20 +++++++++++--------- drivers/regulator/max77843.c | 6 ++++-- include/linux/mfd/max77693-common.h | 5 +++++ include/linux/mfd/max77843-private.h | 20 -------------------- 6 files changed, 31 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c index fac2f1417a79..4dfe0a6337d8 100644 --- a/drivers/extcon/extcon-max77843.c +++ b/drivers/extcon/extcon-max77843.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ enum max77843_muic_status { struct max77843_muic_info { struct device *dev; - struct max77843 *max77843; + struct max77693_dev *max77843; struct extcon_dev *edev; struct mutex mutex; @@ -198,7 +199,7 @@ static const struct regmap_irq_chip max77843_muic_irq_chip = { static int max77843_muic_set_path(struct max77843_muic_info *info, u8 val, bool attached) { - struct max77843 *max77843 = info->max77843; + struct max77693_dev *max77843 = info->max77843; int ret = 0; unsigned int ctrl1, ctrl2; @@ -539,7 +540,7 @@ static void max77843_muic_irq_work(struct work_struct *work) { struct max77843_muic_info *info = container_of(work, struct max77843_muic_info, irq_work); - struct max77843 *max77843 = info->max77843; + struct max77693_dev *max77843 = info->max77843; int ret = 0; mutex_lock(&info->mutex); @@ -615,7 +616,7 @@ static void max77843_muic_detect_cable_wq(struct work_struct *work) { struct max77843_muic_info *info = container_of(to_delayed_work(work), struct max77843_muic_info, wq_detcable); - struct max77843 *max77843 = info->max77843; + struct max77693_dev *max77843 = info->max77843; int chg_type, adc, ret; bool attached; @@ -656,7 +657,7 @@ err_cable_wq: static int 
max77843_muic_set_debounce_time(struct max77843_muic_info *info, enum max77843_muic_adc_debounce_time time) { - struct max77843 *max77843 = info->max77843; + struct max77693_dev *max77843 = info->max77843; int ret; switch (time) { @@ -681,7 +682,7 @@ static int max77843_muic_set_debounce_time(struct max77843_muic_info *info, return 0; } -static int max77843_init_muic_regmap(struct max77843 *max77843) +static int max77843_init_muic_regmap(struct max77693_dev *max77843) { int ret; @@ -720,7 +721,7 @@ err_muic_i2c: static int max77843_muic_probe(struct platform_device *pdev) { - struct max77843 *max77843 = dev_get_drvdata(pdev->dev.parent); + struct max77693_dev *max77843 = dev_get_drvdata(pdev->dev.parent); struct max77843_muic_info *info; unsigned int id; int i, ret; @@ -821,7 +822,7 @@ err_muic_irq: static int max77843_muic_remove(struct platform_device *pdev) { struct max77843_muic_info *info = platform_get_drvdata(pdev); - struct max77843 *max77843 = info->max77843; + struct max77693_dev *max77843 = info->max77843; cancel_work_sync(&info->irq_work); regmap_del_irq_chip(max77843->irq, max77843->irq_data_muic); diff --git a/drivers/input/misc/max77843-haptic.c b/drivers/input/misc/max77843-haptic.c index dccbb465a055..30da81ab5a21 100644 --- a/drivers/input/misc/max77843-haptic.c +++ b/drivers/input/misc/max77843-haptic.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -243,7 +244,7 @@ static void max77843_haptic_close(struct input_dev *dev) static int max77843_haptic_probe(struct platform_device *pdev) { - struct max77843 *max77843 = dev_get_drvdata(pdev->dev.parent); + struct max77693_dev *max77843 = dev_get_drvdata(pdev->dev.parent); struct max77843_haptic *haptic; int error; diff --git a/drivers/mfd/max77843.c b/drivers/mfd/max77843.c index a354ac677ec7..c52162ea3d0a 100644 --- a/drivers/mfd/max77843.c +++ b/drivers/mfd/max77843.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -71,7 
+72,7 @@ static const struct regmap_irq_chip max77843_irq_chip = { }; /* Charger and Charger regulator use same regmap. */ -static int max77843_chg_init(struct max77843 *max77843) +static int max77843_chg_init(struct max77693_dev *max77843) { int ret; @@ -101,7 +102,7 @@ err_chg_i2c: static int max77843_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { - struct max77843 *max77843; + struct max77693_dev *max77843; unsigned int reg_data; int ret; @@ -113,6 +114,7 @@ static int max77843_probe(struct i2c_client *i2c, max77843->dev = &i2c->dev; max77843->i2c = i2c; max77843->irq = i2c->irq; + max77843->type = id->driver_data; max77843->regmap = devm_regmap_init_i2c(i2c, &max77843_regmap_config); @@ -123,7 +125,7 @@ static int max77843_probe(struct i2c_client *i2c, ret = regmap_add_irq_chip(max77843->regmap, max77843->irq, IRQF_TRIGGER_LOW | IRQF_ONESHOT | IRQF_SHARED, - 0, &max77843_irq_chip, &max77843->irq_data); + 0, &max77843_irq_chip, &max77843->irq_data_topsys); if (ret) { dev_err(&i2c->dev, "Failed to add TOPSYS IRQ chip\n"); return ret; @@ -164,18 +166,18 @@ static int max77843_probe(struct i2c_client *i2c, return 0; err_pmic_id: - regmap_del_irq_chip(max77843->irq, max77843->irq_data); + regmap_del_irq_chip(max77843->irq, max77843->irq_data_topsys); return ret; } static int max77843_remove(struct i2c_client *i2c) { - struct max77843 *max77843 = i2c_get_clientdata(i2c); + struct max77693_dev *max77843 = i2c_get_clientdata(i2c); mfd_remove_devices(max77843->dev); - regmap_del_irq_chip(max77843->irq, max77843->irq_data); + regmap_del_irq_chip(max77843->irq, max77843->irq_data_topsys); i2c_unregister_device(max77843->i2c_chg); @@ -188,7 +190,7 @@ static const struct of_device_id max77843_dt_match[] = { }; static const struct i2c_device_id max77843_id[] = { - { "max77843", }, + { "max77843", TYPE_MAX77843, }, { }, }; MODULE_DEVICE_TABLE(i2c, max77843_id); @@ -196,7 +198,7 @@ MODULE_DEVICE_TABLE(i2c, max77843_id); static int __maybe_unused 
max77843_suspend(struct device *dev) { struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); - struct max77843 *max77843 = i2c_get_clientdata(i2c); + struct max77693_dev *max77843 = i2c_get_clientdata(i2c); disable_irq(max77843->irq); if (device_may_wakeup(dev)) @@ -208,7 +210,7 @@ static int __maybe_unused max77843_suspend(struct device *dev) static int __maybe_unused max77843_resume(struct device *dev) { struct i2c_client *i2c = container_of(dev, struct i2c_client, dev); - struct max77843 *max77843 = i2c_get_clientdata(i2c); + struct max77693_dev *max77843 = i2c_get_clientdata(i2c); if (device_may_wakeup(dev)) disable_irq_wake(max77843->irq); diff --git a/drivers/regulator/max77843.c b/drivers/regulator/max77843.c index f4fd0d3cfa6e..9926247aae6b 100644 --- a/drivers/regulator/max77843.c +++ b/drivers/regulator/max77843.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -130,7 +131,8 @@ static const struct regulator_desc max77843_supported_regulators[] = { }, }; -static struct regmap *max77843_get_regmap(struct max77843 *max77843, int reg_id) +static struct regmap *max77843_get_regmap(struct max77693_dev *max77843, + int reg_id) { switch (reg_id) { case MAX77843_SAFEOUT1: @@ -145,7 +147,7 @@ static struct regmap *max77843_get_regmap(struct max77843 *max77843, int reg_id) static int max77843_regulator_probe(struct platform_device *pdev) { - struct max77843 *max77843 = dev_get_drvdata(pdev->dev.parent); + struct max77693_dev *max77843 = dev_get_drvdata(pdev->dev.parent); struct regulator_config config = {}; int i; diff --git a/include/linux/mfd/max77693-common.h b/include/linux/mfd/max77693-common.h index 7da4cc38e982..095b121aa725 100644 --- a/include/linux/mfd/max77693-common.h +++ b/include/linux/mfd/max77693-common.h @@ -15,6 +15,9 @@ enum max77693_types { TYPE_MAX77693_UNKNOWN, TYPE_MAX77693, + TYPE_MAX77843, + + TYPE_MAX77693_NUM, }; /* @@ -25,12 +28,14 @@ struct max77693_dev { struct i2c_client *i2c; /* 0xCC , PMIC, 
Charger, Flash LED */ struct i2c_client *i2c_muic; /* 0x4A , MUIC */ struct i2c_client *i2c_haptic; /* MAX77693: 0x90 , Haptic */ + struct i2c_client *i2c_chg; /* MAX77843: 0xD2, Charger */ enum max77693_types type; struct regmap *regmap; struct regmap *regmap_muic; struct regmap *regmap_haptic; /* Only MAX77693 */ + struct regmap *regmap_chg; /* Only MAX77843 */ struct regmap_irq_chip_data *irq_data_led; struct regmap_irq_chip_data *irq_data_topsys; diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h index 7178ace8379e..0121d9440340 100644 --- a/include/linux/mfd/max77843-private.h +++ b/include/linux/mfd/max77843-private.h @@ -431,24 +431,4 @@ enum max77843_irq_muic { #define MAX77843_REG_SAFEOUTCTRL_SAFEOUT2_MASK \ (0x3 << SAFEOUTCTRL_SAFEOUT2_SHIFT) -struct max77843 { - struct device *dev; - - struct i2c_client *i2c; - struct i2c_client *i2c_chg; - struct i2c_client *i2c_fuel; - struct i2c_client *i2c_muic; - - struct regmap *regmap; - struct regmap *regmap_chg; - struct regmap *regmap_fuel; - struct regmap *regmap_muic; - - struct regmap_irq_chip_data *irq_data; - struct regmap_irq_chip_data *irq_data_chg; - struct regmap_irq_chip_data *irq_data_fuel; - struct regmap_irq_chip_data *irq_data_muic; - - int irq; -}; #endif /* __MAX77843_H__ */ -- cgit v1.2.3-70-g09d2 From cceb433a1e2930301b33c79016eff147eb555cea Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jul 2015 21:59:52 +0900 Subject: mfd/extcon: max77693: Rename defines to allow inclusion with max77843 Add MAX77693 prefix to some of the defines used in max77693 extcon driver so the max77693-private.h can be included simultaneously with max77843-private.h. Additionally use BIT() macro in header. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Acked-by: Chanwoo Choi Signed-off-by: Mark Brown --- drivers/extcon/extcon-max77693.c | 72 +++++++++++++------------ include/linux/mfd/max77693-private.h | 102 +++++++++++++++++------------------ 2 files changed, 89 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c index c7bb180cfff4..35b9e118b2fb 100644 --- a/drivers/extcon/extcon-max77693.c +++ b/drivers/extcon/extcon-max77693.c @@ -43,7 +43,7 @@ static struct max77693_reg_data default_init_data[] = { { /* STATUS2 - [3]ChgDetRun */ .addr = MAX77693_MUIC_REG_STATUS2, - .data = STATUS2_CHGDETRUN_MASK, + .data = MAX77693_STATUS2_CHGDETRUN_MASK, }, { /* INTMASK1 - Unmask [3]ADC1KM,[0]ADCM */ .addr = MAX77693_MUIC_REG_INTMASK1, @@ -236,7 +236,7 @@ static int max77693_muic_set_debounce_time(struct max77693_muic_info *info, */ ret = regmap_write(info->max77693->regmap_muic, MAX77693_MUIC_REG_CTRL3, - time << CONTROL3_ADCDBSET_SHIFT); + time << MAX77693_CONTROL3_ADCDBSET_SHIFT); if (ret) { dev_err(info->dev, "failed to set ADC debounce time\n"); return ret; @@ -269,7 +269,7 @@ static int max77693_muic_set_path(struct max77693_muic_info *info, if (attached) ctrl1 = val; else - ctrl1 = CONTROL1_SW_OPEN; + ctrl1 = MAX77693_CONTROL1_SW_OPEN; ret = regmap_update_bits(info->max77693->regmap_muic, MAX77693_MUIC_REG_CTRL1, COMP_SW_MASK, ctrl1); @@ -279,13 +279,14 @@ static int max77693_muic_set_path(struct max77693_muic_info *info, } if (attached) - ctrl2 |= CONTROL2_CPEN_MASK; /* LowPwr=0, CPEn=1 */ + ctrl2 |= MAX77693_CONTROL2_CPEN_MASK; /* LowPwr=0, CPEn=1 */ else - ctrl2 |= CONTROL2_LOWPWR_MASK; /* LowPwr=1, CPEn=0 */ + ctrl2 |= MAX77693_CONTROL2_LOWPWR_MASK; /* LowPwr=1, CPEn=0 */ ret = regmap_update_bits(info->max77693->regmap_muic, MAX77693_MUIC_REG_CTRL2, - CONTROL2_LOWPWR_MASK | CONTROL2_CPEN_MASK, ctrl2); + MAX77693_CONTROL2_LOWPWR_MASK | MAX77693_CONTROL2_CPEN_MASK, + 
ctrl2); if (ret < 0) { dev_err(info->dev, "failed to update MUIC register\n"); return ret; @@ -327,8 +328,8 @@ static int max77693_muic_get_cable_type(struct max77693_muic_info *info, * Read ADC value to check cable type and decide cable state * according to cable type */ - adc = info->status[0] & STATUS1_ADC_MASK; - adc >>= STATUS1_ADC_SHIFT; + adc = info->status[0] & MAX77693_STATUS1_ADC_MASK; + adc >>= MAX77693_STATUS1_ADC_SHIFT; /* * Check current cable state/cable type and store cable type @@ -351,8 +352,8 @@ static int max77693_muic_get_cable_type(struct max77693_muic_info *info, * Read ADC value to check cable type and decide cable state * according to cable type */ - adc = info->status[0] & STATUS1_ADC_MASK; - adc >>= STATUS1_ADC_SHIFT; + adc = info->status[0] & MAX77693_STATUS1_ADC_MASK; + adc >>= MAX77693_STATUS1_ADC_SHIFT; /* * Check current cable state/cable type and store cable type @@ -367,13 +368,13 @@ static int max77693_muic_get_cable_type(struct max77693_muic_info *info, } else { *attached = true; - adclow = info->status[0] & STATUS1_ADCLOW_MASK; - adclow >>= STATUS1_ADCLOW_SHIFT; - adc1k = info->status[0] & STATUS1_ADC1K_MASK; - adc1k >>= STATUS1_ADC1K_SHIFT; + adclow = info->status[0] & MAX77693_STATUS1_ADCLOW_MASK; + adclow >>= MAX77693_STATUS1_ADCLOW_SHIFT; + adc1k = info->status[0] & MAX77693_STATUS1_ADC1K_MASK; + adc1k >>= MAX77693_STATUS1_ADC1K_SHIFT; - vbvolt = info->status[1] & STATUS2_VBVOLT_MASK; - vbvolt >>= STATUS2_VBVOLT_SHIFT; + vbvolt = info->status[1] & MAX77693_STATUS2_VBVOLT_MASK; + vbvolt >>= MAX77693_STATUS2_VBVOLT_SHIFT; /** * [0x1|VBVolt|ADCLow|ADC1K] @@ -398,8 +399,8 @@ static int max77693_muic_get_cable_type(struct max77693_muic_info *info, * Read charger type to check cable type and decide cable state * according to type of charger cable. 
*/ - chg_type = info->status[1] & STATUS2_CHGTYP_MASK; - chg_type >>= STATUS2_CHGTYP_SHIFT; + chg_type = info->status[1] & MAX77693_STATUS2_CHGTYP_MASK; + chg_type >>= MAX77693_STATUS2_CHGTYP_SHIFT; if (chg_type == MAX77693_CHARGER_TYPE_NONE) { *attached = false; @@ -423,10 +424,10 @@ static int max77693_muic_get_cable_type(struct max77693_muic_info *info, * Read ADC value to check cable type and decide cable state * according to cable type */ - adc = info->status[0] & STATUS1_ADC_MASK; - adc >>= STATUS1_ADC_SHIFT; - chg_type = info->status[1] & STATUS2_CHGTYP_MASK; - chg_type >>= STATUS2_CHGTYP_SHIFT; + adc = info->status[0] & MAX77693_STATUS1_ADC_MASK; + adc >>= MAX77693_STATUS1_ADC_SHIFT; + chg_type = info->status[1] & MAX77693_STATUS2_CHGTYP_MASK; + chg_type >>= MAX77693_STATUS2_CHGTYP_SHIFT; if (adc == MAX77693_MUIC_ADC_OPEN && chg_type == MAX77693_CHARGER_TYPE_NONE) @@ -438,8 +439,8 @@ static int max77693_muic_get_cable_type(struct max77693_muic_info *info, * Read vbvolt field, if vbvolt is 1, * this cable is used for charging. 
*/ - vbvolt = info->status[1] & STATUS2_VBVOLT_MASK; - vbvolt >>= STATUS2_VBVOLT_SHIFT; + vbvolt = info->status[1] & MAX77693_STATUS2_VBVOLT_MASK; + vbvolt >>= MAX77693_STATUS2_VBVOLT_SHIFT; cable_type = vbvolt; break; @@ -521,7 +522,8 @@ static int max77693_muic_dock_handler(struct max77693_muic_info *info, } /* Dock-Car/Desk/Audio, PATH:AUDIO */ - ret = max77693_muic_set_path(info, CONTROL1_SW_AUDIO, attached); + ret = max77693_muic_set_path(info, MAX77693_CONTROL1_SW_AUDIO, + attached); if (ret < 0) return ret; extcon_set_cable_state_(info->edev, dock_id, attached); @@ -586,14 +588,16 @@ static int max77693_muic_adc_ground_handler(struct max77693_muic_info *info) case MAX77693_MUIC_GND_USB_HOST: case MAX77693_MUIC_GND_USB_HOST_VB: /* USB_HOST, PATH: AP_USB */ - ret = max77693_muic_set_path(info, CONTROL1_SW_USB, attached); + ret = max77693_muic_set_path(info, MAX77693_CONTROL1_SW_USB, + attached); if (ret < 0) return ret; extcon_set_cable_state_(info->edev, EXTCON_USB_HOST, attached); break; case MAX77693_MUIC_GND_AV_CABLE_LOAD: /* Audio Video Cable with load, PATH:AUDIO */ - ret = max77693_muic_set_path(info, CONTROL1_SW_AUDIO, attached); + ret = max77693_muic_set_path(info, MAX77693_CONTROL1_SW_AUDIO, + attached); if (ret < 0) return ret; extcon_set_cable_state_(info->edev, EXTCON_USB, attached); @@ -616,7 +620,7 @@ static int max77693_muic_jig_handler(struct max77693_muic_info *info, int cable_type, bool attached) { int ret = 0; - u8 path = CONTROL1_SW_OPEN; + u8 path = MAX77693_CONTROL1_SW_OPEN; dev_info(info->dev, "external connector is %s (adc:0x%02x)\n", @@ -626,12 +630,12 @@ static int max77693_muic_jig_handler(struct max77693_muic_info *info, case MAX77693_MUIC_ADC_FACTORY_MODE_USB_OFF: /* ADC_JIG_USB_OFF */ case MAX77693_MUIC_ADC_FACTORY_MODE_USB_ON: /* ADC_JIG_USB_ON */ /* PATH:AP_USB */ - path = CONTROL1_SW_USB; + path = MAX77693_CONTROL1_SW_USB; break; case MAX77693_MUIC_ADC_FACTORY_MODE_UART_OFF: /* ADC_JIG_UART_OFF */ case 
MAX77693_MUIC_ADC_FACTORY_MODE_UART_ON: /* ADC_JIG_UART_ON */ /* PATH:AP_UART */ - path = CONTROL1_SW_UART; + path = MAX77693_CONTROL1_SW_UART; break; default: dev_err(info->dev, "failed to detect %s jig cable\n", @@ -1181,12 +1185,12 @@ static int max77693_muic_probe(struct platform_device *pdev) if (muic_pdata->path_uart) info->path_uart = muic_pdata->path_uart; else - info->path_uart = CONTROL1_SW_UART; + info->path_uart = MAX77693_CONTROL1_SW_UART; if (muic_pdata->path_usb) info->path_usb = muic_pdata->path_usb; else - info->path_usb = CONTROL1_SW_USB; + info->path_usb = MAX77693_CONTROL1_SW_USB; /* * Default delay time for detecting cable state @@ -1198,8 +1202,8 @@ static int max77693_muic_probe(struct platform_device *pdev) else delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); } else { - info->path_usb = CONTROL1_SW_USB; - info->path_uart = CONTROL1_SW_UART; + info->path_usb = MAX77693_CONTROL1_SW_USB; + info->path_uart = MAX77693_CONTROL1_SW_UART; delay_jiffies = msecs_to_jiffies(DELAY_MS_DEFAULT); } diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index 8c4143c0c651..3c7a63b98ad6 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -310,30 +310,30 @@ enum max77693_muic_reg { #define INTMASK2_CHGTYP_MASK (1 << INTMASK2_CHGTYP_SHIFT) /* MAX77693 MUIC - STATUS1~3 Register */ -#define STATUS1_ADC_SHIFT (0) -#define STATUS1_ADCLOW_SHIFT (5) -#define STATUS1_ADCERR_SHIFT (6) -#define STATUS1_ADC1K_SHIFT (7) -#define STATUS1_ADC_MASK (0x1f << STATUS1_ADC_SHIFT) -#define STATUS1_ADCLOW_MASK (0x1 << STATUS1_ADCLOW_SHIFT) -#define STATUS1_ADCERR_MASK (0x1 << STATUS1_ADCERR_SHIFT) -#define STATUS1_ADC1K_MASK (0x1 << STATUS1_ADC1K_SHIFT) - -#define STATUS2_CHGTYP_SHIFT (0) -#define STATUS2_CHGDETRUN_SHIFT (3) -#define STATUS2_DCDTMR_SHIFT (4) -#define STATUS2_DXOVP_SHIFT (5) -#define STATUS2_VBVOLT_SHIFT (6) -#define STATUS2_VIDRM_SHIFT (7) -#define STATUS2_CHGTYP_MASK (0x7 << 
STATUS2_CHGTYP_SHIFT) -#define STATUS2_CHGDETRUN_MASK (0x1 << STATUS2_CHGDETRUN_SHIFT) -#define STATUS2_DCDTMR_MASK (0x1 << STATUS2_DCDTMR_SHIFT) -#define STATUS2_DXOVP_MASK (0x1 << STATUS2_DXOVP_SHIFT) -#define STATUS2_VBVOLT_MASK (0x1 << STATUS2_VBVOLT_SHIFT) -#define STATUS2_VIDRM_MASK (0x1 << STATUS2_VIDRM_SHIFT) - -#define STATUS3_OVP_SHIFT (2) -#define STATUS3_OVP_MASK (0x1 << STATUS3_OVP_SHIFT) +#define MAX77693_STATUS1_ADC_SHIFT 0 +#define MAX77693_STATUS1_ADCLOW_SHIFT 5 +#define MAX77693_STATUS1_ADCERR_SHIFT 6 +#define MAX77693_STATUS1_ADC1K_SHIFT 7 +#define MAX77693_STATUS1_ADC_MASK (0x1f << MAX77693_STATUS1_ADC_SHIFT) +#define MAX77693_STATUS1_ADCLOW_MASK BIT(MAX77693_STATUS1_ADCLOW_SHIFT) +#define MAX77693_STATUS1_ADCERR_MASK BIT(MAX77693_STATUS1_ADCERR_SHIFT) +#define MAX77693_STATUS1_ADC1K_MASK BIT(MAX77693_STATUS1_ADC1K_SHIFT) + +#define MAX77693_STATUS2_CHGTYP_SHIFT 0 +#define MAX77693_STATUS2_CHGDETRUN_SHIFT 3 +#define MAX77693_STATUS2_DCDTMR_SHIFT 4 +#define MAX77693_STATUS2_DXOVP_SHIFT 5 +#define MAX77693_STATUS2_VBVOLT_SHIFT 6 +#define MAX77693_STATUS2_VIDRM_SHIFT 7 +#define MAX77693_STATUS2_CHGTYP_MASK (0x7 << MAX77693_STATUS2_CHGTYP_SHIFT) +#define MAX77693_STATUS2_CHGDETRUN_MASK BIT(MAX77693_STATUS2_CHGDETRUN_SHIFT) +#define MAX77693_STATUS2_DCDTMR_MASK BIT(MAX77693_STATUS2_DCDTMR_SHIFT) +#define MAX77693_STATUS2_DXOVP_MASK BIT(MAX77693_STATUS2_DXOVP_SHIFT) +#define MAX77693_STATUS2_VBVOLT_MASK BIT(MAX77693_STATUS2_VBVOLT_SHIFT) +#define MAX77693_STATUS2_VIDRM_MASK BIT(MAX77693_STATUS2_VIDRM_SHIFT) + +#define MAX77693_STATUS3_OVP_SHIFT 2 +#define MAX77693_STATUS3_OVP_MASK BIT(MAX77693_STATUS3_OVP_SHIFT) /* MAX77693 CDETCTRL1~2 register */ #define CDETCTRL1_CHGDETEN_SHIFT (0) @@ -362,38 +362,38 @@ enum max77693_muic_reg { #define COMN1SW_MASK (0x7 << COMN1SW_SHIFT) #define COMP2SW_MASK (0x7 << COMP2SW_SHIFT) #define COMP_SW_MASK (COMP2SW_MASK | COMN1SW_MASK) -#define CONTROL1_SW_USB ((1 << COMP2SW_SHIFT) \ +#define MAX77693_CONTROL1_SW_USB ((1 
<< COMP2SW_SHIFT) \ | (1 << COMN1SW_SHIFT)) -#define CONTROL1_SW_AUDIO ((2 << COMP2SW_SHIFT) \ +#define MAX77693_CONTROL1_SW_AUDIO ((2 << COMP2SW_SHIFT) \ | (2 << COMN1SW_SHIFT)) -#define CONTROL1_SW_UART ((3 << COMP2SW_SHIFT) \ +#define MAX77693_CONTROL1_SW_UART ((3 << COMP2SW_SHIFT) \ | (3 << COMN1SW_SHIFT)) -#define CONTROL1_SW_OPEN ((0 << COMP2SW_SHIFT) \ +#define MAX77693_CONTROL1_SW_OPEN ((0 << COMP2SW_SHIFT) \ | (0 << COMN1SW_SHIFT)) -#define CONTROL2_LOWPWR_SHIFT (0) -#define CONTROL2_ADCEN_SHIFT (1) -#define CONTROL2_CPEN_SHIFT (2) -#define CONTROL2_SFOUTASRT_SHIFT (3) -#define CONTROL2_SFOUTORD_SHIFT (4) -#define CONTROL2_ACCDET_SHIFT (5) -#define CONTROL2_USBCPINT_SHIFT (6) -#define CONTROL2_RCPS_SHIFT (7) -#define CONTROL2_LOWPWR_MASK (0x1 << CONTROL2_LOWPWR_SHIFT) -#define CONTROL2_ADCEN_MASK (0x1 << CONTROL2_ADCEN_SHIFT) -#define CONTROL2_CPEN_MASK (0x1 << CONTROL2_CPEN_SHIFT) -#define CONTROL2_SFOUTASRT_MASK (0x1 << CONTROL2_SFOUTASRT_SHIFT) -#define CONTROL2_SFOUTORD_MASK (0x1 << CONTROL2_SFOUTORD_SHIFT) -#define CONTROL2_ACCDET_MASK (0x1 << CONTROL2_ACCDET_SHIFT) -#define CONTROL2_USBCPINT_MASK (0x1 << CONTROL2_USBCPINT_SHIFT) -#define CONTROL2_RCPS_MASK (0x1 << CONTROL2_RCPS_SHIFT) - -#define CONTROL3_JIGSET_SHIFT (0) -#define CONTROL3_BTLDSET_SHIFT (2) -#define CONTROL3_ADCDBSET_SHIFT (4) -#define CONTROL3_JIGSET_MASK (0x3 << CONTROL3_JIGSET_SHIFT) -#define CONTROL3_BTLDSET_MASK (0x3 << CONTROL3_BTLDSET_SHIFT) -#define CONTROL3_ADCDBSET_MASK (0x3 << CONTROL3_ADCDBSET_SHIFT) +#define MAX77693_CONTROL2_LOWPWR_SHIFT 0 +#define MAX77693_CONTROL2_ADCEN_SHIFT 1 +#define MAX77693_CONTROL2_CPEN_SHIFT 2 +#define MAX77693_CONTROL2_SFOUTASRT_SHIFT 3 +#define MAX77693_CONTROL2_SFOUTORD_SHIFT 4 +#define MAX77693_CONTROL2_ACCDET_SHIFT 5 +#define MAX77693_CONTROL2_USBCPINT_SHIFT 6 +#define MAX77693_CONTROL2_RCPS_SHIFT 7 +#define MAX77693_CONTROL2_LOWPWR_MASK BIT(MAX77693_CONTROL2_LOWPWR_SHIFT) +#define MAX77693_CONTROL2_ADCEN_MASK 
BIT(MAX77693_CONTROL2_ADCEN_SHIFT) +#define MAX77693_CONTROL2_CPEN_MASK BIT(MAX77693_CONTROL2_CPEN_SHIFT) +#define MAX77693_CONTROL2_SFOUTASRT_MASK BIT(MAX77693_CONTROL2_SFOUTASRT_SHIFT) +#define MAX77693_CONTROL2_SFOUTORD_MASK BIT(MAX77693_CONTROL2_SFOUTORD_SHIFT) +#define MAX77693_CONTROL2_ACCDET_MASK BIT(MAX77693_CONTROL2_ACCDET_SHIFT) +#define MAX77693_CONTROL2_USBCPINT_MASK BIT(MAX77693_CONTROL2_USBCPINT_SHIFT) +#define MAX77693_CONTROL2_RCPS_MASK BIT(MAX77693_CONTROL2_RCPS_SHIFT) + +#define MAX77693_CONTROL3_JIGSET_SHIFT 0 +#define MAX77693_CONTROL3_BTLDSET_SHIFT 2 +#define MAX77693_CONTROL3_ADCDBSET_SHIFT 4 +#define MAX77693_CONTROL3_JIGSET_MASK (0x3 << MAX77693_CONTROL3_JIGSET_SHIFT) +#define MAX77693_CONTROL3_BTLDSET_MASK (0x3 << MAX77693_CONTROL3_BTLDSET_SHIFT) +#define MAX77693_CONTROL3_ADCDBSET_MASK (0x3 << MAX77693_CONTROL3_ADCDBSET_SHIFT) /* Slave addr = 0x90: Haptic */ enum max77693_haptic_reg { -- cgit v1.2.3-70-g09d2 From 309a3e00a511a233acb25eec567a4b11c99d016a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 15 Jul 2015 21:59:53 +0900 Subject: mfd/extcon: max77843: Rename defines to allow inclusion with max77693 Add MAX77843_MUIC prefix to some of the defines used in max77843 extcon driver so the max77693-private.h can be included simultaneously with max77843-private.h. 
Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Acked-by: Chanwoo Choi Signed-off-by: Mark Brown --- drivers/extcon/extcon-max77843.c | 49 +++++++---- include/linux/mfd/max77843-private.h | 154 +++++++++++++++++------------------ 2 files changed, 109 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c index 4dfe0a6337d8..f652c4199870 100644 --- a/drivers/extcon/extcon-max77843.c +++ b/drivers/extcon/extcon-max77843.c @@ -206,11 +206,11 @@ static int max77843_muic_set_path(struct max77843_muic_info *info, if (attached) ctrl1 = val; else - ctrl1 = CONTROL1_SW_OPEN; + ctrl1 = MAX77843_MUIC_CONTROL1_SW_OPEN; ret = regmap_update_bits(max77843->regmap_muic, MAX77843_MUIC_REG_CONTROL1, - CONTROL1_COM_SW, ctrl1); + MAX77843_MUIC_CONTROL1_COM_SW, ctrl1); if (ret < 0) { dev_err(info->dev, "Cannot switch MUIC port\n"); return ret; @@ -244,7 +244,7 @@ static int max77843_muic_get_cable_type(struct max77843_muic_info *info, adc = info->status[MAX77843_MUIC_STATUS1] & MAX77843_MUIC_STATUS1_ADC_MASK; - adc >>= STATUS1_ADC_SHIFT; + adc >>= MAX77843_MUIC_STATUS1_ADC_SHIFT; switch (group) { case MAX77843_CABLE_GROUP_ADC: @@ -310,7 +310,7 @@ static int max77843_muic_get_cable_type(struct max77843_muic_info *info, /* Get VBVolt register bit */ gnd_type |= (info->status[MAX77843_MUIC_STATUS2] & MAX77843_MUIC_STATUS2_VBVOLT_MASK); - gnd_type >>= STATUS2_VBVOLT_SHIFT; + gnd_type >>= MAX77843_MUIC_STATUS2_VBVOLT_SHIFT; /* Offset of GND cable */ gnd_type |= MAX77843_MUIC_GND_USB_HOST; @@ -339,7 +339,9 @@ static int max77843_muic_adc_gnd_handler(struct max77843_muic_info *info) switch (gnd_cable_type) { case MAX77843_MUIC_GND_USB_HOST: case MAX77843_MUIC_GND_USB_HOST_VB: - ret = max77843_muic_set_path(info, CONTROL1_SW_USB, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_USB, + attached); if (ret < 0) return ret; @@ -347,7 +349,9 @@ static int 
max77843_muic_adc_gnd_handler(struct max77843_muic_info *info) break; case MAX77843_MUIC_GND_MHL_VB: case MAX77843_MUIC_GND_MHL: - ret = max77843_muic_set_path(info, CONTROL1_SW_OPEN, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_OPEN, + attached); if (ret < 0) return ret; @@ -366,7 +370,7 @@ static int max77843_muic_jig_handler(struct max77843_muic_info *info, int cable_type, bool attached) { int ret; - u8 path = CONTROL1_SW_OPEN; + u8 path = MAX77843_MUIC_CONTROL1_SW_OPEN; dev_dbg(info->dev, "external connector is %s (adc:0x%02x)\n", attached ? "attached" : "detached", cable_type); @@ -374,10 +378,10 @@ static int max77843_muic_jig_handler(struct max77843_muic_info *info, switch (cable_type) { case MAX77843_MUIC_ADC_FACTORY_MODE_USB_OFF: case MAX77843_MUIC_ADC_FACTORY_MODE_USB_ON: - path = CONTROL1_SW_USB; + path = MAX77843_MUIC_CONTROL1_SW_USB; break; case MAX77843_MUIC_ADC_FACTORY_MODE_UART_OFF: - path = CONTROL1_SW_UART; + path = MAX77843_MUIC_CONTROL1_SW_UART; break; default: return -EINVAL; @@ -475,14 +479,18 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) switch (chg_type) { case MAX77843_MUIC_CHG_USB: - ret = max77843_muic_set_path(info, CONTROL1_SW_USB, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_USB, + attached); if (ret < 0) return ret; extcon_set_cable_state_(info->edev, EXTCON_USB, attached); break; case MAX77843_MUIC_CHG_DOWNSTREAM: - ret = max77843_muic_set_path(info, CONTROL1_SW_OPEN, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_OPEN, + attached); if (ret < 0) return ret; @@ -490,14 +498,18 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) attached); break; case MAX77843_MUIC_CHG_DEDICATED: - ret = max77843_muic_set_path(info, CONTROL1_SW_OPEN, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_OPEN, + attached); if (ret < 0) return ret; extcon_set_cable_state_(info->edev, EXTCON_TA, 
attached); break; case MAX77843_MUIC_CHG_SPECIAL_500MA: - ret = max77843_muic_set_path(info, CONTROL1_SW_OPEN, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_OPEN, + attached); if (ret < 0) return ret; @@ -505,7 +517,9 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) attached); break; case MAX77843_MUIC_CHG_SPECIAL_1A: - ret = max77843_muic_set_path(info, CONTROL1_SW_OPEN, attached); + ret = max77843_muic_set_path(info, + MAX77843_MUIC_CONTROL1_SW_OPEN, + attached); if (ret < 0) return ret; @@ -529,7 +543,8 @@ static int max77843_muic_chg_handler(struct max77843_muic_info *info) "failed to detect %s accessory (chg_type:0x%x)\n", attached ? "attached" : "detached", chg_type); - max77843_muic_set_path(info, CONTROL1_SW_OPEN, attached); + max77843_muic_set_path(info, MAX77843_MUIC_CONTROL1_SW_OPEN, + attached); return -EINVAL; } @@ -668,7 +683,7 @@ static int max77843_muic_set_debounce_time(struct max77843_muic_info *info, ret = regmap_update_bits(max77843->regmap_muic, MAX77843_MUIC_REG_CONTROL4, MAX77843_MUIC_CONTROL4_ADCDBSET_MASK, - time << CONTROL4_ADCDBSET_SHIFT); + time << MAX77843_MUIC_CONTROL4_ADCDBSET_SHIFT); if (ret < 0) { dev_err(info->dev, "Cannot write MUIC regmap\n"); return ret; @@ -769,7 +784,7 @@ static int max77843_muic_probe(struct platform_device *pdev) max77843_muic_set_debounce_time(info, MAX77843_DEBOUNCE_TIME_25MS); /* Set initial path for UART */ - max77843_muic_set_path(info, CONTROL1_SW_UART, true); + max77843_muic_set_path(info, MAX77843_MUIC_CONTROL1_SW_UART, true); /* Check revision number of MUIC device */ ret = regmap_read(max77843->regmap_muic, MAX77843_MUIC_REG_ID, &id); diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h index 0121d9440340..c19303b0ccfd 100644 --- a/include/linux/mfd/max77843-private.h +++ b/include/linux/mfd/max77843-private.h @@ -318,62 +318,62 @@ enum max77843_irq_muic { MAX77843_INTSRCMASK_SYS_MASK | 
MAX77843_INTSRCMASK_CHGR_MASK) /* MAX77843 STATUS register*/ -#define STATUS1_ADC_SHIFT 0 -#define STATUS1_ADCERROR_SHIFT 6 -#define STATUS1_ADC1K_SHIFT 7 -#define STATUS2_CHGTYP_SHIFT 0 -#define STATUS2_CHGDETRUN_SHIFT 3 -#define STATUS2_DCDTMR_SHIFT 4 -#define STATUS2_DXOVP_SHIFT 5 -#define STATUS2_VBVOLT_SHIFT 6 -#define STATUS3_VBADC_SHIFT 0 -#define STATUS3_VDNMON_SHIFT 4 -#define STATUS3_DNRES_SHIFT 5 -#define STATUS3_MPNACK_SHIFT 6 - -#define MAX77843_MUIC_STATUS1_ADC_MASK (0x1f << STATUS1_ADC_SHIFT) -#define MAX77843_MUIC_STATUS1_ADCERROR_MASK BIT(STATUS1_ADCERROR_SHIFT) -#define MAX77843_MUIC_STATUS1_ADC1K_MASK BIT(STATUS1_ADC1K_SHIFT) -#define MAX77843_MUIC_STATUS2_CHGTYP_MASK (0x7 << STATUS2_CHGTYP_SHIFT) -#define MAX77843_MUIC_STATUS2_CHGDETRUN_MASK BIT(STATUS2_CHGDETRUN_SHIFT) -#define MAX77843_MUIC_STATUS2_DCDTMR_MASK BIT(STATUS2_DCDTMR_SHIFT) -#define MAX77843_MUIC_STATUS2_DXOVP_MASK BIT(STATUS2_DXOVP_SHIFT) -#define MAX77843_MUIC_STATUS2_VBVOLT_MASK BIT(STATUS2_VBVOLT_SHIFT) -#define MAX77843_MUIC_STATUS3_VBADC_MASK (0xf << STATUS3_VBADC_SHIFT) -#define MAX77843_MUIC_STATUS3_VDNMON_MASK BIT(STATUS3_VDNMON_SHIFT) -#define MAX77843_MUIC_STATUS3_DNRES_MASK BIT(STATUS3_DNRES_SHIFT) -#define MAX77843_MUIC_STATUS3_MPNACK_MASK BIT(STATUS3_MPNACK_SHIFT) +#define MAX77843_MUIC_STATUS1_ADC_SHIFT 0 +#define MAX77843_MUIC_STATUS1_ADCERROR_SHIFT 6 +#define MAX77843_MUIC_STATUS1_ADC1K_SHIFT 7 +#define MAX77843_MUIC_STATUS2_CHGTYP_SHIFT 0 +#define MAX77843_MUIC_STATUS2_CHGDETRUN_SHIFT 3 +#define MAX77843_MUIC_STATUS2_DCDTMR_SHIFT 4 +#define MAX77843_MUIC_STATUS2_DXOVP_SHIFT 5 +#define MAX77843_MUIC_STATUS2_VBVOLT_SHIFT 6 +#define MAX77843_MUIC_STATUS3_VBADC_SHIFT 0 +#define MAX77843_MUIC_STATUS3_VDNMON_SHIFT 4 +#define MAX77843_MUIC_STATUS3_DNRES_SHIFT 5 +#define MAX77843_MUIC_STATUS3_MPNACK_SHIFT 6 + +#define MAX77843_MUIC_STATUS1_ADC_MASK (0x1f << MAX77843_MUIC_STATUS1_ADC_SHIFT) +#define MAX77843_MUIC_STATUS1_ADCERROR_MASK 
BIT(MAX77843_MUIC_STATUS1_ADCERROR_SHIFT) +#define MAX77843_MUIC_STATUS1_ADC1K_MASK BIT(MAX77843_MUIC_STATUS1_ADC1K_SHIFT) +#define MAX77843_MUIC_STATUS2_CHGTYP_MASK (0x7 << MAX77843_MUIC_STATUS2_CHGTYP_SHIFT) +#define MAX77843_MUIC_STATUS2_CHGDETRUN_MASK BIT(MAX77843_MUIC_STATUS2_CHGDETRUN_SHIFT) +#define MAX77843_MUIC_STATUS2_DCDTMR_MASK BIT(MAX77843_MUIC_STATUS2_DCDTMR_SHIFT) +#define MAX77843_MUIC_STATUS2_DXOVP_MASK BIT(MAX77843_MUIC_STATUS2_DXOVP_SHIFT) +#define MAX77843_MUIC_STATUS2_VBVOLT_MASK BIT(MAX77843_MUIC_STATUS2_VBVOLT_SHIFT) +#define MAX77843_MUIC_STATUS3_VBADC_MASK (0xf << MAX77843_MUIC_STATUS3_VBADC_SHIFT) +#define MAX77843_MUIC_STATUS3_VDNMON_MASK BIT(MAX77843_MUIC_STATUS3_VDNMON_SHIFT) +#define MAX77843_MUIC_STATUS3_DNRES_MASK BIT(MAX77843_MUIC_STATUS3_DNRES_SHIFT) +#define MAX77843_MUIC_STATUS3_MPNACK_MASK BIT(MAX77843_MUIC_STATUS3_MPNACK_SHIFT) /* MAX77843 CONTROL register */ -#define CONTROL1_COMP1SW_SHIFT 0 -#define CONTROL1_COMP2SW_SHIFT 3 -#define CONTROL1_IDBEN_SHIFT 7 -#define CONTROL2_LOWPWR_SHIFT 0 -#define CONTROL2_ADCEN_SHIFT 1 -#define CONTROL2_CPEN_SHIFT 2 -#define CONTROL2_ACC_DET_SHIFT 5 -#define CONTROL2_USBCPINT_SHIFT 6 -#define CONTROL2_RCPS_SHIFT 7 -#define CONTROL3_JIGSET_SHIFT 0 -#define CONTROL4_ADCDBSET_SHIFT 0 -#define CONTROL4_USBAUTO_SHIFT 4 -#define CONTROL4_FCTAUTO_SHIFT 5 -#define CONTROL4_ADCMODE_SHIFT 6 - -#define MAX77843_MUIC_CONTROL1_COMP1SW_MASK (0x7 << CONTROL1_COMP1SW_SHIFT) -#define MAX77843_MUIC_CONTROL1_COMP2SW_MASK (0x7 << CONTROL1_COMP2SW_SHIFT) -#define MAX77843_MUIC_CONTROL1_IDBEN_MASK BIT(CONTROL1_IDBEN_SHIFT) -#define MAX77843_MUIC_CONTROL2_LOWPWR_MASK BIT(CONTROL2_LOWPWR_SHIFT) -#define MAX77843_MUIC_CONTROL2_ADCEN_MASK BIT(CONTROL2_ADCEN_SHIFT) -#define MAX77843_MUIC_CONTROL2_CPEN_MASK BIT(CONTROL2_CPEN_SHIFT) -#define MAX77843_MUIC_CONTROL2_ACC_DET_MASK BIT(CONTROL2_ACC_DET_SHIFT) -#define MAX77843_MUIC_CONTROL2_USBCPINT_MASK BIT(CONTROL2_USBCPINT_SHIFT) -#define MAX77843_MUIC_CONTROL2_RCPS_MASK 
BIT(CONTROL2_RCPS_SHIFT) -#define MAX77843_MUIC_CONTROL3_JIGSET_MASK (0x3 << CONTROL3_JIGSET_SHIFT) -#define MAX77843_MUIC_CONTROL4_ADCDBSET_MASK (0x3 << CONTROL4_ADCDBSET_SHIFT) -#define MAX77843_MUIC_CONTROL4_USBAUTO_MASK BIT(CONTROL4_USBAUTO_SHIFT) -#define MAX77843_MUIC_CONTROL4_FCTAUTO_MASK BIT(CONTROL4_FCTAUTO_SHIFT) -#define MAX77843_MUIC_CONTROL4_ADCMODE_MASK (0x3 << CONTROL4_ADCMODE_SHIFT) +#define MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT 0 +#define MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT 3 +#define MAX77843_MUIC_CONTROL1_IDBEN_SHIFT 7 +#define MAX77843_MUIC_CONTROL2_LOWPWR_SHIFT 0 +#define MAX77843_MUIC_CONTROL2_ADCEN_SHIFT 1 +#define MAX77843_MUIC_CONTROL2_CPEN_SHIFT 2 +#define MAX77843_MUIC_CONTROL2_ACC_DET_SHIFT 5 +#define MAX77843_MUIC_CONTROL2_USBCPINT_SHIFT 6 +#define MAX77843_MUIC_CONTROL2_RCPS_SHIFT 7 +#define MAX77843_MUIC_CONTROL3_JIGSET_SHIFT 0 +#define MAX77843_MUIC_CONTROL4_ADCDBSET_SHIFT 0 +#define MAX77843_MUIC_CONTROL4_USBAUTO_SHIFT 4 +#define MAX77843_MUIC_CONTROL4_FCTAUTO_SHIFT 5 +#define MAX77843_MUIC_CONTROL4_ADCMODE_SHIFT 6 + +#define MAX77843_MUIC_CONTROL1_COMP1SW_MASK (0x7 << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT) +#define MAX77843_MUIC_CONTROL1_COMP2SW_MASK (0x7 << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT) +#define MAX77843_MUIC_CONTROL1_IDBEN_MASK BIT(MAX77843_MUIC_CONTROL1_IDBEN_SHIFT) +#define MAX77843_MUIC_CONTROL2_LOWPWR_MASK BIT(MAX77843_MUIC_CONTROL2_LOWPWR_SHIFT) +#define MAX77843_MUIC_CONTROL2_ADCEN_MASK BIT(MAX77843_MUIC_CONTROL2_ADCEN_SHIFT) +#define MAX77843_MUIC_CONTROL2_CPEN_MASK BIT(MAX77843_MUIC_CONTROL2_CPEN_SHIFT) +#define MAX77843_MUIC_CONTROL2_ACC_DET_MASK BIT(MAX77843_MUIC_CONTROL2_ACC_DET_SHIFT) +#define MAX77843_MUIC_CONTROL2_USBCPINT_MASK BIT(MAX77843_MUIC_CONTROL2_USBCPINT_SHIFT) +#define MAX77843_MUIC_CONTROL2_RCPS_MASK BIT(MAX77843_MUIC_CONTROL2_RCPS_SHIFT) +#define MAX77843_MUIC_CONTROL3_JIGSET_MASK (0x3 << MAX77843_MUIC_CONTROL3_JIGSET_SHIFT) +#define MAX77843_MUIC_CONTROL4_ADCDBSET_MASK (0x3 << 
MAX77843_MUIC_CONTROL4_ADCDBSET_SHIFT) +#define MAX77843_MUIC_CONTROL4_USBAUTO_MASK BIT(MAX77843_MUIC_CONTROL4_USBAUTO_SHIFT) +#define MAX77843_MUIC_CONTROL4_FCTAUTO_MASK BIT(MAX77843_MUIC_CONTROL4_FCTAUTO_SHIFT) +#define MAX77843_MUIC_CONTROL4_ADCMODE_MASK (0x3 << MAX77843_MUIC_CONTROL4_ADCMODE_SHIFT) /* MAX77843 switch port */ #define COM_OPEN 0 @@ -383,38 +383,38 @@ enum max77843_irq_muic { #define COM_AUX_USB 4 #define COM_AUX_UART 5 -#define CONTROL1_COM_SW \ +#define MAX77843_MUIC_CONTROL1_COM_SW \ ((MAX77843_MUIC_CONTROL1_COMP1SW_MASK | \ MAX77843_MUIC_CONTROL1_COMP2SW_MASK)) -#define CONTROL1_SW_OPEN \ - ((COM_OPEN << CONTROL1_COMP1SW_SHIFT | \ - COM_OPEN << CONTROL1_COMP2SW_SHIFT)) -#define CONTROL1_SW_USB \ - ((COM_USB << CONTROL1_COMP1SW_SHIFT | \ - COM_USB << CONTROL1_COMP2SW_SHIFT)) -#define CONTROL1_SW_AUDIO \ - ((COM_AUDIO << CONTROL1_COMP1SW_SHIFT | \ - COM_AUDIO << CONTROL1_COMP2SW_SHIFT)) -#define CONTROL1_SW_UART \ - ((COM_UART << CONTROL1_COMP1SW_SHIFT | \ - COM_UART << CONTROL1_COMP2SW_SHIFT)) -#define CONTROL1_SW_AUX_USB \ - ((COM_AUX_USB << CONTROL1_COMP1SW_SHIFT | \ - COM_AUX_USB << CONTROL1_COMP2SW_SHIFT)) -#define CONTROL1_SW_AUX_UART \ - ((COM_AUX_UART << CONTROL1_COMP1SW_SHIFT | \ - COM_AUX_UART << CONTROL1_COMP2SW_SHIFT)) +#define MAX77843_MUIC_CONTROL1_SW_OPEN \ + ((COM_OPEN << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT | \ + COM_OPEN << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT)) +#define MAX77843_MUIC_CONTROL1_SW_USB \ + ((COM_USB << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT | \ + COM_USB << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT)) +#define MAX77843_MUIC_CONTROL1_SW_AUDIO \ + ((COM_AUDIO << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT | \ + COM_AUDIO << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT)) +#define MAX77843_MUIC_CONTROL1_SW_UART \ + ((COM_UART << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT | \ + COM_UART << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT)) +#define MAX77843_MUIC_CONTROL1_SW_AUX_USB \ + ((COM_AUX_USB << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT | \ + COM_AUX_USB 
<< MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT)) +#define MAX77843_MUIC_CONTROL1_SW_AUX_UART \ + ((COM_AUX_UART << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT | \ + COM_AUX_UART << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT)) #define MAX77843_DISABLE 0 #define MAX77843_ENABLE 1 #define CONTROL4_AUTO_DISABLE \ - ((MAX77843_DISABLE << CONTROL4_USBAUTO_SHIFT) | \ - (MAX77843_DISABLE << CONTROL4_FCTAUTO_SHIFT)) + ((MAX77843_DISABLE << MAX77843_MUIC_CONTROL4_USBAUTO_SHIFT) | \ + (MAX77843_DISABLE << MAX77843_MUIC_CONTROL4_FCTAUTO_SHIFT)) #define CONTROL4_AUTO_ENABLE \ - ((MAX77843_ENABLE << CONTROL4_USBAUTO_SHIFT) | \ - (MAX77843_ENABLE << CONTROL4_FCTAUTO_SHIFT)) + ((MAX77843_ENABLE << MAX77843_MUIC_CONTROL4_USBAUTO_SHIFT) | \ + (MAX77843_ENABLE << MAX77843_MUIC_CONTROL4_FCTAUTO_SHIFT)) /* MAX77843 SAFEOUT LDO Control register */ #define SAFEOUTCTRL_SAFEOUT1_SHIFT 0 -- cgit v1.2.3-70-g09d2 From 8019ff6cfc0440415fcfb6352c58c3951e6ab053 Mon Sep 17 00:00:00 2001 From: Nariman Poushin Date: Thu, 16 Jul 2015 16:36:21 +0100 Subject: regmap: Use reg_sequence for multi_reg_write / register_patch Separate the functionality using sequences of register writes from the functions that take register defaults. This change renames the arguments in order to support the extension of reg_sequence to take an optional delay to be applied after any given register in a sequence is written. This avoids adding an int to all register defaults, which could substantially increase memory usage for regmaps with large default tables. This also updates all the clients of multi_reg_write/register_patch. 
Signed-off-by: Nariman Poushin Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 2 +- drivers/base/regmap/regmap.c | 22 +++++++++++----------- drivers/gpu/drm/i2c/adv7511.c | 2 +- drivers/input/misc/drv260x.c | 6 +++--- drivers/input/misc/drv2665.c | 2 +- drivers/input/misc/drv2667.c | 4 ++-- drivers/mfd/arizona-core.c | 2 +- drivers/mfd/twl6040.c | 2 +- drivers/mfd/wm5102-tables.c | 6 +++--- drivers/mfd/wm5110-tables.c | 6 +++--- drivers/mfd/wm8994-core.c | 8 ++++---- drivers/mfd/wm8997-tables.c | 2 +- include/linux/regmap.h | 17 ++++++++++++++--- sound/soc/codecs/arizona.c | 2 +- sound/soc/codecs/cs35l32.c | 2 +- sound/soc/codecs/cs42l52.c | 2 +- sound/soc/codecs/da7210.c | 4 ++-- sound/soc/codecs/rt5640.c | 2 +- sound/soc/codecs/rt5645.c | 4 ++-- sound/soc/codecs/rt5651.c | 2 +- sound/soc/codecs/rt5670.c | 2 +- sound/soc/codecs/rt5677.c | 2 +- sound/soc/codecs/tlv320aic3x.c | 2 +- sound/soc/codecs/wm2200.c | 2 +- sound/soc/codecs/wm5100.c | 2 +- sound/soc/codecs/wm8962.c | 2 +- sound/soc/codecs/wm8993.c | 2 +- 27 files changed, 62 insertions(+), 51 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index b2b2849fc6d3..873ddf91c9d3 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -136,7 +136,7 @@ struct regmap { /* if set, the HW registers are known to match map->reg_defaults */ bool no_sync_defaults; - struct reg_default *patch; + struct reg_sequence *patch; int patch_regs; /* if set, converts bulk rw to single rw */ diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 7111d04f2621..2cbb4502747d 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1743,7 +1743,7 @@ EXPORT_SYMBOL_GPL(regmap_bulk_write); * relative. The page register has been written if that was neccessary. 
*/ static int _regmap_raw_multi_reg_write(struct regmap *map, - const struct reg_default *regs, + const struct reg_sequence *regs, size_t num_regs) { int ret; @@ -1800,12 +1800,12 @@ static unsigned int _regmap_register_page(struct regmap *map, } static int _regmap_range_multi_paged_reg_write(struct regmap *map, - struct reg_default *regs, + struct reg_sequence *regs, size_t num_regs) { int ret; int i, n; - struct reg_default *base; + struct reg_sequence *base; unsigned int this_page = 0; /* * the set of registers are not neccessarily in order, but @@ -1843,7 +1843,7 @@ static int _regmap_range_multi_paged_reg_write(struct regmap *map, } static int _regmap_multi_reg_write(struct regmap *map, - const struct reg_default *regs, + const struct reg_sequence *regs, size_t num_regs) { int i; @@ -1895,8 +1895,8 @@ static int _regmap_multi_reg_write(struct regmap *map, struct regmap_range_node *range; range = _regmap_range_lookup(map, reg); if (range) { - size_t len = sizeof(struct reg_default)*num_regs; - struct reg_default *base = kmemdup(regs, len, + size_t len = sizeof(struct reg_sequence)*num_regs; + struct reg_sequence *base = kmemdup(regs, len, GFP_KERNEL); if (!base) return -ENOMEM; @@ -1929,7 +1929,7 @@ static int _regmap_multi_reg_write(struct regmap *map, * A value of zero will be returned on success, a negative errno will be * returned in error cases. */ -int regmap_multi_reg_write(struct regmap *map, const struct reg_default *regs, +int regmap_multi_reg_write(struct regmap *map, const struct reg_sequence *regs, int num_regs) { int ret; @@ -1962,7 +1962,7 @@ EXPORT_SYMBOL_GPL(regmap_multi_reg_write); * be returned in error cases. */ int regmap_multi_reg_write_bypassed(struct regmap *map, - const struct reg_default *regs, + const struct reg_sequence *regs, int num_regs) { int ret; @@ -2552,10 +2552,10 @@ EXPORT_SYMBOL_GPL(regmap_async_complete); * The caller must ensure that this function cannot be called * concurrently with either itself or regcache_sync(). 
*/ -int regmap_register_patch(struct regmap *map, const struct reg_default *regs, +int regmap_register_patch(struct regmap *map, const struct reg_sequence *regs, int num_regs) { - struct reg_default *p; + struct reg_sequence *p; int ret; bool bypass; @@ -2564,7 +2564,7 @@ int regmap_register_patch(struct regmap *map, const struct reg_default *regs, return 0; p = krealloc(map->patch, - sizeof(struct reg_default) * (map->patch_regs + num_regs), + sizeof(struct reg_sequence) * (map->patch_regs + num_regs), GFP_KERNEL); if (p) { memcpy(p + map->patch_regs, regs, num_regs * sizeof(*regs)); diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/i2c/adv7511.c index 2aaa3c88999e..00416f23b5cb 100644 --- a/drivers/gpu/drm/i2c/adv7511.c +++ b/drivers/gpu/drm/i2c/adv7511.c @@ -54,7 +54,7 @@ static struct adv7511 *encoder_to_adv7511(struct drm_encoder *encoder) } /* ADI recommended values for proper operation. */ -static const struct reg_default adv7511_fixed_registers[] = { +static const struct reg_sequence adv7511_fixed_registers[] = { { 0x98, 0x03 }, { 0x9a, 0xe0 }, { 0x9c, 0x30 }, diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c index e5d60ecd29a4..f5c9cf2f4073 100644 --- a/drivers/input/misc/drv260x.c +++ b/drivers/input/misc/drv260x.c @@ -313,14 +313,14 @@ static void drv260x_close(struct input_dev *input) gpiod_set_value(haptics->enable_gpio, 0); } -static const struct reg_default drv260x_lra_cal_regs[] = { +static const struct reg_sequence drv260x_lra_cal_regs[] = { { DRV260X_MODE, DRV260X_AUTO_CAL }, { DRV260X_CTRL3, DRV260X_NG_THRESH_2 }, { DRV260X_FEEDBACK_CTRL, DRV260X_FB_REG_LRA_MODE | DRV260X_BRAKE_FACTOR_4X | DRV260X_LOOP_GAIN_HIGH }, }; -static const struct reg_default drv260x_lra_init_regs[] = { +static const struct reg_sequence drv260x_lra_init_regs[] = { { DRV260X_MODE, DRV260X_RT_PLAYBACK }, { DRV260X_A_TO_V_CTRL, DRV260X_AUDIO_HAPTICS_PEAK_20MS | DRV260X_AUDIO_HAPTICS_FILTER_125HZ }, @@ -337,7 +337,7 @@ static const struct 
reg_default drv260x_lra_init_regs[] = { { DRV260X_CTRL4, DRV260X_AUTOCAL_TIME_500MS }, }; -static const struct reg_default drv260x_erm_cal_regs[] = { +static const struct reg_sequence drv260x_erm_cal_regs[] = { { DRV260X_MODE, DRV260X_AUTO_CAL }, { DRV260X_A_TO_V_MIN_INPUT, DRV260X_AUDIO_HAPTICS_MIN_IN_VOLT }, { DRV260X_A_TO_V_MAX_INPUT, DRV260X_AUDIO_HAPTICS_MAX_IN_VOLT }, diff --git a/drivers/input/misc/drv2665.c b/drivers/input/misc/drv2665.c index 0afaa33de07d..924456e3ca75 100644 --- a/drivers/input/misc/drv2665.c +++ b/drivers/input/misc/drv2665.c @@ -132,7 +132,7 @@ static void drv2665_close(struct input_dev *input) "Failed to enter standby mode: %d\n", error); } -static const struct reg_default drv2665_init_regs[] = { +static const struct reg_sequence drv2665_init_regs[] = { { DRV2665_CTRL_2, 0 | DRV2665_10_MS_IDLE_TOUT }, { DRV2665_CTRL_1, DRV2665_25_VPP_GAIN }, }; diff --git a/drivers/input/misc/drv2667.c b/drivers/input/misc/drv2667.c index fc0fddf0896a..047136aa646f 100644 --- a/drivers/input/misc/drv2667.c +++ b/drivers/input/misc/drv2667.c @@ -262,14 +262,14 @@ static void drv2667_close(struct input_dev *input) "Failed to enter standby mode: %d\n", error); } -static const struct reg_default drv2667_init_regs[] = { +static const struct reg_sequence drv2667_init_regs[] = { { DRV2667_CTRL_2, 0 }, { DRV2667_CTRL_1, DRV2667_25_VPP_GAIN }, { DRV2667_WV_SEQ_0, 1 }, { DRV2667_WV_SEQ_1, 0 } }; -static const struct reg_default drv2667_page1_init[] = { +static const struct reg_sequence drv2667_page1_init[] = { { DRV2667_RAM_HDR_SZ, 0x05 }, { DRV2667_RAM_START_HI, 0x80 }, { DRV2667_RAM_START_LO, 0x06 }, diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index bebf58a06a6b..66d50be11960 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -392,7 +392,7 @@ err: * Register patch to some of the CODECs internal write sequences * to ensure a clean exit from the low power sleep state. 
*/ -static const struct reg_default wm5110_sleep_patch[] = { +static const struct reg_sequence wm5110_sleep_patch[] = { { 0x337A, 0xC100 }, { 0x337B, 0x0041 }, { 0x3300, 0xA210 }, diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index c5265c1262c5..583dc33432f3 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -86,7 +86,7 @@ static const struct reg_default twl6040_defaults[] = { { 0x2E, 0x00 }, /* REG_STATUS (ro) */ }; -static struct reg_default twl6040_patch[] = { +static struct reg_sequence twl6040_patch[] = { /* * Select I2C bus access to dual access registers * Interrupt register is cleared on read diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c index aeae6ec123b3..423fb3730dc7 100644 --- a/drivers/mfd/wm5102-tables.c +++ b/drivers/mfd/wm5102-tables.c @@ -21,7 +21,7 @@ #define WM5102_NUM_AOD_ISR 2 #define WM5102_NUM_ISR 5 -static const struct reg_default wm5102_reva_patch[] = { +static const struct reg_sequence wm5102_reva_patch[] = { { 0x80, 0x0003 }, { 0x221, 0x0090 }, { 0x211, 0x0014 }, @@ -57,7 +57,7 @@ static const struct reg_default wm5102_reva_patch[] = { { 0x80, 0x0000 }, }; -static const struct reg_default wm5102_revb_patch[] = { +static const struct reg_sequence wm5102_revb_patch[] = { { 0x19, 0x0001 }, { 0x80, 0x0003 }, { 0x081, 0xE022 }, @@ -80,7 +80,7 @@ static const struct reg_default wm5102_revb_patch[] = { /* We use a function so we can use ARRAY_SIZE() */ int wm5102_patch(struct arizona *arizona) { - const struct reg_default *wm5102_patch; + const struct reg_sequence *wm5102_patch; int patch_size; switch (arizona->rev) { diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c index 12cad94b4035..26ce14f903fe 100644 --- a/drivers/mfd/wm5110-tables.c +++ b/drivers/mfd/wm5110-tables.c @@ -21,7 +21,7 @@ #define WM5110_NUM_AOD_ISR 2 #define WM5110_NUM_ISR 5 -static const struct reg_default wm5110_reva_patch[] = { +static const struct reg_sequence wm5110_reva_patch[] = { { 0x80, 0x3 }, { 
0x44, 0x20 }, { 0x45, 0x40 }, @@ -134,7 +134,7 @@ static const struct reg_default wm5110_reva_patch[] = { { 0x209, 0x002A }, }; -static const struct reg_default wm5110_revb_patch[] = { +static const struct reg_sequence wm5110_revb_patch[] = { { 0x80, 0x3 }, { 0x36e, 0x0210 }, { 0x370, 0x0210 }, @@ -224,7 +224,7 @@ static const struct reg_default wm5110_revb_patch[] = { { 0x80, 0x0 }, }; -static const struct reg_default wm5110_revd_patch[] = { +static const struct reg_sequence wm5110_revd_patch[] = { { 0x80, 0x3 }, { 0x80, 0x3 }, { 0x393, 0x27 }, diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c index 53ae5af5d6e4..0f4169a3a5d4 100644 --- a/drivers/mfd/wm8994-core.c +++ b/drivers/mfd/wm8994-core.c @@ -243,21 +243,21 @@ static int wm8994_ldo_in_use(struct wm8994_pdata *pdata, int ldo) } #endif -static const struct reg_default wm8994_revc_patch[] = { +static const struct reg_sequence wm8994_revc_patch[] = { { 0x102, 0x3 }, { 0x56, 0x3 }, { 0x817, 0x0 }, { 0x102, 0x0 }, }; -static const struct reg_default wm8958_reva_patch[] = { +static const struct reg_sequence wm8958_reva_patch[] = { { 0x102, 0x3 }, { 0xcb, 0x81 }, { 0x817, 0x0 }, { 0x102, 0x0 }, }; -static const struct reg_default wm1811_reva_patch[] = { +static const struct reg_sequence wm1811_reva_patch[] = { { 0x102, 0x3 }, { 0x56, 0xc07 }, { 0x5d, 0x7e }, @@ -326,7 +326,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, int irq) { struct wm8994_pdata *pdata; struct regmap_config *regmap_config; - const struct reg_default *regmap_patch = NULL; + const struct reg_sequence *regmap_patch = NULL; const char *devname; int ret, i, patch_regs = 0; int pulls = 0; diff --git a/drivers/mfd/wm8997-tables.c b/drivers/mfd/wm8997-tables.c index c0c25d75aacc..cab2c68f1737 100644 --- a/drivers/mfd/wm8997-tables.c +++ b/drivers/mfd/wm8997-tables.c @@ -17,7 +17,7 @@ #include "arizona.h" -static const struct reg_default wm8997_reva_patch[] = { +static const struct reg_sequence wm8997_reva_patch[] = { { 0x80, 
0x0003 }, { 0x214, 0x0008 }, { 0x458, 0x0000 }, diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 59c55ea0f0b5..c9ef2ec69142 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -50,6 +50,17 @@ struct reg_default { unsigned int def; }; +/** + * Register/value pairs for sequences of writes + * + * @reg: Register address. + * @def: Register value. + */ +struct reg_sequence { + unsigned int reg; + unsigned int def; +}; + #ifdef CONFIG_REGMAP enum regmap_endian { @@ -410,10 +421,10 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, const void *val, size_t val_len); int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val, size_t val_count); -int regmap_multi_reg_write(struct regmap *map, const struct reg_default *regs, +int regmap_multi_reg_write(struct regmap *map, const struct reg_sequence *regs, int num_regs); int regmap_multi_reg_write_bypassed(struct regmap *map, - const struct reg_default *regs, + const struct reg_sequence *regs, int num_regs); int regmap_raw_write_async(struct regmap *map, unsigned int reg, const void *val, size_t val_len); @@ -450,7 +461,7 @@ void regcache_mark_dirty(struct regmap *map); bool regmap_check_range_table(struct regmap *map, unsigned int reg, const struct regmap_access_table *table); -int regmap_register_patch(struct regmap *map, const struct reg_default *regs, +int regmap_register_patch(struct regmap *map, const struct reg_sequence *regs, int num_regs); int regmap_parse_val(struct regmap *map, const void *buf, unsigned int *val); diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 802e05eae3e9..5edd33fcd68c 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -1366,7 +1366,7 @@ static void arizona_wm5102_set_dac_comp(struct snd_soc_codec *codec, { struct arizona_priv *priv = snd_soc_codec_get_drvdata(codec); struct arizona *arizona = priv->arizona; - struct reg_default dac_comp[] = { + struct reg_sequence dac_comp[] = { { 
0x80, 0x3 }, { ARIZONA_DAC_COMP_1, 0 }, { ARIZONA_DAC_COMP_2, 0 }, diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c index 8f40025b7e7c..2813a1b0c949 100644 --- a/sound/soc/codecs/cs35l32.c +++ b/sound/soc/codecs/cs35l32.c @@ -276,7 +276,7 @@ static const struct snd_soc_codec_driver soc_codec_dev_cs35l32 = { }; /* Current and threshold powerup sequence Pg37 in datasheet */ -static const struct reg_default cs35l32_monitor_patch[] = { +static const struct reg_sequence cs35l32_monitor_patch[] = { { 0x00, 0x99 }, { 0x48, 0x17 }, diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c index 4de52c9957ac..8b2d05933594 100644 --- a/sound/soc/codecs/cs42l52.c +++ b/sound/soc/codecs/cs42l52.c @@ -1118,7 +1118,7 @@ static const struct snd_soc_codec_driver soc_codec_dev_cs42l52 = { }; /* Current and threshold powerup sequence Pg37 */ -static const struct reg_default cs42l52_threshold_patch[] = { +static const struct reg_sequence cs42l52_threshold_patch[] = { { 0x00, 0x99 }, { 0x3E, 0xBA }, diff --git a/sound/soc/codecs/da7210.c b/sound/soc/codecs/da7210.c index 21810e5f3321..bf0fb3d4df22 100644 --- a/sound/soc/codecs/da7210.c +++ b/sound/soc/codecs/da7210.c @@ -1182,7 +1182,7 @@ static struct snd_soc_codec_driver soc_codec_dev_da7210 = { #if IS_ENABLED(CONFIG_I2C) -static struct reg_default da7210_regmap_i2c_patch[] = { +static struct reg_sequence da7210_regmap_i2c_patch[] = { /* System controller master disable */ { DA7210_STARTUP1, 0x00 }, @@ -1269,7 +1269,7 @@ static struct i2c_driver da7210_i2c_driver = { #if defined(CONFIG_SPI_MASTER) -static struct reg_default da7210_regmap_spi_patch[] = { +static struct reg_sequence da7210_regmap_spi_patch[] = { /* Dummy read to give two pulses over nCS for SPI */ { DA7210_AUX2, 0x00 }, { DA7210_AUX2, 0x00 }, diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c index 9bc78e57513d..1ed1f8895e12 100644 --- a/sound/soc/codecs/rt5640.c +++ b/sound/soc/codecs/rt5640.c @@ -51,7 +51,7 @@ static 
const struct regmap_range_cfg rt5640_ranges[] = { .window_len = 0x1, }, }; -static const struct reg_default init_list[] = { +static const struct reg_sequence init_list[] = { {RT5640_PR_BASE + 0x3d, 0x3600}, {RT5640_PR_BASE + 0x12, 0x0aa8}, {RT5640_PR_BASE + 0x14, 0x0aaa}, diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c index 9ce311e088fc..c0f4be430e70 100644 --- a/sound/soc/codecs/rt5645.c +++ b/sound/soc/codecs/rt5645.c @@ -54,7 +54,7 @@ static const struct regmap_range_cfg rt5645_ranges[] = { }, }; -static const struct reg_default init_list[] = { +static const struct reg_sequence init_list[] = { {RT5645_PR_BASE + 0x3d, 0x3600}, {RT5645_PR_BASE + 0x1c, 0xfd20}, {RT5645_PR_BASE + 0x20, 0x611f}, @@ -63,7 +63,7 @@ static const struct reg_default init_list[] = { }; #define RT5645_INIT_REG_LEN ARRAY_SIZE(init_list) -static const struct reg_default rt5650_init_list[] = { +static const struct reg_sequence rt5650_init_list[] = { {0xf6, 0x0100}, }; diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c index a3506e193abc..db9b8667f136 100644 --- a/sound/soc/codecs/rt5651.c +++ b/sound/soc/codecs/rt5651.c @@ -46,7 +46,7 @@ static const struct regmap_range_cfg rt5651_ranges[] = { .window_len = 0x1, }, }; -static struct reg_default init_list[] = { +static struct reg_sequence init_list[] = { {RT5651_PR_BASE + 0x3d, 0x3e00}, }; diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c index a9123d414178..462a91f7cf68 100644 --- a/sound/soc/codecs/rt5670.c +++ b/sound/soc/codecs/rt5670.c @@ -51,7 +51,7 @@ static const struct regmap_range_cfg rt5670_ranges[] = { .window_len = 0x1, }, }; -static const struct reg_default init_list[] = { +static const struct reg_sequence init_list[] = { { RT5670_PR_BASE + 0x14, 0x9a8a }, { RT5670_PR_BASE + 0x38, 0x3ba1 }, { RT5670_PR_BASE + 0x3d, 0x3640 }, diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 31d969ac1192..b89775251470 100644 --- a/sound/soc/codecs/rt5677.c +++ 
b/sound/soc/codecs/rt5677.c @@ -54,7 +54,7 @@ static const struct regmap_range_cfg rt5677_ranges[] = { }, }; -static const struct reg_default init_list[] = { +static const struct reg_sequence init_list[] = { {RT5677_ASRC_12, 0x0018}, {RT5677_PR_BASE + 0x3d, 0x364d}, {RT5677_PR_BASE + 0x17, 0x4fc0}, diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c index a7cf19b53fb2..83ae1eb44d4f 100644 --- a/sound/soc/codecs/tlv320aic3x.c +++ b/sound/soc/codecs/tlv320aic3x.c @@ -1668,7 +1668,7 @@ static const struct i2c_device_id aic3x_i2c_id[] = { }; MODULE_DEVICE_TABLE(i2c, aic3x_i2c_id); -static const struct reg_default aic3007_class_d[] = { +static const struct reg_sequence aic3007_class_d[] = { /* Class-D speaker driver init; datasheet p. 46 */ { AIC3X_PAGE_SELECT, 0x0D }, { 0xD, 0x0D }, diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c index c83083285e53..6c607928fb9b 100644 --- a/sound/soc/codecs/wm2200.c +++ b/sound/soc/codecs/wm2200.c @@ -897,7 +897,7 @@ static bool wm2200_readable_register(struct device *dev, unsigned int reg) } } -static const struct reg_default wm2200_reva_patch[] = { +static const struct reg_sequence wm2200_reva_patch[] = { { 0x07, 0x0003 }, { 0x102, 0x0200 }, { 0x203, 0x0084 }, diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c index 4c10cd88c1af..26d79bbb7599 100644 --- a/sound/soc/codecs/wm5100.c +++ b/sound/soc/codecs/wm5100.c @@ -1247,7 +1247,7 @@ static const struct snd_soc_dapm_route wm5100_dapm_routes[] = { { "PWM2", NULL, "PWM2 Driver" }, }; -static const struct reg_default wm5100_reva_patches[] = { +static const struct reg_sequence wm5100_reva_patches[] = { { WM5100_AUDIO_IF_1_10, 0 }, { WM5100_AUDIO_IF_1_11, 1 }, { WM5100_AUDIO_IF_1_12, 2 }, diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index c5748fd4f296..05492e826aea 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3495,7 +3495,7 @@ static struct snd_soc_codec_driver 
soc_codec_dev_wm8962 = { }; /* Improve power consumption for IN4 DC measurement mode */ -static const struct reg_default wm8962_dc_measure[] = { +static const struct reg_sequence wm8962_dc_measure[] = { { 0xfd, 0x1 }, { 0xcc, 0x40 }, { 0xfd, 0 }, diff --git a/sound/soc/codecs/wm8993.c b/sound/soc/codecs/wm8993.c index 8a8db8605dc2..52ec64d8502d 100644 --- a/sound/soc/codecs/wm8993.c +++ b/sound/soc/codecs/wm8993.c @@ -1595,7 +1595,7 @@ static int wm8993_resume(struct snd_soc_codec *codec) #endif /* Tune DC servo configuration */ -static struct reg_default wm8993_regmap_patch[] = { +static struct reg_sequence wm8993_regmap_patch[] = { { 0x44, 3 }, { 0x56, 3 }, { 0x44, 0 }, -- cgit v1.2.3-70-g09d2 From 2de9d6006c190bb0f706e8404de94cd94293801f Mon Sep 17 00:00:00 2001 From: Nariman Poushin Date: Thu, 16 Jul 2015 16:36:22 +0100 Subject: regmap: Apply optional delay in multi_reg_write/register_patch Add an optional delay_us field in reg_sequence to allow the client to specify a delay (in microseconds) to be applied after any given write in a sequence of writes. 
We treat a delay in a sequence the same way we treat a page change as they are logically similar in that you can coalesce all writes before a delay (in the same way you can coalesce all writes before a page change is needed). Signed-off-by: Nariman Poushin Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 54 +++++++++++++++++++++++++++++++++++++++----- include/linux/regmap.h | 5 +++- 2 files changed, 52 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 2cbb4502747d..b3a5aa5cd580 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace.h" @@ -1807,10 +1808,12 @@ static int _regmap_range_multi_paged_reg_write(struct regmap *map, int i, n; struct reg_sequence *base; unsigned int this_page = 0; + unsigned int page_change = 0; /* * the set of registers are not neccessarily in order, but * since the order of write must be preserved this algorithm - * chops the set each time the page changes + * chops the set each time the page changes. This also applies + * if there is a delay required at any point in the sequence. */ base = regs; for (i = 0, n = 0; i < num_regs; i++, n++) { @@ -1826,16 +1829,48 @@ static int _regmap_range_multi_paged_reg_write(struct regmap *map, this_page = win_page; if (win_page != this_page) { this_page = win_page; + page_change = 1; + } + } + + /* If we have both a page change and a delay make sure to + * write the regs and apply the delay before we change the + * page. 
+ */ + + if (page_change || regs[i].delay_us) { + + /* For situations where the first write requires + * a delay we need to make sure we don't call + * raw_multi_reg_write with n=0 + * This can't occur with page breaks as we + * never write on the first iteration + */ + if (regs[i].delay_us && i == 0) + n = 1; + ret = _regmap_raw_multi_reg_write(map, base, n); if (ret != 0) return ret; + + if (regs[i].delay_us) + udelay(regs[i].delay_us); + base += n; n = 0; - } - ret = _regmap_select_page(map, &base[n].reg, range, 1); - if (ret != 0) - return ret; + + if (page_change) { + ret = _regmap_select_page(map, + &base[n].reg, + range, 1); + if (ret != 0) + return ret; + + page_change = 0; + } + } + } if (n > 0) return _regmap_raw_multi_reg_write(map, base, n); @@ -1854,6 +1889,9 @@ static int _regmap_multi_reg_write(struct regmap *map, ret = _regmap_write(map, regs[i].reg, regs[i].def); if (ret != 0) return ret; + + if (regs[i].delay_us) + udelay(regs[i].delay_us); } return 0; } @@ -1893,8 +1931,12 @@ static int _regmap_multi_reg_write(struct regmap *map, for (i = 0; i < num_regs; i++) { unsigned int reg = regs[i].reg; struct regmap_range_node *range; + + /* Coalesce all the writes between a page break or a delay + * in a sequence + */ range = _regmap_range_lookup(map, reg); - if (range) { + if (range || regs[i].delay_us) { size_t len = sizeof(struct reg_sequence)*num_regs; struct reg_sequence *base = kmemdup(regs, len, GFP_KERNEL); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index c9ef2ec69142..5a7cf2136c81 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -51,14 +51,17 @@ struct reg_default { }; /** - * Register/value pairs for sequences of writes + * Register/value pairs for sequences of writes with an optional delay in + * microseconds to be applied after each write. * * @reg: Register address. * @def: Register value. 
+ * @delay_us: Delay to be applied after the register write in microseconds */ struct reg_sequence { unsigned int reg; unsigned int def; + unsigned int delay_us; }; #ifdef CONFIG_REGMAP -- cgit v1.2.3-70-g09d2 From c391f262bee9d0d6424a99c85183a06c50e307ee Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Mon, 1 Jun 2015 16:05:41 +0800 Subject: genirq: Rename irq_data_get_msi() as irq_data_get_msi_desc() Rename irq_data_get_msi() as irq_data_get_msi_desc() to keep consistency with other irq_data access helpers. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Cc: Jason Cooper Signed-off-by: Thomas Gleixner --- drivers/pci/host/pcie-designware.c | 2 +- drivers/pci/msi.c | 2 +- include/linux/irq.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 69486be7181e..85c7735d7511 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -326,7 +326,7 @@ static int dw_msi_setup_irq(struct msi_controller *chip, struct pci_dev *pdev, static void dw_msi_teardown_irq(struct msi_controller *chip, unsigned int irq) { struct irq_data *data = irq_get_irq_data(irq); - struct msi_desc *msi = irq_data_get_msi(data); + struct msi_desc *msi = irq_data_get_msi_desc(data); struct pcie_port *pp = sys_to_pcie(msi->dev->bus->sysdata); clear_irq_range(pp, irq, 1, data->hwirq); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index f66be868ad21..64673f13bbb9 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -249,7 +249,7 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag) static void msi_set_mask_bit(struct irq_data *data, u32 flag) { - struct msi_desc *desc = irq_data_get_msi(data); + struct msi_desc *desc = irq_data_get_msi_desc(data); if (desc->msi_attrib.is_msix) { msix_mask_irq(desc, flag); diff --git a/include/linux/irq.h b/include/linux/irq.h index 429ac266c7c6..5284cb166d90 100644 --- a/include/linux/irq.h +++ 
b/include/linux/irq.h @@ -638,7 +638,7 @@ static inline struct msi_desc *irq_get_msi_desc(unsigned int irq) return d ? d->msi_desc : NULL; } -static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) +static inline struct msi_desc *irq_data_get_msi_desc(struct irq_data *d) { return d->msi_desc; } -- cgit v1.2.3-70-g09d2 From b2c0b2cbb282f0cf42518ffacbe197e6f2884168 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Sep 2014 23:57:13 +0100 Subject: nmi: create generic NMI backtrace implementation x86's NMI backtrace implementation (for arch_trigger_all_cpu_backtrace()) is fairly generic in nature - the only architecture specific bits are the act of raising the NMI to other CPUs, and reporting the status of the NMI handler. These are fairly simple to factor out, and produce a generic implementation which can be shared between ARM and x86. Reviewed-by: Thomas Gleixner Signed-off-by: Russell King --- include/linux/nmi.h | 6 ++ lib/Makefile | 2 +- lib/nmi_backtrace.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 lib/nmi_backtrace.c (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f94da0e65dea..5791e3229068 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -49,6 +49,12 @@ static inline bool trigger_allbutself_cpu_backtrace(void) arch_trigger_all_cpu_backtrace(false); return true; } + +/* generic implementation */ +void nmi_trigger_all_cpu_backtrace(bool include_self, + void (*raise)(cpumask_t *mask)); +bool nmi_cpu_backtrace(struct pt_regs *regs); + #else static inline bool trigger_all_cpu_backtrace(void) { diff --git a/lib/Makefile b/lib/Makefile index 6897b527581a..392169c5bc4e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o md5.o irq_regs.o argv_split.o \ proportions.o flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o 
decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o + earlycpio.o seq_buf.o nmi_backtrace.o obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o lib-$(CONFIG_MMU) += ioremap.o diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c new file mode 100644 index 000000000000..88d3d32e5923 --- /dev/null +++ b/lib/nmi_backtrace.c @@ -0,0 +1,162 @@ +/* + * NMI backtrace support + * + * Gratuitously copied from arch/x86/kernel/apic/hw_nmi.c by Russell King, + * with the following header: + * + * HW NMI watchdog support + * + * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc. + * + * Arch specific calls to support NMI watchdog + * + * Bits copied from original nmi.c file + */ +#include +#include +#include +#include +#include + +#ifdef arch_trigger_all_cpu_backtrace +/* For reliability, we're prepared to waste bits here. */ +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; +static cpumask_t printtrace_mask; + +#define NMI_BUF_SIZE 4096 + +struct nmi_seq_buf { + unsigned char buffer[NMI_BUF_SIZE]; + struct seq_buf seq; +}; + +/* Safe printing in NMI context */ +static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); + +/* "in progress" flag of arch_trigger_all_cpu_backtrace */ +static unsigned long backtrace_flag; + +static void print_seq_line(struct nmi_seq_buf *s, int start, int end) +{ + const char *buf = s->buffer + start; + + printk("%.*s", (end - start) + 1, buf); +} + +void nmi_trigger_all_cpu_backtrace(bool include_self, + void (*raise)(cpumask_t *mask)) +{ + struct nmi_seq_buf *s; + int i, cpu, this_cpu = get_cpu(); + + if (test_and_set_bit(0, &backtrace_flag)) { + /* + * If there is already a trigger_all_cpu_backtrace() in progress + * (backtrace_flag == 1), don't output double cpu dump infos. 
+ */ + put_cpu(); + return; + } + + cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + if (!include_self) + cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask)); + + cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask)); + + /* + * Set up per_cpu seq_buf buffers that the NMIs running on the other + * CPUs will write to. + */ + for_each_cpu(cpu, to_cpumask(backtrace_mask)) { + s = &per_cpu(nmi_print_seq, cpu); + seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE); + } + + if (!cpumask_empty(to_cpumask(backtrace_mask))) { + pr_info("Sending NMI to %s CPUs:\n", + (include_self ? "all" : "other")); + raise(to_cpumask(backtrace_mask)); + } + + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ + for (i = 0; i < 10 * 1000; i++) { + if (cpumask_empty(to_cpumask(backtrace_mask))) + break; + mdelay(1); + touch_softlockup_watchdog(); + } + + /* + * Now that all the NMIs have triggered, we can dump out their + * back traces safely to the console. + */ + for_each_cpu(cpu, &printtrace_mask) { + int len, last_i = 0; + + s = &per_cpu(nmi_print_seq, cpu); + len = seq_buf_used(&s->seq); + if (!len) + continue; + + /* Print line by line. */ + for (i = 0; i < len; i++) { + if (s->buffer[i] == '\n') { + print_seq_line(s, last_i, i); + last_i = i + 1; + } + } + /* Check if there was a partial line. */ + if (last_i < len) { + print_seq_line(s, last_i, len - 1); + pr_cont("\n"); + } + } + + clear_bit(0, &backtrace_flag); + smp_mb__after_atomic(); + put_cpu(); +} + +/* + * It is not safe to call printk() directly from NMI handlers. + * It may be fine if the NMI detected a lock up and we have no choice + * but to do so, but doing a NMI on all other CPUs to get a back trace + * can be done with a sysrq-l. We don't want that to lock up, which + * can happen if the NMI interrupts a printk in progress. + * + * Instead, we redirect the vprintk() to this nmi_vprintk() that writes + * the content into a per cpu seq_buf buffer. 
Then when the NMIs are + * all done, we can safely dump the contents of the seq_buf to a printk() + * from a non NMI context. + */ +static int nmi_vprintk(const char *fmt, va_list args) +{ + struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + unsigned int len = seq_buf_used(&s->seq); + + seq_buf_vprintf(&s->seq, fmt, args); + return seq_buf_used(&s->seq) - len; +} + +bool nmi_cpu_backtrace(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + + if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { + printk_func_t printk_func_save = this_cpu_read(printk_func); + + /* Replace printk to write into the NMI seq */ + this_cpu_write(printk_func, nmi_vprintk); + pr_warn("NMI backtrace for cpu %d\n", cpu); + show_regs(regs); + this_cpu_write(printk_func, printk_func_save); + + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); + return true; + } + + return false; +} +NOKPROBE_SYMBOL(nmi_cpu_backtrace); +#endif -- cgit v1.2.3-70-g09d2 From b54e5ed8f285d62c0d242c4ef9da90937994db02 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2015 11:16:44 +0800 Subject: block: partition: introduce hd_free_part() So the helper can be used in both generic partition case and part0 case. 
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/genhd.c | 3 +-- block/partition-generic.c | 3 +-- include/linux/genhd.h | 6 ++++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 59a1395eedac..85df45292dba 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1110,8 +1110,7 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); - free_part_stats(&disk->part0); - free_part_info(&disk->part0); + hd_free_part(&disk->part0); if (disk->queue) blk_put_queue(disk->queue); kfree(disk); diff --git a/block/partition-generic.c b/block/partition-generic.c index 0d9e5f97f0a8..eca0d02a607c 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -212,8 +212,7 @@ static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); blk_free_devt(dev->devt); - free_part_stats(p); - free_part_info(p); + hd_free_part(p); kfree(p); } diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ec274e0f4ed2..a221220ffcb2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -663,6 +663,12 @@ static inline void hd_struct_put(struct hd_struct *part) __delete_partition(part); } +static inline void hd_free_part(struct hd_struct *part) +{ + free_part_stats(part); + free_part_info(part); +} + /* * Any access of part->nr_sects which is not protected by partition * bd_mutex or gendisk bdev bd_mutex, should be done using this -- cgit v1.2.3-70-g09d2 From 6c71013ecb7e2bddbed9f5b95e7aed22c491daa9 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2015 11:16:45 +0800 Subject: block: partition: convert percpu ref Percpu refcount is the perfect match for partition's case, and the conversion is quite straightforward. With the conversion, one pair of atomic inc/dec can be saved for accounting block I/O, which is run in the hot path of block I/O. 
Signed-off-by: Ming Lei Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/genhd.c | 6 +++++- block/partition-generic.c | 9 +++++---- include/linux/genhd.h | 27 +++++++++++++++++---------- 3 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/block/genhd.c b/block/genhd.c index 85df45292dba..0c706f33a599 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1284,7 +1284,11 @@ struct gendisk *alloc_disk_node(int minors, int node_id) * converted to make use of bd_mutex and sequence counters. */ seqcount_init(&disk->part0.nr_sects_seq); - hd_ref_init(&disk->part0); + if (hd_ref_init(&disk->part0)) { + hd_free_part(&disk->part0); + kfree(disk); + return NULL; + } disk->minors = minors; rand_initialize_disk(disk); diff --git a/block/partition-generic.c b/block/partition-generic.c index eca0d02a607c..e7711133284e 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -232,8 +232,9 @@ static void delete_partition_rcu_cb(struct rcu_head *head) put_device(part_to_dev(part)); } -void __delete_partition(struct hd_struct *part) +void __delete_partition(struct percpu_ref *ref) { + struct hd_struct *part = container_of(ref, struct hd_struct, ref); call_rcu(&part->rcu_head, delete_partition_rcu_cb); } @@ -254,7 +255,7 @@ void delete_partition(struct gendisk *disk, int partno) kobject_put(part->holder_dir); device_del(part_to_dev(part)); - hd_struct_put(part); + hd_struct_kill(part); } static ssize_t whole_disk_show(struct device *dev, @@ -355,8 +356,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, if (!dev_get_uevent_suppress(ddev)) kobject_uevent(&pdev->kobj, KOBJ_ADD); - hd_ref_init(p); - return p; + if (!hd_ref_init(p)) + return p; out_free_info: free_part_info(p); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a221220ffcb2..2adbfa6d02bc 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -13,6 +13,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ 
-124,7 +125,7 @@ struct hd_struct { #else struct disk_stats dkstats; #endif - atomic_t ref; + struct percpu_ref ref; struct rcu_head rcu_head; }; @@ -611,7 +612,7 @@ extern struct hd_struct * __must_check add_partition(struct gendisk *disk, sector_t len, int flags, struct partition_meta_info *info); -extern void __delete_partition(struct hd_struct *); +extern void __delete_partition(struct percpu_ref *); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); @@ -640,33 +641,39 @@ extern ssize_t part_fail_store(struct device *dev, const char *buf, size_t count); #endif /* CONFIG_FAIL_MAKE_REQUEST */ -static inline void hd_ref_init(struct hd_struct *part) +static inline int hd_ref_init(struct hd_struct *part) { - atomic_set(&part->ref, 1); - smp_mb(); + if (percpu_ref_init(&part->ref, __delete_partition, 0, + GFP_KERNEL)) + return -ENOMEM; + return 0; } static inline void hd_struct_get(struct hd_struct *part) { - atomic_inc(&part->ref); - smp_mb__after_atomic(); + percpu_ref_get(&part->ref); } static inline int hd_struct_try_get(struct hd_struct *part) { - return atomic_inc_not_zero(&part->ref); + return percpu_ref_tryget_live(&part->ref); } static inline void hd_struct_put(struct hd_struct *part) { - if (atomic_dec_and_test(&part->ref)) - __delete_partition(part); + percpu_ref_put(&part->ref); +} + +static inline void hd_struct_kill(struct hd_struct *part) +{ + percpu_ref_kill(&part->ref); } static inline void hd_free_part(struct hd_struct *part) { free_part_stats(part); free_part_info(part); + percpu_ref_exit(&part->ref); } /* -- cgit v1.2.3-70-g09d2 From 0034af036554c39eefd14d835a8ec3496ac46712 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 16 Jul 2015 09:14:26 -0600 Subject: block: make /sys/block//queue/discard_max_bytes writeable Lots of devices support huge discard sizes these days. 
Depending on how the device handles them internally, huge discards can introduce massive latencies (hundreds of msec) on the device side. We have a sysfs file, discard_max_bytes, that advertises the max hardware supported discard size. Make this writeable, and split the settings into a soft and hard limit. This can be set from 'discard_granularity' and up to the hardware limit. Add a new sysfs file, 'discard_max_hw_bytes', that shows the hw set limit. Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- Documentation/block/queue-sysfs.txt | 10 +++++++++- block/blk-settings.c | 4 ++++ block/blk-sysfs.c | 40 ++++++++++++++++++++++++++++++++++++- include/linux/blkdev.h | 1 + 4 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index 3a29f8914df9..e5d914845be6 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -20,7 +20,7 @@ This shows the size of internal allocation of the device in bytes, if reported by the device. A value of '0' means device does not support the discard functionality. -discard_max_bytes (RO) +discard_max_hw_bytes (RO) ---------------------- Devices that support discard functionality may have internal limits on the number of bytes that can be trimmed or unmapped in a single operation. @@ -29,6 +29,14 @@ number of bytes that can be discarded in a single operation. Discard requests issued to the device must not exceed this limit. A discard_max_bytes value of 0 means that the device does not support discard functionality. +discard_max_bytes (RW) +---------------------- +While discard_max_hw_bytes is the hardware limit for the device, this +setting is the software limit. Some devices exhibit large latencies when +large discards are issued, setting this value lower will make Linux issue +smaller discards and potentially help reduce latencies induced by large +discard operations. 
+ discard_zeroes_data (RO) ------------------------ When read, this file will show if the discarded block are zeroed by the diff --git a/block/blk-settings.c b/block/blk-settings.c index 12600bfffca9..b38d8d723276 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -116,6 +116,7 @@ void blk_set_default_limits(struct queue_limits *lim) lim->chunk_sectors = 0; lim->max_write_same_sectors = 0; lim->max_discard_sectors = 0; + lim->max_hw_discard_sectors = 0; lim->discard_granularity = 0; lim->discard_alignment = 0; lim->discard_misaligned = 0; @@ -303,6 +304,7 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors); void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors) { + q->limits.max_hw_discard_sectors = max_discard_sectors; q->limits.max_discard_sectors = max_discard_sectors; } EXPORT_SYMBOL(blk_queue_max_discard_sectors); @@ -641,6 +643,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_discard_sectors = min_not_zero(t->max_discard_sectors, b->max_discard_sectors); + t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors, + b->max_hw_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) % diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 6264b382d4d1..b1f34e463c0f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -145,12 +145,43 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag return queue_var_show(q->limits.discard_granularity, page); } +static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page) +{ + unsigned long long val; + + val = q->limits.max_hw_discard_sectors << 9; + return sprintf(page, "%llu\n", val); +} + static ssize_t queue_discard_max_show(struct request_queue *q, char *page) { return sprintf(page, "%llu\n", (unsigned long long)q->limits.max_discard_sectors << 9); } +static ssize_t 
queue_discard_max_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max_discard; + ssize_t ret = queue_var_store(&max_discard, page, count); + + if (ret < 0) + return ret; + + if (max_discard & (q->limits.discard_granularity - 1)) + return -EINVAL; + + max_discard >>= 9; + if (max_discard > UINT_MAX) + return -EINVAL; + + if (max_discard > q->limits.max_hw_discard_sectors) + max_discard = q->limits.max_hw_discard_sectors; + + q->limits.max_discard_sectors = max_discard; + return ret; +} + static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) { return queue_var_show(queue_discard_zeroes_data(q), page); @@ -360,9 +391,15 @@ static struct queue_sysfs_entry queue_discard_granularity_entry = { .show = queue_discard_granularity_show, }; +static struct queue_sysfs_entry queue_discard_max_hw_entry = { + .attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO }, + .show = queue_discard_max_hw_show, +}; + static struct queue_sysfs_entry queue_discard_max_entry = { - .attr = {.name = "discard_max_bytes", .mode = S_IRUGO }, + .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR }, .show = queue_discard_max_show, + .store = queue_discard_max_store, }; static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { @@ -421,6 +458,7 @@ static struct attribute *default_attrs[] = { &queue_io_opt_entry.attr, &queue_discard_granularity_entry.attr, &queue_discard_max_entry.attr, + &queue_discard_max_hw_entry.attr, &queue_discard_zeroes_data_entry.attr, &queue_write_same_max_entry.attr, &queue_nonrot_entry.attr, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4068c17d0df..243f29e779ec 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -268,6 +268,7 @@ struct queue_limits { unsigned int io_min; unsigned int io_opt; unsigned int max_discard_sectors; + unsigned int max_hw_discard_sectors; unsigned int max_write_same_sectors; unsigned int discard_granularity; unsigned int 
discard_alignment; -- cgit v1.2.3-70-g09d2 From 2531c8cf56a640cd7d17057df8484e570716a450 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Fri, 17 Jul 2015 16:23:37 -0700 Subject: mm: hugetlb: allow hugepages_supported to be architecture specific s390 has a constant hugepage size, by setting HPAGE_SHIFT we also change e.g. the pageblock_order, which should be independent in respect to hugepage support. With this patch every architecture is free to define how to check for hugepage support. Signed-off-by: Dominik Dingel Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Michael Holzheu Cc: Gerald Schaefer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 205026175c42..d891f949466a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -460,15 +460,14 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, return &mm->page_table_lock; } -static inline bool hugepages_supported(void) -{ - /* - * Some platform decide whether they support huge pages at boot - * time. On these, such as powerpc, HPAGE_SHIFT is set to 0 when - * there is no such support - */ - return HPAGE_SHIFT != 0; -} +#ifndef hugepages_supported +/* + * Some platform decide whether they support huge pages at boot + * time. 
Some of them, such as powerpc, set HPAGE_SHIFT to 0 + * when there is no such support + */ +#define hugepages_supported() (HPAGE_SHIFT != 0) +#endif #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; -- cgit v1.2.3-70-g09d2 From 8db1486065141e619e4855b84e350ef32064f7e1 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Fri, 17 Jul 2015 16:23:42 -0700 Subject: include, lib: add __printf attributes to several function prototypes Using __printf attributes helps to detect several format string issues at compile time (even though -Wformat-security is currently disabled in Makefile). For example it can detect when formatting a pointer as a number, like the issue fixed in commit a3fa71c40f18 ("wl18xx: show rx_frames_per_rates as an array as it really is"), or when the arguments do not match the format string, c.f. for example commit 5ce1aca81435 ("reiserfs: fix __RASSERT format string"). To prevent similar bugs in the future, add a __printf attribute to every function prototype which needs one in include/linux/ and lib/. These functions were mostly found by using gcc's -Wsuggest-attribute=format flag. 
Signed-off-by: Nicolas Iooss Cc: Greg Kroah-Hartman Cc: Felipe Balbi Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/clkdev.h | 7 ++++--- include/linux/compat.h | 2 +- include/linux/configfs.h | 3 ++- include/linux/cpu.h | 7 ++++--- include/linux/dcache.h | 3 ++- include/linux/device.h | 15 +++++++-------- include/linux/iommu.h | 2 +- include/linux/kernel.h | 9 +++++---- include/linux/kobject.h | 5 +++-- include/linux/mmiotrace.h | 2 +- include/linux/printk.h | 6 +++--- lib/kobject.c | 5 +++-- 12 files changed, 36 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h index a240b18e86fa..08bffcc466de 100644 --- a/include/linux/clkdev.h +++ b/include/linux/clkdev.h @@ -33,18 +33,19 @@ struct clk_lookup { } struct clk_lookup *clkdev_alloc(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add(struct clk_lookup *cl); void clkdev_drop(struct clk_lookup *cl); struct clk_lookup *clkdev_create(struct clk *clk, const char *con_id, - const char *dev_fmt, ...); + const char *dev_fmt, ...) __printf(3, 4); void clkdev_add_table(struct clk_lookup *, size_t); int clk_add_alias(const char *, const char *, const char *, struct device *); -int clk_register_clkdev(struct clk *, const char *, const char *, ...); +int clk_register_clkdev(struct clk *, const char *, const char *, ...) 
+ __printf(3, 4); int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t); #ifdef CONFIG_COMMON_CLK diff --git a/include/linux/compat.h b/include/linux/compat.h index ab25814690bc..a76c9172b2eb 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -424,7 +424,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); -extern int compat_printk(const char *fmt, ...); +extern __printf(1, 2) int compat_printk(const char *fmt, ...); extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat); extern void sigset_to_compat(compat_sigset_t *compat, const sigset_t *set); diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c9e5c57e4edf..63a36e89d0eb 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -64,7 +64,8 @@ struct config_item { struct dentry *ci_dentry; }; -extern int config_item_set_name(struct config_item *, const char *, ...); +extern __printf(2, 3) +int config_item_set_name(struct config_item *, const char *, ...); static inline char *config_item_name(struct config_item * item) { diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c0fb6b1b4712..23c30bdcca86 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -40,9 +40,10 @@ extern void cpu_remove_dev_attr(struct device_attribute *attr); extern int cpu_add_dev_attr_group(struct attribute_group *attrs); extern void cpu_remove_dev_attr_group(struct attribute_group *attrs); -extern struct device *cpu_device_create(struct device *parent, void *drvdata, - const struct attribute_group **groups, - const char *fmt, ...); +extern __printf(4, 5) +struct device *cpu_device_create(struct device *parent, void *drvdata, + const struct attribute_group **groups, + const char *fmt, ...); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); diff --git 
a/include/linux/dcache.h b/include/linux/dcache.h index d2d50249b7b2..d67ae119cf4e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -327,7 +327,8 @@ static inline unsigned d_count(const struct dentry *dentry) /* * helper function for dentry_operations.d_dname() members */ -extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...); +extern __printf(4, 5) +char *dynamic_dname(struct dentry *, char *, int, const char *, ...); extern char *simple_dname(struct dentry *, char *, int); extern char *__d_path(const struct path *, const struct path *, char *, int); diff --git a/include/linux/device.h b/include/linux/device.h index 5a31bf3a4024..a2b4ea70a946 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -637,8 +637,9 @@ extern int devres_release_group(struct device *dev, void *id); /* managed devm_k.alloc/kfree for device drivers */ extern void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp); -extern char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, - va_list ap); +extern __printf(3, 0) +char *devm_kvasprintf(struct device *dev, gfp_t gfp, const char *fmt, + va_list ap); extern __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, const char *fmt, ...); static inline void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp) @@ -1011,12 +1012,10 @@ extern int __must_check device_reprobe(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ -extern struct device *device_create_vargs(struct class *cls, - struct device *parent, - dev_t devt, - void *drvdata, - const char *fmt, - va_list vargs); +extern __printf(5, 0) +struct device *device_create_vargs(struct class *cls, struct device *parent, + dev_t devt, void *drvdata, + const char *fmt, va_list vargs); extern __printf(5, 6) struct device *device_create(struct class *cls, struct device *parent, dev_t devt, void *drvdata, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 
dc767f7c3704..f9c1b6d0f2e4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -258,7 +258,7 @@ extern int iommu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr, void *data); struct device *iommu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, - const char *fmt, ...); + const char *fmt, ...) __printf(4, 5); void iommu_device_destroy(struct device *dev); int iommu_device_link(struct device *dev, struct device *link); void iommu_device_unlink(struct device *dev, struct device *link); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5f0be58640ea..5582410727cb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -411,7 +411,8 @@ extern __printf(3, 0) int vscnprintf(char *buf, size_t size, const char *fmt, va_list args); extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); -extern char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +extern __printf(2, 0) +char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); @@ -679,10 +680,10 @@ do { \ __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ } while (0) -extern int +extern __printf(2, 0) int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); -extern int +extern __printf(2, 0) int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode); @@ -702,7 +703,7 @@ int trace_printk(const char *fmt, ...) 
{ return 0; } -static inline int +static __printf(1, 0) inline int ftrace_vprintk(const char *fmt, va_list ap) { return 0; diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2d61b909f414..637f67002c5a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -80,8 +80,9 @@ struct kobject { extern __printf(2, 3) int kobject_set_name(struct kobject *kobj, const char *name, ...); -extern int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, - va_list vargs); +extern __printf(2, 0) +int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, + va_list vargs); static inline const char *kobject_name(const struct kobject *kobj) { diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c5d52780d6a0..3ba327af055c 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -106,6 +106,6 @@ extern void enable_mmiotrace(void); extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -extern int mmio_trace_printk(const char *fmt, va_list args); +extern __printf(1, 0) int mmio_trace_printk(const char *fmt, va_list args); #endif /* _LINUX_MMIOTRACE_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 58b1fec40d37..a6298b27ac99 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -122,7 +122,7 @@ static inline __printf(1, 2) __cold void early_printk(const char *s, ...) 
{ } #endif -typedef int(*printk_func_t)(const char *fmt, va_list args); +typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); #ifdef CONFIG_PRINTK asmlinkage __printf(5, 0) @@ -166,7 +166,7 @@ char *log_buf_addr_get(void); u32 log_buf_len_get(void); void log_buf_kexec_setup(void); void __init setup_log_buf(int early); -void dump_stack_set_arch_desc(const char *fmt, ...); +__printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); #else @@ -217,7 +217,7 @@ static inline void setup_log_buf(int early) { } -static inline void dump_stack_set_arch_desc(const char *fmt, ...) +static inline __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...) { } diff --git a/lib/kobject.c b/lib/kobject.c index 2e3bd01964a9..3e3a5c3cb330 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -337,8 +337,9 @@ error: } EXPORT_SYMBOL(kobject_init); -static int kobject_add_varg(struct kobject *kobj, struct kobject *parent, - const char *fmt, va_list vargs) +static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, + struct kobject *parent, + const char *fmt, va_list vargs) { int retval; -- cgit v1.2.3-70-g09d2 From da89947b47a3a355f33a75d7672892c147ed880d Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 17 Jul 2015 16:23:50 -0700 Subject: Update Viresh Kumar's email address Switch to my kernel.org alias instead of a badly named gmail address, which I rarely use. 
Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 4 +++- Documentation/arm/SPEAr/overview.txt | 2 +- MAINTAINERS | 12 ++++++------ arch/arm/boot/dts/spear1310-evb.dts | 2 +- arch/arm/boot/dts/spear1310.dtsi | 2 +- arch/arm/boot/dts/spear1340-evb.dts | 2 +- arch/arm/boot/dts/spear1340.dtsi | 2 +- arch/arm/boot/dts/spear13xx.dtsi | 2 +- arch/arm/boot/dts/spear300-evb.dts | 2 +- arch/arm/boot/dts/spear300.dtsi | 2 +- arch/arm/boot/dts/spear310-evb.dts | 2 +- arch/arm/boot/dts/spear310.dtsi | 2 +- arch/arm/boot/dts/spear320-evb.dts | 2 +- arch/arm/boot/dts/spear320.dtsi | 2 +- arch/arm/boot/dts/spear3xx.dtsi | 2 +- arch/arm/mach-spear/generic.h | 2 +- arch/arm/mach-spear/include/mach/irqs.h | 2 +- arch/arm/mach-spear/include/mach/misc_regs.h | 2 +- arch/arm/mach-spear/include/mach/spear.h | 2 +- arch/arm/mach-spear/include/mach/uncompress.h | 2 +- arch/arm/mach-spear/pl080.c | 2 +- arch/arm/mach-spear/pl080.h | 2 +- arch/arm/mach-spear/restart.c | 2 +- arch/arm/mach-spear/spear1310.c | 2 +- arch/arm/mach-spear/spear1340.c | 2 +- arch/arm/mach-spear/spear13xx.c | 2 +- arch/arm/mach-spear/spear300.c | 2 +- arch/arm/mach-spear/spear310.c | 2 +- arch/arm/mach-spear/spear320.c | 2 +- arch/arm/mach-spear/spear3xx.c | 2 +- drivers/ata/pata_arasan_cf.c | 4 ++-- drivers/clk/spear/clk-aux-synth.c | 2 +- drivers/clk/spear/clk-frac-synth.c | 2 +- drivers/clk/spear/clk-gpt-synth.c | 2 +- drivers/clk/spear/clk-vco-pll.c | 2 +- drivers/clk/spear/clk.c | 2 +- drivers/clk/spear/clk.h | 2 +- drivers/clk/spear/spear1310_clock.c | 2 +- drivers/clk/spear/spear1340_clock.c | 2 +- drivers/clk/spear/spear3xx_clock.c | 2 +- drivers/clk/spear/spear6xx_clock.c | 2 +- drivers/dma/dw/core.c | 2 +- drivers/irqchip/spear-shirq.c | 2 +- drivers/mfd/stmpe-i2c.c | 2 +- drivers/mfd/stmpe-spi.c | 4 ++-- drivers/mmc/host/sdhci-spear.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear.c | 2 +- drivers/pinctrl/spear/pinctrl-spear.h | 2 +- 
drivers/pinctrl/spear/pinctrl-spear1310.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear1340.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear300.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear310.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear320.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear3xx.c | 2 +- drivers/pinctrl/spear/pinctrl-spear3xx.h | 2 +- drivers/watchdog/sp805_wdt.c | 4 ++-- include/linux/amba/sp810.h | 2 +- include/linux/pata_arasan_cf_data.h | 2 +- 58 files changed, 74 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/.mailmap b/.mailmap index 977f958eedbe..8d8ad17dcf9c 100644 --- a/.mailmap +++ b/.mailmap @@ -125,7 +125,9 @@ Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Valdis Kletnieks -Viresh Kumar +Viresh Kumar +Viresh Kumar +Viresh Kumar Takashi YOSHII Yusuke Goda Gustavo Padovan diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/SPEAr/overview.txt index 65610bf52ebf..1b049be6c84f 100644 --- a/Documentation/arm/SPEAr/overview.txt +++ b/Documentation/arm/SPEAr/overview.txt @@ -60,4 +60,4 @@ Introduction Document Author --------------- - Viresh Kumar , (c) 2010-2012 ST Microelectronics + Viresh Kumar , (c) 2010-2012 ST Microelectronics diff --git a/MAINTAINERS b/MAINTAINERS index 2de150384670..093da8a3f8b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6090,7 +6090,7 @@ F: include/linux/ata.h F: include/linux/libata.h LIBATA PATA ARASAN COMPACT FLASH CONTROLLER -M: Viresh Kumar +M: Viresh Kumar L: linux-ide@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tj/libata.git S: Maintained @@ -7996,7 +7996,7 @@ S: Maintained F: drivers/pinctrl/samsung/ PIN CONTROLLER - ST SPEAR -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear @@ -8895,7 +8895,7 @@ S: Maintained F: drivers/tty/serial/ SYNOPSYS DESIGNWARE DMAC DRIVER -M: Viresh Kumar +M: Viresh Kumar M: Andy Shevchenko S: Maintained F: 
include/linux/dma/dw.h @@ -9062,7 +9062,7 @@ S: Maintained F: drivers/mmc/host/sdhci-s3c* SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-mmc@vger.kernel.org S: Maintained @@ -9600,7 +9600,7 @@ S: Maintained F: include/linux/compiler.h SPEAR PLATFORM SUPPORT -M: Viresh Kumar +M: Viresh Kumar M: Shiraz Hashim L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -9609,7 +9609,7 @@ S: Maintained F: arch/arm/mach-spear/ SPEAR CLOCK FRAMEWORK SUPPORT -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts index d42c84b1df8d..e48857249ce7 100644 --- a/arch/arm/boot/dts/spear1310-evb.dts +++ b/arch/arm/boot/dts/spear1310-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr1310 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1310.dtsi b/arch/arm/boot/dts/spear1310.dtsi index 9d342920695a..54bc6d3cf290 100644 --- a/arch/arm/boot/dts/spear1310.dtsi +++ b/arch/arm/boot/dts/spear1310.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr1310 SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. 
You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts index b23e05ed1d60..c611f5606dfe 100644 --- a/arch/arm/boot/dts/spear1340-evb.dts +++ b/arch/arm/boot/dts/spear1340-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr1340 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index 13e1aa33daa2..df2232d767ed 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr1340 SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 40accc87e3a2..14594ce8c18a 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr13xx SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear300-evb.dts b/arch/arm/boot/dts/spear300-evb.dts index 5de1431653e4..e859e8288bcd 100644 --- a/arch/arm/boot/dts/spear300-evb.dts +++ b/arch/arm/boot/dts/spear300-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr300 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. 
You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi index f79b3dfaabe6..f4e92e599729 100644 --- a/arch/arm/boot/dts/spear300.dtsi +++ b/arch/arm/boot/dts/spear300.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr300 SoC * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear310-evb.dts b/arch/arm/boot/dts/spear310-evb.dts index b09632963d15..070f2c1b7851 100644 --- a/arch/arm/boot/dts/spear310-evb.dts +++ b/arch/arm/boot/dts/spear310-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr310 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi index 95372080eea6..da210b454753 100644 --- a/arch/arm/boot/dts/spear310.dtsi +++ b/arch/arm/boot/dts/spear310.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr310 SoC * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts index fdedbb514102..1b1034477923 100644 --- a/arch/arm/boot/dts/spear320-evb.dts +++ b/arch/arm/boot/dts/spear320-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. 
You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi index ffea342aeec9..22be6e5edaac 100644 --- a/arch/arm/boot/dts/spear320.dtsi +++ b/arch/arm/boot/dts/spear320.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 SoC * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index f0e3fcf8e323..118135d75899 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr3xx SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/mach-spear/generic.h b/arch/arm/mach-spear/generic.h index a99d90a4d09c..06640914d9a0 100644 --- a/arch/arm/mach-spear/generic.h +++ b/arch/arm/mach-spear/generic.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009-2012 ST Microelectronics * Rajeev Kumar - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/irqs.h b/arch/arm/mach-spear/include/mach/irqs.h index 92da0a8c6bce..7058720c5278 100644 --- a/arch/arm/mach-spear/include/mach/irqs.h +++ b/arch/arm/mach-spear/include/mach/irqs.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009-2012 ST Microelectronics * Rajeev Kumar - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/misc_regs.h b/arch/arm/mach-spear/include/mach/misc_regs.h index 935639ce59ba..cfaf7c665b58 100644 --- a/arch/arm/mach-spear/include/mach/misc_regs.h +++ b/arch/arm/mach-spear/include/mach/misc_regs.h @@ -4,7 +4,7 @@ * Miscellaneous registers definitions for SPEAr3xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/spear.h b/arch/arm/mach-spear/include/mach/spear.h index f2d6a0176575..5ed841ccf8a3 100644 --- a/arch/arm/mach-spear/include/mach/spear.h +++ b/arch/arm/mach-spear/include/mach/spear.h @@ -3,7 +3,7 @@ * * Copyright (C) 2009,2012 ST Microelectronics * Rajeev Kumar - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/include/mach/uncompress.h b/arch/arm/mach-spear/include/mach/uncompress.h index 51b2dc93e4da..8439b9c12edb 100644 --- a/arch/arm/mach-spear/include/mach/uncompress.h +++ b/arch/arm/mach-spear/include/mach/uncompress.h @@ -4,7 +4,7 @@ * Serial port stubs for kernel decompress status messages * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/pl080.c b/arch/arm/mach-spear/pl080.c index cfa1199d0f4a..b4529f3e0ee9 100644 --- a/arch/arm/mach-spear/pl080.c +++ b/arch/arm/mach-spear/pl080.c @@ -4,7 +4,7 @@ * DMAC pl080 definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/pl080.h b/arch/arm/mach-spear/pl080.h index eb6590ded40d..608dec6725ae 100644 --- a/arch/arm/mach-spear/pl080.h +++ b/arch/arm/mach-spear/pl080.h @@ -4,7 +4,7 @@ * DMAC pl080 definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/restart.c b/arch/arm/mach-spear/restart.c index ce5e098c4888..b4342155a783 100644 --- a/arch/arm/mach-spear/restart.c +++ b/arch/arm/mach-spear/restart.c @@ -4,7 +4,7 @@ * SPEAr platform specific restart functions * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear1310.c b/arch/arm/mach-spear/spear1310.c index d9ce4d8000f0..cd5d375d91f0 100644 --- a/arch/arm/mach-spear/spear1310.c +++ b/arch/arm/mach-spear/spear1310.c @@ -4,7 +4,7 @@ * SPEAr1310 machine source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear1340.c b/arch/arm/mach-spear/spear1340.c index 3f3c0f124bd3..94594d5a446c 100644 --- a/arch/arm/mach-spear/spear1340.c +++ b/arch/arm/mach-spear/spear1340.c @@ -4,7 +4,7 @@ * SPEAr1340 machine source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear13xx.c b/arch/arm/mach-spear/spear13xx.c index 2e463a93468d..b7afce6795f4 100644 --- a/arch/arm/mach-spear/spear13xx.c +++ b/arch/arm/mach-spear/spear13xx.c @@ -4,7 +4,7 @@ * SPEAr13XX machines common source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear300.c b/arch/arm/mach-spear/spear300.c index b52e48f342f4..5b32edda2276 100644 --- a/arch/arm/mach-spear/spear300.c +++ b/arch/arm/mach-spear/spear300.c @@ -4,7 +4,7 @@ * SPEAr300 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear310.c b/arch/arm/mach-spear/spear310.c index ed2029db391f..86a44ac7ff67 100644 --- a/arch/arm/mach-spear/spear310.c +++ b/arch/arm/mach-spear/spear310.c @@ -4,7 +4,7 @@ * SPEAr310 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear320.c b/arch/arm/mach-spear/spear320.c index bf634b32a930..d45d751926c5 100644 --- a/arch/arm/mach-spear/spear320.c +++ b/arch/arm/mach-spear/spear320.c @@ -4,7 +4,7 @@ * SPEAr320 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c index bf3b1fd8cb23..23394ac76cf2 100644 --- a/arch/arm/mach-spear/spear3xx.c +++ b/arch/arm/mach-spear/spear3xx.c @@ -4,7 +4,7 @@ * SPEAr3XX machines common source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index a9b0c820f2eb..5d9ee99c2148 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller source file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any @@ -968,7 +968,7 @@ static struct platform_driver arasan_cf_driver = { module_platform_driver(arasan_cf_driver); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("Arasan ATA Compact Flash driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/clk/spear/clk-aux-synth.c b/drivers/clk/spear/clk-aux-synth.c index bdfb4421c643..f271c350ef94 100644 --- a/drivers/clk/spear/clk-aux-synth.c +++ b/drivers/clk/spear/clk-aux-synth.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk-frac-synth.c b/drivers/clk/spear/clk-frac-synth.c index dffd4ce6c8b5..58d678b5b40a 100644 --- a/drivers/clk/spear/clk-frac-synth.c +++ b/drivers/clk/spear/clk-frac-synth.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk-gpt-synth.c b/drivers/clk/spear/clk-gpt-synth.c index 1afc18c4effc..1a722e99e76e 100644 --- a/drivers/clk/spear/clk-gpt-synth.c +++ b/drivers/clk/spear/clk-gpt-synth.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk-vco-pll.c b/drivers/clk/spear/clk-vco-pll.c index 1b9b65bca51e..5ebddc528145 100644 --- a/drivers/clk/spear/clk-vco-pll.c +++ b/drivers/clk/spear/clk-vco-pll.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk.c b/drivers/clk/spear/clk.c index 628b6d5ed3d9..157fe099ea6a 100644 --- a/drivers/clk/spear/clk.c +++ b/drivers/clk/spear/clk.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk.h b/drivers/clk/spear/clk.h index 931737677dfa..9834944f08b1 100644 --- a/drivers/clk/spear/clk.h +++ b/drivers/clk/spear/clk.h @@ -2,7 +2,7 @@ * Clock framework definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c index 4daa5977793a..222ce108b41a 100644 --- a/drivers/clk/spear/spear1310_clock.c +++ b/drivers/clk/spear/spear1310_clock.c @@ -4,7 +4,7 @@ * SPEAr1310 machine clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index 5a5c6648308d..973c9d3fbcf8 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -4,7 +4,7 @@ * SPEAr1340 machine clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c index bb5f387774e2..404a55edd613 100644 --- a/drivers/clk/spear/spear3xx_clock.c +++ b/drivers/clk/spear/spear3xx_clock.c @@ -2,7 +2,7 @@ * SPEAr3xx machines clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c index 4f649c9cb094..231061fa73a4 100644 --- a/drivers/clk/spear/spear6xx_clock.c +++ b/drivers/clk/spear/spear6xx_clock.c @@ -2,7 +2,7 @@ * SPEAr6xx machines clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1022c2e1a2b0..cf1c87fa1edd 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -1746,4 +1746,4 @@ EXPORT_SYMBOL_GPL(dw_dma_enable); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/irqchip/spear-shirq.c b/drivers/irqchip/spear-shirq.c index a45121546caf..acb721b31bcf 100644 --- a/drivers/irqchip/spear-shirq.c +++ b/drivers/irqchip/spear-shirq.c @@ -2,7 +2,7 @@ * SPEAr platform shared irq layer source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * Copyright (C) 2012 ST Microelectronics * Shiraz Hashim diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c index 5c054031c3f8..e14c8c9d189b 100644 --- a/drivers/mfd/stmpe-i2c.c +++ b/drivers/mfd/stmpe-i2c.c @@ -6,7 +6,7 @@ * * License Terms: GNU General Public License, version 2 * Author: Rabin Vincent for ST-Ericsson - * Author: Viresh Kumar for ST Microelectronics + * Author: Viresh Kumar for ST Microelectronics */ #include diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index a81badbaa917..6fdb30e84a2b 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -4,7 +4,7 @@ * Copyright (C) ST Microelectronics SA 2011 * * License Terms: GNU General Public License, version 2 - * Author: Viresh Kumar for ST Microelectronics + * Author: Viresh Kumar for ST Microelectronics */ #include @@ -146,4 +146,4 @@ module_exit(stmpe_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMPE MFD SPI Interface Driver"); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index df088343d60f..255a896769b8 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ 
b/drivers/mmc/host/sdhci-spear.c @@ -4,7 +4,7 @@ * Support of SDHCI platform devices for spear soc family * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * Inspired by sdhci-pltfm.c * @@ -211,5 +211,5 @@ static struct platform_driver sdhci_driver = { module_platform_driver(sdhci_driver); MODULE_DESCRIPTION("SPEAr Secure Digital Host Controller Interface driver"); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index f87a5eaf75da..0afaf79a4e51 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * Inspired from: * - U300 Pinctl drivers diff --git a/drivers/pinctrl/spear/pinctrl-spear.h b/drivers/pinctrl/spear/pinctrl-spear.h index dc8bf85ecb2a..27c2cc8d83ad 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.h +++ b/drivers/pinctrl/spear/pinctrl-spear.h @@ -2,7 +2,7 @@ * Driver header file for the ST Microelectronics SPEAr pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/pinctrl/spear/pinctrl-spear1310.c b/drivers/pinctrl/spear/pinctrl-spear1310.c index a7bdc537efa7..92611bb757ac 100644 --- a/drivers/pinctrl/spear/pinctrl-spear1310.c +++ b/drivers/pinctrl/spear/pinctrl-spear1310.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr1310 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any @@ -2730,7 +2730,7 @@ static void __exit spear1310_pinctrl_exit(void) } module_exit(spear1310_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr1310 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear1310_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear1340.c b/drivers/pinctrl/spear/pinctrl-spear1340.c index f43ec85a0328..f842e9dc40d0 100644 --- a/drivers/pinctrl/spear/pinctrl-spear1340.c +++ b/drivers/pinctrl/spear/pinctrl-spear1340.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr1340 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -2046,7 +2046,7 @@ static void __exit spear1340_pinctrl_exit(void) } module_exit(spear1340_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr1340 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear1340_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear300.c b/drivers/pinctrl/spear/pinctrl-spear300.c index da8990a8eeef..d998a2ccff48 100644 --- a/drivers/pinctrl/spear/pinctrl-spear300.c +++ b/drivers/pinctrl/spear/pinctrl-spear300.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr300 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any @@ -703,7 +703,7 @@ static void __exit spear300_pinctrl_exit(void) } module_exit(spear300_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr300 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear300_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear310.c b/drivers/pinctrl/spear/pinctrl-spear310.c index 31ede51e819b..609b18aceb16 100644 --- a/drivers/pinctrl/spear/pinctrl-spear310.c +++ b/drivers/pinctrl/spear/pinctrl-spear310.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr310 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -426,7 +426,7 @@ static void __exit spear310_pinctrl_exit(void) } module_exit(spear310_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr310 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear310_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear320.c b/drivers/pinctrl/spear/pinctrl-spear320.c index 506e40b641e0..c07114431bd4 100644 --- a/drivers/pinctrl/spear/pinctrl-spear320.c +++ b/drivers/pinctrl/spear/pinctrl-spear320.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr320 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. 
This program is licensed "as is" without any @@ -3467,7 +3467,7 @@ static void __exit spear320_pinctrl_exit(void) } module_exit(spear320_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr320 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear320_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear3xx.c b/drivers/pinctrl/spear/pinctrl-spear3xx.c index 12ee21af766b..d3119aafe709 100644 --- a/drivers/pinctrl/spear/pinctrl-spear3xx.c +++ b/drivers/pinctrl/spear/pinctrl-spear3xx.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr3xx pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/pinctrl/spear/pinctrl-spear3xx.h b/drivers/pinctrl/spear/pinctrl-spear3xx.h index 7860b36053c4..ce19dcf8f08b 100644 --- a/drivers/pinctrl/spear/pinctrl-spear3xx.h +++ b/drivers/pinctrl/spear/pinctrl-spear3xx.h @@ -2,7 +2,7 @@ * Header file for the ST Microelectronics SPEAr3xx pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index c1b03f4235b9..4e7fec36f5c3 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -4,7 +4,7 @@ * Watchdog driver for ARM SP805 watchdog module * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2 or later. 
This program is licensed "as is" without any @@ -303,6 +303,6 @@ static struct amba_driver sp805_wdt_driver = { module_amba_driver(sp805_wdt_driver); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ARM SP805 Watchdog Driver"); MODULE_LICENSE("GPL"); diff --git a/include/linux/amba/sp810.h b/include/linux/amba/sp810.h index c7df89f99115..58fe9e8b6fd7 100644 --- a/include/linux/amba/sp810.h +++ b/include/linux/amba/sp810.h @@ -2,7 +2,7 @@ * ARM PrimeXsys System Controller SP810 header file * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h index 3cc21c9cc1e8..9fade5dd2e86 100644 --- a/include/linux/pata_arasan_cf_data.h +++ b/include/linux/pata_arasan_cf_data.h @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller platform data header file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.2.3-70-g09d2 From e2cfc91120fa01e3458167054af993fb83d7d0ec Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 17 Jul 2015 16:24:18 -0700 Subject: mm/page_owner: set correct gfp_mask on page_owner Currently, we set wrong gfp_mask to page_owner info in case of isolated freepage by compaction and split page. It causes incorrect mixed pageblock report that we can get from '/proc/pagetypeinfo'. This metric is really useful to measure fragmentation effect so should be accurate. This patch fixes it by setting correct information. Without this patch, after kernel build workload is finished, number of mixed pageblock is 112 among roughly 210 movable pageblocks. But, with this fix, output shows that mixed pageblock is just 57. 
Signed-off-by: Joonsoo Kim Cc: Mel Gorman Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_owner.h | 13 +++++++++++++ mm/page_alloc.c | 8 +++++--- mm/page_owner.c | 7 +++++++ 3 files changed, 25 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h index b48c3471c254..cacaabea8a09 100644 --- a/include/linux/page_owner.h +++ b/include/linux/page_owner.h @@ -8,6 +8,7 @@ extern struct page_ext_operations page_owner_ops; extern void __reset_page_owner(struct page *page, unsigned int order); extern void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask); +extern gfp_t __get_page_owner_gfp(struct page *page); static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -25,6 +26,14 @@ static inline void set_page_owner(struct page *page, __set_page_owner(page, order, gfp_mask); } + +static inline gfp_t get_page_owner_gfp(struct page *page) +{ + if (likely(!page_owner_inited)) + return 0; + + return __get_page_owner_gfp(page); +} #else static inline void reset_page_owner(struct page *page, unsigned int order) { @@ -33,6 +42,10 @@ static inline void set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { } +static inline gfp_t get_page_owner_gfp(struct page *page) +{ + return 0; +} #endif /* CONFIG_PAGE_OWNER */ #endif /* __LINUX_PAGE_OWNER_H */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fbba675a0bd9..ef19f22b2b7d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1948,6 +1948,7 @@ void free_hot_cold_page_list(struct list_head *list, bool cold) void split_page(struct page *page, unsigned int order) { int i; + gfp_t gfp_mask; VM_BUG_ON_PAGE(PageCompound(page), page); VM_BUG_ON_PAGE(!page_count(page), page); @@ -1961,10 +1962,11 @@ void split_page(struct page *page, unsigned int order) split_page(virt_to_page(page[0].shadow), order); #endif - set_page_owner(page, 0, 0); + gfp_mask = 
get_page_owner_gfp(page); + set_page_owner(page, 0, gfp_mask); for (i = 1; i < (1 << order); i++) { set_page_refcounted(page + i); - set_page_owner(page + i, 0, 0); + set_page_owner(page + i, 0, gfp_mask); } } EXPORT_SYMBOL_GPL(split_page); @@ -1994,7 +1996,7 @@ int __isolate_free_page(struct page *page, unsigned int order) zone->free_area[order].nr_free--; rmv_page_order(page); - set_page_owner(page, order, 0); + set_page_owner(page, order, __GFP_MOVABLE); /* Set the pageblock if the isolated page is at least a pageblock */ if (order >= pageblock_order - 1) { diff --git a/mm/page_owner.c b/mm/page_owner.c index bd5f842b56d2..983c3a10fa07 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -76,6 +76,13 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) __set_bit(PAGE_EXT_OWNER, &page_ext->flags); } +gfp_t __get_page_owner_gfp(struct page *page) +{ + struct page_ext *page_ext = lookup_page_ext(page); + + return page_ext->gfp_mask; +} + static ssize_t print_page_owner(char __user *buf, size_t count, unsigned long pfn, struct page *page, struct page_ext *page_ext) -- cgit v1.2.3-70-g09d2 From 0c8c0f03e3a292e031596484275c14cf39c0ab7a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 17 Jul 2015 12:28:11 +0200 Subject: x86/fpu, sched: Dynamically allocate 'struct fpu' The FPU rewrite removed the dynamic allocations of 'struct fpu'. But, this potentially wastes massive amounts of memory (2k per task on systems that do not have AVX-512 for instance). Instead of having a separate slab, this patch just appends the space that we need to the 'task_struct' which we dynamically allocate already. This saves from doing an extra slab allocation at fork(). The only real downside here is that we have to stick everything at the end of the task_struct. But, I think the BUILD_BUG_ON()s I stuck in there should keep that from being too fragile.
Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437128892-9831-2-git-send-email-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/types.h | 72 +++++++++++++++++++++------------------- arch/x86/include/asm/processor.h | 10 ++++-- arch/x86/kernel/fpu/init.c | 39 ++++++++++++++++++++++ arch/x86/kernel/process.c | 2 +- fs/proc/kcore.c | 4 +-- include/linux/sched.h | 12 +++++-- kernel/fork.c | 8 ++++- 7 files changed, 104 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 0637826292de..c49c5173158e 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -189,6 +189,7 @@ union fpregs_state { struct fxregs_state fxsave; struct swregs_state soft; struct xregs_state xsave; + u8 __padding[PAGE_SIZE]; }; /* @@ -197,40 +198,6 @@ union fpregs_state { * state fields: */ struct fpu { - /* - * @state: - * - * In-memory copy of all FPU registers that we save/restore - * over context switches. If the task is using the FPU then - * the registers in the FPU are more recent than this state - * copy. If the task context-switches away then they get - * saved here and represent the FPU state. - * - * After context switches there may be a (short) time period - * during which the in-FPU hardware registers are unchanged - * and still perfectly match this state, if the tasks - * scheduled afterwards are not using the FPU. - * - * This is the 'lazy restore' window of optimization, which - * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. - * - * We detect whether a subsequent task uses the FPU via setting - * CR0::TS to 1, which causes any FPU use to raise a #NM fault. 
- * - * During this window, if the task gets scheduled again, we - * might be able to skip having to do a restore from this - * memory buffer to the hardware registers - at the cost of - * incurring the overhead of #NM fault traps. - * - * Note that on modern CPUs that support the XSAVEOPT (or other - * optimized XSAVE instructions), we don't use #NM traps anymore, - * as the hardware can track whether FPU registers need saving - * or not. On such CPUs we activate the non-lazy ('eagerfpu') - * logic, which unconditionally saves/restores all FPU state - * across context switches. (if FPU state exists.) - */ - union fpregs_state state; - /* * @last_cpu: * @@ -288,6 +255,43 @@ struct fpu { * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; + /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore + * over context switches. If the task is using the FPU then + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. + * + * After context switches there may be a (short) time period + * during which the in-FPU hardware registers are unchanged + * and still perfectly match this state, if the tasks + * scheduled afterwards are not using the FPU. + * + * This is the 'lazy restore' window of optimization, which + * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. + * + * We detect whether a subsequent task uses the FPU via setting + * CR0::TS to 1, which causes any FPU use to raise a #NM fault. + * + * During this window, if the task gets scheduled again, we + * might be able to skip having to do a restore from this + * memory buffer to the hardware registers - at the cost of + * incurring the overhead of #NM fault traps. 
+ * + * Note that on modern CPUs that support the XSAVEOPT (or other + * optimized XSAVE instructions), we don't use #NM traps anymore, + * as the hardware can track whether FPU registers need saving + * or not. On such CPUs we activate the non-lazy ('eagerfpu') + * logic, which unconditionally saves/restores all FPU state + * across context switches. (if FPU state exists.) + */ + union fpregs_state state; + /* + * WARNING: 'state' is dynamically-sized. Do not put + * anything after it here. + */ }; #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 43e6519df0d5..944f1785ed0d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -390,9 +390,6 @@ struct thread_struct { #endif unsigned long gs; - /* Floating point and extended processor state */ - struct fpu fpu; - /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ @@ -418,6 +415,13 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* + * WARNING: 'fpu' is dynamically-sized. It *MUST* be at + * the end. + */ }; /* diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 32826791e675..deacbfa6b33e 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -136,6 +136,45 @@ static void __init fpu__init_system_generic(void) unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ + BUILD_BUG_ON((sizeof(TYPE) - \ + offsetof(TYPE, MEMBER) - \ + sizeof(((TYPE *)0)->MEMBER)) > \ + 0) \ + +/* + * We append the 'struct fpu' to the task_struct. + */ +int __weak arch_task_struct_size(void) +{ + int task_size = sizeof(struct task_struct); + + /* + * Subtract off the static size of the register state. 
+ * It potentially has a bunch of padding. + */ + task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); + + /* + * Add back the dynamically-calculated register state + * size. + */ + task_size += xstate_size; + + /* + * We dynamically size 'struct fpu', so we require that + * it be at the end of 'thread_struct' and that + * 'thread_struct' be at the end of 'task_struct'. If + * you hit a compile error here, check the structure to + * see if something got added to the end. + */ + CHECK_MEMBER_AT_END_OF(struct fpu, state); + CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); + CHECK_MEMBER_AT_END_OF(struct task_struct, thread); + + return task_size; +} + /* * Set up the xstate_size based on the legacy FPU context size. * diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 9cad694ed7c4..975420eac105 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - *dst = *src; + memcpy(dst, src, arch_task_struct_size()); return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 91a4e6426321..a0fe99485687 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) roundup(sizeof(CORE_STR), 4)) + roundup(sizeof(struct elf_prstatus), 4) + roundup(sizeof(struct elf_prpsinfo), 4) + - roundup(sizeof(struct task_struct), 4); + roundup(arch_task_struct_size(), 4); *elf_buflen = PAGE_ALIGN(*elf_buflen); return size + *elf_buflen; } @@ -415,7 +415,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) /* set up the task structure */ notes[2].name = CORE_STR; notes[2].type = NT_TASKSTRUCT; - notes[2].datasz = sizeof(struct task_struct); + notes[2].datasz = arch_task_struct_size(); notes[2].data = current; nhdr->p_filesz += notesize(&notes[2]); diff --git
a/include/linux/sched.h b/include/linux/sched.h index ae21f1591615..e43a41d892b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1522,8 +1522,6 @@ struct task_struct { /* hung task detection */ unsigned long last_switch_count; #endif -/* CPU-specific state of this task */ - struct thread_struct thread; /* filesystem information */ struct fs_struct *fs; /* open file information */ @@ -1778,8 +1776,18 @@ struct task_struct { unsigned long task_state_change; #endif int pagefault_disabled; +/* CPU-specific state of this task */ + struct thread_struct thread; +/* + * WARNING: on x86, 'thread_struct' contains a variable-sized + * structure. It *MUST* be at the end of 'task_struct'. + * + * Do not put anything below here! + */ }; +extern int arch_task_struct_size(void); + /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..431b67a6098c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -287,15 +287,21 @@ static void set_max_threads(unsigned int max_threads_suggested) max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); } +int __weak arch_task_struct_size(void) +{ + return sizeof(struct task_struct); +} + void __init fork_init(void) { + int task_struct_size = arch_task_struct_size(); #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES #endif /* create a slab on which task_structs can be allocated */ task_struct_cachep = - kmem_cache_create("task_struct", sizeof(struct task_struct), + kmem_cache_create("task_struct", task_struct_size, ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif -- cgit v1.2.3-70-g09d2 From 5aaeb5c01c5b6c0be7b7aadbf3ace9f3a4458c3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 17 Jul 2015 12:28:12 +0200 Subject: x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86 Don't burden architectures 
without dynamic task_struct sizing with the overhead of dynamic sizing. Also optimize the x86 code a bit by caching task_struct_size. Acked-and-Tested-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1437128892-9831-3-git-send-email-mingo@kernel.org Signed-off-by: Ingo Molnar --- arch/Kconfig | 4 ++++ arch/x86/Kconfig | 1 + arch/x86/kernel/fpu/init.c | 17 +++++++++-------- arch/x86/kernel/process.c | 2 +- fs/proc/kcore.c | 4 ++-- include/linux/sched.h | 6 +++++- kernel/fork.c | 11 +++++------ 7 files changed, 27 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/Kconfig b/arch/Kconfig index bec6666a3cc4..8a8ea7110de8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -221,6 +221,10 @@ config ARCH_TASK_STRUCT_ALLOCATOR config ARCH_THREAD_INFO_ALLOCATOR bool +# Select if arch wants to size task_struct dynamically via arch_task_struct_size: +config ARCH_WANTS_DYNAMIC_TASK_STRUCT + bool + config HAVE_REGS_AND_STACK_ACCESS_API bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3dbb7e7909ca..b3a1a5d77d92 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -41,6 +41,7 @@ config X86 select ARCH_USE_CMPXCHG_LOCKREF if X86_64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index deacbfa6b33e..0b39173dd971 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -4,6 +4,8 @@ #include #include +#include + /* * Initialize the TS bit in CR0 according to the style of context-switches * we are using: @@ -136,16 +138,14 @@ static void __init fpu__init_system_generic(void) unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); 
-#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ - BUILD_BUG_ON((sizeof(TYPE) - \ - offsetof(TYPE, MEMBER) - \ - sizeof(((TYPE *)0)->MEMBER)) > \ - 0) \ +/* Enforce that 'MEMBER' is the last field of 'TYPE': */ +#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ + BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) /* - * We append the 'struct fpu' to the task_struct. + * We append the 'struct fpu' to the task_struct: */ -int __weak arch_task_struct_size(void) +static void __init fpu__init_task_struct_size(void) { int task_size = sizeof(struct task_struct); @@ -172,7 +172,7 @@ int __weak arch_task_struct_size(void) CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); CHECK_MEMBER_AT_END_OF(struct task_struct, thread); - return task_size; + arch_task_struct_size = task_size; } /* @@ -326,6 +326,7 @@ void __init fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); + fpu__init_task_struct_size(); fpu__init_system_ctx_switch(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 975420eac105..397688beed4b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -81,7 +81,7 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister); */ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - memcpy(dst, src, arch_task_struct_size()); + memcpy(dst, src, arch_task_struct_size); return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index a0fe99485687..92e6726f6e37 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -92,7 +92,7 @@ static size_t get_kcore_size(int *nphdr, size_t *elf_buflen) roundup(sizeof(CORE_STR), 4)) + roundup(sizeof(struct elf_prstatus), 4) + roundup(sizeof(struct elf_prpsinfo), 4) + - roundup(arch_task_struct_size(), 4); + roundup(arch_task_struct_size, 4); *elf_buflen = PAGE_ALIGN(*elf_buflen); return size + *elf_buflen; } @@ -415,7 +415,7 @@ static void 
elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff) /* set up the task structure */ notes[2].name = CORE_STR; notes[2].type = NT_TASKSTRUCT; - notes[2].datasz = arch_task_struct_size(); + notes[2].datasz = arch_task_struct_size; notes[2].data = current; nhdr->p_filesz += notesize(&notes[2]); diff --git a/include/linux/sched.h b/include/linux/sched.h index e43a41d892b6..04b5ada460b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1786,7 +1786,11 @@ struct task_struct { */ }; -extern int arch_task_struct_size(void); +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +extern int arch_task_struct_size __read_mostly; +#else +# define arch_task_struct_size (sizeof(struct task_struct)) +#endif /* Future-safe accessor for struct task_struct's cpus_allowed. */ #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed) diff --git a/kernel/fork.c b/kernel/fork.c index 431b67a6098c..dbd9b8d7b7cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -287,21 +287,20 @@ static void set_max_threads(unsigned int max_threads_suggested) max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); } -int __weak arch_task_struct_size(void) -{ - return sizeof(struct task_struct); -} +#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT +/* Initialized by the architecture: */ +int arch_task_struct_size __read_mostly; +#endif void __init fork_init(void) { - int task_struct_size = arch_task_struct_size(); #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES #endif /* create a slab on which task_structs can be allocated */ task_struct_cachep = - kmem_cache_create("task_struct", task_struct_size, + kmem_cache_create("task_struct", arch_task_struct_size, ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); #endif -- cgit v1.2.3-70-g09d2 From 5c31252c4a86dc591c23f1a951edd52ad791ef0e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 1 Jul 2015 10:21:47 +0200 Subject: pwm: Add the pwm_is_enabled() helper Some PWM drivers are
testing the PWMF_ENABLED flag. Create a helper function to hide the logic behind enabled test. This will allow us to smoothly move from the current approach to an atomic PWM update approach. Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- drivers/pwm/core.c | 4 ++-- drivers/pwm/pwm-atmel-tcb.c | 2 +- drivers/pwm/pwm-atmel.c | 6 +++--- drivers/pwm/pwm-bcm-kona.c | 4 ++-- drivers/pwm/pwm-ep93xx.c | 4 ++-- drivers/pwm/pwm-imx.c | 2 +- drivers/pwm/pwm-mxs.c | 4 ++-- drivers/pwm/pwm-renesas-tpu.c | 2 +- drivers/pwm/pwm-tegra.c | 6 +++--- drivers/pwm/pwm-tiecap.c | 10 +++++----- drivers/pwm/pwm-tiehrpwm.c | 6 +++--- drivers/pwm/sysfs.c | 2 +- include/linux/pwm.h | 5 +++++ 13 files changed, 31 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index 3a7769fe53de..f7c11d2dec37 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -455,7 +455,7 @@ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity) if (!pwm->chip->ops->set_polarity) return -ENOSYS; - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (pwm_is_enabled(pwm)) return -EBUSY; err = pwm->chip->ops->set_polarity(pwm->chip, pwm, polarity); @@ -853,7 +853,7 @@ static void pwm_dbg_show(struct pwm_chip *chip, struct seq_file *s) if (test_bit(PWMF_REQUESTED, &pwm->flags)) seq_puts(s, " requested"); - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (pwm_is_enabled(pwm)) seq_puts(s, " enabled"); seq_puts(s, "\n"); diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index d14e0677c92d..6da01b3bf6f4 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -347,7 +347,7 @@ static int atmel_tcb_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, tcbpwm->duty = duty; /* If the PWM is enabled, call enable to apply the new conf */ - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (pwm_is_enabled(pwm)) atmel_tcb_pwm_enable(chip, pwm); return 0; diff --git a/drivers/pwm/pwm-atmel.c 
b/drivers/pwm/pwm-atmel.c index a947c9095d9d..b3b294de88e0 100644 --- a/drivers/pwm/pwm-atmel.c +++ b/drivers/pwm/pwm-atmel.c @@ -114,7 +114,7 @@ static int atmel_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, u32 val; int ret; - if (test_bit(PWMF_ENABLED, &pwm->flags) && (period_ns != pwm->period)) { + if (pwm_is_enabled(pwm) && (period_ns != pwm->period)) { dev_err(chip->dev, "cannot change PWM period while enabled\n"); return -EBUSY; } @@ -176,7 +176,7 @@ static void atmel_pwm_config_v1(struct pwm_chip *chip, struct pwm_device *pwm, * If the PWM channel is enabled, only update CDTY by using the update * register, it needs to set bit 10 of CMR to 0 */ - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (pwm_is_enabled(pwm)) return; /* * If the PWM channel is disabled, write value to duty and period @@ -191,7 +191,7 @@ static void atmel_pwm_config_v2(struct pwm_chip *chip, struct pwm_device *pwm, { struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip); - if (test_bit(PWMF_ENABLED, &pwm->flags)) { + if (pwm_is_enabled(pwm)) { /* * If the PWM channel is enabled, using the duty update register * to update the value. 
diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c index 7af8fea2dc5b..dfdcf88279ae 100644 --- a/drivers/pwm/pwm-bcm-kona.c +++ b/drivers/pwm/pwm-bcm-kona.c @@ -134,7 +134,7 @@ static int kona_pwmc_config(struct pwm_chip *chip, struct pwm_device *pwm, } /* If the PWM channel is enabled, write the settings to the HW */ - if (test_bit(PWMF_ENABLED, &pwm->flags)) { + if (pwm_is_enabled(pwm)) { value = readl(kp->base + PRESCALE_OFFSET); value &= ~PRESCALE_MASK(chan); value |= prescale << PRESCALE_SHIFT(chan); @@ -287,7 +287,7 @@ static int kona_pwmc_remove(struct platform_device *pdev) unsigned int chan; for (chan = 0; chan < kp->chip.npwm; chan++) - if (test_bit(PWMF_ENABLED, &kp->chip.pwms[chan].flags)) + if (pwm_is_enabled(&kp->chip.pwms[chan])) clk_disable_unprepare(kp->clk); return pwmchip_remove(&kp->chip); diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c index e593e9c45c51..bbf10ae02f0e 100644 --- a/drivers/pwm/pwm-ep93xx.c +++ b/drivers/pwm/pwm-ep93xx.c @@ -82,7 +82,7 @@ static int ep93xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * The clock needs to be enabled to access the PWM registers. * Configuration can be changed at any time. 
*/ - if (!test_bit(PWMF_ENABLED, &pwm->flags)) { + if (!pwm_is_enabled(pwm)) { ret = clk_enable(ep93xx_pwm->clk); if (ret) return ret; @@ -113,7 +113,7 @@ static int ep93xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, ret = -EINVAL; } - if (!test_bit(PWMF_ENABLED, &pwm->flags)) + if (!pwm_is_enabled(pwm)) clk_disable(ep93xx_pwm->clk); return ret; diff --git a/drivers/pwm/pwm-imx.c b/drivers/pwm/pwm-imx.c index 66d6f0c5c421..008dc646225e 100644 --- a/drivers/pwm/pwm-imx.c +++ b/drivers/pwm/pwm-imx.c @@ -114,7 +114,7 @@ static int imx_pwm_config_v2(struct pwm_chip *chip, unsigned long long c; unsigned long period_cycles, duty_cycles, prescale; unsigned int period_ms; - bool enable = test_bit(PWMF_ENABLED, &pwm->flags); + bool enable = pwm_is_enabled(pwm); int wait_count = 0, fifoav; u32 cr, sr; diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c index b430811e14f5..9a596324ebef 100644 --- a/drivers/pwm/pwm-mxs.c +++ b/drivers/pwm/pwm-mxs.c @@ -77,7 +77,7 @@ static int mxs_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * If the PWM channel is disabled, make sure to turn on the clock * before writing the register. Otherwise, keep it enabled. */ - if (!test_bit(PWMF_ENABLED, &pwm->flags)) { + if (!pwm_is_enabled(pwm)) { ret = clk_prepare_enable(mxs->clk); if (ret) return ret; @@ -92,7 +92,7 @@ static int mxs_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, /* * If the PWM is not enabled, turn the clock off again to save power. */ - if (!test_bit(PWMF_ENABLED, &pwm->flags)) + if (!pwm_is_enabled(pwm)) clk_disable_unprepare(mxs->clk); return 0; diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c index ee63f9e9d0fb..075c1a764ba2 100644 --- a/drivers/pwm/pwm-renesas-tpu.c +++ b/drivers/pwm/pwm-renesas-tpu.c @@ -301,7 +301,7 @@ static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *_pwm, pwm->duty = duty; /* If the channel is disabled we're done. 
*/ - if (!test_bit(PWMF_ENABLED, &_pwm->flags)) + if (!pwm_is_enabled(_pwm)) return 0; if (duty_only && pwm->timer_on) { diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c index cabd7d8e05cc..d4de0607b502 100644 --- a/drivers/pwm/pwm-tegra.c +++ b/drivers/pwm/pwm-tegra.c @@ -112,7 +112,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, * If the PWM channel is disabled, make sure to turn on the clock * before writing the register. Otherwise, keep it enabled. */ - if (!test_bit(PWMF_ENABLED, &pwm->flags)) { + if (!pwm_is_enabled(pwm)) { err = clk_prepare_enable(pc->clk); if (err < 0) return err; @@ -124,7 +124,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, /* * If the PWM is not enabled, turn the clock off again to save power. */ - if (!test_bit(PWMF_ENABLED, &pwm->flags)) + if (!pwm_is_enabled(pwm)) clk_disable_unprepare(pc->clk); return 0; @@ -214,7 +214,7 @@ static int tegra_pwm_remove(struct platform_device *pdev) for (i = 0; i < NUM_PWM; i++) { struct pwm_device *pwm = &pc->chip.pwms[i]; - if (!test_bit(PWMF_ENABLED, &pwm->flags)) + if (!pwm_is_enabled(pwm)) if (clk_prepare_enable(pc->clk) < 0) continue; diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c index e557befdf4e6..616af764a276 100644 --- a/drivers/pwm/pwm-tiecap.c +++ b/drivers/pwm/pwm-tiecap.c @@ -97,7 +97,7 @@ static int ecap_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, writew(reg_val, pc->mmio_base + ECCTL2); - if (!test_bit(PWMF_ENABLED, &pwm->flags)) { + if (!pwm_is_enabled(pwm)) { /* Update active registers if not running */ writel(duty_cycles, pc->mmio_base + CAP2); writel(period_cycles, pc->mmio_base + CAP1); @@ -111,7 +111,7 @@ static int ecap_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, writel(period_cycles, pc->mmio_base + CAP3); } - if (!test_bit(PWMF_ENABLED, &pwm->flags)) { + if (!pwm_is_enabled(pwm)) { reg_val = readw(pc->mmio_base + ECCTL2); /* Disable APWM mode to put APWM 
output Low */ reg_val &= ~ECCTL2_APWM_MODE; @@ -179,7 +179,7 @@ static void ecap_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) static void ecap_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { - if (test_bit(PWMF_ENABLED, &pwm->flags)) { + if (pwm_is_enabled(pwm)) { dev_warn(chip->dev, "Removing PWM device without disabling\n"); pm_runtime_put_sync(chip->dev); } @@ -306,7 +306,7 @@ static int ecap_pwm_suspend(struct device *dev) ecap_pwm_save_context(pc); /* Disable explicitly if PWM is running */ - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (pwm_is_enabled(pwm)) pm_runtime_put_sync(dev); return 0; @@ -318,7 +318,7 @@ static int ecap_pwm_resume(struct device *dev) struct pwm_device *pwm = pc->chip.pwms; /* Enable explicitly if PWM was running */ - if (test_bit(PWMF_ENABLED, &pwm->flags)) + if (pwm_is_enabled(pwm)) pm_runtime_get_sync(dev); ecap_pwm_restore_context(pc); diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c index 694b3cf7694b..6a41e66015b6 100644 --- a/drivers/pwm/pwm-tiehrpwm.c +++ b/drivers/pwm/pwm-tiehrpwm.c @@ -407,7 +407,7 @@ static void ehrpwm_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { struct ehrpwm_pwm_chip *pc = to_ehrpwm_pwm_chip(chip); - if (test_bit(PWMF_ENABLED, &pwm->flags)) { + if (pwm_is_enabled(pwm)) { dev_warn(chip->dev, "Removing PWM device without disabling\n"); pm_runtime_put_sync(chip->dev); } @@ -565,7 +565,7 @@ static int ehrpwm_pwm_suspend(struct device *dev) for (i = 0; i < pc->chip.npwm; i++) { struct pwm_device *pwm = &pc->chip.pwms[i]; - if (!test_bit(PWMF_ENABLED, &pwm->flags)) + if (!pwm_is_enabled(pwm)) continue; /* Disable explicitly if PWM is running */ @@ -582,7 +582,7 @@ static int ehrpwm_pwm_resume(struct device *dev) for (i = 0; i < pc->chip.npwm; i++) { struct pwm_device *pwm = &pc->chip.pwms[i]; - if (!test_bit(PWMF_ENABLED, &pwm->flags)) + if (!pwm_is_enabled(pwm)) continue; /* Enable explicitly if PWM was running */ diff --git a/drivers/pwm/sysfs.c 
b/drivers/pwm/sysfs.c index 4bd0c639e16d..eecf21d68108 100644 --- a/drivers/pwm/sysfs.c +++ b/drivers/pwm/sysfs.c @@ -97,7 +97,7 @@ static ssize_t pwm_enable_show(struct device *child, char *buf) { const struct pwm_device *pwm = child_to_pwm_device(child); - int enabled = test_bit(PWMF_ENABLED, &pwm->flags); + int enabled = pwm_is_enabled(pwm); return sprintf(buf, "%d\n", enabled); } diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 36262d08a9da..ec34f4d9a9ee 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -92,6 +92,11 @@ struct pwm_device { enum pwm_polarity polarity; }; +static inline bool pwm_is_enabled(const struct pwm_device *pwm) +{ + return test_bit(PWMF_ENABLED, &pwm->flags); +} + static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) { if (pwm) -- cgit v1.2.3-70-g09d2 From a1cf42171a2e3c33cbc12bb037795caf0589149b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 1 Jul 2015 10:21:48 +0200 Subject: pwm: Constify PWM device where possible The PWM argument is not modified in PWM property accessors, make it a const argument so that the accessors can be used from sysfs. Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index ec34f4d9a9ee..d8f691339a45 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -103,7 +103,7 @@ static inline void pwm_set_period(struct pwm_device *pwm, unsigned int period) pwm->period = period; } -static inline unsigned int pwm_get_period(struct pwm_device *pwm) +static inline unsigned int pwm_get_period(const struct pwm_device *pwm) { return pwm ? 
pwm->period : 0; } @@ -114,7 +114,7 @@ static inline void pwm_set_duty_cycle(struct pwm_device *pwm, unsigned int duty) pwm->duty_cycle = duty; } -static inline unsigned int pwm_get_duty_cycle(struct pwm_device *pwm) +static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) { return pwm ? pwm->duty_cycle : 0; } -- cgit v1.2.3-70-g09d2 From 011e76314818b6a24d5347b2d83b8a577e6aaae6 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 1 Jul 2015 10:21:49 +0200 Subject: pwm: Add pwm_get_polarity() helper function Some drivers are directly accessing the ->polarity field in pwm_device. Add a helper to retrieve the current polarity so that we can easily move this field elsewhere (required to support atomic update). Signed-off-by: Boris Brezillon Signed-off-by: Thierry Reding --- include/linux/pwm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d8f691339a45..6f286df30021 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -124,6 +124,11 @@ static inline unsigned int pwm_get_duty_cycle(const struct pwm_device *pwm) */ int pwm_set_polarity(struct pwm_device *pwm, enum pwm_polarity polarity); +static inline enum pwm_polarity pwm_get_polarity(const struct pwm_device *pwm) +{ + return pwm ? pwm->polarity : PWM_POLARITY_NORMAL; +} + /** * struct pwm_ops - PWM controller operations * @request: optional hook for requesting a PWM -- cgit v1.2.3-70-g09d2 From 87dc11220df266d7d4b6dc594b55d0729b92809d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 15 Jul 2015 11:21:40 -0700 Subject: clockevents: Remove clockevents_notify() prototype This function no longer exists after commit a49b116dcb12 (clockevents: Cleanup dead cpu explicitely, 2015-04-03). Remove the prototype and the stub function. Signed-off-by: Stephen Boyd Cc: trivial@kernel.org Cc: Rafael J. 
Wysocki Link: http://lkml.kernel.org/r/1436984500-5425-1-git-send-email-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 597a1e836f22..31ce435981fe 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -234,13 +234,10 @@ static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } # endif -extern int clockevents_notify(unsigned long reason, void *arg); - #else /* !CONFIG_GENERIC_CLOCKEVENTS: */ static inline void clockevents_suspend(void) { } static inline void clockevents_resume(void) { } -static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; } static inline int tick_check_broadcast_expired(void) { return 0; } static inline void tick_setup_hrtimer_broadcast(void) { } -- cgit v1.2.3-70-g09d2 From 0642ef6f2992eba46c41abb5ceb7d4fa14ba888e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 23 Jun 2015 14:32:54 +0100 Subject: debugfs: Export bool read/write functions The file read/write functions for bools have no special dependencies on debugfs internals and are sufficiently non-trivial to be worth exporting so clients can re-use the implementation. 
Signed-off-by: Richard Fitzgerald Acked-by: Greg Kroah-Hartman Signed-off-by: Mark Brown --- fs/debugfs/file.c | 14 ++++++++------ include/linux/debugfs.h | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 284f9aa0028b..6c55ade071c3 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -435,8 +435,8 @@ struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, } EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); -static ssize_t read_file_bool(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) { char buf[3]; u32 *val = file->private_data; @@ -449,9 +449,10 @@ static ssize_t read_file_bool(struct file *file, char __user *user_buf, buf[2] = 0x00; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } +EXPORT_SYMBOL_GPL(debugfs_read_file_bool); -static ssize_t write_file_bool(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) +ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) { char buf[32]; size_t buf_size; @@ -468,10 +469,11 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf, return count; } +EXPORT_SYMBOL_GPL(debugfs_write_file_bool); static const struct file_operations fops_bool = { - .read = read_file_bool, - .write = write_file_bool, + .read = debugfs_read_file_bool, + .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 420311bcee38..9beb636b97eb 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -116,6 +116,12 @@ struct dentry *debugfs_create_devm_seqfile(struct device *dev, const char *name, bool debugfs_initialized(void); +ssize_t debugfs_read_file_bool(struct file *file, char 
__user *user_buf, + size_t count, loff_t *ppos); + +ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos); + #else #include @@ -282,6 +288,20 @@ static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, return ERR_PTR(-ENODEV); } +static inline ssize_t debugfs_read_file_bool(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif #endif -- cgit v1.2.3-70-g09d2 From 584ac4e935a1f905d67c8fa3fbe8e32d384721f1 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 19 Jun 2015 15:00:46 -0700 Subject: clk: tegra: Properly include clk.h Clock provider drivers generally shouldn't include clk.h because it's the consumer API. Only include clk.h in files that are using it. Also add in a clkdev.h include that was missing in a file using clkdev APIs. 
Cc: Peter De Schrijver Cc: Thierry Reding Signed-off-by: Stephen Boyd --- drivers/clk/tegra/clk-divider.c | 1 - drivers/clk/tegra/clk-periph-gate.c | 1 - drivers/clk/tegra/clk-periph.c | 1 - drivers/clk/tegra/clk-pll-out.c | 1 - drivers/clk/tegra/clk-pll.c | 2 +- drivers/clk/tegra/clk-super.c | 1 - drivers/clk/tegra/clk-tegra-audio.c | 1 - drivers/clk/tegra/clk-tegra-fixed.c | 1 - drivers/clk/tegra/clk-tegra-periph.c | 1 - drivers/clk/tegra/clk-tegra-pmc.c | 1 - drivers/clk/tegra/clk-tegra-super-gen4.c | 1 - drivers/clk/tegra/clk-tegra114.c | 2 -- drivers/clk/tegra/clk-tegra124.c | 1 - drivers/clk/tegra/clk-tegra20.c | 1 - drivers/clk/tegra/clk-tegra30.c | 1 - drivers/clk/tegra/clk.c | 1 + include/linux/clk/tegra.h | 3 ++- 17 files changed, 4 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/tegra/clk-divider.c b/drivers/clk/tegra/clk-divider.c index 59a5714dfe18..48c83efda4cf 100644 --- a/drivers/clk/tegra/clk-divider.c +++ b/drivers/clk/tegra/clk-divider.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "clk.h" diff --git a/drivers/clk/tegra/clk-periph-gate.c b/drivers/clk/tegra/clk-periph-gate.c index 0aa8830ae7cc..d28d6e95020f 100644 --- a/drivers/clk/tegra/clk-periph-gate.c +++ b/drivers/clk/tegra/clk-periph-gate.c @@ -14,7 +14,6 @@ * along with this program. If not, see . */ -#include #include #include #include diff --git a/drivers/clk/tegra/clk-periph.c b/drivers/clk/tegra/clk-periph.c index d84ae49d0e05..ec5b6113b012 100644 --- a/drivers/clk/tegra/clk-periph.c +++ b/drivers/clk/tegra/clk-periph.c @@ -14,7 +14,6 @@ * along with this program. If not, see . 
*/ -#include #include #include #include diff --git a/drivers/clk/tegra/clk-pll-out.c b/drivers/clk/tegra/clk-pll-out.c index 3598987a451d..257cae0c1488 100644 --- a/drivers/clk/tegra/clk-pll-out.c +++ b/drivers/clk/tegra/clk-pll-out.c @@ -20,7 +20,6 @@ #include #include #include -#include #include "clk.h" diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index 05c6d08a6695..63499c461482 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include "clk.h" diff --git a/drivers/clk/tegra/clk-super.c b/drivers/clk/tegra/clk-super.c index 2fd924d38606..131d1b5085e2 100644 --- a/drivers/clk/tegra/clk-super.c +++ b/drivers/clk/tegra/clk-super.c @@ -20,7 +20,6 @@ #include #include #include -#include #include "clk.h" diff --git a/drivers/clk/tegra/clk-tegra-audio.c b/drivers/clk/tegra/clk-tegra-audio.c index 5c38aab2c5b8..11e3ad7ad7a3 100644 --- a/drivers/clk/tegra/clk-tegra-audio.c +++ b/drivers/clk/tegra/clk-tegra-audio.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra-fixed.c b/drivers/clk/tegra/clk-tegra-fixed.c index 605676d368eb..da0b5941c89f 100644 --- a/drivers/clk/tegra/clk-tegra-fixed.c +++ b/drivers/clk/tegra/clk-tegra-fixed.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 46af9244ba74..cb6ab830941d 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra-pmc.c b/drivers/clk/tegra/clk-tegra-pmc.c index 08b21c1ee867..91377abfefa1 100644 --- a/drivers/clk/tegra/clk-tegra-pmc.c +++ b/drivers/clk/tegra/clk-tegra-pmc.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra-super-gen4.c 
b/drivers/clk/tegra/clk-tegra-super-gen4.c index feb3201c85ce..ecd7ff736b74 100644 --- a/drivers/clk/tegra/clk-tegra-super-gen4.c +++ b/drivers/clk/tegra/clk-tegra-super-gen4.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c index 8237d16b4075..db5871519bf5 100644 --- a/drivers/clk/tegra/clk-tegra114.c +++ b/drivers/clk/tegra/clk-tegra114.c @@ -15,9 +15,7 @@ */ #include -#include #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra124.c b/drivers/clk/tegra/clk-tegra124.c index e8cca3eac007..0c44cc7f8558 100644 --- a/drivers/clk/tegra/clk-tegra124.c +++ b/drivers/clk/tegra/clk-tegra124.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra20.c b/drivers/clk/tegra/clk-tegra20.c index 41272dcc9e22..bf004f0e4f65 100644 --- a/drivers/clk/tegra/clk-tegra20.c +++ b/drivers/clk/tegra/clk-tegra20.c @@ -15,7 +15,6 @@ */ #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk-tegra30.c b/drivers/clk/tegra/clk-tegra30.c index 0af3e834dd24..fad561a5896b 100644 --- a/drivers/clk/tegra/clk-tegra30.c +++ b/drivers/clk/tegra/clk-tegra30.c @@ -16,7 +16,6 @@ #include #include -#include #include #include #include diff --git a/drivers/clk/tegra/clk.c b/drivers/clk/tegra/clk.c index 41cd87c67be6..22aa8b18c840 100644 --- a/drivers/clk/tegra/clk.c +++ b/drivers/clk/tegra/clk.c @@ -14,6 +14,7 @@ * along with this program. If not, see . 
*/ +#include #include #include #include diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 19c4208f4752..57bf7aab4516 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -17,7 +17,8 @@ #ifndef __LINUX_CLK_TEGRA_H_ #define __LINUX_CLK_TEGRA_H_ -#include +#include +#include /* * Tegra CPU clock and reset control ops -- cgit v1.2.3-70-g09d2 From 61ae76563ec3b506235d5dd69c6fdacea321254d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 22 Jun 2015 17:13:49 -0700 Subject: clk: Remove clk.h from clk-provider.h Remove clk.h from clk-provider.h so that we can clearly split clk providers from clk consumers. This will allow us to quickly detect when clock providers are using the consumer APIs by looking at the includes. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 78842f46f152..36fa555ff431 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -11,7 +11,6 @@ #ifndef __LINUX_CLK_PROVIDER_H #define __LINUX_CLK_PROVIDER_H -#include #include #include @@ -33,6 +32,7 @@ #define CLK_GET_ACCURACY_NOCACHE BIT(8) /* do not use the cached clk accuracy */ #define CLK_RECALC_NEW_RATES BIT(9) /* recalc rates after notifications */ +struct clk; struct clk_hw; struct clk_core; struct dentry; -- cgit v1.2.3-70-g09d2 From 10dc4512185741a298cd7bc87e9968944f31a50d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 Jul 2015 16:45:28 -0400 Subject: svcrdma: Clean up svc_rdma_get_reply_array() Kernel coding conventions frown upon having large nontrivial functions in header files, and the preference these days is to allow the compiler to make inlining decisions if possible. As these functions are re-homed into a .c file, be sure that comparisons with fields in struct rpcrdma_msg are with be32 constants. 
This is a refactoring change; no behavior change is intended. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 81 +---------------------------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 73 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cb94ee4181d4..ca4d86a6c947 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -213,6 +213,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_sendto.c */ extern int svc_rdma_sendto(struct svc_rqst *); +extern struct rpcrdma_read_chunk * + svc_rdma_get_read_chunk(struct rpcrdma_msg *); /* svc_rdma_transport.c */ extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *); @@ -238,83 +240,4 @@ extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); -/* - * Returns the address of the first read chunk or if no read chunk is - * present - */ -static inline struct rpcrdma_read_chunk * -svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk *ch = - (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; - - if (ch->rc_discrim == 0) - return NULL; - - return ch; -} - -/* - * Returns the address of the first read write array element or if no - * write array list is present - */ -static inline struct rpcrdma_write_array * -svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) -{ - if (rmsgp->rm_body.rm_chunks[0] != 0 - || rmsgp->rm_body.rm_chunks[1] == 0) - return NULL; - - return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; -} - -/* - * Returns the address of the first reply array element or if no - * reply array is present - */ -static inline struct rpcrdma_write_array * -svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) -{ - struct rpcrdma_read_chunk 
*rch; - struct rpcrdma_write_array *wr_ary; - struct rpcrdma_write_array *rp_ary; - - /* XXX: Need to fix when reply list may occur with read-list and/or - * write list */ - if (rmsgp->rm_body.rm_chunks[0] != 0 || - rmsgp->rm_body.rm_chunks[1] != 0) - return NULL; - - rch = svc_rdma_get_read_chunk(rmsgp); - if (rch) { - while (rch->rc_discrim) - rch++; - - /* The reply list follows an empty write array located - * at 'rc_position' here. The reply array is at rc_target. - */ - rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; - - goto found_it; - } - - wr_ary = svc_rdma_get_write_array(rmsgp); - if (wr_ary) { - rp_ary = (struct rpcrdma_write_array *) - &wr_ary-> - wc_array[ntohl(wr_ary->wc_nchunks)].wc_target.rs_length; - - goto found_it; - } - - /* No read list, no write list */ - rp_ary = (struct rpcrdma_write_array *) - &rmsgp->rm_body.rm_chunks[2]; - - found_it: - if (rp_ary->wc_discrim == 0) - return NULL; - - return rp_ary; -} #endif diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 95412abc95b0..1dfae8317065 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -136,6 +136,79 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt, return dma_addr; } +/* Returns the address of the first read chunk or if no read chunk + * is present + */ +struct rpcrdma_read_chunk * +svc_rdma_get_read_chunk(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *ch = + (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + + if (ch->rc_discrim == xdr_zero) + return NULL; + return ch; +} + +/* Returns the address of the first read write array element or + * if no write array list is present + */ +static struct rpcrdma_write_array * +svc_rdma_get_write_array(struct rpcrdma_msg *rmsgp) +{ + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || + rmsgp->rm_body.rm_chunks[1] == xdr_zero) + return NULL; + return (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[1]; +} + +/* 
Returns the address of the first reply array element or if no + * reply array is present + */ +static struct rpcrdma_write_array * +svc_rdma_get_reply_array(struct rpcrdma_msg *rmsgp) +{ + struct rpcrdma_read_chunk *rch; + struct rpcrdma_write_array *wr_ary; + struct rpcrdma_write_array *rp_ary; + + /* XXX: Need to fix when reply chunk may occur with read list + * and/or write list. + */ + if (rmsgp->rm_body.rm_chunks[0] != xdr_zero || + rmsgp->rm_body.rm_chunks[1] != xdr_zero) + return NULL; + + rch = svc_rdma_get_read_chunk(rmsgp); + if (rch) { + while (rch->rc_discrim != xdr_zero) + rch++; + + /* The reply chunk follows an empty write array located + * at 'rc_position' here. The reply array is at rc_target. + */ + rp_ary = (struct rpcrdma_write_array *)&rch->rc_target; + goto found_it; + } + + wr_ary = svc_rdma_get_write_array(rmsgp); + if (wr_ary) { + int chunk = be32_to_cpu(wr_ary->wc_nchunks); + + rp_ary = (struct rpcrdma_write_array *) + &wr_ary->wc_array[chunk].wc_target.rs_length; + goto found_it; + } + + /* No read list, no write list */ + rp_ary = (struct rpcrdma_write_array *)&rmsgp->rm_body.rm_chunks[2]; + + found_it: + if (rp_ary->wc_discrim == xdr_zero) + return NULL; + return rp_ary; +} + /* Assumptions: * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ -- cgit v1.2.3-70-g09d2 From 31193fe5f6fb616711323f5d74ee5bb92aacba4a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 9 Jul 2015 16:45:37 -0400 Subject: svcrdma: Remove svc_rdma_fastreg() Commit 0bf4828983df ("svcrdma: refactor marshalling logic") removed the last call site for svc_rdma_fastreg(). Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 -------------------------------- 2 files changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index ca4d86a6c947..13af61b70417 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -227,7 +227,6 @@ extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); -extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f4b973233977..4054a9de6a91 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -1202,40 +1202,6 @@ static int svc_rdma_secure_port(struct svc_rqst *rqstp) return 1; } -/* - * Attempt to register the kvec representing the RPC memory with the - * device. - * - * Returns: - * NULL : The device does not support fastreg or there were no more - * fastreg mr. - * frmr : The kvec register request was successfully posted. - * <0 : An error was encountered attempting to register the kvec. 
- */ -int svc_rdma_fastreg(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - struct ib_send_wr fastreg_wr; - u8 key; - - /* Bump the key */ - key = (u8)(frmr->mr->lkey & 0x000000FF); - ib_update_fast_reg_key(frmr->mr, ++key); - - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof fastreg_wr); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - return svc_rdma_send(xprt, &fastreg_wr); -} - int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr, *n_wr; -- cgit v1.2.3-70-g09d2 From 0c4f691ff6791e55ac831666df0b49b1679c56e4 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sat, 18 Jul 2015 18:24:48 -0700 Subject: net: don't reforward packets already forwarded by offload device Just before queuing skb for xmit on port, check if skb has been marked by switchdev port driver as already forwarded by device. If so, drop skb. A non-zero skb->offload_fwd_mark field is set by the switchdev port driver/device on ingress to indicate the skb has already been forwarded by the device to egress ports with matching dev->offload_fwd_mark. The switchdev port driver would assign a non-zero dev->offload_fwd_mark for each device port netdev during registration, for example. Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Acked-by: Roopa Prabhu Acked-by: Nicolas Dichtel Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 6 ++++++ include/linux/skbuff.h | 9 ++++++++- net/core/dev.c | 10 ++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 45cfd797eb77..8364f29e08be 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1456,6 +1456,8 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * + * @offload_fwd_mark: Offload device fwding mark + * * @trans_start: Time (in jiffies) of last Tx * @watchdog_timeo: Represents the timeout that is used by * the watchdog ( see dev_watchdog() ) @@ -1697,6 +1699,10 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif +#ifdef CONFIG_NET_SWITCHDEV + u32 offload_fwd_mark; +#endif + /* These may be needed for future network-power-down code. */ /* diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e87d53..af7a09650fa2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -506,6 +506,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking + * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -650,9 +651,15 @@ struct sk_buff { unsigned int sender_cpu; }; #endif + union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; +#endif +#ifdef CONFIG_NET_SWITCHDEV + __u32 offload_fwd_mark; #endif + }; + union { __u32 mark; __u32 reserved_tailroom; diff --git a/net/core/dev.c b/net/core/dev.c index 8810b6bbebfe..2ee15afb412d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); +#ifdef CONFIG_NET_SWITCHDEV + /* Don't forward if offload device 
already forwarded */ + if (skb->offload_fwd_mark && + skb->offload_fwd_mark == dev->offload_fwd_mark) { + consume_skb(skb); + rc = NET_XMIT_SUCCESS; + goto out; + } +#endif + txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); -- cgit v1.2.3-70-g09d2 From d754f98b502ad9a8c7570d494e1eaa0e6bc0350c Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Sat, 18 Jul 2015 18:24:49 -0700 Subject: net: add phys ID compare helper to test if two IDs are the same Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ net/switchdev/switchdev.c | 8 ++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8364f29e08be..607b5f41f46f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -766,6 +766,13 @@ struct netdev_phys_item_id { unsigned char id_len; }; +static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, + struct netdev_phys_item_id *b) +{ + return a->id_len == b->id_len && + memcmp(a->id, b->id, a->id_len) == 0; +} + typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 9f2add3cba26..4e5bba50ccff 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) if (switchdev_port_attr_get(dev, &attr)) return NULL; - if (nhsel > 0) { - if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len) + if (nhsel > 0 && + !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) return NULL; - if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id, - attr.u.ppid.id_len)) - return NULL; - } prev_attr = attr; } -- cgit v1.2.3-70-g09d2 From 6acc23266054a9969737b435fa012f87465dbc50 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Thu, 16 Jul 2015 21:50:50 +0200 Subject: 
net: remove skb_frag_add_head It's not used anywhere. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- include/linux/skbuff.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af7a09650fa2..6bd96fe9416a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2678,12 +2678,6 @@ static inline void skb_frag_list_init(struct sk_buff *skb) skb_shinfo(skb)->frag_list = NULL; } -static inline void skb_frag_add_head(struct sk_buff *skb, struct sk_buff *frag) -{ - frag->next = skb_shinfo(skb)->frag_list; - skb_shinfo(skb)->frag_list = frag; -} - #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) -- cgit v1.2.3-70-g09d2 From f4c190eb8b4f80b12dc98ce7d54a3bea0e4e7e69 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Fri, 17 Jul 2015 00:26:12 +0200 Subject: stmmac: drop custom_* fields from plat_stmmacenet_data Both of these fields are unused and have been unused since they were added 3 and 5 years ago. Drop them since they are clearly not very useful. Signed-off-by: Joachim Eastwood Signed-off-by: David S. Miller --- Documentation/networking/stmmac.txt | 4 ---- include/linux/stmmac.h | 2 -- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index e655e2453c98..5fddefa69baf 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -139,8 +139,6 @@ struct plat_stmmacenet_data { void (*free)(struct platform_device *pdev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void *custom_cfg; - void *custom_data; void *bsp_priv; }; @@ -186,8 +184,6 @@ Where: which will be stored in bsp_priv, and then passed to init and exit callbacks. init/exit callbacks should not use or modify platform data.
- o custom_cfg/custom_data: this is a custom configuration that can be passed - while initializing the resources. o bsp_priv: another private pointer. For MDIO bus The we have: diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c735f5c91eea..c86a20047cb1 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -123,8 +123,6 @@ struct plat_stmmacenet_data { void (*free)(struct platform_device *pdev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); - void *custom_cfg; - void *custom_data; void *bsp_priv; }; -- cgit v1.2.3-70-g09d2 From 4e10df9a60d96ced321dd2af71da558c6b750078 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 20 Jul 2015 20:34:18 -0700 Subject: bpf: introduce bpf_skb_vlan_push/pop() helpers Allow eBPF programs attached to TC qdiscs call skb_vlan_push/pop via helper functions. These functions may change skb->data/hlen which are cached by some JITs to improve performance of ld_abs/ld_ind instructions. Therefore JITs need to recognize bpf_skb_vlan_push/pop() calls, re-compute header len and re-cache skb->data/hlen back into cpu registers. Note, skb->data/hlen are not directly accessible from the programs, so any changes to skb->data done either by these helpers or by other TC actions are safe. eBPF JIT supported by three architectures: - arm64 JIT is using bpf_load_pointer() without caching, so it's ok as-is. - x64 JIT re-caches skb->data/hlen unconditionally after vlan_push/pop calls (experiments showed that conditional re-caching is slower). - s390 JIT falls back to interpreter for now when bpf_skb_vlan_push() is present in the program (re-caching is tbd). These helpers allow more scalable handling of vlan from the programs. Instead of creating thousands of vlan netdevs on top of eth0 and attaching TC+ingress+bpf to all of them, the program can be attached to eth0 directly and manipulate vlans as necessary. 
Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 4 +++ arch/x86/net/bpf_jit_comp.c | 80 +++++++++++++++++++++++--------------------- include/linux/bpf.h | 2 ++ include/linux/filter.h | 1 + include/uapi/linux/bpf.h | 2 ++ net/core/filter.c | 48 ++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index fee782acc2ee..79c731e8d178 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -973,6 +973,10 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i */ const u64 func = (u64)__bpf_call_base + imm; + if (bpf_helper_changes_skb_data((void *)func)) + /* TODO reload skb->data, hlen */ + return -1; + REG_SET_SEEN(BPF_REG_5); jit->seen |= SEEN_FUNC; /* lg %w1,(%l) */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 579a8fd74be0..6c335a8fc086 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -315,6 +315,26 @@ static void emit_bpf_tail_call(u8 **pprog) *pprog = prog; } + +static void emit_load_skb_data_hlen(u8 **pprog) +{ + u8 *prog = *pprog; + int cnt = 0; + + /* r9d = skb->len - skb->data_len (headlen) + * r10 = skb->data + */ + /* mov %r9d, off32(%rdi) */ + EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len)); + + /* sub %r9d, off32(%rdi) */ + EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len)); + + /* mov %r10, off32(%rdi) */ + EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data)); + *pprog = prog; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -329,36 +349,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, emit_prologue(&prog); - if (seen_ld_abs) { - /* r9d : skb->len - skb->data_len (headlen) - * r10 : skb->data - */ - if (is_imm8(offsetof(struct sk_buff, len))) 
- /* mov %r9d, off8(%rdi) */ - EMIT4(0x44, 0x8b, 0x4f, - offsetof(struct sk_buff, len)); - else - /* mov %r9d, off32(%rdi) */ - EMIT3_off32(0x44, 0x8b, 0x8f, - offsetof(struct sk_buff, len)); - - if (is_imm8(offsetof(struct sk_buff, data_len))) - /* sub %r9d, off8(%rdi) */ - EMIT4(0x44, 0x2b, 0x4f, - offsetof(struct sk_buff, data_len)); - else - EMIT3_off32(0x44, 0x2b, 0x8f, - offsetof(struct sk_buff, data_len)); - - if (is_imm8(offsetof(struct sk_buff, data))) - /* mov %r10, off8(%rdi) */ - EMIT4(0x4c, 0x8b, 0x57, - offsetof(struct sk_buff, data)); - else - /* mov %r10, off32(%rdi) */ - EMIT3_off32(0x4c, 0x8b, 0x97, - offsetof(struct sk_buff, data)); - } + if (seen_ld_abs) + emit_load_skb_data_hlen(&prog); for (i = 0; i < insn_cnt; i++, insn++) { const s32 imm32 = insn->imm; @@ -367,6 +359,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 b1 = 0, b2 = 0, b3 = 0; s64 jmp_offset; u8 jmp_cond; + bool reload_skb_data; int ilen; u8 *func; @@ -818,12 +811,18 @@ xadd: if (is_imm8(insn->off)) func = (u8 *) __bpf_call_base + imm32; jmp_offset = func - (image + addrs[i]); if (seen_ld_abs) { - EMIT2(0x41, 0x52); /* push %r10 */ - EMIT2(0x41, 0x51); /* push %r9 */ - /* need to adjust jmp offset, since - * pop %r9, pop %r10 take 4 bytes after call insn - */ - jmp_offset += 4; + reload_skb_data = bpf_helper_changes_skb_data(func); + if (reload_skb_data) { + EMIT1(0x57); /* push %rdi */ + jmp_offset += 22; /* pop, mov, sub, mov */ + } else { + EMIT2(0x41, 0x52); /* push %r10 */ + EMIT2(0x41, 0x51); /* push %r9 */ + /* need to adjust jmp offset, since + * pop %r9, pop %r10 take 4 bytes after call insn + */ + jmp_offset += 4; + } } if (!imm32 || !is_simm32(jmp_offset)) { pr_err("unsupported bpf func %d addr %p image %p\n", @@ -832,8 +831,13 @@ xadd: if (is_imm8(insn->off)) } EMIT1_off32(0xE8, jmp_offset); if (seen_ld_abs) { - EMIT2(0x41, 0x59); /* pop %r9 */ - EMIT2(0x41, 0x5A); /* pop %r10 */ + if (reload_skb_data) { + EMIT1(0x5F); /* pop %rdi */ + 
emit_load_skb_data_hlen(&prog); + } else { + EMIT2(0x41, 0x59); /* pop %r9 */ + EMIT2(0x41, 0x5A); /* pop %r10 */ + } } break; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4383476a0d48..139d6d2e123f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -192,5 +192,7 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; +extern const struct bpf_func_proto bpf_skb_vlan_push_proto; +extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; #endif /* _LINUX_BPF_H */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 17724f6ea983..69d00555ce35 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -411,6 +411,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct bpf_prog *fp); +bool bpf_helper_changes_skb_data(void *func); #ifdef CONFIG_BPF_JIT typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2de87e58b12b..2f6c83d714e9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -256,6 +256,8 @@ enum bpf_func_id { * Return: classid if != 0 */ BPF_FUNC_get_cgroup_classid, + BPF_FUNC_skb_vlan_push, /* bpf_skb_vlan_push(skb, vlan_proto, vlan_tci) */ + BPF_FUNC_skb_vlan_pop, /* bpf_skb_vlan_pop(skb) */ __BPF_FUNC_MAX_ID, }; diff --git a/net/core/filter.c b/net/core/filter.c index 247450a5e387..50338071fac4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1437,6 +1437,50 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; +static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + 
__be16 vlan_proto = (__force __be16) r2; + + if (unlikely(vlan_proto != htons(ETH_P_8021Q) && + vlan_proto != htons(ETH_P_8021AD))) + vlan_proto = htons(ETH_P_8021Q); + + return skb_vlan_push(skb, vlan_proto, vlan_tci); +} + +const struct bpf_func_proto bpf_skb_vlan_push_proto = { + .func = bpf_skb_vlan_push, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + +static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +{ + struct sk_buff *skb = (struct sk_buff *) (long) r1; + + return skb_vlan_pop(skb); +} + +const struct bpf_func_proto bpf_skb_vlan_pop_proto = { + .func = bpf_skb_vlan_pop, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +bool bpf_helper_changes_skb_data(void *func) +{ + if (func == bpf_skb_vlan_push) + return true; + if (func == bpf_skb_vlan_pop) + return true; + return false; +} + static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { @@ -1476,6 +1520,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_clone_redirect_proto; case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_proto; + case BPF_FUNC_skb_vlan_push: + return &bpf_skb_vlan_push_proto; + case BPF_FUNC_skb_vlan_pop: + return &bpf_skb_vlan_pop_proto; default: return sk_filter_func_proto(func_id); } -- cgit v1.2.3-70-g09d2 From be9015abb8296d8dc72cef4da75fa30e88ab7c81 Mon Sep 17 00:00:00 2001 From: Shobhit Kumar Date: Fri, 26 Jun 2015 14:32:04 +0530 Subject: gpiolib: Add support for removing registered consumer lookup table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In case we unload and load a driver module again that is registering a lookup table, without this it will result in multiple entries. 
Provide an option to remove the lookup table on driver unload Cc: Samuel Ortiz Cc: Linus Walleij Cc: Alexandre Courbot Cc: Thierry Reding Reviewed-by: Alexandre Courbot Reviewed-by: Linus Walleij Tested-by: Ville Syrjälä Signed-off-by: Shobhit Kumar Acked-by: Lee Jones Signed-off-by: Daniel Vetter --- drivers/gpio/gpiolib.c | 13 +++++++++++++ include/linux/gpio/machine.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index bf4bd1d120c3..f25dc880b007 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1672,6 +1672,19 @@ void gpiod_add_lookup_table(struct gpiod_lookup_table *table) mutex_unlock(&gpio_lookup_lock); } +/** + * gpiod_remove_lookup_table() - unregister GPIO device consumers + * @table: table of consumers to unregister + */ +void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) +{ + mutex_lock(&gpio_lookup_lock); + + list_del(&table->list); + + mutex_unlock(&gpio_lookup_lock); +} + static struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, unsigned int idx, enum gpio_lookup_flags *flags) diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index e2706140eaff..c0d712d22b07 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -57,5 +57,6 @@ struct gpiod_lookup_table { } void gpiod_add_lookup_table(struct gpiod_lookup_table *table); +void gpiod_remove_lookup_table(struct gpiod_lookup_table *table); #endif /* __LINUX_GPIO_MACHINE_H */ -- cgit v1.2.3-70-g09d2 From 3490565b633c705d2fb1f6ede51228952664663d Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 13 Jul 2015 20:31:03 +0200 Subject: locking/spinlocks: Force inlining of spinlock ops With both gcc 4.7.2 and 4.9.2, sometimes GCC mysteriously doesn't inline very small functions we expect to be inlined. 
See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 In particular, with this config: http://busybox.net/~vda/kernel_config there are more than a thousand copies of tiny spinlock-related functions: $ nm --size-sort vmlinux | grep -iF ' t ' | uniq -c | grep -v '^ *1 ' | sort -rn | grep ' spin' 473 000000000000000b t spin_unlock_irqrestore 292 000000000000000b t spin_unlock 215 000000000000000b t spin_lock 134 000000000000000b t spin_unlock_irq 130 000000000000000b t spin_unlock_bh 120 000000000000000b t spin_lock_irq 106 000000000000000b t spin_lock_bh Disassembly: ffffffff81004720 : ffffffff81004720: 55 push %rbp ffffffff81004721: 48 89 e5 mov %rsp,%rbp ffffffff81004724: e8 f8 4e e2 02 callq <_raw_spin_lock> ffffffff81004729: 5d pop %rbp ffffffff8100472a: c3 retq This patch fixes this via s/inline/__always_inline/ in spinlock.h. This decreases vmlinux by about 40k: text data bss dec hex filename 82375570 22255544 20627456 125258570 7774b4a vmlinux.before 82335059 22255416 20627456 125217931 776ac8b vmlinux Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: Andy Lutomirski Cc: Bart Van Assche Cc: Borislav Petkov Cc: Brian Gerst Cc: David Rientjes Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Graf Link: http://lkml.kernel.org/r/1436812263-15243-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 0063b24b4f36..ffcd053ca89a 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -296,7 +296,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) * Map the spin_lock functions to the raw variants for PREEMPT_RT=n */ -static inline raw_spinlock_t *spinlock_check(spinlock_t *lock) +static __always_inline raw_spinlock_t *spinlock_check(spinlock_t *lock) { return &lock->rlock; } @@ -307,17 +307,17 @@ do { \ raw_spin_lock_init(&(_lock)->rlock); \ } while (0) -static inline void spin_lock(spinlock_t *lock) +static __always_inline void spin_lock(spinlock_t *lock) { raw_spin_lock(&lock->rlock); } -static inline void spin_lock_bh(spinlock_t *lock) +static __always_inline void spin_lock_bh(spinlock_t *lock) { raw_spin_lock_bh(&lock->rlock); } -static inline int spin_trylock(spinlock_t *lock) +static __always_inline int spin_trylock(spinlock_t *lock) { return raw_spin_trylock(&lock->rlock); } @@ -337,7 +337,7 @@ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ } while (0) -static inline void spin_lock_irq(spinlock_t *lock) +static __always_inline void spin_lock_irq(spinlock_t *lock) { raw_spin_lock_irq(&lock->rlock); } @@ -352,32 +352,32 @@ do { \ raw_spin_lock_irqsave_nested(spinlock_check(lock), flags, subclass); \ } while (0) -static inline void spin_unlock(spinlock_t *lock) +static __always_inline void spin_unlock(spinlock_t *lock) { raw_spin_unlock(&lock->rlock); } -static inline void spin_unlock_bh(spinlock_t *lock) +static __always_inline void spin_unlock_bh(spinlock_t *lock) { raw_spin_unlock_bh(&lock->rlock); } -static 
inline void spin_unlock_irq(spinlock_t *lock) +static __always_inline void spin_unlock_irq(spinlock_t *lock) { raw_spin_unlock_irq(&lock->rlock); } -static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { raw_spin_unlock_irqrestore(&lock->rlock, flags); } -static inline int spin_trylock_bh(spinlock_t *lock) +static __always_inline int spin_trylock_bh(spinlock_t *lock) { return raw_spin_trylock_bh(&lock->rlock); } -static inline int spin_trylock_irq(spinlock_t *lock) +static __always_inline int spin_trylock_irq(spinlock_t *lock) { return raw_spin_trylock_irq(&lock->rlock); } @@ -387,22 +387,22 @@ static inline int spin_trylock_irq(spinlock_t *lock) raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ }) -static inline void spin_unlock_wait(spinlock_t *lock) +static __always_inline void spin_unlock_wait(spinlock_t *lock) { raw_spin_unlock_wait(&lock->rlock); } -static inline int spin_is_locked(spinlock_t *lock) +static __always_inline int spin_is_locked(spinlock_t *lock) { return raw_spin_is_locked(&lock->rlock); } -static inline int spin_is_contended(spinlock_t *lock) +static __always_inline int spin_is_contended(spinlock_t *lock) { return raw_spin_is_contended(&lock->rlock); } -static inline int spin_can_lock(spinlock_t *lock) +static __always_inline int spin_can_lock(spinlock_t *lock) { return raw_spin_can_lock(&lock->rlock); } -- cgit v1.2.3-70-g09d2 From 8ffaadf7429270914b8f146ec13cf305e01df20d Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 20 Jul 2015 10:14:09 -0600 Subject: NVMe: Use CMB for the IO SQes if available Some controllers have a controller-side memory buffer available for use for submissions, completions, lists, or data. If a CMB is available, the entire CMB will be ioremapped and it will attempt to map the IO SQes onto the CMB. The queues will be shrunk as needed. 
The CMB will not be used if the queue depth is shrunk below some threshold where it may have reduced performance over a larger queue in system memory. Signed-off-by: Jon Derrick Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/nvme-core.c | 122 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/nvme.h | 17 +++++++ 2 files changed, 134 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index e09ad6cc6dec..82b4ffb6eefa 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -72,6 +72,10 @@ module_param(nvme_char_major, int, 0); static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); +static bool use_cmb_sqes = true; +module_param(use_cmb_sqes, bool, 0644); +MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes"); + static DEFINE_SPINLOCK(dev_list_lock); static LIST_HEAD(dev_list); static struct task_struct *nvme_thread; @@ -103,6 +107,7 @@ struct nvme_queue { char irqname[24]; /* nvme4294967295-65535\0 */ spinlock_t q_lock; struct nvme_command *sq_cmds; + struct nvme_command __iomem *sq_cmds_io; volatile struct nvme_completion *cqes; struct blk_mq_tags **tags; dma_addr_t sq_dma_addr; @@ -383,7 +388,11 @@ static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) { u16 tail = nvmeq->sq_tail; - memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); + if (nvmeq->sq_cmds_io) + memcpy_toio(&nvmeq->sq_cmds_io[tail], cmd, sizeof(*cmd)); + else + memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); + if (++tail == nvmeq->q_depth) tail = 0; writel(tail, nvmeq->q_db); @@ -1364,7 +1373,8 @@ static void nvme_free_queue(struct nvme_queue *nvmeq) { dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + if (nvmeq->sq_cmds) + dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), 
nvmeq->sq_cmds, nvmeq->sq_dma_addr); kfree(nvmeq); } @@ -1437,6 +1447,46 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid) spin_unlock_irq(&nvmeq->q_lock); } +static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, + int entry_size) +{ + int q_depth = dev->q_depth; + unsigned q_size_aligned = roundup(q_depth * entry_size, dev->page_size); + + if (q_size_aligned * nr_io_queues > dev->cmb_size) { + q_depth = rounddown(dev->cmb_size / nr_io_queues, + dev->page_size) / entry_size; + + /* + * Ensure the reduced q_depth is above some threshold where it + * would be better to map queues in system memory with the + * original depth + */ + if (q_depth < 64) + return -ENOMEM; + } + + return q_depth; +} + +static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, + int qid, int depth) +{ + if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) { + unsigned offset = (qid - 1) * + roundup(SQ_SIZE(depth), dev->page_size); + nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset; + nvmeq->sq_cmds_io = dev->cmb + offset; + } else { + nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), + &nvmeq->sq_dma_addr, GFP_KERNEL); + if (!nvmeq->sq_cmds) + return -ENOMEM; + } + + return 0; +} + static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) { @@ -1449,9 +1499,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, if (!nvmeq->cqes) goto free_nvmeq; - nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth), - &nvmeq->sq_dma_addr, GFP_KERNEL); - if (!nvmeq->sq_cmds) + if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) goto free_cqdma; nvmeq->q_dmadev = dev->dev; @@ -2149,6 +2197,58 @@ static int set_queue_count(struct nvme_dev *dev, int count) return min(result & 0xffff, result >> 16) + 1; } +static void __iomem *nvme_map_cmb(struct nvme_dev *dev) +{ + u64 szu, size, offset; + u32 cmbloc; + resource_size_t bar_size; + struct pci_dev *pdev = to_pci_dev(dev->dev); + void 
__iomem *cmb; + dma_addr_t dma_addr; + + if (!use_cmb_sqes) + return NULL; + + dev->cmbsz = readl(&dev->bar->cmbsz); + if (!(NVME_CMB_SZ(dev->cmbsz))) + return NULL; + + cmbloc = readl(&dev->bar->cmbloc); + + szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); + size = szu * NVME_CMB_SZ(dev->cmbsz); + offset = szu * NVME_CMB_OFST(cmbloc); + bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc)); + + if (offset > bar_size) + return NULL; + + /* + * Controllers may support a CMB size larger than their BAR, + * for example, due to being behind a bridge. Reduce the CMB to + * the reported size of the BAR + */ + if (size > bar_size - offset) + size = bar_size - offset; + + dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset; + cmb = ioremap_wc(dma_addr, size); + if (!cmb) + return NULL; + + dev->cmb_dma_addr = dma_addr; + dev->cmb_size = size; + return cmb; +} + +static inline void nvme_release_cmb(struct nvme_dev *dev) +{ + if (dev->cmb) { + iounmap(dev->cmb); + dev->cmb = NULL; + } +} + static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) { return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); @@ -2167,6 +2267,15 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) if (result < nr_io_queues) nr_io_queues = result; + if (dev->cmb && NVME_CMB_SQS(dev->cmbsz)) { + result = nvme_cmb_qdepth(dev, nr_io_queues, + sizeof(struct nvme_command)); + if (result > 0) + dev->q_depth = result; + else + nvme_release_cmb(dev); + } + size = db_bar_size(dev, nr_io_queues); if (size > 8192) { iounmap(dev->bar); @@ -2430,6 +2539,8 @@ static int nvme_dev_map(struct nvme_dev *dev) dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); dev->db_stride = 1 << NVME_CAP_STRIDE(cap); dev->dbs = ((void __iomem *)dev->bar) + 4096; + if (readl(&dev->bar->vs) >= NVME_VS(1, 2)) + dev->cmb = nvme_map_cmb(dev); return 0; @@ -3135,6 +3246,7 @@ static void nvme_remove(struct pci_dev *pdev) nvme_dev_remove_admin(dev); device_destroy(nvme_class, 
MKDEV(nvme_char_major, dev->instance)); nvme_free_queues(dev, 0); + nvme_release_cmb(dev); nvme_release_prp_pools(dev); kref_put(&dev->kref, nvme_free_dev); } diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c0d94ed8ce9a..fa3fe160c6cb 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -32,6 +32,8 @@ struct nvme_bar { __u32 aqa; /* Admin Queue Attributes */ __u64 asq; /* Admin SQ Base Address */ __u64 acq; /* Admin CQ Base Address */ + __u32 cmbloc; /* Controller Memory Buffer Location */ + __u32 cmbsz; /* Controller Memory Buffer Size */ }; #define NVME_CAP_MQES(cap) ((cap) & 0xffff) @@ -40,6 +42,17 @@ struct nvme_bar { #define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) #define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) +#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7) +#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff) +#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff) +#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf) + +#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10) +#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8) +#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4) +#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2) +#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1) + enum { NVME_CC_ENABLE = 1 << 0, NVME_CC_CSS_NVM = 0 << 4, @@ -100,6 +113,10 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u32 page_size; + void __iomem *cmb; + dma_addr_t cmb_dma_addr; + u64 cmb_size; + u32 cmbsz; u16 oncs; u16 abort_limit; u8 event_limit; -- cgit v1.2.3-70-g09d2 From 499a24256862714539e902c0499b67da2bb3ab72 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 21 Jul 2015 10:43:46 +0200 Subject: lwtunnel: infrastructure for handling light weight tunnels like mpls Provides infrastructure to parse/dump/store encap information for light weight tunnels like mpls. Encap information for such tunnels is associated with fib routes. This infrastructure is based on previous suggestions from Eric Biederman to follow the xfrm infrastructure. 
Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/lwtunnel.h | 6 ++ include/net/lwtunnel.h | 132 +++++++++++++++++++++++++++++++ include/uapi/linux/lwtunnel.h | 15 ++++ net/Kconfig | 7 ++ net/core/Makefile | 1 + net/core/lwtunnel.c | 179 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 340 insertions(+) create mode 100644 include/linux/lwtunnel.h create mode 100644 include/net/lwtunnel.h create mode 100644 include/uapi/linux/lwtunnel.h create mode 100644 net/core/lwtunnel.c (limited to 'include/linux') diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h new file mode 100644 index 000000000000..97f32f8b4ae1 --- /dev/null +++ b/include/linux/lwtunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_LWTUNNEL_H_ +#define _LINUX_LWTUNNEL_H_ + +#include + +#endif /* _LINUX_LWTUNNEL_H_ */ diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h new file mode 100644 index 000000000000..df24b3611ff4 --- /dev/null +++ b/include/net/lwtunnel.h @@ -0,0 +1,132 @@ +#ifndef __NET_LWTUNNEL_H +#define __NET_LWTUNNEL_H 1 + +#include +#include +#include +#include +#include + +#define LWTUNNEL_HASH_BITS 7 +#define LWTUNNEL_HASH_SIZE (1 << LWTUNNEL_HASH_BITS) + +/* lw tunnel state flags */ +#define LWTUNNEL_STATE_OUTPUT_REDIRECT 0x1 + +struct lwtunnel_state { + __u16 type; + __u16 flags; + atomic_t refcnt; + int len; + __u8 data[0]; +}; + +struct lwtunnel_encap_ops { + int (*build_state)(struct net_device *dev, struct nlattr *encap, + struct lwtunnel_state **ts); + int (*output)(struct sock *sk, struct sk_buff *skb); + int (*fill_encap)(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); + int (*get_encap_size)(struct lwtunnel_state *lwtstate); + int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); +}; + +extern const struct lwtunnel_encap_ops __rcu * + lwtun_encaps[LWTUNNEL_ENCAP_MAX+1]; + +#ifdef CONFIG_LWTUNNEL +static inline void lwtunnel_state_get(struct lwtunnel_state *lws) +{ + atomic_inc(&lws->refcnt); +} + +static 
inline void lwtunnel_state_put(struct lwtunnel_state *lws) +{ + if (!lws) + return; + + if (atomic_dec_and_test(&lws->refcnt)) + kfree(lws); +} + +static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT)) + return true; + + return false; +} + +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num); +int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, + struct lwtunnel_state **lws); +int lwtunnel_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwtstate); +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate); +struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); +int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); + +#else + +static inline void lwtunnel_state_get(struct lwtunnel_state *lws) +{ +} + +static inline void lwtunnel_state_put(struct lwtunnel_state *lws) +{ +} + +static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + +static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, + unsigned int num) +{ + return -EOPNOTSUPP; + +} + +static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, + unsigned int num) +{ + return -EOPNOTSUPP; +} + +static inline int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, + struct lwtunnel_state **lws) +{ + return -EOPNOTSUPP; +} + +static inline int lwtunnel_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + return 0; +} + +static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) +{ + return 0; +} + +static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len) +{ + return NULL; +} + +static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a, + struct lwtunnel_state *b) +{ 
+ return 0; +} + +#endif + +#endif /* __NET_LWTUNNEL_H */ diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h new file mode 100644 index 000000000000..aa611d931a31 --- /dev/null +++ b/include/uapi/linux/lwtunnel.h @@ -0,0 +1,15 @@ +#ifndef _UAPI_LWTUNNEL_H_ +#define _UAPI_LWTUNNEL_H_ + +#include + +enum lwtunnel_encap_types { + LWTUNNEL_ENCAP_NONE, + LWTUNNEL_ENCAP_MPLS, + __LWTUNNEL_ENCAP_MAX, +}; + +#define LWTUNNEL_ENCAP_MAX (__LWTUNNEL_ENCAP_MAX - 1) + + +#endif /* _UAPI_LWTUNNEL_H_ */ diff --git a/net/Kconfig b/net/Kconfig index 57a7c5af3175..7021c1bf44d6 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -374,6 +374,13 @@ source "net/caif/Kconfig" source "net/ceph/Kconfig" source "net/nfc/Kconfig" +config LWTUNNEL + bool "Network light weight tunnels" + ---help--- + This feature provides an infrastructure to support light weight + tunnels like mpls. There is no netdevice associated with a light + weight tunnel endpoint. Tunnel encapsulation parameters are stored + with light weight tunnel state associated with fib routes. 
endif # if NET diff --git a/net/core/Makefile b/net/core/Makefile index fec0856dd6c0..086b01fbe1bd 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o +obj-$(CONFIG_LWTUNNEL) += lwtunnel.o diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c new file mode 100644 index 000000000000..d7ae3a235b4b --- /dev/null +++ b/net/core/lwtunnel.c @@ -0,0 +1,179 @@ +/* + * lwtunnel Infrastructure for light weight tunnels like mpls + * + * Authors: Roopa Prabhu, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) +{ + struct lwtunnel_state *lws; + + lws = kzalloc(sizeof(*lws) + encap_len, GFP_ATOMIC); + + return lws; +} +EXPORT_SYMBOL(lwtunnel_state_alloc); + +const struct lwtunnel_encap_ops __rcu * + lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; + +int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, + unsigned int num) +{ + if (num > LWTUNNEL_ENCAP_MAX) + return -ERANGE; + + return !cmpxchg((const struct lwtunnel_encap_ops **) + &lwtun_encaps[num], + NULL, ops) ? 
0 : -1; +} +EXPORT_SYMBOL(lwtunnel_encap_add_ops); + +int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, + unsigned int encap_type) +{ + int ret; + + if (encap_type == LWTUNNEL_ENCAP_NONE || + encap_type > LWTUNNEL_ENCAP_MAX) + return -ERANGE; + + ret = (cmpxchg((const struct lwtunnel_encap_ops **) + &lwtun_encaps[encap_type], + ops, NULL) == ops) ? 0 : -1; + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_encap_del_ops); + +int lwtunnel_build_state(struct net_device *dev, u16 encap_type, + struct nlattr *encap, struct lwtunnel_state **lws) +{ + const struct lwtunnel_encap_ops *ops; + int ret = -EINVAL; + + if (encap_type == LWTUNNEL_ENCAP_NONE || + encap_type > LWTUNNEL_ENCAP_MAX) + return ret; + + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[encap_type]); + if (likely(ops && ops->build_state)) + ret = ops->build_state(dev, encap, lws); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_build_state); + +int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) +{ + const struct lwtunnel_encap_ops *ops; + struct nlattr *nest; + int ret = -EINVAL; + + if (!lwtstate) + return 0; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + ret = -EOPNOTSUPP; + nest = nla_nest_start(skb, RTA_ENCAP); + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->fill_encap)) + ret = ops->fill_encap(skb, lwtstate); + rcu_read_unlock(); + + if (ret) + goto nla_put_failure; + nla_nest_end(skb, nest); + ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type); + if (ret) + goto nla_put_failure; + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + + return (ret == -EOPNOTSUPP ? 
0 : ret); +} +EXPORT_SYMBOL(lwtunnel_fill_encap); + +int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) +{ + const struct lwtunnel_encap_ops *ops; + int ret = 0; + + if (!lwtstate) + return 0; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->get_encap_size)) + ret = nla_total_size(ops->get_encap_size(lwtstate)); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_get_encap_size); + +int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + const struct lwtunnel_encap_ops *ops; + int ret = 0; + + if (!a && !b) + return 0; + + if (!a || !b) + return 1; + + if (a->type != b->type) + return 1; + + if (a->type == LWTUNNEL_ENCAP_NONE || + a->type > LWTUNNEL_ENCAP_MAX) + return 0; + + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[a->type]); + if (likely(ops && ops->cmp_encap)) + ret = ops->cmp_encap(a, b); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_cmp_encap); -- cgit v1.2.3-70-g09d2 From e3e4712ec0961ed586a8db340bd994c4ad7f5dba Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 21 Jul 2015 10:43:53 +0200 Subject: mpls: ip tunnel support This implementation uses lwtunnel infrastructure to register hooks for mpls tunnel encaps. It picks cues from iptunnel_encaps infrastructure and previous mpls iptunnel RFC patches from Eric W. Biederman and Robert Shearman Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/linux/mpls_iptunnel.h | 6 + include/net/mpls_iptunnel.h | 29 +++++ include/uapi/linux/mpls_iptunnel.h | 28 +++++ net/mpls/Kconfig | 8 +- net/mpls/Makefile | 1 + net/mpls/mpls_iptunnel.c | 233 +++++++++++++++++++++++++++++++++++++ 6 files changed, 304 insertions(+), 1 deletion(-) create mode 100644 include/linux/mpls_iptunnel.h create mode 100644 include/net/mpls_iptunnel.h create mode 100644 include/uapi/linux/mpls_iptunnel.h create mode 100644 net/mpls/mpls_iptunnel.c (limited to 'include/linux') diff --git a/include/linux/mpls_iptunnel.h b/include/linux/mpls_iptunnel.h new file mode 100644 index 000000000000..ef29eb2d6dfd --- /dev/null +++ b/include/linux/mpls_iptunnel.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_MPLS_IPTUNNEL_H +#define _LINUX_MPLS_IPTUNNEL_H + +#include + +#endif /* _LINUX_MPLS_IPTUNNEL_H */ diff --git a/include/net/mpls_iptunnel.h b/include/net/mpls_iptunnel.h new file mode 100644 index 000000000000..4757997f76ed --- /dev/null +++ b/include/net/mpls_iptunnel.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015 Cumulus Networks, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ + +#ifndef _NET_MPLS_IPTUNNEL_H +#define _NET_MPLS_IPTUNNEL_H 1 + +#define MAX_NEW_LABELS 2 + +struct mpls_iptunnel_encap { + u32 label[MAX_NEW_LABELS]; + u32 labels; +}; + +static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate) +{ + return (struct mpls_iptunnel_encap *)lwtstate->data; +} + +#endif diff --git a/include/uapi/linux/mpls_iptunnel.h b/include/uapi/linux/mpls_iptunnel.h new file mode 100644 index 000000000000..d80a0498f77e --- /dev/null +++ b/include/uapi/linux/mpls_iptunnel.h @@ -0,0 +1,28 @@ +/* + * mpls tunnel api + * + * Authors: + * Roopa Prabhu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_MPLS_IPTUNNEL_H +#define _UAPI_LINUX_MPLS_IPTUNNEL_H + +/* MPLS tunnel attributes + * [RTA_ENCAP] = { + * [MPLS_IPTUNNEL_DST] + * } + */ +enum { + MPLS_IPTUNNEL_UNSPEC, + MPLS_IPTUNNEL_DST, + __MPLS_IPTUNNEL_MAX, +}; +#define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1) + +#endif /* _UAPI_LINUX_MPLS_IPTUNNEL_H */ diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 17bde799c854..5c467ef97311 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -24,7 +24,13 @@ config NET_MPLS_GSO config MPLS_ROUTING tristate "MPLS: routing support" - help + ---help--- Add support for forwarding of mpls packets. +config MPLS_IPTUNNEL + tristate "MPLS: IP over MPLS tunnel support" + depends on LWTUNNEL && MPLS_ROUTING + ---help--- + mpls ip tunnel support. 
+ endif # MPLS diff --git a/net/mpls/Makefile b/net/mpls/Makefile index 65bbe68c72e6..9ca923625016 100644 --- a/net/mpls/Makefile +++ b/net/mpls/Makefile @@ -3,5 +3,6 @@ # obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o +obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o mpls_router-y := af_mpls.o diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c new file mode 100644 index 000000000000..eea096f21ba5 --- /dev/null +++ b/net/mpls/mpls_iptunnel.c @@ -0,0 +1,233 @@ +/* + * mpls tunnels An implementation mpls tunnels using the light weight tunnel + * infrastructure + * + * Authors: Roopa Prabhu, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { + [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, +}; + +static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) +{ + /* The size of the layer 2.5 labels to be added for this route */ + return en->labels * sizeof(struct mpls_shim_hdr); +} + +int mpls_output(struct sock *sk, struct sk_buff *skb) +{ + struct mpls_iptunnel_encap *tun_encap_info; + struct mpls_shim_hdr *hdr; + struct net_device *out_dev; + unsigned int hh_len; + unsigned int new_header_size; + unsigned int mtu; + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = NULL; + struct rt6_info *rt6 = NULL; + struct lwtunnel_state *lwtstate = NULL; + int err = 0; + bool bos; + int i; + unsigned int ttl; + + /* Obtain the ttl */ + if (skb->protocol == htons(ETH_P_IP)) { + ttl = ip_hdr(skb)->ttl; + rt = (struct rtable *)dst; + lwtstate = 
rt->rt_lwtstate; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + ttl = ipv6_hdr(skb)->hop_limit; + rt6 = (struct rt6_info *)dst; + lwtstate = rt6->rt6i_lwtstate; + } else { + goto drop; + } + + skb_orphan(skb); + + /* Find the output device */ + out_dev = rcu_dereference(dst->dev); + if (!mpls_output_possible(out_dev) || + !lwtstate || skb_warn_if_lro(skb)) + goto drop; + + skb_forward_csum(skb); + + tun_encap_info = mpls_lwtunnel_encap(lwtstate); + + /* Verify the destination can hold the packet */ + new_header_size = mpls_encap_size(tun_encap_info); + mtu = mpls_dev_mtu(out_dev); + if (mpls_pkt_too_big(skb, mtu - new_header_size)) + goto drop; + + hh_len = LL_RESERVED_SPACE(out_dev); + if (!out_dev->header_ops) + hh_len = 0; + + /* Ensure there is enough space for the headers in the skb */ + if (skb_cow(skb, hh_len + new_header_size)) + goto drop; + + skb_push(skb, new_header_size); + skb_reset_network_header(skb); + + skb->dev = out_dev; + skb->protocol = htons(ETH_P_MPLS_UC); + + /* Push the new labels */ + hdr = mpls_hdr(skb); + bos = true; + for (i = tun_encap_info->labels - 1; i >= 0; i--) { + hdr[i] = mpls_entry_encode(tun_encap_info->label[i], + ttl, 0, bos); + bos = false; + } + + if (rt) + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, + skb); + else if (rt6) + err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, + skb); + if (err) + net_dbg_ratelimited("%s: packet transmission failed: %d\n", + __func__, err); + + return 0; + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int mpls_build_state(struct net_device *dev, struct nlattr *nla, + struct lwtunnel_state **ts) +{ + struct mpls_iptunnel_encap *tun_encap_info; + struct nlattr *tb[MPLS_IPTUNNEL_MAX + 1]; + struct lwtunnel_state *newts; + int tun_encap_info_len; + int ret; + + ret = nla_parse_nested(tb, MPLS_IPTUNNEL_MAX, nla, + mpls_iptunnel_policy); + if (ret < 0) + return ret; + + if (!tb[MPLS_IPTUNNEL_DST]) + return -EINVAL; + + tun_encap_info_len = 
sizeof(*tun_encap_info); + + newts = lwtunnel_state_alloc(tun_encap_info_len); + if (!newts) + return -ENOMEM; + + newts->len = tun_encap_info_len; + tun_encap_info = mpls_lwtunnel_encap(newts); + ret = nla_get_labels(tb[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS, + &tun_encap_info->labels, tun_encap_info->label); + if (ret) + goto errout; + newts->type = LWTUNNEL_ENCAP_MPLS; + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + + *ts = newts; + + return 0; + +errout: + kfree(newts); + *ts = NULL; + + return ret; +} + +static int mpls_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwtstate) +{ + struct mpls_iptunnel_encap *tun_encap_info; + + tun_encap_info = mpls_lwtunnel_encap(lwtstate); + + if (nla_put_labels(skb, MPLS_IPTUNNEL_DST, tun_encap_info->labels, + tun_encap_info->label)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + struct mpls_iptunnel_encap *tun_encap_info; + + tun_encap_info = mpls_lwtunnel_encap(lwtstate); + + return nla_total_size(tun_encap_info->labels * 4); +} + +static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a); + struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b); + int l; + + if (a_hdr->labels != b_hdr->labels) + return 1; + + for (l = 0; l < MAX_NEW_LABELS; l++) + if (a_hdr->label[l] != b_hdr->label[l]) + return 1; + return 0; +} + +static const struct lwtunnel_encap_ops mpls_iptun_ops = { + .build_state = mpls_build_state, + .output = mpls_output, + .fill_encap = mpls_fill_encap_info, + .get_encap_size = mpls_encap_nlsize, + .cmp_encap = mpls_encap_cmp, +}; + +static int __init mpls_iptunnel_init(void) +{ + return lwtunnel_encap_add_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS); +} +module_init(mpls_iptunnel_init); + +static void __exit mpls_iptunnel_exit(void) +{ + lwtunnel_encap_del_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS); +} 
+module_exit(mpls_iptunnel_exit); + +MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From ee122c79d4227f6ec642157834b6a90fcffa4382 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 21 Jul 2015 10:43:58 +0200 Subject: vxlan: Flow based tunneling Allows putting a VXLAN device into a new flow-based mode in which skbs with an ip_tunnel_info dst metadata attached will be encapsulated according to the instructions stored in there with the VXLAN device defaults taken into consideration. Similarly, on the receive side, if the VXLAN_F_COLLECT_METADATA flag is set, the packet processing will populate an ip_tunnel_info struct for each packet received and attach it to the skb using the new metadata dst. The metadata structure will contain the outer header and tunnel header fields which have been stripped off. Layers further up in the stack such as routing, tc or netfilter can later match on these fields and perform forwarding. It is the responsibility of upper layers to ensure that the flag is set if the metadata is needed. The flag limits the additional cost of metadata collecting based on demand. This prepares the VXLAN device to be steered by the routing and other subsystems which allows to support encapsulation for a large number of tunnel endpoints and tunnel ids through a single net_device which improves the scalability. It also allows for OVS to leverage this mode which in turn allows for the removal of the OVS specific VXLAN code. Because the skb is currently scrubbed in vxlan_rcv(), the attachment of the new dst metadata is postponed until after scrubbing which requires the temporary addition of a new member to vxlan_metadata. This member is removed again in a later commit after the indirect VXLAN receive API has been removed. Signed-off-by: Thomas Graf Signed-off-by: Pravin B Shelar Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 149 ++++++++++++++++++++++++++++++++++++------- include/linux/skbuff.h | 1 + include/net/dst_metadata.h | 13 ++++ include/net/ip_tunnels.h | 14 ++++ include/net/vxlan.h | 10 ++- include/uapi/linux/if_link.h | 1 + 6 files changed, 165 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index ec86a11743fd..06c092b05a51 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -49,6 +49,7 @@ #include #include #endif +#include #define VXLAN_VERSION "0.1" @@ -140,6 +141,11 @@ struct vxlan_dev { static u32 vxlan_salt __read_mostly; static struct workqueue_struct *vxlan_wq; +static inline bool vxlan_collect_metadata(struct vxlan_sock *vs) +{ + return vs->flags & VXLAN_F_COLLECT_METADATA; +} + #if IS_ENABLED(CONFIG_IPV6) static inline bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b) @@ -1164,10 +1170,13 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { + struct metadata_dst *tun_dst = NULL; + struct ip_tunnel_info *info; struct vxlan_sock *vs; struct vxlanhdr *vxh; u32 flags, vni; - struct vxlan_metadata md = {0}; + struct vxlan_metadata _md; + struct vxlan_metadata *md = &_md; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1202,6 +1211,33 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) vni &= VXLAN_VNI_MASK; } + if (vxlan_collect_metadata(vs)) { + const struct iphdr *iph = ip_hdr(skb); + + tun_dst = metadata_dst_alloc(sizeof(*md), GFP_ATOMIC); + if (!tun_dst) + goto drop; + + info = &tun_dst->u.tun_info; + info->key.ipv4_src = iph->saddr; + info->key.ipv4_dst = iph->daddr; + info->key.ipv4_tos = iph->tos; + info->key.ipv4_ttl = iph->ttl; + info->key.tp_src = udp_hdr(skb)->source; + info->key.tp_dst = 
udp_hdr(skb)->dest; + + info->mode = IP_TUNNEL_INFO_RX; + info->key.tun_flags = TUNNEL_KEY; + info->key.tun_id = cpu_to_be64(vni >> 8); + if (udp_hdr(skb)->check != 0) + info->key.tun_flags |= TUNNEL_CSUM; + + md = ip_tunnel_info_opts(info, sizeof(*md)); + md->tun_dst = tun_dst; + } else { + memset(md, 0, sizeof(*md)); + } + /* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. */ @@ -1209,13 +1245,16 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct vxlanhdr_gbp *gbp; gbp = (struct vxlanhdr_gbp *)vxh; - md.gbp = ntohs(gbp->policy_id); + md->gbp = ntohs(gbp->policy_id); + + if (tun_dst) + info->key.tun_flags |= TUNNEL_VXLAN_OPT; if (gbp->dont_learn) - md.gbp |= VXLAN_GBP_DONT_LEARN; + md->gbp |= VXLAN_GBP_DONT_LEARN; if (gbp->policy_applied) - md.gbp |= VXLAN_GBP_POLICY_APPLIED; + md->gbp |= VXLAN_GBP_POLICY_APPLIED; flags &= ~VXLAN_GBP_USED_BITS; } @@ -1233,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - md.vni = vxh->vx_vni; - vs->rcv(vs, skb, &md); + md->vni = vxh->vx_vni; + vs->rcv(vs, skb, md); return 0; drop: @@ -1247,6 +1286,9 @@ bad_flags: ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); error: + if (tun_dst) + dst_release((struct dst_entry *)tun_dst); + /* Return non vxlan pkt */ return 1; } @@ -1263,7 +1305,12 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, int err = 0; union vxlan_addr *remote_ip; - vni = ntohl(md->vni) >> 8; + /* For flow based devices, map all packets to VNI 0 */ + if (vs->flags & VXLAN_F_FLOW_BASED) + vni = 0; + else + vni = ntohl(md->vni) >> 8; + /* Is this VNI defined? 
*/ vxlan = vxlan_vs_find_vni(vs, vni); if (!vxlan) @@ -1292,12 +1339,19 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, #endif } + if (md->tun_dst) { + skb_dst_set(skb, (struct dst_entry *)md->tun_dst); + md->tun_dst = NULL; + } + if ((vxlan->flags & VXLAN_F_LEARN) && vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) goto drop; skb_reset_network_header(skb); - skb->mark = md->gbp; + /* In flow-based mode, GBP is carried in dst_metadata */ + if (!(vs->flags & VXLAN_F_FLOW_BASED)) + skb->mark = md->gbp; if (oip6) err = IP6_ECN_decapsulate(oip6, skb); @@ -1330,6 +1384,9 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, return; drop: + if (md->tun_dst) + dst_release((struct dst_entry *)md->tun_dst); + /* Consume bad packet */ kfree_skb(skb); } @@ -1878,22 +1935,40 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { + struct ip_tunnel_info *info = skb_tunnel_info(skb); struct vxlan_dev *vxlan = netdev_priv(dev); struct sock *sk = vxlan->vn_sock->sock->sk; struct rtable *rt = NULL; const struct iphdr *old_iph; struct flowi4 fl4; union vxlan_addr *dst; - struct vxlan_metadata md; + union vxlan_addr remote_ip; + struct vxlan_metadata _md; + struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; u32 vni; __be16 df = 0; __u8 tos, ttl; int err; + u32 flags = vxlan->flags; - dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port; - vni = rdst->remote_vni; - dst = &rdst->remote_ip; + if (rdst) { + dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port; + vni = rdst->remote_vni; + dst = &rdst->remote_ip; + } else { + if (!info) { + WARN_ONCE(1, "%s: Missing encapsulation instructions\n", + dev->name); + goto drop; + } + + dst_port = info->key.tp_dst ? 
: vxlan->dst_port; + vni = be64_to_cpu(info->key.tun_id); + remote_ip.sin.sin_family = AF_INET; + remote_ip.sin.sin_addr.s_addr = info->key.ipv4_dst; + dst = &remote_ip; + } if (vxlan_addr_any(dst)) { if (did_rsc) { @@ -1918,8 +1993,25 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vxlan->port_max, true); if (dst->sa.sa_family == AF_INET) { + if (info) { + if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + if (info->key.tun_flags & TUNNEL_CSUM) + flags |= VXLAN_F_UDP_CSUM; + else + flags &= ~VXLAN_F_UDP_CSUM; + + ttl = info->key.ipv4_ttl; + tos = info->key.ipv4_tos; + + if (info->options_len) + md = ip_tunnel_info_opts(info, sizeof(*md)); + } else { + md->gbp = skb->mark; + } + memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = rdst->remote_ifindex; + fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0; fl4.flowi4_tos = RT_TOS(tos); fl4.flowi4_mark = skb->mark; fl4.flowi4_proto = IPPROTO_UDP; @@ -1958,14 +2050,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - md.vni = htonl(vni << 8); - md.gbp = skb->mark; - + md->vni = htonl(vni << 8); err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr, dst->sin.sin_addr.s_addr, tos, ttl, df, - src_port, dst_port, &md, + src_port, dst_port, md, !net_eq(vxlan->net, dev_net(vxlan->dev)), - vxlan->flags); + flags); if (err < 0) { /* skb is already freed. */ skb = NULL; @@ -1980,7 +2070,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, u32 flags; memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_oif = rdst->remote_ifindex; + fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0; fl6.daddr = dst->sin6.sin6_addr; fl6.saddr = vxlan->saddr.sin6.sin6_addr; fl6.flowi6_mark = skb->mark; @@ -2018,11 +2108,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ttl = ttl ? 
: ip6_dst_hoplimit(ndst); - md.vni = htonl(vni << 8); - md.gbp = skb->mark; + md->vni = htonl(vni << 8); + md->gbp = skb->mark; err = vxlan6_xmit_skb(ndst, sk, skb, dev, &fl6.saddr, &fl6.daddr, - 0, ttl, src_port, dst_port, &md, + 0, ttl, src_port, dst_port, md, !net_eq(vxlan->net, dev_net(vxlan->dev)), vxlan->flags); #endif @@ -2051,6 +2141,7 @@ tx_free: static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); + const struct ip_tunnel_info *info = skb_tunnel_info(skb); struct ethhdr *eth; bool did_rsc = false; struct vxlan_rdst *rdst, *fdst = NULL; @@ -2078,6 +2169,12 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) #endif } + if (vxlan->flags & VXLAN_F_FLOW_BASED && + info && info->mode == IP_TUNNEL_INFO_TX) { + vxlan_xmit_one(skb, dev, NULL, false); + return NETDEV_TX_OK; + } + f = vxlan_find_mac(vxlan, eth->h_dest); did_rsc = false; @@ -2405,6 +2502,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_RSC] = { .type = NLA_U8 }, [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 }, [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 }, + [IFLA_VXLAN_FLOWBASED] = { .type = NLA_U8 }, [IFLA_VXLAN_PORT] = { .type = NLA_U16 }, [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 }, [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, @@ -2681,6 +2779,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_LIMIT]) vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); + if (data[IFLA_VXLAN_FLOWBASED] && + nla_get_u8(data[IFLA_VXLAN_FLOWBASED])) + vxlan->flags |= VXLAN_F_FLOW_BASED; + if (data[IFLA_VXLAN_PORT_RANGE]) { const struct ifla_vxlan_port_range *p = nla_data(data[IFLA_VXLAN_PORT_RANGE]); @@ -2777,6 +2879,7 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */ + 
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_FLOWBASED */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ nla_total_size(sizeof(struct ifla_vxlan_port_range)) + @@ -2843,6 +2946,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(vxlan->flags & VXLAN_F_L2MISS)) || nla_put_u8(skb, IFLA_VXLAN_L3MISS, !!(vxlan->flags & VXLAN_F_L3MISS)) || + nla_put_u8(skb, IFLA_VXLAN_FLOWBASED, + !!(vxlan->flags & VXLAN_F_FLOW_BASED)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) || nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port) || diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6bd96fe9416a..648a2c241993 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3469,5 +3469,6 @@ static inline unsigned int skb_gso_network_seglen(const struct sk_buff *skb) skb_network_header(skb); return hdr_len + skb_gso_transport_seglen(skb); } + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 4f7694f3c7d0..e843937fb30a 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -8,6 +8,9 @@ struct metadata_dst { struct dst_entry dst; size_t opts_len; + union { + struct ip_tunnel_info tun_info; + } u; }; static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) @@ -20,6 +23,16 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb) return NULL; } +static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + + if (md_dst) + return &md_dst->u.tun_info; + + return NULL; +} + static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 6b9d559ce5f5..d11530f1c1e2 100644 --- a/include/net/ip_tunnels.h 
+++ b/include/net/ip_tunnels.h @@ -38,10 +38,19 @@ struct ip_tunnel_key { __be16 tp_dst; } __packed __aligned(4); /* Minimize padding. */ +/* Indicates whether the tunnel info structure represents receive + * or transmit tunnel parameters. + */ +enum { + IP_TUNNEL_INFO_RX, + IP_TUNNEL_INFO_TX, +}; + struct ip_tunnel_info { struct ip_tunnel_key key; const void *options; u8 options_len; + u8 mode; }; /* 6rd prefix/relay information */ @@ -284,6 +293,11 @@ static inline void iptunnel_xmit_stats(int err, } } +static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info, size_t n) +{ + return info + 1; +} + #endif /* CONFIG_INET */ #endif /* __NET_IP_TUNNELS_H */ diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0082b5d33d7d..80a2da29e088 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -7,6 +7,7 @@ #include #include #include +#include #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1< Date: Mon, 13 Jul 2015 11:40:02 -0700 Subject: PCI: Add dev_flags bit to access VPD through function 0 Add a dev_flags bit, PCI_DEV_FLAGS_VPD_REF_F0, to access VPD through function 0 to provide VPD access on other functions. This is for hardware devices that provide copies of the same VPD capability registers in multiple functions. Because the kernel expects that each function has its own registers, both the locking and the state tracking are affected by VPD accesses to different functions. On such devices for example, if a VPD write is performed on function 0, *any* later attempt to read VPD from any other function of that device will hang. This has to do with how the kernel tracks the expected value of the F bit per function. Concurrent accesses to different functions of the same device can not only hang but also corrupt both read and write VPD data. When hangs occur, typically the error message: vpd r/w failed. This is likely a firmware bug on this device. will be seen. Never set this bit on function 0 or there will be an infinite recursion. 
Signed-off-by: Mark Rustad Signed-off-by: Bjorn Helgaas Acked-by: Alexander Duyck CC: stable@vger.kernel.org --- drivers/pci/access.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/pci.h | 2 ++ 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 5465b005220c..769f7e35f1a2 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -439,6 +439,56 @@ static const struct pci_vpd_ops pci_vpd_pci22_ops = { .release = pci_vpd_pci22_release, }; +static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count, + void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_read_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count, + const void *arg) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + ssize_t ret; + + if (!tdev) + return -ENODEV; + + ret = pci_write_vpd(tdev, pos, count, arg); + pci_dev_put(tdev); + return ret; +} + +static const struct pci_vpd_ops pci_vpd_f0_ops = { + .read = pci_vpd_f0_read, + .write = pci_vpd_f0_write, + .release = pci_vpd_pci22_release, +}; + +static int pci_vpd_f0_dev_check(struct pci_dev *dev) +{ + struct pci_dev *tdev = pci_get_slot(dev->bus, PCI_SLOT(dev->devfn)); + int ret = 0; + + if (!tdev) + return -ENODEV; + if (!tdev->vpd || !tdev->multifunction || + dev->class != tdev->class || dev->vendor != tdev->vendor || + dev->device != tdev->device) + ret = -ENODEV; + + pci_dev_put(tdev); + return ret; +} + int pci_vpd_pci22_init(struct pci_dev *dev) { struct pci_vpd_pci22 *vpd; @@ -447,12 +497,21 @@ int pci_vpd_pci22_init(struct pci_dev *dev) cap = pci_find_capability(dev, PCI_CAP_ID_VPD); if (!cap) return -ENODEV; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) { + int ret = pci_vpd_f0_dev_check(dev); + + 
if (ret) + return ret; + } vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC); if (!vpd) return -ENOMEM; vpd->base.len = PCI_VPD_PCI22_SIZE; - vpd->base.ops = &pci_vpd_pci22_ops; + if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) + vpd->base.ops = &pci_vpd_f0_ops; + else + vpd->base.ops = &pci_vpd_pci22_ops; mutex_init(&vpd->lock); vpd->cap = cap; vpd->busy = false; diff --git a/include/linux/pci.h b/include/linux/pci.h index 8a0321a8fb59..8edb125db13a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -180,6 +180,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_BUS_RESET = (__force pci_dev_flags_t) (1 << 6), /* Do not use PM reset even if device advertises NoSoftRst- */ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), + /* Get VPD from function 0 VPD */ + PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), }; enum pci_irq_reroute_variant { -- cgit v1.2.3-70-g09d2 From 019d8817b1b064c2bacfbcf40fc68184438ad05a Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 15 Jul 2015 14:40:06 +0200 Subject: PM / sleep: Allow devices without runtime PM to do direct-complete Don't unset the direct_complete flag on devices that have runtime PM disabled, if they are runtime suspended. This is needed because otherwise ancestor devices wouldn't be able to do direct_complete without adding runtime PM support to all its descendants. Also removes pm_runtime_suspended_if_enabled() because it's now unused. Signed-off-by: Tomeu Vizoso Signed-off-by: Alan Stern Signed-off-by: Rafael J. 
Wysocki --- Documentation/power/devices.txt | 7 +++++++ Documentation/power/runtime_pm.txt | 4 ---- drivers/base/power/main.c | 2 +- include/linux/pm_runtime.h | 6 ------ 4 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt index d172bce0fd49..8ba6625fdd63 100644 --- a/Documentation/power/devices.txt +++ b/Documentation/power/devices.txt @@ -341,6 +341,13 @@ the phases are: and is entirely responsible for bringing the device back to the functional state as appropriate. + Note that this direct-complete procedure applies even if the device is + disabled for runtime PM; only the runtime-PM status matters. It follows + that if a device has system-sleep callbacks but does not support runtime + PM, then its prepare callback must never return a positive value. This + is because all devices are initially set to runtime-suspended with + runtime PM disabled. + 2. The suspend methods should quiesce the device to stop it from performing I/O. 
They also may save the device registers and put it into the appropriate low-power state, depending on the bus type the device is on, diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index e76dc0ad4d2b..0784bc3a2ab5 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -445,10 +445,6 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: bool pm_runtime_status_suspended(struct device *dev); - return true if the device's runtime PM status is 'suspended' - bool pm_runtime_suspended_if_enabled(struct device *dev); - - return true if the device's runtime PM status is 'suspended' and its - 'power.disable_depth' field is equal to 1 - void pm_runtime_allow(struct device *dev); - set the power.runtime_auto flag for the device and decrease its usage counter (used by the /sys/devices/.../power/control interface to diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 30b7bbfdc558..1710c26ba097 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1377,7 +1377,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) if (dev->power.direct_complete) { if (pm_runtime_status_suspended(dev)) { pm_runtime_disable(dev); - if (pm_runtime_suspended_if_enabled(dev)) + if (pm_runtime_status_suspended(dev)) goto Complete; pm_runtime_enable(dev); diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 30e84d48bfea..3bdbb4189780 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -98,11 +98,6 @@ static inline bool pm_runtime_status_suspended(struct device *dev) return dev->power.runtime_status == RPM_SUSPENDED; } -static inline bool pm_runtime_suspended_if_enabled(struct device *dev) -{ - return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1; -} - static inline bool pm_runtime_enabled(struct device *dev) { return !dev->power.disable_depth; @@ -164,7 +159,6 @@ static inline void 
device_set_run_wake(struct device *dev, bool enable) {} static inline bool pm_runtime_suspended(struct device *dev) { return false; } static inline bool pm_runtime_active(struct device *dev) { return true; } static inline bool pm_runtime_status_suspended(struct device *dev) { return false; } -static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; } static inline bool pm_runtime_enabled(struct device *dev) { return false; } static inline void pm_runtime_no_callbacks(struct device *dev) {} -- cgit v1.2.3-70-g09d2 From 7d0c502040a23a5924d3021651cf5326c8694a77 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 19 Feb 2015 14:44:24 +0100 Subject: cpufeature: correctly annotate the module init function A section mismatch warning is reported if an __init annotated function is specified for module_cpu_feature_match(). Change the module_cpu_feature_match() function and annotate the generated cpu_feature_match_* function as __init. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- include/linux/cpufeature.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h index c4d4eb8ac9fe..986c06c88d81 100644 --- a/include/linux/cpufeature.h +++ b/include/linux/cpufeature.h @@ -11,6 +11,7 @@ #ifdef CONFIG_GENERIC_CPU_AUTOPROBE +#include #include #include @@ -43,16 +44,16 @@ * For a list of legal values for 'feature', please consult the file * 'asm/cpufeature.h' of your favorite architecture. 
*/ -#define module_cpu_feature_match(x, __init) \ +#define module_cpu_feature_match(x, __initfunc) \ static struct cpu_feature const cpu_feature_match_ ## x[] = \ { { .feature = cpu_feature(x) }, { } }; \ MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ \ -static int cpu_feature_match_ ## x ## _init(void) \ +static int __init cpu_feature_match_ ## x ## _init(void) \ { \ if (!cpu_have_feature(cpu_feature(x))) \ return -ENODEV; \ - return __init(); \ + return __initfunc(); \ } \ module_init(cpu_feature_match_ ## x ## _init) -- cgit v1.2.3-70-g09d2 From c179c9b978b90bdf9cb39f5b5716dede157f1eaf Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 9 Jul 2015 16:00:36 +0800 Subject: PCI: Add helper function msi_desc_to_pci_sysdata() Add helper function msi_desc_to_pci_sysdata() to retrieve sysdata from an MSI descriptor. To avoid pulling include/linux/pci.h into include/linux/msi.h, msi_desc_to_pci_sysdata() is implemented as a normal function instead of an inline function. Signed-off-by: Jiang Liu Reviewed-by: Yijing Wang Acked-by: Bjorn Helgaas Cc: Tony Luck Cc: linux-arm-kernel@lists.infradead.org Cc: Grant Likely Cc: Marc Zyngier Cc: Stuart Yoder Cc: Borislav Petkov Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/1436428847-8886-2-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 8 ++++++++ include/linux/msi.h | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 157eb8817fb8..ab4174243962 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1137,6 +1137,14 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, } EXPORT_SYMBOL(pci_enable_msix_range); +void *msi_desc_to_pci_sysdata(struct msi_desc *desc) +{ + struct pci_dev *dev = msi_desc_to_pci_dev(desc); + + return dev->bus->sysdata; +} +EXPORT_SYMBOL_GPL(msi_desc_to_pci_sysdata); + #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN /** * pci_msi_domain_write_msg - Helper to 
write MSI message to PCI config space diff --git a/include/linux/msi.h b/include/linux/msi.h index 8ac4a68ffae2..cfbd2afeaf64 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -60,6 +60,13 @@ static inline struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return desc->dev; } + +void *msi_desc_to_pci_sysdata(struct msi_desc *desc); +#else /* CONFIG_PCI_MSI */ +static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) +{ + return NULL; +} #endif /* CONFIG_PCI_MSI */ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -- cgit v1.2.3-70-g09d2 From 4a7cc831670550e6b48ef5760e7213f89935ff0d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 9 Jul 2015 16:00:44 +0800 Subject: genirq/MSI: Move msi_list from struct pci_dev to struct device Move msi_list from struct pci_dev into struct device, so we can support non-PCI-device based generic MSI interrupts. msi_list is now conditional under CONFIG_GENERIC_MSI_IRQ, which is selected from CONFIG_PCI_MSI, so no functional change for PCI MSI users. Signed-off-by: Jiang Liu Reviewed-by: Yijing Wang Acked-by: Bjorn Helgaas Cc: Tony Luck Cc: linux-arm-kernel@lists.infradead.org Cc: Grant Likely Cc: Marc Zyngier Cc: Stuart Yoder Cc: Borislav Petkov Cc: Greg Kroah-Hartman Cc: Joe Perches Cc: Dmitry Torokhov Cc: Paul Gortmaker Cc: Luis R. Rodriguez Cc: Rafael J. 
Wysocki Cc: Joerg Roedel Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/1436428847-8886-10-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/base/core.c | 3 +++ drivers/pci/msi.c | 3 +-- include/linux/device.h | 4 ++++ include/linux/msi.h | 2 +- include/linux/pci.h | 1 - 5 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index dafae6d2f7ac..18e2a89aa138 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -662,6 +662,9 @@ void device_initialize(struct device *dev) INIT_LIST_HEAD(&dev->devres_head); device_pm_init(dev); set_dev_node(dev, -1); +#ifdef CONFIG_GENERIC_MSI_IRQ + INIT_LIST_HEAD(&dev->msi_list); +#endif } EXPORT_SYMBOL_GPL(device_initialize); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index f0714c3fd315..4ef5021a084d 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -900,7 +900,7 @@ void pci_msi_shutdown(struct pci_dev *dev) return; BUG_ON(list_empty(dev_to_msi_list(&dev->dev))); - desc = first_msi_entry(dev); + desc = first_pci_msi_entry(dev); pci_msi_set_enable(dev, 0); pci_intx_for_msi(dev, 1); @@ -1044,7 +1044,6 @@ EXPORT_SYMBOL(pci_msi_enabled); void pci_msi_init_pci_dev(struct pci_dev *dev) { - INIT_LIST_HEAD(&dev->msi_list); } /** diff --git a/include/linux/device.h b/include/linux/device.h index 5a31bf3a4024..22227e7fe463 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -713,6 +713,7 @@ struct device_dma_parameters { * along with subsystem-level and driver-level callbacks. * @pins: For device pin management. * See Documentation/pinctrl.txt for details. + * @msi_list: Hosts MSI descriptors * @numa_node: NUMA node this device is close to. * @dma_mask: Dma mask (if dma'ble device). 
* @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all @@ -776,6 +777,9 @@ struct device { #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif +#ifdef CONFIG_GENERIC_MSI_IRQ + struct list_head msi_list; +#endif #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ diff --git a/include/linux/msi.h b/include/linux/msi.h index cfbd2afeaf64..57fe766a14bf 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -45,7 +45,7 @@ struct msi_desc { /* Helpers to hide struct msi_desc implementation details */ #define msi_desc_to_dev(desc) (&(desc)->dev.dev) -#define dev_to_msi_list(dev) (&to_pci_dev((dev))->msi_list) +#define dev_to_msi_list(dev) (&(dev)->msi_list) #define first_msi_entry(dev) \ list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list) #define for_each_msi_entry(desc, dev) \ diff --git a/include/linux/pci.h b/include/linux/pci.h index 8a0321a8fb59..fbf245f5eba7 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -366,7 +366,6 @@ struct pci_dev { struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI - struct list_head msi_list; const struct attribute_group **msi_irq_groups; #endif struct pci_vpd *vpd; -- cgit v1.2.3-70-g09d2 From 25a98bd4ff9355a218d2e7aa4d6e3c9bc2c27d6f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 9 Jul 2015 16:00:45 +0800 Subject: genirq/MSI: Store 'struct device' instead of 'struct pci_dev' in struct msi_desc Store 'struct device *' instead of 'struct pci_dev *' in struct msi_desc, so struct msi_desc can be reused by non PCI based MSI drivers. 
Signed-off-by: Jiang Liu Reviewed-by: Yijing Wang Reviewed-by: Marc Zyngier Cc: Tony Luck Cc: linux-arm-kernel@lists.infradead.org Cc: Bjorn Helgaas Cc: Grant Likely Cc: Stuart Yoder Cc: Borislav Petkov Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/1436428847-8886-11-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 7 ++++++- include/linux/msi.h | 11 ++++------- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 4ef5021a084d..897e1a4bce06 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -413,7 +413,7 @@ static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) return NULL; INIT_LIST_HEAD(&desc->list); - desc->dev = dev; + desc->dev = &dev->dev; return desc; } @@ -1140,6 +1140,11 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, } EXPORT_SYMBOL(pci_enable_msix_range); +struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) +{ + return to_pci_dev(desc->dev); +} + void *msi_desc_to_pci_sysdata(struct msi_desc *desc) { struct pci_dev *dev = msi_desc_to_pci_dev(desc); diff --git a/include/linux/msi.h b/include/linux/msi.h index 57fe766a14bf..5f77e231f515 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -14,6 +14,7 @@ extern int pci_msi_ignore_mask; /* Helper functions */ struct irq_data; struct msi_desc; +struct pci_dev; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); @@ -37,14 +38,14 @@ struct msi_desc { void __iomem *mask_base; u8 mask_pos; }; - struct pci_dev *dev; + struct device *dev; /* Last set MSI message */ struct msi_msg msg; }; /* Helpers to hide struct msi_desc implementation details */ -#define msi_desc_to_dev(desc) (&(desc)->dev.dev) +#define msi_desc_to_dev(desc) ((desc)->dev) #define dev_to_msi_list(dev) (&(dev)->msi_list) #define first_msi_entry(dev) \ 
list_first_entry(dev_to_msi_list((dev)), struct msi_desc, list) @@ -56,11 +57,7 @@ struct msi_desc { #define for_each_pci_msi_entry(desc, pdev) \ for_each_msi_entry((desc), &(pdev)->dev) -static inline struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) -{ - return desc->dev; -} - +struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); void *msi_desc_to_pci_sysdata(struct msi_desc *desc); #else /* CONFIG_PCI_MSI */ static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) -- cgit v1.2.3-70-g09d2 From fc88419cfac50b05c7c1ea218b08e70c31d1b71f Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 9 Jul 2015 16:00:46 +0800 Subject: genirq/MSI: Reorginize struct msi_desc to prepare for support of generic MSI Reorganize struct msi_desc so it could be reused by other MSI drivers. We have the following layout now: struct msi_desc { /* Shared device/bus independent data */ ... union { /* PCI specific data */ struct { ... }; }; }; We need to have an anonymous union and an anonymous structure for the PCI fields, otherwise we would have to change all instances using these fields. For non PCI devices we will enforce a proper namespace and a non anonymous structure.
[ tglx: Added proper comments to the structure and massaged changelog ] Signed-off-by: Jiang Liu Reviewed-by: Yijing Wang Reviewed-by: Marc Zyngier Cc: Tony Luck Cc: linux-arm-kernel@lists.infradead.org Cc: Bjorn Helgaas Cc: Grant Likely Cc: Stuart Yoder Cc: Borislav Petkov Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/1436428847-8886-12-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/msi.h | 70 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 5f77e231f515..518e8c4a4064 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -18,30 +18,60 @@ struct pci_dev; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); +/** + * struct msi_desc - Descriptor structure for MSI based interrupts + * @list: List head for management + * @irq: The base interrupt number + * @nvec_used: The number of vectors used + * @dev: Pointer to the device which uses this descriptor + * @msg: The last set MSI message cached for reuse + * + * @masked: [PCI MSI/X] Mask bits + * @is_msix: [PCI MSI/X] True if MSI-X + * @multiple: [PCI MSI/X] log2 num of messages allocated + * @multi_cap: [PCI MSI/X] log2 num of messages supported + * @maskbit: [PCI MSI/X] Mask-Pending bit supported? + * @is_64: [PCI MSI/X] Address size: 0=32bit 1=64bit + * @entry_nr: [PCI MSI/X] Entry which is described by this descriptor + * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq + * @mask_pos: [PCI MSI] Mask register position + * @mask_base: [PCI MSI-X] Mask register base address + */ struct msi_desc { - struct { - __u8 is_msix : 1; - __u8 multiple: 3; /* log2 num of messages allocated */ - __u8 multi_cap : 3; /* log2 num of messages supported */ - __u8 maskbit : 1; /* mask-pending bit supported ? 
*/ - __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ - __u16 entry_nr; /* specific enabled entry */ - unsigned default_irq; /* default pre-assigned irq */ - } msi_attrib; - - u32 masked; /* mask bits */ - unsigned int irq; - unsigned int nvec_used; /* number of messages */ - struct list_head list; + /* Shared device/bus type independent data */ + struct list_head list; + unsigned int irq; + unsigned int nvec_used; + struct device *dev; + struct msi_msg msg; union { - void __iomem *mask_base; - u8 mask_pos; - }; - struct device *dev; + /* PCI MSI/X specific data */ + struct { + u32 masked; + struct { + __u8 is_msix : 1; + __u8 multiple : 3; + __u8 multi_cap : 3; + __u8 maskbit : 1; + __u8 is_64 : 1; + __u16 entry_nr; + unsigned default_irq; + } msi_attrib; + union { + u8 mask_pos; + void __iomem *mask_base; + }; + }; - /* Last set MSI message */ - struct msi_msg msg; + /* + * Non PCI variants add their data structure here. New + * entries need to use a named structure. We want + * proper name spaces for this. The PCI part is + * anonymous for now as it would require an immediate + * tree wide cleanup. + */ + }; }; /* Helpers to hide struct msi_desc implementation details */ -- cgit v1.2.3-70-g09d2 From aa48b6f708868ab9c22ca737f27a0da832bf7f08 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 9 Jul 2015 16:00:47 +0800 Subject: genirq/MSI: Move alloc_msi_entry() from PCI into generic MSI code Move alloc_msi_entry() from PCI MSI code into generic MSI code, so it can be reused by other generic MSI drivers. Also introduce free_msi_entry() for completeness. Suggested-by: Stuart Yoder . 
Signed-off-by: Jiang Liu Reviewed-by: Marc Zyngier Reviewed-by: Yijing Wang Acked-by: Bjorn Helgaas Cc: Tony Luck Cc: linux-arm-kernel@lists.infradead.org Cc: Grant Likely Cc: Borislav Petkov Cc: Alexander Gordeev Link: http://lkml.kernel.org/r/1436428847-8886-13-git-send-email-jiang.liu@linux.intel.com Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 16 ++-------------- include/linux/msi.h | 2 ++ kernel/irq/msi.c | 17 +++++++++++++++++ 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 897e1a4bce06..cd4c78c193de 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -406,18 +406,6 @@ static void free_msi_irqs(struct pci_dev *dev) } } -static struct msi_desc *alloc_msi_entry(struct pci_dev *dev) -{ - struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); - if (!desc) - return NULL; - - INIT_LIST_HEAD(&desc->list); - desc->dev = &dev->dev; - - return desc; -} - static void pci_intx_for_msi(struct pci_dev *dev, int enable) { if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) @@ -572,7 +560,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) struct msi_desc *entry; /* MSI Entry Initialization */ - entry = alloc_msi_entry(dev); + entry = alloc_msi_entry(&dev->dev); if (!entry) return NULL; @@ -700,7 +688,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, int i; for (i = 0; i < nvec; i++) { - entry = alloc_msi_entry(dev); + entry = alloc_msi_entry(&dev->dev); if (!entry) { if (!i) iounmap(base); diff --git a/include/linux/msi.h b/include/linux/msi.h index 518e8c4a4064..f83c87e447bc 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -96,6 +96,8 @@ static inline void *msi_desc_to_pci_sysdata(struct msi_desc *desc) } #endif /* CONFIG_PCI_MSI */ +struct msi_desc *alloc_msi_entry(struct device *dev); +void free_msi_entry(struct msi_desc *entry); void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg 
*msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 7bf1f1bbb7fa..7e6512b9dc1f 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -18,6 +18,23 @@ /* Temparory solution for building, will be removed later */ #include +struct msi_desc *alloc_msi_entry(struct device *dev) +{ + struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) + return NULL; + + INIT_LIST_HEAD(&desc->list); + desc->dev = dev; + + return desc; +} + +void free_msi_entry(struct msi_desc *entry) +{ + kfree(entry); +} + void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { *msg = entry->msg; -- cgit v1.2.3-70-g09d2 From 3985e8a3611a93bb36789f65db862e5700aab65e Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Wed, 22 Jul 2015 16:38:25 +0900 Subject: ipv6: sysctl to restrict candidate source addresses Per RFC 6724, section 4, "Candidate Source Addresses": It is RECOMMENDED that the candidate source addresses be the set of unicast addresses assigned to the interface that will be used to send to the destination (the "outgoing" interface). Add a sysctl to enable this behaviour. Signed-off-by: Erik Kline Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 22 +++++++++++++++++++--- 4 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index f63aeefd2c24..1a5ab21bcca5 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1460,6 +1460,13 @@ router_solicitations - INTEGER routers are present. 
Default: 3 +use_oif_addrs_only - BOOLEAN + When enabled, the candidate source addresses for destinations + routed via this interface are restricted to the set of addresses + configured on this interface (vis. RFC 6724, section 4). + + Default: false + use_tempaddr - INTEGER Preference for Privacy Extensions (RFC3041). <= 0 : disable Privacy Extensions diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 1319a6bb6b82..06ed637225b8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -57,6 +57,7 @@ struct ipv6_devconf { bool initialized; struct in6_addr secret; } stable_secret; + __s32 use_oif_addrs_only; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 5efa54ae567c..641a146ead7d 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -171,6 +171,7 @@ enum { DEVCONF_USE_OPTIMISTIC, DEVCONF_ACCEPT_RA_MTU, DEVCONF_STABLE_SECRET, + DEVCONF_USE_OIF_ADDRS_ONLY, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 32153c248959..eb0c6a3a8a00 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -211,7 +211,8 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_mtu = 1, .stable_secret = { .initialized = false, - } + }, + .use_oif_addrs_only = 0, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -253,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .stable_secret = { .initialized = false, }, + .use_oif_addrs_only = 0, }; /* Check if a valid qdisc is available */ @@ -1472,11 +1474,16 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, * include addresses assigned to interfaces * belonging to the same site as the outgoing * interface.) + * - "It is RECOMMENDED that the candidate source addresses + * be the set of unicast addresses assigned to the + * interface that will be used to send to the destination + * (the 'outgoing' interface)." 
(RFC 6724) */ if (dst_dev) { + idev = __in6_dev_get(dst_dev); if ((dst_type & IPV6_ADDR_MULTICAST) || - dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) { - idev = __in6_dev_get(dst_dev); + dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL || + (idev && idev->cnf.use_oif_addrs_only)) { use_oif_addr = true; } } @@ -4607,6 +4614,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; /* we omit DEVCONF_STABLE_SECRET for now */ + array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; } static inline size_t inet6_ifla6_size(void) @@ -5605,6 +5613,14 @@ static struct addrconf_sysctl_table .mode = 0600, .proc_handler = addrconf_sysctl_stable_secret, }, + { + .procname = "use_oif_addrs_only", + .data = &ipv6_devconf.use_oif_addrs_only, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + + }, { /* sentinel */ } -- cgit v1.2.3-70-g09d2 From cd812599796f500b042f5464b6665755eca21137 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 11:12:07 -0400 Subject: NFS: Remove the "NFS_CAP_CHANGE_ATTR" capability Setting the change attribute has been mandatory for all NFS versions, since commit 3a1556e8662c ("NFSv2/v3: Simulate the change attribute"). We should therefore not have anything be conditional on it being set/unset. 
Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- fs/nfs/inode.c | 4 ++-- fs/nfs/nfs4proc.c | 3 --- include/linux/nfs_fs_sb.h | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ecebb406cc1a..4a90c9bb3135 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -775,7 +775,7 @@ static int nfs_init_server(struct nfs_server *server, server->options = data->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| - NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME|NFS_CAP_CHANGE_ATTR; + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 426e4f8207ef..0adc7d245b3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -442,7 +442,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR); if (fattr->valid & NFS_ATTR_FATTR_CHANGE) inode->i_version = fattr->change_attr; - else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + else nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_REVAL_PAGECACHE); if (fattr->valid & NFS_ATTR_FATTR_SIZE) @@ -1692,7 +1692,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); inode->i_version = fattr->change_attr; } - } else if (server->caps & NFS_CAP_CHANGE_ATTR) + } else nfsi->cache_validity |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9264994ec9d3..c85ffe67b5f3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8591,7 +8591,6 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK, .init_client = 
nfs40_init_client, .shutdown_client = nfs40_shutdown_client, @@ -8617,7 +8616,6 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1, @@ -8640,7 +8638,6 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .minor_version = 2, .init_caps = NFS_CAP_READDIRPLUS | NFS_CAP_ATOMIC_OPEN - | NFS_CAP_CHANGE_ATTR | NFS_CAP_POSIX_LOCK | NFS_CAP_STATEID_NFSV41 | NFS_CAP_ATOMIC_OPEN_V1 diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a2ea1491d3df..20bc8e51b161 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -220,7 +220,7 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) -#define NFS_CAP_CHANGE_ATTR (1U << 5) +/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */ #define NFS_CAP_FILEID (1U << 6) #define NFS_CAP_MODE (1U << 7) #define NFS_CAP_NLINK (1U << 8) -- cgit v1.2.3-70-g09d2 From 115c48d7a5351abeadd0c8a3dc87eca3d66a6475 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Jul 2015 12:36:34 -0400 Subject: NFS: nfs_mark_for_revalidate should always set NFS_INO_REVAL_PAGECACHE I'm not aware of any existing bugs around this, but the expectation is that nfs_mark_for_revalidate() should always force a revalidation of the cached metadata. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f91b5ade30c9..874b77228fb9 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -292,9 +292,12 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_ACCESS | + NFS_INO_INVALID_ACL; if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_DATA; spin_unlock(&inode->i_lock); } -- cgit v1.2.3-70-g09d2 From 24560056de61d86153cecb84d04e4237437f5888 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 30 May 2015 10:11:24 -0700 Subject: rcu: Add RCU-sched flavors of get-state and cond-sync The get_state_synchronize_rcu() and cond_synchronize_rcu() functions allow polling for grace-period completion, with an actual wait for a grace period occurring only when cond_synchronize_rcu() is called too soon after the corresponding get_state_synchronize_rcu(). However, these functions work only for vanilla RCU. This commit adds the get_state_synchronize_sched() and cond_synchronize_sched(), which provide the same capability for RCU-sched. Reported-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. 
McKenney --- include/linux/rcutiny.h | 10 ++++++++++ include/linux/rcutree.h | 2 ++ kernel/rcu/rcutorture.c | 2 ++ kernel/rcu/tree.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 3df6c1ec4e25..ff968b7af3a4 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -37,6 +37,16 @@ static inline void cond_synchronize_rcu(unsigned long oldstate) might_sleep(); } +static inline unsigned long get_state_synchronize_sched(void) +{ + return 0; +} + +static inline void cond_synchronize_sched(unsigned long oldstate) +{ + might_sleep(); +} + static inline void rcu_barrier_bh(void) { wait_rcu_gp(call_rcu_bh); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 456879143f89..5abec82f325e 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -76,6 +76,8 @@ void rcu_barrier_bh(void); void rcu_barrier_sched(void); unsigned long get_state_synchronize_rcu(void); void cond_synchronize_rcu(unsigned long oldstate); +unsigned long get_state_synchronize_sched(void); +void cond_synchronize_sched(unsigned long oldstate); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 59e32684c23b..0f2cb55f0ab3 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -635,6 +635,8 @@ static struct rcu_torture_ops sched_ops = { .deferred_free = rcu_sched_torture_deferred_free, .sync = synchronize_sched, .exp_sync = synchronize_sched_expedited, + .get_state = get_state_synchronize_sched, + .cond_sync = cond_synchronize_sched, .call = call_rcu_sched, .cb_barrier = rcu_barrier_sched, .fqs = rcu_sched_force_quiescent_state, diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8b5dd8ba9495..9629298eea24 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -3253,6 +3253,58 @@ void cond_synchronize_rcu(unsigned 
long oldstate) } EXPORT_SYMBOL_GPL(cond_synchronize_rcu); +/** + * get_state_synchronize_sched - Snapshot current RCU-sched state + * + * Returns a cookie that is used by a later call to cond_synchronize_sched() + * to determine whether or not a full grace period has elapsed in the + * meantime. + */ +unsigned long get_state_synchronize_sched(void) +{ + /* + * Any prior manipulation of RCU-protected data must happen + * before the load from ->gpnum. + */ + smp_mb(); /* ^^^ */ + + /* + * Make sure this load happens before the purportedly + * time-consuming work between get_state_synchronize_sched() + * and cond_synchronize_sched(). + */ + return smp_load_acquire(&rcu_sched_state.gpnum); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_sched); + +/** + * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period + * + * @oldstate: return value from earlier call to get_state_synchronize_sched() + * + * If a full RCU-sched grace period has elapsed since the earlier call to + * get_state_synchronize_sched(), just return. Otherwise, invoke + * synchronize_sched() to wait for a full grace period. + * + * Yes, this function does not take counter wrap into account. But + * counter wrap is harmless. If the counter wraps, we have waited for + * more than 2 billion grace periods (and way more on a 64-bit system!), + * so waiting for one additional grace period should be just fine. + */ +void cond_synchronize_sched(unsigned long oldstate) +{ + unsigned long newstate; + + /* + * Ensure that this load happens before any RCU-destructive + * actions the caller might carry out after we return. 
+ */ + newstate = smp_load_acquire(&rcu_sched_state.completed); + if (ULONG_CMP_GE(oldstate, newstate)) + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(cond_synchronize_sched); + static int synchronize_sched_expedited_cpu_stop(void *data) { /* -- cgit v1.2.3-70-g09d2 From 155d1d12786386d21732f9bba036343ffa43847d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Jun 2015 17:26:48 +0200 Subject: rcu: Use WRITE_ONCE in RCU_INIT_POINTER For the paranoid amongst us GCC would be in its right to use byte stores to write our NULL value, tell it not to do that. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index def6d45ad61c..c63428c1ed8a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -995,7 +995,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ do { \ rcu_dereference_sparse(p, __rcu); \ - p = RCU_INITIALIZER(v); \ + WRITE_ONCE(p, RCU_INITIALIZER(v)); \ } while (0) /** -- cgit v1.2.3-70-g09d2 From ec90a194ae2cb8b8e9fe4f6f70dd3d4dc0269b4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 10 Jun 2015 12:53:06 -0700 Subject: rcu: Create a synchronize_rcu_mult() There have been several requests for a primitive that waits for grace periods for several RCU flavors concurrently, so this commit creates it. This is a variadic macro, and you pass in the call_rcu() functions of the flavors of RCU that you wish to wait for. Note that you cannot pass in call_srcu() for two reasons: (1) This would result in a type mismatch and (2) You need to specify which srcu_struct you want to use. 
Handle this by creating a wrapper function for your SRCU domain, for example: void call_srcu_mine(struct rcu_head *head, rcu_callback_t func) { call_srcu(&ss_mine, head, func); } You can then do something like this: synchronize_rcu_mult(call_srcu_mine, call_rcu, call_rcu_sched); Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 35 +++++++++++++++++++++++++++++++---- include/linux/types.h | 3 +++ kernel/rcu/update.c | 37 +++++++++++++++++++++++++++---------- 3 files changed, 61 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index c63428c1ed8a..33ec16b9c2ee 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -226,6 +226,37 @@ struct rcu_synchronize { }; void wakeme_after_rcu(struct rcu_head *head); +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array); + +#define _wait_rcu_gp(checktiny, ...) \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + const int __n = ARRAY_SIZE(__crcu_array); \ + struct rcu_synchronize __rs_array[__n]; \ + \ + __wait_rcu_gp(checktiny, __n, __crcu_array, __rs_array); \ +} while (0) + +#define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) + +/** + * synchronize_rcu_mult - Wait concurrently for multiple grace periods + * @...: List of call_rcu() functions for the flavors to wait on. + * + * This macro waits concurrently for multiple flavors of RCU grace periods. + * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait + * on concurrent RCU and RCU-bh grace periods. Waiting on a given SRCU + * domain requires you to write a wrapper function for that SRCU domain's + * call_srcu() function, supplying the corresponding srcu_struct. + * + * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU + * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called + * is automatically a grace period. + */ +#define synchronize_rcu_mult(...)
\ + _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) + /** * call_rcu_tasks() - Queue an RCU for invocation task-based grace period * @head: structure to be used for queueing the RCU updates. @@ -392,10 +423,6 @@ bool __rcu_is_watching(void); * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. */ -typedef void call_rcu_func_t(struct rcu_head *head, - void (*func)(struct rcu_head *head)); -void wait_rcu_gp(call_rcu_func_t crf); - #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) diff --git a/include/linux/types.h b/include/linux/types.h index 8715287c3b1f..c314989d9158 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -212,6 +212,9 @@ struct callback_head { }; #define rcu_head callback_head +typedef void (*rcu_callback_t)(struct rcu_head *head); +typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); + /* clocksource cycle base type */ typedef u64 cycle_t; diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index fec5f48b8860..a0a0dd03c73a 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -318,20 +318,37 @@ void wakeme_after_rcu(struct rcu_head *head) rcu = container_of(head, struct rcu_synchronize, head); complete(&rcu->completion); } +EXPORT_SYMBOL_GPL(wakeme_after_rcu); -void wait_rcu_gp(call_rcu_func_t crf) +void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, + struct rcu_synchronize *rs_array) { - struct rcu_synchronize rcu; + int i; - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - crf(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); + /* Initialize and register callbacks for each flavor specified. 
*/ + for (i = 0; i < n; i++) { + if (checktiny && + (crcu_array[i] == call_rcu || + crcu_array[i] == call_rcu_bh)) { + might_sleep(); + continue; + } + init_rcu_head_on_stack(&rs_array[i].head); + init_completion(&rs_array[i].completion); + (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu); + } + + /* Wait for all callbacks to be invoked. */ + for (i = 0; i < n; i++) { + if (checktiny && + (crcu_array[i] == call_rcu || + crcu_array[i] == call_rcu_bh)) + continue; + wait_for_completion(&rs_array[i].completion); + destroy_rcu_head_on_stack(&rs_array[i].head); + } } -EXPORT_SYMBOL_GPL(wait_rcu_gp); +EXPORT_SYMBOL_GPL(__wait_rcu_gp); #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD void init_rcu_head(struct rcu_head *head) -- cgit v1.2.3-70-g09d2 From f78f5b90c4ffa559e400c3919a02236101f29f3f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 18 Jun 2015 15:50:02 -0700 Subject: rcu: Rename rcu_lockdep_assert() to RCU_LOCKDEP_WARN() This commit renames rcu_lockdep_assert() to RCU_LOCKDEP_WARN() for consistency with the WARN() series of macros. This also requires inverting the sense of the conditional, which this commit also does. Reported-by: Ingo Molnar Signed-off-by: Paul E. 
McKenney Reviewed-by: Ingo Molnar --- Documentation/RCU/whatisRCU.txt | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 6 ++-- arch/x86/kernel/traps.c | 2 +- drivers/base/power/opp.c | 4 +-- include/linux/fdtable.h | 4 +-- include/linux/rcupdate.h | 63 ++++++++++++++++++++++++++-------------- kernel/cgroup.c | 4 +-- kernel/pid.c | 5 ++-- kernel/rcu/srcu.c | 10 +++---- kernel/rcu/tiny.c | 8 ++--- kernel/rcu/tree.c | 28 +++++++++--------- kernel/rcu/tree_plugin.h | 8 ++--- kernel/rcu/update.c | 4 +-- kernel/sched/core.c | 8 ++--- kernel/workqueue.c | 20 ++++++------- security/device_cgroup.c | 6 ++-- 16 files changed, 101 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 5746b0c77f3e..adc2184009c5 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -883,7 +883,7 @@ All: lockdep-checked RCU-protected pointer access rcu_access_pointer rcu_dereference_raw - rcu_lockdep_assert + RCU_LOCKDEP_WARN rcu_sleep_check RCU_NONIDLE diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index df919ff103c3..3d6b5269fb2e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -54,9 +54,9 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); #define rcu_dereference_check_mce(p) \ ({ \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&mce_chrdev_read_mutex), \ - "suspicious rcu_dereference_check_mce() usage"); \ + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&mce_chrdev_read_mutex), \ + "suspicious rcu_dereference_check_mce() usage"); \ smp_load_acquire(&(p)); \ }) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f5791927aa64..c5a5231d1d11 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -136,7 +136,7 @@ enum ctx_state ist_enter(struct pt_regs *regs) preempt_count_add(HARDIRQ_OFFSET); /* This code is a bit fragile. Test it. 
*/ - rcu_lockdep_assert(rcu_is_watching(), "ist_enter didn't work"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); return prev_state; } diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 677fb2843553..3b188f20b43f 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -110,8 +110,8 @@ static DEFINE_MUTEX(dev_opp_list_lock); #define opp_rcu_lockdep_assert() \ do { \ - rcu_lockdep_assert(rcu_read_lock_held() || \ - lockdep_is_held(&dev_opp_list_lock), \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ + !lockdep_is_held(&dev_opp_list_lock), \ "Missing rcu_read_lock() or " \ "dev_opp_list_lock protection"); \ } while (0) diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index fbb88740634a..674e3e226465 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -86,8 +86,8 @@ static inline struct file *__fcheck_files(struct files_struct *files, unsigned i static inline struct file *fcheck_files(struct files_struct *files, unsigned int fd) { - rcu_lockdep_assert(rcu_read_lock_held() || - lockdep_is_held(&files->file_lock), + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + !lockdep_is_held(&files->file_lock), "suspicious rcu_dereference_check() usage"); return __fcheck_files(files, fd); } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 33ec16b9c2ee..ff476515f716 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -536,6 +536,11 @@ static inline int rcu_read_lock_sched_held(void) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. 
*/ +static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void) +{ +} + #ifdef CONFIG_PROVE_RCU /** @@ -546,17 +551,32 @@ static inline int rcu_read_lock_sched_held(void) #define rcu_lockdep_assert(c, s) \ do { \ static bool __section(.data.unlikely) __warned; \ + deprecate_rcu_lockdep_assert(); \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) +/** + * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met + * @c: condition to check + * @s: informative message + */ +#define RCU_LOCKDEP_WARN(c, s) \ + do { \ + static bool __section(.data.unlikely) __warned; \ + if (debug_lockdep_rcu_enabled() && !__warned && (c)) { \ + __warned = true; \ + lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ + } \ + } while (0) + #if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU) static inline void rcu_preempt_sleep_check(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal context switch in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), + "Illegal context switch in RCU read-side critical section"); } #else /* #ifdef CONFIG_PROVE_RCU */ static inline void rcu_preempt_sleep_check(void) @@ -567,15 +587,16 @@ static inline void rcu_preempt_sleep_check(void) #define rcu_sleep_check() \ do { \ rcu_preempt_sleep_check(); \ - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ - "Illegal context switch in RCU-bh read-side critical section"); \ - rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ - "Illegal context switch in RCU-sched read-side critical section"); \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh read-side critical section"); \ + RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c, s) do { } 
while (0) +#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert() +#define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -606,13 +627,13 @@ static inline void rcu_preempt_sleep_check(void) ({ \ /* Dependency order vs. p above. */ \ typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_check() usage"); \ + RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c, "suspicious rcu_dereference_protected() usage"); \ + RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_protected() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -836,8 +857,8 @@ static inline void rcu_read_lock(void) __rcu_read_lock(); __acquire(RCU); rcu_lock_acquire(&rcu_lock_map); - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_lock() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_lock() used illegally while idle"); } /* @@ -887,8 +908,8 @@ static inline void rcu_read_lock(void) */ static inline void rcu_read_unlock(void) { - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_unlock() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_unlock() used illegally while idle"); __release(RCU); __rcu_read_unlock(); rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. 
*/ @@ -916,8 +937,8 @@ static inline void rcu_read_lock_bh(void) local_bh_disable(); __acquire(RCU_BH); rcu_lock_acquire(&rcu_bh_lock_map); - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_lock_bh() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_lock_bh() used illegally while idle"); } /* @@ -927,8 +948,8 @@ static inline void rcu_read_lock_bh(void) */ static inline void rcu_read_unlock_bh(void) { - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_unlock_bh() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_unlock_bh() used illegally while idle"); rcu_lock_release(&rcu_bh_lock_map); __release(RCU_BH); local_bh_enable(); @@ -952,8 +973,8 @@ static inline void rcu_read_lock_sched(void) preempt_disable(); __acquire(RCU_SCHED); rcu_lock_acquire(&rcu_sched_lock_map); - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_lock_sched() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_lock_sched() used illegally while idle"); } /* Used by lockdep and tracing: cannot be traced, cannot call lockdep. 
*/ @@ -970,8 +991,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { - rcu_lockdep_assert(rcu_is_watching(), - "rcu_read_unlock_sched() used illegally while idle"); + RCU_LOCKDEP_WARN(!rcu_is_watching(), + "rcu_read_unlock_sched() used illegally while idle"); rcu_lock_release(&rcu_sched_lock_map); __release(RCU_SCHED); preempt_enable(); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f89d9292eee6..b89f3168411b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -107,8 +107,8 @@ static DEFINE_SPINLOCK(release_agent_path_lock); struct percpu_rw_semaphore cgroup_threadgroup_rwsem; #define cgroup_assert_mutex_or_rcu_locked() \ - rcu_lockdep_assert(rcu_read_lock_held() || \ - lockdep_is_held(&cgroup_mutex), \ + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ + !lockdep_is_held(&cgroup_mutex), \ "cgroup_mutex or RCU read lock required"); /* diff --git a/kernel/pid.c b/kernel/pid.c index 4fd07d5b7baf..ca368793808e 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -451,9 +451,8 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { - rcu_lockdep_assert(rcu_read_lock_held(), - "find_task_by_pid_ns() needs rcu_read_lock()" - " protection"); + RCU_LOCKDEP_WARN(!rcu_read_lock_held(), + "find_task_by_pid_ns() needs rcu_read_lock() protection"); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index de35087c92a5..d3fcb2ec8536 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -415,11 +415,11 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) struct rcu_head *head = &rcu.head; bool done = false; - rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && - !lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); + 
RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); might_sleep(); init_completion(&rcu.completion); diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index c291bd65d2cb..d0471056d0af 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -191,10 +191,10 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ void synchronize_sched(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_sched() in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_sched() in RCU read-side critical section"); cond_resched(); } EXPORT_SYMBOL_GPL(synchronize_sched); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index cb64d7e13d24..0a73d26357a2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -649,12 +649,12 @@ static void rcu_eqs_enter_common(long long oldval, bool user) * It is illegal to enter an extended quiescent state while * in an RCU read-side critical section. 
*/ - rcu_lockdep_assert(!lock_is_held(&rcu_lock_map), - "Illegal idle entry in RCU read-side critical section."); - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), - "Illegal idle entry in RCU-bh read-side critical section."); - rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), - "Illegal idle entry in RCU-sched read-side critical section."); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map), + "Illegal idle entry in RCU read-side critical section."); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map), + "Illegal idle entry in RCU-bh read-side critical section."); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map), + "Illegal idle entry in RCU-sched read-side critical section."); } /* @@ -3161,10 +3161,10 @@ static inline int rcu_blocking_is_gp(void) */ void synchronize_sched(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_sched() in RCU-sched read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_sched() in RCU-sched read-side critical section"); if (rcu_blocking_is_gp()) return; if (rcu_gp_is_expedited()) @@ -3188,10 +3188,10 @@ EXPORT_SYMBOL_GPL(synchronize_sched); */ void synchronize_rcu_bh(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); if (rcu_blocking_is_gp()) return; if (rcu_gp_is_expedited()) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a983bc68a146..9e922f111d63 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -538,10 
+538,10 @@ EXPORT_SYMBOL_GPL(call_rcu); */ void synchronize_rcu(void) { - rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) && - !lock_is_held(&rcu_lock_map) && - !lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu() in RCU read-side critical section"); + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || + lock_is_held(&rcu_lock_map) || + lock_is_held(&rcu_sched_lock_map), + "Illegal synchronize_rcu() in RCU read-side critical section"); if (!rcu_scheduler_active) return; if (rcu_gp_is_expedited()) diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a0a0dd03c73a..47268fb1d27b 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -589,8 +589,8 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks); void synchronize_rcu_tasks(void) { /* Complain if the scheduler has not started. */ - rcu_lockdep_assert(!rcu_scheduler_active, - "synchronize_rcu_tasks called too soon"); + RCU_LOCKDEP_WARN(rcu_scheduler_active, + "synchronize_rcu_tasks called too soon"); /* Wait for the grace period. 
*/ wait_rcu_gp(call_rcu_tasks); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..5e73c79fadd0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2200,8 +2200,8 @@ unsigned long to_ratio(u64 period, u64 runtime) #ifdef CONFIG_SMP inline struct dl_bw *dl_bw_of(int i) { - rcu_lockdep_assert(rcu_read_lock_sched_held(), - "sched RCU must be held"); + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), + "sched RCU must be held"); return &cpu_rq(i)->rd->dl_bw; } @@ -2210,8 +2210,8 @@ static inline int dl_bw_cpus(int i) struct root_domain *rd = cpu_rq(i)->rd; int cpus = 0; - rcu_lockdep_assert(rcu_read_lock_sched_held(), - "sched RCU must be held"); + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(), + "sched RCU must be held"); for_each_cpu_and(i, rd->span, cpu_active_mask) cpus++; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4c4f06176f74..cb91c63b4f4a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -338,20 +338,20 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq); #include #define assert_rcu_or_pool_mutex() \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq_pool_mutex), \ - "sched RCU or wq_pool_mutex should be held") + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&wq_pool_mutex), \ + "sched RCU or wq_pool_mutex should be held") #define assert_rcu_or_wq_mutex(wq) \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq->mutex), \ - "sched RCU or wq->mutex should be held") + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&wq->mutex), \ + "sched RCU or wq->mutex should be held") #define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \ - rcu_lockdep_assert(rcu_read_lock_sched_held() || \ - lockdep_is_held(&wq->mutex) || \ - lockdep_is_held(&wq_pool_mutex), \ - "sched RCU, wq->mutex or wq_pool_mutex should be held") + RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ + !lockdep_is_held(&wq->mutex) && \ + 
!lockdep_is_held(&wq_pool_mutex), \ + "sched RCU, wq->mutex or wq_pool_mutex should be held") #define for_each_cpu_worker_pool(pool, cpu) \ for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 188c1d26393b..73455089feef 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -400,9 +400,9 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup, { bool match = false; - rcu_lockdep_assert(rcu_read_lock_held() || - lockdep_is_held(&devcgroup_mutex), - "device_cgroup:verify_new_ex called without proper synchronization"); + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && + lockdep_is_held(&devcgroup_mutex), + "device_cgroup:verify_new_ex called without proper synchronization"); if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) { if (behavior == DEVCG_DEFAULT_ALLOW) { -- cgit v1.2.3-70-g09d2 From 38aa420096e565fe9c98f9d9475fd168114501a9 Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Mon, 15 Jun 2015 15:46:37 +0530 Subject: drivers:usb:fsl: Replace macros with enumerated type Replace macros with enumerated type to represent usb ip controller version Signed-off-by: Nikhil Badola Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fsl-mph-dr-of.c | 8 ++++---- include/linux/fsl_devices.h | 16 ++++++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 5e0d60035216..219595637cb1 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -119,9 +119,9 @@ error: static const struct of_device_id fsl_usb2_mph_dr_of_match[]; -static int usb_get_ver_info(struct device_node *np) +static enum fsl_usb2_controller_ver usb_get_ver_info(struct device_node *np) { - int ver = -1; + enum fsl_usb2_controller_ver ver = FSL_USB_VER_NONE; /* * returns 1 for usb controller version 1.6 @@ -142,7 +142,7 @@ static int usb_get_ver_info(struct device_node *np) 
else /* for previous controller versions */ ver = FSL_USB_VER_OLD; - if (ver > -1) + if (ver > FSL_USB_VER_NONE) return ver; } @@ -215,7 +215,7 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev) pdata->controller_ver = usb_get_ver_info(np); if (pdata->have_sysif_regs) { - if (pdata->controller_ver < 0) { + if (pdata->controller_ver == FSL_USB_VER_NONE) { dev_warn(&ofdev->dev, "Could not get controller version\n"); return -ENODEV; } diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 2a2f56b292c1..0d4855cd5330 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -20,11 +20,6 @@ #define FSL_UTMI_PHY_DLY 10 /*As per P1010RM, delay for UTMI PHY CLK to become stable - 10ms*/ #define FSL_USB_PHY_CLK_TIMEOUT 10000 /* uSec */ -#define FSL_USB_VER_OLD 0 -#define FSL_USB_VER_1_6 1 -#define FSL_USB_VER_2_2 2 -#define FSL_USB_VER_2_4 3 -#define FSL_USB_VER_2_5 4 #include @@ -52,6 +47,15 @@ * */ +enum fsl_usb2_controller_ver { + FSL_USB_VER_NONE = -1, + FSL_USB_VER_OLD = 0, + FSL_USB_VER_1_6 = 1, + FSL_USB_VER_2_2 = 2, + FSL_USB_VER_2_4 = 3, + FSL_USB_VER_2_5 = 4, +}; + enum fsl_usb2_operating_modes { FSL_USB2_MPH_HOST, FSL_USB2_DR_HOST, @@ -72,7 +76,7 @@ struct platform_device; struct fsl_usb2_platform_data { /* board specific information */ - int controller_ver; + enum fsl_usb2_controller_ver controller_ver; enum fsl_usb2_operating_modes operating_mode; enum fsl_usb2_phy_modes phy_mode; unsigned int port_enables; -- cgit v1.2.3-70-g09d2 From 523f1dec58408b36e7683a3d61a0286eed1fc1c8 Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Mon, 15 Jun 2015 15:47:29 +0530 Subject: drivers: usb :fsl: Implement Workaround for USB Erratum A007792 USB controller version-2.5 requires to enable internal UTMI phy and program PTS field in PORTSC register before asserting controller reset. 
This is a must for successful resetting of the controller and subsequent enumeration of USB devices. Signed-off-by: Nikhil Badola Signed-off-by: Suresh Gupta Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 9 +++++++++ drivers/usb/host/fsl-mph-dr-of.c | 6 ++++++ include/linux/fsl_devices.h | 1 + 3 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 5352e74b92e2..716aa8be1d6f 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -129,6 +129,15 @@ static int fsl_ehci_drv_probe(struct platform_device *pdev) if (pdata->have_sysif_regs && pdata->controller_ver < FSL_USB_VER_1_6) setbits32(hcd->regs + FSL_SOC_USB_CTRL, 0x4); + /* + * Enable UTMI phy and program PTS field in UTMI mode before asserting + * controller reset for USB Controller version 2.5 + */ + if (pdata->has_fsl_erratum_a007792) { + writel_be(CTRL_UTMI_PHY_EN, hcd->regs + FSL_SOC_USB_CTRL); + writel(PORT_PTS_UTMI, hcd->regs + FSL_SOC_USB_PORTSC1); + } + /* Don't need to set host mode here.
It will be done by tdi_reset() */ retval = usb_add_hcd(hcd, irq, IRQF_SHARED); diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 219595637cb1..17e1e6b7a035 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -214,6 +214,12 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev) pdata->phy_mode = determine_usb_phy(prop); pdata->controller_ver = usb_get_ver_info(np); + /* Activate Erratum by reading property in device tree */ + if (of_get_property(np, "fsl,usb-erratum-a007792", NULL)) + pdata->has_fsl_erratum_a007792 = 1; + else + pdata->has_fsl_erratum_a007792 = 0; + if (pdata->have_sysif_regs) { if (pdata->controller_ver == FSL_USB_VER_NONE) { dev_warn(&ofdev->dev, "Could not get controller version\n"); diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 0d4855cd5330..bdb40f67180c 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -97,6 +97,7 @@ struct fsl_usb2_platform_data { unsigned suspended:1; unsigned already_suspended:1; + unsigned has_fsl_erratum_a007792:1; /* register save area for suspend/resume */ u32 pm_command; -- cgit v1.2.3-70-g09d2 From 6009d95e04cf74c6f80db56fddca21fea476ad24 Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Mon, 15 Jun 2015 15:48:22 +0530 Subject: drivers:usb:fsl: Introduce FSL_USB2_PHY_UTMI_DUAL macro Introduce FSL_USB2_PHY_UTMI_DUAL macro for setting phy mode in SOCs such as T4240, T1040, T2080 which have utmi dual-phy Signed-off-by: Ramneek Mehresh Signed-off-by: Nikhil Badola Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 1 + drivers/usb/host/fsl-mph-dr-of.c | 2 ++ include/linux/fsl_devices.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 716aa8be1d6f..b04c9dbd5c7d 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -213,6 +213,7 @@ static int
ehci_fsl_setup_phy(struct usb_hcd *hcd, portsc |= PORT_PTS_PTW; /* fall through */ case FSL_USB2_PHY_UTMI: + case FSL_USB2_PHY_UTMI_DUAL: if (pdata->have_sysif_regs && pdata->controller_ver) { /* controller version 1.6 or above */ setbits32(non_ehci + FSL_SOC_USB_CTRL, UTMI_PHY_EN); diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 17e1e6b7a035..631fc504afda 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -69,6 +69,8 @@ static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type) return FSL_USB2_PHY_UTMI; if (!strcasecmp(phy_type, "utmi_wide")) return FSL_USB2_PHY_UTMI_WIDE; + if (!strcasecmp(phy_type, "utmi_dual")) + return FSL_USB2_PHY_UTMI_DUAL; if (!strcasecmp(phy_type, "serial")) return FSL_USB2_PHY_SERIAL; diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index bdb40f67180c..070d9aef90a7 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -69,6 +69,7 @@ enum fsl_usb2_phy_modes { FSL_USB2_PHY_UTMI, FSL_USB2_PHY_UTMI_WIDE, FSL_USB2_PHY_SERIAL, + FSL_USB2_PHY_UTMI_DUAL, }; struct clk; -- cgit v1.2.3-70-g09d2 From f4fdfaa280a284be8a056d6840cdbbf42c05bf95 Mon Sep 17 00:00:00 2001 From: Nikhil Badola Date: Tue, 14 Jul 2015 17:28:10 +0530 Subject: drivers: usb: fsl: Modify phy clk valid bit checking Phy_clk_valid bit is checked only when the boolean property phy-clk-valid is present in usb node device tree. This property is added to the usb node via device tree fixup.
Signed-off-by: Nikhil Badola Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 16 ++++++++-------- drivers/usb/host/fsl-mph-dr-of.c | 9 +++++++++ include/linux/fsl_devices.h | 1 + 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index b04c9dbd5c7d..05ebe3dcd618 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -230,14 +230,14 @@ static int ehci_fsl_setup_phy(struct usb_hcd *hcd, break; } - if (pdata->have_sysif_regs && - pdata->controller_ver > FSL_USB_VER_1_6 && - (phy_mode == FSL_USB2_PHY_ULPI)) { - /* check PHY_CLK_VALID to get phy clk valid */ - if (!(spin_event_timeout(in_be32(non_ehci + FSL_SOC_USB_CTRL) & - PHY_CLK_VALID, FSL_USB_PHY_CLK_TIMEOUT, 0) || - in_be32(non_ehci + FSL_SOC_USB_PRICTRL))) { - dev_warn(hcd->self.controller, "USB PHY clock invalid\n"); + /* + * check PHY_CLK_VALID to determine phy clock presence before writing + * to portsc + */ + if (pdata->check_phy_clk_valid) { + if (!(in_be32(non_ehci + FSL_SOC_USB_CTRL) & PHY_CLK_VALID)) { + dev_warn(hcd->self.controller, + "USB PHY clock invalid\n"); return -EINVAL; } } diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c index 631fc504afda..9f731413ab3e 100644 --- a/drivers/usb/host/fsl-mph-dr-of.c +++ b/drivers/usb/host/fsl-mph-dr-of.c @@ -222,6 +222,15 @@ static int fsl_usb2_mph_dr_of_probe(struct platform_device *ofdev) else pdata->has_fsl_erratum_a007792 = 0; + /* + * Determine whether phy_clk_valid needs to be checked + * by reading property in device tree + */ + if (of_get_property(np, "phy-clk-valid", NULL)) + pdata->check_phy_clk_valid = 1; + else + pdata->check_phy_clk_valid = 0; + if (pdata->have_sysif_regs) { if (pdata->controller_ver == FSL_USB_VER_NONE) { dev_warn(&ofdev->dev, "Could not get controller version\n"); diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index 
070d9aef90a7..cebdbbb4aa69 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -99,6 +99,7 @@ struct fsl_usb2_platform_data { unsigned suspended:1; unsigned already_suspended:1; unsigned has_fsl_erratum_a007792:1; + unsigned check_phy_clk_valid:1; /* register save area for suspend/resume */ u32 pm_command; -- cgit v1.2.3-70-g09d2 From ee86dbc6e327062396748162b95309388c19faab Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 3 Jul 2015 15:01:35 +0300 Subject: kvm: introduce vcpu_debug = kvm_debug + vcpu context vcpu_debug is a useful macro like kvm_debug, but it additionally includes the vcpu context in its output. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V. Lunev Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 05e99b8ef465..1d917d9b7f12 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -424,6 +424,9 @@ struct kvm { #define vcpu_unimpl(vcpu, fmt, ...) \ kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +#define vcpu_debug(vcpu, fmt, ...) \ + kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) + static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { smp_rmb(); -- cgit v1.2.3-70-g09d2 From 2ce7918990641b07e70e1b25752d666369e2016e Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 3 Jul 2015 15:01:41 +0300 Subject: kvm/x86: add sending hyper-v crash notification to user space Sending of notification is done by exiting vcpu to user space if KVM_REQ_HV_CRASH is enabled for vcpu. At exit to user space the kvm_run structure contains system_event with type KVM_SYSTEM_EVENT_CRASH to notify that a guest crash occurred. Signed-off-by: Andrey Smetanin Signed-off-by: Denis V.
Lunev Reviewed-by: Peter Hornyack CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 5 +++++ arch/x86/kvm/x86.c | 6 ++++++ include/linux/kvm_host.h | 1 + include/uapi/linux/kvm.h | 1 + 4 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7926a90156f..a4ebcb712375 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3277,6 +3277,7 @@ should put the acknowledged interrupt vector into the 'epr' field. struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; __u64 flags; } system_event; @@ -3296,6 +3297,10 @@ Valid values for 'type' are: KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM. As with SHUTDOWN, userspace can choose to ignore the request, or to schedule the reset to occur in the future and may call KVM_RUN again. + KVM_SYSTEM_EVENT_CRASH -- the guest crash occurred and the guest + has requested a crash condition maintenance. Userspace can choose + to ignore the request, or to gather VM memory core dump and/or + reset/shutdown of the VM. /* Fix the size of the union. 
*/ char padding[256]; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cfa3e5a7d6be..28076c266a9a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6263,6 +6263,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu_scan_ioapic(vcpu); if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu)) kvm_vcpu_reload_apic_access_page(vcpu); + if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; + vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH; + r = 0; + goto out; + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1d917d9b7f12..51103f0feb7e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -139,6 +139,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_DISABLE_IBS 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 +#define KVM_REQ_HV_CRASH 27 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 716ad4ae4d4b..9ef19ebd9df4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -317,6 +317,7 @@ struct kvm_run { struct { #define KVM_SYSTEM_EVENT_SHUTDOWN 1 #define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 __u32 type; __u64 flags; } system_event; -- cgit v1.2.3-70-g09d2 From 3bbd14e0a2e3a988b1b5fe702a2539bd8d0ec622 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 20 Jul 2015 13:32:52 +0200 Subject: netfilter: rename local nf_hook_list to hook_list 085db2c04557 ("netfilter: Per network namespace netfilter hooks.") introduced a new nf_hook_list that is global, so let's avoid this overlap. Signed-off-by: Pablo Neira Ayuso Acked-by: "Eric W. 
Biederman" --- include/linux/netfilter.h | 14 +++++++------- net/netfilter/core.c | 28 ++++++++++++++-------------- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e01da73ee6c4..d788ce62d826 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -140,20 +140,20 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef HAVE_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; -static inline bool nf_hook_list_active(struct list_head *nf_hook_list, +static inline bool nf_hook_list_active(struct list_head *hook_list, u_int8_t pf, unsigned int hook) { if (__builtin_constant_p(pf) && __builtin_constant_p(hook)) return static_key_false(&nf_hooks_needed[pf][hook]); - return !list_empty(nf_hook_list); + return !list_empty(hook_list); } #else -static inline bool nf_hook_list_active(struct list_head *nf_hook_list, +static inline bool nf_hook_list_active(struct list_head *hook_list, u_int8_t pf, unsigned int hook) { - return !list_empty(nf_hook_list); + return !list_empty(hook_list); } #endif @@ -175,12 +175,12 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int thresh) { struct net *net = dev_net(indev ? 
indev : outdev); - struct list_head *nf_hook_list = &net->nf.hooks[pf][hook]; + struct list_head *hook_list = &net->nf.hooks[pf][hook]; - if (nf_hook_list_active(nf_hook_list, pf, hook)) { + if (nf_hook_list_active(hook_list, pf, hook)) { struct nf_hook_state state; - nf_hook_state_init(&state, nf_hook_list, hook, thresh, + nf_hook_state_init(&state, hook_list, hook, thresh, pf, indev, outdev, sk, okfn); return nf_hook_slow(skb, &state); } diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0ecb2b52f276..2a5a0704245c 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -62,20 +62,20 @@ EXPORT_SYMBOL(nf_hooks_needed); static DEFINE_MUTEX(nf_hook_mutex); -static struct list_head *find_nf_hook_list(struct net *net, +static struct list_head *nf_find_hook_list(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *nf_hook_list = NULL; + struct list_head *hook_list = NULL; if (reg->pf != NFPROTO_NETDEV) - nf_hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; + hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS if (reg->dev && dev_net(reg->dev) == net) - nf_hook_list = &reg->dev->nf_hooks_ingress; + hook_list = &reg->dev->nf_hooks_ingress; #endif } - return nf_hook_list; + return hook_list; } struct nf_hook_entry { @@ -85,7 +85,7 @@ struct nf_hook_entry { int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *nf_hook_list; + struct list_head *hook_list; struct nf_hook_entry *entry; struct nf_hook_ops *elem; @@ -96,14 +96,14 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->orig_ops = reg; entry->ops = *reg; - nf_hook_list = find_nf_hook_list(net, reg); - if (!nf_hook_list) { + hook_list = nf_find_hook_list(net, reg); + if (!hook_list) { kfree(entry); return -ENOENT; } mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, nf_hook_list, list) { + list_for_each_entry(elem, hook_list, list) { if
(reg->priority < elem->priority) break; } @@ -122,16 +122,16 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *nf_hook_list; + struct list_head *hook_list; struct nf_hook_entry *entry; struct nf_hook_ops *elem; - nf_hook_list = find_nf_hook_list(net, reg); - if (!nf_hook_list) + hook_list = nf_find_hook_list(net, reg); + if (!hook_list) return; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, nf_hook_list, list) { + list_for_each_entry(elem, hook_list, list) { entry = container_of(elem, struct nf_hook_entry, ops); if (entry->orig_ops == reg) { list_del_rcu(&entry->ops.list); @@ -139,7 +139,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) } } mutex_unlock(&nf_hook_mutex); - if (&elem->list == nf_hook_list) { + if (&elem->list == hook_list) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } -- cgit v1.2.3-70-g09d2 From 6184fc0b8dd76c6aedc7a26e93254993e14e52de Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 24 Jun 2015 18:07:02 +0200 Subject: quota: Propagate error from ->acquire_dquot() Currently when some error happened in ->acquire_dquot(), dqget() just returned NULL. That was indistinguishable from a case when e.g. someone run quotaoff and so was generally silently ignored. However ->acquire_dquot() can fail because of ENOSPC or EIO in which case user should better know. So propagate error up from ->acquire_dquot properly. 
Signed-off-by: Jan Kara --- fs/ocfs2/file.c | 8 ++--- fs/ocfs2/quota_local.c | 4 +-- fs/quota/dquot.c | 88 ++++++++++++++++++++++++++++++++++-------------- include/linux/quotaops.h | 5 +-- 4 files changed, 72 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 719f7f4c7a37..4d9e8275ed99 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1209,8 +1209,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); - if (!transfer_to[USRQUOTA]) { - status = -ESRCH; + if (IS_ERR(transfer_to[USRQUOTA])) { + status = PTR_ERR(transfer_to[USRQUOTA]); goto bail_unlock; } } @@ -1218,8 +1218,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); - if (!transfer_to[GRPQUOTA]) { - status = -ESRCH; + if (IS_ERR(transfer_to[GRPQUOTA])) { + status = PTR_ERR(transfer_to[GRPQUOTA]); goto bail_unlock; } } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 3d0b63d34225..bb07004df72a 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -499,8 +499,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, dquot = dqget(sb, make_kqid(&init_user_ns, type, le64_to_cpu(dqblk->dqb_id))); - if (!dquot) { - status = -EIO; + if (IS_ERR(dquot)) { + status = PTR_ERR(dquot); mlog(ML_ERROR, "Failed to get quota structure " "for id %u, type %d. 
Cannot finish quota " "file recovery.\n", diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 20d1f74561cf..fed66e2c9fe8 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -247,7 +247,7 @@ struct dqstats dqstats; EXPORT_SYMBOL(dqstats); static qsize_t inode_get_rsv_space(struct inode *inode); -static void __dquot_initialize(struct inode *inode, int type); +static int __dquot_initialize(struct inode *inode, int type); static inline unsigned int hashfn(const struct super_block *sb, struct kqid qid) @@ -832,16 +832,17 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) struct dquot *dqget(struct super_block *sb, struct kqid qid) { unsigned int hashent = hashfn(sb, qid); - struct dquot *dquot = NULL, *empty = NULL; + struct dquot *dquot, *empty = NULL; if (!sb_has_quota_active(sb, qid.type)) - return NULL; + return ERR_PTR(-ESRCH); we_slept: spin_lock(&dq_list_lock); spin_lock(&dq_state_lock); if (!sb_has_quota_active(sb, qid.type)) { spin_unlock(&dq_state_lock); spin_unlock(&dq_list_lock); + dquot = ERR_PTR(-ESRCH); goto out; } spin_unlock(&dq_state_lock); @@ -876,11 +877,15 @@ we_slept: * already finished or it will be canceled due to dq_count > 1 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ - if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && - sb->dq_op->acquire_dquot(dquot) < 0) { - dqput(dquot); - dquot = NULL; - goto out; + if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) { + int err; + + err = sb->dq_op->acquire_dquot(dquot); + if (err < 0) { + dqput(dquot); + dquot = ERR_PTR(err); + goto out; + } } #ifdef CONFIG_QUOTA_DEBUG BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */ @@ -1390,15 +1395,16 @@ static int dquot_active(const struct inode *inode) * It is better to call this function outside of any transaction as it * might need a lot of space in journal for dquot structure allocation. 
*/ -static void __dquot_initialize(struct inode *inode, int type) +static int __dquot_initialize(struct inode *inode, int type) { int cnt, init_needed = 0; struct dquot **dquots, *got[MAXQUOTAS]; struct super_block *sb = inode->i_sb; qsize_t rsv; + int ret = 0; if (!dquot_active(inode)) - return; + return 0; dquots = i_dquot(inode); @@ -1407,6 +1413,7 @@ static void __dquot_initialize(struct inode *inode, int type) struct kqid qid; kprojid_t projid; int rc; + struct dquot *dquot; got[cnt] = NULL; if (type != -1 && cnt != type) @@ -1438,16 +1445,25 @@ static void __dquot_initialize(struct inode *inode, int type) qid = make_kqid_projid(projid); break; } - got[cnt] = dqget(sb, qid); + dquot = dqget(sb, qid); + if (IS_ERR(dquot)) { + /* We raced with somebody turning quotas off... */ + if (PTR_ERR(dquot) != -ESRCH) { + ret = PTR_ERR(dquot); + goto out_put; + } + dquot = NULL; + } + got[cnt] = dquot; } /* All required i_dquot has been initialized */ if (!init_needed) - return; + return 0; spin_lock(&dq_data_lock); if (IS_NOQUOTA(inode)) - goto out_err; + goto out_lock; for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -1469,15 +1485,18 @@ static void __dquot_initialize(struct inode *inode, int type) dquot_resv_space(dquots[cnt], rsv); } } -out_err: +out_lock: spin_unlock(&dq_data_lock); +out_put: /* Drop unused references */ dqput_all(got); + + return ret; } -void dquot_initialize(struct inode *inode) +int dquot_initialize(struct inode *inode) { - __dquot_initialize(inode, -1); + return __dquot_initialize(inode, -1); } EXPORT_SYMBOL(dquot_initialize); @@ -1961,18 +1980,37 @@ EXPORT_SYMBOL(__dquot_transfer); int dquot_transfer(struct inode *inode, struct iattr *iattr) { struct dquot *transfer_to[MAXQUOTAS] = {}; + struct dquot *dquot; struct super_block *sb = inode->i_sb; int ret; if (!dquot_active(inode)) return 0; - if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)) - transfer_to[USRQUOTA] = dqget(sb, 
make_kqid_uid(iattr->ia_uid)); - if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)) - transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(iattr->ia_gid)); - + if (iattr->ia_valid & ATTR_UID && !uid_eq(iattr->ia_uid, inode->i_uid)){ + dquot = dqget(sb, make_kqid_uid(iattr->ia_uid)); + if (IS_ERR(dquot)) { + if (PTR_ERR(dquot) != -ESRCH) { + ret = PTR_ERR(dquot); + goto out_put; + } + dquot = NULL; + } + transfer_to[USRQUOTA] = dquot; + } + if (iattr->ia_valid & ATTR_GID && !gid_eq(iattr->ia_gid, inode->i_gid)){ + dquot = dqget(sb, make_kqid_gid(iattr->ia_gid)); + if (IS_ERR(dquot)) { + if (PTR_ERR(dquot) != -ESRCH) { + ret = PTR_ERR(dquot); + goto out_put; + } + dquot = NULL; + } + transfer_to[GRPQUOTA] = dquot; + } ret = __dquot_transfer(inode, transfer_to); +out_put: dqput_all(transfer_to); return ret; } @@ -2518,8 +2556,8 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid, struct dquot *dquot; dquot = dqget(sb, qid); - if (!dquot) - return -ESRCH; + if (IS_ERR(dquot)) + return PTR_ERR(dquot); do_get_dqblk(dquot, di); dqput(dquot); @@ -2631,8 +2669,8 @@ int dquot_set_dqblk(struct super_block *sb, struct kqid qid, int rc; dquot = dqget(sb, qid); - if (!dquot) { - rc = -ESRCH; + if (IS_ERR(dquot)) { + rc = PTR_ERR(dquot); goto out; } rc = do_set_dqblk(dquot, di); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 77ca6601ff25..7a57c28eb5e7 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -43,7 +43,7 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number); void inode_sub_rsv_space(struct inode *inode, qsize_t number); void inode_reclaim_rsv_space(struct inode *inode, qsize_t number); -void dquot_initialize(struct inode *inode); +int dquot_initialize(struct inode *inode); void dquot_drop(struct inode *inode); struct dquot *dqget(struct super_block *sb, struct kqid qid); static inline struct dquot *dqgrab(struct dquot *dquot) @@ -200,8 +200,9 @@ static inline int 
sb_has_quota_active(struct super_block *sb, int type) return 0; } -static inline void dquot_initialize(struct inode *inode) +static inline int dquot_initialize(struct inode *inode) { + return 0; } static inline void dquot_drop(struct inode *inode) -- cgit v1.2.3-70-g09d2 From c290ea01abb7907fde602f3ba55905ef10a37477 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Jun 2015 16:52:29 +0200 Subject: fs: Remove ext3 filesystem driver The functionality of ext3 is fully supported by ext4 driver. Major distributions (SUSE, RedHat) already use ext4 driver to handle ext3 filesystems for quite some time. There is some ugliness in mm resulting from jbd cleaning buffers in a dirty page without cleaning page dirty bit and also support for buffer bouncing in the block layer when stable pages are required is there only because of jbd. So let's remove the ext3 driver. This saves us some 28k lines of duplicated code. Acked-by: Theodore Ts'o Signed-off-by: Jan Kara --- Documentation/filesystems/ext2.txt | 4 +- Documentation/filesystems/ext3.txt | 209 +-- Documentation/filesystems/vfs.txt | 2 +- MAINTAINERS | 18 +- fs/Kconfig | 5 +- fs/Makefile | 2 - fs/ext3/Kconfig | 89 - fs/ext3/Makefile | 12 - fs/ext3/acl.c | 281 --- fs/ext3/acl.h | 72 - fs/ext3/balloc.c | 2158 ---------------------- fs/ext3/bitmap.c | 20 - fs/ext3/dir.c | 537 ------ fs/ext3/ext3.h | 1332 -------------- fs/ext3/ext3_jbd.c | 59 - fs/ext3/file.c | 79 - fs/ext3/fsync.c | 109 -- fs/ext3/hash.c | 206 --- fs/ext3/ialloc.c | 706 ------- fs/ext3/inode.c | 3574 ------------------------------------ fs/ext3/ioctl.c | 327 ---- fs/ext3/namei.c | 2586 -------------------------- fs/ext3/namei.h | 27 - fs/ext3/resize.c | 1117 ----------- fs/ext3/super.c | 3165 ------------------------------- fs/ext3/symlink.c | 46 - fs/ext3/xattr.c | 1330 -------------- fs/ext3/xattr.h | 136 -- fs/ext3/xattr_security.c | 78 - fs/ext3/xattr_trusted.c | 54 - fs/ext3/xattr_user.c | 58 - fs/ext4/Kconfig | 41 +- fs/ext4/super.c | 14 +- 
fs/jbd/Kconfig | 30 - fs/jbd/Makefile | 7 - fs/jbd/checkpoint.c | 782 -------- fs/jbd/commit.c | 1021 ---------- fs/jbd/journal.c | 2145 ---------------------- fs/jbd/recovery.c | 594 ------ fs/jbd/revoke.c | 733 -------- fs/jbd/transaction.c | 2237 ---------------------- include/linux/jbd.h | 1047 ----------- include/linux/jbd2.h | 41 +- include/linux/jbd_common.h | 46 - include/trace/events/ext3.h | 866 --------- include/trace/events/jbd.h | 194 -- 46 files changed, 87 insertions(+), 28109 deletions(-) delete mode 100644 fs/ext3/Kconfig delete mode 100644 fs/ext3/Makefile delete mode 100644 fs/ext3/acl.c delete mode 100644 fs/ext3/acl.h delete mode 100644 fs/ext3/balloc.c delete mode 100644 fs/ext3/bitmap.c delete mode 100644 fs/ext3/dir.c delete mode 100644 fs/ext3/ext3.h delete mode 100644 fs/ext3/ext3_jbd.c delete mode 100644 fs/ext3/file.c delete mode 100644 fs/ext3/fsync.c delete mode 100644 fs/ext3/hash.c delete mode 100644 fs/ext3/ialloc.c delete mode 100644 fs/ext3/inode.c delete mode 100644 fs/ext3/ioctl.c delete mode 100644 fs/ext3/namei.c delete mode 100644 fs/ext3/namei.h delete mode 100644 fs/ext3/resize.c delete mode 100644 fs/ext3/super.c delete mode 100644 fs/ext3/symlink.c delete mode 100644 fs/ext3/xattr.c delete mode 100644 fs/ext3/xattr.h delete mode 100644 fs/ext3/xattr_security.c delete mode 100644 fs/ext3/xattr_trusted.c delete mode 100644 fs/ext3/xattr_user.c delete mode 100644 fs/jbd/Kconfig delete mode 100644 fs/jbd/Makefile delete mode 100644 fs/jbd/checkpoint.c delete mode 100644 fs/jbd/commit.c delete mode 100644 fs/jbd/journal.c delete mode 100644 fs/jbd/recovery.c delete mode 100644 fs/jbd/revoke.c delete mode 100644 fs/jbd/transaction.c delete mode 100644 include/linux/jbd.h delete mode 100644 include/linux/jbd_common.h delete mode 100644 include/trace/events/ext3.h delete mode 100644 include/trace/events/jbd.h (limited to 'include/linux') diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt index 
b9714569e472..55755395d3dc 100644 --- a/Documentation/filesystems/ext2.txt +++ b/Documentation/filesystems/ext2.txt @@ -360,8 +360,8 @@ and are copied into the filesystem. If a transaction is incomplete at the time of the crash, then there is no guarantee of consistency for the blocks in that transaction so they are discarded (which means any filesystem changes they represent are also lost). -Check Documentation/filesystems/ext3.txt if you want to read more about -ext3 and journaling. +Check Documentation/filesystems/ext4.txt if you want to read more about +ext4 and journaling. References ========== diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt index 7ed0d17d6721..58758fbef9e0 100644 --- a/Documentation/filesystems/ext3.txt +++ b/Documentation/filesystems/ext3.txt @@ -6,210 +6,7 @@ Ext3 was originally released in September 1999. Written by Stephen Tweedie for the 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger, Andrew Morton, Alexander Viro, Ted Ts'o and Stephen Tweedie. -Ext3 is the ext2 filesystem enhanced with journalling capabilities. +Ext3 is the ext2 filesystem enhanced with journalling capabilities. The +filesystem is a subset of ext4 filesystem so use ext4 driver for accessing +ext3 filesystems. -Options -======= - -When mounting an ext3 filesystem, the following option are accepted: -(*) == default - -ro Mount filesystem read only. Note that ext3 will replay - the journal (and thus write to the partition) even when - mounted "read only". Mount options "ro,noload" can be - used to prevent writes to the filesystem. - -journal=update Update the ext3 file system's journal to the current - format. - -journal=inum When a journal already exists, this option is ignored. - Otherwise, it specifies the number of the inode which - will represent the ext3 file system's journal file. 
- -journal_path=path -journal_dev=devnum When the external journal device's major/minor numbers - have changed, these options allow the user to specify - the new journal location. The journal device is - identified through either its new major/minor numbers - encoded in devnum, or via a path to the device. - -norecovery Don't load the journal on mounting. Note that this forces -noload mount of inconsistent filesystem, which can lead to - various problems. - -data=journal All data are committed into the journal prior to being - written into the main file system. - -data=ordered (*) All data are forced directly out to the main file - system prior to its metadata being committed to the - journal. - -data=writeback Data ordering is not preserved, data may be written - into the main file system after its metadata has been - committed to the journal. - -commit=nrsec (*) Ext3 can be told to sync all its data and metadata - every 'nrsec' seconds. The default value is 5 seconds. - This means that if you lose your power, you will lose - as much as the latest 5 seconds of work (your - filesystem will not be damaged though, thanks to the - journaling). This default value (or any low value) - will hurt performance, but it's good for data-safety. - Setting it to 0 will have the same effect as leaving - it at the default (5 seconds). - Setting it to very large values will improve - performance. - -barrier=<0|1(*)> This enables/disables the use of write barriers in -barrier (*) the jbd code. barrier=0 disables, barrier=1 enables. -nobarrier This also requires an IO stack which can support - barriers, and if jbd gets an error on a barrier - write, it will disable again with a warning. - Write barriers enforce proper on-disk ordering - of journal commits, making volatile disk write caches - safe to use, at some performance penalty. If - your disks are battery-backed in one way or another, - disabling barriers may safely improve performance. 
- The mount options "barrier" and "nobarrier" can - also be used to enable or disable barriers, for - consistency with other ext3 mount options. - -user_xattr Enables Extended User Attributes. Additionally, you - need to have extended attribute support enabled in the - kernel configuration (CONFIG_EXT3_FS_XATTR). See the - attr(5) manual page and http://acl.bestbits.at/ to - learn more about extended attributes. - -nouser_xattr Disables Extended User Attributes. - -acl Enables POSIX Access Control Lists support. - Additionally, you need to have ACL support enabled in - the kernel configuration (CONFIG_EXT3_FS_POSIX_ACL). - See the acl(5) manual page and http://acl.bestbits.at/ - for more information. - -noacl This option disables POSIX Access Control List - support. - -reservation - -noreservation - -bsddf (*) Make 'df' act like BSD. -minixdf Make 'df' act like Minix. - -check=none Don't do extra checking of bitmaps on mount. -nocheck - -debug Extra debugging information is sent to syslog. - -errors=remount-ro Remount the filesystem read-only on an error. -errors=continue Keep going on a filesystem error. -errors=panic Panic and halt the machine if an error occurs. - (These mount options override the errors behavior - specified in the superblock, which can be - configured using tune2fs.) - -data_err=ignore(*) Just print an error message if an error occurs - in a file data buffer in ordered mode. -data_err=abort Abort the journal if an error occurs in a file - data buffer in ordered mode. - -grpid Give objects the same group ID as their creator. -bsdgroups - -nogrpid (*) New objects have the group ID of their creator. -sysvgroups - -resgid=n The group ID which may use the reserved blocks. - -resuid=n The user ID which may use the reserved blocks. - -sb=n Use alternate superblock at this location. - -quota These options are ignored by the filesystem. They -noquota are used only by quota tools to recognize volumes -grpquota where quota should be turned on. 
See documentation -usrquota in the quota-tools package for more details - (http://sourceforge.net/projects/linuxquota). - -jqfmt= These options tell filesystem details about quota -usrjquota= so that quota information can be properly updated -grpjquota= during journal replay. They replace the above - quota options. See documentation in the quota-tools - package for more details - (http://sourceforge.net/projects/linuxquota). - -Specification -============= -Ext3 shares all disk implementation with the ext2 filesystem, and adds -transactions capabilities to ext2. Journaling is done by the Journaling Block -Device layer. - -Journaling Block Device layer ------------------------------ -The Journaling Block Device layer (JBD) isn't ext3 specific. It was designed -to add journaling capabilities to a block device. The ext3 filesystem code -will inform the JBD of modifications it is performing (called a transaction). -The journal supports the transactions start and stop, and in case of a crash, -the journal can replay the transactions to quickly put the partition back into -a consistent state. - -Handles represent a single atomic update to a filesystem. JBD can handle an -external journal on a block device. - -Data Mode ---------- -There are 3 different data modes: - -* writeback mode -In data=writeback mode, ext3 does not journal data at all. This mode provides -a similar level of journaling as that of XFS, JFS, and ReiserFS in its default -mode - metadata journaling. A crash+recovery can cause incorrect data to -appear in files which were written shortly before the crash. This mode will -typically provide the best ext3 performance. - -* ordered mode -In data=ordered mode, ext3 only officially journals metadata, but it logically -groups metadata and data blocks into a single unit called a transaction. When -it's time to write the new metadata out to disk, the associated data blocks -are written first. 
In general, this mode performs slightly slower than -writeback but significantly faster than journal mode. - -* journal mode -data=journal mode provides full data and metadata journaling. All new data is -written to the journal first, and then to its final location. -In the event of a crash, the journal can be replayed, bringing both data and -metadata into a consistent state. This mode is the slowest except when data -needs to be read from and written to disk at the same time where it -outperforms all other modes. - -Compatibility -------------- - -Ext2 partitions can be easily convert to ext3, with `tune2fs -j `. -Ext3 is fully compatible with Ext2. Ext3 partitions can easily be mounted as -Ext2. - - -External Tools -============== -See manual pages to learn more. - -tune2fs: create a ext3 journal on a ext2 partition with the -j flag. -mke2fs: create a ext3 partition with the -j flag. -debugfs: ext2 and ext3 file system debugger. -ext2online: online (mounted) ext2 and ext3 filesystem resizer - - -References -========== - -kernel source: - - -programs: http://e2fsprogs.sourceforge.net/ - http://ext2resize.sourceforge.net - -useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html - http://www.ibm.com/developerworks/library/l-fs8/index.html diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 5eb8456fc41e..8c6f07ad373a 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -769,7 +769,7 @@ struct address_space_operations { to stall to allow flushers a chance to complete some IO. Ordinarily it can use PageDirty and PageWriteback but some filesystems have more complex state (unstable pages in NFS prevent reclaim) or - do not set those flags due to locking problems (jbd). This callback + do not set those flags due to locking problems. This callback allows a filesystem to indicate to the VM if a page should be treated as dirty or writeback for the purposes of stalling. 
diff --git a/MAINTAINERS b/MAINTAINERS index a2264167791a..0555bdb72c0d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4059,15 +4059,6 @@ F: Documentation/filesystems/ext2.txt F: fs/ext2/ F: include/linux/ext2* -EXT3 FILE SYSTEM -M: Jan Kara -M: Andrew Morton -M: Andreas Dilger -L: linux-ext4@vger.kernel.org -S: Maintained -F: Documentation/filesystems/ext3.txt -F: fs/ext3/ - EXT4 FILE SYSTEM M: "Theodore Ts'o" M: Andreas Dilger @@ -5751,16 +5742,9 @@ S: Maintained F: fs/jffs2/ F: include/uapi/linux/jffs2.h -JOURNALLING LAYER FOR BLOCK DEVICES (JBD) -M: Andrew Morton -M: Jan Kara -L: linux-ext4@vger.kernel.org -S: Maintained -F: fs/jbd/ -F: include/linux/jbd.h - JOURNALLING LAYER FOR BLOCK DEVICES (JBD2) M: "Theodore Ts'o" +M: Jan Kara L: linux-ext4@vger.kernel.org S: Maintained F: fs/jbd2/ diff --git a/fs/Kconfig b/fs/Kconfig index 011f43365d7b..da3f32f1a4e4 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -11,18 +11,15 @@ config DCACHE_WORD_ACCESS if BLOCK source "fs/ext2/Kconfig" -source "fs/ext3/Kconfig" source "fs/ext4/Kconfig" -source "fs/jbd/Kconfig" source "fs/jbd2/Kconfig" config FS_MBCACHE # Meta block cache for Extended Attributes (ext2/ext3/ext4) tristate default y if EXT2_FS=y && EXT2_FS_XATTR - default y if EXT3_FS=y && EXT3_FS_XATTR default y if EXT4_FS=y - default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS + default m if EXT2_FS_XATTR || EXT4_FS source "fs/reiserfs/Kconfig" source "fs/jfs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index cb20e4bf2303..09e051fefc5b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -62,12 +62,10 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_FSCACHE) += fscache/ obj-$(CONFIG_REISERFS_FS) += reiserfs/ -obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 obj-$(CONFIG_EXT2_FS) += ext2/ # We place ext4 after ext2 so plain ext2 root fs's are mounted using ext2 # unless explicitly requested by rootfstype obj-$(CONFIG_EXT4_FS) += ext4/ -obj-$(CONFIG_JBD) += jbd/ 
obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_CRAMFS) += cramfs/ obj-$(CONFIG_SQUASHFS) += squashfs/ diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig deleted file mode 100644 index e8c6ba0e4a3e..000000000000 --- a/fs/ext3/Kconfig +++ /dev/null @@ -1,89 +0,0 @@ -config EXT3_FS - tristate "Ext3 journalling file system support" - select JBD - help - This is the journalling version of the Second extended file system - (often called ext3), the de facto standard Linux file system - (method to organize files on a storage device) for hard disks. - - The journalling code included in this driver means you do not have - to run e2fsck (file system checker) on your file systems after a - crash. The journal keeps track of any changes that were being made - at the time the system crashed, and can ensure that your file system - is consistent without the need for a lengthy check. - - Other than adding the journal to the file system, the on-disk format - of ext3 is identical to ext2. It is possible to freely switch - between using the ext3 driver and the ext2 driver, as long as the - file system has been cleanly unmounted, or e2fsck is run on the file - system. - - To add a journal on an existing ext2 file system or change the - behavior of ext3 file systems, you can use the tune2fs utility ("man - tune2fs"). To modify attributes of files and directories on ext3 - file systems, use chattr ("man chattr"). You need to be using - e2fsprogs version 1.20 or later in order to create ext3 journals - (available at ). - - To compile this file system support as a module, choose M here: the - module will be called ext3. - -config EXT3_DEFAULTS_TO_ORDERED - bool "Default to 'data=ordered' in ext3" - depends on EXT3_FS - default y - help - The journal mode options for ext3 have different tradeoffs - between when data is guaranteed to be on disk and - performance. 
The use of "data=writeback" can cause - unwritten data to appear in files after an system crash or - power failure, which can be a security issue. However, - "data=ordered" mode can also result in major performance - problems, including seconds-long delays before an fsync() - call returns. For details, see: - - http://ext4.wiki.kernel.org/index.php/Ext3_data_mode_tradeoffs - - If you have been historically happy with ext3's performance, - data=ordered mode will be a safe choice and you should - answer 'y' here. If you understand the reliability and data - privacy issues of data=writeback and are willing to make - that trade off, answer 'n'. - -config EXT3_FS_XATTR - bool "Ext3 extended attributes" - depends on EXT3_FS - default y - help - Extended attributes are name:value pairs associated with inodes by - the kernel or by users (see the attr(5) manual page, or visit - for details). - - If unsure, say N. - - You need this for POSIX ACL support on ext3. - -config EXT3_FS_POSIX_ACL - bool "Ext3 POSIX Access Control Lists" - depends on EXT3_FS_XATTR - select FS_POSIX_ACL - help - Posix Access Control Lists (ACLs) support permissions for users and - groups beyond the owner/group/world scheme. - - To learn more about Access Control Lists, visit the Posix ACLs for - Linux website . - - If you don't know what Access Control Lists are, say N - -config EXT3_FS_SECURITY - bool "Ext3 Security Labels" - depends on EXT3_FS_XATTR - help - Security labels support alternative access control models - implemented by security modules like SELinux. This option - enables an extended attribute handler for file security - labels in the ext3 filesystem. - - If you are not using a security module that requires using - extended attributes for file security labels, say N. diff --git a/fs/ext3/Makefile b/fs/ext3/Makefile deleted file mode 100644 index e77766a8b3f0..000000000000 --- a/fs/ext3/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# -# Makefile for the linux ext3-filesystem routines. 
-# - -obj-$(CONFIG_EXT3_FS) += ext3.o - -ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o ext3_jbd.o - -ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c deleted file mode 100644 index 8bbaf5bcf982..000000000000 --- a/fs/ext3/acl.c +++ /dev/null @@ -1,281 +0,0 @@ -/* - * linux/fs/ext3/acl.c - * - * Copyright (C) 2001-2003 Andreas Gruenbacher, - */ - -#include "ext3.h" -#include "xattr.h" -#include "acl.h" - -/* - * Convert from filesystem to in-memory representation. - */ -static struct posix_acl * -ext3_acl_from_disk(const void *value, size_t size) -{ - const char *end = (char *)value + size; - int n, count; - struct posix_acl *acl; - - if (!value) - return NULL; - if (size < sizeof(ext3_acl_header)) - return ERR_PTR(-EINVAL); - if (((ext3_acl_header *)value)->a_version != - cpu_to_le32(EXT3_ACL_VERSION)) - return ERR_PTR(-EINVAL); - value = (char *)value + sizeof(ext3_acl_header); - count = ext3_acl_count(size); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; - acl = posix_acl_alloc(count, GFP_NOFS); - if (!acl) - return ERR_PTR(-ENOMEM); - for (n=0; n < count; n++) { - ext3_acl_entry *entry = - (ext3_acl_entry *)value; - if ((char *)value + sizeof(ext3_acl_entry_short) > end) - goto fail; - acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); - acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(ext3_acl_entry_short); - break; - - case ACL_USER: - value = (char *)value + sizeof(ext3_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_uid = - make_kuid(&init_user_ns, - le32_to_cpu(entry->e_id)); - break; - case ACL_GROUP: - value = (char 
*)value + sizeof(ext3_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_gid = - make_kgid(&init_user_ns, - le32_to_cpu(entry->e_id)); - break; - - default: - goto fail; - } - } - if (value != end) - goto fail; - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -/* - * Convert from in-memory to filesystem representation. - */ -static void * -ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) -{ - ext3_acl_header *ext_acl; - char *e; - size_t n; - - *size = ext3_acl_size(acl->a_count); - ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count * - sizeof(ext3_acl_entry), GFP_NOFS); - if (!ext_acl) - return ERR_PTR(-ENOMEM); - ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); - e = (char *)ext_acl + sizeof(ext3_acl_header); - for (n=0; n < acl->a_count; n++) { - const struct posix_acl_entry *acl_e = &acl->a_entries[n]; - ext3_acl_entry *entry = (ext3_acl_entry *)e; - entry->e_tag = cpu_to_le16(acl_e->e_tag); - entry->e_perm = cpu_to_le16(acl_e->e_perm); - switch(acl_e->e_tag) { - case ACL_USER: - entry->e_id = cpu_to_le32( - from_kuid(&init_user_ns, acl_e->e_uid)); - e += sizeof(ext3_acl_entry); - break; - case ACL_GROUP: - entry->e_id = cpu_to_le32( - from_kgid(&init_user_ns, acl_e->e_gid)); - e += sizeof(ext3_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(ext3_acl_entry_short); - break; - - default: - goto fail; - } - } - return (char *)ext_acl; - -fail: - kfree(ext_acl); - return ERR_PTR(-EINVAL); -} - -/* - * Inode operation get_posix_acl(). 
- * - * inode->i_mutex: don't care - */ -struct posix_acl * -ext3_get_acl(struct inode *inode, int type) -{ - int name_index; - char *value = NULL; - struct posix_acl *acl; - int retval; - - switch (type) { - case ACL_TYPE_ACCESS: - name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - case ACL_TYPE_DEFAULT: - name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - default: - BUG(); - } - - retval = ext3_xattr_get(inode, name_index, "", NULL, 0); - if (retval > 0) { - value = kmalloc(retval, GFP_NOFS); - if (!value) - return ERR_PTR(-ENOMEM); - retval = ext3_xattr_get(inode, name_index, "", value, retval); - } - if (retval > 0) - acl = ext3_acl_from_disk(value, retval); - else if (retval == -ENODATA || retval == -ENOSYS) - acl = NULL; - else - acl = ERR_PTR(retval); - kfree(value); - - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - - return acl; -} - -/* - * Set the access or default ACL of an inode. - * - * inode->i_mutex: down unless called from ext3_new_inode - */ -static int -__ext3_set_acl(handle_t *handle, struct inode *inode, int type, - struct posix_acl *acl) -{ - int name_index; - void *value = NULL; - size_t size = 0; - int error; - - switch(type) { - case ACL_TYPE_ACCESS: - name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - error = posix_acl_equiv_mode(acl, &inode->i_mode); - if (error < 0) - return error; - else { - inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); - if (error == 0) - acl = NULL; - } - } - break; - - case ACL_TYPE_DEFAULT: - name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? 
-EACCES : 0; - break; - - default: - return -EINVAL; - } - if (acl) { - value = ext3_acl_to_disk(acl, &size); - if (IS_ERR(value)) - return (int)PTR_ERR(value); - } - - error = ext3_xattr_set_handle(handle, inode, name_index, "", - value, size, 0); - - kfree(value); - - if (!error) - set_cached_acl(inode, type, acl); - - return error; -} - -int -ext3_set_acl(struct inode *inode, struct posix_acl *acl, int type) -{ - handle_t *handle; - int error, retries = 0; - -retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - error = __ext3_set_acl(handle, inode, type, acl); - ext3_journal_stop(handle); - if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - return error; -} - -/* - * Initialize the ACLs of a new inode. Called from ext3_new_inode. - * - * dir->i_mutex: down - * inode->i_mutex: up (access to inode is still exclusive) - */ -int -ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) -{ - struct posix_acl *default_acl, *acl; - int error; - - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); - if (error) - return error; - - if (default_acl) { - error = __ext3_set_acl(handle, inode, ACL_TYPE_DEFAULT, - default_acl); - posix_acl_release(default_acl); - } - if (acl) { - if (!error) - error = __ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, - acl); - posix_acl_release(acl); - } - return error; -} diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h deleted file mode 100644 index ea1c69edab9e..000000000000 --- a/fs/ext3/acl.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - File: fs/ext3/acl.h - - (C) 2001 Andreas Gruenbacher, -*/ - -#include - -#define EXT3_ACL_VERSION 0x0001 - -typedef struct { - __le16 e_tag; - __le16 e_perm; - __le32 e_id; -} ext3_acl_entry; - -typedef struct { - __le16 e_tag; - __le16 e_perm; -} ext3_acl_entry_short; - -typedef struct { - __le32 a_version; -} ext3_acl_header; - -static inline size_t ext3_acl_size(int count) -{ 
- if (count <= 4) { - return sizeof(ext3_acl_header) + - count * sizeof(ext3_acl_entry_short); - } else { - return sizeof(ext3_acl_header) + - 4 * sizeof(ext3_acl_entry_short) + - (count - 4) * sizeof(ext3_acl_entry); - } -} - -static inline int ext3_acl_count(size_t size) -{ - ssize_t s; - size -= sizeof(ext3_acl_header); - s = size - 4 * sizeof(ext3_acl_entry_short); - if (s < 0) { - if (size % sizeof(ext3_acl_entry_short)) - return -1; - return size / sizeof(ext3_acl_entry_short); - } else { - if (s % sizeof(ext3_acl_entry)) - return -1; - return s / sizeof(ext3_acl_entry) + 4; - } -} - -#ifdef CONFIG_EXT3_FS_POSIX_ACL - -/* acl.c */ -extern struct posix_acl *ext3_get_acl(struct inode *inode, int type); -extern int ext3_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); - -#else /* CONFIG_EXT3_FS_POSIX_ACL */ -#include -#define ext3_get_acl NULL -#define ext3_set_acl NULL - -static inline int -ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) -{ - return 0; -} -#endif /* CONFIG_EXT3_FS_POSIX_ACL */ - diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c deleted file mode 100644 index 158b5d4ce067..000000000000 --- a/fs/ext3/balloc.c +++ /dev/null @@ -1,2158 +0,0 @@ -/* - * linux/fs/ext3/balloc.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -#include -#include -#include "ext3.h" - -/* - * balloc.c contains the blocks allocation and deallocation routines - */ - -/* - * The free blocks are managed by bitmaps. A file system contains several - * blocks groups. 
Each group contains 1 bitmap block for blocks, 1 bitmap - * block for inodes, N blocks for the inode table and data blocks. - * - * The file system contains group descriptors which are located after the - * super block. Each descriptor contains the number of the bitmap block and - * the free blocks count in the block. The descriptors are loaded in memory - * when a file system is mounted (see ext3_fill_super). - */ - - -#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) - -/* - * Calculate the block group number and offset, given a block number - */ -static void ext3_get_group_no_and_offset(struct super_block *sb, - ext3_fsblk_t blocknr, unsigned long *blockgrpp, ext3_grpblk_t *offsetp) -{ - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - - blocknr = blocknr - le32_to_cpu(es->s_first_data_block); - if (offsetp) - *offsetp = blocknr % EXT3_BLOCKS_PER_GROUP(sb); - if (blockgrpp) - *blockgrpp = blocknr / EXT3_BLOCKS_PER_GROUP(sb); -} - -/** - * ext3_get_group_desc() -- load group descriptor from disk - * @sb: super block - * @block_group: given block group - * @bh: pointer to the buffer head to store the block - * group descriptor - */ -struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, - unsigned int block_group, - struct buffer_head ** bh) -{ - unsigned long group_desc; - unsigned long offset; - struct ext3_group_desc * desc; - struct ext3_sb_info *sbi = EXT3_SB(sb); - - if (block_group >= sbi->s_groups_count) { - ext3_error (sb, "ext3_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); - - return NULL; - } - smp_rmb(); - - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); - offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { - ext3_error (sb, "ext3_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); - 
return NULL; - } - - desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data; - if (bh) - *bh = sbi->s_group_desc[group_desc]; - return desc + offset; -} - -static int ext3_valid_block_bitmap(struct super_block *sb, - struct ext3_group_desc *desc, - unsigned int block_group, - struct buffer_head *bh) -{ - ext3_grpblk_t offset; - ext3_grpblk_t next_zero_bit; - ext3_fsblk_t bitmap_blk; - ext3_fsblk_t group_first_block; - - group_first_block = ext3_group_first_block_no(sb, block_group); - - /* check whether block bitmap block number is set */ - bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); - offset = bitmap_blk - group_first_block; - if (!ext3_test_bit(offset, bh->b_data)) - /* bad block bitmap */ - goto err_out; - - /* check whether the inode bitmap block number is set */ - bitmap_blk = le32_to_cpu(desc->bg_inode_bitmap); - offset = bitmap_blk - group_first_block; - if (!ext3_test_bit(offset, bh->b_data)) - /* bad block bitmap */ - goto err_out; - - /* check whether the inode table block number is set */ - bitmap_blk = le32_to_cpu(desc->bg_inode_table); - offset = bitmap_blk - group_first_block; - next_zero_bit = ext3_find_next_zero_bit(bh->b_data, - offset + EXT3_SB(sb)->s_itb_per_group, - offset); - if (next_zero_bit >= offset + EXT3_SB(sb)->s_itb_per_group) - /* good bitmap for inode tables */ - return 1; - -err_out: - ext3_error(sb, __func__, - "Invalid block bitmap - " - "block_group = %d, block = %lu", - block_group, bitmap_blk); - return 0; -} - -/** - * read_block_bitmap() - * @sb: super block - * @block_group: given block group - * - * Read the bitmap for a given block_group,and validate the - * bits for block/inode/inode tables are set in the bitmaps - * - * Return buffer_head on success or NULL in case of failure. 
- */ -static struct buffer_head * -read_block_bitmap(struct super_block *sb, unsigned int block_group) -{ - struct ext3_group_desc * desc; - struct buffer_head * bh = NULL; - ext3_fsblk_t bitmap_blk; - - desc = ext3_get_group_desc(sb, block_group, NULL); - if (!desc) - return NULL; - trace_ext3_read_block_bitmap(sb, block_group); - bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); - bh = sb_getblk(sb, bitmap_blk); - if (unlikely(!bh)) { - ext3_error(sb, __func__, - "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %u", - block_group, le32_to_cpu(desc->bg_block_bitmap)); - return NULL; - } - if (likely(bh_uptodate_or_lock(bh))) - return bh; - - if (bh_submit_read(bh) < 0) { - brelse(bh); - ext3_error(sb, __func__, - "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %u", - block_group, le32_to_cpu(desc->bg_block_bitmap)); - return NULL; - } - ext3_valid_block_bitmap(sb, desc, block_group, bh); - /* - * file system mounted not to panic on error, continue with corrupt - * bitmap - */ - return bh; -} -/* - * The reservation window structure operations - * -------------------------------------------- - * Operations include: - * dump, find, add, remove, is_empty, find_next_reservable_window, etc. - * - * We use a red-black tree to represent per-filesystem reservation - * windows. - * - */ - -/** - * __rsv_window_dump() -- Dump the filesystem block allocation reservation map - * @rb_root: root of per-filesystem reservation rb tree - * @verbose: verbose mode - * @fn: function which wishes to dump the reservation map - * - * If verbose is turned on, it will print the whole block reservation - * windows(start, end). Otherwise, it will only print out the "bad" windows, - * those windows that overlap with their immediate neighbors. 
- */ -#if 1 -static void __rsv_window_dump(struct rb_root *root, int verbose, - const char *fn) -{ - struct rb_node *n; - struct ext3_reserve_window_node *rsv, *prev; - int bad; - -restart: - n = rb_first(root); - bad = 0; - prev = NULL; - - printk("Block Allocation Reservation Windows Map (%s):\n", fn); - while (n) { - rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); - if (verbose) - printk("reservation window 0x%p " - "start: %lu, end: %lu\n", - rsv, rsv->rsv_start, rsv->rsv_end); - if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { - printk("Bad reservation %p (start >= end)\n", - rsv); - bad = 1; - } - if (prev && prev->rsv_end >= rsv->rsv_start) { - printk("Bad reservation %p (prev->end >= start)\n", - rsv); - bad = 1; - } - if (bad) { - if (!verbose) { - printk("Restarting reservation walk in verbose mode\n"); - verbose = 1; - goto restart; - } - } - n = rb_next(n); - prev = rsv; - } - printk("Window map complete.\n"); - BUG_ON(bad); -} -#define rsv_window_dump(root, verbose) \ - __rsv_window_dump((root), (verbose), __func__) -#else -#define rsv_window_dump(root, verbose) do {} while (0) -#endif - -/** - * goal_in_my_reservation() - * @rsv: inode's reservation window - * @grp_goal: given goal block relative to the allocation block group - * @group: the current allocation block group - * @sb: filesystem super block - * - * Test if the given goal block (group relative) is within the file's - * own block reservation window range. - * - * If the reservation window is outside the goal allocation group, return 0; - * grp_goal (given goal block) could be -1, which means no specific - * goal block. In this case, always return 1. 
- * If the goal block is within the reservation window, return 1; - * otherwise, return 0; - */ -static int -goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, - unsigned int group, struct super_block * sb) -{ - ext3_fsblk_t group_first_block, group_last_block; - - group_first_block = ext3_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); - - if ((rsv->_rsv_start > group_last_block) || - (rsv->_rsv_end < group_first_block)) - return 0; - if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) - || (grp_goal + group_first_block > rsv->_rsv_end))) - return 0; - return 1; -} - -/** - * search_reserve_window() - * @rb_root: root of reservation tree - * @goal: target allocation block - * - * Find the reserved window which includes the goal, or the previous one - * if the goal is not in any window. - * Returns NULL if there are no windows or if all windows start after the goal. - */ -static struct ext3_reserve_window_node * -search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) -{ - struct rb_node *n = root->rb_node; - struct ext3_reserve_window_node *rsv; - - if (!n) - return NULL; - - do { - rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); - - if (goal < rsv->rsv_start) - n = n->rb_left; - else if (goal > rsv->rsv_end) - n = n->rb_right; - else - return rsv; - } while (n); - /* - * We've fallen off the end of the tree: the goal wasn't inside - * any particular node. OK, the previous node must be to one - * side of the interval containing the goal. If it's the RHS, - * we need to back up one. - */ - if (rsv->rsv_start > goal) { - n = rb_prev(&rsv->rsv_node); - rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); - } - return rsv; -} - -/** - * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree. - * @sb: super block - * @rsv: reservation window to add - * - * Must be called with rsv_lock hold. 
- */ -void ext3_rsv_window_add(struct super_block *sb, - struct ext3_reserve_window_node *rsv) -{ - struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; - struct rb_node *node = &rsv->rsv_node; - ext3_fsblk_t start = rsv->rsv_start; - - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; - struct ext3_reserve_window_node *this; - - trace_ext3_rsv_window_add(sb, rsv); - while (*p) - { - parent = *p; - this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); - - if (start < this->rsv_start) - p = &(*p)->rb_left; - else if (start > this->rsv_end) - p = &(*p)->rb_right; - else { - rsv_window_dump(root, 1); - BUG(); - } - } - - rb_link_node(node, parent, p); - rb_insert_color(node, root); -} - -/** - * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree - * @sb: super block - * @rsv: reservation window to remove - * - * Mark the block reservation window as not allocated, and unlink it - * from the filesystem reservation window rb tree. Must be called with - * rsv_lock hold. - */ -static void rsv_window_remove(struct super_block *sb, - struct ext3_reserve_window_node *rsv) -{ - rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_alloc_hit = 0; - rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); -} - -/* - * rsv_is_empty() -- Check if the reservation window is allocated. - * @rsv: given reservation window to check - * - * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED. 
- */ -static inline int rsv_is_empty(struct ext3_reserve_window *rsv) -{ - /* a valid reservation end block could not be 0 */ - return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED; -} - -/** - * ext3_init_block_alloc_info() - * @inode: file inode structure - * - * Allocate and initialize the reservation window structure, and - * link the window to the ext3 inode structure at last - * - * The reservation window structure is only dynamically allocated - * and linked to ext3 inode the first time the open file - * needs a new block. So, before every ext3_new_block(s) call, for - * regular files, we should check whether the reservation window - * structure exists or not. In the latter case, this function is called. - * Fail to do so will result in block reservation being turned off for that - * open file. - * - * This function is called from ext3_get_blocks_handle(), also called - * when setting the reservation window size through ioctl before the file - * is open for write (needs block allocation). - * - * Needs truncate_mutex protection prior to call this function. 
- */ -void ext3_init_block_alloc_info(struct inode *inode) -{ - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_block_alloc_info *block_i; - struct super_block *sb = inode->i_sb; - - block_i = kmalloc(sizeof(*block_i), GFP_NOFS); - if (block_i) { - struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node; - - rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - - /* - * if filesystem is mounted with NORESERVATION, the goal - * reservation window size is set to zero to indicate - * block reservation is off - */ - if (!test_opt(sb, RESERVATION)) - rsv->rsv_goal_size = 0; - else - rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS; - rsv->rsv_alloc_hit = 0; - block_i->last_alloc_logical_block = 0; - block_i->last_alloc_physical_block = 0; - } - ei->i_block_alloc_info = block_i; -} - -/** - * ext3_discard_reservation() - * @inode: inode - * - * Discard(free) block reservation window on last file close, or truncate - * or at last iput(). - * - * It is being called in three cases: - * ext3_release_file(): last writer close the file - * ext3_clear_inode(): last iput(), when nobody link to this file. - * ext3_truncate(): when the block indirect map is about to change. 
- * - */ -void ext3_discard_reservation(struct inode *inode) -{ - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; - struct ext3_reserve_window_node *rsv; - spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; - - if (!block_i) - return; - - rsv = &block_i->rsv_window_node; - if (!rsv_is_empty(&rsv->rsv_window)) { - spin_lock(rsv_lock); - if (!rsv_is_empty(&rsv->rsv_window)) { - trace_ext3_discard_reservation(inode, rsv); - rsv_window_remove(inode->i_sb, rsv); - } - spin_unlock(rsv_lock); - } -} - -/** - * ext3_free_blocks_sb() -- Free given blocks and update quota - * @handle: handle to this transaction - * @sb: super block - * @block: start physical block to free - * @count: number of blocks to free - * @pdquot_freed_blocks: pointer to quota - */ -void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, - ext3_fsblk_t block, unsigned long count, - unsigned long *pdquot_freed_blocks) -{ - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; - unsigned long block_group; - ext3_grpblk_t bit; - unsigned long i; - unsigned long overflow; - struct ext3_group_desc * desc; - struct ext3_super_block * es; - struct ext3_sb_info *sbi; - int err = 0, ret; - ext3_grpblk_t group_freed; - - *pdquot_freed_blocks = 0; - sbi = EXT3_SB(sb); - es = sbi->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - block + count > le32_to_cpu(es->s_blocks_count)) { - ext3_error (sb, "ext3_free_blocks", - "Freeing blocks not in datazone - " - "block = "E3FSBLK", count = %lu", block, count); - goto error_return; - } - - ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); - -do_more: - overflow = 0; - block_group = (block - le32_to_cpu(es->s_first_data_block)) / - EXT3_BLOCKS_PER_GROUP(sb); - bit = (block - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb); - /* - * Check to see if we are freeing blocks across a group - * boundary. 
- */ - if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { - overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); - count -= overflow; - } - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, block_group); - if (!bitmap_bh) - goto error_return; - desc = ext3_get_group_desc (sb, block_group, &gd_bh); - if (!desc) - goto error_return; - - if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || - in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || - in_range (block, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group) || - in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) { - ext3_error (sb, "ext3_free_blocks", - "Freeing blocks in system zones - " - "Block = "E3FSBLK", count = %lu", - block, count); - goto error_return; - } - - /* - * We are about to start releasing blocks in the bitmap, - * so we need undo access. - */ - /* @@@ check errors */ - BUFFER_TRACE(bitmap_bh, "getting undo access"); - err = ext3_journal_get_undo_access(handle, bitmap_bh); - if (err) - goto error_return; - - /* - * We are about to modify some metadata. Call the journal APIs - * to unshare ->b_data if a currently-committing transaction is - * using it - */ - BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, gd_bh); - if (err) - goto error_return; - - jbd_lock_bh_state(bitmap_bh); - - for (i = 0, group_freed = 0; i < count; i++) { - /* - * An HJ special. This is expensive... 
- */ -#ifdef CONFIG_JBD_DEBUG - jbd_unlock_bh_state(bitmap_bh); - { - struct buffer_head *debug_bh; - debug_bh = sb_find_get_block(sb, block + i); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "Deleted!"); - if (!bh2jh(bitmap_bh)->b_committed_data) - BUFFER_TRACE(debug_bh, - "No committed data in bitmap"); - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); - __brelse(debug_bh); - } - } - jbd_lock_bh_state(bitmap_bh); -#endif - if (need_resched()) { - jbd_unlock_bh_state(bitmap_bh); - cond_resched(); - jbd_lock_bh_state(bitmap_bh); - } - /* @@@ This prevents newly-allocated data from being - * freed and then reallocated within the same - * transaction. - * - * Ideally we would want to allow that to happen, but to - * do so requires making journal_forget() capable of - * revoking the queued write of a data block, which - * implies blocking on the journal lock. *forget() - * cannot block due to truncate races. - * - * Eventually we can fix this by making journal_forget() - * return a status indicating whether or not it was able - * to revoke the buffer. On successful revoke, it is - * safe not to set the allocation bit in the committed - * bitmap, because we know that there is no outstanding - * activity on the buffer any more and so it is safe to - * reallocate it. - */ - BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); - J_ASSERT_BH(bitmap_bh, - bh2jh(bitmap_bh)->b_committed_data != NULL); - ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, - bh2jh(bitmap_bh)->b_committed_data); - - /* - * We clear the bit in the bitmap after setting the committed - * data bit, because this is the reverse order to that which - * the allocator uses. 
- */ - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), - bit + i, bitmap_bh->b_data)) { - jbd_unlock_bh_state(bitmap_bh); - ext3_error(sb, __func__, - "bit already cleared for block "E3FSBLK, - block + i); - jbd_lock_bh_state(bitmap_bh); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - group_freed++; - } - } - jbd_unlock_bh_state(bitmap_bh); - - spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&desc->bg_free_blocks_count, group_freed); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_add(&sbi->s_freeblocks_counter, count); - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext3_journal_dirty_metadata(handle, bitmap_bh); - - /* And the group descriptor block */ - BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext3_journal_dirty_metadata(handle, gd_bh); - if (!err) err = ret; - *pdquot_freed_blocks += group_freed; - - if (overflow && !err) { - block += count; - count = overflow; - goto do_more; - } - -error_return: - brelse(bitmap_bh); - ext3_std_error(sb, err); - return; -} - -/** - * ext3_free_blocks() -- Free given blocks and update quota - * @handle: handle for this transaction - * @inode: inode - * @block: start physical block to free - * @count: number of blocks to count - */ -void ext3_free_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t block, unsigned long count) -{ - struct super_block *sb = inode->i_sb; - unsigned long dquot_freed_blocks; - - trace_ext3_free_blocks(inode, block, count); - ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); - if (dquot_freed_blocks) - dquot_free_block(inode, dquot_freed_blocks); - return; -} - -/** - * ext3_test_allocatable() - * @nr: given allocation block group - * @bh: bufferhead contains the bitmap of the given block group - * - * For ext3 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed 
data" copy. This - * prevents deletes from freeing up the page for reuse until we have - * committed the delete transaction. - * - * If we didn't do this, then deleting something and reallocating it as - * data would allow the old block to be overwritten before the - * transaction committed (because we force data to disk before commit). - * This would lead to corruption if we crashed between overwriting the - * data and committing the delete. - * - * @@@ We may want to make this allocation behaviour conditional on - * data-writes at some point, and disable it for metadata allocations or - * sync-data inodes. - */ -static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) -{ - int ret; - struct journal_head *jh = bh2jh(bh); - - if (ext3_test_bit(nr, bh->b_data)) - return 0; - - jbd_lock_bh_state(bh); - if (!jh->b_committed_data) - ret = 1; - else - ret = !ext3_test_bit(nr, jh->b_committed_data); - jbd_unlock_bh_state(bh); - return ret; -} - -/** - * bitmap_search_next_usable_block() - * @start: the starting block (group relative) of the search - * @bh: bufferhead contains the block group bitmap - * @maxblocks: the ending block (group relative) of the reservation - * - * The bitmap search --- search forward alternately through the actual - * bitmap on disk and the last-committed copy in journal, until we find a - * bit free in both bitmaps. 
- */ -static ext3_grpblk_t -bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, - ext3_grpblk_t maxblocks) -{ - ext3_grpblk_t next; - struct journal_head *jh = bh2jh(bh); - - while (start < maxblocks) { - next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); - if (next >= maxblocks) - return -1; - if (ext3_test_allocatable(next, bh)) - return next; - jbd_lock_bh_state(bh); - if (jh->b_committed_data) - start = ext3_find_next_zero_bit(jh->b_committed_data, - maxblocks, next); - jbd_unlock_bh_state(bh); - } - return -1; -} - -/** - * find_next_usable_block() - * @start: the starting block (group relative) to find next - * allocatable block in bitmap. - * @bh: bufferhead contains the block group bitmap - * @maxblocks: the ending block (group relative) for the search - * - * Find an allocatable block in a bitmap. We honor both the bitmap and - * its last-committed copy (if that exists), and perform the "most - * appropriate allocation" algorithm of looking for a free block near - * the initial goal; then for a free byte somewhere in the bitmap; then - * for any free bit in the bitmap. - */ -static ext3_grpblk_t -find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, - ext3_grpblk_t maxblocks) -{ - ext3_grpblk_t here, next; - char *p, *r; - - if (start > 0) { - /* - * The goal was occupied; search forward for a free - * block within the next XX blocks. - * - * end_goal is more or less random, but it has to be - * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the - * next 64-bit boundary is simple.. 
- */ - ext3_grpblk_t end_goal = (start + 63) & ~63; - if (end_goal > maxblocks) - end_goal = maxblocks; - here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); - if (here < end_goal && ext3_test_allocatable(here, bh)) - return here; - ext3_debug("Bit not found near goal\n"); - } - - here = start; - if (here < 0) - here = 0; - - p = bh->b_data + (here >> 3); - r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); - next = (r - bh->b_data) << 3; - - if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) - return next; - - /* - * The bitmap search --- search forward alternately through the actual - * bitmap and the last-committed copy until we find a bit free in - * both - */ - here = bitmap_search_next_usable_block(here, bh, maxblocks); - return here; -} - -/** - * claim_block() - * @lock: the spin lock for this block group - * @block: the free block (group relative) to allocate - * @bh: the buffer_head contains the block group bitmap - * - * We think we can allocate this block in this bitmap. Try to set the bit. - * If that succeeds then check that nobody has allocated and then freed the - * block since we saw that is was not marked in b_committed_data. If it _was_ - * allocated and freed then clear the bit in the bitmap again and return - * zero (failure). 
- */ -static inline int -claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) -{ - struct journal_head *jh = bh2jh(bh); - int ret; - - if (ext3_set_bit_atomic(lock, block, bh->b_data)) - return 0; - jbd_lock_bh_state(bh); - if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) { - ext3_clear_bit_atomic(lock, block, bh->b_data); - ret = 0; - } else { - ret = 1; - } - jbd_unlock_bh_state(bh); - return ret; -} - -/** - * ext3_try_to_allocate() - * @sb: superblock - * @handle: handle to this transaction - * @group: given allocation block group - * @bitmap_bh: bufferhead holds the block bitmap - * @grp_goal: given target block within the group - * @count: target number of blocks to allocate - * @my_rsv: reservation window - * - * Attempt to allocate blocks within a give range. Set the range of allocation - * first, then find the first free bit(s) from the bitmap (within the range), - * and at last, allocate the blocks by claiming the found free bit as allocated. - * - * To set the range of this allocation: - * if there is a reservation window, only try to allocate block(s) from the - * file's own reservation window; - * Otherwise, the allocation range starts from the give goal block, ends at - * the block group's last block. - * - * If we failed to allocate the desired block then we may end up crossing to a - * new bitmap. In that case we must release write access to the old one via - * ext3_journal_release_buffer(), else we'll run out of credits. 
- */ -static ext3_grpblk_t -ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal, - unsigned long *count, struct ext3_reserve_window *my_rsv) -{ - ext3_fsblk_t group_first_block; - ext3_grpblk_t start, end; - unsigned long num = 0; - - /* we do allocation within the reservation window if we have a window */ - if (my_rsv) { - group_first_block = ext3_group_first_block_no(sb, group); - if (my_rsv->_rsv_start >= group_first_block) - start = my_rsv->_rsv_start - group_first_block; - else - /* reservation window cross group boundary */ - start = 0; - end = my_rsv->_rsv_end - group_first_block + 1; - if (end > EXT3_BLOCKS_PER_GROUP(sb)) - /* reservation window crosses group boundary */ - end = EXT3_BLOCKS_PER_GROUP(sb); - if ((start <= grp_goal) && (grp_goal < end)) - start = grp_goal; - else - grp_goal = -1; - } else { - if (grp_goal > 0) - start = grp_goal; - else - start = 0; - end = EXT3_BLOCKS_PER_GROUP(sb); - } - - BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); - -repeat: - if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) { - grp_goal = find_next_usable_block(start, bitmap_bh, end); - if (grp_goal < 0) - goto fail_access; - if (!my_rsv) { - int i; - - for (i = 0; i < 7 && grp_goal > start && - ext3_test_allocatable(grp_goal - 1, - bitmap_bh); - i++, grp_goal--) - ; - } - } - start = grp_goal; - - if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), - grp_goal, bitmap_bh)) { - /* - * The block was allocated by another thread, or it was - * allocated and then freed by another thread - */ - start++; - grp_goal++; - if (start >= end) - goto fail_access; - goto repeat; - } - num++; - grp_goal++; - while (num < *count && grp_goal < end - && ext3_test_allocatable(grp_goal, bitmap_bh) - && claim_block(sb_bgl_lock(EXT3_SB(sb), group), - grp_goal, bitmap_bh)) { - num++; - grp_goal++; - } - *count = num; - return grp_goal - num; -fail_access: - *count = num; - return -1; -} - -/** - * 
find_next_reservable_window(): - * find a reservable space within the given range. - * It does not allocate the reservation window for now: - * alloc_new_reservation() will do the work later. - * - * @search_head: the head of the searching list; - * This is not necessarily the list head of the whole filesystem - * - * We have both head and start_block to assist the search - * for the reservable space. The list starts from head, - * but we will shift to the place where start_block is, - * then start from there, when looking for a reservable space. - * - * @my_rsv: the reservation window - * - * @sb: the super block - * - * @start_block: the first block we consider to start - * the real search from - * - * @last_block: - * the maximum block number that our goal reservable space - * could start from. This is normally the last block in this - * group. The search will end when we found the start of next - * possible reservable space is out of this boundary. - * This could handle the cross boundary reservation window - * request. - * - * basically we search from the given range, rather than the whole - * reservation double linked list, (start_block, last_block) - * to find a free region that is of my size and has not - * been reserved. - * - */ -static int find_next_reservable_window( - struct ext3_reserve_window_node *search_head, - struct ext3_reserve_window_node *my_rsv, - struct super_block * sb, - ext3_fsblk_t start_block, - ext3_fsblk_t last_block) -{ - struct rb_node *next; - struct ext3_reserve_window_node *rsv, *prev; - ext3_fsblk_t cur; - int size = my_rsv->rsv_goal_size; - - /* TODO: make the start of the reservation window byte-aligned */ - /* cur = *start_block & ~7;*/ - cur = start_block; - rsv = search_head; - if (!rsv) - return -1; - - while (1) { - if (cur <= rsv->rsv_end) - cur = rsv->rsv_end + 1; - - /* TODO? 
- * in the case we could not find a reservable space - * that is what is expected, during the re-search, we could - * remember what's the largest reservable space we could have - * and return that one. - * - * For now it will fail if we could not find the reservable - * space with expected-size (or more)... - */ - if (cur > last_block) - return -1; /* fail */ - - prev = rsv; - next = rb_next(&rsv->rsv_node); - rsv = rb_entry(next,struct ext3_reserve_window_node,rsv_node); - - /* - * Reached the last reservation, we can just append to the - * previous one. - */ - if (!next) - break; - - if (cur + size <= rsv->rsv_start) { - /* - * Found a reserveable space big enough. We could - * have a reservation across the group boundary here - */ - break; - } - } - /* - * we come here either : - * when we reach the end of the whole list, - * and there is empty reservable space after last entry in the list. - * append it to the end of the list. - * - * or we found one reservable space in the middle of the list, - * return the reservation window that we could append to. - * succeed. - */ - - if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) - rsv_window_remove(sb, my_rsv); - - /* - * Let's book the whole available window for now. We will check the - * disk bitmap later and then, if there are free blocks then we adjust - * the window size if it's larger than requested. - * Otherwise, we will remove this node from the tree next time - * call find_next_reservable_window. - */ - my_rsv->rsv_start = cur; - my_rsv->rsv_end = cur + size - 1; - my_rsv->rsv_alloc_hit = 0; - - if (prev != my_rsv) - ext3_rsv_window_add(sb, my_rsv); - - return 0; -} - -/** - * alloc_new_reservation()--allocate a new reservation window - * - * To make a new reservation, we search part of the filesystem - * reservation list (the list that inside the group). We try to - * allocate a new reservation window near the allocation goal, - * or the beginning of the group, if there is no goal. 
- * - * We first find a reservable space after the goal, then from - * there, we check the bitmap for the first free block after - * it. If there is no free block until the end of group, then the - * whole group is full, we failed. Otherwise, check if the free - * block is inside the expected reservable space, if so, we - * succeed. - * If the first free block is outside the reservable space, then - * start from the first free block, we search for next available - * space, and go on. - * - * on succeed, a new reservation will be found and inserted into the list - * It contains at least one free block, and it does not overlap with other - * reservation windows. - * - * failed: we failed to find a reservation window in this group - * - * @my_rsv: the reservation window - * - * @grp_goal: The goal (group-relative). It is where the search for a - * free reservable space should start from. - * if we have a grp_goal(grp_goal >0 ), then start from there, - * no grp_goal(grp_goal = -1), we start from the first block - * of the group. 
- * - * @sb: the super block - * @group: the group we are trying to allocate in - * @bitmap_bh: the block group block bitmap - * - */ -static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, - ext3_grpblk_t grp_goal, struct super_block *sb, - unsigned int group, struct buffer_head *bitmap_bh) -{ - struct ext3_reserve_window_node *search_head; - ext3_fsblk_t group_first_block, group_end_block, start_block; - ext3_grpblk_t first_free_block; - struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; - unsigned long size; - int ret; - spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; - - group_first_block = ext3_group_first_block_no(sb, group); - group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); - - if (grp_goal < 0) - start_block = group_first_block; - else - start_block = grp_goal + group_first_block; - - trace_ext3_alloc_new_reservation(sb, start_block); - size = my_rsv->rsv_goal_size; - - if (!rsv_is_empty(&my_rsv->rsv_window)) { - /* - * if the old reservation is cross group boundary - * and if the goal is inside the old reservation window, - * we will come here when we just failed to allocate from - * the first part of the window. We still have another part - * that belongs to the next group. In this case, there is no - * point to discard our window and try to allocate a new one - * in this group(which will fail). we should - * keep the reservation window, just simply move on. - * - * Maybe we could shift the start block of the reservation - * window to the first block of next group. 
- */ - - if ((my_rsv->rsv_start <= group_end_block) && - (my_rsv->rsv_end > group_end_block) && - (start_block >= my_rsv->rsv_start)) - return -1; - - if ((my_rsv->rsv_alloc_hit > - (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { - /* - * if the previously allocation hit ratio is - * greater than 1/2, then we double the size of - * the reservation window the next time, - * otherwise we keep the same size window - */ - size = size * 2; - if (size > EXT3_MAX_RESERVE_BLOCKS) - size = EXT3_MAX_RESERVE_BLOCKS; - my_rsv->rsv_goal_size= size; - } - } - - spin_lock(rsv_lock); - /* - * shift the search start to the window near the goal block - */ - search_head = search_reserve_window(fs_rsv_root, start_block); - - /* - * find_next_reservable_window() simply finds a reservable window - * inside the given range(start_block, group_end_block). - * - * To make sure the reservation window has a free bit inside it, we - * need to check the bitmap after we found a reservable window. - */ -retry: - ret = find_next_reservable_window(search_head, my_rsv, sb, - start_block, group_end_block); - - if (ret == -1) { - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - spin_unlock(rsv_lock); - return -1; - } - - /* - * On success, find_next_reservable_window() returns the - * reservation window where there is a reservable space after it. - * Before we reserve this reservable space, we need - * to make sure there is at least a free block inside this region. - * - * searching the first free bit on the block bitmap and copy of - * last committed bitmap alternatively, until we found a allocatable - * block. Search start from the start block of the reservable space - * we just found. - */ - spin_unlock(rsv_lock); - first_free_block = bitmap_search_next_usable_block( - my_rsv->rsv_start - group_first_block, - bitmap_bh, group_end_block - group_first_block + 1); - - if (first_free_block < 0) { - /* - * no free block left on the bitmap, no point - * to reserve the space. 
return failed. - */ - spin_lock(rsv_lock); - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - spin_unlock(rsv_lock); - return -1; /* failed */ - } - - start_block = first_free_block + group_first_block; - /* - * check if the first free block is within the - * free space we just reserved - */ - if (start_block >= my_rsv->rsv_start && - start_block <= my_rsv->rsv_end) { - trace_ext3_reserved(sb, start_block, my_rsv); - return 0; /* success */ - } - /* - * if the first free bit we found is out of the reservable space - * continue search for next reservable space, - * start from where the free block is, - * we also shift the list head to where we stopped last time - */ - search_head = my_rsv; - spin_lock(rsv_lock); - goto retry; -} - -/** - * try_to_extend_reservation() - * @my_rsv: given reservation window - * @sb: super block - * @size: the delta to extend - * - * Attempt to expand the reservation window large enough to have - * required number of free blocks - * - * Since ext3_try_to_allocate() will always allocate blocks within - * the reservation window range, if the window size is too small, - * multiple blocks allocation has to stop at the end of the reservation - * window. 
To make this more efficient, given the total number of - * blocks needed and the current size of the window, we try to - * expand the reservation window size if necessary on a best-effort - * basis before ext3_new_blocks() tries to allocate blocks, - */ -static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, - struct super_block *sb, int size) -{ - struct ext3_reserve_window_node *next_rsv; - struct rb_node *next; - spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; - - if (!spin_trylock(rsv_lock)) - return; - - next = rb_next(&my_rsv->rsv_node); - - if (!next) - my_rsv->rsv_end += size; - else { - next_rsv = rb_entry(next, struct ext3_reserve_window_node, rsv_node); - - if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) - my_rsv->rsv_end += size; - else - my_rsv->rsv_end = next_rsv->rsv_start - 1; - } - spin_unlock(rsv_lock); -} - -/** - * ext3_try_to_allocate_with_rsv() - * @sb: superblock - * @handle: handle to this transaction - * @group: given allocation block group - * @bitmap_bh: bufferhead holds the block bitmap - * @grp_goal: given target block within the group - * @my_rsv: reservation window - * @count: target number of blocks to allocate - * @errp: pointer to store the error code - * - * This is the main function used to allocate a new block and its reservation - * window. - * - * Each time when a new block allocation is need, first try to allocate from - * its own reservation. If it does not have a reservation window, instead of - * looking for a free bit on bitmap first, then look up the reservation list to - * see if it is inside somebody else's reservation window, we try to allocate a - * reservation window for it starting from the goal first. Then do the block - * allocation within the reservation window. 
- * - * This will avoid keeping on searching the reservation list again and - * again when somebody is looking for a free block (without - * reservation), and there are lots of free blocks, but they are all - * being reserved. - * - * We use a red-black tree for the per-filesystem reservation list. - * - */ -static ext3_grpblk_t -ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, - unsigned int group, struct buffer_head *bitmap_bh, - ext3_grpblk_t grp_goal, - struct ext3_reserve_window_node * my_rsv, - unsigned long *count, int *errp) -{ - ext3_fsblk_t group_first_block, group_last_block; - ext3_grpblk_t ret = 0; - int fatal; - unsigned long num = *count; - - *errp = 0; - - /* - * Make sure we use undo access for the bitmap, because it is critical - * that we do the frozen_data COW on bitmap buffers in all cases even - * if the buffer is in BJ_Forget state in the committing transaction. - */ - BUFFER_TRACE(bitmap_bh, "get undo access for new block"); - fatal = ext3_journal_get_undo_access(handle, bitmap_bh); - if (fatal) { - *errp = fatal; - return -1; - } - - /* - * we don't deal with reservation when - * filesystem is mounted without reservation - * or the file is not a regular file - * or last attempt to allocate a block with reservation turned on failed - */ - if (my_rsv == NULL ) { - ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, - grp_goal, count, NULL); - goto out; - } - /* - * grp_goal is a group relative block number (if there is a goal) - * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb) - * first block is a filesystem wide block number - * first block is the block number of the first block in this group - */ - group_first_block = ext3_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); - - /* - * Basically we will allocate a new block from inode's reservation - * window. 
- * - * We need to allocate a new reservation window, if: - * a) inode does not have a reservation window; or - * b) last attempt to allocate a block from existing reservation - * failed; or - * c) we come here with a goal and with a reservation window - * - * We do not need to allocate a new reservation window if we come here - * at the beginning with a goal and the goal is inside the window, or - * we don't have a goal but already have a reservation window. - * then we could go to allocate from the reservation window directly. - */ - while (1) { - if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || - !goal_in_my_reservation(&my_rsv->rsv_window, - grp_goal, group, sb)) { - if (my_rsv->rsv_goal_size < *count) - my_rsv->rsv_goal_size = *count; - ret = alloc_new_reservation(my_rsv, grp_goal, sb, - group, bitmap_bh); - if (ret < 0) - break; /* failed */ - - if (!goal_in_my_reservation(&my_rsv->rsv_window, - grp_goal, group, sb)) - grp_goal = -1; - } else if (grp_goal >= 0) { - int curr = my_rsv->rsv_end - - (grp_goal + group_first_block) + 1; - - if (curr < *count) - try_to_extend_reservation(my_rsv, sb, - *count - curr); - } - - if ((my_rsv->rsv_start > group_last_block) || - (my_rsv->rsv_end < group_first_block)) { - rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1); - BUG(); - } - ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh, - grp_goal, &num, &my_rsv->rsv_window); - if (ret >= 0) { - my_rsv->rsv_alloc_hit += num; - *count = num; - break; /* succeed */ - } - num = *count; - } -out: - if (ret >= 0) { - BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " - "bitmap block"); - fatal = ext3_journal_dirty_metadata(handle, bitmap_bh); - if (fatal) { - *errp = fatal; - return -1; - } - return ret; - } - - BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); - ext3_journal_release_buffer(handle, bitmap_bh); - return ret; -} - -/** - * ext3_has_free_blocks() - * @sbi: in-core super block structure. 
- * - * Check if filesystem has at least 1 free block available for allocation. - */ -static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation) -{ - ext3_fsblk_t free_blocks, root_blocks; - - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); - if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && - !use_reservation && !uid_eq(sbi->s_resuid, current_fsuid()) && - (gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) || - !in_group_p (sbi->s_resgid))) { - return 0; - } - return 1; -} - -/** - * ext3_should_retry_alloc() - * @sb: super block - * @retries number of attemps has been made - * - * ext3_should_retry_alloc() is called when ENOSPC is returned, and if - * it is profitable to retry the operation, this function will wait - * for the current or committing transaction to complete, and then - * return TRUE. - * - * if the total number of retries exceed three times, return FALSE. - */ -int ext3_should_retry_alloc(struct super_block *sb, int *retries) -{ - if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3) - return 0; - - jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id); - - return journal_force_commit_nested(EXT3_SB(sb)->s_journal); -} - -/** - * ext3_new_blocks() -- core block(s) allocation function - * @handle: handle to this transaction - * @inode: file inode - * @goal: given target block(filesystem wide) - * @count: target number of blocks to allocate - * @errp: error code - * - * ext3_new_blocks uses a goal block to assist allocation. It tries to - * allocate block(s) from the block group contains the goal block first. If that - * fails, it will try to allocate block(s) from other block groups without - * any specific goal block. 
- * - */ -ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, unsigned long *count, int *errp) -{ - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gdp_bh; - int group_no; - int goal_group; - ext3_grpblk_t grp_target_blk; /* blockgroup relative goal block */ - ext3_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ - ext3_fsblk_t ret_block; /* filesyetem-wide allocated block */ - int bgi; /* blockgroup iteration index */ - int fatal = 0, err; - int performed_allocation = 0; - ext3_grpblk_t free_blocks; /* number of free blocks in a group */ - struct super_block *sb; - struct ext3_group_desc *gdp; - struct ext3_super_block *es; - struct ext3_sb_info *sbi; - struct ext3_reserve_window_node *my_rsv = NULL; - struct ext3_block_alloc_info *block_i; - unsigned short windowsz = 0; -#ifdef EXT3FS_DEBUG - static int goal_hits, goal_attempts; -#endif - unsigned long ngroups; - unsigned long num = *count; - - *errp = -ENOSPC; - sb = inode->i_sb; - - /* - * Check quota for allocation of this block. - */ - err = dquot_alloc_block(inode, num); - if (err) { - *errp = err; - return 0; - } - - trace_ext3_request_blocks(inode, goal, num); - - sbi = EXT3_SB(sb); - es = sbi->s_es; - ext3_debug("goal=%lu.\n", goal); - /* - * Allocate a block from reservation only when - * filesystem is mounted with reservation(default,-o reservation), and - * it's a regular file, and - * the desired window size is greater than 0 (One could use ioctl - * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off - * reservation on that particular file) - */ - block_i = EXT3_I(inode)->i_block_alloc_info; - if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) - my_rsv = &block_i->rsv_window_node; - - if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) { - *errp = -ENOSPC; - goto out; - } - - /* - * First, test whether the goal block is free. 
- */ - if (goal < le32_to_cpu(es->s_first_data_block) || - goal >= le32_to_cpu(es->s_blocks_count)) - goal = le32_to_cpu(es->s_first_data_block); - group_no = (goal - le32_to_cpu(es->s_first_data_block)) / - EXT3_BLOCKS_PER_GROUP(sb); - goal_group = group_no; -retry_alloc: - gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) - goto io_error; - - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - /* - * if there is not enough free blocks to make a new resevation - * turn off reservation for this allocation - */ - if (my_rsv && (free_blocks < windowsz) - && (free_blocks > 0) - && (rsv_is_empty(&my_rsv->rsv_window))) - my_rsv = NULL; - - if (free_blocks > 0) { - grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)); - bitmap_bh = read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, - group_no, bitmap_bh, grp_target_blk, - my_rsv, &num, &fatal); - if (fatal) - goto out; - if (grp_alloc_blk >= 0) - goto allocated; - } - - ngroups = EXT3_SB(sb)->s_groups_count; - smp_rmb(); - - /* - * Now search the rest of the groups. We assume that - * group_no and gdp correctly point to the last group visited. - */ - for (bgi = 0; bgi < ngroups; bgi++) { - group_no++; - if (group_no >= ngroups) - group_no = 0; - gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) - goto io_error; - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - /* - * skip this group (and avoid loading bitmap) if there - * are no free blocks - */ - if (!free_blocks) - continue; - /* - * skip this group if the number of - * free blocks is less than half of the reservation - * window size. - */ - if (my_rsv && (free_blocks <= (windowsz/2))) - continue; - - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - /* - * try to allocate block(s) from this group, without a goal(-1). 
- */ - grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, - group_no, bitmap_bh, -1, my_rsv, - &num, &fatal); - if (fatal) - goto out; - if (grp_alloc_blk >= 0) - goto allocated; - } - /* - * We may end up a bogus earlier ENOSPC error due to - * filesystem is "full" of reservations, but - * there maybe indeed free blocks available on disk - * In this case, we just forget about the reservations - * just do block allocation as without reservations. - */ - if (my_rsv) { - my_rsv = NULL; - windowsz = 0; - group_no = goal_group; - goto retry_alloc; - } - /* No space left on the device */ - *errp = -ENOSPC; - goto out; - -allocated: - - ext3_debug("using block group %d(%d)\n", - group_no, gdp->bg_free_blocks_count); - - BUFFER_TRACE(gdp_bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, gdp_bh); - if (fatal) - goto out; - - ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); - - if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || - in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || - in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), - EXT3_SB(sb)->s_itb_per_group) || - in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT3_SB(sb)->s_itb_per_group)) { - ext3_error(sb, "ext3_new_block", - "Allocating block in system zone - " - "blocks from "E3FSBLK", length %lu", - ret_block, num); - /* - * claim_block() marked the blocks we allocated as in use. So we - * may want to selectively mark some of the blocks as free. 
- */ - goto retry_alloc; - } - - performed_allocation = 1; - -#ifdef CONFIG_JBD_DEBUG - { - struct buffer_head *debug_bh; - - /* Record bitmap buffer state in the newly allocated block */ - debug_bh = sb_find_get_block(sb, ret_block); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "state when allocated"); - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); - brelse(debug_bh); - } - } - jbd_lock_bh_state(bitmap_bh); - spin_lock(sb_bgl_lock(sbi, group_no)); - if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { - int i; - - for (i = 0; i < num; i++) { - if (ext3_test_bit(grp_alloc_blk+i, - bh2jh(bitmap_bh)->b_committed_data)) { - printk("%s: block was unexpectedly set in " - "b_committed_data\n", __func__); - } - } - } - ext3_debug("found bit %d\n", grp_alloc_blk); - spin_unlock(sb_bgl_lock(sbi, group_no)); - jbd_unlock_bh_state(bitmap_bh); -#endif - - if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { - ext3_error(sb, "ext3_new_block", - "block("E3FSBLK") >= blocks count(%d) - " - "block_group = %d, es == %p ", ret_block, - le32_to_cpu(es->s_blocks_count), group_no, es); - goto out; - } - - /* - * It is up to the caller to add the new buffer to a journal - * list of some description. We don't know in advance whether - * the caller wants to use it as metadata or data. - */ - ext3_debug("allocating block %lu. 
Goal hits %d of %d.\n", - ret_block, goal_hits, goal_attempts); - - spin_lock(sb_bgl_lock(sbi, group_no)); - le16_add_cpu(&gdp->bg_free_blocks_count, -num); - spin_unlock(sb_bgl_lock(sbi, group_no)); - percpu_counter_sub(&sbi->s_freeblocks_counter, num); - - BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); - fatal = ext3_journal_dirty_metadata(handle, gdp_bh); - if (fatal) - goto out; - - *errp = 0; - brelse(bitmap_bh); - - if (num < *count) { - dquot_free_block(inode, *count-num); - *count = num; - } - - trace_ext3_allocate_blocks(inode, goal, num, - (unsigned long long)ret_block); - - return ret_block; - -io_error: - *errp = -EIO; -out: - if (fatal) { - *errp = fatal; - ext3_std_error(sb, fatal); - } - /* - * Undo the block allocation - */ - if (!performed_allocation) - dquot_free_block(inode, *count); - brelse(bitmap_bh); - return 0; -} - -ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int *errp) -{ - unsigned long count = 1; - - return ext3_new_blocks(handle, inode, goal, &count, errp); -} - -/** - * ext3_count_free_blocks() -- count filesystem free blocks - * @sb: superblock - * - * Adds up the number of free blocks from each block group. 
- */ -ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) -{ - ext3_fsblk_t desc_count; - struct ext3_group_desc *gdp; - int i; - unsigned long ngroups = EXT3_SB(sb)->s_groups_count; -#ifdef EXT3FS_DEBUG - struct ext3_super_block *es; - ext3_fsblk_t bitmap_count; - unsigned long x; - struct buffer_head *bitmap_bh = NULL; - - es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; - - smp_rmb(); - for (i = 0; i < ngroups; i++) { - gdp = ext3_get_group_desc(sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); - brelse(bitmap_bh); - bitmap_bh = read_block_bitmap(sb, i); - if (bitmap_bh == NULL) - continue; - - x = ext3_count_free(bitmap_bh, sb->s_blocksize); - printk("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); - bitmap_count += x; - } - brelse(bitmap_bh); - printk("ext3_count_free_blocks: stored = "E3FSBLK - ", computed = "E3FSBLK", "E3FSBLK"\n", - (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count), - desc_count, bitmap_count); - return bitmap_count; -#else - desc_count = 0; - smp_rmb(); - for (i = 0; i < ngroups; i++) { - gdp = ext3_get_group_desc(sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); - } - - return desc_count; -#endif -} - -static inline int test_root(int a, int b) -{ - int num = b; - - while (a > num) - num *= b; - return num == a; -} - -static int ext3_group_sparse(int group) -{ - if (group <= 1) - return 1; - if (!(group & 1)) - return 0; - return (test_root(group, 7) || test_root(group, 5) || - test_root(group, 3)); -} - -/** - * ext3_bg_has_super - number of blocks used by the superblock in group - * @sb: superblock for filesystem - * @group: group number to check - * - * Return the number of blocks used by the superblock (primary or backup) - * in this group. Currently this will be only 0 or 1. 
- */ -int ext3_bg_has_super(struct super_block *sb, int group) -{ - if (EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext3_group_sparse(group)) - return 0; - return 1; -} - -static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) -{ - unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); - unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb); - unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1; - - if (group == first || group == first + 1 || group == last) - return 1; - return 0; -} - -static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) -{ - return ext3_bg_has_super(sb, group) ? EXT3_SB(sb)->s_gdb_count : 0; -} - -/** - * ext3_bg_num_gdb - number of blocks used by the group table in group - * @sb: superblock for filesystem - * @group: group number to check - * - * Return the number of blocks used by the group descriptor table - * (primary or backup) in this group. In the future there may be a - * different number of descriptor blocks in each group. - */ -unsigned long ext3_bg_num_gdb(struct super_block *sb, int group) -{ - unsigned long first_meta_bg = - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); - unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); - - if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) || - metagroup < first_meta_bg) - return ext3_bg_num_gdb_nometa(sb,group); - - return ext3_bg_num_gdb_meta(sb,group); - -} - -/** - * ext3_trim_all_free -- function to trim all free space in alloc. group - * @sb: super block for file system - * @group: allocation group to trim - * @start: first group block to examine - * @max: last group block to examine - * @gdp: allocation group description structure - * @minblocks: minimum extent block count - * - * ext3_trim_all_free walks through group's block bitmap searching for free - * blocks. 
When the free block is found, it tries to allocate this block and - * consequent free block to get the biggest free extent possible, until it - * reaches any used block. Then issue a TRIM command on this extent and free - * the extent in the block bitmap. This is done until whole group is scanned. - */ -static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, - unsigned int group, - ext3_grpblk_t start, ext3_grpblk_t max, - ext3_grpblk_t minblocks) -{ - handle_t *handle; - ext3_grpblk_t next, free_blocks, bit, freed, count = 0; - ext3_fsblk_t discard_block; - struct ext3_sb_info *sbi; - struct buffer_head *gdp_bh, *bitmap_bh = NULL; - struct ext3_group_desc *gdp; - int err = 0, ret = 0; - - /* - * We will update one block bitmap, and one group descriptor - */ - handle = ext3_journal_start_sb(sb, 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - bitmap_bh = read_block_bitmap(sb, group); - if (!bitmap_bh) { - err = -EIO; - goto err_out; - } - - BUFFER_TRACE(bitmap_bh, "getting undo access"); - err = ext3_journal_get_undo_access(handle, bitmap_bh); - if (err) - goto err_out; - - gdp = ext3_get_group_desc(sb, group, &gdp_bh); - if (!gdp) { - err = -EIO; - goto err_out; - } - - BUFFER_TRACE(gdp_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, gdp_bh); - if (err) - goto err_out; - - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - sbi = EXT3_SB(sb); - - /* Walk through the whole group */ - while (start <= max) { - start = bitmap_search_next_usable_block(start, bitmap_bh, max); - if (start < 0) - break; - next = start; - - /* - * Allocate contiguous free extents by setting bits in the - * block bitmap - */ - while (next <= max - && claim_block(sb_bgl_lock(sbi, group), - next, bitmap_bh)) { - next++; - } - - /* We did not claim any blocks */ - if (next == start) - continue; - - discard_block = (ext3_fsblk_t)start + - ext3_group_first_block_no(sb, group); - - /* Update counters */ - spin_lock(sb_bgl_lock(sbi, group)); - 
le16_add_cpu(&gdp->bg_free_blocks_count, start - next); - spin_unlock(sb_bgl_lock(sbi, group)); - percpu_counter_sub(&sbi->s_freeblocks_counter, next - start); - - free_blocks -= next - start; - /* Do not issue a TRIM on extents smaller than minblocks */ - if ((next - start) < minblocks) - goto free_extent; - - trace_ext3_discard_blocks(sb, discard_block, next - start); - /* Send the TRIM command down to the device */ - err = sb_issue_discard(sb, discard_block, next - start, - GFP_NOFS, 0); - count += (next - start); -free_extent: - freed = 0; - - /* - * Clear bits in the bitmap - */ - for (bit = start; bit < next; bit++) { - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, group), - bit, bitmap_bh->b_data)) { - ext3_error(sb, __func__, - "bit already cleared for block "E3FSBLK, - (unsigned long)bit); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - freed++; - } - } - - /* Update couters */ - spin_lock(sb_bgl_lock(sbi, group)); - le16_add_cpu(&gdp->bg_free_blocks_count, freed); - spin_unlock(sb_bgl_lock(sbi, group)); - percpu_counter_add(&sbi->s_freeblocks_counter, freed); - - start = next; - if (err < 0) { - if (err != -EOPNOTSUPP) - ext3_warning(sb, __func__, "Discard command " - "returned error %d\n", err); - break; - } - - if (fatal_signal_pending(current)) { - err = -ERESTARTSYS; - break; - } - - cond_resched(); - - /* No more suitable extents */ - if (free_blocks < minblocks) - break; - } - - /* We dirtied the bitmap block */ - BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - ret = ext3_journal_dirty_metadata(handle, bitmap_bh); - if (!err) - err = ret; - - /* And the group descriptor block */ - BUFFER_TRACE(gdp_bh, "dirtied group descriptor block"); - ret = ext3_journal_dirty_metadata(handle, gdp_bh); - if (!err) - err = ret; - - ext3_debug("trimmed %d blocks in the group %d\n", - count, group); - -err_out: - if (err) - count = err; - ext3_journal_stop(handle); - brelse(bitmap_bh); - - return count; -} - 
-/** - * ext3_trim_fs() -- trim ioctl handle function - * @sb: superblock for filesystem - * @start: First Byte to trim - * @len: number of Bytes to trim from start - * @minlen: minimum extent length in Bytes - * - * ext3_trim_fs goes through all allocation groups containing Bytes from - * start to start+len. For each such a group ext3_trim_all_free function - * is invoked to trim all free space. - */ -int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) -{ - ext3_grpblk_t last_block, first_block; - unsigned long group, first_group, last_group; - struct ext3_group_desc *gdp; - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - uint64_t start, minlen, end, trimmed = 0; - ext3_fsblk_t first_data_blk = - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); - ext3_fsblk_t max_blks = le32_to_cpu(es->s_blocks_count); - int ret = 0; - - start = range->start >> sb->s_blocksize_bits; - end = start + (range->len >> sb->s_blocksize_bits) - 1; - minlen = range->minlen >> sb->s_blocksize_bits; - - if (minlen > EXT3_BLOCKS_PER_GROUP(sb) || - start >= max_blks || - range->len < sb->s_blocksize) - return -EINVAL; - if (end >= max_blks) - end = max_blks - 1; - if (end <= first_data_blk) - goto out; - if (start < first_data_blk) - start = first_data_blk; - - smp_rmb(); - - /* Determine first and last group to examine based on start and len */ - ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) start, - &first_group, &first_block); - ext3_get_group_no_and_offset(sb, (ext3_fsblk_t) end, - &last_group, &last_block); - - /* end now represents the last block to discard in this group */ - end = EXT3_BLOCKS_PER_GROUP(sb) - 1; - - for (group = first_group; group <= last_group; group++) { - gdp = ext3_get_group_desc(sb, group, NULL); - if (!gdp) - break; - - /* - * For all the groups except the last one, last block will - * always be EXT3_BLOCKS_PER_GROUP(sb)-1, so we only need to - * change it for the last group, note that last_block is - * already computed earlier by 
ext3_get_group_no_and_offset() - */ - if (group == last_group) - end = last_block; - - if (le16_to_cpu(gdp->bg_free_blocks_count) >= minlen) { - ret = ext3_trim_all_free(sb, group, first_block, - end, minlen); - if (ret < 0) - break; - trimmed += ret; - } - - /* - * For every group except the first one, we are sure - * that the first block to discard will be block #0. - */ - first_block = 0; - } - - if (ret > 0) - ret = 0; - -out: - range->len = trimmed * sb->s_blocksize; - return ret; -} diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c deleted file mode 100644 index ef9c643e8e9d..000000000000 --- a/fs/ext3/bitmap.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * linux/fs/ext3/bitmap.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - */ - -#include "ext3.h" - -#ifdef EXT3FS_DEBUG - -unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) -{ - return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars); -} - -#endif /* EXT3FS_DEBUG */ - diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c deleted file mode 100644 index 17742eed2c16..000000000000 --- a/fs/ext3/dir.c +++ /dev/null @@ -1,537 +0,0 @@ -/* - * linux/fs/ext3/dir.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/dir.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext3 directory handling functions - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. 
Miller (davem@caip.rutgers.edu), 1995 - * - * Hash Tree Directory indexing (c) 2001 Daniel Phillips - * - */ - -#include -#include "ext3.h" - -static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -}; - -static int ext3_dx_readdir(struct file *, struct dir_context *); - -static unsigned char get_dtype(struct super_block *sb, int filetype) -{ - if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT3_FT_MAX)) - return DT_UNKNOWN; - - return (ext3_filetype_table[filetype]); -} - -/** - * Check if the given dir-inode refers to an htree-indexed directory - * (or a directory which could potentially get converted to use htree - * indexing). - * - * Return 1 if it is a dx dir, 0 if not - */ -static int is_dx_dir(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - - if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, - EXT3_FEATURE_COMPAT_DIR_INDEX) && - ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || - ((inode->i_size >> sb->s_blocksize_bits) == 1))) - return 1; - - return 0; -} - -int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, - unsigned long offset) -{ - const char * error_msg = NULL; - const int rlen = ext3_rec_len_from_disk(de->rec_len); - - if (unlikely(rlen < EXT3_DIR_REC_LEN(1))) - error_msg = "rec_len is smaller than minimal"; - else if (unlikely(rlen % 4 != 0)) - error_msg = "rec_len % 4 != 0"; - else if (unlikely(rlen < EXT3_DIR_REC_LEN(de->name_len))) - error_msg = "rec_len is too small for name_len"; - else if (unlikely((((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))) - error_msg = "directory entry across blocks"; - else if (unlikely(le32_to_cpu(de->inode) > - le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))) - error_msg = "inode out of bounds"; - - if (unlikely(error_msg != NULL)) - ext3_error (dir->i_sb, function, - "bad entry in directory #%lu: %s - " - 
"offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error_msg, offset, - (unsigned long) le32_to_cpu(de->inode), - rlen, de->name_len); - - return error_msg == NULL ? 1 : 0; -} - -static int ext3_readdir(struct file *file, struct dir_context *ctx) -{ - unsigned long offset; - int i; - struct ext3_dir_entry_2 *de; - int err; - struct inode *inode = file_inode(file); - struct super_block *sb = inode->i_sb; - int dir_has_error = 0; - - if (is_dx_dir(inode)) { - err = ext3_dx_readdir(file, ctx); - if (err != ERR_BAD_DX_DIR) - return err; - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; - } - offset = ctx->pos & (sb->s_blocksize - 1); - - while (ctx->pos < inode->i_size) { - unsigned long blk = ctx->pos >> EXT3_BLOCK_SIZE_BITS(sb); - struct buffer_head map_bh; - struct buffer_head *bh = NULL; - - map_bh.b_state = 0; - err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0); - if (err > 0) { - pgoff_t index = map_bh.b_blocknr >> - (PAGE_CACHE_SHIFT - inode->i_blkbits); - if (!ra_has_index(&file->f_ra, index)) - page_cache_sync_readahead( - sb->s_bdev->bd_inode->i_mapping, - &file->f_ra, file, - index, 1); - file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; - bh = ext3_bread(NULL, inode, blk, 0, &err); - } - - /* - * We ignore I/O errors on directories so users have a chance - * of recovering data when there's a bad sector - */ - if (!bh) { - if (!dir_has_error) { - ext3_error(sb, __func__, "directory #%lu " - "contains a hole at offset %lld", - inode->i_ino, ctx->pos); - dir_has_error = 1; - } - /* corrupt size? Maybe no more blocks to read */ - if (ctx->pos > inode->i_blocks << 9) - break; - ctx->pos += sb->s_blocksize - offset; - continue; - } - - /* If the dir block has changed since the last call to - * readdir(2), then we might be pointing to an invalid - * dirent right now. Scan from the start of the block - * to make sure. 
*/ - if (offset && file->f_version != inode->i_version) { - for (i = 0; i < sb->s_blocksize && i < offset; ) { - de = (struct ext3_dir_entry_2 *) - (bh->b_data + i); - /* It's too expensive to do a full - * dirent test each time round this - * loop, but we do have to test at - * least that it is non-zero. A - * failure will be detected in the - * dirent test below. */ - if (ext3_rec_len_from_disk(de->rec_len) < - EXT3_DIR_REC_LEN(1)) - break; - i += ext3_rec_len_from_disk(de->rec_len); - } - offset = i; - ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) - | offset; - file->f_version = inode->i_version; - } - - while (ctx->pos < inode->i_size - && offset < sb->s_blocksize) { - de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); - if (!ext3_check_dir_entry ("ext3_readdir", inode, de, - bh, offset)) { - /* On error, skip the to the - next block. */ - ctx->pos = (ctx->pos | - (sb->s_blocksize - 1)) + 1; - break; - } - offset += ext3_rec_len_from_disk(de->rec_len); - if (le32_to_cpu(de->inode)) { - if (!dir_emit(ctx, de->name, de->name_len, - le32_to_cpu(de->inode), - get_dtype(sb, de->file_type))) { - brelse(bh); - return 0; - } - } - ctx->pos += ext3_rec_len_from_disk(de->rec_len); - } - offset = 0; - brelse (bh); - if (ctx->pos < inode->i_size) - if (!dir_relax(inode)) - return 0; - } - return 0; -} - -static inline int is_32bit_api(void) -{ -#ifdef CONFIG_COMPAT - return is_compat_task(); -#else - return (BITS_PER_LONG == 32); -#endif -} - -/* - * These functions convert from the major/minor hash to an f_pos - * value for dx directories - * - * Upper layer (for example NFS) should specify FMODE_32BITHASH or - * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted - * directly on both 32-bit and 64-bit nodes, under such case, neither - * FMODE_32BITHASH nor FMODE_64BITHASH is specified. 
- */ -static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) -{ - if ((filp->f_mode & FMODE_32BITHASH) || - (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) - return major >> 1; - else - return ((__u64)(major >> 1) << 32) | (__u64)minor; -} - -static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) -{ - if ((filp->f_mode & FMODE_32BITHASH) || - (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) - return (pos << 1) & 0xffffffff; - else - return ((pos >> 32) << 1) & 0xffffffff; -} - -static inline __u32 pos2min_hash(struct file *filp, loff_t pos) -{ - if ((filp->f_mode & FMODE_32BITHASH) || - (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) - return 0; - else - return pos & 0xffffffff; -} - -/* - * Return 32- or 64-bit end-of-file for dx directories - */ -static inline loff_t ext3_get_htree_eof(struct file *filp) -{ - if ((filp->f_mode & FMODE_32BITHASH) || - (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) - return EXT3_HTREE_EOF_32BIT; - else - return EXT3_HTREE_EOF_64BIT; -} - - -/* - * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both - * non-htree and htree directories, where the "offset" is in terms - * of the filename hash value instead of the byte offset. - * - * Because we may return a 64-bit hash that is well beyond s_maxbytes, - * we need to pass the max hash as the maximum allowable offset in - * the htree directory case. 
- * - * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) - * will be invalid once the directory was converted into a dx directory - */ -static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) -{ - struct inode *inode = file->f_mapping->host; - int dx_dir = is_dx_dir(inode); - loff_t htree_max = ext3_get_htree_eof(file); - - if (likely(dx_dir)) - return generic_file_llseek_size(file, offset, whence, - htree_max, htree_max); - else - return generic_file_llseek(file, offset, whence); -} - -/* - * This structure holds the nodes of the red-black tree used to store - * the directory entry in hash order. - */ -struct fname { - __u32 hash; - __u32 minor_hash; - struct rb_node rb_hash; - struct fname *next; - __u32 inode; - __u8 name_len; - __u8 file_type; - char name[0]; -}; - -/* - * This functoin implements a non-recursive way of freeing all of the - * nodes in the red-black tree. - */ -static void free_rb_tree_fname(struct rb_root *root) -{ - struct fname *fname, *next; - - rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash) - do { - struct fname *old = fname; - fname = fname->next; - kfree(old); - } while (fname); - - *root = RB_ROOT; -} - -static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp, - loff_t pos) -{ - struct dir_private_info *p; - - p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL); - if (!p) - return NULL; - p->curr_hash = pos2maj_hash(filp, pos); - p->curr_minor_hash = pos2min_hash(filp, pos); - return p; -} - -void ext3_htree_free_dir_info(struct dir_private_info *p) -{ - free_rb_tree_fname(&p->root); - kfree(p); -} - -/* - * Given a directory entry, enter it into the fname rb tree. 
- */ -int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext3_dir_entry_2 *dirent) -{ - struct rb_node **p, *parent = NULL; - struct fname * fname, *new_fn; - struct dir_private_info *info; - int len; - - info = (struct dir_private_info *) dir_file->private_data; - p = &info->root.rb_node; - - /* Create and allocate the fname structure */ - len = sizeof(struct fname) + dirent->name_len + 1; - new_fn = kzalloc(len, GFP_KERNEL); - if (!new_fn) - return -ENOMEM; - new_fn->hash = hash; - new_fn->minor_hash = minor_hash; - new_fn->inode = le32_to_cpu(dirent->inode); - new_fn->name_len = dirent->name_len; - new_fn->file_type = dirent->file_type; - memcpy(new_fn->name, dirent->name, dirent->name_len); - new_fn->name[dirent->name_len] = 0; - - while (*p) { - parent = *p; - fname = rb_entry(parent, struct fname, rb_hash); - - /* - * If the hash and minor hash match up, then we put - * them on a linked list. This rarely happens... - */ - if ((new_fn->hash == fname->hash) && - (new_fn->minor_hash == fname->minor_hash)) { - new_fn->next = fname->next; - fname->next = new_fn; - return 0; - } - - if (new_fn->hash < fname->hash) - p = &(*p)->rb_left; - else if (new_fn->hash > fname->hash) - p = &(*p)->rb_right; - else if (new_fn->minor_hash < fname->minor_hash) - p = &(*p)->rb_left; - else /* if (new_fn->minor_hash > fname->minor_hash) */ - p = &(*p)->rb_right; - } - - rb_link_node(&new_fn->rb_hash, parent, p); - rb_insert_color(&new_fn->rb_hash, &info->root); - return 0; -} - - - -/* - * This is a helper function for ext3_dx_readdir. It calls filldir - * for all entres on the fname linked list. (Normally there is only - * one entry on the linked list, unless there are 62 bit hash collisions.) 
- */ -static bool call_filldir(struct file *file, struct dir_context *ctx, - struct fname *fname) -{ - struct dir_private_info *info = file->private_data; - struct inode *inode = file_inode(file); - struct super_block *sb = inode->i_sb; - - if (!fname) { - printk("call_filldir: called with null fname?!?\n"); - return true; - } - ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); - while (fname) { - if (!dir_emit(ctx, fname->name, fname->name_len, - fname->inode, - get_dtype(sb, fname->file_type))) { - info->extra_fname = fname; - return false; - } - fname = fname->next; - } - return true; -} - -static int ext3_dx_readdir(struct file *file, struct dir_context *ctx) -{ - struct dir_private_info *info = file->private_data; - struct inode *inode = file_inode(file); - struct fname *fname; - int ret; - - if (!info) { - info = ext3_htree_create_dir_info(file, ctx->pos); - if (!info) - return -ENOMEM; - file->private_data = info; - } - - if (ctx->pos == ext3_get_htree_eof(file)) - return 0; /* EOF */ - - /* Some one has messed with f_pos; reset the world */ - if (info->last_pos != ctx->pos) { - free_rb_tree_fname(&info->root); - info->curr_node = NULL; - info->extra_fname = NULL; - info->curr_hash = pos2maj_hash(file, ctx->pos); - info->curr_minor_hash = pos2min_hash(file, ctx->pos); - } - - /* - * If there are any leftover names on the hash collision - * chain, return them first. - */ - if (info->extra_fname) { - if (!call_filldir(file, ctx, info->extra_fname)) - goto finished; - info->extra_fname = NULL; - goto next_node; - } else if (!info->curr_node) - info->curr_node = rb_first(&info->root); - - while (1) { - /* - * Fill the rbtree if we have no more entries, - * or the inode has changed since we last read in the - * cached entries. 
- */ - if ((!info->curr_node) || - (file->f_version != inode->i_version)) { - info->curr_node = NULL; - free_rb_tree_fname(&info->root); - file->f_version = inode->i_version; - ret = ext3_htree_fill_tree(file, info->curr_hash, - info->curr_minor_hash, - &info->next_hash); - if (ret < 0) - return ret; - if (ret == 0) { - ctx->pos = ext3_get_htree_eof(file); - break; - } - info->curr_node = rb_first(&info->root); - } - - fname = rb_entry(info->curr_node, struct fname, rb_hash); - info->curr_hash = fname->hash; - info->curr_minor_hash = fname->minor_hash; - if (!call_filldir(file, ctx, fname)) - break; - next_node: - info->curr_node = rb_next(info->curr_node); - if (info->curr_node) { - fname = rb_entry(info->curr_node, struct fname, - rb_hash); - info->curr_hash = fname->hash; - info->curr_minor_hash = fname->minor_hash; - } else { - if (info->next_hash == ~0) { - ctx->pos = ext3_get_htree_eof(file); - break; - } - info->curr_hash = info->next_hash; - info->curr_minor_hash = 0; - } - } -finished: - info->last_pos = ctx->pos; - return 0; -} - -static int ext3_release_dir (struct inode * inode, struct file * filp) -{ - if (filp->private_data) - ext3_htree_free_dir_info(filp->private_data); - - return 0; -} - -const struct file_operations ext3_dir_operations = { - .llseek = ext3_dir_llseek, - .read = generic_read_dir, - .iterate = ext3_readdir, - .unlocked_ioctl = ext3_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext3_compat_ioctl, -#endif - .fsync = ext3_sync_file, - .release = ext3_release_dir, -}; diff --git a/fs/ext3/ext3.h b/fs/ext3/ext3.h deleted file mode 100644 index f483a80b3fe7..000000000000 --- a/fs/ext3/ext3.h +++ /dev/null @@ -1,1332 +0,0 @@ -/* - * Written by Stephen C. 
Tweedie , 1999 - * - * Copyright 1998--1999 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#include -#include -#include -#include -#include - -/* - * The second extended filesystem constants/structures - */ - -/* - * Define EXT3FS_DEBUG to produce debug messages - */ -#undef EXT3FS_DEBUG - -/* - * Define EXT3_RESERVATION to reserve data blocks for expanding files - */ -#define EXT3_DEFAULT_RESERVE_BLOCKS 8 -/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ -#define EXT3_MAX_RESERVE_BLOCKS 1027 -#define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0 - -/* - * Debug code - */ -#ifdef EXT3FS_DEBUG -#define ext3_debug(f, a...) \ - do { \ - printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \ - __FILE__, __LINE__, __func__); \ - printk (KERN_DEBUG f, ## a); \ - } while (0) -#else -#define ext3_debug(f, a...) 
do {} while (0) -#endif - -/* - * Special inodes numbers - */ -#define EXT3_BAD_INO 1 /* Bad blocks inode */ -#define EXT3_ROOT_INO 2 /* Root inode */ -#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ -#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ -#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -#define EXT3_JOURNAL_INO 8 /* Journal inode */ - -/* First non-reserved inode for old ext3 filesystems */ -#define EXT3_GOOD_OLD_FIRST_INO 11 - -/* - * Maximal count of links to a file - */ -#define EXT3_LINK_MAX 32000 - -/* - * Macro-instructions used to manage several block sizes - */ -#define EXT3_MIN_BLOCK_SIZE 1024 -#define EXT3_MAX_BLOCK_SIZE 65536 -#define EXT3_MIN_BLOCK_LOG_SIZE 10 -#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) -#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) -#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) -#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) -#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) - -/* - * Macro-instructions used to manage fragments - */ -#define EXT3_MIN_FRAG_SIZE 1024 -#define EXT3_MAX_FRAG_SIZE 4096 -#define EXT3_MIN_FRAG_LOG_SIZE 10 -#define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) -#define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) - -/* - * Structure of a blocks group descriptor - */ -struct ext3_group_desc -{ - __le32 bg_block_bitmap; /* Blocks bitmap block */ - __le32 bg_inode_bitmap; /* Inodes bitmap block */ - __le32 bg_inode_table; /* Inodes table block */ - __le16 bg_free_blocks_count; /* Free blocks count */ - __le16 bg_free_inodes_count; /* Free inodes count */ - __le16 bg_used_dirs_count; /* Directories count */ - __u16 bg_pad; - __le32 bg_reserved[3]; -}; - -/* - * Macro-instructions used to manage group descriptors - */ -#define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) -#define EXT3_DESC_PER_BLOCK(s) 
(EXT3_SB(s)->s_desc_per_block) -#define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) -#define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) - -/* - * Constants relative to the data blocks - */ -#define EXT3_NDIR_BLOCKS 12 -#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS -#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1) -#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1) -#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1) - -/* - * Inode flags - */ -#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */ -#define EXT3_UNRM_FL 0x00000002 /* Undelete */ -#define EXT3_COMPR_FL 0x00000004 /* Compress file */ -#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */ -#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */ -#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... */ -#define EXT3_DIRTY_FL 0x00000100 -#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */ -/* End compression flags --- maybe not all used */ -#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ -#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ - -#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ - -/* Flags that should be inherited by new inodes from their parent. 
*/ -#define EXT3_FL_INHERITED (EXT3_SECRM_FL | EXT3_UNRM_FL | EXT3_COMPR_FL |\ - EXT3_SYNC_FL | EXT3_NODUMP_FL |\ - EXT3_NOATIME_FL | EXT3_COMPRBLK_FL |\ - EXT3_NOCOMPR_FL | EXT3_JOURNAL_DATA_FL |\ - EXT3_NOTAIL_FL | EXT3_DIRSYNC_FL) - -/* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define EXT3_REG_FLMASK (~(EXT3_DIRSYNC_FL | EXT3_TOPDIR_FL)) - -/* Flags that are appropriate for non-directories/regular files. */ -#define EXT3_OTHER_FLMASK (EXT3_NODUMP_FL | EXT3_NOATIME_FL) - -/* Mask out flags that are inappropriate for the given type of inode. */ -static inline __u32 ext3_mask_flags(umode_t mode, __u32 flags) -{ - if (S_ISDIR(mode)) - return flags; - else if (S_ISREG(mode)) - return flags & EXT3_REG_FLMASK; - else - return flags & EXT3_OTHER_FLMASK; -} - -/* Used to pass group descriptor data when online resize is done */ -struct ext3_new_group_input { - __u32 group; /* Group number for this data */ - __u32 block_bitmap; /* Absolute block number of block bitmap */ - __u32 inode_bitmap; /* Absolute block number of inode bitmap */ - __u32 inode_table; /* Absolute block number of inode table start */ - __u32 blocks_count; /* Total number of blocks in this group */ - __u16 reserved_blocks; /* Number of reserved blocks in this group */ - __u16 unused; -}; - -/* The struct ext3_new_group_input in kernel space, with free_blocks_count */ -struct ext3_new_group_data { - __u32 group; - __u32 block_bitmap; - __u32 inode_bitmap; - __u32 inode_table; - __u32 blocks_count; - __u16 reserved_blocks; - __u16 unused; - __u32 free_blocks_count; -}; - - -/* - * ioctl commands - */ -#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS -#define EXT3_IOC_GETVERSION _IOR('f', 3, long) -#define EXT3_IOC_SETVERSION _IOW('f', 4, long) -#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) -#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION -#define 
EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION -#ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) -#endif -#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) -#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) - -/* - * ioctl commands in 32 bit emulation - */ -#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) -#ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) -#endif -#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION -#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION - -/* Number of supported quota types */ -#define EXT3_MAXQUOTAS 2 - -/* - * Mount options - */ -struct ext3_mount_options { - unsigned long s_mount_opt; - kuid_t s_resuid; - kgid_t s_resgid; - unsigned long s_commit_interval; -#ifdef CONFIG_QUOTA - int s_jquota_fmt; - char *s_qf_names[EXT3_MAXQUOTAS]; -#endif -}; - -/* - * Structure of an inode on the disk - */ -struct ext3_inode { - __le16 i_mode; /* File mode */ - __le16 i_uid; /* Low 16 bits of Owner Uid */ - __le32 i_size; /* Size in bytes */ - __le32 i_atime; /* Access time */ - __le32 i_ctime; /* Creation time */ - __le32 i_mtime; /* Modification time */ - __le32 i_dtime; /* Deletion Time */ - __le16 i_gid; /* Low 16 bits of Group Id */ - __le16 i_links_count; /* Links count */ - __le32 i_blocks; /* Blocks count */ - __le32 i_flags; /* File flags */ - union { - struct { - __u32 l_i_reserved1; - } linux1; - struct { - __u32 h_i_translator; - } hurd1; - struct { - __u32 m_i_reserved1; - } masix1; - } osd1; /* OS dependent 1 */ - __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */ - __le32 i_generation; /* File version (for NFS) */ - __le32 i_file_acl; /* File ACL */ - __le32 i_dir_acl; /* Directory ACL 
*/ - __le32 i_faddr; /* Fragment address */ - union { - struct { - __u8 l_i_frag; /* Fragment number */ - __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; - __le16 l_i_uid_high; /* these 2 fields */ - __le16 l_i_gid_high; /* were reserved2[0] */ - __u32 l_i_reserved2; - } linux2; - struct { - __u8 h_i_frag; /* Fragment number */ - __u8 h_i_fsize; /* Fragment size */ - __u16 h_i_mode_high; - __u16 h_i_uid_high; - __u16 h_i_gid_high; - __u32 h_i_author; - } hurd2; - struct { - __u8 m_i_frag; /* Fragment number */ - __u8 m_i_fsize; /* Fragment size */ - __u16 m_pad1; - __u32 m_i_reserved2[2]; - } masix2; - } osd2; /* OS dependent 2 */ - __le16 i_extra_isize; - __le16 i_pad1; -}; - -#define i_size_high i_dir_acl - -#define i_reserved1 osd1.linux1.l_i_reserved1 -#define i_frag osd2.linux2.l_i_frag -#define i_fsize osd2.linux2.l_i_fsize -#define i_uid_low i_uid -#define i_gid_low i_gid -#define i_uid_high osd2.linux2.l_i_uid_high -#define i_gid_high osd2.linux2.l_i_gid_high -#define i_reserved2 osd2.linux2.l_i_reserved2 - -/* - * File system states - */ -#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */ -#define EXT3_ERROR_FS 0x0002 /* Errors detected */ -#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ - -/* - * Misc. 
filesystem flags - */ -#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */ -#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */ -#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */ - -/* - * Mount flags - */ -#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */ -/* EXT3_MOUNT_OLDALLOC was there */ -#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */ -#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */ -#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ -#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ -#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ -#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ -#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */ -#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ -#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ -#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ -#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ -#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ -#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ -#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ -#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ -#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ -#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ -#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ -#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ -#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ -#define EXT3_MOUNT_DATA_ERR_ABORT 0x400000 /* Abort on file data write - * error in ordered mode */ - -/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ -#ifndef _LINUX_EXT2_FS_H -#define clear_opt(o, opt) o &= 
~EXT3_MOUNT_##opt -#define set_opt(o, opt) o |= EXT3_MOUNT_##opt -#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) -#else -#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD -#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT -#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS -#endif - -#define ext3_set_bit __set_bit_le -#define ext3_set_bit_atomic ext2_set_bit_atomic -#define ext3_clear_bit __clear_bit_le -#define ext3_clear_bit_atomic ext2_clear_bit_atomic -#define ext3_test_bit test_bit_le -#define ext3_find_next_zero_bit find_next_zero_bit_le - -/* - * Maximal mount counts between two filesystem checks - */ -#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ -#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */ - -/* - * Behaviour when detecting errors - */ -#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */ -#define EXT3_ERRORS_RO 2 /* Remount fs read-only */ -#define EXT3_ERRORS_PANIC 3 /* Panic */ -#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE - -/* - * Structure of the super block - */ -struct ext3_super_block { -/*00*/ __le32 s_inodes_count; /* Inodes count */ - __le32 s_blocks_count; /* Blocks count */ - __le32 s_r_blocks_count; /* Reserved blocks count */ - __le32 s_free_blocks_count; /* Free blocks count */ -/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ - __le32 s_first_data_block; /* First Data Block */ - __le32 s_log_block_size; /* Block size */ - __le32 s_log_frag_size; /* Fragment size */ -/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ - __le32 s_frags_per_group; /* # Fragments per group */ - __le32 s_inodes_per_group; /* # Inodes per group */ - __le32 s_mtime; /* Mount time */ -/*30*/ __le32 s_wtime; /* Write time */ - __le16 s_mnt_count; /* Mount count */ - __le16 s_max_mnt_count; /* Maximal mount count */ - __le16 s_magic; /* Magic signature */ - __le16 s_state; /* File system state */ - __le16 s_errors; /* Behaviour when detecting errors */ - __le16 s_minor_rev_level; /* minor 
revision level */ -/*40*/ __le32 s_lastcheck; /* time of last check */ - __le32 s_checkinterval; /* max. time between checks */ - __le32 s_creator_os; /* OS */ - __le32 s_rev_level; /* Revision level */ -/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ - __le16 s_def_resgid; /* Default gid for reserved blocks */ - /* - * These fields are for EXT3_DYNAMIC_REV superblocks only. - * - * Note: the difference between the compatible feature set and - * the incompatible feature set is that if there is a bit set - * in the incompatible feature set that the kernel doesn't - * know about, it should refuse to mount the filesystem. - * - * e2fsck's requirements are more strict; if it doesn't know - * about a feature in either the compatible or incompatible - * feature set, it must abort and not try to meddle with - * things it doesn't understand... - */ - __le32 s_first_ino; /* First non-reserved inode */ - __le16 s_inode_size; /* size of inode structure */ - __le16 s_block_group_nr; /* block group # of this superblock */ - __le32 s_feature_compat; /* compatible feature set */ -/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ - __le32 s_feature_ro_compat; /* readonly-compatible feature set */ -/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ -/*78*/ char s_volume_name[16]; /* volume name */ -/*88*/ char s_last_mounted[64]; /* directory where last mounted */ -/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ - /* - * Performance hints. Directory preallocation should only - * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. - */ - __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ - __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ - __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ - /* - * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. 
- */ -/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ -/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ - __le32 s_journal_dev; /* device number of journal file */ - __le32 s_last_orphan; /* start of list of inodes to delete */ - __le32 s_hash_seed[4]; /* HTREE hash seed */ - __u8 s_def_hash_version; /* Default hash version to use */ - __u8 s_reserved_char_pad; - __u16 s_reserved_word_pad; - __le32 s_default_mount_opts; - __le32 s_first_meta_bg; /* First metablock block group */ - __le32 s_mkfs_time; /* When the filesystem was created */ - __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ - /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ -/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ - __le32 s_r_blocks_count_hi; /* Reserved blocks count */ - __le32 s_free_blocks_count_hi; /* Free blocks count */ - __le16 s_min_extra_isize; /* All inodes have at least # bytes */ - __le16 s_want_extra_isize; /* New inodes should reserve # bytes */ - __le32 s_flags; /* Miscellaneous flags */ - __le16 s_raid_stride; /* RAID stride */ - __le16 s_mmp_interval; /* # seconds to wait in MMP checking */ - __le64 s_mmp_block; /* Block for multi-mount protection */ - __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ - __u8 s_log_groups_per_flex; /* FLEX_BG group size */ - __u8 s_reserved_char_pad2; - __le16 s_reserved_pad; - __u32 s_reserved[162]; /* Padding to the end of the block */ -}; - -/* data type for block offset of block group */ -typedef int ext3_grpblk_t; - -/* data type for filesystem-wide blocks number */ -typedef unsigned long ext3_fsblk_t; - -#define E3FSBLK "%lu" - -struct ext3_reserve_window { - ext3_fsblk_t _rsv_start; /* First byte reserved */ - ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */ -}; - -struct ext3_reserve_window_node { - struct rb_node rsv_node; - __u32 rsv_goal_size; - __u32 rsv_alloc_hit; - struct ext3_reserve_window rsv_window; -}; - -struct ext3_block_alloc_info { - /* 
information about reservation window */ - struct ext3_reserve_window_node rsv_window_node; - /* - * was i_next_alloc_block in ext3_inode_info - * is the logical (file-relative) number of the - * most-recently-allocated block in this file. - * We use this for detecting linearly ascending allocation requests. - */ - __u32 last_alloc_logical_block; - /* - * Was i_next_alloc_goal in ext3_inode_info - * is the *physical* companion to i_next_alloc_block. - * it the physical block number of the block which was most-recentl - * allocated to this file. This give us the goal (target) for the next - * allocation when we detect linearly ascending requests. - */ - ext3_fsblk_t last_alloc_physical_block; -}; - -#define rsv_start rsv_window._rsv_start -#define rsv_end rsv_window._rsv_end - -/* - * third extended file system inode data in memory - */ -struct ext3_inode_info { - __le32 i_data[15]; /* unconverted */ - __u32 i_flags; -#ifdef EXT3_FRAGMENTS - __u32 i_faddr; - __u8 i_frag_no; - __u8 i_frag_size; -#endif - ext3_fsblk_t i_file_acl; - __u32 i_dir_acl; - __u32 i_dtime; - - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to - * place a file's data blocks near its inode block, and new inodes - * near to their parent directory's inode. - */ - __u32 i_block_group; - unsigned long i_state_flags; /* Dynamic state flags for ext3 */ - - /* block reservation info */ - struct ext3_block_alloc_info *i_block_alloc_info; - - __u32 i_dir_start_lookup; -#ifdef CONFIG_EXT3_FS_XATTR - /* - * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention - * between readers of EAs and writers of regular file data, so - * instead we synchronize on xattr_sem when reading or changing - * EAs. 
- */ - struct rw_semaphore xattr_sem; -#endif - - struct list_head i_orphan; /* unlinked but open inodes */ - - /* - * i_disksize keeps track of what the inode size is ON DISK, not - * in memory. During truncate, i_size is set to the new size by - * the VFS prior to calling ext3_truncate(), but the filesystem won't - * set i_disksize to 0 until the truncate is actually under way. - * - * The intent is that i_disksize always represents the blocks which - * are used by this file. This allows recovery to restart truncate - * on orphans if we crash during truncate. We actually write i_disksize - * into the on-disk inode when writing inodes out, instead of i_size. - * - * The only time when i_disksize and i_size may be different is when - * a truncate is in progress. The only things which change i_disksize - * are ext3_get_block (growth) and ext3_truncate (shrinkth). - */ - loff_t i_disksize; - - /* on-disk additional length */ - __u16 i_extra_isize; - - /* - * truncate_mutex is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's - * data tree are chopped off during truncate. We can't do that in - * ext3 because whenever we perform intermediate commits during - * truncate, the inode and all the metadata blocks *must* be in a - * consistent state which allows truncation of the orphans to restart - * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have truncate_mutex. - */ - struct mutex truncate_mutex; - - /* - * Transactions that contain inode's metadata needed to complete - * fsync and fdatasync, respectively. 
- */ - atomic_t i_sync_tid; - atomic_t i_datasync_tid; - -#ifdef CONFIG_QUOTA - struct dquot *i_dquot[MAXQUOTAS]; -#endif - - struct inode vfs_inode; -}; - -/* - * third extended-fs super-block data in memory - */ -struct ext3_sb_info { - unsigned long s_frag_size; /* Size of a fragment in bytes */ - unsigned long s_frags_per_block;/* Number of fragments per block */ - unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_frags_per_group;/* Number of fragments in a group */ - unsigned long s_blocks_per_group;/* Number of blocks in a group */ - unsigned long s_inodes_per_group;/* Number of inodes in a group */ - unsigned long s_itb_per_group; /* Number of inode table blocks per group */ - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - unsigned long s_groups_count; /* Number of groups in the fs */ - unsigned long s_overhead_last; /* Last calculated overhead */ - unsigned long s_blocks_last; /* Last seen block count */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_group_desc; - unsigned long s_mount_opt; - ext3_fsblk_t s_sb_block; - kuid_t s_resuid; - kgid_t s_resgid; - unsigned short s_mount_state; - unsigned short s_pad; - int s_addr_per_block_bits; - int s_desc_per_block_bits; - int s_inode_size; - int s_first_ino; - spinlock_t s_next_gen_lock; - u32 s_next_generation; - u32 s_hash_seed[4]; - int s_def_hash_version; - int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ - struct percpu_counter s_freeblocks_counter; - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; - struct blockgroup_lock *s_blockgroup_lock; - - /* root of the per fs reservation window tree */ - spinlock_t s_rsv_window_lock; - struct rb_root s_rsv_window_root; - struct ext3_reserve_window_node 
s_rsv_window_head; - - /* Journaling */ - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; - struct mutex s_orphan_lock; - struct mutex s_resize_lock; - unsigned long s_commit_interval; - struct block_device *journal_bdev; -#ifdef CONFIG_QUOTA - char *s_qf_names[EXT3_MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ -#endif -}; - -static inline spinlock_t * -sb_bgl_lock(struct ext3_sb_info *sbi, unsigned int block_group) -{ - return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); -} - -static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} -static inline struct ext3_inode_info *EXT3_I(struct inode *inode) -{ - return container_of(inode, struct ext3_inode_info, vfs_inode); -} - -static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino) -{ - return ino == EXT3_ROOT_INO || - ino == EXT3_JOURNAL_INO || - ino == EXT3_RESIZE_INO || - (ino >= EXT3_FIRST_INO(sb) && - ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)); -} - -/* - * Inode dynamic state flags - */ -enum { - EXT3_STATE_JDATA, /* journaled data exists */ - EXT3_STATE_NEW, /* inode is newly created */ - EXT3_STATE_XATTR, /* has in-inode xattrs */ - EXT3_STATE_FLUSH_ON_CLOSE, /* flush dirty pages on close */ -}; - -static inline int ext3_test_inode_state(struct inode *inode, int bit) -{ - return test_bit(bit, &EXT3_I(inode)->i_state_flags); -} - -static inline void ext3_set_inode_state(struct inode *inode, int bit) -{ - set_bit(bit, &EXT3_I(inode)->i_state_flags); -} - -static inline void ext3_clear_inode_state(struct inode *inode, int bit) -{ - clear_bit(bit, &EXT3_I(inode)->i_state_flags); -} - -#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime - -/* - * Codes for operating systems - */ -#define EXT3_OS_LINUX 0 -#define EXT3_OS_HURD 1 -#define EXT3_OS_MASIX 2 -#define EXT3_OS_FREEBSD 3 -#define EXT3_OS_LITES 4 - -/* - * Revision levels - */ 
-#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */ -#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ - -#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV -#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV - -#define EXT3_GOOD_OLD_INODE_SIZE 128 - -/* - * Feature set definitions - */ - -#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \ - ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) -#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \ - ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) -#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \ - ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) -#define EXT3_SET_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) -#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) -#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) -#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) -#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) -#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) - -#define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001 -#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002 -#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 -#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008 -#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010 -#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020 - -#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 -#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 -#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - -#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 -#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ -#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -#define EXT3_FEATURE_INCOMPAT_META_BG 
0x0010 - -#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR -#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG) -#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) - -/* - * Default values for user and/or group using reserved blocks - */ -#define EXT3_DEF_RESUID 0 -#define EXT3_DEF_RESGID 0 - -/* - * Default mount options - */ -#define EXT3_DEFM_DEBUG 0x0001 -#define EXT3_DEFM_BSDGROUPS 0x0002 -#define EXT3_DEFM_XATTR_USER 0x0004 -#define EXT3_DEFM_ACL 0x0008 -#define EXT3_DEFM_UID16 0x0010 -#define EXT3_DEFM_JMODE 0x0060 -#define EXT3_DEFM_JMODE_DATA 0x0020 -#define EXT3_DEFM_JMODE_ORDERED 0x0040 -#define EXT3_DEFM_JMODE_WBACK 0x0060 - -/* - * Structure of a directory entry - */ -#define EXT3_NAME_LEN 255 - -struct ext3_dir_entry { - __le32 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __le16 name_len; /* Name length */ - char name[EXT3_NAME_LEN]; /* File name */ -}; - -/* - * The new version of the directory entry. Since EXT3 structures are - * stored in intel byte order, and the name_len field could never be - * bigger than 255 chars, it's safe to reclaim the extra byte for the - * file_type field. - */ -struct ext3_dir_entry_2 { - __le32 inode; /* Inode number */ - __le16 rec_len; /* Directory entry length */ - __u8 name_len; /* Name length */ - __u8 file_type; - char name[EXT3_NAME_LEN]; /* File name */ -}; - -/* - * Ext3 directory file types. Only the low 3 bits are used. The - * other bits are reserved for now. 
- */ -#define EXT3_FT_UNKNOWN 0 -#define EXT3_FT_REG_FILE 1 -#define EXT3_FT_DIR 2 -#define EXT3_FT_CHRDEV 3 -#define EXT3_FT_BLKDEV 4 -#define EXT3_FT_FIFO 5 -#define EXT3_FT_SOCK 6 -#define EXT3_FT_SYMLINK 7 - -#define EXT3_FT_MAX 8 - -/* - * EXT3_DIR_PAD defines the directory entries boundaries - * - * NOTE: It must be a multiple of 4 - */ -#define EXT3_DIR_PAD 4 -#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) -#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -#define EXT3_MAX_REC_LEN ((1<<16)-1) - -/* - * Tests against MAX_REC_LEN etc were put in place for 64k block - * sizes; if that is not possible on this arch, we can skip - * those tests and speed things up. - */ -static inline unsigned ext3_rec_len_from_disk(__le16 dlen) -{ - unsigned len = le16_to_cpu(dlen); - -#if (PAGE_CACHE_SIZE >= 65536) - if (len == EXT3_MAX_REC_LEN) - return 1 << 16; -#endif - return len; -} - -static inline __le16 ext3_rec_len_to_disk(unsigned len) -{ -#if (PAGE_CACHE_SIZE >= 65536) - if (len == (1 << 16)) - return cpu_to_le16(EXT3_MAX_REC_LEN); - else if (len > (1 << 16)) - BUG(); -#endif - return cpu_to_le16(len); -} - -/* - * Hash Tree Directory indexing - * (c) Daniel Phillips, 2001 - */ - -#define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ - EXT3_FEATURE_COMPAT_DIR_INDEX) && \ - (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) - -/* Legal values for the dx_root hash_version field: */ - -#define DX_HASH_LEGACY 0 -#define DX_HASH_HALF_MD4 1 -#define DX_HASH_TEA 2 -#define DX_HASH_LEGACY_UNSIGNED 3 -#define DX_HASH_HALF_MD4_UNSIGNED 4 -#define DX_HASH_TEA_UNSIGNED 5 - -/* hash info structure used by the directory hash */ -struct dx_hash_info -{ - u32 hash; - u32 minor_hash; - int hash_version; - u32 *seed; -}; - - -/* 32 and 64 bit signed EOF for dx directories */ -#define 
EXT3_HTREE_EOF_32BIT ((1UL << (32 - 1)) - 1) -#define EXT3_HTREE_EOF_64BIT ((1ULL << (64 - 1)) - 1) - - -/* - * Control parameters used by ext3_htree_next_block - */ -#define HASH_NB_ALWAYS 1 - - -/* - * Describe an inode's exact location on disk and in memory - */ -struct ext3_iloc -{ - struct buffer_head *bh; - unsigned long offset; - unsigned long block_group; -}; - -static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc) -{ - return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); -} - -/* - * This structure is stuffed into the struct file's private_data field - * for directories. It is where we put information so that we can do - * readdir operations in hash tree order. - */ -struct dir_private_info { - struct rb_root root; - struct rb_node *curr_node; - struct fname *extra_fname; - loff_t last_pos; - __u32 curr_hash; - __u32 curr_minor_hash; - __u32 next_hash; -}; - -/* calculate the first block number of the group */ -static inline ext3_fsblk_t -ext3_group_first_block_no(struct super_block *sb, unsigned long group_no) -{ - return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) + - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); -} - -/* - * Special error return code only used by dx_probe() and its callers. - */ -#define ERR_BAD_DX_DIR -75000 - -/* - * Function prototypes - */ - -/* - * Ok, these declarations are also in but none of the - * ext3 source programs needs to include it so they are duplicated here. 
- */ -# define NORET_TYPE /**/ -# define ATTRIB_NORET __attribute__((noreturn)) -# define NORET_AND noreturn, - -/* balloc.c */ -extern int ext3_bg_has_super(struct super_block *sb, int group); -extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); -extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int *errp); -extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, unsigned long *count, int *errp); -extern void ext3_free_blocks (handle_t *handle, struct inode *inode, - ext3_fsblk_t block, unsigned long count); -extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb, - ext3_fsblk_t block, unsigned long count, - unsigned long *pdquot_freed_blocks); -extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *); -extern void ext3_check_blocks_bitmap (struct super_block *); -extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, - unsigned int block_group, - struct buffer_head ** bh); -extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); -extern void ext3_init_block_alloc_info(struct inode *); -extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); -extern int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range); - -/* dir.c */ -extern int ext3_check_dir_entry(const char *, struct inode *, - struct ext3_dir_entry_2 *, - struct buffer_head *, unsigned long); -extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext3_dir_entry_2 *dirent); -extern void ext3_htree_free_dir_info(struct dir_private_info *p); - -/* fsync.c */ -extern int ext3_sync_file(struct file *, loff_t, loff_t, int); - -/* hash.c */ -extern int ext3fs_dirhash(const char *name, int len, struct - dx_hash_info *hinfo); - -/* ialloc.c */ -extern struct inode * ext3_new_inode (handle_t *, struct inode *, - const struct qstr *, umode_t); 
-extern void ext3_free_inode (handle_t *, struct inode *); -extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); -extern unsigned long ext3_count_free_inodes (struct super_block *); -extern unsigned long ext3_count_dirs (struct super_block *); -extern void ext3_check_inodes_bitmap (struct super_block *); -extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - -/* inode.c */ -int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t blocknr); -struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); -struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, - sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, - int create); - -extern struct inode *ext3_iget(struct super_block *, unsigned long); -extern int ext3_write_inode (struct inode *, struct writeback_control *); -extern int ext3_setattr (struct dentry *, struct iattr *); -extern void ext3_evict_inode (struct inode *); -extern int ext3_sync_inode (handle_t *, struct inode *); -extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *, int); -extern int ext3_change_inode_journal_flag(struct inode *, int); -extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); -extern int ext3_can_truncate(struct inode *inode); -extern void ext3_truncate(struct inode *inode); -extern void ext3_set_inode_flags(struct inode *); -extern void ext3_get_inode_flags(struct ext3_inode_info *); -extern void ext3_set_aops(struct inode *inode); -extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len); - -/* ioctl.c */ -extern long ext3_ioctl(struct file *, unsigned int, unsigned long); -extern long ext3_compat_ioctl(struct file *, unsigned int, unsigned long); - -/* namei.c */ -extern int ext3_orphan_add(handle_t 
*, struct inode *); -extern int ext3_orphan_del(handle_t *, struct inode *); -extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, - __u32 start_minor_hash, __u32 *next_hash); - -/* resize.c */ -extern int ext3_group_add(struct super_block *sb, - struct ext3_new_group_data *input); -extern int ext3_group_extend(struct super_block *sb, - struct ext3_super_block *es, - ext3_fsblk_t n_blocks_count); - -/* super.c */ -extern __printf(3, 4) -void ext3_error(struct super_block *, const char *, const char *, ...); -extern void __ext3_std_error (struct super_block *, const char *, int); -extern __printf(3, 4) -void ext3_abort(struct super_block *, const char *, const char *, ...); -extern __printf(3, 4) -void ext3_warning(struct super_block *, const char *, const char *, ...); -extern __printf(3, 4) -void ext3_msg(struct super_block *, const char *, const char *, ...); -extern void ext3_update_dynamic_rev (struct super_block *sb); - -#define ext3_std_error(sb, errno) \ -do { \ - if ((errno)) \ - __ext3_std_error((sb), __func__, (errno)); \ -} while (0) - -/* - * Inodes and files operations - */ - -/* dir.c */ -extern const struct file_operations ext3_dir_operations; - -/* file.c */ -extern const struct inode_operations ext3_file_inode_operations; -extern const struct file_operations ext3_file_operations; - -/* namei.c */ -extern const struct inode_operations ext3_dir_inode_operations; -extern const struct inode_operations ext3_special_inode_operations; - -/* symlink.c */ -extern const struct inode_operations ext3_symlink_inode_operations; -extern const struct inode_operations ext3_fast_symlink_inode_operations; - -#define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal) - -/* Define the number of blocks we need to account to a transaction to - * modify one block of data. 
- * - * We may have to touch one inode, one bitmap buffer, up to three - * indirection blocks, the group and superblock summaries, and the data - * block to complete the transaction. */ - -#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U - -/* Extended attribute operations touch at most two data buffers, - * two bitmap buffers, and two group summaries, in addition to the inode - * and the superblock, which are already accounted for. */ - -#define EXT3_XATTR_TRANS_BLOCKS 6U - -/* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - -#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \ - EXT3_XATTR_TRANS_BLOCKS - 2 + \ - EXT3_MAXQUOTAS_TRANS_BLOCKS(sb)) - -/* Delete operations potentially hit one directory's namespace plus an - * entire inode, plus arbitrary amounts of bitmap/indirection data. Be - * generous. We can grow the delete transaction later if necessary. */ - -#define EXT3_DELETE_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) + 64) - -/* Define an arbitrary limit for the amount of data we will anticipate - * writing to any given transaction. For unbounded transactions such as - * write(2) and truncate(2) we can write more than this, but we always - * start off at the maximum transaction size and grow the transaction - * optimistically as we go. */ - -#define EXT3_MAX_TRANS_DATA 64U - -/* We break up a large truncate or write transaction once the handle's - * buffer credits gets this low, we need either to extend the - * transaction or to start a new one. Reserve enough space here for - * inode, bitmap, superblock, group and indirection updates for at least - * one block, plus two quota updates. Quota allocations are not - * needed. 
*/ - -#define EXT3_RESERVE_TRANS_BLOCKS 12U - -#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 - -#ifdef CONFIG_QUOTA -/* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) -/* Amount of blocks needed for quota insert/delete - we do some block writes - * but inode, sb and group updates are done only once */ -#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) -#else -#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0 -#define EXT3_QUOTA_INIT_BLOCKS(sb) 0 -#define EXT3_QUOTA_DEL_BLOCKS(sb) 0 -#endif -#define EXT3_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT3_MAXQUOTAS*EXT3_QUOTA_TRANS_BLOCKS(sb)) -#define EXT3_MAXQUOTAS_INIT_BLOCKS(sb) (EXT3_MAXQUOTAS*EXT3_QUOTA_INIT_BLOCKS(sb)) -#define EXT3_MAXQUOTAS_DEL_BLOCKS(sb) (EXT3_MAXQUOTAS*EXT3_QUOTA_DEL_BLOCKS(sb)) - -int -ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, - struct ext3_iloc *iloc); - -/* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. - */ - -int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc); - -int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode); - -/* - * Wrapper functions with which ext3 calls into JBD. The intent here is - * to allow these to be turned into appropriate stubs so ext3 can control - * ext2 filesystems, so ext2+ext3 systems only nee one fs. This work hasn't - * been done yet. 
- */ - -static inline void ext3_journal_release_buffer(handle_t *handle, - struct buffer_head *bh) -{ - journal_release_buffer(handle, bh); -} - -void ext3_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -int __ext3_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh); - -int __ext3_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh); - -int __ext3_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh); - -int __ext3_journal_revoke(const char *where, handle_t *handle, - unsigned long blocknr, struct buffer_head *bh); - -int __ext3_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh); - -int __ext3_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh); - -#define ext3_journal_get_undo_access(handle, bh) \ - __ext3_journal_get_undo_access(__func__, (handle), (bh)) -#define ext3_journal_get_write_access(handle, bh) \ - __ext3_journal_get_write_access(__func__, (handle), (bh)) -#define ext3_journal_revoke(handle, blocknr, bh) \ - __ext3_journal_revoke(__func__, (handle), (blocknr), (bh)) -#define ext3_journal_get_create_access(handle, bh) \ - __ext3_journal_get_create_access(__func__, (handle), (bh)) -#define ext3_journal_dirty_metadata(handle, bh) \ - __ext3_journal_dirty_metadata(__func__, (handle), (bh)) -#define ext3_journal_forget(handle, bh) \ - __ext3_journal_forget(__func__, (handle), (bh)) - -int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh); - -handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks); -int __ext3_journal_stop(const char *where, handle_t *handle); - -static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks) -{ - return ext3_journal_start_sb(inode->i_sb, nblocks); -} - -#define ext3_journal_stop(handle) \ - __ext3_journal_stop(__func__, (handle)) - -static inline 
handle_t *ext3_journal_current_handle(void) -{ - return journal_current_handle(); -} - -static inline int ext3_journal_extend(handle_t *handle, int nblocks) -{ - return journal_extend(handle, nblocks); -} - -static inline int ext3_journal_restart(handle_t *handle, int nblocks) -{ - return journal_restart(handle, nblocks); -} - -static inline int ext3_journal_blocks_per_page(struct inode *inode) -{ - return journal_blocks_per_page(inode); -} - -static inline int ext3_journal_force_commit(journal_t *journal) -{ - return journal_force_commit(journal); -} - -/* super.c */ -int ext3_force_commit(struct super_block *sb); - -static inline int ext3_should_journal_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) - return 1; - if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 1; - return 0; -} - -static inline int ext3_should_order_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) - return 1; - return 0; -} - -static inline int ext3_should_writeback_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) - return 1; - return 0; -} - -#include diff --git a/fs/ext3/ext3_jbd.c b/fs/ext3/ext3_jbd.c deleted file mode 100644 index 785a3261a26c..000000000000 --- a/fs/ext3/ext3_jbd.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Interface between ext3 and JBD - */ - -#include "ext3.h" - -int __ext3_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_get_undo_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __func__, bh, handle,err); - return err; -} - -int __ext3_journal_get_write_access(const char *where, handle_t 
*handle, - struct buffer_head *bh) -{ - int err = journal_get_write_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __func__, bh, handle,err); - return err; -} - -int __ext3_journal_forget(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_forget(handle, bh); - if (err) - ext3_journal_abort_handle(where, __func__, bh, handle,err); - return err; -} - -int __ext3_journal_revoke(const char *where, handle_t *handle, - unsigned long blocknr, struct buffer_head *bh) -{ - int err = journal_revoke(handle, blocknr, bh); - if (err) - ext3_journal_abort_handle(where, __func__, bh, handle,err); - return err; -} - -int __ext3_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_get_create_access(handle, bh); - if (err) - ext3_journal_abort_handle(where, __func__, bh, handle,err); - return err; -} - -int __ext3_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_dirty_metadata(handle, bh); - if (err) - ext3_journal_abort_handle(where, __func__, bh, handle,err); - return err; -} diff --git a/fs/ext3/file.c b/fs/ext3/file.c deleted file mode 100644 index 3b8f650de22c..000000000000 --- a/fs/ext3/file.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * linux/fs/ext3/file.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/file.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext3 fs regular file handling primitives - * - * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) - */ - -#include -#include "ext3.h" -#include "xattr.h" -#include "acl.h" - -/* - * Called when an inode is released. 
Note that this is different - * from ext3_file_open: open gets called at every open, but release - * gets called only when /all/ the files are closed. - */ -static int ext3_release_file (struct inode * inode, struct file * filp) -{ - if (ext3_test_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE)) { - filemap_flush(inode->i_mapping); - ext3_clear_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); - } - /* if we are the last writer on the inode, drop the block reservation */ - if ((filp->f_mode & FMODE_WRITE) && - (atomic_read(&inode->i_writecount) == 1)) - { - mutex_lock(&EXT3_I(inode)->truncate_mutex); - ext3_discard_reservation(inode); - mutex_unlock(&EXT3_I(inode)->truncate_mutex); - } - if (is_dx(inode) && filp->private_data) - ext3_htree_free_dir_info(filp->private_data); - - return 0; -} - -const struct file_operations ext3_file_operations = { - .llseek = generic_file_llseek, - .read_iter = generic_file_read_iter, - .write_iter = generic_file_write_iter, - .unlocked_ioctl = ext3_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = ext3_compat_ioctl, -#endif - .mmap = generic_file_mmap, - .open = dquot_file_open, - .release = ext3_release_file, - .fsync = ext3_sync_file, - .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, -}; - -const struct inode_operations ext3_file_inode_operations = { - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, - .removexattr = generic_removexattr, -#endif - .get_acl = ext3_get_acl, - .set_acl = ext3_set_acl, - .fiemap = ext3_fiemap, -}; - diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c deleted file mode 100644 index 1cb9c7e10c6f..000000000000 --- a/fs/ext3/fsync.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * linux/fs/ext3/fsync.c - * - * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) - * from - * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et 
Marie Curie (Paris VI) - * from - * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds - * - * ext3fs fsync primitive - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * Removed unnecessary code duplication for little endian machines - * and excessive __inline__s. - * Andi Kleen, 1997 - * - * Major simplications and cleanup - we only need to do the metadata, because - * we can depend on generic_block_fdatasync() to sync the data blocks. - */ - -#include -#include -#include "ext3.h" - -/* - * akpm: A new design for ext3_sync_file(). - * - * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). - * There cannot be a transaction open by this task. - * Another task could have dirtied this inode. Its data can be in any - * state in the journalling system. - * - * What we do is just kick off a commit and wait on it. This will snapshot the - * inode to disk. - */ - -int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) -{ - struct inode *inode = file->f_mapping->host; - struct ext3_inode_info *ei = EXT3_I(inode); - journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; - int ret, needs_barrier = 0; - tid_t commit_tid; - - trace_ext3_sync_file_enter(file, datasync); - - if (inode->i_sb->s_flags & MS_RDONLY) { - /* Make sure that we read updated state */ - smp_rmb(); - if (EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS) - return -EROFS; - return 0; - } - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret) - goto out; - - J_ASSERT(ext3_journal_current_handle() == NULL); - - /* - * data=writeback,ordered: - * The caller's filemap_fdatawrite()/wait will sync the data. - * Metadata is in the journal, we wait for a proper transaction - * to commit here. - * - * data=journal: - * filemap_fdatawrite won't do anything (the buffers are clean). - * ext3_force_commit will write the file data into the journal and - * will wait on that. 
- * filemap_fdatawait() will encounter a ton of newly-dirtied pages - * (they were dirtied by commit). But that's OK - the blocks are - * safe in-journal, which is all fsync() needs to ensure. - */ - if (ext3_should_journal_data(inode)) { - ret = ext3_force_commit(inode->i_sb); - goto out; - } - - if (datasync) - commit_tid = atomic_read(&ei->i_datasync_tid); - else - commit_tid = atomic_read(&ei->i_sync_tid); - - if (test_opt(inode->i_sb, BARRIER) && - !journal_trans_will_send_data_barrier(journal, commit_tid)) - needs_barrier = 1; - log_start_commit(journal, commit_tid); - ret = log_wait_commit(journal, commit_tid); - - /* - * In case we didn't commit a transaction, we have to flush - * disk caches manually so that data really is on persistent - * storage - */ - if (needs_barrier) { - int err; - - err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - if (!ret) - ret = err; - } -out: - trace_ext3_sync_file_exit(inode, ret); - return ret; -} diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c deleted file mode 100644 index ede315cdf126..000000000000 --- a/fs/ext3/hash.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * linux/fs/ext3/hash.c - * - * Copyright (C) 2002 by Theodore Ts'o - * - * This file is released under the GPL v2. - * - * This file may be redistributed under the terms of the GNU Public - * License. 
- */ - -#include "ext3.h" -#include - -#define DELTA 0x9E3779B9 - -static void TEA_transform(__u32 buf[4], __u32 const in[]) -{ - __u32 sum = 0; - __u32 b0 = buf[0], b1 = buf[1]; - __u32 a = in[0], b = in[1], c = in[2], d = in[3]; - int n = 16; - - do { - sum += DELTA; - b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); - b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); - } while(--n); - - buf[0] += b0; - buf[1] += b1; -} - - -/* The old legacy hash */ -static __u32 dx_hack_hash_unsigned(const char *name, int len) -{ - __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; - const unsigned char *ucp = (const unsigned char *) name; - - while (len--) { - hash = hash1 + (hash0 ^ (((int) *ucp++) * 7152373)); - - if (hash & 0x80000000) - hash -= 0x7fffffff; - hash1 = hash0; - hash0 = hash; - } - return hash0 << 1; -} - -static __u32 dx_hack_hash_signed(const char *name, int len) -{ - __u32 hash, hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; - const signed char *scp = (const signed char *) name; - - while (len--) { - hash = hash1 + (hash0 ^ (((int) *scp++) * 7152373)); - - if (hash & 0x80000000) - hash -= 0x7fffffff; - hash1 = hash0; - hash0 = hash; - } - return hash0 << 1; -} - -static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num) -{ - __u32 pad, val; - int i; - const signed char *scp = (const signed char *) msg; - - pad = (__u32)len | ((__u32)len << 8); - pad |= pad << 16; - - val = pad; - if (len > num*4) - len = num * 4; - for (i = 0; i < len; i++) { - if ((i % 4) == 0) - val = pad; - val = ((int) scp[i]) + (val << 8); - if ((i % 4) == 3) { - *buf++ = val; - val = pad; - num--; - } - } - if (--num >= 0) - *buf++ = val; - while (--num >= 0) - *buf++ = pad; -} - -static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num) -{ - __u32 pad, val; - int i; - const unsigned char *ucp = (const unsigned char *) msg; - - pad = (__u32)len | ((__u32)len << 8); - pad |= pad << 16; - - val = pad; - if (len > num*4) - len = num * 4; - for (i=0; i < 
len; i++) { - if ((i % 4) == 0) - val = pad; - val = ((int) ucp[i]) + (val << 8); - if ((i % 4) == 3) { - *buf++ = val; - val = pad; - num--; - } - } - if (--num >= 0) - *buf++ = val; - while (--num >= 0) - *buf++ = pad; -} - -/* - * Returns the hash of a filename. If len is 0 and name is NULL, then - * this function can be used to test whether or not a hash version is - * supported. - * - * The seed is an 4 longword (32 bits) "secret" which can be used to - * uniquify a hash. If the seed is all zero's, then some default seed - * may be used. - * - * A particular hash version specifies whether or not the seed is - * represented, and whether or not the returned hash is 32 bits or 64 - * bits. 32 bit hashes will return 0 for the minor hash. - */ -int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -{ - __u32 hash; - __u32 minor_hash = 0; - const char *p; - int i; - __u32 in[8], buf[4]; - void (*str2hashbuf)(const char *, int, __u32 *, int) = - str2hashbuf_signed; - - /* Initialize the default seed for the hash checksum functions */ - buf[0] = 0x67452301; - buf[1] = 0xefcdab89; - buf[2] = 0x98badcfe; - buf[3] = 0x10325476; - - /* Check to see if the seed is all zero's */ - if (hinfo->seed) { - for (i=0; i < 4; i++) { - if (hinfo->seed[i]) - break; - } - if (i < 4) - memcpy(buf, hinfo->seed, sizeof(buf)); - } - - switch (hinfo->hash_version) { - case DX_HASH_LEGACY_UNSIGNED: - hash = dx_hack_hash_unsigned(name, len); - break; - case DX_HASH_LEGACY: - hash = dx_hack_hash_signed(name, len); - break; - case DX_HASH_HALF_MD4_UNSIGNED: - str2hashbuf = str2hashbuf_unsigned; - case DX_HASH_HALF_MD4: - p = name; - while (len > 0) { - (*str2hashbuf)(p, len, in, 8); - half_md4_transform(buf, in); - len -= 32; - p += 32; - } - minor_hash = buf[2]; - hash = buf[1]; - break; - case DX_HASH_TEA_UNSIGNED: - str2hashbuf = str2hashbuf_unsigned; - case DX_HASH_TEA: - p = name; - while (len > 0) { - (*str2hashbuf)(p, len, in, 4); - TEA_transform(buf, in); - len -= 
16; - p += 16; - } - hash = buf[0]; - minor_hash = buf[1]; - break; - default: - hinfo->hash = 0; - return -1; - } - hash = hash & ~1; - if (hash == (EXT3_HTREE_EOF_32BIT << 1)) - hash = (EXT3_HTREE_EOF_32BIT - 1) << 1; - hinfo->hash = hash; - hinfo->minor_hash = minor_hash; - return 0; -} diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c deleted file mode 100644 index 3ad242e5840e..000000000000 --- a/fs/ext3/ialloc.c +++ /dev/null @@ -1,706 +0,0 @@ -/* - * linux/fs/ext3/ialloc.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * BSD ufs-inspired inode and directory allocation by - * Stephen Tweedie (sct@redhat.com), 1993 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -#include -#include - -#include "ext3.h" -#include "xattr.h" -#include "acl.h" - -/* - * ialloc.c contains the inodes allocation and deallocation routines - */ - -/* - * The free inodes are managed by bitmaps. A file system contains several - * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap - * block for inodes, N blocks for the inode table and data blocks. - * - * The file system contains group descriptors which are located after the - * super block. Each descriptor contains the number of the bitmap block and - * the free blocks count in the block. - */ - - -/* - * Read the inode allocation bitmap for a given block_group, reading - * into the specified slot in the superblock's bitmap cache. - * - * Return buffer_head of bitmap on success or NULL. 
- */ -static struct buffer_head * -read_inode_bitmap(struct super_block * sb, unsigned long block_group) -{ - struct ext3_group_desc *desc; - struct buffer_head *bh = NULL; - - desc = ext3_get_group_desc(sb, block_group, NULL); - if (!desc) - goto error_out; - - bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); - if (!bh) - ext3_error(sb, "read_inode_bitmap", - "Cannot read inode bitmap - " - "block_group = %lu, inode_bitmap = %u", - block_group, le32_to_cpu(desc->bg_inode_bitmap)); -error_out: - return bh; -} - -/* - * NOTE! When we get the inode, we're the only people - * that have access to it, and as such there are no - * race conditions we have to worry about. The inode - * is not on the hash-lists, and it cannot be reached - * through the filesystem because the directory entry - * has been deleted earlier. - * - * HOWEVER: we must make sure that we get no aliases, - * which means that we have to call "clear_inode()" - * _before_ we mark the inode not in use in the inode - * bitmaps. Otherwise a newly created file might use - * the same inode number (not actually the same pointer - * though), and then we'd have two inodes sharing the - * same inode number and space on the harddisk. 
- */ -void ext3_free_inode (handle_t *handle, struct inode * inode) -{ - struct super_block * sb = inode->i_sb; - int is_directory; - unsigned long ino; - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; - unsigned long block_group; - unsigned long bit; - struct ext3_group_desc * gdp; - struct ext3_super_block * es; - struct ext3_sb_info *sbi; - int fatal = 0, err; - - if (atomic_read(&inode->i_count) > 1) { - printk ("ext3_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); - return; - } - if (inode->i_nlink) { - printk ("ext3_free_inode: inode has nlink=%d\n", - inode->i_nlink); - return; - } - if (!sb) { - printk("ext3_free_inode: inode on nonexistent device\n"); - return; - } - sbi = EXT3_SB(sb); - - ino = inode->i_ino; - ext3_debug ("freeing inode %lu\n", ino); - trace_ext3_free_inode(inode); - - is_directory = S_ISDIR(inode->i_mode); - - es = EXT3_SB(sb)->s_es; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_free_inode", - "reserved or nonexistent inode %lu", ino); - goto error_return; - } - block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); - bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); - if (!bitmap_bh) - goto error_return; - - BUFFER_TRACE(bitmap_bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bitmap_bh); - if (fatal) - goto error_return; - - /* Ok, now we can actually update the inode bitmaps.. 
*/ - if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), - bit, bitmap_bh->b_data)) - ext3_error (sb, "ext3_free_inode", - "bit already cleared for inode %lu", ino); - else { - gdp = ext3_get_group_desc (sb, block_group, &bh2); - - BUFFER_TRACE(bh2, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto error_return; - - if (gdp) { - spin_lock(sb_bgl_lock(sbi, block_group)); - le16_add_cpu(&gdp->bg_free_inodes_count, 1); - if (is_directory) - le16_add_cpu(&gdp->bg_used_dirs_count, -1); - spin_unlock(sb_bgl_lock(sbi, block_group)); - percpu_counter_inc(&sbi->s_freeinodes_counter); - if (is_directory) - percpu_counter_dec(&sbi->s_dirs_counter); - - } - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); - if (!fatal) fatal = err; - } - BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bitmap_bh); - if (!fatal) - fatal = err; - -error_return: - brelse(bitmap_bh); - ext3_std_error(sb, fatal); -} - -/* - * Orlov's allocator for directories. - * - * We always try to spread first-level directories. - * - * If there are blockgroups with both free inodes and free blocks counts - * not worse than average we return one with smallest directory count. - * Otherwise we simply return a random group. - * - * For the rest rules look so: - * - * It's OK to put directory into a group unless - * it has too many directories already (max_dirs) or - * it has too few free inodes left (min_inodes) or - * it has too few free blocks left (min_blocks). - * Parent's group is preferred, if it doesn't satisfy these - * conditions we search cyclically through the rest. If none - * of the groups look good we just look for a group with more - * free inodes than average (starting at parent's group). - * - * Debt is incremented each time we allocate a directory and decremented - * when we allocate an inode, within 0--255. 
- */ - -static int find_group_orlov(struct super_block *sb, struct inode *parent) -{ - int parent_group = EXT3_I(parent)->i_block_group; - struct ext3_sb_info *sbi = EXT3_SB(sb); - int ngroups = sbi->s_groups_count; - int inodes_per_group = EXT3_INODES_PER_GROUP(sb); - unsigned int freei, avefreei; - ext3_fsblk_t freeb, avefreeb; - unsigned int ndirs; - int max_dirs, min_inodes; - ext3_grpblk_t min_blocks; - int group = -1, i; - struct ext3_group_desc *desc; - - freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); - avefreei = freei / ngroups; - freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - avefreeb = freeb / ngroups; - ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); - - if ((parent == d_inode(sb->s_root)) || - (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) { - int best_ndir = inodes_per_group; - int best_group = -1; - - group = prandom_u32(); - parent_group = (unsigned)group % ngroups; - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, NULL); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) - continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) - continue; - best_group = group; - best_ndir = le16_to_cpu(desc->bg_used_dirs_count); - } - if (best_group >= 0) - return best_group; - goto fallback; - } - - max_dirs = ndirs / ngroups + inodes_per_group / 16; - min_inodes = avefreei - inodes_per_group / 4; - min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; - - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, NULL); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) - continue; - if 
(le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) - continue; - return group; - } - -fallback: - for (i = 0; i < ngroups; i++) { - group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, NULL); - if (!desc || !desc->bg_free_inodes_count) - continue; - if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) - return group; - } - - if (avefreei) { - /* - * The free-inodes counter is approximate, and for really small - * filesystems the above test can fail to find any blockgroups - */ - avefreei = 0; - goto fallback; - } - - return -1; -} - -static int find_group_other(struct super_block *sb, struct inode *parent) -{ - int parent_group = EXT3_I(parent)->i_block_group; - int ngroups = EXT3_SB(sb)->s_groups_count; - struct ext3_group_desc *desc; - int group, i; - - /* - * Try to place the inode in its parent directory - */ - group = parent_group; - desc = ext3_get_group_desc (sb, group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) - return group; - - /* - * We're going to place this inode in a different blockgroup from its - * parent. We want to cause files in a common directory to all land in - * the same blockgroup. But we want files which are in a different - * directory which shares a blockgroup with our parent to land in a - * different blockgroup. - * - * So add our directory's i_ino into the starting point for the hash. - */ - group = (group + parent->i_ino) % ngroups; - - /* - * Use a quadratic hash to find a group with a free inode and some free - * blocks. - */ - for (i = 1; i < ngroups; i <<= 1) { - group += i; - if (group >= ngroups) - group -= ngroups; - desc = ext3_get_group_desc (sb, group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - le16_to_cpu(desc->bg_free_blocks_count)) - return group; - } - - /* - * That failed: try linear search for a free inode, even if that group - * has no free blocks. 
- */ - group = parent_group; - for (i = 0; i < ngroups; i++) { - if (++group >= ngroups) - group = 0; - desc = ext3_get_group_desc (sb, group, NULL); - if (desc && le16_to_cpu(desc->bg_free_inodes_count)) - return group; - } - - return -1; -} - -/* - * There are two policies for allocating an inode. If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of - * the groups with above-average free space, that group with the fewest - * directories already is chosen. - * - * For other inodes, search forward from the parent directory's block - * group to find a free inode. - */ -struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, - const struct qstr *qstr, umode_t mode) -{ - struct super_block *sb; - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *bh2; - int group; - unsigned long ino = 0; - struct inode * inode; - struct ext3_group_desc * gdp = NULL; - struct ext3_super_block * es; - struct ext3_inode_info *ei; - struct ext3_sb_info *sbi; - int err = 0; - struct inode *ret; - int i; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) - return ERR_PTR(-EPERM); - - sb = dir->i_sb; - trace_ext3_request_inode(dir, mode); - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); - ei = EXT3_I(inode); - - sbi = EXT3_SB(sb); - es = sbi->s_es; - if (S_ISDIR(mode)) - group = find_group_orlov(sb, dir); - else - group = find_group_other(sb, dir); - - err = -ENOSPC; - if (group == -1) - goto out; - - for (i = 0; i < sbi->s_groups_count; i++) { - err = -EIO; - - gdp = ext3_get_group_desc(sb, group, &bh2); - if (!gdp) - goto fail; - - brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, group); - if (!bitmap_bh) - goto fail; - - ino = 0; - -repeat_in_this_group: - ino = ext3_find_next_zero_bit((unsigned long *) - bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino); - if (ino < EXT3_INODES_PER_GROUP(sb)) { - - 
BUFFER_TRACE(bitmap_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bitmap_bh); - if (err) - goto fail; - - if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group), - ino, bitmap_bh->b_data)) { - /* we won it */ - BUFFER_TRACE(bitmap_bh, - "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, - bitmap_bh); - if (err) - goto fail; - goto got; - } - /* we lost it */ - journal_release_buffer(handle, bitmap_bh); - - if (++ino < EXT3_INODES_PER_GROUP(sb)) - goto repeat_in_this_group; - } - - /* - * This case is possible in concurrent environment. It is very - * rare. We cannot repeat the find_group_xxx() call because - * that will simply return the same blockgroup, because the - * group descriptor metadata has not yet been updated. - * So we just go onto the next blockgroup. - */ - if (++group == sbi->s_groups_count) - group = 0; - } - err = -ENOSPC; - goto out; - -got: - ino += group * EXT3_INODES_PER_GROUP(sb) + 1; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", - "reserved inode or inode > inodes count - " - "block_group = %d, inode=%lu", group, ino); - err = -EIO; - goto fail; - } - - BUFFER_TRACE(bh2, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh2); - if (err) goto fail; - spin_lock(sb_bgl_lock(sbi, group)); - le16_add_cpu(&gdp->bg_free_inodes_count, -1); - if (S_ISDIR(mode)) { - le16_add_cpu(&gdp->bg_used_dirs_count, 1); - } - spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - - percpu_counter_dec(&sbi->s_freeinodes_counter); - if (S_ISDIR(mode)) - percpu_counter_inc(&sbi->s_dirs_counter); - - - if (test_opt(sb, GRPID)) { - inode->i_mode = mode; - inode->i_uid = current_fsuid(); - inode->i_gid = dir->i_gid; - } else - inode_init_owner(inode, dir, mode); - - inode->i_ino = ino; - /* This is the optimal IO size (for stat), 
not the fs block size */ - inode->i_blocks = 0; - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; - - memset(ei->i_data, 0, sizeof(ei->i_data)); - ei->i_dir_start_lookup = 0; - ei->i_disksize = 0; - - ei->i_flags = - ext3_mask_flags(mode, EXT3_I(dir)->i_flags & EXT3_FL_INHERITED); -#ifdef EXT3_FRAGMENTS - ei->i_faddr = 0; - ei->i_frag_no = 0; - ei->i_frag_size = 0; -#endif - ei->i_file_acl = 0; - ei->i_dir_acl = 0; - ei->i_dtime = 0; - ei->i_block_alloc_info = NULL; - ei->i_block_group = group; - - ext3_set_inode_flags(inode); - if (IS_DIRSYNC(inode)) - handle->h_sync = 1; - if (insert_inode_locked(inode) < 0) { - /* - * Likely a bitmap corruption causing inode to be allocated - * twice. - */ - err = -EIO; - goto fail; - } - spin_lock(&sbi->s_next_gen_lock); - inode->i_generation = sbi->s_next_generation++; - spin_unlock(&sbi->s_next_gen_lock); - - ei->i_state_flags = 0; - ext3_set_inode_state(inode, EXT3_STATE_NEW); - - /* See comment in ext3_iget for explanation */ - if (ino >= EXT3_FIRST_INO(sb) + 1 && - EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) { - ei->i_extra_isize = - sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE; - } else { - ei->i_extra_isize = 0; - } - - ret = inode; - dquot_initialize(inode); - err = dquot_alloc_inode(inode); - if (err) - goto fail_drop; - - err = ext3_init_acl(handle, inode, dir); - if (err) - goto fail_free_drop; - - err = ext3_init_security(handle, inode, dir, qstr); - if (err) - goto fail_free_drop; - - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); - goto fail_free_drop; - } - - ext3_debug("allocating inode %lu\n", inode->i_ino); - trace_ext3_allocate_inode(inode, dir, mode); - goto really_out; -fail: - ext3_std_error(sb, err); -out: - iput(inode); - ret = ERR_PTR(err); -really_out: - brelse(bitmap_bh); - return ret; - -fail_free_drop: - dquot_free_inode(inode); - -fail_drop: - dquot_drop(inode); - inode->i_flags |= S_NOQUOTA; - clear_nlink(inode); - 
unlock_new_inode(inode); - iput(inode); - brelse(bitmap_bh); - return ERR_PTR(err); -} - -/* Verify that we are loading a valid orphan from disk */ -struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) -{ - unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); - unsigned long block_group; - int bit; - struct buffer_head *bitmap_bh; - struct inode *inode = NULL; - long err = -EIO; - - /* Error cases - e2fsck has already cleaned up for us */ - if (ino > max_ino) { - ext3_warning(sb, __func__, - "bad orphan ino %lu! e2fsck was run?", ino); - goto error; - } - - block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); - bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); - if (!bitmap_bh) { - ext3_warning(sb, __func__, - "inode bitmap error for orphan %lu", ino); - goto error; - } - - /* Having the inode bit set should be a 100% indicator that this - * is a valid orphan (no e2fsck run on fs). Orphans also include - * inodes that were being truncated, so we can't check i_nlink==0. - */ - if (!ext3_test_bit(bit, bitmap_bh->b_data)) - goto bad_orphan; - - inode = ext3_iget(sb, ino); - if (IS_ERR(inode)) - goto iget_failed; - - /* - * If the orphans has i_nlinks > 0 then it should be able to be - * truncated, otherwise it won't be removed from the orphan list - * during processing and an infinite loop will result. - */ - if (inode->i_nlink && !ext3_can_truncate(inode)) - goto bad_orphan; - - if (NEXT_ORPHAN(inode) > max_ino) - goto bad_orphan; - brelse(bitmap_bh); - return inode; - -iget_failed: - err = PTR_ERR(inode); - inode = NULL; -bad_orphan: - ext3_warning(sb, __func__, - "bad orphan inode %lu! 
e2fsck was run?", ino); - printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", - bit, (unsigned long long)bitmap_bh->b_blocknr, - ext3_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); - /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode->i_nlink == 0) - inode->i_blocks = 0; - iput(inode); - } - brelse(bitmap_bh); -error: - return ERR_PTR(err); -} - -unsigned long ext3_count_free_inodes (struct super_block * sb) -{ - unsigned long desc_count; - struct ext3_group_desc *gdp; - int i; -#ifdef EXT3FS_DEBUG - struct ext3_super_block *es; - unsigned long bitmap_count, x; - struct buffer_head *bitmap_bh = NULL; - - es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); - brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, i); - if (!bitmap_bh) - continue; - - x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8); - printk("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); - bitmap_count += x; - } - brelse(bitmap_bh); - printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); - return desc_count; -#else - desc_count = 0; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; - desc_count += le16_to_cpu(gdp->bg_free_inodes_count); - cond_resched(); - } - return desc_count; -#endif -} - -/* Called at mount-time, super-block is locked */ -unsigned long ext3_count_dirs (struct 
super_block * sb) -{ - unsigned long count = 0; - int i; - - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; - count += le16_to_cpu(gdp->bg_used_dirs_count); - } - return count; -} - diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c deleted file mode 100644 index 6c7e5468a2f8..000000000000 --- a/fs/ext3/inode.c +++ /dev/null @@ -1,3574 +0,0 @@ -/* - * linux/fs/ext3/inode.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Goal-directed block allocation by Stephen Tweedie - * (sct@redhat.com), 1993, 1998 - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * 64-bit file support on 64-bit platforms by Jakub Jelinek - * (jj@sunsite.ms.mff.cuni.cz) - * - * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 - */ - -#include -#include -#include -#include -#include -#include -#include "ext3.h" -#include "xattr.h" -#include "acl.h" - -static int ext3_writepage_trans_blocks(struct inode *inode); -static int ext3_block_truncate_page(struct inode *inode, loff_t from); - -/* - * Test whether an inode is a fast symlink. - */ -static int ext3_inode_is_fast_symlink(struct inode *inode) -{ - int ea_blocks = EXT3_I(inode)->i_file_acl ? - (inode->i_sb->s_blocksize >> 9) : 0; - - return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); -} - -/* - * The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. - * - * "bh" may be NULL: a metadata block may have been freed from memory - * but there may still be a record of it in the journal, and that record - * still needs to be revoked. 
- */ -int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t blocknr) -{ - int err; - - might_sleep(); - - trace_ext3_forget(inode, is_metadata, blocknr); - BUFFER_TRACE(bh, "enter"); - - jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " - "data mode %lx\n", - bh, is_metadata, inode->i_mode, - test_opt(inode->i_sb, DATA_FLAGS)); - - /* Never use the revoke function if we are doing full data - * journaling: there is no need to, and a V1 superblock won't - * support it. Otherwise, only skip the revoke on un-journaled - * data blocks. */ - - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || - (!is_metadata && !ext3_should_journal_data(inode))) { - if (bh) { - BUFFER_TRACE(bh, "call journal_forget"); - return ext3_journal_forget(handle, bh); - } - return 0; - } - - /* - * data!=journal && (is_metadata || should_journal_data(inode)) - */ - BUFFER_TRACE(bh, "call ext3_journal_revoke"); - err = ext3_journal_revoke(handle, blocknr, bh); - if (err) - ext3_abort(inode->i_sb, __func__, - "error %d when attempting revoke", err); - BUFFER_TRACE(bh, "exit"); - return err; -} - -/* - * Work out how many blocks we need to proceed with the next chunk of a - * truncate transaction. - */ -static unsigned long blocks_for_truncate(struct inode *inode) -{ - unsigned long needed; - - needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); - - /* Give ourselves just enough room to cope with inodes in which - * i_blocks is corrupt: we've seen disk corruptions in the past - * which resulted in random data in an inode which looked enough - * like a regular file for ext3 to try to delete it. Things - * will go a bit crazy if that happens, but at least we should - * try not to panic the whole kernel. */ - if (needed < 2) - needed = 2; - - /* But we need to bound the transaction so we don't overflow the - * journal. 
*/ - if (needed > EXT3_MAX_TRANS_DATA) - needed = EXT3_MAX_TRANS_DATA; - - return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; -} - -/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. - * - * start_transaction gets us a new handle for a truncate transaction, - * and extend_transaction tries to extend the existing one a bit. If - * extend fails, we need to propagate the failure up and restart the - * transaction in the top-level truncate loop. --sct - */ -static handle_t *start_transaction(struct inode *inode) -{ - handle_t *result; - - result = ext3_journal_start(inode, blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; - - ext3_std_error(inode->i_sb, PTR_ERR(result)); - return result; -} - -/* - * Try to extend this transaction for the purposes of truncation. - * - * Returns 0 if we managed to create more room. If we can't create more - * room, and the transaction must be restarted we return 1. - */ -static int try_to_extend_transaction(handle_t *handle, struct inode *inode) -{ - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) - return 0; - if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) - return 0; - return 1; -} - -/* - * Restart the transaction associated with *handle. This does a commit, - * so before we call here everything must be consistently dirtied against - * this transaction. - */ -static int truncate_restart_transaction(handle_t *handle, struct inode *inode) -{ - int ret; - - jbd_debug(2, "restarting handle %p\n", handle); - /* - * Drop truncate_mutex to avoid deadlock with ext3_get_blocks_handle - * At this moment, get_block can be called only for blocks inside - * i_size since page cache has been already dropped and writes are - * blocked by i_mutex. So we can safely drop the truncate_mutex. 
- */ - mutex_unlock(&EXT3_I(inode)->truncate_mutex); - ret = ext3_journal_restart(handle, blocks_for_truncate(inode)); - mutex_lock(&EXT3_I(inode)->truncate_mutex); - return ret; -} - -/* - * Called at inode eviction from icache - */ -void ext3_evict_inode (struct inode *inode) -{ - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_block_alloc_info *rsv; - handle_t *handle; - int want_delete = 0; - - trace_ext3_evict_inode(inode); - if (!inode->i_nlink && !is_bad_inode(inode)) { - dquot_initialize(inode); - want_delete = 1; - } - - /* - * When journalling data dirty buffers are tracked only in the journal. - * So although mm thinks everything is clean and ready for reaping the - * inode might still have some pages to write in the running - * transaction or waiting to be checkpointed. Thus calling - * journal_invalidatepage() (via truncate_inode_pages()) to discard - * these buffers can cause data loss. Also even if we did not discard - * these buffers, we would have no way to find them after the inode - * is reaped and thus user could see stale data if he tries to read - * them before the transaction is checkpointed. So be careful and - * force everything to disk here... We use ei->i_datasync_tid to - * store the newest transaction containing inode's data. - * - * Note that directories do not have this problem because they don't - * use page cache. - * - * The s_journal check handles the case when ext3_get_journal() fails - * and puts the journal inode. 
- */ - if (inode->i_nlink && ext3_should_journal_data(inode) && - EXT3_SB(inode->i_sb)->s_journal && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_ino != EXT3_JOURNAL_INO) { - tid_t commit_tid = atomic_read(&ei->i_datasync_tid); - journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; - - log_start_commit(journal, commit_tid); - log_wait_commit(journal, commit_tid); - filemap_write_and_wait(&inode->i_data); - } - truncate_inode_pages_final(&inode->i_data); - - ext3_discard_reservation(inode); - rsv = ei->i_block_alloc_info; - ei->i_block_alloc_info = NULL; - if (unlikely(rsv)) - kfree(rsv); - - if (!want_delete) - goto no_delete; - - handle = start_transaction(inode); - if (IS_ERR(handle)) { - /* - * If we're going to skip the normal cleanup, we still need to - * make sure that the in-core orphan linked list is properly - * cleaned up. - */ - ext3_orphan_del(NULL, inode); - goto no_delete; - } - - if (IS_SYNC(inode)) - handle->h_sync = 1; - inode->i_size = 0; - if (inode->i_blocks) - ext3_truncate(inode); - /* - * Kill off the orphan record created when the inode lost the last - * link. Note that ext3_orphan_del() has to be able to cope with the - * deletion of a non-existent orphan - ext3_truncate() could - * have removed the record. - */ - ext3_orphan_del(handle, inode); - ei->i_dtime = get_seconds(); - - /* - * One subtle ordering requirement: if anything has gone wrong - * (transaction abort, IO errors, whatever), then we can still - * do these next steps (the fs will already have been marked as - * having errors), but we can't free the inode if the mark_dirty - * fails. 
- */ - if (ext3_mark_inode_dirty(handle, inode)) { - /* If that failed, just dquot_drop() and be done with that */ - dquot_drop(inode); - clear_inode(inode); - } else { - ext3_xattr_delete_inode(handle, inode); - dquot_free_inode(inode); - dquot_drop(inode); - clear_inode(inode); - ext3_free_inode(handle, inode); - } - ext3_journal_stop(handle); - return; -no_delete: - clear_inode(inode); - dquot_drop(inode); -} - -typedef struct { - __le32 *p; - __le32 key; - struct buffer_head *bh; -} Indirect; - -static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) -{ - p->key = *(p->p = v); - p->bh = bh; -} - -static int verify_chain(Indirect *from, Indirect *to) -{ - while (from <= to && from->key == *from->p) - from++; - return (from > to); -} - -/** - * ext3_block_to_path - parse the block number into array of offsets - * @inode: inode in question (we are only interested in its superblock) - * @i_block: block number to be parsed - * @offsets: array to store the offsets in - * @boundary: set this non-zero if the referred-to block is likely to be - * followed (on disk) by an indirect block. - * - * To store the locations of file's data ext3 uses a data structure common - * for UNIX filesystems - tree of pointers anchored in the inode, with - * data blocks at leaves and indirect blocks in intermediate nodes. - * This function translates the block number into path in that tree - - * return value is the path length and @offsets[n] is the offset of - * pointer to (n+1)th node in the nth one. If @block is out of range - * (negative or too large) warning is printed and zero returned. - * - * Note: function doesn't find node addresses, so no IO is needed. All - * we need to know is the capacity of indirect blocks (taken from the - * inode->i_sb). 
- */ - -/* - * Portability note: the last comparison (check that we fit into triple - * indirect block) is spelled differently, because otherwise on an - * architecture with 32-bit longs and 8Kb pages we might get into trouble - * if our filesystem had 8Kb blocks. We might use long long, but that would - * kill us on x86. Oh, well, at least the sign propagation does not matter - - * i_block would have to be negative in the very beginning, so we would not - * get there at all. - */ - -static int ext3_block_to_path(struct inode *inode, - long i_block, int offsets[4], int *boundary) -{ - int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT3_NDIR_BLOCKS, - indirect_blocks = ptrs, - double_blocks = (1 << (ptrs_bits * 2)); - int n = 0; - int final = 0; - - if (i_block < 0) { - ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); - } else if (i_block < direct_blocks) { - offsets[n++] = i_block; - final = direct_blocks; - } else if ( (i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT3_IND_BLOCK; - offsets[n++] = i_block; - final = ptrs; - } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT3_DIND_BLOCK; - offsets[n++] = i_block >> ptrs_bits; - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT3_TIND_BLOCK; - offsets[n++] = i_block >> (ptrs_bits * 2); - offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); - offsets[n++] = i_block & (ptrs - 1); - final = ptrs; - } else { - ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); - } - if (boundary) - *boundary = final - 1 - (i_block & (ptrs - 1)); - return n; -} - -/** - * ext3_get_branch - read the chain of indirect blocks leading to data - * @inode: inode in question - * @depth: depth of the chain (1 - direct pointer, etc.) 
- * @offsets: offsets of pointers in inode/indirect blocks - * @chain: place to store the result - * @err: here we store the error value - * - * Function fills the array of triples and returns %NULL - * if everything went OK or the pointer to the last filled triple - * (incomplete one) otherwise. Upon the return chain[i].key contains - * the number of (i+1)-th block in the chain (as it is stored in memory, - * i.e. little-endian 32-bit), chain[i].p contains the address of that - * number (it points into struct inode for i==0 and into the bh->b_data - * for i>0) and chain[i].bh points to the buffer_head of i-th indirect - * block for i>0 and NULL for i==0. In other words, it holds the block - * numbers of the chain, addresses they were taken from (and where we can - * verify that chain did not change) and buffer_heads hosting these - * numbers. - * - * Function stops when it stumbles upon zero pointer (absent block) - * (pointer to last triple returned, *@err == 0) - * or when it gets an IO error reading an indirect block - * (ditto, *@err == -EIO) - * or when it notices that chain had been changed while it was reading - * (ditto, *@err == -EAGAIN) - * or when it reads all @depth-1 indirect blocks successfully and finds - * the whole chain, all way to the data (returns %NULL, *err == 0). 
- */ -static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, - Indirect chain[4], int *err) -{ - struct super_block *sb = inode->i_sb; - Indirect *p = chain; - struct buffer_head *bh; - - *err = 0; - /* i_data is not going away, no lock needed */ - add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { - bh = sb_bread(sb, le32_to_cpu(p->key)); - if (!bh) - goto failure; - /* Reader: pointers */ - if (!verify_chain(chain, p)) - goto changed; - add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); - /* Reader: end */ - if (!p->key) - goto no_block; - } - return NULL; - -changed: - brelse(bh); - *err = -EAGAIN; - goto no_block; -failure: - *err = -EIO; -no_block: - return p; -} - -/** - * ext3_find_near - find a place for allocation with sufficient locality - * @inode: owner - * @ind: descriptor of indirect block. - * - * This function returns the preferred place for block allocation. - * It is used when heuristic for sequential allocation fails. - * Rules are: - * + if there is a block to the left of our position - allocate near it. - * + if pointer will live in indirect block - allocate near that block. - * + if pointer will live in inode - allocate in the same - * cylinder group. - * - * In the latter case we colour the starting block by the callers PID to - * prevent it from clashing with concurrent allocations for a different inode - * in the same block group. The PID is used here so that functionally related - * files will be close-by on-disk. - * - * Caller must make sure that @ind is valid and will stay that way. - */ -static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) -{ - struct ext3_inode_info *ei = EXT3_I(inode); - __le32 *start = ind->bh ? 
(__le32*) ind->bh->b_data : ei->i_data; - __le32 *p; - ext3_fsblk_t bg_start; - ext3_grpblk_t colour; - - /* Try to find previous block */ - for (p = ind->p - 1; p >= start; p--) { - if (*p) - return le32_to_cpu(*p); - } - - /* No such thing, so let's try location of indirect block */ - if (ind->bh) - return ind->bh->b_blocknr; - - /* - * It is going to be referred to from the inode itself? OK, just put it - * into the same cylinder group then. - */ - bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); - colour = (current->pid % 16) * - (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); - return bg_start + colour; -} - -/** - * ext3_find_goal - find a preferred place for allocation. - * @inode: owner - * @block: block we want - * @partial: pointer to the last triple within a chain - * - * Normally this function find the preferred place for block allocation, - * returns it. - */ - -static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, - Indirect *partial) -{ - struct ext3_block_alloc_info *block_i; - - block_i = EXT3_I(inode)->i_block_alloc_info; - - /* - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. - */ - if (block_i && (block == block_i->last_alloc_logical_block + 1) - && (block_i->last_alloc_physical_block != 0)) { - return block_i->last_alloc_physical_block + 1; - } - - return ext3_find_near(inode, partial); -} - -/** - * ext3_blks_to_allocate - Look up the block map and count the number - * of direct blocks need to be allocated for the given branch. - * - * @branch: chain of indirect blocks - * @k: number of blocks need for indirect blocks - * @blks: number of data blocks to be mapped. - * @blocks_to_boundary: the offset in the indirect block - * - * return the total number of blocks to be allocate, including the - * direct and indirect blocks. 
- */ -static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, - int blocks_to_boundary) -{ - unsigned long count = 0; - - /* - * Simple case, [t,d]Indirect block(s) has not allocated yet - * then it's clear blocks on that path have not allocated - */ - if (k > 0) { - /* right now we don't handle cross boundary allocation */ - if (blks < blocks_to_boundary + 1) - count += blks; - else - count += blocks_to_boundary + 1; - return count; - } - - count++; - while (count < blks && count <= blocks_to_boundary && - le32_to_cpu(*(branch[0].p + count)) == 0) { - count++; - } - return count; -} - -/** - * ext3_alloc_blocks - multiple allocate blocks needed for a branch - * @handle: handle for this transaction - * @inode: owner - * @goal: preferred place for allocation - * @indirect_blks: the number of blocks need to allocate for indirect - * blocks - * @blks: number of blocks need to allocated for direct blocks - * @new_blocks: on return it will store the new block numbers for - * the indirect blocks(if needed) and the first direct block, - * @err: here we store the error value - * - * return the number of direct blocks allocated - */ -static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int indirect_blks, int blks, - ext3_fsblk_t new_blocks[4], int *err) -{ - int target, i; - unsigned long count = 0; - int index = 0; - ext3_fsblk_t current_block = 0; - int ret = 0; - - /* - * Here we try to allocate the requested multiple blocks at once, - * on a best-effort basis. - * To build a branch, we should allocate blocks for - * the indirect blocks(if not allocated yet), and at least - * the first direct block of this branch. 
That's the - * minimum number of blocks need to allocate(required) - */ - target = blks + indirect_blks; - - while (1) { - count = target; - /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext3_new_blocks(handle,inode,goal,&count,err); - if (*err) - goto failed_out; - - target -= count; - /* allocate blocks for indirect blocks */ - while (index < indirect_blks && count) { - new_blocks[index++] = current_block++; - count--; - } - - if (count > 0) - break; - } - - /* save the new block number for the first direct block */ - new_blocks[index] = current_block; - - /* total number of blocks allocated for direct blocks */ - ret = count; - *err = 0; - return ret; -failed_out: - for (i = 0; i key). Upon the exit we have the same - * picture as after the successful ext3_get_block(), except that in one - * place chain is disconnected - *branch->p is still zero (we did not - * set the last link), but branch->key contains the number that should - * be placed into *branch->p to fill that gap. - * - * If allocation fails we free all blocks we've allocated (and forget - * their buffer_heads) and return the error value the from failed - * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain - * as described above and return 0. - */ -static int ext3_alloc_branch(handle_t *handle, struct inode *inode, - int indirect_blks, int *blks, ext3_fsblk_t goal, - int *offsets, Indirect *branch) -{ - int blocksize = inode->i_sb->s_blocksize; - int i, n = 0; - int err = 0; - struct buffer_head *bh; - int num; - ext3_fsblk_t new_blocks[4]; - ext3_fsblk_t current_block; - - num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, - *blks, new_blocks, &err); - if (err) - return err; - - branch[0].key = cpu_to_le32(new_blocks[0]); - /* - * metadata blocks and data blocks are allocated. - */ - for (n = 1; n <= indirect_blks; n++) { - /* - * Get buffer_head for parent block, zero it out - * and set the pointer to new one, then send - * parent to disk. 
- */ - bh = sb_getblk(inode->i_sb, new_blocks[n-1]); - if (unlikely(!bh)) { - err = -ENOMEM; - goto failed; - } - branch[n].bh = bh; - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - err = ext3_journal_get_create_access(handle, bh); - if (err) { - unlock_buffer(bh); - brelse(bh); - goto failed; - } - - memset(bh->b_data, 0, blocksize); - branch[n].p = (__le32 *) bh->b_data + offsets[n]; - branch[n].key = cpu_to_le32(new_blocks[n]); - *branch[n].p = branch[n].key; - if ( n == indirect_blks) { - current_block = new_blocks[n]; - /* - * End of chain, update the last new metablock of - * the chain to point to the new allocated - * data blocks numbers - */ - for (i=1; i < num; i++) - *(branch[n].p + i) = cpu_to_le32(++current_block); - } - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) - goto failed; - } - *blks = num; - return err; -failed: - /* Allocation failed, free what we already allocated */ - for (i = 1; i <= n ; i++) { - BUFFER_TRACE(branch[i].bh, "call journal_forget"); - ext3_journal_forget(handle, branch[i].bh); - } - for (i = 0; i < indirect_blks; i++) - ext3_free_blocks(handle, inode, new_blocks[i], 1); - - ext3_free_blocks(handle, inode, new_blocks[i], num); - - return err; -} - -/** - * ext3_splice_branch - splice the allocated branch onto inode. - * @handle: handle for this transaction - * @inode: owner - * @block: (logical) number of block we are adding - * @where: location of missing link - * @num: number of indirect blocks we are adding - * @blks: number of direct blocks we are adding - * - * This function fills the missing link and does all housekeeping needed in - * inode (->i_blocks, etc.). In case of success we end up with the full - * chain to new block and return 0. 
- */ -static int ext3_splice_branch(handle_t *handle, struct inode *inode, - long block, Indirect *where, int num, int blks) -{ - int i; - int err = 0; - struct ext3_block_alloc_info *block_i; - ext3_fsblk_t current_block; - struct ext3_inode_info *ei = EXT3_I(inode); - struct timespec now; - - block_i = ei->i_block_alloc_info; - /* - * If we're splicing into a [td]indirect block (as opposed to the - * inode) then we need to get write access to the [td]indirect block - * before the splice. - */ - if (where->bh) { - BUFFER_TRACE(where->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, where->bh); - if (err) - goto err_out; - } - /* That's it */ - - *where->p = where->key; - - /* - * Update the host buffer_head or inode to point to more just allocated - * direct blocks blocks - */ - if (num == 0 && blks > 1) { - current_block = le32_to_cpu(where->key) + 1; - for (i = 1; i < blks; i++) - *(where->p + i ) = cpu_to_le32(current_block++); - } - - /* - * update the most recently allocated logical & physical block - * in i_block_alloc_info, to assist find the proper goal block for next - * allocation - */ - if (block_i) { - block_i->last_alloc_logical_block = block + blks - 1; - block_i->last_alloc_physical_block = - le32_to_cpu(where[num].key) + blks - 1; - } - - /* We are done with atomic stuff, now do the rest of housekeeping */ - now = CURRENT_TIME_SEC; - if (!timespec_equal(&inode->i_ctime, &now) || !where->bh) { - inode->i_ctime = now; - ext3_mark_inode_dirty(handle, inode); - } - /* ext3_mark_inode_dirty already updated i_sync_tid */ - atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); - - /* had we spliced it onto indirect block? */ - if (where->bh) { - /* - * If we spliced it onto an indirect block, we haven't - * altered the inode. Note however that if it is being spliced - * onto an indirect block at the very end of the file (the - * file is growing) then we *will* alter the inode to reflect - * the new i_size. 
But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode. - */ - jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, where->bh); - if (err) - goto err_out; - } else { - /* - * OK, we spliced it into the inode itself on a direct block. - * Inode was dirtied above. - */ - jbd_debug(5, "splicing direct\n"); - } - return err; - -err_out: - for (i = 1; i <= num; i++) { - BUFFER_TRACE(where[i].bh, "call journal_forget"); - ext3_journal_forget(handle, where[i].bh); - ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); - } - ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); - - return err; -} - -/* - * Allocation strategy is simple: if we have to allocate something, we will - * have to go the whole way to leaf. So let's do it before attaching anything - * to tree, set linkage between the newborn blocks, write them if sync is - * required, recheck the path, free and repeat if check fails, otherwise - * set the last missing link (that will protect us from any truncate-generated - * removals - all blocks on the path are immune now) and possibly force the - * write on the parent block. - * That has a nice additional property: no special recovery from the failed - * allocations is needed - we simply release blocks and do not touch anything - * reachable from inode. - * - * `handle' can be NULL if create == 0. - * - * The BKL may not be held on entry here. Be sure to take it early. - * return > 0, # of blocks mapped or allocated. - * return = 0, if plain lookup failed. - * return < 0, error case. 
- */ -int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, - sector_t iblock, unsigned long maxblocks, - struct buffer_head *bh_result, - int create) -{ - int err = -EIO; - int offsets[4]; - Indirect chain[4]; - Indirect *partial; - ext3_fsblk_t goal; - int indirect_blks; - int blocks_to_boundary = 0; - int depth; - struct ext3_inode_info *ei = EXT3_I(inode); - int count = 0; - ext3_fsblk_t first_block = 0; - - - trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); - J_ASSERT(handle != NULL || create == 0); - depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); - - if (depth == 0) - goto out; - - partial = ext3_get_branch(inode, depth, offsets, chain, &err); - - /* Simplest case - block found, no allocation needed */ - if (!partial) { - first_block = le32_to_cpu(chain[depth - 1].key); - clear_buffer_new(bh_result); - count++; - /*map more blocks*/ - while (count < maxblocks && count <= blocks_to_boundary) { - ext3_fsblk_t blk; - - if (!verify_chain(chain, chain + depth - 1)) { - /* - * Indirect block might be removed by - * truncate while we were reading it. - * Handling of that case: forget what we've - * got now. Flag the err as EAGAIN, so it - * will reread. 
- */ - err = -EAGAIN; - count = 0; - break; - } - blk = le32_to_cpu(*(chain[depth-1].p + count)); - - if (blk == first_block + count) - count++; - else - break; - } - if (err != -EAGAIN) - goto got_it; - } - - /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) - goto cleanup; - - /* - * Block out ext3_truncate while we alter the tree - */ - mutex_lock(&ei->truncate_mutex); - - /* - * If the indirect block is missing while we are reading - * the chain(ext3_get_branch() returns -EAGAIN err), or - * if the chain has been changed after we grab the semaphore, - * (either because another process truncated this branch, or - * another get_block allocated this branch) re-grab the chain to see if - * the request block has been allocated or not. - * - * Since we already block the truncate/other get_block - * at this point, we will have the current copy of the chain when we - * splice the branch into the tree. - */ - if (err == -EAGAIN || !verify_chain(chain, partial)) { - while (partial > chain) { - brelse(partial->bh); - partial--; - } - partial = ext3_get_branch(inode, depth, offsets, chain, &err); - if (!partial) { - count++; - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - clear_buffer_new(bh_result); - goto got_it; - } - } - - /* - * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary - */ - if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) - ext3_init_block_alloc_info(inode); - - goal = ext3_find_goal(inode, iblock, partial); - - /* the number of blocks need to allocate for [d,t]indirect blocks */ - indirect_blks = (chain + depth) - partial - 1; - - /* - * Next look up the indirect map to count the totoal number of - * direct blocks to allocate for this branch. 
- */ - count = ext3_blks_to_allocate(partial, indirect_blks, - maxblocks, blocks_to_boundary); - err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, - offsets + (partial - chain), partial); - - /* - * The ext3_splice_branch call will free and forget any buffers - * on the new chain if there is a failure, but that risks using - * up transaction credits, especially for bitmaps where the - * credits cannot be returned. Can we handle this somehow? We - * may need to return -EAGAIN upwards in the worst case. --sct - */ - if (!err) - err = ext3_splice_branch(handle, inode, iblock, - partial, indirect_blks, count); - mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; - - set_buffer_new(bh_result); -got_it: - map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); - if (count > blocks_to_boundary) - set_buffer_boundary(bh_result); - err = count; - /* Clean up and exit */ - partial = chain + depth - 1; /* the whole chain */ -cleanup: - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - partial--; - } - BUFFER_TRACE(bh_result, "returned"); -out: - trace_ext3_get_blocks_exit(inode, iblock, - depth ? le32_to_cpu(chain[depth-1].key) : 0, - count, err); - return err; -} - -/* Maximum number of blocks we map for direct IO at once. */ -#define DIO_MAX_BLOCKS 4096 -/* - * Number of credits we need for writing DIO_MAX_BLOCKS: - * We need sb + group descriptor + bitmap + inode -> 4 - * For B blocks with A block pointers per block we need: - * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). - * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. 
- */ -#define DIO_CREDITS 25 - -static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - handle_t *handle = ext3_journal_current_handle(); - int ret = 0, started = 0; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - - if (create && !handle) { /* Direct IO write... */ - if (max_blocks > DIO_MAX_BLOCKS) - max_blocks = DIO_MAX_BLOCKS; - handle = ext3_journal_start(inode, DIO_CREDITS + - EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - started = 1; - } - - ret = ext3_get_blocks_handle(handle, inode, iblock, - max_blocks, bh_result, create); - if (ret > 0) { - bh_result->b_size = (ret << inode->i_blkbits); - ret = 0; - } - if (started) - ext3_journal_stop(handle); -out: - return ret; -} - -int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, - u64 start, u64 len) -{ - return generic_block_fiemap(inode, fieinfo, start, len, - ext3_get_block); -} - -/* - * `handle' can be NULL if create is zero - */ -struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, - long block, int create, int *errp) -{ - struct buffer_head dummy; - int fatal = 0, err; - - J_ASSERT(handle != NULL || create == 0); - - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); - err = ext3_get_blocks_handle(handle, inode, block, 1, - &dummy, create); - /* - * ext3_get_blocks_handle() returns number of blocks - * mapped. 0 in case of a HOLE. - */ - if (err > 0) { - WARN_ON(err > 1); - err = 0; - } - *errp = err; - if (!err && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (unlikely(!bh)) { - *errp = -ENOMEM; - goto err; - } - if (buffer_new(&dummy)) { - J_ASSERT(create != 0); - J_ASSERT(handle != NULL); - - /* - * Now that we do not always journal data, we should - * keep in mind whether this should always journal the - * new buffer as metadata. 
For now, regular file - * writes use ext3_get_block instead, so it's not a - * problem. - */ - lock_buffer(bh); - BUFFER_TRACE(bh, "call get_create_access"); - fatal = ext3_journal_get_create_access(handle, bh); - if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data,0,inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - } - unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); - if (!fatal) - fatal = err; - } else { - BUFFER_TRACE(bh, "not a new buffer"); - } - if (fatal) { - *errp = fatal; - brelse(bh); - bh = NULL; - } - return bh; - } -err: - return NULL; -} - -struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, - int block, int create, int *err) -{ - struct buffer_head * bh; - - bh = ext3_getblk(handle, inode, block, create, err); - if (!bh) - return bh; - if (bh_uptodate_or_lock(bh)) - return bh; - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META | REQ_PRIO, bh); - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return bh; - put_bh(bh); - *err = -EIO; - return NULL; -} - -static int walk_page_buffers( handle_t *handle, - struct buffer_head *head, - unsigned from, - unsigned to, - int *partial, - int (*fn)( handle_t *handle, - struct buffer_head *bh)) -{ - struct buffer_head *bh; - unsigned block_start, block_end; - unsigned blocksize = head->b_size; - int err, ret = 0; - struct buffer_head *next; - - for ( bh = head, block_start = 0; - ret == 0 && (bh != head || !block_start); - block_start = block_end, bh = next) - { - next = bh->b_this_page; - block_end = block_start + blocksize; - if (block_end <= from || block_start >= to) { - if (partial && !buffer_uptodate(bh)) - *partial = 1; - continue; - } - err = (*fn)(handle, bh); - if (!ret) - ret = err; - } - return ret; -} - -/* - * To preserve ordering, it is essential that the hole instantiation and - * the data write be encapsulated in a single transaction. 
We cannot - * close off a transaction and start a new one between the ext3_get_block() - * and the commit_write(). So doing the journal_start at the start of - * prepare_write() is the right place. - * - * Also, this function can nest inside ext3_writepage() -> - * block_write_full_page(). In that case, we *know* that ext3_writepage() - * has generated enough buffer credits to do the whole page. So we won't - * block on the journal in that case, which is good, because the caller may - * be PF_MEMALLOC. - * - * By accident, ext3 can be reentered when a transaction is open via - * quota file writes. If we were to commit the transaction while thus - * reentered, there can be a deadlock - we would be holding a quota - * lock, and the commit would never complete if another thread had a - * transaction open and was blocking on the quota lock - a ranking - * violation. - * - * So what we do is to rely on the fact that journal_stop/journal_start - * will _not_ run commit under these circumstances because handle->h_ref - * is elevated. We'll still have enough credits for the tiny quotafile - * write. - */ -static int do_journal_get_write_access(handle_t *handle, - struct buffer_head *bh) -{ - int dirty = buffer_dirty(bh); - int ret; - - if (!buffer_mapped(bh) || buffer_freed(bh)) - return 0; - /* - * __block_prepare_write() could have dirtied some buffers. Clean - * the dirty bit as jbd2_journal_get_write_access() could complain - * otherwise about fs integrity issues. Setting of the dirty bit - * by __block_prepare_write() isn't a real problem here as we clear - * the bit before releasing a page lock and thus writeback cannot - * ever write the buffer. - */ - if (dirty) - clear_buffer_dirty(bh); - ret = ext3_journal_get_write_access(handle, bh); - if (!ret && dirty) - ret = ext3_journal_dirty_metadata(handle, bh); - return ret; -} - -/* - * Truncate blocks that were not used by write. 
We have to truncate the - * pagecache as well so that corresponding buffers get properly unmapped. - */ -static void ext3_truncate_failed_write(struct inode *inode) -{ - truncate_inode_pages(inode->i_mapping, inode->i_size); - ext3_truncate(inode); -} - -/* - * Truncate blocks that were not used by direct IO write. We have to zero out - * the last file block as well because direct IO might have written to it. - */ -static void ext3_truncate_failed_direct_write(struct inode *inode) -{ - ext3_block_truncate_page(inode, inode->i_size); - ext3_truncate(inode); -} - -static int ext3_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - struct inode *inode = mapping->host; - int ret; - handle_t *handle; - int retries = 0; - struct page *page; - pgoff_t index; - unsigned from, to; - /* Reserve one block more for addition to orphan list in case - * we allocate blocks but write fails for some reason */ - int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; - - trace_ext3_write_begin(inode, pos, len, flags); - - index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; - -retry: - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) - return -ENOMEM; - *pagep = page; - - handle = ext3_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - unlock_page(page); - page_cache_release(page); - ret = PTR_ERR(handle); - goto out; - } - ret = __block_write_begin(page, pos, len, ext3_get_block); - if (ret) - goto write_begin_failed; - - if (ext3_should_journal_data(inode)) { - ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, do_journal_get_write_access); - } -write_begin_failed: - if (ret) { - /* - * block_write_begin may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. 
- * - * Add inode to orphan list in case we crash before truncate - * finishes. Do this only if ext3_can_truncate() agrees so - * that orphan processing code is happy. - */ - if (pos + len > inode->i_size && ext3_can_truncate(inode)) - ext3_orphan_add(handle, inode); - ext3_journal_stop(handle); - unlock_page(page); - page_cache_release(page); - if (pos + len > inode->i_size) - ext3_truncate_failed_write(inode); - } - if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) - goto retry; -out: - return ret; -} - - -int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) -{ - int err = journal_dirty_data(handle, bh); - if (err) - ext3_journal_abort_handle(__func__, __func__, - bh, handle, err); - return err; -} - -/* For ordered writepage and write_end functions */ -static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) -{ - /* - * Write could have mapped the buffer but it didn't copy the data in - * yet. So avoid filing such buffer into a transaction. - */ - if (buffer_mapped(bh) && buffer_uptodate(bh)) - return ext3_journal_dirty_data(handle, bh); - return 0; -} - -/* For write_end() in data=journal mode */ -static int write_end_fn(handle_t *handle, struct buffer_head *bh) -{ - if (!buffer_mapped(bh) || buffer_freed(bh)) - return 0; - set_buffer_uptodate(bh); - return ext3_journal_dirty_metadata(handle, bh); -} - -/* - * This is nasty and subtle: ext3_write_begin() could have allocated blocks - * for the whole page but later we failed to copy the data in. Update inode - * size according to what we managed to copy. The rest is going to be - * truncated in write_end function. - */ -static void update_file_sizes(struct inode *inode, loff_t pos, unsigned copied) -{ - /* What matters to us is i_disksize. 
We don't write i_size anywhere */ - if (pos + copied > inode->i_size) - i_size_write(inode, pos + copied); - if (pos + copied > EXT3_I(inode)->i_disksize) { - EXT3_I(inode)->i_disksize = pos + copied; - mark_inode_dirty(inode); - } -} - -/* - * We need to pick up the new inode size which generic_commit_write gave us - * `file' can be NULL - eg, when called from page_symlink(). - * - * ext3 never places buffers on inode->i_mapping->private_list. metadata - * buffers are managed internally. - */ -static int ext3_ordered_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = file->f_mapping->host; - unsigned from, to; - int ret = 0, ret2; - - trace_ext3_ordered_write_end(inode, pos, len, copied); - copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + copied; - ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, journal_dirty_data_fn); - - if (ret == 0) - update_file_sizes(inode, pos, copied); - /* - * There may be allocated blocks outside of i_size because - * we failed to copy some data. Prepare for truncate. - */ - if (pos + len > inode->i_size && ext3_can_truncate(inode)) - ext3_orphan_add(handle, inode); - ret2 = ext3_journal_stop(handle); - if (!ret) - ret = ret2; - unlock_page(page); - page_cache_release(page); - - if (pos + len > inode->i_size) - ext3_truncate_failed_write(inode); - return ret ? 
ret : copied; -} - -static int ext3_writeback_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = file->f_mapping->host; - int ret; - - trace_ext3_writeback_write_end(inode, pos, len, copied); - copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); - update_file_sizes(inode, pos, copied); - /* - * There may be allocated blocks outside of i_size because - * we failed to copy some data. Prepare for truncate. - */ - if (pos + len > inode->i_size && ext3_can_truncate(inode)) - ext3_orphan_add(handle, inode); - ret = ext3_journal_stop(handle); - unlock_page(page); - page_cache_release(page); - - if (pos + len > inode->i_size) - ext3_truncate_failed_write(inode); - return ret ? ret : copied; -} - -static int ext3_journalled_write_end(struct file *file, - struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - handle_t *handle = ext3_journal_current_handle(); - struct inode *inode = mapping->host; - struct ext3_inode_info *ei = EXT3_I(inode); - int ret = 0, ret2; - int partial = 0; - unsigned from, to; - - trace_ext3_journalled_write_end(inode, pos, len, copied); - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; - - if (copied < len) { - if (!PageUptodate(page)) - copied = 0; - page_zero_new_buffers(page, from + copied, to); - to = from + copied; - } - - ret = walk_page_buffers(handle, page_buffers(page), from, - to, &partial, write_end_fn); - if (!partial) - SetPageUptodate(page); - - if (pos + copied > inode->i_size) - i_size_write(inode, pos + copied); - /* - * There may be allocated blocks outside of i_size because - * we failed to copy some data. Prepare for truncate. 
- */ - if (pos + len > inode->i_size && ext3_can_truncate(inode)) - ext3_orphan_add(handle, inode); - ext3_set_inode_state(inode, EXT3_STATE_JDATA); - atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); - if (inode->i_size > ei->i_disksize) { - ei->i_disksize = inode->i_size; - ret2 = ext3_mark_inode_dirty(handle, inode); - if (!ret) - ret = ret2; - } - - ret2 = ext3_journal_stop(handle); - if (!ret) - ret = ret2; - unlock_page(page); - page_cache_release(page); - - if (pos + len > inode->i_size) - ext3_truncate_failed_write(inode); - return ret ? ret : copied; -} - -/* - * bmap() is special. It gets used by applications such as lilo and by - * the swapper to find the on-disk block of a specific piece of data. - * - * Naturally, this is dangerous if the block concerned is still in the - * journal. If somebody makes a swapfile on an ext3 data-journaling - * filesystem and enables swap, then they may get a nasty shock when the - * data getting swapped to that swapfile suddenly gets overwritten by - * the original zero's written out previously to the journal and - * awaiting writeback in the kernel's buffer cache. - * - * So, if we see any bmap calls here on a modified, data-journaled file, - * take extra steps to flush any blocks which might be in the cache. - */ -static sector_t ext3_bmap(struct address_space *mapping, sector_t block) -{ - struct inode *inode = mapping->host; - journal_t *journal; - int err; - - if (ext3_test_inode_state(inode, EXT3_STATE_JDATA)) { - /* - * This is a REALLY heavyweight approach, but the use of - * bmap on dirty files is expected to be extremely rare: - * only if we run lilo or swapon on a freshly made file - * do we expect this to happen. - * - * (bmap requires CAP_SYS_RAWIO so this does not - * represent an unprivileged user DOS attack --- we'd be - * in trouble if mortal users could trigger this path at - * will.) - * - * NB. EXT3_STATE_JDATA is not set on files other than - * regular files. 
If somebody wants to bmap a directory - * or symlink and gets confused because the buffer - * hasn't yet been flushed to disk, they deserve - * everything they get. - */ - - ext3_clear_inode_state(inode, EXT3_STATE_JDATA); - journal = EXT3_JOURNAL(inode); - journal_lock_updates(journal); - err = journal_flush(journal); - journal_unlock_updates(journal); - - if (err) - return 0; - } - - return generic_block_bmap(mapping,block,ext3_get_block); -} - -static int bget_one(handle_t *handle, struct buffer_head *bh) -{ - get_bh(bh); - return 0; -} - -static int bput_one(handle_t *handle, struct buffer_head *bh) -{ - put_bh(bh); - return 0; -} - -static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) -{ - return !buffer_mapped(bh); -} - -/* - * Note that whenever we need to map blocks we start a transaction even if - * we're not journalling data. This is to preserve ordering: any hole - * instantiation within __block_write_full_page -> ext3_get_block() should be - * journalled along with the data so we don't crash and then get metadata which - * refers to old data. - * - * In all journalling modes block_write_full_page() will start the I/O. - * - * We don't honour synchronous mounts for writepage(). That would be - * disastrous. Any write() or metadata operation will sync the fs for - * us. - */ -static int ext3_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *page_bufs; - handle_t *handle = NULL; - int ret = 0; - int err; - - J_ASSERT(PageLocked(page)); - /* - * We don't want to warn for emergency remount. The condition is - * ordered to avoid dereferencing inode->i_sb in non-error case to - * avoid slow-downs. - */ - WARN_ON_ONCE(IS_RDONLY(inode) && - !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); - - /* - * We give up here if we're reentered, because it might be for a - * different filesystem. 
- */ - if (ext3_journal_current_handle()) - goto out_fail; - - trace_ext3_ordered_writepage(page); - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - page_bufs = page_buffers(page); - } else { - page_bufs = page_buffers(page); - if (!walk_page_buffers(NULL, page_bufs, 0, PAGE_CACHE_SIZE, - NULL, buffer_unmapped)) { - /* Provide NULL get_block() to catch bugs if buffers - * weren't really mapped */ - return block_write_full_page(page, NULL, wbc); - } - } - handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); - - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_fail; - } - - walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, bget_one); - - ret = block_write_full_page(page, ext3_get_block, wbc); - - /* - * The page can become unlocked at any point now, and - * truncate can then come in and change things. So we - * can't touch *page from now on. But *page_bufs is - * safe due to elevated refcount. - */ - - /* - * And attach them to the current transaction. But only if - * block_write_full_page() succeeded. Otherwise they are unmapped, - * and generally junk. - */ - if (ret == 0) - ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, - NULL, journal_dirty_data_fn); - walk_page_buffers(handle, page_bufs, 0, - PAGE_CACHE_SIZE, NULL, bput_one); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; - -out_fail: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return ret; -} - -static int ext3_writeback_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - handle_t *handle = NULL; - int ret = 0; - int err; - - J_ASSERT(PageLocked(page)); - /* - * We don't want to warn for emergency remount. The condition is - * ordered to avoid dereferencing inode->i_sb in non-error case to - * avoid slow-downs. 
- */ - WARN_ON_ONCE(IS_RDONLY(inode) && - !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); - - if (ext3_journal_current_handle()) - goto out_fail; - - trace_ext3_writeback_writepage(page); - if (page_has_buffers(page)) { - if (!walk_page_buffers(NULL, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { - /* Provide NULL get_block() to catch bugs if buffers - * weren't really mapped */ - return block_write_full_page(page, NULL, wbc); - } - } - - handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out_fail; - } - - ret = block_write_full_page(page, ext3_get_block, wbc); - - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; - -out_fail: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return ret; -} - -static int ext3_journalled_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - handle_t *handle = NULL; - int ret = 0; - int err; - - J_ASSERT(PageLocked(page)); - /* - * We don't want to warn for emergency remount. The condition is - * ordered to avoid dereferencing inode->i_sb in non-error case to - * avoid slow-downs. - */ - WARN_ON_ONCE(IS_RDONLY(inode) && - !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); - - trace_ext3_journalled_writepage(page); - if (!page_has_buffers(page) || PageChecked(page)) { - if (ext3_journal_current_handle()) - goto no_write; - - handle = ext3_journal_start(inode, - ext3_writepage_trans_blocks(inode)); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto no_write; - } - /* - * It's mmapped pagecache. Add buffers and journal it. There - * doesn't seem much point in redirtying the page here. 
- */ - ClearPageChecked(page); - ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE, - ext3_get_block); - if (ret != 0) { - ext3_journal_stop(handle); - goto out_unlock; - } - ret = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); - - err = walk_page_buffers(handle, page_buffers(page), 0, - PAGE_CACHE_SIZE, NULL, write_end_fn); - if (ret == 0) - ret = err; - ext3_set_inode_state(inode, EXT3_STATE_JDATA); - atomic_set(&EXT3_I(inode)->i_datasync_tid, - handle->h_transaction->t_tid); - unlock_page(page); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - } else { - /* - * It is a page full of checkpoint-mode buffers. Go and write - * them. They should have been already mapped when they went - * to the journal so provide NULL get_block function to catch - * errors. - */ - ret = block_write_full_page(page, NULL, wbc); - } -out: - return ret; - -no_write: - redirty_page_for_writepage(wbc, page); -out_unlock: - unlock_page(page); - goto out; -} - -static int ext3_readpage(struct file *file, struct page *page) -{ - trace_ext3_readpage(page); - return mpage_readpage(page, ext3_get_block); -} - -static int -ext3_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); -} - -static void ext3_invalidatepage(struct page *page, unsigned int offset, - unsigned int length) -{ - journal_t *journal = EXT3_JOURNAL(page->mapping->host); - - trace_ext3_invalidatepage(page, offset, length); - - /* - * If it's a full truncate we just forget about the pending dirtying - */ - if (offset == 0 && length == PAGE_CACHE_SIZE) - ClearPageChecked(page); - - journal_invalidatepage(journal, page, offset, length); -} - -static int ext3_releasepage(struct page *page, gfp_t wait) -{ - journal_t *journal = EXT3_JOURNAL(page->mapping->host); - - trace_ext3_releasepage(page); - WARN_ON(PageChecked(page)); - if 
(!page_has_buffers(page)) - return 0; - return journal_try_to_free_buffers(journal, page, wait); -} - -/* - * If the O_DIRECT write will extend the file then add this inode to the - * orphan list. So recovery will truncate it back to the original size - * if the machine crashes during the write. - * - * If the O_DIRECT write is intantiating holes inside i_size and the machine - * crashes then stale disk data _may_ be exposed inside the file. But current - * VFS code falls back into buffered path in that case so we are safe. - */ -static ssize_t ext3_direct_IO(struct kiocb *iocb, struct iov_iter *iter, - loff_t offset) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext3_inode_info *ei = EXT3_I(inode); - handle_t *handle; - ssize_t ret; - int orphan = 0; - size_t count = iov_iter_count(iter); - int retries = 0; - - trace_ext3_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); - - if (iov_iter_rw(iter) == WRITE) { - loff_t final_size = offset + count; - - if (final_size > inode->i_size) { - /* Credits for sb + inode write */ - handle = ext3_journal_start(inode, 2); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - ret = ext3_orphan_add(handle, inode); - if (ret) { - ext3_journal_stop(handle); - goto out; - } - orphan = 1; - ei->i_disksize = inode->i_size; - ext3_journal_stop(handle); - } - } - -retry: - ret = blockdev_direct_IO(iocb, inode, iter, offset, ext3_get_block); - /* - * In case of error extending write may have instantiated a few - * blocks outside i_size. Trim these off again. 
- */ - if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { - loff_t isize = i_size_read(inode); - loff_t end = offset + count; - - if (end > isize) - ext3_truncate_failed_direct_write(inode); - } - if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - - if (orphan) { - int err; - - /* Credits for sb + inode write */ - handle = ext3_journal_start(inode, 2); - if (IS_ERR(handle)) { - /* This is really bad luck. We've written the data - * but cannot extend i_size. Truncate allocated blocks - * and pretend the write failed... */ - ext3_truncate_failed_direct_write(inode); - ret = PTR_ERR(handle); - if (inode->i_nlink) - ext3_orphan_del(NULL, inode); - goto out; - } - if (inode->i_nlink) - ext3_orphan_del(handle, inode); - if (ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext3_mark_inode_dirty() to userspace. So - * ignore it. - */ - ext3_mark_inode_dirty(handle, inode); - } - } - err = ext3_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - trace_ext3_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret); - return ret; -} - -/* - * Pages can be marked dirty completely asynchronously from ext3's journalling - * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do - * much here because ->set_page_dirty is called under VFS locks. The page is - * not necessarily locked. - * - * We cannot just dirty the page and leave attached buffers clean, because the - * buffers' dirty state is "definitive". We cannot just set the buffers dirty - * or jbddirty because all the journalling code will explode. - * - * So what we do is to mark the page "pending dirty" and next time writepage - * is called, propagate that into the buffers appropriately. 
- */ -static int ext3_journalled_set_page_dirty(struct page *page) -{ - SetPageChecked(page); - return __set_page_dirty_nobuffers(page); -} - -static const struct address_space_operations ext3_ordered_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_ordered_writepage, - .write_begin = ext3_write_begin, - .write_end = ext3_ordered_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, - .is_dirty_writeback = buffer_check_dirty_writeback, - .error_remove_page = generic_error_remove_page, -}; - -static const struct address_space_operations ext3_writeback_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_writeback_writepage, - .write_begin = ext3_write_begin, - .write_end = ext3_writeback_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, -}; - -static const struct address_space_operations ext3_journalled_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_journalled_writepage, - .write_begin = ext3_write_begin, - .write_end = ext3_journalled_write_end, - .set_page_dirty = ext3_journalled_set_page_dirty, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, -}; - -void ext3_set_aops(struct inode *inode) -{ - if (ext3_should_order_data(inode)) - inode->i_mapping->a_ops = &ext3_ordered_aops; - else if (ext3_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext3_writeback_aops; - else - 
inode->i_mapping->a_ops = &ext3_journalled_aops; -} - -/* - * ext3_block_truncate_page() zeroes out a mapping from file offset `from' - * up to the end of the block which corresponds to `from'. - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ -static int ext3_block_truncate_page(struct inode *inode, loff_t from) -{ - ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE - 1); - unsigned blocksize, iblock, length, pos; - struct page *page; - handle_t *handle = NULL; - struct buffer_head *bh; - int err = 0; - - /* Truncated on block boundary - nothing to do */ - blocksize = inode->i_sb->s_blocksize; - if ((from & (blocksize - 1)) == 0) - return 0; - - page = grab_cache_page(inode->i_mapping, index); - if (!page) - return -ENOMEM; - length = blocksize - (offset & (blocksize - 1)); - iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); - - if (!page_has_buffers(page)) - create_empty_buffers(page, blocksize, 0); - - /* Find the buffer that contains "offset" */ - bh = page_buffers(page); - pos = blocksize; - while (offset >= pos) { - bh = bh->b_this_page; - iblock++; - pos += blocksize; - } - - err = 0; - if (buffer_freed(bh)) { - BUFFER_TRACE(bh, "freed: skip"); - goto unlock; - } - - if (!buffer_mapped(bh)) { - BUFFER_TRACE(bh, "unmapped"); - ext3_get_block(inode, iblock, bh, 0); - /* unmapped? It's a hole - nothing to do */ - if (!buffer_mapped(bh)) { - BUFFER_TRACE(bh, "still unmapped"); - goto unlock; - } - } - - /* Ok, it's mapped. Make sure it's up-to-date */ - if (PageUptodate(page)) - set_buffer_uptodate(bh); - - if (!bh_uptodate_or_lock(bh)) { - err = bh_submit_read(bh); - /* Uhhuh. Read error. Complain and punt. 
*/ - if (err) - goto unlock; - } - - /* data=writeback mode doesn't need transaction to zero-out data */ - if (!ext3_should_writeback_data(inode)) { - /* We journal at most one block */ - handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) { - clear_highpage(page); - flush_dcache_page(page); - err = PTR_ERR(handle); - goto unlock; - } - } - - if (ext3_should_journal_data(inode)) { - BUFFER_TRACE(bh, "get write access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) - goto stop; - } - - zero_user(page, offset, length); - BUFFER_TRACE(bh, "zeroed end of block"); - - err = 0; - if (ext3_should_journal_data(inode)) { - err = ext3_journal_dirty_metadata(handle, bh); - } else { - if (ext3_should_order_data(inode)) - err = ext3_journal_dirty_data(handle, bh); - mark_buffer_dirty(bh); - } -stop: - if (handle) - ext3_journal_stop(handle); - -unlock: - unlock_page(page); - page_cache_release(page); - return err; -} - -/* - * Probably it should be a library function... search for first non-zero word - * or memcmp with zero_page, whatever is better for particular architecture. - * Linus? - */ -static inline int all_zeroes(__le32 *p, __le32 *q) -{ - while (p < q) - if (*p++) - return 0; - return 1; -} - -/** - * ext3_find_shared - find the indirect blocks for partial truncation. - * @inode: inode in question - * @depth: depth of the affected branch - * @offsets: offsets of pointers in that branch (see ext3_block_to_path) - * @chain: place to store the pointers to partial indirect blocks - * @top: place to the (detached) top of branch - * - * This is a helper function used by ext3_truncate(). - * - * When we do truncate() we may have to clean the ends of several - * indirect blocks but leave the blocks themselves alive. Block is - * partially truncated if some data below the new i_size is referred - * from it (and it is on the path to the first completely truncated - * data block, indeed). 
We have to free the top of that path along - * with everything to the right of the path. Since no allocation - * past the truncation point is possible until ext3_truncate() - * finishes, we may safely do the latter, but top of branch may - * require special attention - pageout below the truncation point - * might try to populate it. - * - * We atomically detach the top of branch from the tree, store the - * block number of its root in *@top, pointers to buffer_heads of - * partially truncated blocks - in @chain[].bh and pointers to - * their last elements that should not be removed - in - * @chain[].p. Return value is the pointer to last filled element - * of @chain. - * - * The work left to caller to do the actual freeing of subtrees: - * a) free the subtree starting from *@top - * b) free the subtrees whose roots are stored in - * (@chain[i].p+1 .. end of @chain[i].bh->b_data) - * c) free the subtrees growing from the inode past the @chain[0]. - * (no partially truncated stuff there). */ - -static Indirect *ext3_find_shared(struct inode *inode, int depth, - int offsets[4], Indirect chain[4], __le32 *top) -{ - Indirect *partial, *p; - int k, err; - - *top = 0; - /* Make k index the deepest non-null offset + 1 */ - for (k = depth; k > 1 && !offsets[k-1]; k--) - ; - partial = ext3_get_branch(inode, k, offsets, chain, &err); - /* Writer: pointers */ - if (!partial) - partial = chain + k-1; - /* - * If the branch acquired continuation since we've looked at it - - * fine, it should all survive and (new) top doesn't belong to us. - */ - if (!partial->key && *partial->p) - /* Writer: end */ - goto no_top; - for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) - ; - /* - * OK, we've found the last block that must survive. The rest of our - * branch should be detached before unlocking. However, if that rest - * of branch is all ours and does not grow immediately from the inode - * it's easier to cheat and just decrement partial->p. 
- */ - if (p == chain + k - 1 && p > chain) { - p->p--; - } else { - *top = *p->p; - /* Nope, don't do this in ext3. Must leave the tree intact */ -#if 0 - *p->p = 0; -#endif - } - /* Writer: end */ - - while(partial > p) { - brelse(partial->bh); - partial--; - } -no_top: - return partial; -} - -/* - * Zero a number of block pointers in either an inode or an indirect block. - * If we restart the transaction we must again get write access to the - * indirect block for further modification. - * - * We release `count' blocks on disk, but (last - first) may be greater - * than `count' because there can be holes in there. - */ -static void ext3_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t block_to_free, - unsigned long count, __le32 *first, __le32 *last) -{ - __le32 *p; - if (try_to_extend_transaction(handle, inode)) { - if (bh) { - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - if (ext3_journal_dirty_metadata(handle, bh)) - return; - } - ext3_mark_inode_dirty(handle, inode); - truncate_restart_transaction(handle, inode); - if (bh) { - BUFFER_TRACE(bh, "retaking write access"); - if (ext3_journal_get_write_access(handle, bh)) - return; - } - } - - /* - * Any buffers which are on the journal will be in memory. We find - * them on the hash table so journal_revoke() will run journal_forget() - * on them. We've already detached each block from the file, so - * bforget() in journal_forget() should be safe. - * - * AKPM: turn on bforget in journal_forget()!!! 
- */ - for (p = first; p < last; p++) { - u32 nr = le32_to_cpu(*p); - if (nr) { - struct buffer_head *bh; - - *p = 0; - bh = sb_find_get_block(inode->i_sb, nr); - ext3_forget(handle, 0, inode, bh, nr); - } - } - - ext3_free_blocks(handle, inode, block_to_free, count); -} - -/** - * ext3_free_data - free a list of data blocks - * @handle: handle for this transaction - * @inode: inode we are dealing with - * @this_bh: indirect buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: points immediately past the end of array - * - * We are freeing all blocks referred from that array (numbers are stored as - * little-endian 32-bit) and updating @inode->i_blocks appropriately. - * - * We accumulate contiguous runs of blocks to free. Conveniently, if these - * blocks are contiguous then releasing them at one time will only affect one - * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't - * actually use a lot of journal space. - * - * @this_bh will be %NULL if @first and @last point into the inode's direct - * block pointers. - */ -static void ext3_free_data(handle_t *handle, struct inode *inode, - struct buffer_head *this_bh, - __le32 *first, __le32 *last) -{ - ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ - unsigned long count = 0; /* Number of blocks in the run */ - __le32 *block_to_free_p = NULL; /* Pointer into inode/ind - corresponding to - block_to_free */ - ext3_fsblk_t nr; /* Current block # */ - __le32 *p; /* Pointer into inode/ind - for current block */ - int err; - - if (this_bh) { /* For indirect block */ - BUFFER_TRACE(this_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, this_bh); - /* Important: if we can't update the indirect pointers - * to the blocks, we can't free them. 
*/ - if (err) - return; - } - - for (p = first; p < last; p++) { - nr = le32_to_cpu(*p); - if (nr) { - /* accumulate blocks to free if they're contiguous */ - if (count == 0) { - block_to_free = nr; - block_to_free_p = p; - count = 1; - } else if (nr == block_to_free + count) { - count++; - } else { - ext3_clear_blocks(handle, inode, this_bh, - block_to_free, - count, block_to_free_p, p); - block_to_free = nr; - block_to_free_p = p; - count = 1; - } - } - } - - if (count > 0) - ext3_clear_blocks(handle, inode, this_bh, block_to_free, - count, block_to_free_p, p); - - if (this_bh) { - BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); - - /* - * The buffer head should have an attached journal head at this - * point. However, if the data is corrupted and an indirect - * block pointed to itself, it would have been detached when - * the block was cleared. Check for this instead of OOPSing. - */ - if (bh2jh(this_bh)) - ext3_journal_dirty_metadata(handle, this_bh); - else - ext3_error(inode->i_sb, "ext3_free_data", - "circular indirect block detected, " - "inode=%lu, block=%llu", - inode->i_ino, - (unsigned long long)this_bh->b_blocknr); - } -} - -/** - * ext3_free_branches - free an array of branches - * @handle: JBD handle for this transaction - * @inode: inode we are dealing with - * @parent_bh: the buffer_head which contains *@first and *@last - * @first: array of block numbers - * @last: pointer immediately past the end of array - * @depth: depth of the branches to free - * - * We are freeing all blocks referred from these branches (numbers are - * stored as little-endian 32-bit) and updating @inode->i_blocks - * appropriately. 
- */ -static void ext3_free_branches(handle_t *handle, struct inode *inode, - struct buffer_head *parent_bh, - __le32 *first, __le32 *last, int depth) -{ - ext3_fsblk_t nr; - __le32 *p; - - if (is_handle_aborted(handle)) - return; - - if (depth--) { - struct buffer_head *bh; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - p = last; - while (--p >= first) { - nr = le32_to_cpu(*p); - if (!nr) - continue; /* A hole */ - - /* Go read the buffer for the next level down */ - bh = sb_bread(inode->i_sb, nr); - - /* - * A read failure? Report error and clear slot - * (should be rare). - */ - if (!bh) { - ext3_error(inode->i_sb, "ext3_free_branches", - "Read failure, inode=%lu, block="E3FSBLK, - inode->i_ino, nr); - continue; - } - - /* This zaps the entire block. Bottom up. */ - BUFFER_TRACE(bh, "free child branches"); - ext3_free_branches(handle, inode, bh, - (__le32*)bh->b_data, - (__le32*)bh->b_data + addr_per_block, - depth); - - /* - * Everything below this this pointer has been - * released. Now let this top-of-subtree go. - * - * We want the freeing of this indirect block to be - * atomic in the journal with the updating of the - * bitmap block which owns it. So make some room in - * the journal. - * - * We zero the parent pointer *after* freeing its - * pointee in the bitmaps, so if extend_transaction() - * for some reason fails to put the bitmap changes and - * the release into the same transaction, recovery - * will merely complain about releasing a free block, - * rather than leaking blocks. - */ - if (is_handle_aborted(handle)) - return; - if (try_to_extend_transaction(handle, inode)) { - ext3_mark_inode_dirty(handle, inode); - truncate_restart_transaction(handle, inode); - } - - /* - * We've probably journalled the indirect block several - * times during the truncate. But it's no longer - * needed and we now drop it from the transaction via - * journal_revoke(). - * - * That's easy if it's exclusively part of this - * transaction. 
But if it's part of the committing - * transaction then journal_forget() will simply - * brelse() it. That means that if the underlying - * block is reallocated in ext3_get_block(), - * unmap_underlying_metadata() will find this block - * and will try to get rid of it. damn, damn. Thus - * we don't allow a block to be reallocated until - * a transaction freeing it has fully committed. - * - * We also have to make sure journal replay after a - * crash does not overwrite non-journaled data blocks - * with old metadata when the block got reallocated for - * data. Thus we have to store a revoke record for a - * block in the same transaction in which we free the - * block. - */ - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - - ext3_free_blocks(handle, inode, nr, 1); - - if (parent_bh) { - /* - * The block which we have just freed is - * pointed to by an indirect block: journal it - */ - BUFFER_TRACE(parent_bh, "get_write_access"); - if (!ext3_journal_get_write_access(handle, - parent_bh)){ - *p = 0; - BUFFER_TRACE(parent_bh, - "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, - parent_bh); - } - } - } - } else { - /* We have reached the bottom of the tree. */ - BUFFER_TRACE(parent_bh, "free data blocks"); - ext3_free_data(handle, inode, parent_bh, first, last); - } -} - -int ext3_can_truncate(struct inode *inode) -{ - if (S_ISREG(inode->i_mode)) - return 1; - if (S_ISDIR(inode->i_mode)) - return 1; - if (S_ISLNK(inode->i_mode)) - return !ext3_inode_is_fast_symlink(inode); - return 0; -} - -/* - * ext3_truncate() - * - * We block out ext3_get_block() block instantiations across the entire - * transaction, and VFS/VM ensures that ext3_truncate() cannot run - * simultaneously on behalf of the same inode. - * - * As we work through the truncate and commit bits of it to the journal there - * is one core, guiding principle: the file's tree must always be consistent on - * disk. We must be able to restart the truncate after a crash. 
- * - * The file's tree may be transiently inconsistent in memory (although it - * probably isn't), but whenever we close off and commit a journal transaction, - * the contents of (the filesystem + the journal) must be consistent and - * restartable. It's pretty simple, really: bottom up, right to left (although - * left-to-right works OK too). - * - * Note that at recovery time, journal replay occurs *before* the restart of - * truncate against the orphan inode list. - * - * The committed inode has the new, desired i_size (which is the same as - * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see - * that this inode's truncate did not complete and it will again call - * ext3_truncate() to have another go. So there will be instantiated blocks - * to the right of the truncation point in a crashed ext3 filesystem. But - * that's fine - as long as they are linked from the inode, the post-crash - * ext3_truncate() run will find them and release them. - */ -void ext3_truncate(struct inode *inode) -{ - handle_t *handle; - struct ext3_inode_info *ei = EXT3_I(inode); - __le32 *i_data = ei->i_data; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int offsets[4]; - Indirect chain[4]; - Indirect *partial; - __le32 nr = 0; - int n; - long last_block; - unsigned blocksize = inode->i_sb->s_blocksize; - - trace_ext3_truncate_enter(inode); - - if (!ext3_can_truncate(inode)) - goto out_notrans; - - if (inode->i_size == 0 && ext3_should_writeback_data(inode)) - ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); - - handle = start_transaction(inode); - if (IS_ERR(handle)) - goto out_notrans; - - last_block = (inode->i_size + blocksize-1) - >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); - n = ext3_block_to_path(inode, last_block, offsets, NULL); - if (n == 0) - goto out_stop; /* error */ - - /* - * OK. This truncate is going to happen. 
We add the inode to the - * orphan list, so that if this truncate spans multiple transactions, - * and we crash, we will resume the truncate when the filesystem - * recovers. It also marks the inode dirty, to catch the new size. - * - * Implication: the file must always be in a sane, consistent - * truncatable state while each transaction commits. - */ - if (ext3_orphan_add(handle, inode)) - goto out_stop; - - /* - * The orphan list entry will now protect us from any crash which - * occurs before the truncate completes, so it is now safe to propagate - * the new, shorter inode size (held for now in i_size) into the - * on-disk inode. We do this via i_disksize, which is the value which - * ext3 *really* writes onto the disk inode. - */ - ei->i_disksize = inode->i_size; - - /* - * From here we block out all ext3_get_block() callers who want to - * modify the block allocation tree. - */ - mutex_lock(&ei->truncate_mutex); - - if (n == 1) { /* direct blocks */ - ext3_free_data(handle, inode, NULL, i_data+offsets[0], - i_data + EXT3_NDIR_BLOCKS); - goto do_indirects; - } - - partial = ext3_find_shared(inode, n, offsets, chain, &nr); - /* Kill the top of shared branch (not detached) */ - if (nr) { - if (partial == chain) { - /* Shared branch grows from the inode */ - ext3_free_branches(handle, inode, NULL, - &nr, &nr+1, (chain+n-1) - partial); - *partial->p = 0; - /* - * We mark the inode dirty prior to restart, - * and prior to stop. No need for it here. 
- */ - } else { - /* Shared branch grows from an indirect block */ - ext3_free_branches(handle, inode, partial->bh, - partial->p, - partial->p+1, (chain+n-1) - partial); - } - } - /* Clear the ends of indirect blocks on the shared branch */ - while (partial > chain) { - ext3_free_branches(handle, inode, partial->bh, partial->p + 1, - (__le32*)partial->bh->b_data+addr_per_block, - (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse (partial->bh); - partial--; - } -do_indirects: - /* Kill the remaining (whole) subtrees */ - switch (offsets[0]) { - default: - nr = i_data[EXT3_IND_BLOCK]; - if (nr) { - ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); - i_data[EXT3_IND_BLOCK] = 0; - } - case EXT3_IND_BLOCK: - nr = i_data[EXT3_DIND_BLOCK]; - if (nr) { - ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); - i_data[EXT3_DIND_BLOCK] = 0; - } - case EXT3_DIND_BLOCK: - nr = i_data[EXT3_TIND_BLOCK]; - if (nr) { - ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); - i_data[EXT3_TIND_BLOCK] = 0; - } - case EXT3_TIND_BLOCK: - ; - } - - ext3_discard_reservation(inode); - - mutex_unlock(&ei->truncate_mutex); - inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); - - /* - * In a multi-transaction truncate, we only make the final transaction - * synchronous - */ - if (IS_SYNC(inode)) - handle->h_sync = 1; -out_stop: - /* - * If this was a simple ftruncate(), and the file will remain alive - * then we need to clear up the orphan record which we created above. - * However, if this was a real unlink then we were called by - * ext3_evict_inode(), and we allow that function to clean up the - * orphan info for us. - */ - if (inode->i_nlink) - ext3_orphan_del(handle, inode); - - ext3_journal_stop(handle); - trace_ext3_truncate_exit(inode); - return; -out_notrans: - /* - * Delete the inode from orphan list so that it doesn't stay there - * forever and trigger assertion on umount. 
- */ - if (inode->i_nlink) - ext3_orphan_del(NULL, inode); - trace_ext3_truncate_exit(inode); -} - -static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, - unsigned long ino, struct ext3_iloc *iloc) -{ - unsigned long block_group; - unsigned long offset; - ext3_fsblk_t block; - struct ext3_group_desc *gdp; - - if (!ext3_valid_inum(sb, ino)) { - /* - * This error is already checked for in namei.c unless we are - * looking at an NFS filehandle, in which case no error - * report is needed - */ - return 0; - } - - block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); - gdp = ext3_get_group_desc(sb, block_group, NULL); - if (!gdp) - return 0; - /* - * Figure out the offset within the block group inode table - */ - offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * - EXT3_INODE_SIZE(sb); - block = le32_to_cpu(gdp->bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(sb)); - - iloc->block_group = block_group; - iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); - return block; -} - -/* - * ext3_get_inode_loc returns with an extra refcount against the inode's - * underlying buffer_head on success. If 'in_mem' is true, we have all - * data in memory that is needed to recreate the on-disk version of this - * inode. - */ -static int __ext3_get_inode_loc(struct inode *inode, - struct ext3_iloc *iloc, int in_mem) -{ - ext3_fsblk_t block; - struct buffer_head *bh; - - block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); - if (!block) - return -EIO; - - bh = sb_getblk(inode->i_sb, block); - if (unlikely(!bh)) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block="E3FSBLK, - inode->i_ino, block); - return -ENOMEM; - } - if (!buffer_uptodate(bh)) { - lock_buffer(bh); - - /* - * If the buffer has the write error flag, we have failed - * to write out another inode in the same block. In this - * case, we don't have to read the block because we may - * read the old inode data successfully. 
- */ - if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) - set_buffer_uptodate(bh); - - if (buffer_uptodate(bh)) { - /* someone brought it uptodate while we waited */ - unlock_buffer(bh); - goto has_buffer; - } - - /* - * If we have all information of the inode in memory and this - * is the only valid inode in the block, we need not read the - * block. - */ - if (in_mem) { - struct buffer_head *bitmap_bh; - struct ext3_group_desc *desc; - int inodes_per_buffer; - int inode_offset, i; - int block_group; - int start; - - block_group = (inode->i_ino - 1) / - EXT3_INODES_PER_GROUP(inode->i_sb); - inodes_per_buffer = bh->b_size / - EXT3_INODE_SIZE(inode->i_sb); - inode_offset = ((inode->i_ino - 1) % - EXT3_INODES_PER_GROUP(inode->i_sb)); - start = inode_offset & ~(inodes_per_buffer - 1); - - /* Is the inode bitmap in cache? */ - desc = ext3_get_group_desc(inode->i_sb, - block_group, NULL); - if (!desc) - goto make_io; - - bitmap_bh = sb_getblk(inode->i_sb, - le32_to_cpu(desc->bg_inode_bitmap)); - if (unlikely(!bitmap_bh)) - goto make_io; - - /* - * If the inode bitmap isn't in cache then the - * optimisation may end up performing two reads instead - * of one, so skip it. - */ - if (!buffer_uptodate(bitmap_bh)) { - brelse(bitmap_bh); - goto make_io; - } - for (i = start; i < start + inodes_per_buffer; i++) { - if (i == inode_offset) - continue; - if (ext3_test_bit(i, bitmap_bh->b_data)) - break; - } - brelse(bitmap_bh); - if (i == start + inodes_per_buffer) { - /* all other inodes are free, so skip I/O */ - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); - unlock_buffer(bh); - goto has_buffer; - } - } - -make_io: - /* - * There are other valid inodes in the buffer, this inode - * has in-inode xattrs, or we don't have this inode in memory. - * Read the block from disk. 
- */ - trace_ext3_load_inode(inode); - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META | REQ_PRIO, bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - ext3_error(inode->i_sb, "ext3_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block="E3FSBLK, - inode->i_ino, block); - brelse(bh); - return -EIO; - } - } -has_buffer: - iloc->bh = bh; - return 0; -} - -int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -{ - /* We have all inode data except xattrs in memory here. */ - return __ext3_get_inode_loc(inode, iloc, - !ext3_test_inode_state(inode, EXT3_STATE_XATTR)); -} - -void ext3_set_inode_flags(struct inode *inode) -{ - unsigned int flags = EXT3_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - if (flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; - if (flags & EXT3_APPEND_FL) - inode->i_flags |= S_APPEND; - if (flags & EXT3_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; - if (flags & EXT3_NOATIME_FL) - inode->i_flags |= S_NOATIME; - if (flags & EXT3_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; -} - -/* Propagate flags from i_flags to EXT3_I(inode)->i_flags */ -void ext3_get_inode_flags(struct ext3_inode_info *ei) -{ - unsigned int flags = ei->vfs_inode.i_flags; - - ei->i_flags &= ~(EXT3_SYNC_FL|EXT3_APPEND_FL| - EXT3_IMMUTABLE_FL|EXT3_NOATIME_FL|EXT3_DIRSYNC_FL); - if (flags & S_SYNC) - ei->i_flags |= EXT3_SYNC_FL; - if (flags & S_APPEND) - ei->i_flags |= EXT3_APPEND_FL; - if (flags & S_IMMUTABLE) - ei->i_flags |= EXT3_IMMUTABLE_FL; - if (flags & S_NOATIME) - ei->i_flags |= EXT3_NOATIME_FL; - if (flags & S_DIRSYNC) - ei->i_flags |= EXT3_DIRSYNC_FL; -} - -struct inode *ext3_iget(struct super_block *sb, unsigned long ino) -{ - struct ext3_iloc iloc; - struct ext3_inode *raw_inode; - struct ext3_inode_info *ei; - struct buffer_head *bh; - struct inode *inode; - journal_t *journal = EXT3_SB(sb)->s_journal; - transaction_t *transaction; - long ret; - int block; - 
uid_t i_uid; - gid_t i_gid; - - inode = iget_locked(sb, ino); - if (!inode) - return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) - return inode; - - ei = EXT3_I(inode); - ei->i_block_alloc_info = NULL; - - ret = __ext3_get_inode_loc(inode, &iloc, 0); - if (ret < 0) - goto bad_inode; - bh = iloc.bh; - raw_inode = ext3_raw_inode(&iloc); - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); - if(!(test_opt (inode->i_sb, NO_UID32))) { - i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; - i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; - } - i_uid_write(inode, i_uid); - i_gid_write(inode, i_gid); - set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); - inode->i_size = le32_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime); - inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; - - ei->i_state_flags = 0; - ei->i_dir_start_lookup = 0; - ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); - /* We now have enough fields to check if the inode was active or not. - * This is needed because nfsd might try to access dead inodes - * the test is that same one that e2fsck uses - * NeilBrown 1999oct15 - */ - if (inode->i_nlink == 0) { - if (inode->i_mode == 0 || - !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { - /* this inode is deleted */ - brelse (bh); - ret = -ESTALE; - goto bad_inode; - } - /* The only unlinked inodes we let through here have - * valid i_mode and are being read by the orphan - * recovery code: that's fine, we're about to complete - * the process of deleting those. 
*/ - } - inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); - ei->i_flags = le32_to_cpu(raw_inode->i_flags); -#ifdef EXT3_FRAGMENTS - ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); - ei->i_frag_no = raw_inode->i_frag; - ei->i_frag_size = raw_inode->i_fsize; -#endif - ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); - if (!S_ISREG(inode->i_mode)) { - ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); - } else { - inode->i_size |= - ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; - } - ei->i_disksize = inode->i_size; - inode->i_generation = le32_to_cpu(raw_inode->i_generation); - ei->i_block_group = iloc.block_group; - /* - * NOTE! The in-memory inode i_data array is in little-endian order - * even on big-endian machines: we do NOT byteswap the block numbers! - */ - for (block = 0; block < EXT3_N_BLOCKS; block++) - ei->i_data[block] = raw_inode->i_block[block]; - INIT_LIST_HEAD(&ei->i_orphan); - - /* - * Set transaction id's of transactions that have to be committed - * to finish f[data]sync. We set them to currently running transaction - * as we cannot be sure that the inode or some of its metadata isn't - * part of the transaction - the inode could have been reclaimed and - * now it is reread from disk. - */ - if (journal) { - tid_t tid; - - spin_lock(&journal->j_state_lock); - if (journal->j_running_transaction) - transaction = journal->j_running_transaction; - else - transaction = journal->j_committing_transaction; - if (transaction) - tid = transaction->t_tid; - else - tid = journal->j_commit_sequence; - spin_unlock(&journal->j_state_lock); - atomic_set(&ei->i_sync_tid, tid); - atomic_set(&ei->i_datasync_tid, tid); - } - - if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 && - EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { - /* - * When mke2fs creates big inodes it does not zero out - * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE, - * so ignore those first few inodes. 
- */ - ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); - if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT3_INODE_SIZE(inode->i_sb)) { - brelse (bh); - ret = -EIO; - goto bad_inode; - } - if (ei->i_extra_isize == 0) { - /* The extra space is currently unused. Use it. */ - ei->i_extra_isize = sizeof(struct ext3_inode) - - EXT3_GOOD_OLD_INODE_SIZE; - } else { - __le32 *magic = (void *)raw_inode + - EXT3_GOOD_OLD_INODE_SIZE + - ei->i_extra_isize; - if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) - ext3_set_inode_state(inode, EXT3_STATE_XATTR); - } - } else - ei->i_extra_isize = 0; - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - ext3_set_aops(inode); - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { - if (ext3_inode_is_fast_symlink(inode)) { - inode->i_op = &ext3_fast_symlink_inode_operations; - nd_terminate_link(ei->i_data, inode->i_size, - sizeof(ei->i_data) - 1); - inode->i_link = (char *)ei->i_data; - } else { - inode->i_op = &ext3_symlink_inode_operations; - ext3_set_aops(inode); - } - } else { - inode->i_op = &ext3_special_inode_operations; - if (raw_inode->i_block[0]) - init_special_inode(inode, inode->i_mode, - old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); - else - init_special_inode(inode, inode->i_mode, - new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); - } - brelse (iloc.bh); - ext3_set_inode_flags(inode); - unlock_new_inode(inode); - return inode; - -bad_inode: - iget_failed(inode); - return ERR_PTR(ret); -} - -/* - * Post the struct inode info into an on-disk inode location in the - * buffer-cache. This gobbles the caller's reference to the - * buffer_head in the inode location struct. - * - * The caller must have write access to iloc->bh. 
- */ -static int ext3_do_update_inode(handle_t *handle, - struct inode *inode, - struct ext3_iloc *iloc) -{ - struct ext3_inode *raw_inode = ext3_raw_inode(iloc); - struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh = iloc->bh; - int err = 0, rc, block; - int need_datasync = 0; - __le32 disksize; - uid_t i_uid; - gid_t i_gid; - -again: - /* we can't allow multiple procs in here at once, its a bit racey */ - lock_buffer(bh); - - /* For fields not not tracking in the in-memory inode, - * initialise them to zero for new inodes. */ - if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) - memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); - - ext3_get_inode_flags(ei); - raw_inode->i_mode = cpu_to_le16(inode->i_mode); - i_uid = i_uid_read(inode); - i_gid = i_gid_read(inode); - if(!(test_opt(inode->i_sb, NO_UID32))) { - raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); - raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); -/* - * Fix up interoperability with old kernels. 
Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ - if(!ei->i_dtime) { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(i_uid)); - raw_inode->i_gid_high = - cpu_to_le16(high_16_bits(i_gid)); - } else { - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } - } else { - raw_inode->i_uid_low = - cpu_to_le16(fs_high2lowuid(i_uid)); - raw_inode->i_gid_low = - cpu_to_le16(fs_high2lowgid(i_gid)); - raw_inode->i_uid_high = 0; - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); - disksize = cpu_to_le32(ei->i_disksize); - if (disksize != raw_inode->i_size) { - need_datasync = 1; - raw_inode->i_size = disksize; - } - raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); - raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); - raw_inode->i_flags = cpu_to_le32(ei->i_flags); -#ifdef EXT3_FRAGMENTS - raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); - raw_inode->i_frag = ei->i_frag_no; - raw_inode->i_fsize = ei->i_frag_size; -#endif - raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { - raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); - } else { - disksize = cpu_to_le32(ei->i_disksize >> 32); - if (disksize != raw_inode->i_size_high) { - raw_inode->i_size_high = disksize; - need_datasync = 1; - } - if (ei->i_disksize > 0x7fffffffULL) { - struct super_block *sb = inode->i_sb; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || - EXT3_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT3_GOOD_OLD_REV)) { - /* If this is the first large file - * created, add a flag to the superblock. 
- */ - unlock_buffer(bh); - err = ext3_journal_get_write_access(handle, - EXT3_SB(sb)->s_sbh); - if (err) - goto out_brelse; - - ext3_update_dynamic_rev(sb); - EXT3_SET_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE); - handle->h_sync = 1; - err = ext3_journal_dirty_metadata(handle, - EXT3_SB(sb)->s_sbh); - /* get our lock and start over */ - goto again; - } - } - } - raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - if (old_valid_dev(inode->i_rdev)) { - raw_inode->i_block[0] = - cpu_to_le32(old_encode_dev(inode->i_rdev)); - raw_inode->i_block[1] = 0; - } else { - raw_inode->i_block[0] = 0; - raw_inode->i_block[1] = - cpu_to_le32(new_encode_dev(inode->i_rdev)); - raw_inode->i_block[2] = 0; - } - } else for (block = 0; block < EXT3_N_BLOCKS; block++) - raw_inode->i_block[block] = ei->i_data[block]; - - if (ei->i_extra_isize) - raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - unlock_buffer(bh); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) - err = rc; - ext3_clear_inode_state(inode, EXT3_STATE_NEW); - - atomic_set(&ei->i_sync_tid, handle->h_transaction->t_tid); - if (need_datasync) - atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); -out_brelse: - brelse (bh); - ext3_std_error(inode->i_sb, err); - return err; -} - -/* - * ext3_write_inode() - * - * We are called from a few places: - * - * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. - * Here, there will be no transaction running. We wait for any running - * transaction to commit. - * - * - Within flush work (for sys_sync(), kupdate and such). - * We wait on commit, if told to. - * - * - Within iput_final() -> write_inode_now() - * We wait on commit, if told to. 
- * - * In all cases it is actually safe for us to return without doing anything, - * because the inode has been copied into a raw inode buffer in - * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL - * writeback. - * - * Note that we are absolutely dependent upon all inode dirtiers doing the - * right thing: they *must* call mark_inode_dirty() after dirtying info in - * which we are interested. - * - * It would be a bug for them to not do this. The code: - * - * mark_inode_dirty(inode) - * stuff(); - * inode->i_size = expr; - * - * is in error because write_inode() could occur while `stuff()' is running, - * and the new i_size will be lost. Plus the inode will no longer be on the - * superblock's dirty inode list. - */ -int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) -{ - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) - return 0; - - if (ext3_journal_current_handle()) { - jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); - dump_stack(); - return -EIO; - } - - /* - * No need to force transaction in WB_SYNC_NONE mode. Also - * ext3_sync_fs() will force the commit after everything is - * written. - */ - if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) - return 0; - - return ext3_force_commit(inode->i_sb); -} - -/* - * ext3_setattr() - * - * Called from notify_change. - * - * We want to trap VFS attempts to truncate the file as soon as - * possible. In particular, we want to make sure that when the VFS - * shrinks i_size, we put the inode on the orphan list and modify - * i_disksize immediately, so that during the subsequent flushing of - * dirty pages and freeing of disk blocks, we can guarantee that any - * commit will leave the blocks being flushed in an unused state on - * disk. (On recovery, the inode will get truncated and the blocks will - * be freed, so we have a strong guarantee that no future commit will - * leave these blocks visible to the user.) - * - * Called with inode->sem down. 
- */ -int ext3_setattr(struct dentry *dentry, struct iattr *attr) -{ - struct inode *inode = d_inode(dentry); - int error, rc = 0; - const unsigned int ia_valid = attr->ia_valid; - - error = inode_change_ok(inode, attr); - if (error) - return error; - - if (is_quota_modification(inode, attr)) - dquot_initialize(inode); - if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || - (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { - handle_t *handle; - - /* (user+group)*(old+new) structure, inode write (sb, - * inode block, ? - but truncate inode update has it) */ - handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+ - EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto err_out; - } - error = dquot_transfer(inode, attr); - if (error) { - ext3_journal_stop(handle); - return error; - } - /* Update corresponding info in inode so that everything is in - * one transaction */ - if (attr->ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (attr->ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - error = ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle); - } - - if (attr->ia_valid & ATTR_SIZE) - inode_dio_wait(inode); - - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { - handle_t *handle; - - handle = ext3_journal_start(inode, 3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto err_out; - } - - error = ext3_orphan_add(handle, inode); - if (error) { - ext3_journal_stop(handle); - goto err_out; - } - EXT3_I(inode)->i_disksize = attr->ia_size; - error = ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle); - if (error) { - /* Some hard fs error must have happened. Bail out. 
*/ - ext3_orphan_del(NULL, inode); - goto err_out; - } - rc = ext3_block_truncate_page(inode, attr->ia_size); - if (rc) { - /* Cleanup orphan list and exit */ - handle = ext3_journal_start(inode, 3); - if (IS_ERR(handle)) { - ext3_orphan_del(NULL, inode); - goto err_out; - } - ext3_orphan_del(handle, inode); - ext3_journal_stop(handle); - goto err_out; - } - } - - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - truncate_setsize(inode, attr->ia_size); - ext3_truncate(inode); - } - - setattr_copy(inode, attr); - mark_inode_dirty(inode); - - if (ia_valid & ATTR_MODE) - rc = posix_acl_chmod(inode, inode->i_mode); - -err_out: - ext3_std_error(inode->i_sb, error); - if (!error) - error = rc; - return error; -} - - -/* - * How many blocks doth make a writepage()? - * - * With N blocks per page, it may be: - * N data blocks - * 2 indirect block - * 2 dindirect - * 1 tindirect - * N+5 bitmap blocks (from the above) - * N+5 group descriptor summary blocks - * 1 inode block - * 1 superblock. - * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files - * - * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS - * - * With ordered or writeback data it's the same, less the N data blocks. - * - * If the inode's direct blocks can hold an integral number of pages then a - * page cannot straddle two indirect blocks, and we can only touch one indirect - * and dindirect block, and the "5" above becomes "3". - * - * This still overestimates under most circumstances. If we were to pass the - * start and end offsets in here as well we could do block_to_path() on each - * block and work out the exact number of indirects which are touched. Pah. - */ - -static int ext3_writepage_trans_blocks(struct inode *inode) -{ - int bpp = ext3_journal_blocks_per_page(inode); - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; - int ret; - - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else - ret = 2 * (bpp + indirects) + indirects + 2; - -#ifdef CONFIG_QUOTA - /* We know that structure was already allocated during dquot_initialize so - * we will be updating only the data blocks + inodes */ - ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb); -#endif - - return ret; -} - -/* - * The caller must have previously called ext3_reserve_inode_write(). - * Give this, we know that the caller already has write access to iloc->bh. - */ -int ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, struct ext3_iloc *iloc) -{ - int err = 0; - - /* the do_update_inode consumes one bh->b_count */ - get_bh(iloc->bh); - - /* ext3_do_update_inode() does journal_dirty_metadata */ - err = ext3_do_update_inode(handle, inode, iloc); - put_bh(iloc->bh); - return err; -} - -/* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. - */ - -int -ext3_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc) -{ - int err = 0; - if (handle) { - err = ext3_get_inode_loc(inode, iloc); - if (!err) { - BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, iloc->bh); - if (err) { - brelse(iloc->bh); - iloc->bh = NULL; - } - } - } - ext3_std_error(inode->i_sb, err); - return err; -} - -/* - * What we do here is to mark the in-core inode as clean with respect to inode - * dirtiness (it may still be data-dirty). - * This means that the in-core inode may be reaped by prune_icache - * without having to perform any I/O. This is a very good thing, - * because *any* task may call prune_icache - even ones which - * have a transaction open against a different journal. - * - * Is this cheating? Not really. Sure, we haven't written the - * inode out, but prune_icache isn't a user-visible syncing function. 
- * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) - * we start and wait on commits. - */ -int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) -{ - struct ext3_iloc iloc; - int err; - - might_sleep(); - trace_ext3_mark_inode_dirty(inode, _RET_IP_); - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (!err) - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - return err; -} - -/* - * ext3_dirty_inode() is called from __mark_inode_dirty() - * - * We're really interested in the case where a file is being extended. - * i_size has been changed by generic_commit_write() and we thus need - * to include the updated inode in the current transaction. - * - * Also, dquot_alloc_space() will always dirty the inode when blocks - * are allocated to the file. - * - * If the inode is marked synchronous, we don't honour that here - doing - * so would cause a commit on atime updates, which we don't bother doing. - * We handle synchronous inodes at the highest possible level. - */ -void ext3_dirty_inode(struct inode *inode, int flags) -{ - handle_t *current_handle = ext3_journal_current_handle(); - handle_t *handle; - - handle = ext3_journal_start(inode, 2); - if (IS_ERR(handle)) - goto out; - if (current_handle && - current_handle->h_transaction != handle->h_transaction) { - /* This task has a transaction open against a different fs */ - printk(KERN_EMERG "%s: transactions do not match!\n", - __func__); - } else { - jbd_debug(5, "marking dirty. outer handle=%p\n", - current_handle); - ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle); -out: - return; -} - -#if 0 -/* - * Bind an inode's backing buffer_head into this transaction, to prevent - * it from being flushed to disk early. Unlike - * ext3_reserve_inode_write, this leaves behind no bh reference and - * returns no iloc structure, so the caller needs to repeat the iloc - * lookup to mark the inode dirty later. 
- */ -static int ext3_pin_inode(handle_t *handle, struct inode *inode) -{ - struct ext3_iloc iloc; - - int err = 0; - if (handle) { - err = ext3_get_inode_loc(inode, &iloc); - if (!err) { - BUFFER_TRACE(iloc.bh, "get_write_access"); - err = journal_get_write_access(handle, iloc.bh); - if (!err) - err = ext3_journal_dirty_metadata(handle, - iloc.bh); - brelse(iloc.bh); - } - } - ext3_std_error(inode->i_sb, err); - return err; -} -#endif - -int ext3_change_inode_journal_flag(struct inode *inode, int val) -{ - journal_t *journal; - handle_t *handle; - int err; - - /* - * We have to be very careful here: changing a data block's - * journaling status dynamically is dangerous. If we write a - * data block to the journal, change the status and then delete - * that block, we risk forgetting to revoke the old log record - * from the journal and so a subsequent replay can corrupt data. - * So, first we make sure that the journal is empty and that - * nobody is changing anything. - */ - - journal = EXT3_JOURNAL(inode); - if (is_journal_aborted(journal)) - return -EROFS; - - journal_lock_updates(journal); - journal_flush(journal); - - /* - * OK, there are no updates running now, and all cached data is - * synced to disk. We are now in a completely consistent state - * which doesn't have anything in the journal, and we know that - * no filesystem updates are running, so it is safe to modify - * the inode's in-core data-journaling state flag now. - */ - - if (val) - EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - else - EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; - ext3_set_aops(inode); - - journal_unlock_updates(journal); - - /* Finally we can mark the inode as dirty. 
*/ - - handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - err = ext3_mark_inode_dirty(handle, inode); - handle->h_sync = 1; - ext3_journal_stop(handle); - ext3_std_error(inode->i_sb, err); - - return err; -} diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c deleted file mode 100644 index 4d96e9a64532..000000000000 --- a/fs/ext3/ioctl.c +++ /dev/null @@ -1,327 +0,0 @@ -/* - * linux/fs/ext3/ioctl.c - * - * Copyright (C) 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - */ - -#include -#include -#include -#include "ext3.h" - -long ext3_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) -{ - struct inode *inode = file_inode(filp); - struct ext3_inode_info *ei = EXT3_I(inode); - unsigned int flags; - unsigned short rsv_window_size; - - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { - case EXT3_IOC_GETFLAGS: - ext3_get_inode_flags(ei); - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int __user *) arg); - case EXT3_IOC_SETFLAGS: { - handle_t *handle = NULL; - int err; - struct ext3_iloc iloc; - unsigned int oldflags; - unsigned int jflag; - - if (!inode_owner_or_capable(inode)) - return -EACCES; - - if (get_user(flags, (int __user *) arg)) - return -EFAULT; - - err = mnt_want_write_file(filp); - if (err) - return err; - - flags = ext3_mask_flags(inode->i_mode, flags); - - mutex_lock(&inode->i_mutex); - - /* Is it quota file? Do not allow user to mess with it */ - err = -EPERM; - if (IS_NOQUOTA(inode)) - goto flags_out; - - oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT3_JOURNAL_DATA_FL; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. 
Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) - goto flags_out; - } - - /* - * The JOURNAL_DATA flag can only be changed by - * the relevant capability. - */ - if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { - if (!capable(CAP_SYS_RESOURCE)) - goto flags_out; - } - - handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto flags_out; - } - if (IS_SYNC(inode)) - handle->h_sync = 1; - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err) - goto flags_err; - - flags = flags & EXT3_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; - ei->i_flags = flags; - - ext3_set_inode_flags(inode); - inode->i_ctime = CURRENT_TIME_SEC; - - err = ext3_mark_iloc_dirty(handle, inode, &iloc); -flags_err: - ext3_journal_stop(handle); - if (err) - goto flags_out; - - if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) - err = ext3_change_inode_journal_flag(inode, jflag); -flags_out: - mutex_unlock(&inode->i_mutex); - mnt_drop_write_file(filp); - return err; - } - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: - return put_user(inode->i_generation, (int __user *) arg); - case EXT3_IOC_SETVERSION: - case EXT3_IOC_SETVERSION_OLD: { - handle_t *handle; - struct ext3_iloc iloc; - __u32 generation; - int err; - - if (!inode_owner_or_capable(inode)) - return -EPERM; - - err = mnt_want_write_file(filp); - if (err) - return err; - if (get_user(generation, (int __user *) arg)) { - err = -EFAULT; - goto setversion_out; - } - - mutex_lock(&inode->i_mutex); - handle = ext3_journal_start(inode, 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto unlock_out; - } - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err == 0) { - inode->i_ctime = CURRENT_TIME_SEC; - inode->i_generation = generation; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - } - ext3_journal_stop(handle); - -unlock_out: - 
mutex_unlock(&inode->i_mutex); -setversion_out: - mnt_drop_write_file(filp); - return err; - } - case EXT3_IOC_GETRSVSZ: - if (test_opt(inode->i_sb, RESERVATION) - && S_ISREG(inode->i_mode) - && ei->i_block_alloc_info) { - rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; - return put_user(rsv_window_size, (int __user *)arg); - } - return -ENOTTY; - case EXT3_IOC_SETRSVSZ: { - int err; - - if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) - return -ENOTTY; - - err = mnt_want_write_file(filp); - if (err) - return err; - - if (!inode_owner_or_capable(inode)) { - err = -EACCES; - goto setrsvsz_out; - } - - if (get_user(rsv_window_size, (int __user *)arg)) { - err = -EFAULT; - goto setrsvsz_out; - } - - if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) - rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; - - /* - * need to allocate reservation structure for this inode - * before set the window size - */ - mutex_lock(&ei->truncate_mutex); - if (!ei->i_block_alloc_info) - ext3_init_block_alloc_info(inode); - - if (ei->i_block_alloc_info){ - struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; - rsv->rsv_goal_size = rsv_window_size; - } - mutex_unlock(&ei->truncate_mutex); -setrsvsz_out: - mnt_drop_write_file(filp); - return err; - } - case EXT3_IOC_GROUP_EXTEND: { - ext3_fsblk_t n_blocks_count; - struct super_block *sb = inode->i_sb; - int err, err2; - - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - - err = mnt_want_write_file(filp); - if (err) - return err; - - if (get_user(n_blocks_count, (__u32 __user *)arg)) { - err = -EFAULT; - goto group_extend_out; - } - err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); - journal_lock_updates(EXT3_SB(sb)->s_journal); - err2 = journal_flush(EXT3_SB(sb)->s_journal); - journal_unlock_updates(EXT3_SB(sb)->s_journal); - if (err == 0) - err = err2; -group_extend_out: - mnt_drop_write_file(filp); - return err; - } - case EXT3_IOC_GROUP_ADD: { - struct 
ext3_new_group_data input; - struct super_block *sb = inode->i_sb; - int err, err2; - - if (!capable(CAP_SYS_RESOURCE)) - return -EPERM; - - err = mnt_want_write_file(filp); - if (err) - return err; - - if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, - sizeof(input))) { - err = -EFAULT; - goto group_add_out; - } - - err = ext3_group_add(sb, &input); - journal_lock_updates(EXT3_SB(sb)->s_journal); - err2 = journal_flush(EXT3_SB(sb)->s_journal); - journal_unlock_updates(EXT3_SB(sb)->s_journal); - if (err == 0) - err = err2; -group_add_out: - mnt_drop_write_file(filp); - return err; - } - case FITRIM: { - - struct super_block *sb = inode->i_sb; - struct fstrim_range range; - int ret = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&range, (struct fstrim_range __user *)arg, - sizeof(range))) - return -EFAULT; - - ret = ext3_trim_fs(sb, &range); - if (ret < 0) - return ret; - - if (copy_to_user((struct fstrim_range __user *)arg, &range, - sizeof(range))) - return -EFAULT; - - return 0; - } - - default: - return -ENOTTY; - } -} - -#ifdef CONFIG_COMPAT -long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - /* These are just misnamed, they actually get/put from/to user an int */ - switch (cmd) { - case EXT3_IOC32_GETFLAGS: - cmd = EXT3_IOC_GETFLAGS; - break; - case EXT3_IOC32_SETFLAGS: - cmd = EXT3_IOC_SETFLAGS; - break; - case EXT3_IOC32_GETVERSION: - cmd = EXT3_IOC_GETVERSION; - break; - case EXT3_IOC32_SETVERSION: - cmd = EXT3_IOC_SETVERSION; - break; - case EXT3_IOC32_GROUP_EXTEND: - cmd = EXT3_IOC_GROUP_EXTEND; - break; - case EXT3_IOC32_GETVERSION_OLD: - cmd = EXT3_IOC_GETVERSION_OLD; - break; - case EXT3_IOC32_SETVERSION_OLD: - cmd = EXT3_IOC_SETVERSION_OLD; - break; -#ifdef CONFIG_JBD_DEBUG - case EXT3_IOC32_WAIT_FOR_READONLY: - cmd = EXT3_IOC_WAIT_FOR_READONLY; - break; -#endif - case EXT3_IOC32_GETRSVSZ: - cmd = EXT3_IOC_GETRSVSZ; - break; - case EXT3_IOC32_SETRSVSZ: - cmd = 
EXT3_IOC_SETRSVSZ; - break; - case EXT3_IOC_GROUP_ADD: - break; - default: - return -ENOIOCTLCMD; - } - return ext3_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); -} -#endif diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c deleted file mode 100644 index c9e767cd4b67..000000000000 --- a/fs/ext3/namei.c +++ /dev/null @@ -1,2586 +0,0 @@ -/* - * linux/fs/ext3/namei.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/namei.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 - * Hash Tree Directory indexing (c) - * Daniel Phillips, 2001 - * Hash Tree Directory indexing porting - * Christopher Li, 2002 - * Hash Tree Directory indexing cleanup - * Theodore Ts'o, 2002 - */ - -#include -#include "ext3.h" -#include "namei.h" -#include "xattr.h" -#include "acl.h" - -/* - * define how far ahead to read directories while searching them. 
- */ -#define NAMEI_RA_CHUNKS 2 -#define NAMEI_RA_BLOCKS 4 -#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - -static struct buffer_head *ext3_append(handle_t *handle, - struct inode *inode, - u32 *block, int *err) -{ - struct buffer_head *bh; - - *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - - if ((bh = ext3_dir_bread(handle, inode, *block, 1, err))) { - inode->i_size += inode->i_sb->s_blocksize; - EXT3_I(inode)->i_disksize = inode->i_size; - *err = ext3_journal_get_write_access(handle, bh); - if (*err) { - brelse(bh); - bh = NULL; - } - } - return bh; -} - -#ifndef assert -#define assert(test) J_ASSERT(test) -#endif - -#ifdef DX_DEBUG -#define dxtrace(command) command -#else -#define dxtrace(command) -#endif - -struct fake_dirent -{ - __le32 inode; - __le16 rec_len; - u8 name_len; - u8 file_type; -}; - -struct dx_countlimit -{ - __le16 limit; - __le16 count; -}; - -struct dx_entry -{ - __le32 hash; - __le32 block; -}; - -/* - * dx_root_info is laid out so that if it should somehow get overlaid by a - * dirent the two low bits of the hash version will be zero. Therefore, the - * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
- */ - -struct dx_root -{ - struct fake_dirent dot; - char dot_name[4]; - struct fake_dirent dotdot; - char dotdot_name[4]; - struct dx_root_info - { - __le32 reserved_zero; - u8 hash_version; - u8 info_length; /* 8 */ - u8 indirect_levels; - u8 unused_flags; - } - info; - struct dx_entry entries[0]; -}; - -struct dx_node -{ - struct fake_dirent fake; - struct dx_entry entries[0]; -}; - - -struct dx_frame -{ - struct buffer_head *bh; - struct dx_entry *entries; - struct dx_entry *at; -}; - -struct dx_map_entry -{ - u32 hash; - u16 offs; - u16 size; -}; - -static inline unsigned dx_get_block (struct dx_entry *entry); -static void dx_set_block (struct dx_entry *entry, unsigned value); -static inline unsigned dx_get_hash (struct dx_entry *entry); -static void dx_set_hash (struct dx_entry *entry, unsigned value); -static unsigned dx_get_count (struct dx_entry *entries); -static unsigned dx_get_limit (struct dx_entry *entries); -static void dx_set_count (struct dx_entry *entries, unsigned value); -static void dx_set_limit (struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -static unsigned dx_node_limit (struct inode *dir); -static struct dx_frame *dx_probe(struct qstr *entry, - struct inode *dir, - struct dx_hash_info *hinfo, - struct dx_frame *frame, - int *err); -static void dx_release (struct dx_frame *frames); -static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); -static void dx_sort_map(struct dx_map_entry *map, unsigned count); -static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, - struct dx_map_entry *offsets, int count); -static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize); -static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -static int ext3_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, - __u32 
*start_hash); -static struct buffer_head * ext3_dx_find_entry(struct inode *dir, - struct qstr *entry, struct ext3_dir_entry_2 **res_dir, - int *err); -static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode); - -/* - * p is at least 6 bytes before the end of page - */ -static inline struct ext3_dir_entry_2 * -ext3_next_entry(struct ext3_dir_entry_2 *p) -{ - return (struct ext3_dir_entry_2 *)((char *)p + - ext3_rec_len_from_disk(p->rec_len)); -} - -/* - * Future: use high four bits of block for coalesce-on-delete flags - * Mask them off for now. - */ - -static inline unsigned dx_get_block (struct dx_entry *entry) -{ - return le32_to_cpu(entry->block) & 0x00ffffff; -} - -static inline void dx_set_block (struct dx_entry *entry, unsigned value) -{ - entry->block = cpu_to_le32(value); -} - -static inline unsigned dx_get_hash (struct dx_entry *entry) -{ - return le32_to_cpu(entry->hash); -} - -static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -{ - entry->hash = cpu_to_le32(value); -} - -static inline unsigned dx_get_count (struct dx_entry *entries) -{ - return le16_to_cpu(((struct dx_countlimit *) entries)->count); -} - -static inline unsigned dx_get_limit (struct dx_entry *entries) -{ - return le16_to_cpu(((struct dx_countlimit *) entries)->limit); -} - -static inline void dx_set_count (struct dx_entry *entries, unsigned value) -{ - ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); -} - -static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -{ - ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); -} - -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -{ - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - - EXT3_DIR_REC_LEN(2) - infosize; - return entry_space / sizeof(struct dx_entry); -} - -static inline unsigned dx_node_limit (struct inode *dir) -{ - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); 
- return entry_space / sizeof(struct dx_entry); -} - -/* - * Debug - */ -#ifdef DX_DEBUG -static void dx_show_index (char * label, struct dx_entry *entries) -{ - int i, n = dx_get_count (entries); - printk("%s index ", label); - for (i = 0; i < n; i++) - { - printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); - } - printk("\n"); -} - -struct stats -{ - unsigned names; - unsigned space; - unsigned bcount; -}; - -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, - int size, int show_names) -{ - unsigned names = 0, space = 0; - char *base = (char *) de; - struct dx_hash_info h = *hinfo; - - printk("names: "); - while ((char *) de < base + size) - { - if (de->inode) - { - if (show_names) - { - int len = de->name_len; - char *name = de->name; - while (len--) printk("%c", *name++); - ext3fs_dirhash(de->name, de->name_len, &h); - printk(":%x.%u ", h.hash, - (unsigned) ((char *) de - base)); - } - space += EXT3_DIR_REC_LEN(de->name_len); - names++; - } - de = ext3_next_entry(de); - } - printk("(%i)\n", names); - return (struct stats) { names, space, 1 }; -} - -struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, - struct dx_entry *entries, int levels) -{ - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count = dx_get_count (entries), names = 0, space = 0, i; - unsigned bcount = 0; - struct buffer_head *bh; - int err; - printk("%i indexed blocks...\n", count); - for (i = 0; i < count; i++, entries++) - { - u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; - u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; - struct stats stats; - printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); - if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; - stats = levels? 
- dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): - dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); - names += stats.names; - space += stats.space; - bcount += stats.bcount; - brelse (bh); - } - if (bcount) - printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", - names, space/bcount,(space/bcount)*100/blocksize); - return (struct stats) { names, space, bcount}; -} -#endif /* DX_DEBUG */ - -/* - * Probe for a directory leaf block to search. - * - * dx_probe can return ERR_BAD_DX_DIR, which means there was a format - * error in the directory index, and the caller should fall back to - * searching the directory normally. The callers of dx_probe **MUST** - * check for this error code, and make sure it never gets reflected - * back to userspace. - */ -static struct dx_frame * -dx_probe(struct qstr *entry, struct inode *dir, - struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -{ - unsigned count, indirect; - struct dx_entry *at, *entries, *p, *q, *m; - struct dx_root *root; - struct buffer_head *bh; - struct dx_frame *frame = frame_in; - u32 hash; - - frame->bh = NULL; - if (!(bh = ext3_dir_bread(NULL, dir, 0, 0, err))) { - *err = ERR_BAD_DX_DIR; - goto fail; - } - root = (struct dx_root *) bh->b_data; - if (root->info.hash_version != DX_HASH_TEA && - root->info.hash_version != DX_HASH_HALF_MD4 && - root->info.hash_version != DX_HASH_LEGACY) { - ext3_warning(dir->i_sb, __func__, - "Unrecognised inode hash code %d", - root->info.hash_version); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - hinfo->hash_version = root->info.hash_version; - if (hinfo->hash_version <= DX_HASH_TEA) - hinfo->hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned; - hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; - if (entry) - ext3fs_dirhash(entry->name, entry->len, hinfo); - hash = hinfo->hash; - - if (root->info.unused_flags & 1) { - ext3_warning(dir->i_sb, __func__, - "Unimplemented inode hash flags: 
%#06x", - root->info.unused_flags); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - - if ((indirect = root->info.indirect_levels) > 1) { - ext3_warning(dir->i_sb, __func__, - "Unimplemented inode hash depth: %#06x", - root->info.indirect_levels); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - - entries = (struct dx_entry *) (((char *)&root->info) + - root->info.info_length); - - if (dx_get_limit(entries) != dx_root_limit(dir, - root->info.info_length)) { - ext3_warning(dir->i_sb, __func__, - "dx entry: limit != root limit"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - - dxtrace (printk("Look up %x", hash)); - while (1) - { - count = dx_get_count(entries); - if (!count || count > dx_get_limit(entries)) { - ext3_warning(dir->i_sb, __func__, - "dx entry: no count or count > limit"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail2; - } - - p = entries + 1; - q = entries + count - 1; - while (p <= q) - { - m = p + (q - p)/2; - dxtrace(printk(".")); - if (dx_get_hash(m) > hash) - q = m - 1; - else - p = m + 1; - } - - if (0) // linear search cross check - { - unsigned n = count - 1; - at = entries; - while (n--) - { - dxtrace(printk(",")); - if (dx_get_hash(++at) > hash) - { - at--; - break; - } - } - assert (at == p - 1); - } - - at = p - 1; - dxtrace(printk(" %x->%u\n", at == entries? 
0: dx_get_hash(at), dx_get_block(at))); - frame->bh = bh; - frame->entries = entries; - frame->at = at; - if (!indirect--) return frame; - if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(at), 0, err))) { - *err = ERR_BAD_DX_DIR; - goto fail2; - } - at = entries = ((struct dx_node *) bh->b_data)->entries; - if (dx_get_limit(entries) != dx_node_limit (dir)) { - ext3_warning(dir->i_sb, __func__, - "dx entry: limit != node limit"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail2; - } - frame++; - frame->bh = NULL; - } -fail2: - while (frame >= frame_in) { - brelse(frame->bh); - frame--; - } -fail: - if (*err == ERR_BAD_DX_DIR) - ext3_warning(dir->i_sb, __func__, - "Corrupt dir inode %ld, running e2fsck is " - "recommended.", dir->i_ino); - return NULL; -} - -static void dx_release (struct dx_frame *frames) -{ - if (frames[0].bh == NULL) - return; - - if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) - brelse(frames[1].bh); - brelse(frames[0].bh); -} - -/* - * This function increments the frame pointer to search the next leaf - * block, and reads in the necessary intervening nodes if the search - * should be necessary. Whether or not the search is necessary is - * controlled by the hash parameter. If the hash value is even, then - * the search is only continued if the next block starts with that - * hash value. This is used if we are searching for a specific file. - * - * If the hash value is HASH_NB_ALWAYS, then always go to the next block. - * - * This function returns 1 if the caller should continue to search, - * or 0 if it should not. If there is an error reading one of the - * index blocks, it will a negative error code. - * - * If start_hash is non-null, it will be filled in with the starting - * hash of the next page. 
- */ -static int ext3_htree_next_block(struct inode *dir, __u32 hash, - struct dx_frame *frame, - struct dx_frame *frames, - __u32 *start_hash) -{ - struct dx_frame *p; - struct buffer_head *bh; - int err, num_frames = 0; - __u32 bhash; - - p = frame; - /* - * Find the next leaf page by incrementing the frame pointer. - * If we run out of entries in the interior node, loop around and - * increment pointer in the parent node. When we break out of - * this loop, num_frames indicates the number of interior - * nodes need to be read. - */ - while (1) { - if (++(p->at) < p->entries + dx_get_count(p->entries)) - break; - if (p == frames) - return 0; - num_frames++; - p--; - } - - /* - * If the hash is 1, then continue only if the next page has a - * continuation hash of any value. This is used for readdir - * handling. Otherwise, check to see if the hash matches the - * desired contiuation hash. If it doesn't, return since - * there's no point to read in the successive index pages. - */ - bhash = dx_get_hash(p->at); - if (start_hash) - *start_hash = bhash; - if ((hash & 1) == 0) { - if ((bhash & ~1) != hash) - return 0; - } - /* - * If the hash is HASH_NB_ALWAYS, we always go to the next - * block so no check is necessary - */ - while (num_frames--) { - if (!(bh = ext3_dir_bread(NULL, dir, dx_get_block(p->at), - 0, &err))) - return err; /* Failure */ - p++; - brelse (p->bh); - p->bh = bh; - p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; - } - return 1; -} - - -/* - * This function fills a red-black tree with information from a - * directory block. It returns the number directory entries loaded - * into the tree. If there is an error it is returned in err. 
- */ -static int htree_dirblock_to_tree(struct file *dir_file, - struct inode *dir, int block, - struct dx_hash_info *hinfo, - __u32 start_hash, __u32 start_minor_hash) -{ - struct buffer_head *bh; - struct ext3_dir_entry_2 *de, *top; - int err = 0, count = 0; - - dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); - - if (!(bh = ext3_dir_bread(NULL, dir, block, 0, &err))) - return err; - - de = (struct ext3_dir_entry_2 *) bh->b_data; - top = (struct ext3_dir_entry_2 *) ((char *) de + - dir->i_sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) { - if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, - (block<i_sb)) - +((char *)de - bh->b_data))) { - /* silently ignore the rest of the block */ - break; - } - ext3fs_dirhash(de->name, de->name_len, hinfo); - if ((hinfo->hash < start_hash) || - ((hinfo->hash == start_hash) && - (hinfo->minor_hash < start_minor_hash))) - continue; - if (de->inode == 0) - continue; - if ((err = ext3_htree_store_dirent(dir_file, - hinfo->hash, hinfo->minor_hash, de)) != 0) { - brelse(bh); - return err; - } - count++; - } - brelse(bh); - return count; -} - - -/* - * This function fills a red-black tree with information from a - * directory. We start scanning the directory in hash order, starting - * at start_hash and start_minor_hash. - * - * This function returns the number of entries inserted into the tree, - * or a negative error code. 
- */ -int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, - __u32 start_minor_hash, __u32 *next_hash) -{ - struct dx_hash_info hinfo; - struct ext3_dir_entry_2 *de; - struct dx_frame frames[2], *frame; - struct inode *dir; - int block, err; - int count = 0; - int ret; - __u32 hashval; - - dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, - start_minor_hash)); - dir = file_inode(dir_file); - if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { - hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - if (hinfo.hash_version <= DX_HASH_TEA) - hinfo.hash_version += - EXT3_SB(dir->i_sb)->s_hash_unsigned; - hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; - count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, - start_hash, start_minor_hash); - *next_hash = ~0; - return count; - } - hinfo.hash = start_hash; - hinfo.minor_hash = 0; - frame = dx_probe(NULL, file_inode(dir_file), &hinfo, frames, &err); - if (!frame) - return err; - - /* Add '.' and '..' from the htree header */ - if (!start_hash && !start_minor_hash) { - de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; - if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) - goto errout; - count++; - } - if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { - de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; - de = ext3_next_entry(de); - if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0) - goto errout; - count++; - } - - while (1) { - block = dx_get_block(frame->at); - ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, - start_hash, start_minor_hash); - if (ret < 0) { - err = ret; - goto errout; - } - count += ret; - hashval = ~0; - ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, - frame, frames, &hashval); - *next_hash = hashval; - if (ret < 0) { - err = ret; - goto errout; - } - /* - * Stop if: (a) there are no more entries, or - * (b) we have inserted at least one entry and the - * next hash value is not a continuation - */ - if ((ret 
== 0) || - (count && ((hashval & 1) == 0))) - break; - } - dx_release(frames); - dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", - count, *next_hash)); - return count; -errout: - dx_release(frames); - return (err); -} - - -/* - * Directory block splitting, compacting - */ - -/* - * Create map of hash values, offsets, and sizes, stored at end of block. - * Returns number of entries mapped. - */ -static int dx_make_map(struct ext3_dir_entry_2 *de, unsigned blocksize, - struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -{ - int count = 0; - char *base = (char *) de; - struct dx_hash_info h = *hinfo; - - while ((char *) de < base + blocksize) - { - if (de->name_len && de->inode) { - ext3fs_dirhash(de->name, de->name_len, &h); - map_tail--; - map_tail->hash = h.hash; - map_tail->offs = (u16) ((char *) de - base); - map_tail->size = le16_to_cpu(de->rec_len); - count++; - cond_resched(); - } - /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = ext3_next_entry(de); - } - return count; -} - -/* Sort map by hash value */ -static void dx_sort_map (struct dx_map_entry *map, unsigned count) -{ - struct dx_map_entry *p, *q, *top = map + count - 1; - int more; - /* Combsort until bubble sort doesn't suck */ - while (count > 2) - { - count = count*10/13; - if (count - 9 < 2) /* 9, 10 -> 11 */ - count = 11; - for (p = top, q = p - count; q >= map; p--, q--) - if (p->hash < q->hash) - swap(*p, *q); - } - /* Garden variety bubble sort */ - do { - more = 0; - q = top; - while (q-- > map) - { - if (q[1].hash >= q[0].hash) - continue; - swap(*(q+1), *q); - more = 1; - } - } while(more); -} - -static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -{ - struct dx_entry *entries = frame->entries; - struct dx_entry *old = frame->at, *new = old + 1; - int count = dx_get_count(entries); - - assert(count < dx_get_limit(entries)); - assert(old < entries + count); - memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); - 
dx_set_hash(new, hash); - dx_set_block(new, block); - dx_set_count(entries, count + 1); -} - -static void ext3_update_dx_flag(struct inode *inode) -{ - if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, - EXT3_FEATURE_COMPAT_DIR_INDEX)) - EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -} - -/* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * - * `len <= EXT3_NAME_LEN' is guaranteed by caller. - * `de != NULL' is guaranteed by caller. - */ -static inline int ext3_match (int len, const char * const name, - struct ext3_dir_entry_2 * de) -{ - if (len != de->name_len) - return 0; - if (!de->inode) - return 0; - return !memcmp(name, de->name, len); -} - -/* - * Returns 0 if not found, -1 on failure, and 1 on success - */ -static inline int search_dirblock(struct buffer_head * bh, - struct inode *dir, - struct qstr *child, - unsigned long offset, - struct ext3_dir_entry_2 ** res_dir) -{ - struct ext3_dir_entry_2 * de; - char * dlimit; - int de_len; - const char *name = child->name; - int namelen = child->len; - - de = (struct ext3_dir_entry_2 *) bh->b_data; - dlimit = bh->b_data + dir->i_sb->s_blocksize; - while ((char *) de < dlimit) { - /* this code is executed quadratically often */ - /* do minimal checking `by hand' */ - - if ((char *) de + namelen <= dlimit && - ext3_match (namelen, name, de)) { - /* found a match - just to be sure, do a full check */ - if (!ext3_check_dir_entry("ext3_find_entry", - dir, de, bh, offset)) - return -1; - *res_dir = de; - return 1; - } - /* prevent looping on a bad block */ - de_len = ext3_rec_len_from_disk(de->rec_len); - if (de_len <= 0) - return -1; - offset += de_len; - de = (struct ext3_dir_entry_2 *) ((char *) de + de_len); - } - return 0; -} - - -/* - * ext3_find_entry() - * - * finds an entry in the specified directory with the wanted name. It - * returns the cache buffer in which the entry was found, and the entry - * itself (as a parameter - res_dir). 
It does NOT read the inode of the - * entry - you'll have to do that yourself if you want to. - * - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -static struct buffer_head *ext3_find_entry(struct inode *dir, - struct qstr *entry, - struct ext3_dir_entry_2 **res_dir) -{ - struct super_block * sb; - struct buffer_head * bh_use[NAMEI_RA_SIZE]; - struct buffer_head * bh, *ret = NULL; - unsigned long start, block, b; - const u8 *name = entry->name; - int ra_max = 0; /* Number of bh's in the readahead - buffer, bh_use[] */ - int ra_ptr = 0; /* Current index into readahead - buffer */ - int num = 0; - int nblocks, i, err; - int namelen; - - *res_dir = NULL; - sb = dir->i_sb; - namelen = entry->len; - if (namelen > EXT3_NAME_LEN) - return NULL; - if ((namelen <= 2) && (name[0] == '.') && - (name[1] == '.' || name[1] == 0)) { - /* - * "." or ".." will only be in the first block - * NFS may look up ".."; "." should be handled by the VFS - */ - block = start = 0; - nblocks = 1; - goto restart; - } - if (is_dx(dir)) { - bh = ext3_dx_find_entry(dir, entry, res_dir, &err); - /* - * On success, or if the error was file not found, - * return. Otherwise, fall back to doing a search the - * old fashioned way. - */ - if (bh || (err != ERR_BAD_DX_DIR)) - return bh; - dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); - } - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); - start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -restart: - do { - /* - * We deal with the read-ahead logic here. - */ - if (ra_ptr >= ra_max) { - /* Refill the readahead buffer */ - ra_ptr = 0; - b = block; - for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { - /* - * Terminate if we reach the end of the - * directory and must wrap, or if our - * search has finished at this block. 
- */ - if (b >= nblocks || (num && block == start)) { - bh_use[ra_max] = NULL; - break; - } - num++; - bh = ext3_getblk(NULL, dir, b++, 0, &err); - bh_use[ra_max] = bh; - if (bh && !bh_uptodate_or_lock(bh)) { - get_bh(bh); - bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META | REQ_PRIO, - bh); - } - } - } - if ((bh = bh_use[ra_ptr++]) == NULL) - goto next; - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - /* read error, skip block & hope for the best */ - ext3_error(sb, __func__, "reading directory #%lu " - "offset %lu", dir->i_ino, block); - brelse(bh); - goto next; - } - i = search_dirblock(bh, dir, entry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { - EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { - brelse(bh); - if (i < 0) - goto cleanup_and_exit; - } - next: - if (++block >= nblocks) - block = 0; - } while (block != start); - - /* - * If the directory has grown while we were searching, then - * search the last part of the directory before giving up. 
- */ - block = nblocks; - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); - if (block < nblocks) { - start = 0; - goto restart; - } - -cleanup_and_exit: - /* Clean up the read-ahead blocks */ - for (; ra_ptr < ra_max; ra_ptr++) - brelse (bh_use[ra_ptr]); - return ret; -} - -static struct buffer_head * ext3_dx_find_entry(struct inode *dir, - struct qstr *entry, struct ext3_dir_entry_2 **res_dir, - int *err) -{ - struct super_block *sb = dir->i_sb; - struct dx_hash_info hinfo; - struct dx_frame frames[2], *frame; - struct buffer_head *bh; - unsigned long block; - int retval; - - if (!(frame = dx_probe(entry, dir, &hinfo, frames, err))) - return NULL; - do { - block = dx_get_block(frame->at); - if (!(bh = ext3_dir_bread (NULL, dir, block, 0, err))) - goto errout; - - retval = search_dirblock(bh, dir, entry, - block << EXT3_BLOCK_SIZE_BITS(sb), - res_dir); - if (retval == 1) { - dx_release(frames); - return bh; - } - brelse(bh); - if (retval == -1) { - *err = ERR_BAD_DX_DIR; - goto errout; - } - - /* Check to see if we should continue to search */ - retval = ext3_htree_next_block(dir, hinfo.hash, frame, - frames, NULL); - if (retval < 0) { - ext3_warning(sb, __func__, - "error reading index page in directory #%lu", - dir->i_ino); - *err = retval; - goto errout; - } - } while (retval == 1); - - *err = -ENOENT; -errout: - dxtrace(printk("%s not found\n", entry->name)); - dx_release (frames); - return NULL; -} - -static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) -{ - struct inode * inode; - struct ext3_dir_entry_2 * de; - struct buffer_head * bh; - - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - - bh = ext3_find_entry(dir, &dentry->d_name, &de); - inode = NULL; - if (bh) { - unsigned long ino = le32_to_cpu(de->inode); - brelse (bh); - if (!ext3_valid_inum(dir->i_sb, ino)) { - ext3_error(dir->i_sb, "ext3_lookup", - "bad inode number: %lu", ino); - return ERR_PTR(-EIO); - } - inode = 
ext3_iget(dir->i_sb, ino); - if (inode == ERR_PTR(-ESTALE)) { - ext3_error(dir->i_sb, __func__, - "deleted inode referenced: %lu", - ino); - return ERR_PTR(-EIO); - } - } - return d_splice_alias(inode, dentry); -} - - -struct dentry *ext3_get_parent(struct dentry *child) -{ - unsigned long ino; - struct qstr dotdot = QSTR_INIT("..", 2); - struct ext3_dir_entry_2 * de; - struct buffer_head *bh; - - bh = ext3_find_entry(d_inode(child), &dotdot, &de); - if (!bh) - return ERR_PTR(-ENOENT); - ino = le32_to_cpu(de->inode); - brelse(bh); - - if (!ext3_valid_inum(d_inode(child)->i_sb, ino)) { - ext3_error(d_inode(child)->i_sb, "ext3_get_parent", - "bad inode number: %lu", ino); - return ERR_PTR(-EIO); - } - - return d_obtain_alias(ext3_iget(d_inode(child)->i_sb, ino)); -} - -#define S_SHIFT 12 -static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = EXT3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = EXT3_FT_DIR, - [S_IFCHR >> S_SHIFT] = EXT3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = EXT3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = EXT3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = EXT3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = EXT3_FT_SYMLINK, -}; - -static inline void ext3_set_de_type(struct super_block *sb, - struct ext3_dir_entry_2 *de, - umode_t mode) { - if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE)) - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - -/* - * Move count entries from end of map between two memory locations. - * Returns pointer to last entry moved. 
- */ -static struct ext3_dir_entry_2 * -dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -{ - unsigned rec_len = 0; - - while (count--) { - struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); - rec_len = EXT3_DIR_REC_LEN(de->name_len); - memcpy (to, de, rec_len); - ((struct ext3_dir_entry_2 *) to)->rec_len = - ext3_rec_len_to_disk(rec_len); - de->inode = 0; - map++; - to += rec_len; - } - return (struct ext3_dir_entry_2 *) (to - rec_len); -} - -/* - * Compact each dir entry in the range to the minimal rec_len. - * Returns pointer to last entry in range. - */ -static struct ext3_dir_entry_2 *dx_pack_dirents(char *base, unsigned blocksize) -{ - struct ext3_dir_entry_2 *next, *to, *prev; - struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *)base; - unsigned rec_len = 0; - - prev = to = de; - while ((char *)de < base + blocksize) { - next = ext3_next_entry(de); - if (de->inode && de->name_len) { - rec_len = EXT3_DIR_REC_LEN(de->name_len); - if (de > to) - memmove(to, de, rec_len); - to->rec_len = ext3_rec_len_to_disk(rec_len); - prev = to; - to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); - } - de = next; - } - return prev; -} - -/* - * Split a full leaf block to make room for a new dir entry. - * Allocate a new block, and move entries so that they are approx. equally full. - * Returns pointer to de in block into which the new entry will be inserted. 
- */ -static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, - struct buffer_head **bh,struct dx_frame *frame, - struct dx_hash_info *hinfo, int *error) -{ - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; - struct buffer_head *bh2; - u32 newblock; - u32 hash2; - struct dx_map_entry *map; - char *data1 = (*bh)->b_data, *data2; - unsigned split, move, size; - struct ext3_dir_entry_2 *de = NULL, *de2; - int err = 0, i; - - bh2 = ext3_append (handle, dir, &newblock, &err); - if (!(bh2)) { - brelse(*bh); - *bh = NULL; - goto errout; - } - - BUFFER_TRACE(*bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, *bh); - if (err) - goto journal_error; - - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); - if (err) - goto journal_error; - - data2 = bh2->b_data; - - /* create map in the end of data2 block */ - map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map ((struct ext3_dir_entry_2 *) data1, - blocksize, hinfo, map); - map -= count; - dx_sort_map (map, count); - /* Split the existing block in the middle, size-wise */ - size = 0; - move = 0; - for (i = count-1; i >= 0; i--) { - /* is more than half of this entry in 2nd half of the block? 
*/ - if (size + map[i].size/2 > blocksize/2) - break; - size += map[i].size; - move++; - } - /* map index at which we will split */ - split = count - move; - hash2 = map[split].hash; - continued = hash2 == map[split - 1].hash; - dxtrace(printk("Split block %i at %x, %i/%i\n", - dx_get_block(frame->at), hash2, split, count-split)); - - /* Fancy dance to stay within two buffers */ - de2 = dx_move_dirents(data1, data2, map + split, count - split); - de = dx_pack_dirents(data1,blocksize); - de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de); - de2->rec_len = ext3_rec_len_to_disk(data2 + blocksize - (char *) de2); - dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); - dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); - - /* Which block gets the new entry? */ - if (hinfo->hash >= hash2) - { - swap(*bh, bh2); - de = de2; - } - dx_insert_block (frame, hash2 + continued, newblock); - err = ext3_journal_dirty_metadata (handle, bh2); - if (err) - goto journal_error; - err = ext3_journal_dirty_metadata (handle, frame->bh); - if (err) - goto journal_error; - brelse (bh2); - dxtrace(dx_show_index ("frame", frame->entries)); - return de; - -journal_error: - brelse(*bh); - brelse(bh2); - *bh = NULL; - ext3_std_error(dir->i_sb, err); -errout: - *error = err; - return NULL; -} - - -/* - * Add a new entry into a directory (leaf) block. If de is non-NULL, - * it points to a directory entry which is guaranteed to be large - * enough for new directory entry. If de is NULL, then - * add_dirent_to_buf will attempt search the directory block for - * space. It will return -ENOSPC if no space is available, and -EIO - * and -EEXIST if directory entry already exists. - * - * NOTE! bh is NOT released in the case where ENOSPC is returned. In - * all other cases bh is released. 
- */ -static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, - struct inode *inode, struct ext3_dir_entry_2 *de, - struct buffer_head * bh) -{ - struct inode *dir = d_inode(dentry->d_parent); - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned long offset = 0; - unsigned short reclen; - int nlen, rlen, err; - char *top; - - reclen = EXT3_DIR_REC_LEN(namelen); - if (!de) { - de = (struct ext3_dir_entry_2 *)bh->b_data; - top = bh->b_data + dir->i_sb->s_blocksize - reclen; - while ((char *) de <= top) { - if (!ext3_check_dir_entry("ext3_add_entry", dir, de, - bh, offset)) { - brelse (bh); - return -EIO; - } - if (ext3_match (namelen, name, de)) { - brelse (bh); - return -EEXIST; - } - nlen = EXT3_DIR_REC_LEN(de->name_len); - rlen = ext3_rec_len_from_disk(de->rec_len); - if ((de->inode? rlen - nlen: rlen) >= reclen) - break; - de = (struct ext3_dir_entry_2 *)((char *)de + rlen); - offset += rlen; - } - if ((char *) de > top) - return -ENOSPC; - } - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) { - ext3_std_error(dir->i_sb, err); - brelse(bh); - return err; - } - - /* By now the buffer is marked for journaling */ - nlen = EXT3_DIR_REC_LEN(de->name_len); - rlen = ext3_rec_len_from_disk(de->rec_len); - if (de->inode) { - struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); - de1->rec_len = ext3_rec_len_to_disk(rlen - nlen); - de->rec_len = ext3_rec_len_to_disk(nlen); - de = de1; - } - de->file_type = EXT3_FT_UNKNOWN; - if (inode) { - de->inode = cpu_to_le32(inode->i_ino); - ext3_set_de_type(dir->i_sb, de, inode->i_mode); - } else - de->inode = 0; - de->name_len = namelen; - memcpy (de->name, name, namelen); - /* - * XXX shouldn't update any times until successful - * completion of syscall, but too many callers depend - * on this. 
- * - * XXX similarly, too many callers depend on - * ext3_new_inode() setting the times, but error - * recovery deletes the inode, so the worst that can - * happen is that the times are slightly out of date - * and/or different from the directory change time. - */ - dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - ext3_update_dx_flag(dir); - dir->i_version++; - ext3_mark_inode_dirty(handle, dir); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) - ext3_std_error(dir->i_sb, err); - brelse(bh); - return 0; -} - -/* - * This converts a one block unindexed directory to a 3 block indexed - * directory, and adds the dentry to the indexed directory. - */ -static int make_indexed_dir(handle_t *handle, struct dentry *dentry, - struct inode *inode, struct buffer_head *bh) -{ - struct inode *dir = d_inode(dentry->d_parent); - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - struct buffer_head *bh2; - struct dx_root *root; - struct dx_frame frames[2], *frame; - struct dx_entry *entries; - struct ext3_dir_entry_2 *de, *de2; - char *data1, *top; - unsigned len; - int retval; - unsigned blocksize; - struct dx_hash_info hinfo; - u32 block; - struct fake_dirent *fde; - - blocksize = dir->i_sb->s_blocksize; - dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); - retval = ext3_journal_get_write_access(handle, bh); - if (retval) { - ext3_std_error(dir->i_sb, retval); - brelse(bh); - return retval; - } - root = (struct dx_root *) bh->b_data; - - /* The 0th block becomes the root, move the dirents out */ - fde = &root->dotdot; - de = (struct ext3_dir_entry_2 *)((char *)fde + - ext3_rec_len_from_disk(fde->rec_len)); - if ((char *) de >= (((char *) root) + blocksize)) { - ext3_error(dir->i_sb, __func__, - "invalid rec_len for '..' 
in inode %lu", - dir->i_ino); - brelse(bh); - return -EIO; - } - len = ((char *) root) + blocksize - (char *) de; - - bh2 = ext3_append (handle, dir, &block, &retval); - if (!(bh2)) { - brelse(bh); - return retval; - } - EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; - data1 = bh2->b_data; - - memcpy (data1, de, len); - de = (struct ext3_dir_entry_2 *) data1; - top = data1 + len; - while ((char *)(de2 = ext3_next_entry(de)) < top) - de = de2; - de->rec_len = ext3_rec_len_to_disk(data1 + blocksize - (char *) de); - /* Initialize the root; the dot dirents already exist */ - de = (struct ext3_dir_entry_2 *) (&root->dotdot); - de->rec_len = ext3_rec_len_to_disk(blocksize - EXT3_DIR_REC_LEN(2)); - memset (&root->info, 0, sizeof(root->info)); - root->info.info_length = sizeof(root->info); - root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - entries = root->entries; - dx_set_block (entries, 1); - dx_set_count (entries, 1); - dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); - - /* Initialize as for dx_probe */ - hinfo.hash_version = root->info.hash_version; - if (hinfo.hash_version <= DX_HASH_TEA) - hinfo.hash_version += EXT3_SB(dir->i_sb)->s_hash_unsigned; - hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; - ext3fs_dirhash(name, namelen, &hinfo); - frame = frames; - frame->entries = entries; - frame->at = entries; - frame->bh = bh; - bh = bh2; - /* - * Mark buffers dirty here so that if do_split() fails we write a - * consistent set of buffers to disk. - */ - ext3_journal_dirty_metadata(handle, frame->bh); - ext3_journal_dirty_metadata(handle, bh); - de = do_split(handle,dir, &bh, frame, &hinfo, &retval); - if (!de) { - ext3_mark_inode_dirty(handle, dir); - dx_release(frames); - return retval; - } - dx_release(frames); - - return add_dirent_to_buf(handle, dentry, inode, de, bh); -} - -/* - * ext3_add_entry() - * - * adds a file entry to the specified directory, using the same - * semantics as ext3_find_entry(). It returns NULL if it failed. 
- * - * NOTE!! The inode part of 'de' is left at 0 - which means you - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) -{ - struct inode *dir = d_inode(dentry->d_parent); - struct buffer_head * bh; - struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; - int dx_fallback=0; - unsigned blocksize; - u32 block, blocks; - - sb = dir->i_sb; - blocksize = sb->s_blocksize; - if (!dentry->d_name.len) - return -EINVAL; - if (is_dx(dir)) { - retval = ext3_dx_add_entry(handle, dentry, inode); - if (!retval || (retval != ERR_BAD_DX_DIR)) - return retval; - EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; - dx_fallback++; - ext3_mark_inode_dirty(handle, dir); - } - blocks = dir->i_size >> sb->s_blocksize_bits; - for (block = 0; block < blocks; block++) { - if (!(bh = ext3_dir_bread(handle, dir, block, 0, &retval))) - return retval; - - retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (retval != -ENOSPC) - return retval; - - if (blocks == 1 && !dx_fallback && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) - return make_indexed_dir(handle, dentry, inode, bh); - brelse(bh); - } - bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; - de = (struct ext3_dir_entry_2 *) bh->b_data; - de->inode = 0; - de->rec_len = ext3_rec_len_to_disk(blocksize); - return add_dirent_to_buf(handle, dentry, inode, de, bh); -} - -/* - * Returns 0 for success, or a negative error value - */ -static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, - struct inode *inode) -{ - struct dx_frame frames[2], *frame; - struct dx_entry *entries, *at; - struct dx_hash_info hinfo; - struct buffer_head * bh; - struct inode *dir = d_inode(dentry->d_parent); - struct super_block * sb = dir->i_sb; - struct ext3_dir_entry_2 *de; - int err; - - frame = 
dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); - if (!frame) - return err; - entries = frame->entries; - at = frame->at; - - if (!(bh = ext3_dir_bread(handle, dir, dx_get_block(frame->at), 0, &err))) - goto cleanup; - - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) - goto journal_error; - - err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); - if (err != -ENOSPC) { - bh = NULL; - goto cleanup; - } - - /* Block full, should compress but for now just split */ - dxtrace(printk("using %u of %u node entries\n", - dx_get_count(entries), dx_get_limit(entries))); - /* Need to split index? */ - if (dx_get_count(entries) == dx_get_limit(entries)) { - u32 newblock; - unsigned icount = dx_get_count(entries); - int levels = frame - frames; - struct dx_entry *entries2; - struct dx_node *node2; - struct buffer_head *bh2; - - if (levels && (dx_get_count(frames->entries) == - dx_get_limit(frames->entries))) { - ext3_warning(sb, __func__, - "Directory index full!"); - err = -ENOSPC; - goto cleanup; - } - bh2 = ext3_append (handle, dir, &newblock, &err); - if (!(bh2)) - goto cleanup; - node2 = (struct dx_node *)(bh2->b_data); - entries2 = node2->entries; - memset(&node2->fake, 0, sizeof(struct fake_dirent)); - node2->fake.rec_len = ext3_rec_len_to_disk(sb->s_blocksize); - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); - if (err) - goto journal_error; - if (levels) { - unsigned icount1 = icount/2, icount2 = icount - icount1; - unsigned hash2 = dx_get_hash(entries + icount1); - dxtrace(printk("Split index %i/%i\n", icount1, icount2)); - - BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ - err = ext3_journal_get_write_access(handle, - frames[0].bh); - if (err) - goto journal_error; - - memcpy ((char *) entries2, (char *) (entries + icount1), - icount2 * sizeof(struct dx_entry)); - dx_set_count (entries, icount1); - dx_set_count (entries2, icount2); - 
dx_set_limit (entries2, dx_node_limit(dir)); - - /* Which index block gets the new entry? */ - if (at - entries >= icount1) { - frame->at = at = at - entries - icount1 + entries2; - frame->entries = entries = entries2; - swap(frame->bh, bh2); - } - dx_insert_block (frames + 0, hash2, newblock); - dxtrace(dx_show_index ("node", frames[1].entries)); - dxtrace(dx_show_index ("node", - ((struct dx_node *) bh2->b_data)->entries)); - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) - goto journal_error; - brelse (bh2); - } else { - dxtrace(printk("Creating second level index...\n")); - memcpy((char *) entries2, (char *) entries, - icount * sizeof(struct dx_entry)); - dx_set_limit(entries2, dx_node_limit(dir)); - - /* Set up root */ - dx_set_count(entries, 1); - dx_set_block(entries + 0, newblock); - ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; - - /* Add new access path frame */ - frame = frames + 1; - frame->at = at = at - entries + entries2; - frame->entries = entries = entries2; - frame->bh = bh2; - err = ext3_journal_get_write_access(handle, - frame->bh); - if (err) - goto journal_error; - } - err = ext3_journal_dirty_metadata(handle, frames[0].bh); - if (err) - goto journal_error; - } - de = do_split(handle, dir, &bh, frame, &hinfo, &err); - if (!de) - goto cleanup; - err = add_dirent_to_buf(handle, dentry, inode, de, bh); - bh = NULL; - goto cleanup; - -journal_error: - ext3_std_error(dir->i_sb, err); -cleanup: - if (bh) - brelse(bh); - dx_release(frames); - return err; -} - -/* - * ext3_delete_entry deletes a directory entry by merging it with the - * previous entry - */ -static int ext3_delete_entry (handle_t *handle, - struct inode * dir, - struct ext3_dir_entry_2 * de_del, - struct buffer_head * bh) -{ - struct ext3_dir_entry_2 * de, * pde; - int i; - - i = 0; - pde = NULL; - de = (struct ext3_dir_entry_2 *) bh->b_data; - while (i < bh->b_size) { - if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i)) - return -EIO; - 
if (de == de_del) { - int err; - - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) - goto journal_error; - - if (pde) - pde->rec_len = ext3_rec_len_to_disk( - ext3_rec_len_from_disk(pde->rec_len) + - ext3_rec_len_from_disk(de->rec_len)); - else - de->inode = 0; - dir->i_version++; - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) { -journal_error: - ext3_std_error(dir->i_sb, err); - return err; - } - return 0; - } - i += ext3_rec_len_from_disk(de->rec_len); - pde = de; - de = ext3_next_entry(de); - } - return -ENOENT; -} - -static int ext3_add_nondir(handle_t *handle, - struct dentry *dentry, struct inode *inode) -{ - int err = ext3_add_entry(handle, dentry, inode); - if (!err) { - ext3_mark_inode_dirty(handle, inode); - unlock_new_inode(inode); - d_instantiate(dentry, inode); - return 0; - } - drop_nlink(inode); - unlock_new_inode(inode); - iput(inode); - return err; -} - -/* - * By the time this is called, we already have created - * the directory cache entry for the new file, but it - * is so far negative - it has no inode. - * - * If the create succeeds, we fill in the inode information - * with d_instantiate(). 
- */ -static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, - bool excl) -{ - handle_t *handle; - struct inode * inode; - int err, retries = 0; - - dquot_initialize(dir); - -retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext3_new_inode (handle, dir, &dentry->d_name, mode); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - ext3_set_aops(inode); - err = ext3_add_nondir(handle, dentry, inode); - } - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -static int ext3_mknod (struct inode * dir, struct dentry *dentry, - umode_t mode, dev_t rdev) -{ - handle_t *handle; - struct inode *inode; - int err, retries = 0; - - if (!new_valid_dev(rdev)) - return -EINVAL; - - dquot_initialize(dir); - -retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext3_new_inode (handle, dir, &dentry->d_name, mode); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT3_FS_XATTR - inode->i_op = &ext3_special_inode_operations; -#endif - err = ext3_add_nondir(handle, dentry, inode); - } - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -static int ext3_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) -{ - handle_t *handle; - struct inode *inode; - int err, retries = 0; - - dquot_initialize(dir); - -retry: - handle = 
ext3_journal_start(dir, EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + - 4 + EXT3_XATTR_TRANS_BLOCKS); - - if (IS_ERR(handle)) - return PTR_ERR(handle); - - inode = ext3_new_inode (handle, dir, NULL, mode); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - ext3_set_aops(inode); - d_tmpfile(dentry, inode); - err = ext3_orphan_add(handle, inode); - if (err) - goto err_unlock_inode; - mark_inode_dirty(inode); - unlock_new_inode(inode); - } - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -err_unlock_inode: - ext3_journal_stop(handle); - unlock_new_inode(inode); - return err; -} - -static int ext3_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) -{ - handle_t *handle; - struct inode * inode; - struct buffer_head * dir_block = NULL; - struct ext3_dir_entry_2 * de; - int err, retries = 0; - - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - - dquot_initialize(dir); - -retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - if (!(dir_block = ext3_dir_bread(handle, inode, 0, 1, &err))) - goto out_clear_inode; - - BUFFER_TRACE(dir_block, "get_write_access"); - err = ext3_journal_get_write_access(handle, dir_block); - if (err) - goto out_clear_inode; - - de = (struct ext3_dir_entry_2 *) dir_block->b_data; - de->inode = cpu_to_le32(inode->i_ino); - de->name_len = 1; - de->rec_len = 
ext3_rec_len_to_disk(EXT3_DIR_REC_LEN(de->name_len)); - strcpy (de->name, "."); - ext3_set_de_type(dir->i_sb, de, S_IFDIR); - de = ext3_next_entry(de); - de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = ext3_rec_len_to_disk(inode->i_sb->s_blocksize - - EXT3_DIR_REC_LEN(1)); - de->name_len = 2; - strcpy (de->name, ".."); - ext3_set_de_type(dir->i_sb, de, S_IFDIR); - set_nlink(inode, 2); - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, dir_block); - if (err) - goto out_clear_inode; - - err = ext3_mark_inode_dirty(handle, inode); - if (!err) - err = ext3_add_entry (handle, dentry, inode); - - if (err) { -out_clear_inode: - clear_nlink(inode); - unlock_new_inode(inode); - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; - } - inc_nlink(dir); - ext3_update_dx_flag(dir); - err = ext3_mark_inode_dirty(handle, dir); - if (err) - goto out_clear_inode; - - unlock_new_inode(inode); - d_instantiate(dentry, inode); -out_stop: - brelse(dir_block); - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -/* - * routine to check that the specified directory is empty (for rmdir) - */ -static int empty_dir (struct inode * inode) -{ - unsigned long offset; - struct buffer_head * bh; - struct ext3_dir_entry_2 * de, * de1; - struct super_block * sb; - int err = 0; - - sb = inode->i_sb; - if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || - !(bh = ext3_dir_bread(NULL, inode, 0, 0, &err))) { - if (err) - ext3_error(inode->i_sb, __func__, - "error %d reading directory #%lu offset 0", - err, inode->i_ino); - else - ext3_warning(inode->i_sb, __func__, - "bad directory (dir #%lu) - no data block", - inode->i_ino); - return 1; - } - de = (struct ext3_dir_entry_2 *) bh->b_data; - de1 = ext3_next_entry(de); - if (le32_to_cpu(de->inode) != inode->i_ino || - !le32_to_cpu(de1->inode) || - strcmp (".", de->name) || - strcmp 
("..", de1->name)) { - ext3_warning (inode->i_sb, "empty_dir", - "bad directory (dir #%lu) - no `.' or `..'", - inode->i_ino); - brelse (bh); - return 1; - } - offset = ext3_rec_len_from_disk(de->rec_len) + - ext3_rec_len_from_disk(de1->rec_len); - de = ext3_next_entry(de1); - while (offset < inode->i_size ) { - if (!bh || - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { - err = 0; - brelse (bh); - if (!(bh = ext3_dir_bread (NULL, inode, - offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err))) { - if (err) - ext3_error(sb, __func__, - "error %d reading directory" - " #%lu offset %lu", - err, inode->i_ino, offset); - offset += sb->s_blocksize; - continue; - } - de = (struct ext3_dir_entry_2 *) bh->b_data; - } - if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { - de = (struct ext3_dir_entry_2 *)(bh->b_data + - sb->s_blocksize); - offset = (offset | (sb->s_blocksize - 1)) + 1; - continue; - } - if (le32_to_cpu(de->inode)) { - brelse (bh); - return 0; - } - offset += ext3_rec_len_from_disk(de->rec_len); - de = ext3_next_entry(de); - } - brelse (bh); - return 1; -} - -/* ext3_orphan_add() links an unlinked or truncated inode into a list of - * such inodes, starting at the superblock, in case we crash before the - * file is closed/deleted, or in case the inode truncate spans multiple - * transactions and the last transaction is not recovered after a crash. - * - * At filesystem recovery time, we walk this list deleting unlinked - * inodes and truncating linked inodes in ext3_orphan_cleanup(). - */ -int ext3_orphan_add(handle_t *handle, struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct ext3_iloc iloc; - int err = 0, rc; - - mutex_lock(&EXT3_SB(sb)->s_orphan_lock); - if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks - * being truncated, or files being unlinked. 
*/ - - /* @@@ FIXME: Observation from aviro: - * I think I can trigger J_ASSERT in ext3_orphan_add(). We block - * here (on s_orphan_lock), so race with ext3_link() which might bump - * ->i_nlink. For, say it, character device. Not a regular file, - * not a directory, not a symlink and ->i_nlink > 0. - * - * tytso, 4/25/2009: I'm not sure how that could happen; - * shouldn't the fs core protect us from these sort of - * unlink()/link() races? - */ - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - - BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto out_unlock; - - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_unlock; - - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); - EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - rc = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; - - /* Only add to the head of the in-memory list if all the - * previous operations succeeded. If the orphan_add is going to - * fail (possibly taking the journal offline), we can't risk - * leaving the inode on the orphan list: stray orphan-list - * entries can cause panics at unmount time. - * - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. 
*/ - if (!err) - list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); - jbd_debug(4, "orphan inode %lu will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); -out_unlock: - mutex_unlock(&EXT3_SB(sb)->s_orphan_lock); - ext3_std_error(inode->i_sb, err); - return err; -} - -/* - * ext3_orphan_del() removes an unlinked or truncated inode from the list - * of such inodes stored on disk, because it is finally being cleaned up. - */ -int ext3_orphan_del(handle_t *handle, struct inode *inode) -{ - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock); - if (list_empty(&ei->i_orphan)) - goto out; - - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - - list_del_init(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on - * disk, but we still need to remove the inode from the linked - * list in memory. 
*/ - if (!handle) - goto out; - - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_err; - - if (prev == &sbi->s_orphan) { - jbd_debug(4, "superblock will point to %lu\n", ino_next); - BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) - goto out_brelse; - sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - } else { - struct ext3_iloc iloc2; - struct inode *i_prev = - &list_entry(prev, struct ext3_inode_info, i_orphan)->vfs_inode; - - jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, ino_next); - err = ext3_reserve_inode_write(handle, i_prev, &iloc2); - if (err) - goto out_brelse; - NEXT_ORPHAN(i_prev) = ino_next; - err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2); - } - if (err) - goto out_brelse; - NEXT_ORPHAN(inode) = 0; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - -out_err: - ext3_std_error(inode->i_sb, err); -out: - mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock); - return err; - -out_brelse: - brelse(iloc.bh); - goto out_err; -} - -static int ext3_rmdir (struct inode * dir, struct dentry *dentry) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext3_dir_entry_2 * de; - handle_t *handle; - - /* Initialize quotas before so that eventual writes go in - * separate transaction */ - dquot_initialize(dir); - dquot_initialize(d_inode(dentry)); - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - retval = -ENOENT; - bh = ext3_find_entry(dir, &dentry->d_name, &de); - if (!bh) - goto end_rmdir; - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = d_inode(dentry); - - retval = -EIO; - if (le32_to_cpu(de->inode) != inode->i_ino) - goto end_rmdir; - - retval = -ENOTEMPTY; - if (!empty_dir (inode)) - goto end_rmdir; - - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto 
end_rmdir; - if (inode->i_nlink != 2) - ext3_warning (inode->i_sb, "ext3_rmdir", - "empty directory has nlink!=2 (%d)", - inode->i_nlink); - inode->i_version++; - clear_nlink(inode); - /* There's no need to set i_disksize: the fact that i_nlink is - * zero will ensure that the right thing happens during any - * recovery. */ - inode->i_size = 0; - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); - drop_nlink(dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - -end_rmdir: - ext3_journal_stop(handle); - brelse (bh); - return retval; -} - -static int ext3_unlink(struct inode * dir, struct dentry *dentry) -{ - int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext3_dir_entry_2 * de; - handle_t *handle; - - trace_ext3_unlink_enter(dir, dentry); - /* Initialize quotas before so that eventual writes go - * in separate transaction */ - dquot_initialize(dir); - dquot_initialize(d_inode(dentry)); - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - retval = -ENOENT; - bh = ext3_find_entry(dir, &dentry->d_name, &de); - if (!bh) - goto end_unlink; - - inode = d_inode(dentry); - - retval = -EIO; - if (le32_to_cpu(de->inode) != inode->i_ino) - goto end_unlink; - - if (!inode->i_nlink) { - ext3_warning (inode->i_sb, "ext3_unlink", - "Deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); - set_nlink(inode, 1); - } - retval = ext3_delete_entry(handle, dir, de, bh); - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - drop_nlink(inode); - if (!inode->i_nlink) - ext3_orphan_add(handle, inode); - inode->i_ctime = dir->i_ctime; - ext3_mark_inode_dirty(handle, inode); - retval = 0; - -end_unlink: - ext3_journal_stop(handle); 
- brelse (bh); - trace_ext3_unlink_exit(dentry, retval); - return retval; -} - -static int ext3_symlink (struct inode * dir, - struct dentry *dentry, const char * symname) -{ - handle_t *handle; - struct inode * inode; - int l, err, retries = 0; - int credits; - - l = strlen(symname)+1; - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - - dquot_initialize(dir); - - if (l > EXT3_N_BLOCKS * 4) { - /* - * For non-fast symlinks, we just allocate inode and put it on - * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks, and - * possibly selinux xattr blocks. - */ - credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + - EXT3_XATTR_TRANS_BLOCKS; - } else { - /* - * Fast symlink. We have to add entry to directory - * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS), - * allocate new inode (bitmap, group descriptor, inode block, - * quota blocks, sb is already counted in previous macros). - */ - credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); - } -retry: - handle = ext3_journal_start(dir, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode = ext3_new_inode (handle, dir, &dentry->d_name, S_IFLNK|S_IRWXUGO); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; - - if (l > EXT3_N_BLOCKS * 4) { - inode->i_op = &ext3_symlink_inode_operations; - ext3_set_aops(inode); - /* - * We cannot call page_symlink() with transaction started - * because it calls into ext3_write_begin() which acquires page - * lock which ranks below transaction start (and it can also - * wait for journal commit if we are running out of space). So - * we have to stop transaction now and restart it when symlink - * contents is written. - * - * To keep fs consistent in case of crash, we have to put inode - * to orphan list in the mean time. 
- */ - drop_nlink(inode); - err = ext3_orphan_add(handle, inode); - ext3_journal_stop(handle); - if (err) - goto err_drop_inode; - err = __page_symlink(inode, symname, l, 1); - if (err) - goto err_drop_inode; - /* - * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS - * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified - */ - handle = ext3_journal_start(dir, - EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto err_drop_inode; - } - set_nlink(inode, 1); - err = ext3_orphan_del(handle, inode); - if (err) { - ext3_journal_stop(handle); - drop_nlink(inode); - goto err_drop_inode; - } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; - inode->i_link = (char*)&EXT3_I(inode)->i_data; - memcpy(inode->i_link, symname, l); - inode->i_size = l-1; - } - EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); -out_stop: - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -err_drop_inode: - unlock_new_inode(inode); - iput(inode); - return err; -} - -static int ext3_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) -{ - handle_t *handle; - struct inode *inode = d_inode(old_dentry); - int err, retries = 0; - - if (inode->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - - dquot_initialize(dir); - -retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - handle->h_sync = 1; - - inode->i_ctime = CURRENT_TIME_SEC; - inc_nlink(inode); - ihold(inode); - - err = ext3_add_entry(handle, dentry, inode); - if (!err) { - ext3_mark_inode_dirty(handle, inode); - /* this can happen only for tmpfile being - * linked the first time - */ - if (inode->i_nlink == 1) - ext3_orphan_del(handle, inode); - d_instantiate(dentry, 
inode); - } else { - drop_nlink(inode); - iput(inode); - } - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) - goto retry; - return err; -} - -#define PARENT_INO(buffer) \ - (ext3_next_entry((struct ext3_dir_entry_2 *)(buffer))->inode) - -/* - * Anybody can rename anything with this: the permission checks are left to the - * higher-level routines. - */ -static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) -{ - handle_t *handle; - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh, * dir_bh; - struct ext3_dir_entry_2 * old_de, * new_de; - int retval, flush_file = 0; - - dquot_initialize(old_dir); - dquot_initialize(new_dir); - - old_bh = new_bh = dir_bh = NULL; - - /* Initialize quotas before so that eventual writes go - * in separate transaction */ - if (d_really_is_positive(new_dentry)) - dquot_initialize(d_inode(new_dentry)); - handle = ext3_journal_start(old_dir, 2 * - EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) - handle->h_sync = 1; - - old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de); - /* - * Check for inode number is _not_ due to possible IO errors. - * We might rmdir the source, keep it as pwd of some process - * and merrily kill the link to whatever was created under the - * same name. 
Goodbye sticky bit ;-< - */ - old_inode = d_inode(old_dentry); - retval = -ENOENT; - if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) - goto end_rename; - - new_inode = d_inode(new_dentry); - new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de); - if (new_bh) { - if (!new_inode) { - brelse (new_bh); - new_bh = NULL; - } - } - if (S_ISDIR(old_inode->i_mode)) { - if (new_inode) { - retval = -ENOTEMPTY; - if (!empty_dir (new_inode)) - goto end_rename; - } - retval = -EIO; - dir_bh = ext3_dir_bread(handle, old_inode, 0, 0, &retval); - if (!dir_bh) - goto end_rename; - if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; - if (!new_inode && new_dir!=old_dir && - new_dir->i_nlink >= EXT3_LINK_MAX) - goto end_rename; - } - if (!new_bh) { - retval = ext3_add_entry (handle, new_dentry, old_inode); - if (retval) - goto end_rename; - } else { - BUFFER_TRACE(new_bh, "get write access"); - retval = ext3_journal_get_write_access(handle, new_bh); - if (retval) - goto journal_error; - new_de->inode = cpu_to_le32(old_inode->i_ino); - if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, - EXT3_FEATURE_INCOMPAT_FILETYPE)) - new_de->file_type = old_de->file_type; - new_dir->i_version++; - new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, new_dir); - BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); - retval = ext3_journal_dirty_metadata(handle, new_bh); - if (retval) - goto journal_error; - brelse(new_bh); - new_bh = NULL; - } - - /* - * Like most other Unix systems, set the ctime for inodes on a - * rename. 
- */ - old_inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, old_inode); - - /* - * ok, that's it - */ - if (le32_to_cpu(old_de->inode) != old_inode->i_ino || - old_de->name_len != old_dentry->d_name.len || - strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || - (retval = ext3_delete_entry(handle, old_dir, - old_de, old_bh)) == -ENOENT) { - /* old_de could have moved from under us during htree split, so - * make sure that we are deleting the right entry. We might - * also be pointing to a stale entry in the unused part of - * old_bh so just checking inum and the name isn't enough. */ - struct buffer_head *old_bh2; - struct ext3_dir_entry_2 *old_de2; - - old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name, - &old_de2); - if (old_bh2) { - retval = ext3_delete_entry(handle, old_dir, - old_de2, old_bh2); - brelse(old_bh2); - } - } - if (retval) { - ext3_warning(old_dir->i_sb, "ext3_rename", - "Deleting old file (%lu), %d, error=%d", - old_dir->i_ino, old_dir->i_nlink, retval); - } - - if (new_inode) { - drop_nlink(new_inode); - new_inode->i_ctime = CURRENT_TIME_SEC; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; - ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - retval = ext3_journal_get_write_access(handle, dir_bh); - if (retval) - goto journal_error; - PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - retval = ext3_journal_dirty_metadata(handle, dir_bh); - if (retval) { -journal_error: - ext3_std_error(new_dir->i_sb, retval); - goto end_rename; - } - drop_nlink(old_dir); - if (new_inode) { - drop_nlink(new_inode); - } else { - inc_nlink(new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } - ext3_mark_inode_dirty(handle, old_dir); - if (new_inode) { - ext3_mark_inode_dirty(handle, new_inode); - if (!new_inode->i_nlink) - ext3_orphan_add(handle, new_inode); - if 
(ext3_should_writeback_data(new_inode)) - flush_file = 1; - } - retval = 0; - -end_rename: - brelse (dir_bh); - brelse (old_bh); - brelse (new_bh); - ext3_journal_stop(handle); - if (retval == 0 && flush_file) - filemap_flush(old_inode->i_mapping); - return retval; -} - -/* - * directories can handle most operations... - */ -const struct inode_operations ext3_dir_inode_operations = { - .create = ext3_create, - .lookup = ext3_lookup, - .link = ext3_link, - .unlink = ext3_unlink, - .symlink = ext3_symlink, - .mkdir = ext3_mkdir, - .rmdir = ext3_rmdir, - .mknod = ext3_mknod, - .tmpfile = ext3_tmpfile, - .rename = ext3_rename, - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, - .removexattr = generic_removexattr, -#endif - .get_acl = ext3_get_acl, - .set_acl = ext3_set_acl, -}; - -const struct inode_operations ext3_special_inode_operations = { - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, - .removexattr = generic_removexattr, -#endif - .get_acl = ext3_get_acl, - .set_acl = ext3_set_acl, -}; diff --git a/fs/ext3/namei.h b/fs/ext3/namei.h deleted file mode 100644 index 46304d8c9f0a..000000000000 --- a/fs/ext3/namei.h +++ /dev/null @@ -1,27 +0,0 @@ -/* linux/fs/ext3/namei.h - * - * Copyright (C) 2005 Simtec Electronics - * Ben Dooks - * -*/ - -extern struct dentry *ext3_get_parent(struct dentry *child); - -static inline struct buffer_head *ext3_dir_bread(handle_t *handle, - struct inode *inode, - int block, int create, - int *err) -{ - struct buffer_head *bh; - - bh = ext3_bread(handle, inode, block, create, err); - - if (!bh && !(*err)) { - *err = -EIO; - ext3_error(inode->i_sb, __func__, - "Directory hole detected on inode %lu\n", - inode->i_ino); - return NULL; - } - return bh; -} diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c deleted file mode 100644 index 
27105655502c..000000000000 --- a/fs/ext3/resize.c +++ /dev/null @@ -1,1117 +0,0 @@ -/* - * linux/fs/ext3/resize.c - * - * Support for resizing an ext3 filesystem while it is mounted. - * - * Copyright (C) 2001, 2002 Andreas Dilger - * - * This could probably be made into a module, because it is not often in use. - */ - - -#define EXT3FS_DEBUG - -#include "ext3.h" - - -#define outside(b, first, last) ((b) < (first) || (b) >= (last)) -#define inside(b, first, last) ((b) >= (first) && (b) < (last)) - -static int verify_group_input(struct super_block *sb, - struct ext3_new_group_data *input) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count); - ext3_fsblk_t end = start + input->blocks_count; - unsigned group = input->group; - ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; - unsigned overhead = ext3_bg_has_super(sb, group) ? - (1 + ext3_bg_num_gdb(sb, group) + - le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; - ext3_fsblk_t metaend = start + overhead; - struct buffer_head *bh = NULL; - ext3_grpblk_t free_blocks_count; - int err = -EINVAL; - - input->free_blocks_count = free_blocks_count = - input->blocks_count - 2 - overhead - sbi->s_itb_per_group; - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks " - "(%d free, %u reserved)\n", - ext3_bg_has_super(sb, input->group) ? 
"normal" : - "no-super", input->group, input->blocks_count, - free_blocks_count, input->reserved_blocks); - - if (group != sbi->s_groups_count) - ext3_warning(sb, __func__, - "Cannot add at group %u (only %lu groups)", - input->group, sbi->s_groups_count); - else if ((start - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)) - ext3_warning(sb, __func__, "Last group not full"); - else if (input->reserved_blocks > input->blocks_count / 5) - ext3_warning(sb, __func__, "Reserved blocks too high (%u)", - input->reserved_blocks); - else if (free_blocks_count < 0) - ext3_warning(sb, __func__, "Bad blocks count %u", - input->blocks_count); - else if (!(bh = sb_bread(sb, end - 1))) - ext3_warning(sb, __func__, - "Cannot read last block ("E3FSBLK")", - end - 1); - else if (outside(input->block_bitmap, start, end)) - ext3_warning(sb, __func__, - "Block bitmap not in group (block %u)", - input->block_bitmap); - else if (outside(input->inode_bitmap, start, end)) - ext3_warning(sb, __func__, - "Inode bitmap not in group (block %u)", - input->inode_bitmap); - else if (outside(input->inode_table, start, end) || - outside(itend - 1, start, end)) - ext3_warning(sb, __func__, - "Inode table not in group (blocks %u-"E3FSBLK")", - input->inode_table, itend - 1); - else if (input->inode_bitmap == input->block_bitmap) - ext3_warning(sb, __func__, - "Block bitmap same as inode bitmap (%u)", - input->block_bitmap); - else if (inside(input->block_bitmap, input->inode_table, itend)) - ext3_warning(sb, __func__, - "Block bitmap (%u) in inode table (%u-"E3FSBLK")", - input->block_bitmap, input->inode_table, itend-1); - else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext3_warning(sb, __func__, - "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", - input->inode_bitmap, input->inode_table, itend-1); - else if (inside(input->block_bitmap, start, metaend)) - ext3_warning(sb, __func__, - "Block bitmap (%u) in GDT table" - " ("E3FSBLK"-"E3FSBLK")", - 
input->block_bitmap, start, metaend - 1); - else if (inside(input->inode_bitmap, start, metaend)) - ext3_warning(sb, __func__, - "Inode bitmap (%u) in GDT table" - " ("E3FSBLK"-"E3FSBLK")", - input->inode_bitmap, start, metaend - 1); - else if (inside(input->inode_table, start, metaend) || - inside(itend - 1, start, metaend)) - ext3_warning(sb, __func__, - "Inode table (%u-"E3FSBLK") overlaps" - "GDT table ("E3FSBLK"-"E3FSBLK")", - input->inode_table, itend - 1, start, metaend - 1); - else - err = 0; - brelse(bh); - - return err; -} - -static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, - ext3_fsblk_t blk) -{ - struct buffer_head *bh; - int err; - - bh = sb_getblk(sb, blk); - if (unlikely(!bh)) - return ERR_PTR(-ENOMEM); - if ((err = ext3_journal_get_write_access(handle, bh))) { - brelse(bh); - bh = ERR_PTR(err); - } else { - lock_buffer(bh); - memset(bh->b_data, 0, sb->s_blocksize); - set_buffer_uptodate(bh); - unlock_buffer(bh); - } - - return bh; -} - -/* - * To avoid calling the atomic setbit hundreds or thousands of times, we only - * need to use it within a single byte (to ensure we get endianness right). - * We can use memset for the rest of the bitmap as there are no other users. - */ -static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) -{ - int i; - - if (start_bit >= end_bit) - return; - - ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); - for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) - ext3_set_bit(i, bitmap); - if (i < end_bit) - memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); -} - -/* - * If we have fewer than thresh credits, extend by EXT3_MAX_TRANS_DATA. - * If that fails, restart the transaction & regain write access for the - * buffer head which is used for block_bitmap modifications. 
- */ -static int extend_or_restart_transaction(handle_t *handle, int thresh, - struct buffer_head *bh) -{ - int err; - - if (handle->h_buffer_credits >= thresh) - return 0; - - err = ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA); - if (err < 0) - return err; - if (err) { - err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA); - if (err) - return err; - err = ext3_journal_get_write_access(handle, bh); - if (err) - return err; - } - - return 0; -} - -/* - * Set up the block and inode bitmaps, and the inode table for the new group. - * This doesn't need to be part of the main transaction, since we are only - * changing blocks outside the actual filesystem. We still do journaling to - * ensure the recovery is correct in case of a failure just after resize. - * If any part of this fails, we simply abort the resize. - */ -static int setup_new_group_blocks(struct super_block *sb, - struct ext3_new_group_data *input) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); - int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 
- le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; - unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); - struct buffer_head *bh; - handle_t *handle; - ext3_fsblk_t block; - ext3_grpblk_t bit; - int i; - int err = 0, err2; - - /* This transaction may be extended/restarted along the way */ - handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); - - if (IS_ERR(handle)) - return PTR_ERR(handle); - - mutex_lock(&sbi->s_resize_lock); - if (input->group != sbi->s_groups_count) { - err = -EBUSY; - goto exit_journal; - } - - if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { - err = PTR_ERR(bh); - goto exit_journal; - } - - if (ext3_bg_has_super(sb, input->group)) { - ext3_debug("mark backup superblock %#04lx (+0)\n", start); - ext3_set_bit(0, bh->b_data); - } - - /* Copy all of the GDT blocks into the backup in this group */ - for (i = 0, bit = 1, block = start + 1; - i < gdblocks; i++, block++, bit++) { - struct buffer_head *gdb; - - ext3_debug("update backup group %#04lx (+%d)\n", block, bit); - - err = extend_or_restart_transaction(handle, 1, bh); - if (err) - goto exit_bh; - - gdb = sb_getblk(sb, block); - if (unlikely(!gdb)) { - err = -ENOMEM; - goto exit_bh; - } - if ((err = ext3_journal_get_write_access(handle, gdb))) { - brelse(gdb); - goto exit_bh; - } - lock_buffer(gdb); - memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); - set_buffer_uptodate(gdb); - unlock_buffer(gdb); - err = ext3_journal_dirty_metadata(handle, gdb); - if (err) { - brelse(gdb); - goto exit_bh; - } - ext3_set_bit(bit, bh->b_data); - brelse(gdb); - } - - /* Zero out all of the reserved backup group descriptor table blocks */ - for (i = 0, bit = gdblocks + 1, block = start + bit; - i < reserved_gdb; i++, block++, bit++) { - struct buffer_head *gdb; - - ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit); - - err = extend_or_restart_transaction(handle, 1, bh); - if (err) - goto exit_bh; - - if (IS_ERR(gdb = bclean(handle, sb, block))) { - err = 
PTR_ERR(gdb); - goto exit_bh; - } - err = ext3_journal_dirty_metadata(handle, gdb); - if (err) { - brelse(gdb); - goto exit_bh; - } - ext3_set_bit(bit, bh->b_data); - brelse(gdb); - } - ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, - input->block_bitmap - start); - ext3_set_bit(input->block_bitmap - start, bh->b_data); - ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, - input->inode_bitmap - start); - ext3_set_bit(input->inode_bitmap - start, bh->b_data); - - /* Zero out all of the inode table blocks */ - for (i = 0, block = input->inode_table, bit = block - start; - i < sbi->s_itb_per_group; i++, bit++, block++) { - struct buffer_head *it; - - ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); - - err = extend_or_restart_transaction(handle, 1, bh); - if (err) - goto exit_bh; - - if (IS_ERR(it = bclean(handle, sb, block))) { - err = PTR_ERR(it); - goto exit_bh; - } - err = ext3_journal_dirty_metadata(handle, it); - if (err) { - brelse(it); - goto exit_bh; - } - brelse(it); - ext3_set_bit(bit, bh->b_data); - } - - err = extend_or_restart_transaction(handle, 2, bh); - if (err) - goto exit_bh; - - mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), - bh->b_data); - err = ext3_journal_dirty_metadata(handle, bh); - if (err) - goto exit_bh; - brelse(bh); - - /* Mark unused entries in inode bitmap used */ - ext3_debug("clear inode bitmap %#04x (+%ld)\n", - input->inode_bitmap, input->inode_bitmap - start); - if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { - err = PTR_ERR(bh); - goto exit_journal; - } - - mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), - bh->b_data); - err = ext3_journal_dirty_metadata(handle, bh); -exit_bh: - brelse(bh); - -exit_journal: - mutex_unlock(&sbi->s_resize_lock); - if ((err2 = ext3_journal_stop(handle)) && !err) - err = err2; - - return err; -} - -/* - * Iterate through the groups which hold BACKUP superblock/GDT copies in an - * ext3 filesystem. 
The counters should be initialized to 1, 5, and 7 before - * calling this for the first time. In a sparse filesystem it will be the - * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... - * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... - */ -static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, - unsigned *five, unsigned *seven) -{ - unsigned *min = three; - int mult = 3; - unsigned ret; - - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ret = *min; - *min += 1; - return ret; - } - - if (*five < *min) { - min = five; - mult = 5; - } - if (*seven < *min) { - min = seven; - mult = 7; - } - - ret = *min; - *min *= mult; - - return ret; -} - -/* - * Check that all of the backup GDT blocks are held in the primary GDT block. - * It is assumed that they are stored in group order. Returns the number of - * groups in current filesystem that have BACKUPS, or -ve error code. - */ -static int verify_reserved_gdb(struct super_block *sb, - struct buffer_head *primary) -{ - const ext3_fsblk_t blk = primary->b_blocknr; - const unsigned long end = EXT3_SB(sb)->s_groups_count; - unsigned three = 1; - unsigned five = 5; - unsigned seven = 7; - unsigned grp; - __le32 *p = (__le32 *)primary->b_data; - int gdbackups = 0; - - while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { - if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ - ext3_warning(sb, __func__, - "reserved GDT "E3FSBLK - " missing grp %d ("E3FSBLK")", - blk, grp, - grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); - return -EINVAL; - } - if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb)) - return -EFBIG; - } - - return gdbackups; -} - -/* - * Called when we need to bring a reserved group descriptor table block into - * use from the resize inode. The primary copy of the new GDT block currently - * is an indirect block (under the double indirect block in the resize inode). 
- * The new backup GDT blocks will be stored as leaf blocks in this indirect - * block, in group order. Even though we know all the block numbers we need, - * we check to ensure that the resize inode has actually reserved these blocks. - * - * Don't need to update the block bitmaps because the blocks are still in use. - * - * We get all of the error cases out of the way, so that we are sure to not - * fail once we start modifying the data on disk, because JBD has no rollback. - */ -static int add_new_gdb(handle_t *handle, struct inode *inode, - struct ext3_new_group_data *input, - struct buffer_head **primary) -{ - struct super_block *sb = inode->i_sb; - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); - ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; - struct buffer_head **o_group_desc, **n_group_desc; - struct buffer_head *dind; - int gdbackups; - struct ext3_iloc iloc; - __le32 *data; - int err; - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG - "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n", - gdb_num); - - /* - * If we are not using the primary superblock/GDT copy don't resize, - * because the user tools have no way of handling this. Probably a - * bad time to do it anyways. 
- */ - if (EXT3_SB(sb)->s_sbh->b_blocknr != - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { - ext3_warning(sb, __func__, - "won't resize using backup superblock at %llu", - (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); - return -EPERM; - } - - *primary = sb_bread(sb, gdblock); - if (!*primary) - return -EIO; - - if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { - err = gdbackups; - goto exit_bh; - } - - data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; - goto exit_bh; - } - - data = (__le32 *)dind->b_data; - if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext3_warning(sb, __func__, - "new group %u GDT block "E3FSBLK" not reserved", - input->group, gdblock); - err = -EINVAL; - goto exit_dind; - } - - if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh))) - goto exit_dind; - - if ((err = ext3_journal_get_write_access(handle, *primary))) - goto exit_sbh; - - if ((err = ext3_journal_get_write_access(handle, dind))) - goto exit_primary; - - /* ext3_reserve_inode_write() gets a reference on the iloc */ - if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) - goto exit_dindj; - - n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), - GFP_NOFS); - if (!n_group_desc) { - err = -ENOMEM; - ext3_warning (sb, __func__, - "not enough memory for %lu groups", gdb_num + 1); - goto exit_inode; - } - - /* - * Finally, we have all of the possible failures behind us... - * - * Remove new GDT block from inode double-indirect block and clear out - * the new GDT block for use (which also "frees" the backup GDT blocks - * from the reserved inode). We don't need to change the bitmaps for - * these blocks, because they are marked as in-use from being in the - * reserved inode, and will become GDT blocks (primary and backup). 
- */ - data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; - err = ext3_journal_dirty_metadata(handle, dind); - if (err) - goto exit_group_desc; - brelse(dind); - dind = NULL; - inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (err) - goto exit_group_desc; - memset((*primary)->b_data, 0, sb->s_blocksize); - err = ext3_journal_dirty_metadata(handle, *primary); - if (err) - goto exit_group_desc; - - o_group_desc = EXT3_SB(sb)->s_group_desc; - memcpy(n_group_desc, o_group_desc, - EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); - n_group_desc[gdb_num] = *primary; - EXT3_SB(sb)->s_group_desc = n_group_desc; - EXT3_SB(sb)->s_gdb_count++; - kfree(o_group_desc); - - le16_add_cpu(&es->s_reserved_gdt_blocks, -1); - err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto exit_inode; - - return 0; - -exit_group_desc: - kfree(n_group_desc); -exit_inode: - //ext3_journal_release_buffer(handle, iloc.bh); - brelse(iloc.bh); -exit_dindj: - //ext3_journal_release_buffer(handle, dind); -exit_primary: - //ext3_journal_release_buffer(handle, *primary); -exit_sbh: - //ext3_journal_release_buffer(handle, *primary); -exit_dind: - brelse(dind); -exit_bh: - brelse(*primary); - - ext3_debug("leaving with error %d\n", err); - return err; -} - -/* - * Called when we are adding a new group which has a backup copy of each of - * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. - * We need to add these reserved backup GDT blocks to the resize inode, so - * that they are kept for future resizing and not allocated to files. - * - * Each reserved backup GDT block will go into a different indirect block. - * The indirect blocks are actually the primary reserved GDT blocks, - * so we know in advance what their block numbers are. We only get the - * double-indirect block to verify it is pointing to the primary reserved - * GDT blocks so we don't overwrite a data block by accident. 
The reserved - * backup GDT blocks are stored in their reserved primary GDT block. - */ -static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - struct ext3_new_group_data *input) -{ - struct super_block *sb = inode->i_sb; - int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks); - struct buffer_head **primary; - struct buffer_head *dind; - struct ext3_iloc iloc; - ext3_fsblk_t blk; - __le32 *data, *end; - int gdbackups = 0; - int res, i; - int err; - - primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS); - if (!primary) - return -ENOMEM; - - data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; - dind = sb_bread(sb, le32_to_cpu(*data)); - if (!dind) { - err = -EIO; - goto exit_free; - } - - blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + (EXT3_SB(sb)->s_gdb_count % - EXT3_ADDR_PER_BLOCK(sb)); - end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); - - /* Get each reserved primary GDT block and verify it holds backups */ - for (res = 0; res < reserved_gdb; res++, blk++) { - if (le32_to_cpu(*data) != blk) { - ext3_warning(sb, __func__, - "reserved block "E3FSBLK - " not at offset %ld", - blk, - (long)(data - (__le32 *)dind->b_data)); - err = -EINVAL; - goto exit_bh; - } - primary[res] = sb_bread(sb, blk); - if (!primary[res]) { - err = -EIO; - goto exit_bh; - } - if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { - brelse(primary[res]); - err = gdbackups; - goto exit_bh; - } - if (++data >= end) - data = (__le32 *)dind->b_data; - } - - for (i = 0; i < reserved_gdb; i++) { - if ((err = ext3_journal_get_write_access(handle, primary[i]))) { - /* - int j; - for (j = 0; j < i; j++) - ext3_journal_release_buffer(handle, primary[j]); - */ - goto exit_bh; - } - } - - if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) - goto exit_bh; - - /* - * Finally we can add each of the reserved backup GDT blocks from - * the new group to its reserved primary GDT block. 
- */ - blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); - for (i = 0; i < reserved_gdb; i++) { - int err2; - data = (__le32 *)primary[i]->b_data; - /* printk("reserving backup %lu[%u] = %lu\n", - primary[i]->b_blocknr, gdbackups, - blk + primary[i]->b_blocknr); */ - data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); - err2 = ext3_journal_dirty_metadata(handle, primary[i]); - if (!err) - err = err2; - } - inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; - ext3_mark_iloc_dirty(handle, inode, &iloc); - -exit_bh: - while (--res >= 0) - brelse(primary[res]); - brelse(dind); - -exit_free: - kfree(primary); - - return err; -} - -/* - * Update the backup copies of the ext3 metadata. These don't need to be part - * of the main resize transaction, because e2fsck will re-write them if there - * is a problem (basically only OOM will cause a problem). However, we - * _should_ update the backups if possible, in case the primary gets trashed - * for some reason and we need to run e2fsck from a backup superblock. The - * important part is that the new block and inode counts are in the backup - * superblocks, and the location of the new group metadata in the GDT backups. - * - * We do not need take the s_resize_lock for this, because these - * blocks are not otherwise touched by the filesystem code when it is - * mounted. We don't need to worry about last changing from - * sbi->s_groups_count, because the worst that can happen is that we - * do not copy the full number of backups at this time. The resize - * which changed s_groups_count will backup again. 
- */ -static void update_backups(struct super_block *sb, - int blk_off, char *data, int size) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - const unsigned long last = sbi->s_groups_count; - const int bpg = EXT3_BLOCKS_PER_GROUP(sb); - unsigned three = 1; - unsigned five = 5; - unsigned seven = 7; - unsigned group; - int rest = sb->s_blocksize - size; - handle_t *handle; - int err = 0, err2; - - handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); - if (IS_ERR(handle)) { - group = 1; - err = PTR_ERR(handle); - goto exit_err; - } - - while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) { - struct buffer_head *bh; - - /* Out of journal space, and can't get more - abort - so sad */ - if (handle->h_buffer_credits == 0 && - ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) && - (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA))) - break; - - bh = sb_getblk(sb, group * bpg + blk_off); - if (unlikely(!bh)) { - err = -ENOMEM; - break; - } - ext3_debug("update metadata backup %#04lx\n", - (unsigned long)bh->b_blocknr); - if ((err = ext3_journal_get_write_access(handle, bh))) { - brelse(bh); - break; - } - lock_buffer(bh); - memcpy(bh->b_data, data, size); - if (rest) - memset(bh->b_data + size, 0, rest); - set_buffer_uptodate(bh); - unlock_buffer(bh); - err = ext3_journal_dirty_metadata(handle, bh); - brelse(bh); - if (err) - break; - } - if ((err2 = ext3_journal_stop(handle)) && !err) - err = err2; - - /* - * Ugh! Need to have e2fsck write the backup copies. It is too - * late to revert the resize, we shouldn't fail just because of - * the backup copies (they are only needed in case of corruption). - * - * However, if we got here we have a journal problem too, so we - * can't really start a transaction to mark the superblock. - * Chicken out and just set the flag on the hope it will be written - * to disk, and if not - we will simply wait until next fsck. 
- */ -exit_err: - if (err) { - ext3_warning(sb, __func__, - "can't update backup for group %d (err %d), " - "forcing fsck on next reboot", group, err); - sbi->s_mount_state &= ~EXT3_VALID_FS; - sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS); - mark_buffer_dirty(sbi->s_sbh); - } -} - -/* Add group descriptor data to an existing or new group descriptor block. - * Ensure we handle all possible error conditions _before_ we start modifying - * the filesystem, because we cannot abort the transaction and not have it - * write the data to disk. - * - * If we are on a GDT block boundary, we need to get the reserved GDT block. - * Otherwise, we may need to add backup GDT blocks for a sparse group. - * - * We only need to hold the superblock lock while we are actually adding - * in the new group's counts to the superblock. Prior to that we have - * not really "added" the group at all. We re-check that we are still - * adding in the last group in case things have changed since verifying. - */ -int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 
- le16_to_cpu(es->s_reserved_gdt_blocks) : 0; - struct buffer_head *primary = NULL; - struct ext3_group_desc *gdp; - struct inode *inode = NULL; - handle_t *handle; - int gdb_off, gdb_num; - int err, err2; - - gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); - gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb); - - if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext3_warning(sb, __func__, - "Can't resize non-sparse filesystem further"); - return -EPERM; - } - - if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < - le32_to_cpu(es->s_blocks_count)) { - ext3_warning(sb, __func__, "blocks_count overflow\n"); - return -EINVAL; - } - - if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < - le32_to_cpu(es->s_inodes_count)) { - ext3_warning(sb, __func__, "inodes_count overflow\n"); - return -EINVAL; - } - - if (reserved_gdb || gdb_off == 0) { - if (!EXT3_HAS_COMPAT_FEATURE(sb, - EXT3_FEATURE_COMPAT_RESIZE_INODE) - || !le16_to_cpu(es->s_reserved_gdt_blocks)) { - ext3_warning(sb, __func__, - "No reserved GDT blocks, can't resize"); - return -EPERM; - } - inode = ext3_iget(sb, EXT3_RESIZE_INO); - if (IS_ERR(inode)) { - ext3_warning(sb, __func__, - "Error opening resize inode"); - return PTR_ERR(inode); - } - } - - if ((err = verify_group_input(sb, input))) - goto exit_put; - - if ((err = setup_new_group_blocks(sb, input))) - goto exit_put; - - /* - * We will always be modifying at least the superblock and a GDT - * block. If we are adding a group past the last current GDT block, - * we will also modify the inode and the dindirect block. If we - * are adding a group with superblock/GDT backups we will also - * modify each of the reserved GDT dindirect blocks. - */ - handle = ext3_journal_start_sb(sb, - ext3_bg_has_super(sb, input->group) ? 
- 3 + reserved_gdb : 4); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto exit_put; - } - - mutex_lock(&sbi->s_resize_lock); - if (input->group != sbi->s_groups_count) { - ext3_warning(sb, __func__, - "multiple resizers run on filesystem!"); - err = -EBUSY; - goto exit_journal; - } - - if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh))) - goto exit_journal; - - /* - * We will only either add reserved group blocks to a backup group - * or remove reserved blocks for the first group in a new group block. - * Doing both would be mean more complex code, and sane people don't - * use non-sparse filesystems anymore. This is already checked above. - */ - if (gdb_off) { - primary = sbi->s_group_desc[gdb_num]; - if ((err = ext3_journal_get_write_access(handle, primary))) - goto exit_journal; - - if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) && - (err = reserve_backup_gdb(handle, inode, input))) - goto exit_journal; - } else if ((err = add_new_gdb(handle, inode, input, &primary))) - goto exit_journal; - - /* - * OK, now we've set up the new group. Time to make it active. - * - * We do not lock all allocations via s_resize_lock - * so we have to be safe wrt. concurrent accesses the group - * data. So we need to be careful to set all of the relevant - * group descriptor data etc. *before* we enable the group. - * - * The key field here is sbi->s_groups_count: as long as - * that retains its old value, nobody is going to access the new - * group. - * - * So first we update all the descriptor metadata for the new - * group; then we update the total disk blocks count; then we - * update the groups count to enable the group; then finally we - * update the free space counts so that the system can start - * using the new disk blocks. 
- */ - - /* Update group descriptor block for new group */ - gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; - - gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); - gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); - gdp->bg_inode_table = cpu_to_le32(input->inode_table); - gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); - - /* - * Make the new blocks and inodes valid next. We do this before - * increasing the group count so that once the group is enabled, - * all of its blocks and inodes are already valid. - * - * We always allocate group-by-group, then block-by-block or - * inode-by-inode within a group, so enabling these - * blocks/inodes before the group is live won't actually let us - * allocate the new space yet. - */ - le32_add_cpu(&es->s_blocks_count, input->blocks_count); - le32_add_cpu(&es->s_inodes_count, EXT3_INODES_PER_GROUP(sb)); - - /* - * We need to protect s_groups_count against other CPUs seeing - * inconsistent state in the superblock. - * - * The precise rules we use are: - * - * * Writers of s_groups_count *must* hold s_resize_lock - * AND - * * Writers must perform a smp_wmb() after updating all dependent - * data and before modifying the groups count - * - * * Readers must hold s_resize_lock over the access - * OR - * * Readers must perform an smp_rmb() after reading the groups count - * and before reading any dependent data. - * - * NB. These rules can be relaxed when checking the group count - * while freeing data, as we can only allocate from a block - * group after serialising against the group count, and we can - * only then free after serialising in turn against that - * allocation. 
- */ - smp_wmb(); - - /* Update the global fs size fields */ - sbi->s_groups_count++; - - err = ext3_journal_dirty_metadata(handle, primary); - if (err) - goto exit_journal; - - /* Update the reserved block counts only once the new group is - * active. */ - le32_add_cpu(&es->s_r_blocks_count, input->reserved_blocks); - - /* Update the free space counts */ - percpu_counter_add(&sbi->s_freeblocks_counter, - input->free_blocks_count); - percpu_counter_add(&sbi->s_freeinodes_counter, - EXT3_INODES_PER_GROUP(sb)); - - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - -exit_journal: - mutex_unlock(&sbi->s_resize_lock); - if ((err2 = ext3_journal_stop(handle)) && !err) - err = err2; - if (!err) { - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext3_super_block)); - update_backups(sb, primary->b_blocknr, primary->b_data, - primary->b_size); - } -exit_put: - iput(inode); - return err; -} /* ext3_group_add */ - -/* Extend the filesystem to the new number of blocks specified. This entry - * point is only used to extend the current filesystem to the end of the last - * existing group. It can be accessed via ioctl, or by "remount,resize=" - * for emergencies (because it has no dependencies on reserved blocks). - * - * If we _really_ wanted, we could use default values to call ext3_group_add() - * allow the "remount" trick to work for arbitrary resizing, assuming enough - * GDT blocks are reserved to grow to the desired size. - */ -int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, - ext3_fsblk_t n_blocks_count) -{ - ext3_fsblk_t o_blocks_count; - ext3_grpblk_t last; - ext3_grpblk_t add; - struct buffer_head * bh; - handle_t *handle; - int err; - unsigned long freed_blocks; - - /* We don't need to worry about locking wrt other resizers just - * yet: we're going to revalidate es->s_blocks_count after - * taking the s_resize_lock below. 
*/ - o_blocks_count = le32_to_cpu(es->s_blocks_count); - - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK - " up to "E3FSBLK" blocks\n", - o_blocks_count, n_blocks_count); - - if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) - return 0; - - if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT3-fs: filesystem on %s:" - " too large to resize to "E3FSBLK" blocks safely\n", - sb->s_id, n_blocks_count); - if (sizeof(sector_t) < 8) - ext3_warning(sb, __func__, - "CONFIG_LBDAF not enabled\n"); - return -EINVAL; - } - - if (n_blocks_count < o_blocks_count) { - ext3_warning(sb, __func__, - "can't shrink FS - resize aborted"); - return -EBUSY; - } - - /* Handle the remaining blocks in the last group only. */ - last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb); - - if (last == 0) { - ext3_warning(sb, __func__, - "need to use ext2online to resize further"); - return -EPERM; - } - - add = EXT3_BLOCKS_PER_GROUP(sb) - last; - - if (o_blocks_count + add < o_blocks_count) { - ext3_warning(sb, __func__, "blocks_count overflow"); - return -EINVAL; - } - - if (o_blocks_count + add > n_blocks_count) - add = n_blocks_count - o_blocks_count; - - if (o_blocks_count + add < n_blocks_count) - ext3_warning(sb, __func__, - "will only finish group ("E3FSBLK - " blocks, %u new)", - o_blocks_count + add, add); - - /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, o_blocks_count + add -1); - if (!bh) { - ext3_warning(sb, __func__, - "can't read last block, resize aborted"); - return -ENOSPC; - } - brelse(bh); - - /* We will update the superblock, one block bitmap, and - * one group descriptor via ext3_free_blocks(). 
- */ - handle = ext3_journal_start_sb(sb, 3); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - ext3_warning(sb, __func__, "error %d on journal start",err); - goto exit_put; - } - - mutex_lock(&EXT3_SB(sb)->s_resize_lock); - if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { - ext3_warning(sb, __func__, - "multiple resizers run on filesystem!"); - mutex_unlock(&EXT3_SB(sb)->s_resize_lock); - ext3_journal_stop(handle); - err = -EBUSY; - goto exit_put; - } - - if ((err = ext3_journal_get_write_access(handle, - EXT3_SB(sb)->s_sbh))) { - ext3_warning(sb, __func__, - "error %d on journal write access", err); - mutex_unlock(&EXT3_SB(sb)->s_resize_lock); - ext3_journal_stop(handle); - goto exit_put; - } - es->s_blocks_count = cpu_to_le32(o_blocks_count + add); - err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - mutex_unlock(&EXT3_SB(sb)->s_resize_lock); - if (err) { - ext3_warning(sb, __func__, - "error %d on journal dirty metadata", err); - ext3_journal_stop(handle); - goto exit_put; - } - ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n", - o_blocks_count, o_blocks_count + add); - ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); - ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", - o_blocks_count, o_blocks_count + add); - if ((err = ext3_journal_stop(handle))) - goto exit_put; - if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", - le32_to_cpu(es->s_blocks_count)); - update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext3_super_block)); -exit_put: - return err; -} /* ext3_group_extend */ diff --git a/fs/ext3/super.c b/fs/ext3/super.c deleted file mode 100644 index 5ed0044fbb37..000000000000 --- a/fs/ext3/super.c +++ /dev/null @@ -1,3165 +0,0 @@ -/* - * linux/fs/ext3/super.c - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * 
from - * - * linux/fs/minix/inode.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define CREATE_TRACE_POINTS - -#include "ext3.h" -#include "xattr.h" -#include "acl.h" -#include "namei.h" - -#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED - #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA -#else - #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA -#endif - -static int ext3_load_journal(struct super_block *, struct ext3_super_block *, - unsigned long journal_devnum); -static int ext3_create_journal(struct super_block *, struct ext3_super_block *, - unsigned int); -static int ext3_commit_super(struct super_block *sb, - struct ext3_super_block *es, - int sync); -static void ext3_mark_recovery_complete(struct super_block * sb, - struct ext3_super_block * es); -static void ext3_clear_journal_err(struct super_block * sb, - struct ext3_super_block * es); -static int ext3_sync_fs(struct super_block *sb, int wait); -static const char *ext3_decode_error(struct super_block * sb, int errno, - char nbuf[16]); -static int ext3_remount (struct super_block * sb, int * flags, char * data); -static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); -static int ext3_unfreeze(struct super_block *sb); -static int ext3_freeze(struct super_block *sb); - -/* - * Wrappers for journal_start/end. - */ -handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) -{ - journal_t *journal; - - if (sb->s_flags & MS_RDONLY) - return ERR_PTR(-EROFS); - - /* Special case here: if the journal has aborted behind our - * backs (eg. EIO in the commit thread), then we still need to - * take the FS itself readonly cleanly. 
*/ - journal = EXT3_SB(sb)->s_journal; - if (is_journal_aborted(journal)) { - ext3_abort(sb, __func__, - "Detected aborted journal"); - return ERR_PTR(-EROFS); - } - - return journal_start(journal, nblocks); -} - -int __ext3_journal_stop(const char *where, handle_t *handle) -{ - struct super_block *sb; - int err; - int rc; - - sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; - rc = journal_stop(handle); - - if (!err) - err = rc; - if (err) - __ext3_std_error(sb, where, err); - return err; -} - -void ext3_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err) -{ - char nbuf[16]; - const char *errstr = ext3_decode_error(NULL, err, nbuf); - - if (bh) - BUFFER_TRACE(bh, "abort"); - - if (!handle->h_err) - handle->h_err = err; - - if (is_handle_aborted(handle)) - return; - - printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n", - caller, errstr, err_fn); - - journal_abort_handle(handle); -} - -void ext3_msg(struct super_block *sb, const char *prefix, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk("%sEXT3-fs (%s): %pV\n", prefix, sb->s_id, &vaf); - - va_end(args); -} - -/* Deal with the reporting of failure conditions on a filesystem such as - * inconsistencies detected or read IO failures. - * - * On ext2, we can store the error state of the filesystem in the - * superblock. That is not possible on ext3, because we may have other - * write ordering constraints on the superblock which prevent us from - * writing it out straight away; and given that the journal is about to - * be aborted, we can't rely on the current, or future, transactions to - * write out the superblock safely. - * - * We'll just use the journal_abort() error code to record an error in - * the journal instead. On recovery, the journal will complain about - * that error until we've noted it down and cleared it. 
- */ - -static void ext3_handle_error(struct super_block *sb) -{ - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - - EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - - if (sb->s_flags & MS_RDONLY) - return; - - if (!test_opt (sb, ERRORS_CONT)) { - journal_t *journal = EXT3_SB(sb)->s_journal; - - set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); - if (journal) - journal_abort(journal, -EIO); - } - if (test_opt (sb, ERRORS_RO)) { - ext3_msg(sb, KERN_CRIT, - "error: remounting filesystem read-only"); - /* - * Make sure updated value of ->s_mount_state will be visible - * before ->s_flags update. - */ - smp_wmb(); - sb->s_flags |= MS_RDONLY; - } - ext3_commit_super(sb, es, 1); - if (test_opt(sb, ERRORS_PANIC)) - panic("EXT3-fs (%s): panic forced after error\n", - sb->s_id); -} - -void ext3_error(struct super_block *sb, const char *function, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk(KERN_CRIT "EXT3-fs error (device %s): %s: %pV\n", - sb->s_id, function, &vaf); - - va_end(args); - - ext3_handle_error(sb); -} - -static const char *ext3_decode_error(struct super_block * sb, int errno, - char nbuf[16]) -{ - char *errstr = NULL; - - switch (errno) { - case -EIO: - errstr = "IO failure"; - break; - case -ENOMEM: - errstr = "Out of memory"; - break; - case -EROFS: - if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) - errstr = "Journal has aborted"; - else - errstr = "Readonly filesystem"; - break; - default: - /* If the caller passed in an extra buffer for unknown - * errors, textualise them now. Else we just return - * NULL. */ - if (nbuf) { - /* Check for truncated error codes... */ - if (snprintf(nbuf, 16, "error %d", -errno) >= 0) - errstr = nbuf; - } - break; - } - - return errstr; -} - -/* __ext3_std_error decodes expected errors from journaling functions - * automatically and invokes the appropriate error response. 
*/ - -void __ext3_std_error (struct super_block * sb, const char * function, - int errno) -{ - char nbuf[16]; - const char *errstr; - - /* Special case: if the error is EROFS, and we're not already - * inside a transaction, then there's really no point in logging - * an error. */ - if (errno == -EROFS && journal_current_handle() == NULL && - (sb->s_flags & MS_RDONLY)) - return; - - errstr = ext3_decode_error(sb, errno, nbuf); - ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr); - - ext3_handle_error(sb); -} - -/* - * ext3_abort is a much stronger failure handler than ext3_error. The - * abort function may be used to deal with unrecoverable failures such - * as journal IO errors or ENOMEM at a critical moment in log management. - * - * We unconditionally force the filesystem into an ABORT|READONLY state, - * unless the error response on the fs has been set to panic in which - * case we take the easy way out and panic immediately. - */ - -void ext3_abort(struct super_block *sb, const char *function, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk(KERN_CRIT "EXT3-fs (%s): error: %s: %pV\n", - sb->s_id, function, &vaf); - - va_end(args); - - if (test_opt(sb, ERRORS_PANIC)) - panic("EXT3-fs: panic from previous error\n"); - - if (sb->s_flags & MS_RDONLY) - return; - - ext3_msg(sb, KERN_CRIT, - "error: remounting filesystem read-only"); - EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - set_opt(EXT3_SB(sb)->s_mount_opt, ABORT); - /* - * Make sure updated value of ->s_mount_state will be visible - * before ->s_flags update. - */ - smp_wmb(); - sb->s_flags |= MS_RDONLY; - - if (EXT3_SB(sb)->s_journal) - journal_abort(EXT3_SB(sb)->s_journal, -EIO); -} - -void ext3_warning(struct super_block *sb, const char *function, - const char *fmt, ...) 
-{ - struct va_format vaf; - va_list args; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - printk(KERN_WARNING "EXT3-fs (%s): warning: %s: %pV\n", - sb->s_id, function, &vaf); - - va_end(args); -} - -void ext3_update_dynamic_rev(struct super_block *sb) -{ - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - - if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) - return; - - ext3_msg(sb, KERN_WARNING, - "warning: updating to rev %d because of " - "new feature flag, running e2fsck is recommended", - EXT3_DYNAMIC_REV); - - es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); - es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); - es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); - /* leave es->s_feature_*compat flags alone */ - /* es->s_uuid will be set by e2fsck if empty */ - - /* - * The rest of the superblock fields should be zero, and if not it - * means they are likely already in use, so leave them alone. We - * can leave it up to e2fsck to clean up any inconsistencies there. 
- */ -} - -/* - * Open the external journal device - */ -static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb) -{ - struct block_device *bdev; - char b[BDEVNAME_SIZE]; - - bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb); - if (IS_ERR(bdev)) - goto fail; - return bdev; - -fail: - ext3_msg(sb, KERN_ERR, "error: failed to open journal device %s: %ld", - __bdevname(dev, b), PTR_ERR(bdev)); - - return NULL; -} - -/* - * Release the journal device - */ -static void ext3_blkdev_put(struct block_device *bdev) -{ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - -static void ext3_blkdev_remove(struct ext3_sb_info *sbi) -{ - struct block_device *bdev; - bdev = sbi->journal_bdev; - if (bdev) { - ext3_blkdev_put(bdev); - sbi->journal_bdev = NULL; - } -} - -static inline struct inode *orphan_list_entry(struct list_head *l) -{ - return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; -} - -static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) -{ - struct list_head *l; - - ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d", - le32_to_cpu(sbi->s_es->s_last_orphan)); - - ext3_msg(sb, KERN_ERR, "sb_info orphan list:"); - list_for_each(l, &sbi->s_orphan) { - struct inode *inode = orphan_list_entry(l); - ext3_msg(sb, KERN_ERR, " " - "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", - inode->i_sb->s_id, inode->i_ino, inode, - inode->i_mode, inode->i_nlink, - NEXT_ORPHAN(inode)); - } -} - -static void ext3_put_super (struct super_block * sb) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - int i, err; - - dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); - ext3_xattr_put_super(sb); - err = journal_destroy(sbi->s_journal); - sbi->s_journal = NULL; - if (err < 0) - ext3_abort(sb, __func__, "Couldn't clean up the journal"); - - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - 
es->s_state = cpu_to_le16(sbi->s_mount_state); - BUFFER_TRACE(sbi->s_sbh, "marking dirty"); - mark_buffer_dirty(sbi->s_sbh); - ext3_commit_super(sb, es, 1); - } - - for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); - brelse(sbi->s_sbh); -#ifdef CONFIG_QUOTA - for (i = 0; i < EXT3_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); -#endif - - /* Debugging code just in case the in-memory inode orphan list - * isn't empty. The on-disk one can be non-empty if we've - * detected an error and taken the fs readonly, but the - * in-memory list had better be clean by this point. */ - if (!list_empty(&sbi->s_orphan)) - dump_orphan_list(sb, sbi); - J_ASSERT(list_empty(&sbi->s_orphan)); - - invalidate_bdev(sb->s_bdev); - if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { - /* - * Invalidate the journal device's buffers. We don't want them - * floating about in memory - the physical journal device may - * hotswapped, and it breaks the `ro-after' testing code. 
- */ - sync_blockdev(sbi->journal_bdev); - invalidate_bdev(sbi->journal_bdev); - ext3_blkdev_remove(sbi); - } - sb->s_fs_info = NULL; - kfree(sbi->s_blockgroup_lock); - mutex_destroy(&sbi->s_orphan_lock); - mutex_destroy(&sbi->s_resize_lock); - kfree(sbi); -} - -static struct kmem_cache *ext3_inode_cachep; - -/* - * Called inside transaction, so use GFP_NOFS - */ -static struct inode *ext3_alloc_inode(struct super_block *sb) -{ - struct ext3_inode_info *ei; - - ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS); - if (!ei) - return NULL; - ei->i_block_alloc_info = NULL; - ei->vfs_inode.i_version = 1; - atomic_set(&ei->i_datasync_tid, 0); - atomic_set(&ei->i_sync_tid, 0); -#ifdef CONFIG_QUOTA - memset(&ei->i_dquot, 0, sizeof(ei->i_dquot)); -#endif - - return &ei->vfs_inode; -} - -static int ext3_drop_inode(struct inode *inode) -{ - int drop = generic_drop_inode(inode); - - trace_ext3_drop_inode(inode, drop); - return drop; -} - -static void ext3_i_callback(struct rcu_head *head) -{ - struct inode *inode = container_of(head, struct inode, i_rcu); - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); -} - -static void ext3_destroy_inode(struct inode *inode) -{ - if (!list_empty(&(EXT3_I(inode)->i_orphan))) { - printk("EXT3 Inode %p: orphan list check failed!\n", - EXT3_I(inode)); - print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, - EXT3_I(inode), sizeof(struct ext3_inode_info), - false); - dump_stack(); - } - call_rcu(&inode->i_rcu, ext3_i_callback); -} - -static void init_once(void *foo) -{ - struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; - - INIT_LIST_HEAD(&ei->i_orphan); -#ifdef CONFIG_EXT3_FS_XATTR - init_rwsem(&ei->xattr_sem); -#endif - mutex_init(&ei->truncate_mutex); - inode_init_once(&ei->vfs_inode); -} - -static int __init init_inodecache(void) -{ - ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", - sizeof(struct ext3_inode_info), - 0, (SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), - init_once); - if (ext3_inode_cachep == NULL) - 
return -ENOMEM; - return 0; -} - -static void destroy_inodecache(void) -{ - /* - * Make sure all delayed rcu free inodes are flushed before we - * destroy cache. - */ - rcu_barrier(); - kmem_cache_destroy(ext3_inode_cachep); -} - -static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) -{ -#if defined(CONFIG_QUOTA) - struct ext3_sb_info *sbi = EXT3_SB(sb); - - if (sbi->s_jquota_fmt) { - char *fmtname = ""; - - switch (sbi->s_jquota_fmt) { - case QFMT_VFS_OLD: - fmtname = "vfsold"; - break; - case QFMT_VFS_V0: - fmtname = "vfsv0"; - break; - case QFMT_VFS_V1: - fmtname = "vfsv1"; - break; - } - seq_printf(seq, ",jqfmt=%s", fmtname); - } - - if (sbi->s_qf_names[USRQUOTA]) - seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); - - if (sbi->s_qf_names[GRPQUOTA]) - seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (test_opt(sb, USRQUOTA)) - seq_puts(seq, ",usrquota"); - - if (test_opt(sb, GRPQUOTA)) - seq_puts(seq, ",grpquota"); -#endif -} - -static char *data_mode_string(unsigned long mode) -{ - switch (mode) { - case EXT3_MOUNT_JOURNAL_DATA: - return "journal"; - case EXT3_MOUNT_ORDERED_DATA: - return "ordered"; - case EXT3_MOUNT_WRITEBACK_DATA: - return "writeback"; - } - return "unknown"; -} - -/* - * Show an option if - * - it's set to a non-default value OR - * - if the per-sb default is different from the global default - */ -static int ext3_show_options(struct seq_file *seq, struct dentry *root) -{ - struct super_block *sb = root->d_sb; - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - unsigned long def_mount_opts; - - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - - if (sbi->s_sb_block != 1) - seq_printf(seq, ",sb=%lu", sbi->s_sb_block); - if (test_opt(sb, MINIX_DF)) - seq_puts(seq, ",minixdf"); - if (test_opt(sb, GRPID)) - seq_puts(seq, ",grpid"); - if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS)) - seq_puts(seq, ",nogrpid"); - if 
(!uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT3_DEF_RESUID)) || - le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) { - seq_printf(seq, ",resuid=%u", - from_kuid_munged(&init_user_ns, sbi->s_resuid)); - } - if (!gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT3_DEF_RESGID)) || - le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) { - seq_printf(seq, ",resgid=%u", - from_kgid_munged(&init_user_ns, sbi->s_resgid)); - } - if (test_opt(sb, ERRORS_RO)) { - int def_errors = le16_to_cpu(es->s_errors); - - if (def_errors == EXT3_ERRORS_PANIC || - def_errors == EXT3_ERRORS_CONTINUE) { - seq_puts(seq, ",errors=remount-ro"); - } - } - if (test_opt(sb, ERRORS_CONT)) - seq_puts(seq, ",errors=continue"); - if (test_opt(sb, ERRORS_PANIC)) - seq_puts(seq, ",errors=panic"); - if (test_opt(sb, NO_UID32)) - seq_puts(seq, ",nouid32"); - if (test_opt(sb, DEBUG)) - seq_puts(seq, ",debug"); -#ifdef CONFIG_EXT3_FS_XATTR - if (test_opt(sb, XATTR_USER)) - seq_puts(seq, ",user_xattr"); - if (!test_opt(sb, XATTR_USER) && - (def_mount_opts & EXT3_DEFM_XATTR_USER)) { - seq_puts(seq, ",nouser_xattr"); - } -#endif -#ifdef CONFIG_EXT3_FS_POSIX_ACL - if (test_opt(sb, POSIX_ACL)) - seq_puts(seq, ",acl"); - if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL)) - seq_puts(seq, ",noacl"); -#endif - if (!test_opt(sb, RESERVATION)) - seq_puts(seq, ",noreservation"); - if (sbi->s_commit_interval) { - seq_printf(seq, ",commit=%u", - (unsigned) (sbi->s_commit_interval / HZ)); - } - - /* - * Always display barrier state so it's clear what the status is. - */ - seq_puts(seq, ",barrier="); - seq_puts(seq, test_opt(sb, BARRIER) ? 
"1" : "0"); - seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS))); - if (test_opt(sb, DATA_ERR_ABORT)) - seq_puts(seq, ",data_err=abort"); - - if (test_opt(sb, NOLOAD)) - seq_puts(seq, ",norecovery"); - - ext3_show_quota_options(seq, sb); - - return 0; -} - - -static struct inode *ext3_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) -{ - struct inode *inode; - - if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) - return ERR_PTR(-ESTALE); - if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) - return ERR_PTR(-ESTALE); - - /* iget isn't really right if the inode is currently unallocated!! - * - * ext3_read_inode will return a bad_inode if the inode had been - * deleted, so we should be safe. - * - * Currently we don't know the generation for parent directory, so - * a generation of 0 means "accept any" - */ - inode = ext3_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - - return inode; -} - -static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - ext3_nfs_get_inode); -} - -static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) -{ - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - ext3_nfs_get_inode); -} - -/* - * Try to release metadata pages (indirect blocks, directories) which are - * mapped via the block device. Since these pages could have journal heads - * which would prevent try_to_free_buffers() from freeing them, we must use - * jbd layer's try_to_free_buffers() function to release them. 
- */ -static int bdev_try_to_free_page(struct super_block *sb, struct page *page, - gfp_t wait) -{ - journal_t *journal = EXT3_SB(sb)->s_journal; - - WARN_ON(PageChecked(page)); - if (!page_has_buffers(page)) - return 0; - if (journal) - return journal_try_to_free_buffers(journal, page, - wait & ~__GFP_WAIT); - return try_to_free_buffers(page); -} - -#ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") -#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) - -static int ext3_write_dquot(struct dquot *dquot); -static int ext3_acquire_dquot(struct dquot *dquot); -static int ext3_release_dquot(struct dquot *dquot); -static int ext3_mark_dquot_dirty(struct dquot *dquot); -static int ext3_write_info(struct super_block *sb, int type); -static int ext3_quota_on(struct super_block *sb, int type, int format_id, - struct path *path); -static int ext3_quota_on_mount(struct super_block *sb, int type); -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off); -static ssize_t ext3_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off); -static struct dquot **ext3_get_dquots(struct inode *inode) -{ - return EXT3_I(inode)->i_dquot; -} - -static const struct dquot_operations ext3_quota_operations = { - .write_dquot = ext3_write_dquot, - .acquire_dquot = ext3_acquire_dquot, - .release_dquot = ext3_release_dquot, - .mark_dirty = ext3_mark_dquot_dirty, - .write_info = ext3_write_info, - .alloc_dquot = dquot_alloc, - .destroy_dquot = dquot_destroy, -}; - -static const struct quotactl_ops ext3_qctl_operations = { - .quota_on = ext3_quota_on, - .quota_off = dquot_quota_off, - .quota_sync = dquot_quota_sync, - .get_state = dquot_get_state, - .set_info = dquot_set_dqinfo, - .get_dqblk = dquot_get_dqblk, - .set_dqblk = dquot_set_dqblk -}; -#endif - -static const struct super_operations ext3_sops = { - .alloc_inode = ext3_alloc_inode, - .destroy_inode = 
ext3_destroy_inode, - .write_inode = ext3_write_inode, - .dirty_inode = ext3_dirty_inode, - .drop_inode = ext3_drop_inode, - .evict_inode = ext3_evict_inode, - .put_super = ext3_put_super, - .sync_fs = ext3_sync_fs, - .freeze_fs = ext3_freeze, - .unfreeze_fs = ext3_unfreeze, - .statfs = ext3_statfs, - .remount_fs = ext3_remount, - .show_options = ext3_show_options, -#ifdef CONFIG_QUOTA - .quota_read = ext3_quota_read, - .quota_write = ext3_quota_write, - .get_dquots = ext3_get_dquots, -#endif - .bdev_try_to_free_page = bdev_try_to_free_page, -}; - -static const struct export_operations ext3_export_ops = { - .fh_to_dentry = ext3_fh_to_dentry, - .fh_to_parent = ext3_fh_to_parent, - .get_parent = ext3_get_parent, -}; - -enum { - Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, - Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, - Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, - Opt_journal_path, - Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, - Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_resize, Opt_usrquota, Opt_grpquota -}; - -static const match_table_t tokens = { - {Opt_bsd_df, "bsddf"}, - {Opt_minix_df, "minixdf"}, - {Opt_grpid, "grpid"}, - {Opt_grpid, "bsdgroups"}, - {Opt_nogrpid, "nogrpid"}, - {Opt_nogrpid, "sysvgroups"}, - {Opt_resgid, "resgid=%u"}, - {Opt_resuid, "resuid=%u"}, - {Opt_sb, "sb=%u"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_nouid32, "nouid32"}, - {Opt_nocheck, "nocheck"}, - {Opt_nocheck, "check=none"}, - {Opt_debug, "debug"}, - 
{Opt_oldalloc, "oldalloc"}, - {Opt_orlov, "orlov"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_reservation, "reservation"}, - {Opt_noreservation, "noreservation"}, - {Opt_noload, "noload"}, - {Opt_noload, "norecovery"}, - {Opt_nobh, "nobh"}, - {Opt_bh, "bh"}, - {Opt_commit, "commit=%u"}, - {Opt_journal_update, "journal=update"}, - {Opt_journal_inum, "journal=%u"}, - {Opt_journal_dev, "journal_dev=%u"}, - {Opt_journal_path, "journal_path=%s"}, - {Opt_abort, "abort"}, - {Opt_data_journal, "data=journal"}, - {Opt_data_ordered, "data=ordered"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_data_err_abort, "data_err=abort"}, - {Opt_data_err_ignore, "data_err=ignore"}, - {Opt_offusrjquota, "usrjquota="}, - {Opt_usrjquota, "usrjquota=%s"}, - {Opt_offgrpjquota, "grpjquota="}, - {Opt_grpjquota, "grpjquota=%s"}, - {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, - {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"}, - {Opt_grpquota, "grpquota"}, - {Opt_noquota, "noquota"}, - {Opt_quota, "quota"}, - {Opt_usrquota, "usrquota"}, - {Opt_barrier, "barrier=%u"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_resize, "resize"}, - {Opt_err, NULL}, -}; - -static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb) -{ - ext3_fsblk_t sb_block; - char *options = (char *) *data; - - if (!options || strncmp(options, "sb=", 3) != 0) - return 1; /* Default location */ - options += 3; - /*todo: use simple_strtoll with >32bit ext3 */ - sb_block = simple_strtoul(options, &options, 0); - if (*options && *options != ',') { - ext3_msg(sb, KERN_ERR, "error: invalid sb specification: %s", - (char *) *data); - return 1; - } - if (*options == ',') - options++; - *data = (void *) options; - return sb_block; -} - -#ifdef CONFIG_QUOTA -static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - char *qname; - - if 
(sb_any_quota_loaded(sb) && - !sbi->s_qf_names[qtype]) { - ext3_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); - return 0; - } - qname = match_strdup(args); - if (!qname) { - ext3_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); - return 0; - } - if (sbi->s_qf_names[qtype]) { - int same = !strcmp(sbi->s_qf_names[qtype], qname); - - kfree(qname); - if (!same) { - ext3_msg(sb, KERN_ERR, - "%s quota file already specified", - QTYPE2NAME(qtype)); - } - return same; - } - if (strchr(qname, '/')) { - ext3_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); - kfree(qname); - return 0; - } - sbi->s_qf_names[qtype] = qname; - set_opt(sbi->s_mount_opt, QUOTA); - return 1; -} - -static int clear_qf_name(struct super_block *sb, int qtype) { - - struct ext3_sb_info *sbi = EXT3_SB(sb); - - if (sb_any_quota_loaded(sb) && - sbi->s_qf_names[qtype]) { - ext3_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); - return 0; - } - if (sbi->s_qf_names[qtype]) { - kfree(sbi->s_qf_names[qtype]); - sbi->s_qf_names[qtype] = NULL; - } - return 1; -} -#endif - -static int parse_options (char *options, struct super_block *sb, - unsigned int *inum, unsigned long *journal_devnum, - ext3_fsblk_t *n_blocks_count, int is_remount) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - char * p; - substring_t args[MAX_OPT_ARGS]; - int data_opt = 0; - int option; - kuid_t uid; - kgid_t gid; - char *journal_path; - struct inode *journal_inode; - struct path path; - int error; - -#ifdef CONFIG_QUOTA - int qfmt; -#endif - - if (!options) - return 1; - - while ((p = strsep (&options, ",")) != NULL) { - int token; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. 
- */ - args[0].to = args[0].from = NULL; - token = match_token(p, tokens, args); - switch (token) { - case Opt_bsd_df: - clear_opt (sbi->s_mount_opt, MINIX_DF); - break; - case Opt_minix_df: - set_opt (sbi->s_mount_opt, MINIX_DF); - break; - case Opt_grpid: - set_opt (sbi->s_mount_opt, GRPID); - break; - case Opt_nogrpid: - clear_opt (sbi->s_mount_opt, GRPID); - break; - case Opt_resuid: - if (match_int(&args[0], &option)) - return 0; - uid = make_kuid(current_user_ns(), option); - if (!uid_valid(uid)) { - ext3_msg(sb, KERN_ERR, "Invalid uid value %d", option); - return 0; - - } - sbi->s_resuid = uid; - break; - case Opt_resgid: - if (match_int(&args[0], &option)) - return 0; - gid = make_kgid(current_user_ns(), option); - if (!gid_valid(gid)) { - ext3_msg(sb, KERN_ERR, "Invalid gid value %d", option); - return 0; - } - sbi->s_resgid = gid; - break; - case Opt_sb: - /* handled by get_sb_block() instead of here */ - /* *sb_block = match_int(&args[0]); */ - break; - case Opt_err_panic: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_RO); - set_opt (sbi->s_mount_opt, ERRORS_PANIC); - break; - case Opt_err_ro: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_RO); - break; - case Opt_err_cont: - clear_opt (sbi->s_mount_opt, ERRORS_RO); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_CONT); - break; - case Opt_nouid32: - set_opt (sbi->s_mount_opt, NO_UID32); - break; - case Opt_nocheck: - clear_opt (sbi->s_mount_opt, CHECK); - break; - case Opt_debug: - set_opt (sbi->s_mount_opt, DEBUG); - break; - case Opt_oldalloc: - ext3_msg(sb, KERN_WARNING, - "Ignoring deprecated oldalloc option"); - break; - case Opt_orlov: - ext3_msg(sb, KERN_WARNING, - "Ignoring deprecated orlov option"); - break; -#ifdef CONFIG_EXT3_FS_XATTR - case Opt_user_xattr: - set_opt (sbi->s_mount_opt, XATTR_USER); - break; - case Opt_nouser_xattr: - clear_opt 
(sbi->s_mount_opt, XATTR_USER); - break; -#else - case Opt_user_xattr: - case Opt_nouser_xattr: - ext3_msg(sb, KERN_INFO, - "(no)user_xattr options not supported"); - break; -#endif -#ifdef CONFIG_EXT3_FS_POSIX_ACL - case Opt_acl: - set_opt(sbi->s_mount_opt, POSIX_ACL); - break; - case Opt_noacl: - clear_opt(sbi->s_mount_opt, POSIX_ACL); - break; -#else - case Opt_acl: - case Opt_noacl: - ext3_msg(sb, KERN_INFO, - "(no)acl options not supported"); - break; -#endif - case Opt_reservation: - set_opt(sbi->s_mount_opt, RESERVATION); - break; - case Opt_noreservation: - clear_opt(sbi->s_mount_opt, RESERVATION); - break; - case Opt_journal_update: - /* @@@ FIXME */ - /* Eventually we will want to be able to create - a journal file here. For now, only allow the - user to specify an existing inode to be the - journal file. */ - if (is_remount) { - ext3_msg(sb, KERN_ERR, "error: cannot specify " - "journal on remount"); - return 0; - } - set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); - break; - case Opt_journal_inum: - if (is_remount) { - ext3_msg(sb, KERN_ERR, "error: cannot specify " - "journal on remount"); - return 0; - } - if (match_int(&args[0], &option)) - return 0; - *inum = option; - break; - case Opt_journal_dev: - if (is_remount) { - ext3_msg(sb, KERN_ERR, "error: cannot specify " - "journal on remount"); - return 0; - } - if (match_int(&args[0], &option)) - return 0; - *journal_devnum = option; - break; - case Opt_journal_path: - if (is_remount) { - ext3_msg(sb, KERN_ERR, "error: cannot specify " - "journal on remount"); - return 0; - } - - journal_path = match_strdup(&args[0]); - if (!journal_path) { - ext3_msg(sb, KERN_ERR, "error: could not dup " - "journal device string"); - return 0; - } - - error = kern_path(journal_path, LOOKUP_FOLLOW, &path); - if (error) { - ext3_msg(sb, KERN_ERR, "error: could not find " - "journal device path: error %d", error); - kfree(journal_path); - return 0; - } - - journal_inode = d_inode(path.dentry); - if 
(!S_ISBLK(journal_inode->i_mode)) { - ext3_msg(sb, KERN_ERR, "error: journal path %s " - "is not a block device", journal_path); - path_put(&path); - kfree(journal_path); - return 0; - } - - *journal_devnum = new_encode_dev(journal_inode->i_rdev); - path_put(&path); - kfree(journal_path); - break; - case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); - break; - case Opt_commit: - if (match_int(&args[0], &option)) - return 0; - if (option < 0) - return 0; - if (option == 0) - option = JBD_DEFAULT_MAX_COMMIT_AGE; - sbi->s_commit_interval = HZ * option; - break; - case Opt_data_journal: - data_opt = EXT3_MOUNT_JOURNAL_DATA; - goto datacheck; - case Opt_data_ordered: - data_opt = EXT3_MOUNT_ORDERED_DATA; - goto datacheck; - case Opt_data_writeback: - data_opt = EXT3_MOUNT_WRITEBACK_DATA; - datacheck: - if (is_remount) { - if (test_opt(sb, DATA_FLAGS) == data_opt) - break; - ext3_msg(sb, KERN_ERR, - "error: cannot change " - "data mode on remount. The filesystem " - "is mounted in data=%s mode and you " - "try to remount it in data=%s mode.", - data_mode_string(test_opt(sb, - DATA_FLAGS)), - data_mode_string(data_opt)); - return 0; - } else { - clear_opt(sbi->s_mount_opt, DATA_FLAGS); - sbi->s_mount_opt |= data_opt; - } - break; - case Opt_data_err_abort: - set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); - break; - case Opt_data_err_ignore: - clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); - break; -#ifdef CONFIG_QUOTA - case Opt_usrjquota: - if (!set_qf_name(sb, USRQUOTA, &args[0])) - return 0; - break; - case Opt_grpjquota: - if (!set_qf_name(sb, GRPQUOTA, &args[0])) - return 0; - break; - case Opt_offusrjquota: - if (!clear_qf_name(sb, USRQUOTA)) - return 0; - break; - case Opt_offgrpjquota: - if (!clear_qf_name(sb, GRPQUOTA)) - return 0; - break; - case Opt_jqfmt_vfsold: - qfmt = QFMT_VFS_OLD; - goto set_qf_format; - case Opt_jqfmt_vfsv0: - qfmt = QFMT_VFS_V0; - goto set_qf_format; - case Opt_jqfmt_vfsv1: - qfmt = QFMT_VFS_V1; -set_qf_format: - if 
(sb_any_quota_loaded(sb) && - sbi->s_jquota_fmt != qfmt) { - ext3_msg(sb, KERN_ERR, "error: cannot change " - "journaled quota options when " - "quota turned on."); - return 0; - } - sbi->s_jquota_fmt = qfmt; - break; - case Opt_quota: - case Opt_usrquota: - set_opt(sbi->s_mount_opt, QUOTA); - set_opt(sbi->s_mount_opt, USRQUOTA); - break; - case Opt_grpquota: - set_opt(sbi->s_mount_opt, QUOTA); - set_opt(sbi->s_mount_opt, GRPQUOTA); - break; - case Opt_noquota: - if (sb_any_quota_loaded(sb)) { - ext3_msg(sb, KERN_ERR, "error: cannot change " - "quota options when quota turned on."); - return 0; - } - clear_opt(sbi->s_mount_opt, QUOTA); - clear_opt(sbi->s_mount_opt, USRQUOTA); - clear_opt(sbi->s_mount_opt, GRPQUOTA); - break; -#else - case Opt_quota: - case Opt_usrquota: - case Opt_grpquota: - ext3_msg(sb, KERN_ERR, - "error: quota options not supported."); - break; - case Opt_usrjquota: - case Opt_grpjquota: - case Opt_offusrjquota: - case Opt_offgrpjquota: - case Opt_jqfmt_vfsold: - case Opt_jqfmt_vfsv0: - case Opt_jqfmt_vfsv1: - ext3_msg(sb, KERN_ERR, - "error: journaled quota options not " - "supported."); - break; - case Opt_noquota: - break; -#endif - case Opt_abort: - set_opt(sbi->s_mount_opt, ABORT); - break; - case Opt_nobarrier: - clear_opt(sbi->s_mount_opt, BARRIER); - break; - case Opt_barrier: - if (args[0].from) { - if (match_int(&args[0], &option)) - return 0; - } else - option = 1; /* No argument, default to 1 */ - if (option) - set_opt(sbi->s_mount_opt, BARRIER); - else - clear_opt(sbi->s_mount_opt, BARRIER); - break; - case Opt_ignore: - break; - case Opt_resize: - if (!is_remount) { - ext3_msg(sb, KERN_ERR, - "error: resize option only available " - "for remount"); - return 0; - } - if (match_int(&args[0], &option) != 0) - return 0; - *n_blocks_count = option; - break; - case Opt_nobh: - ext3_msg(sb, KERN_WARNING, - "warning: ignoring deprecated nobh option"); - break; - case Opt_bh: - ext3_msg(sb, KERN_WARNING, - "warning: ignoring deprecated bh 
option"); - break; - default: - ext3_msg(sb, KERN_ERR, - "error: unrecognized mount option \"%s\" " - "or missing value", p); - return 0; - } - } -#ifdef CONFIG_QUOTA - if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) - clear_opt(sbi->s_mount_opt, USRQUOTA); - if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) - clear_opt(sbi->s_mount_opt, GRPQUOTA); - - if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) { - ext3_msg(sb, KERN_ERR, "error: old and new quota " - "format mixing."); - return 0; - } - - if (!sbi->s_jquota_fmt) { - ext3_msg(sb, KERN_ERR, "error: journaled quota format " - "not specified."); - return 0; - } - } -#endif - return 1; -} - -static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, - int read_only) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - int res = 0; - - if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { - ext3_msg(sb, KERN_ERR, - "error: revision level too high, " - "forcing read-only mode"); - res = MS_RDONLY; - } - if (read_only) - return res; - if (!(sbi->s_mount_state & EXT3_VALID_FS)) - ext3_msg(sb, KERN_WARNING, - "warning: mounting unchecked fs, " - "running e2fsck is recommended"); - else if ((sbi->s_mount_state & EXT3_ERROR_FS)) - ext3_msg(sb, KERN_WARNING, - "warning: mounting fs with errors, " - "running e2fsck is recommended"); - else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 && - le16_to_cpu(es->s_mnt_count) >= - le16_to_cpu(es->s_max_mnt_count)) - ext3_msg(sb, KERN_WARNING, - "warning: maximal mount count reached, " - "running e2fsck is recommended"); - else if (le32_to_cpu(es->s_checkinterval) && - (le32_to_cpu(es->s_lastcheck) + - le32_to_cpu(es->s_checkinterval) <= get_seconds())) - ext3_msg(sb, KERN_WARNING, - "warning: checktime reached, " - "running e2fsck is recommended"); -#if 0 - /* @@@ We _will_ want to clear the valid bit if we find - inconsistencies, to force a fsck at reboot. 
But for - a plain journaled filesystem we can keep it set as - valid forever! :) */ - es->s_state &= cpu_to_le16(~EXT3_VALID_FS); -#endif - if (!le16_to_cpu(es->s_max_mnt_count)) - es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); - le16_add_cpu(&es->s_mnt_count, 1); - es->s_mtime = cpu_to_le32(get_seconds()); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - - ext3_commit_super(sb, es, 1); - if (test_opt(sb, DEBUG)) - ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, " - "bpg=%lu, ipg=%lu, mo=%04lx]", - sb->s_blocksize, - sbi->s_groups_count, - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); - - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { - char b[BDEVNAME_SIZE]; - ext3_msg(sb, KERN_INFO, "using external journal on %s", - bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); - } else { - ext3_msg(sb, KERN_INFO, "using internal journal"); - } - cleancache_init_fs(sb); - return res; -} - -/* Called at mount-time, super-block is locked */ -static int ext3_check_descriptors(struct super_block *sb) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - int i; - - ext3_debug ("Checking group descriptors"); - - for (i = 0; i < sbi->s_groups_count; i++) { - struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL); - ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i); - ext3_fsblk_t last_block; - - if (i == sbi->s_groups_count - 1) - last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; - else - last_block = first_block + - (EXT3_BLOCKS_PER_GROUP(sb) - 1); - - if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || - le32_to_cpu(gdp->bg_block_bitmap) > last_block) - { - ext3_error (sb, "ext3_check_descriptors", - "Block bitmap for group %d" - " not in group (block %lu)!", - i, (unsigned long) - le32_to_cpu(gdp->bg_block_bitmap)); - return 0; - } - if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || - le32_to_cpu(gdp->bg_inode_bitmap) > last_block) - { - ext3_error (sb, 
"ext3_check_descriptors", - "Inode bitmap for group %d" - " not in group (block %lu)!", - i, (unsigned long) - le32_to_cpu(gdp->bg_inode_bitmap)); - return 0; - } - if (le32_to_cpu(gdp->bg_inode_table) < first_block || - le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 > - last_block) - { - ext3_error (sb, "ext3_check_descriptors", - "Inode table for group %d" - " not in group (block %lu)!", - i, (unsigned long) - le32_to_cpu(gdp->bg_inode_table)); - return 0; - } - } - - sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); - return 1; -} - - -/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at - * the superblock) which were deleted from all directories, but held open by - * a process at the time of a crash. We walk the list and try to delete these - * inodes at recovery time (only with a read-write filesystem). - * - * In order to keep the orphan inode chain consistent during traversal (in - * case of crash during recovery), we link each inode into the superblock - * orphan list_head and handle it the same way as an inode deletion during - * normal operation (which journals the operations for us). - * - * We only do an iget() and an iput() on each inode, which is very safe if we - * accidentally point at an in-use or already deleted inode. The worst that - * can happen in this case is that we get a "bit already cleared" message from - * ext3_free_inode(). The only reason we would point at a wrong inode is if - * e2fsck was run on this filesystem, and it must have already done the orphan - * inode cleanup for us, so we can safely abort without any further action. 
- */ -static void ext3_orphan_cleanup (struct super_block * sb, - struct ext3_super_block * es) -{ - unsigned int s_flags = sb->s_flags; - int nr_orphans = 0, nr_truncates = 0; -#ifdef CONFIG_QUOTA - int i; -#endif - if (!es->s_last_orphan) { - jbd_debug(4, "no orphan inodes to clean up\n"); - return; - } - - if (bdev_read_only(sb->s_bdev)) { - ext3_msg(sb, KERN_ERR, "error: write access " - "unavailable, skipping orphan cleanup."); - return; - } - - /* Check if feature set allows readwrite operations */ - if (EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) { - ext3_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " - "unknown ROCOMPAT features"); - return; - } - - if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - /* don't clear list on RO mount w/ errors */ - if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { - jbd_debug(1, "Errors on filesystem, " - "clearing orphan list.\n"); - es->s_last_orphan = 0; - } - jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); - return; - } - - if (s_flags & MS_RDONLY) { - ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); - sb->s_flags &= ~MS_RDONLY; - } -#ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - sb->s_flags |= MS_ACTIVE; - /* Turn on quotas so that they are updated correctly */ - for (i = 0; i < EXT3_MAXQUOTAS; i++) { - if (EXT3_SB(sb)->s_qf_names[i]) { - int ret = ext3_quota_on_mount(sb, i); - if (ret < 0) - ext3_msg(sb, KERN_ERR, - "error: cannot turn on journaled " - "quota: %d", ret); - } - } -#endif - - while (es->s_last_orphan) { - struct inode *inode; - - inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)); - if (IS_ERR(inode)) { - es->s_last_orphan = 0; - break; - } - - list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - dquot_initialize(inode); - if (inode->i_nlink) { - printk(KERN_DEBUG - "%s: truncating inode %lu to %Ld bytes\n", - __func__, inode->i_ino, inode->i_size); - jbd_debug(2, "truncating inode %lu to %Ld bytes\n", 
- inode->i_ino, inode->i_size); - ext3_truncate(inode); - nr_truncates++; - } else { - printk(KERN_DEBUG - "%s: deleting unreferenced inode %lu\n", - __func__, inode->i_ino); - jbd_debug(2, "deleting unreferenced inode %lu\n", - inode->i_ino); - nr_orphans++; - } - iput(inode); /* The delete magic happens here! */ - } - -#define PLURAL(x) (x), ((x)==1) ? "" : "s" - - if (nr_orphans) - ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted", - PLURAL(nr_orphans)); - if (nr_truncates) - ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up", - PLURAL(nr_truncates)); -#ifdef CONFIG_QUOTA - /* Turn quotas off */ - for (i = 0; i < EXT3_MAXQUOTAS; i++) { - if (sb_dqopt(sb)->files[i]) - dquot_quota_off(sb, i); - } -#endif - sb->s_flags = s_flags; /* Restore MS_RDONLY status */ -} - -/* - * Maximal file size. There is a direct, and {,double-,triple-}indirect - * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. - * We need to be 1 filesystem block less than the 2^32 sector limit. 
- */ -static loff_t ext3_max_size(int bits) -{ - loff_t res = EXT3_NDIR_BLOCKS; - int meta_blocks; - loff_t upper_limit; - - /* This is calculated to be the largest file size for a - * dense, file such that the total number of - * sectors in the file, including data and all indirect blocks, - * does not exceed 2^32 -1 - * __u32 i_blocks representing the total number of - * 512 bytes blocks of the file - */ - upper_limit = (1LL << 32) - 1; - - /* total blocks in file system block size */ - upper_limit >>= (bits - 9); - - - /* indirect blocks */ - meta_blocks = 1; - /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - - res += 1LL << (bits-2); - res += 1LL << (2*(bits-2)); - res += 1LL << (3*(bits-2)); - res <<= bits; - if (res > upper_limit) - res = upper_limit; - - if (res > MAX_LFS_FILESIZE) - res = MAX_LFS_FILESIZE; - - return res; -} - -static ext3_fsblk_t descriptor_loc(struct super_block *sb, - ext3_fsblk_t logic_sb_block, - int nr) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - unsigned long bg, first_meta_bg; - int has_super = 0; - - first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); - - if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || - nr < first_meta_bg) - return (logic_sb_block + nr + 1); - bg = sbi->s_desc_per_block * nr; - if (ext3_bg_has_super(sb, bg)) - has_super = 1; - return (has_super + ext3_group_first_block_no(sb, bg)); -} - - -static int ext3_fill_super (struct super_block *sb, void *data, int silent) -{ - struct buffer_head * bh; - struct ext3_super_block *es = NULL; - struct ext3_sb_info *sbi; - ext3_fsblk_t block; - ext3_fsblk_t sb_block = get_sb_block(&data, sb); - ext3_fsblk_t logic_sb_block; - unsigned long offset = 0; - unsigned int journal_inum = 0; - unsigned long journal_devnum = 0; - unsigned long def_mount_opts; - struct inode *root; - int 
blocksize; - int hblock; - int db_count; - int i; - int needs_recovery; - int ret = -EINVAL; - __le32 features; - int err; - - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - - sbi->s_blockgroup_lock = - kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); - if (!sbi->s_blockgroup_lock) { - kfree(sbi); - return -ENOMEM; - } - sb->s_fs_info = sbi; - sbi->s_sb_block = sb_block; - - blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); - if (!blocksize) { - ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); - goto out_fail; - } - - /* - * The ext3 superblock will not be buffer aligned for other than 1kB - * block sizes. We need to calculate the offset from buffer start. - */ - if (blocksize != EXT3_MIN_BLOCK_SIZE) { - logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; - } else { - logic_sb_block = sb_block; - } - - if (!(bh = sb_bread(sb, logic_sb_block))) { - ext3_msg(sb, KERN_ERR, "error: unable to read superblock"); - goto out_fail; - } - /* - * Note: s_es must be initialized as soon as possible because - * some ext3 macro-instructions depend on its value - */ - es = (struct ext3_super_block *) (bh->b_data + offset); - sbi->s_es = es; - sb->s_magic = le16_to_cpu(es->s_magic); - if (sb->s_magic != EXT3_SUPER_MAGIC) - goto cantfind_ext3; - - /* Set defaults before we parse the mount options */ - def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - if (def_mount_opts & EXT3_DEFM_DEBUG) - set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT3_DEFM_BSDGROUPS) - set_opt(sbi->s_mount_opt, GRPID); - if (def_mount_opts & EXT3_DEFM_UID16) - set_opt(sbi->s_mount_opt, NO_UID32); -#ifdef CONFIG_EXT3_FS_XATTR - if (def_mount_opts & EXT3_DEFM_XATTR_USER) - set_opt(sbi->s_mount_opt, XATTR_USER); -#endif -#ifdef CONFIG_EXT3_FS_POSIX_ACL - if (def_mount_opts & EXT3_DEFM_ACL) - set_opt(sbi->s_mount_opt, POSIX_ACL); -#endif - if ((def_mount_opts & EXT3_DEFM_JMODE) == 
EXT3_DEFM_JMODE_DATA) - set_opt(sbi->s_mount_opt, JOURNAL_DATA); - else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) - set_opt(sbi->s_mount_opt, ORDERED_DATA); - else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) - set_opt(sbi->s_mount_opt, WRITEBACK_DATA); - - if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) - set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE) - set_opt(sbi->s_mount_opt, ERRORS_CONT); - else - set_opt(sbi->s_mount_opt, ERRORS_RO); - - sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); - sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); - - /* enable barriers by default */ - set_opt(sbi->s_mount_opt, BARRIER); - set_opt(sbi->s_mount_opt, RESERVATION); - - if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, - NULL, 0)) - goto failed_mount; - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); - - if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && - (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || - EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || - EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) - ext3_msg(sb, KERN_WARNING, - "warning: feature flags set on rev 0 fs, " - "running e2fsck is recommended"); - /* - * Check feature flags regardless of the revision level, since we - * previously didn't change the revision level when setting the flags, - * so there is a chance incompat flags are set on a rev 0 filesystem. 
- */ - features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); - if (features) { - ext3_msg(sb, KERN_ERR, - "error: couldn't mount because of unsupported " - "optional features (%x)", le32_to_cpu(features)); - goto failed_mount; - } - features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); - if (!(sb->s_flags & MS_RDONLY) && features) { - ext3_msg(sb, KERN_ERR, - "error: couldn't mount RDWR because of unsupported " - "optional features (%x)", le32_to_cpu(features)); - goto failed_mount; - } - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - - if (blocksize < EXT3_MIN_BLOCK_SIZE || - blocksize > EXT3_MAX_BLOCK_SIZE) { - ext3_msg(sb, KERN_ERR, - "error: couldn't mount because of unsupported " - "filesystem blocksize %d", blocksize); - goto failed_mount; - } - - hblock = bdev_logical_block_size(sb->s_bdev); - if (sb->s_blocksize != blocksize) { - /* - * Make sure the blocksize for the filesystem is larger - * than the hardware sectorsize for the machine. 
- */ - if (blocksize < hblock) { - ext3_msg(sb, KERN_ERR, - "error: fsblocksize %d too small for " - "hardware sectorsize %d", blocksize, hblock); - goto failed_mount; - } - - brelse (bh); - if (!sb_set_blocksize(sb, blocksize)) { - ext3_msg(sb, KERN_ERR, - "error: bad blocksize %d", blocksize); - goto out_fail; - } - logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; - bh = sb_bread(sb, logic_sb_block); - if (!bh) { - ext3_msg(sb, KERN_ERR, - "error: can't read superblock on 2nd try"); - goto failed_mount; - } - es = (struct ext3_super_block *)(bh->b_data + offset); - sbi->s_es = es; - if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { - ext3_msg(sb, KERN_ERR, - "error: magic mismatch"); - goto failed_mount; - } - } - - sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); - - if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { - sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; - sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); - if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || - (!is_power_of_2(sbi->s_inode_size)) || - (sbi->s_inode_size > blocksize)) { - ext3_msg(sb, KERN_ERR, - "error: unsupported inode size: %d", - sbi->s_inode_size); - goto failed_mount; - } - } - sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << - le32_to_cpu(es->s_log_frag_size); - if (blocksize != sbi->s_frag_size) { - ext3_msg(sb, KERN_ERR, - "error: fragsize %lu != blocksize %u (unsupported)", - sbi->s_frag_size, blocksize); - goto failed_mount; - } - sbi->s_frags_per_block = 1; - sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); - sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); - sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0) - goto cantfind_ext3; - sbi->s_inodes_per_block = blocksize / 
EXT3_INODE_SIZE(sb); - if (sbi->s_inodes_per_block == 0) - goto cantfind_ext3; - sbi->s_itb_per_group = sbi->s_inodes_per_group / - sbi->s_inodes_per_block; - sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); - sbi->s_sbh = bh; - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb)); - for (i = 0; i < 4; i++) - sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); - sbi->s_def_hash_version = es->s_def_hash_version; - i = le32_to_cpu(es->s_flags); - if (i & EXT2_FLAGS_UNSIGNED_HASH) - sbi->s_hash_unsigned = 3; - else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { -#ifdef __CHAR_UNSIGNED__ - es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); - sbi->s_hash_unsigned = 3; -#else - es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); -#endif - } - - if (sbi->s_blocks_per_group > blocksize * 8) { - ext3_msg(sb, KERN_ERR, - "#blocks per group too big: %lu", - sbi->s_blocks_per_group); - goto failed_mount; - } - if (sbi->s_frags_per_group > blocksize * 8) { - ext3_msg(sb, KERN_ERR, - "error: #fragments per group too big: %lu", - sbi->s_frags_per_group); - goto failed_mount; - } - if (sbi->s_inodes_per_group > blocksize * 8) { - ext3_msg(sb, KERN_ERR, - "error: #inodes per group too big: %lu", - sbi->s_inodes_per_group); - goto failed_mount; - } - - err = generic_check_addressable(sb->s_blocksize_bits, - le32_to_cpu(es->s_blocks_count)); - if (err) { - ext3_msg(sb, KERN_ERR, - "error: filesystem is too large to mount safely"); - if (sizeof(sector_t) < 8) - ext3_msg(sb, KERN_ERR, - "error: CONFIG_LBDAF not enabled"); - ret = err; - goto failed_mount; - } - - if (EXT3_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext3; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT3_BLOCKS_PER_GROUP(sb)) + 1; - db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb)); - sbi->s_group_desc = 
kmalloc(db_count * sizeof (struct buffer_head *), - GFP_KERNEL); - if (sbi->s_group_desc == NULL) { - ext3_msg(sb, KERN_ERR, - "error: not enough memory"); - ret = -ENOMEM; - goto failed_mount; - } - - bgl_lock_init(sbi->s_blockgroup_lock); - - for (i = 0; i < db_count; i++) { - block = descriptor_loc(sb, logic_sb_block, i); - sbi->s_group_desc[i] = sb_bread(sb, block); - if (!sbi->s_group_desc[i]) { - ext3_msg(sb, KERN_ERR, - "error: can't read group descriptor %d", i); - db_count = i; - goto failed_mount2; - } - } - if (!ext3_check_descriptors (sb)) { - ext3_msg(sb, KERN_ERR, - "error: group descriptors corrupted"); - goto failed_mount2; - } - sbi->s_gdb_count = db_count; - get_random_bytes(&sbi->s_next_generation, sizeof(u32)); - spin_lock_init(&sbi->s_next_gen_lock); - - /* per fileystem reservation list head & lock */ - spin_lock_init(&sbi->s_rsv_window_lock); - sbi->s_rsv_window_root = RB_ROOT; - /* Add a single, static dummy reservation to the start of the - * reservation window list --- it gives us a placeholder for - * append-at-start-of-list which makes the allocation logic - * _much_ simpler. 
*/ - sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_alloc_hit = 0; - sbi->s_rsv_window_head.rsv_goal_size = 0; - ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); - - /* - * set up enough so that it can read an inode - */ - sb->s_op = &ext3_sops; - sb->s_export_op = &ext3_export_ops; - sb->s_xattr = ext3_xattr_handlers; -#ifdef CONFIG_QUOTA - sb->s_qcop = &ext3_qctl_operations; - sb->dq_op = &ext3_quota_operations; - sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; -#endif - memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); - INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ - mutex_init(&sbi->s_orphan_lock); - mutex_init(&sbi->s_resize_lock); - - sb->s_root = NULL; - - needs_recovery = (es->s_last_orphan != 0 || - EXT3_HAS_INCOMPAT_FEATURE(sb, - EXT3_FEATURE_INCOMPAT_RECOVER)); - - /* - * The first inode we look at is the journal inode. Don't try - * root first: it may be modified in the journal! - */ - if (!test_opt(sb, NOLOAD) && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { - if (ext3_load_journal(sb, es, journal_devnum)) - goto failed_mount2; - } else if (journal_inum) { - if (ext3_create_journal(sb, es, journal_inum)) - goto failed_mount2; - } else { - if (!silent) - ext3_msg(sb, KERN_ERR, - "error: no journal found. " - "mounting ext3 over ext2?"); - goto failed_mount2; - } - err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb), GFP_KERNEL); - if (!err) { - err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb), GFP_KERNEL); - } - if (!err) { - err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb), GFP_KERNEL); - } - if (err) { - ext3_msg(sb, KERN_ERR, "error: insufficient memory"); - ret = err; - goto failed_mount3; - } - - /* We have now updated the journal if required, so we can - * validate the data journaling mode. 
*/ - switch (test_opt(sb, DATA_FLAGS)) { - case 0: - /* No mode set, assume a default based on the journal - capabilities: ORDERED_DATA if the journal can - cope, else JOURNAL_DATA */ - if (journal_check_available_features - (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) - set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE); - else - set_opt(sbi->s_mount_opt, JOURNAL_DATA); - break; - - case EXT3_MOUNT_ORDERED_DATA: - case EXT3_MOUNT_WRITEBACK_DATA: - if (!journal_check_available_features - (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { - ext3_msg(sb, KERN_ERR, - "error: journal does not support " - "requested data journaling mode"); - goto failed_mount3; - } - default: - break; - } - - /* - * The journal_load will have done any necessary log recovery, - * so we can safely mount the rest of the filesystem now. - */ - - root = ext3_iget(sb, EXT3_ROOT_INO); - if (IS_ERR(root)) { - ext3_msg(sb, KERN_ERR, "error: get root inode failed"); - ret = PTR_ERR(root); - goto failed_mount3; - } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - iput(root); - ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck"); - goto failed_mount3; - } - sb->s_root = d_make_root(root); - if (!sb->s_root) { - ext3_msg(sb, KERN_ERR, "error: get root dentry failed"); - ret = -ENOMEM; - goto failed_mount3; - } - - if (ext3_setup_super(sb, es, sb->s_flags & MS_RDONLY)) - sb->s_flags |= MS_RDONLY; - - EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; - ext3_orphan_cleanup(sb, es); - EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; - if (needs_recovery) { - ext3_mark_recovery_complete(sb, es); - ext3_msg(sb, KERN_INFO, "recovery complete"); - } - ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? 
"ordered": - "writeback"); - - return 0; - -cantfind_ext3: - if (!silent) - ext3_msg(sb, KERN_INFO, - "error: can't find ext3 filesystem on dev %s.", - sb->s_id); - goto failed_mount; - -failed_mount3: - percpu_counter_destroy(&sbi->s_freeblocks_counter); - percpu_counter_destroy(&sbi->s_freeinodes_counter); - percpu_counter_destroy(&sbi->s_dirs_counter); - journal_destroy(sbi->s_journal); -failed_mount2: - for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); -failed_mount: -#ifdef CONFIG_QUOTA - for (i = 0; i < EXT3_MAXQUOTAS; i++) - kfree(sbi->s_qf_names[i]); -#endif - ext3_blkdev_remove(sbi); - brelse(bh); -out_fail: - sb->s_fs_info = NULL; - kfree(sbi->s_blockgroup_lock); - kfree(sbi); - return ret; -} - -/* - * Setup any per-fs journal parameters now. We'll do this both on - * initial mount, once the journal has been initialised but before we've - * done any recovery; and again on any subsequent remount. - */ -static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) -{ - struct ext3_sb_info *sbi = EXT3_SB(sb); - - if (sbi->s_commit_interval) - journal->j_commit_interval = sbi->s_commit_interval; - /* We could also set up an ext3-specific default for the commit - * interval here, but for now we'll just fall back to the jbd - * default. */ - - spin_lock(&journal->j_state_lock); - if (test_opt(sb, BARRIER)) - journal->j_flags |= JFS_BARRIER; - else - journal->j_flags &= ~JFS_BARRIER; - if (test_opt(sb, DATA_ERR_ABORT)) - journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; - else - journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; - spin_unlock(&journal->j_state_lock); -} - -static journal_t *ext3_get_journal(struct super_block *sb, - unsigned int journal_inum) -{ - struct inode *journal_inode; - journal_t *journal; - - /* First, test for the existence of a valid inode on disk. Bad - * things happen if we iget() an unused inode, as the subsequent - * iput() will try to delete it. 
*/ - - journal_inode = ext3_iget(sb, journal_inum); - if (IS_ERR(journal_inode)) { - ext3_msg(sb, KERN_ERR, "error: no journal found"); - return NULL; - } - if (!journal_inode->i_nlink) { - make_bad_inode(journal_inode); - iput(journal_inode); - ext3_msg(sb, KERN_ERR, "error: journal inode is deleted"); - return NULL; - } - - jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", - journal_inode, journal_inode->i_size); - if (!S_ISREG(journal_inode->i_mode)) { - ext3_msg(sb, KERN_ERR, "error: invalid journal inode"); - iput(journal_inode); - return NULL; - } - - journal = journal_init_inode(journal_inode); - if (!journal) { - ext3_msg(sb, KERN_ERR, "error: could not load journal inode"); - iput(journal_inode); - return NULL; - } - journal->j_private = sb; - ext3_init_journal_params(sb, journal); - return journal; -} - -static journal_t *ext3_get_dev_journal(struct super_block *sb, - dev_t j_dev) -{ - struct buffer_head * bh; - journal_t *journal; - ext3_fsblk_t start; - ext3_fsblk_t len; - int hblock, blocksize; - ext3_fsblk_t sb_block; - unsigned long offset; - struct ext3_super_block * es; - struct block_device *bdev; - - bdev = ext3_blkdev_get(j_dev, sb); - if (bdev == NULL) - return NULL; - - blocksize = sb->s_blocksize; - hblock = bdev_logical_block_size(bdev); - if (blocksize < hblock) { - ext3_msg(sb, KERN_ERR, - "error: blocksize too small for journal device"); - goto out_bdev; - } - - sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; - offset = EXT3_MIN_BLOCK_SIZE % blocksize; - set_blocksize(bdev, blocksize); - if (!(bh = __bread(bdev, sb_block, blocksize))) { - ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of " - "external journal"); - goto out_bdev; - } - - es = (struct ext3_super_block *) (bh->b_data + offset); - if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || - !(le32_to_cpu(es->s_feature_incompat) & - EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { - ext3_msg(sb, KERN_ERR, "error: external journal has " - "bad superblock"); - brelse(bh); - goto 
out_bdev; - } - - if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { - ext3_msg(sb, KERN_ERR, "error: journal UUID does not match"); - brelse(bh); - goto out_bdev; - } - - len = le32_to_cpu(es->s_blocks_count); - start = sb_block + 1; - brelse(bh); /* we're done with the superblock */ - - journal = journal_init_dev(bdev, sb->s_bdev, - start, len, blocksize); - if (!journal) { - ext3_msg(sb, KERN_ERR, - "error: failed to create device journal"); - goto out_bdev; - } - journal->j_private = sb; - if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { - if (bh_submit_read(journal->j_sb_buffer)) { - ext3_msg(sb, KERN_ERR, "I/O error on journal device"); - goto out_journal; - } - } - if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - ext3_msg(sb, KERN_ERR, - "error: external journal has more than one " - "user (unsupported) - %d", - be32_to_cpu(journal->j_superblock->s_nr_users)); - goto out_journal; - } - EXT3_SB(sb)->journal_bdev = bdev; - ext3_init_journal_params(sb, journal); - return journal; -out_journal: - journal_destroy(journal); -out_bdev: - ext3_blkdev_put(bdev); - return NULL; -} - -static int ext3_load_journal(struct super_block *sb, - struct ext3_super_block *es, - unsigned long journal_devnum) -{ - journal_t *journal; - unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); - dev_t journal_dev; - int err = 0; - int really_read_only; - - if (journal_devnum && - journal_devnum != le32_to_cpu(es->s_journal_dev)) { - ext3_msg(sb, KERN_INFO, "external journal device major/minor " - "numbers have changed"); - journal_dev = new_decode_dev(journal_devnum); - } else - journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); - - really_read_only = bdev_read_only(sb->s_bdev); - - /* - * Are we loading a blank journal or performing recovery after a - * crash? For recovery, we need to check in advance whether we - * can get read-write access to the device. 
- */ - - if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { - if (sb->s_flags & MS_RDONLY) { - ext3_msg(sb, KERN_INFO, - "recovery required on readonly filesystem"); - if (really_read_only) { - ext3_msg(sb, KERN_ERR, "error: write access " - "unavailable, cannot proceed"); - return -EROFS; - } - ext3_msg(sb, KERN_INFO, - "write access will be enabled during recovery"); - } - } - - if (journal_inum && journal_dev) { - ext3_msg(sb, KERN_ERR, "error: filesystem has both journal " - "and inode journals"); - return -EINVAL; - } - - if (journal_inum) { - if (!(journal = ext3_get_journal(sb, journal_inum))) - return -EINVAL; - } else { - if (!(journal = ext3_get_dev_journal(sb, journal_dev))) - return -EINVAL; - } - - if (!(journal->j_flags & JFS_BARRIER)) - printk(KERN_INFO "EXT3-fs: barriers not enabled\n"); - - if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { - err = journal_update_format(journal); - if (err) { - ext3_msg(sb, KERN_ERR, "error updating journal"); - journal_destroy(journal); - return err; - } - } - - if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) - err = journal_wipe(journal, !really_read_only); - if (!err) - err = journal_load(journal); - - if (err) { - ext3_msg(sb, KERN_ERR, "error loading journal"); - journal_destroy(journal); - return err; - } - - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); - - if (!really_read_only && journal_devnum && - journal_devnum != le32_to_cpu(es->s_journal_dev)) { - es->s_journal_dev = cpu_to_le32(journal_devnum); - - /* Make sure we flush the recovery flag to disk. 
*/ - ext3_commit_super(sb, es, 1); - } - - return 0; -} - -static int ext3_create_journal(struct super_block *sb, - struct ext3_super_block *es, - unsigned int journal_inum) -{ - journal_t *journal; - int err; - - if (sb->s_flags & MS_RDONLY) { - ext3_msg(sb, KERN_ERR, - "error: readonly filesystem when trying to " - "create journal"); - return -EROFS; - } - - journal = ext3_get_journal(sb, journal_inum); - if (!journal) - return -EINVAL; - - ext3_msg(sb, KERN_INFO, "creating new journal on inode %u", - journal_inum); - - err = journal_create(journal); - if (err) { - ext3_msg(sb, KERN_ERR, "error creating journal"); - journal_destroy(journal); - return -EIO; - } - - EXT3_SB(sb)->s_journal = journal; - - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); - - es->s_journal_inum = cpu_to_le32(journal_inum); - - /* Make sure we flush the recovery flag to disk. */ - ext3_commit_super(sb, es, 1); - - return 0; -} - -static int ext3_commit_super(struct super_block *sb, - struct ext3_super_block *es, - int sync) -{ - struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; - int error = 0; - - if (!sbh) - return error; - - if (buffer_write_io_error(sbh)) { - /* - * Oh, dear. A previous attempt to write the - * superblock failed. This could happen because the - * USB device was yanked out. Or it could happen to - * be a transient write error and maybe the block will - * be remapped. Nothing we can do but to retry the - * write and hope for the best. - */ - ext3_msg(sb, KERN_ERR, "previous I/O error to " - "superblock detected"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } - /* - * If the file system is mounted read-only, don't update the - * superblock write time. 
This avoids updating the superblock - * write time when we are mounting the root file system - * read/only but we need to replay the journal; at that point, - * for people who are east of GMT and who make their clock - * tick in localtime for Windows bug-for-bug compatibility, - * the clock is set in the future, and this will cause e2fsck - * to complain and force a full file system check. - */ - if (!(sb->s_flags & MS_RDONLY)) - es->s_wtime = cpu_to_le32(get_seconds()); - es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); - es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); - BUFFER_TRACE(sbh, "marking dirty"); - mark_buffer_dirty(sbh); - if (sync) { - error = sync_dirty_buffer(sbh); - if (buffer_write_io_error(sbh)) { - ext3_msg(sb, KERN_ERR, "I/O error while writing " - "superblock"); - clear_buffer_write_io_error(sbh); - set_buffer_uptodate(sbh); - } - } - return error; -} - - -/* - * Have we just finished recovery? If so, and if we are mounting (or - * remounting) the filesystem readonly, then we will end up with a - * consistent fs on disk. Record that fact. - */ -static void ext3_mark_recovery_complete(struct super_block * sb, - struct ext3_super_block * es) -{ - journal_t *journal = EXT3_SB(sb)->s_journal; - - journal_lock_updates(journal); - if (journal_flush(journal) < 0) - goto out; - - if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && - sb->s_flags & MS_RDONLY) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - ext3_commit_super(sb, es, 1); - } - -out: - journal_unlock_updates(journal); -} - -/* - * If we are mounting (or read-write remounting) a filesystem whose journal - * has recorded an error from a previous lifetime, move that error to the - * main filesystem now. 
- */ -static void ext3_clear_journal_err(struct super_block *sb, - struct ext3_super_block *es) -{ - journal_t *journal; - int j_errno; - const char *errstr; - - journal = EXT3_SB(sb)->s_journal; - - /* - * Now check for any error status which may have been recorded in the - * journal by a prior ext3_error() or ext3_abort() - */ - - j_errno = journal_errno(journal); - if (j_errno) { - char nbuf[16]; - - errstr = ext3_decode_error(sb, j_errno, nbuf); - ext3_warning(sb, __func__, "Filesystem error recorded " - "from previous mount: %s", errstr); - ext3_warning(sb, __func__, "Marking fs in need of " - "filesystem check."); - - EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - ext3_commit_super (sb, es, 1); - - journal_clear_err(journal); - } -} - -/* - * Force the running and committing transactions to commit, - * and wait on the commit. - */ -int ext3_force_commit(struct super_block *sb) -{ - journal_t *journal; - int ret; - - if (sb->s_flags & MS_RDONLY) - return 0; - - journal = EXT3_SB(sb)->s_journal; - ret = ext3_journal_force_commit(journal); - return ret; -} - -static int ext3_sync_fs(struct super_block *sb, int wait) -{ - tid_t target; - - trace_ext3_sync_fs(sb, wait); - /* - * Writeback quota in non-journalled quota case - journalled quota has - * no dirty dquots - */ - dquot_writeback_dquots(sb, -1); - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { - if (wait) - log_wait_commit(EXT3_SB(sb)->s_journal, target); - } - return 0; -} - -/* - * LVM calls this function before a (read-only) snapshot is created. This - * gives us a chance to flush the journal completely and mark the fs clean. - */ -static int ext3_freeze(struct super_block *sb) -{ - int error = 0; - journal_t *journal; - - if (!(sb->s_flags & MS_RDONLY)) { - journal = EXT3_SB(sb)->s_journal; - - /* Now we set up the journal barrier. 
*/ - journal_lock_updates(journal); - - /* - * We don't want to clear needs_recovery flag when we failed - * to flush the journal. - */ - error = journal_flush(journal); - if (error < 0) - goto out; - - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); - if (error) - goto out; - } - return 0; - -out: - journal_unlock_updates(journal); - return error; -} - -/* - * Called by LVM after the snapshot is done. We need to reset the RECOVER - * flag here, even though the filesystem is not technically dirty yet. - */ -static int ext3_unfreeze(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) { - /* Reser the needs_recovery flag before the fs is unlocked. */ - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); - journal_unlock_updates(EXT3_SB(sb)->s_journal); - } - return 0; -} - -static int ext3_remount (struct super_block * sb, int * flags, char * data) -{ - struct ext3_super_block * es; - struct ext3_sb_info *sbi = EXT3_SB(sb); - ext3_fsblk_t n_blocks_count = 0; - unsigned long old_sb_flags; - struct ext3_mount_options old_opts; - int enable_quota = 0; - int err; -#ifdef CONFIG_QUOTA - int i; -#endif - - sync_filesystem(sb); - - /* Store the original options */ - old_sb_flags = sb->s_flags; - old_opts.s_mount_opt = sbi->s_mount_opt; - old_opts.s_resuid = sbi->s_resuid; - old_opts.s_resgid = sbi->s_resgid; - old_opts.s_commit_interval = sbi->s_commit_interval; -#ifdef CONFIG_QUOTA - old_opts.s_jquota_fmt = sbi->s_jquota_fmt; - for (i = 0; i < EXT3_MAXQUOTAS; i++) - if (sbi->s_qf_names[i]) { - old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], - GFP_KERNEL); - if (!old_opts.s_qf_names[i]) { - int j; - - for (j = 0; j < i; j++) - kfree(old_opts.s_qf_names[j]); - return -ENOMEM; - } - } else - old_opts.s_qf_names[i] = NULL; -#endif - - /* - * Allow the "check" option to be 
passed as a remount option. - */ - if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { - err = -EINVAL; - goto restore_opts; - } - - if (test_opt(sb, ABORT)) - ext3_abort(sb, __func__, "Abort forced by user"); - - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); - - es = sbi->s_es; - - ext3_init_journal_params(sb, sbi->s_journal); - - if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || - n_blocks_count > le32_to_cpu(es->s_blocks_count)) { - if (test_opt(sb, ABORT)) { - err = -EROFS; - goto restore_opts; - } - - if (*flags & MS_RDONLY) { - err = dquot_suspend(sb, -1); - if (err < 0) - goto restore_opts; - - /* - * First of all, the unconditional stuff we have to do - * to disable replay of the journal when we next remount - */ - sb->s_flags |= MS_RDONLY; - - /* - * OK, test if we are remounting a valid rw partition - * readonly, and if so set the rdonly flag and then - * mark the partition as valid again. - */ - if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && - (sbi->s_mount_state & EXT3_VALID_FS)) - es->s_state = cpu_to_le16(sbi->s_mount_state); - - ext3_mark_recovery_complete(sb, es); - } else { - __le32 ret; - if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, - ~EXT3_FEATURE_RO_COMPAT_SUPP))) { - ext3_msg(sb, KERN_WARNING, - "warning: couldn't remount RDWR " - "because of unsupported optional " - "features (%x)", le32_to_cpu(ret)); - err = -EROFS; - goto restore_opts; - } - - /* - * If we have an unprocessed orphan list hanging - * around from a previously readonly bdev mount, - * require a full umount & mount for now. - */ - if (es->s_last_orphan) { - ext3_msg(sb, KERN_WARNING, "warning: couldn't " - "remount RDWR because of unprocessed " - "orphan inode list. Please " - "umount & mount instead."); - err = -EINVAL; - goto restore_opts; - } - - /* - * Mounting a RDONLY partition read-write, so reread - * and store the current valid flag. 
(It may have - * been changed by e2fsck since we originally mounted - * the partition.) - */ - ext3_clear_journal_err(sb, es); - sbi->s_mount_state = le16_to_cpu(es->s_state); - if ((err = ext3_group_extend(sb, es, n_blocks_count))) - goto restore_opts; - if (!ext3_setup_super (sb, es, 0)) - sb->s_flags &= ~MS_RDONLY; - enable_quota = 1; - } - } -#ifdef CONFIG_QUOTA - /* Release old quota file names */ - for (i = 0; i < EXT3_MAXQUOTAS; i++) - kfree(old_opts.s_qf_names[i]); -#endif - if (enable_quota) - dquot_resume(sb, -1); - return 0; -restore_opts: - sb->s_flags = old_sb_flags; - sbi->s_mount_opt = old_opts.s_mount_opt; - sbi->s_resuid = old_opts.s_resuid; - sbi->s_resgid = old_opts.s_resgid; - sbi->s_commit_interval = old_opts.s_commit_interval; -#ifdef CONFIG_QUOTA - sbi->s_jquota_fmt = old_opts.s_jquota_fmt; - for (i = 0; i < EXT3_MAXQUOTAS; i++) { - kfree(sbi->s_qf_names[i]); - sbi->s_qf_names[i] = old_opts.s_qf_names[i]; - } -#endif - return err; -} - -static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) -{ - struct super_block *sb = dentry->d_sb; - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - u64 fsid; - - if (test_opt(sb, MINIX_DF)) { - sbi->s_overhead_last = 0; - } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) { - unsigned long ngroups = sbi->s_groups_count, i; - ext3_fsblk_t overhead = 0; - smp_rmb(); - - /* - * Compute the overhead (FS structures). This is constant - * for a given filesystem unless the number of block groups - * changes so we cache the previous value until it does. - */ - - /* - * All of the blocks before first_data_block are - * overhead - */ - overhead = le32_to_cpu(es->s_first_data_block); - - /* - * Add the overhead attributed to the superblock and - * block group descriptors. If the sparse superblocks - * feature is turned on, then not all groups have this. 
- */ - for (i = 0; i < ngroups; i++) { - overhead += ext3_bg_has_super(sb, i) + - ext3_bg_num_gdb(sb, i); - cond_resched(); - } - - /* - * Every block group has an inode bitmap, a block - * bitmap, and an inode table. - */ - overhead += ngroups * (2 + sbi->s_itb_per_group); - - /* Add the internal journal blocks as well */ - if (sbi->s_journal && !sbi->journal_bdev) - overhead += sbi->s_journal->j_maxlen; - - sbi->s_overhead_last = overhead; - smp_wmb(); - sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count); - } - - buf->f_type = EXT3_SUPER_MAGIC; - buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last; - buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); - buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) - buf->f_bavail = 0; - buf->f_files = le32_to_cpu(es->s_inodes_count); - buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); - buf->f_namelen = EXT3_NAME_LEN; - fsid = le64_to_cpup((void *)es->s_uuid) ^ - le64_to_cpup((void *)es->s_uuid + sizeof(u64)); - buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; - buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; - return 0; -} - -/* Helper function for writing quotas on sync - we need to start transaction before quota file - * is locked for write. 
Otherwise the are possible deadlocks: - * Process 1 Process 2 - * ext3_create() quota_sync() - * journal_start() write_dquot() - * dquot_initialize() down(dqio_mutex) - * down(dqio_mutex) journal_start() - * - */ - -#ifdef CONFIG_QUOTA - -static inline struct inode *dquot_to_inode(struct dquot *dquot) -{ - return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type]; -} - -static int ext3_write_dquot(struct dquot *dquot) -{ - int ret, err; - handle_t *handle; - struct inode *inode; - - inode = dquot_to_inode(dquot); - handle = ext3_journal_start(inode, - EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_commit(dquot); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext3_acquire_dquot(struct dquot *dquot) -{ - int ret, err; - handle_t *handle; - - handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_acquire(dquot); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext3_release_dquot(struct dquot *dquot) -{ - int ret, err; - handle_t *handle; - - handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) { - /* Release dquot anyway to avoid endless cycle in dqput() */ - dquot_release(dquot); - return PTR_ERR(handle); - } - ret = dquot_release(dquot); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -static int ext3_mark_dquot_dirty(struct dquot *dquot) -{ - /* Are we journaling quotas? 
*/ - if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { - dquot_mark_dquot_dirty(dquot); - return ext3_write_dquot(dquot); - } else { - return dquot_mark_dquot_dirty(dquot); - } -} - -static int ext3_write_info(struct super_block *sb, int type) -{ - int ret, err; - handle_t *handle; - - /* Data block + inode block */ - handle = ext3_journal_start(d_inode(sb->s_root), 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - ret = dquot_commit_info(sb, type); - err = ext3_journal_stop(handle); - if (!ret) - ret = err; - return ret; -} - -/* - * Turn on quotas during mount time - we need to find - * the quota file and such... - */ -static int ext3_quota_on_mount(struct super_block *sb, int type) -{ - return dquot_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], - EXT3_SB(sb)->s_jquota_fmt, type); -} - -/* - * Standard function to be called on quota_on - */ -static int ext3_quota_on(struct super_block *sb, int type, int format_id, - struct path *path) -{ - int err; - - if (!test_opt(sb, QUOTA)) - return -EINVAL; - - /* Quotafile not on the same filesystem? */ - if (path->dentry->d_sb != sb) - return -EXDEV; - /* Journaling quota? */ - if (EXT3_SB(sb)->s_qf_names[type]) { - /* Quotafile not of fs root? */ - if (path->dentry->d_parent != sb->s_root) - ext3_msg(sb, KERN_WARNING, - "warning: Quota file not on filesystem root. " - "Journaled quota will not work."); - } - - /* - * When we journal data on quota file, we have to flush journal to see - * all updates to the file when we bypass pagecache... - */ - if (ext3_should_journal_data(d_inode(path->dentry))) { - /* - * We don't need to lock updates but journal_flush() could - * otherwise be livelocked... 
- */ - journal_lock_updates(EXT3_SB(sb)->s_journal); - err = journal_flush(EXT3_SB(sb)->s_journal); - journal_unlock_updates(EXT3_SB(sb)->s_journal); - if (err) - return err; - } - - return dquot_quota_on(sb, type, format_id, path); -} - -/* Read data from quotafile - avoid pagecache and such because we cannot afford - * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and no one else should touch the files) - * we don't have to be afraid of races */ -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, - size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int tocopy; - size_t toread; - struct buffer_head *bh; - loff_t i_size = i_size_read(inode); - - if (off > i_size) - return 0; - if (off+len > i_size) - len = i_size-off; - toread = len; - while (toread > 0) { - tocopy = sb->s_blocksize - offset < toread ? - sb->s_blocksize - offset : toread; - bh = ext3_bread(NULL, inode, blk, 0, &err); - if (err) - return err; - if (!bh) /* A hole? 
*/ - memset(data, 0, tocopy); - else - memcpy(data, bh->b_data+offset, tocopy); - brelse(bh); - offset = 0; - toread -= tocopy; - data += tocopy; - blk++; - } - return len; -} - -/* Write to quotafile (we know the transaction is already started and has - * enough credits) */ -static ssize_t ext3_quota_write(struct super_block *sb, int type, - const char *data, size_t len, loff_t off) -{ - struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); - int err = 0; - int offset = off & (sb->s_blocksize - 1); - int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; - struct buffer_head *bh; - handle_t *handle = journal_current_handle(); - - if (!handle) { - ext3_msg(sb, KERN_WARNING, - "warning: quota write (off=%llu, len=%llu)" - " cancelled because transaction is not started.", - (unsigned long long)off, (unsigned long long)len); - return -EIO; - } - - /* - * Since we account only one data block in transaction credits, - * then it is impossible to cross a block boundary. 
- */ - if (sb->s_blocksize - offset < len) { - ext3_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" - " cancelled because not block aligned", - (unsigned long long)off, (unsigned long long)len); - return -EIO; - } - bh = ext3_bread(handle, inode, blk, 1, &err); - if (!bh) - goto out; - if (journal_quota) { - err = ext3_journal_get_write_access(handle, bh); - if (err) { - brelse(bh); - goto out; - } - } - lock_buffer(bh); - memcpy(bh->b_data+offset, data, len); - flush_dcache_page(bh->b_page); - unlock_buffer(bh); - if (journal_quota) - err = ext3_journal_dirty_metadata(handle, bh); - else { - /* Always do at least ordered writes for quotas */ - err = ext3_journal_dirty_data(handle, bh); - mark_buffer_dirty(bh); - } - brelse(bh); -out: - if (err) - return err; - if (inode->i_size < off + len) { - i_size_write(inode, off + len); - EXT3_I(inode)->i_disksize = inode->i_size; - } - inode->i_version++; - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - return len; -} - -#endif - -static struct dentry *ext3_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super); -} - -static struct file_system_type ext3_fs_type = { - .owner = THIS_MODULE, - .name = "ext3", - .mount = ext3_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; -MODULE_ALIAS_FS("ext3"); - -static int __init init_ext3_fs(void) -{ - int err = init_ext3_xattr(); - if (err) - return err; - err = init_inodecache(); - if (err) - goto out1; - err = register_filesystem(&ext3_fs_type); - if (err) - goto out; - return 0; -out: - destroy_inodecache(); -out1: - exit_ext3_xattr(); - return err; -} - -static void __exit exit_ext3_fs(void) -{ - unregister_filesystem(&ext3_fs_type); - destroy_inodecache(); - exit_ext3_xattr(); -} - -MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); 
-MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); -MODULE_LICENSE("GPL"); -module_init(init_ext3_fs) -module_exit(exit_ext3_fs) diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c deleted file mode 100644 index c08c59094ae6..000000000000 --- a/fs/ext3/symlink.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * linux/fs/ext3/symlink.c - * - * Only fast symlinks left here - the rest is done by generic code. AV, 1999 - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/symlink.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext3 symlink handling code - */ - -#include "ext3.h" -#include "xattr.h" - -const struct inode_operations ext3_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, - .removexattr = generic_removexattr, -#endif -}; - -const struct inode_operations ext3_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = simple_follow_link, - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, - .removexattr = generic_removexattr, -#endif -}; diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c deleted file mode 100644 index 7cf36501ccf4..000000000000 --- a/fs/ext3/xattr.c +++ /dev/null @@ -1,1330 +0,0 @@ -/* - * linux/fs/ext3/xattr.c - * - * Copyright (C) 2001-2003 Andreas Gruenbacher, - * - * Fix by Harrison Xing . - * Ext3 code with a lot of help from Eric Jarman . - * Extended attributes for symlinks and special files added per - * suggestion of Luka Renko . - * xattr consolidation Copyright (c) 2004 James Morris , - * Red Hat Inc. 
- * ea-in-inode support by Alex Tomas aka bzzz - * and Andreas Gruenbacher . - */ - -/* - * Extended attributes are stored directly in inodes (on file systems with - * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl - * field contains the block number if an inode uses an additional block. All - * attributes must fit in the inode and one additional block. Blocks that - * contain the identical set of attributes may be shared among several inodes. - * Identical blocks are detected by keeping a cache of blocks that have - * recently been accessed. - * - * The attributes in inodes and on blocks have a different header; the entries - * are stored in the same format: - * - * +------------------+ - * | header | - * | entry 1 | | - * | entry 2 | | growing downwards - * | entry 3 | v - * | four null bytes | - * | . . . | - * | value 1 | ^ - * | value 3 | | growing upwards - * | value 2 | | - * +------------------+ - * - * The header is followed by multiple entry descriptors. In disk blocks, the - * entry descriptors are kept sorted. In inodes, they are unsorted. The - * attribute values are aligned to the end of the block in no specific order. - * - * Locking strategy - * ---------------- - * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. - * EA blocks are only changed if they are exclusive to an inode, so - * holding xattr_sem also means that nothing but the EA block's reference - * count can change. Multiple writers to the same block are synchronized - * by the buffer lock. 
- */ - -#include "ext3.h" -#include -#include -#include "xattr.h" -#include "acl.h" - -#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -#define BFIRST(bh) ENTRY(BHDR(bh)+1) -#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) - -#define IHDR(inode, raw_inode) \ - ((struct ext3_xattr_ibody_header *) \ - ((void *)raw_inode + \ - EXT3_GOOD_OLD_INODE_SIZE + \ - EXT3_I(inode)->i_extra_isize)) -#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1)) - -#ifdef EXT3_XATTR_DEBUG -# define ea_idebug(inode, f...) do { \ - printk(KERN_DEBUG "inode %s:%lu: ", \ - inode->i_sb->s_id, inode->i_ino); \ - printk(f); \ - printk("\n"); \ - } while (0) -# define ea_bdebug(bh, f...) do { \ - char b[BDEVNAME_SIZE]; \ - printk(KERN_DEBUG "block %s:%lu: ", \ - bdevname(bh->b_bdev, b), \ - (unsigned long) bh->b_blocknr); \ - printk(f); \ - printk("\n"); \ - } while (0) -#else -# define ea_idebug(f...) -# define ea_bdebug(f...) -#endif - -static void ext3_xattr_cache_insert(struct buffer_head *); -static struct buffer_head *ext3_xattr_cache_find(struct inode *, - struct ext3_xattr_header *, - struct mb_cache_entry **); -static void ext3_xattr_rehash(struct ext3_xattr_header *, - struct ext3_xattr_entry *); -static int ext3_xattr_list(struct dentry *dentry, char *buffer, - size_t buffer_size); - -static struct mb_cache *ext3_xattr_cache; - -static const struct xattr_handler *ext3_xattr_handler_map[] = { - [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, -#ifdef CONFIG_EXT3_FS_POSIX_ACL - [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &posix_acl_access_xattr_handler, - [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler, -#endif - [EXT3_XATTR_INDEX_TRUSTED] = &ext3_xattr_trusted_handler, -#ifdef CONFIG_EXT3_FS_SECURITY - [EXT3_XATTR_INDEX_SECURITY] = &ext3_xattr_security_handler, -#endif -}; - -const struct xattr_handler *ext3_xattr_handlers[] = { - &ext3_xattr_user_handler, - &ext3_xattr_trusted_handler, 
-#ifdef CONFIG_EXT3_FS_POSIX_ACL - &posix_acl_access_xattr_handler, - &posix_acl_default_xattr_handler, -#endif -#ifdef CONFIG_EXT3_FS_SECURITY - &ext3_xattr_security_handler, -#endif - NULL -}; - -static inline const struct xattr_handler * -ext3_xattr_handler(int name_index) -{ - const struct xattr_handler *handler = NULL; - - if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) - handler = ext3_xattr_handler_map[name_index]; - return handler; -} - -/* - * Inode operation listxattr() - * - * d_inode(dentry)->i_mutex: don't care - */ -ssize_t -ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - return ext3_xattr_list(dentry, buffer, size); -} - -static int -ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end) -{ - while (!IS_LAST_ENTRY(entry)) { - struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry); - if ((void *)next >= end) - return -EIO; - entry = next; - } - return 0; -} - -static inline int -ext3_xattr_check_block(struct buffer_head *bh) -{ - int error; - - if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || - BHDR(bh)->h_blocks != cpu_to_le32(1)) - return -EIO; - error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); - return error; -} - -static inline int -ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size) -{ - size_t value_size = le32_to_cpu(entry->e_value_size); - - if (entry->e_value_block != 0 || value_size > size || - le16_to_cpu(entry->e_value_offs) + value_size > size) - return -EIO; - return 0; -} - -static int -ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index, - const char *name, size_t size, int sorted) -{ - struct ext3_xattr_entry *entry; - size_t name_len; - int cmp = 1; - - if (name == NULL) - return -EINVAL; - name_len = strlen(name); - entry = *pentry; - for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { - cmp = name_index - entry->e_name_index; - if (!cmp) - cmp = name_len - entry->e_name_len; - if (!cmp) - cmp = 
memcmp(name, entry->e_name, name_len); - if (cmp <= 0 && (sorted || cmp == 0)) - break; - } - *pentry = entry; - if (!cmp && ext3_xattr_check_entry(entry, size)) - return -EIO; - return cmp ? -ENODATA : 0; -} - -static int -ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; - size_t size; - int error; - - ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", - name_index, name, buffer, (long)buffer_size); - - error = -ENODATA; - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; - ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); - if (!bh) - goto cleanup; - ea_bdebug(bh, "b_count=%d, refcount=%d", - atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext3_xattr_check_block(bh)) { -bad_block: ext3_error(inode->i_sb, __func__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); - error = -EIO; - goto cleanup; - } - ext3_xattr_cache_insert(bh); - entry = BFIRST(bh); - error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); - if (error == -EIO) - goto bad_block; - if (error) - goto cleanup; - size = le32_to_cpu(entry->e_value_size); - if (buffer) { - error = -ERANGE; - if (size > buffer_size) - goto cleanup; - memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), - size); - } - error = size; - -cleanup: - brelse(bh); - return error; -} - -static int -ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - struct ext3_xattr_ibody_header *header; - struct ext3_xattr_entry *entry; - struct ext3_inode *raw_inode; - struct ext3_iloc iloc; - size_t size; - void *end; - int error; - - if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR)) - return -ENODATA; - error = ext3_get_inode_loc(inode, &iloc); - if (error) - return error; - raw_inode = 
ext3_raw_inode(&iloc); - header = IHDR(inode, raw_inode); - entry = IFIRST(header); - end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - error = ext3_xattr_check_names(entry, end); - if (error) - goto cleanup; - error = ext3_xattr_find_entry(&entry, name_index, name, - end - (void *)entry, 0); - if (error) - goto cleanup; - size = le32_to_cpu(entry->e_value_size); - if (buffer) { - error = -ERANGE; - if (size > buffer_size) - goto cleanup; - memcpy(buffer, (void *)IFIRST(header) + - le16_to_cpu(entry->e_value_offs), size); - } - error = size; - -cleanup: - brelse(iloc.bh); - return error; -} - -/* - * ext3_xattr_get() - * - * Copy an extended attribute into the buffer - * provided, or compute the buffer size required. - * Buffer is NULL to compute the size of the buffer required. - * - * Returns a negative error number on failure, or the number of bytes - * used / required on success. - */ -int -ext3_xattr_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size) -{ - int error; - - down_read(&EXT3_I(inode)->xattr_sem); - error = ext3_xattr_ibody_get(inode, name_index, name, buffer, - buffer_size); - if (error == -ENODATA) - error = ext3_xattr_block_get(inode, name_index, name, buffer, - buffer_size); - up_read(&EXT3_I(inode)->xattr_sem); - return error; -} - -static int -ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry, - char *buffer, size_t buffer_size) -{ - size_t rest = buffer_size; - - for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { - const struct xattr_handler *handler = - ext3_xattr_handler(entry->e_name_index); - - if (handler) { - size_t size = handler->list(dentry, buffer, rest, - entry->e_name, - entry->e_name_len, - handler->flags); - if (buffer) { - if (size > rest) - return -ERANGE; - buffer += size; - } - rest -= size; - } - } - return buffer_size - rest; -} - -static int -ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) -{ - 
struct inode *inode = d_inode(dentry); - struct buffer_head *bh = NULL; - int error; - - ea_idebug(inode, "buffer=%p, buffer_size=%ld", - buffer, (long)buffer_size); - - error = 0; - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; - ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); - error = -EIO; - if (!bh) - goto cleanup; - ea_bdebug(bh, "b_count=%d, refcount=%d", - atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext3_xattr_check_block(bh)) { - ext3_error(inode->i_sb, __func__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); - error = -EIO; - goto cleanup; - } - ext3_xattr_cache_insert(bh); - error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); - -cleanup: - brelse(bh); - - return error; -} - -static int -ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) -{ - struct inode *inode = d_inode(dentry); - struct ext3_xattr_ibody_header *header; - struct ext3_inode *raw_inode; - struct ext3_iloc iloc; - void *end; - int error; - - if (!ext3_test_inode_state(inode, EXT3_STATE_XATTR)) - return 0; - error = ext3_get_inode_loc(inode, &iloc); - if (error) - return error; - raw_inode = ext3_raw_inode(&iloc); - header = IHDR(inode, raw_inode); - end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - error = ext3_xattr_check_names(IFIRST(header), end); - if (error) - goto cleanup; - error = ext3_xattr_list_entries(dentry, IFIRST(header), - buffer, buffer_size); - -cleanup: - brelse(iloc.bh); - return error; -} - -/* - * ext3_xattr_list() - * - * Copy a list of attribute names into the buffer - * provided, or compute the buffer size required. - * Buffer is NULL to compute the size of the buffer required. - * - * Returns a negative error number on failure, or the number of bytes - * used / required on success. 
- */ -static int -ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size) -{ - int i_error, b_error; - - down_read(&EXT3_I(d_inode(dentry))->xattr_sem); - i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size); - if (i_error < 0) { - b_error = 0; - } else { - if (buffer) { - buffer += i_error; - buffer_size -= i_error; - } - b_error = ext3_xattr_block_list(dentry, buffer, buffer_size); - if (b_error < 0) - i_error = 0; - } - up_read(&EXT3_I(d_inode(dentry))->xattr_sem); - return i_error + b_error; -} - -/* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is - * not set, set it. - */ -static void ext3_xattr_update_super_block(handle_t *handle, - struct super_block *sb) -{ - if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) - return; - - if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { - EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - } -} - -/* - * Release the xattr block BH: If the reference count is > 1, decrement - * it; otherwise free the block. 
- */ -static void -ext3_xattr_release_block(handle_t *handle, struct inode *inode, - struct buffer_head *bh) -{ - struct mb_cache_entry *ce = NULL; - int error = 0; - - ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr); - error = ext3_journal_get_write_access(handle, bh); - if (error) - goto out; - - lock_buffer(bh); - - if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { - ea_bdebug(bh, "refcount now=0; freeing"); - if (ce) - mb_cache_entry_free(ce); - ext3_free_blocks(handle, inode, bh->b_blocknr, 1); - get_bh(bh); - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); - } else { - le32_add_cpu(&BHDR(bh)->h_refcount, -1); - error = ext3_journal_dirty_metadata(handle, bh); - if (IS_SYNC(inode)) - handle->h_sync = 1; - dquot_free_block(inode, 1); - ea_bdebug(bh, "refcount now=%d; releasing", - le32_to_cpu(BHDR(bh)->h_refcount)); - if (ce) - mb_cache_entry_release(ce); - } - unlock_buffer(bh); -out: - ext3_std_error(inode->i_sb, error); - return; -} - -struct ext3_xattr_info { - int name_index; - const char *name; - const void *value; - size_t value_len; -}; - -struct ext3_xattr_search { - struct ext3_xattr_entry *first; - void *base; - void *end; - struct ext3_xattr_entry *here; - int not_found; -}; - -static int -ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) -{ - struct ext3_xattr_entry *last; - size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); - - /* Compute min_offs and last. 
*/ - last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { - if (!last->e_value_block && last->e_value_size) { - size_t offs = le16_to_cpu(last->e_value_offs); - if (offs < min_offs) - min_offs = offs; - } - } - free = min_offs - ((void *)last - s->base) - sizeof(__u32); - if (!s->not_found) { - if (!s->here->e_value_block && s->here->e_value_size) { - size_t size = le32_to_cpu(s->here->e_value_size); - free += EXT3_XATTR_SIZE(size); - } - free += EXT3_XATTR_LEN(name_len); - } - if (i->value) { - if (free < EXT3_XATTR_LEN(name_len) + - EXT3_XATTR_SIZE(i->value_len)) - return -ENOSPC; - } - - if (i->value && s->not_found) { - /* Insert the new name. */ - size_t size = EXT3_XATTR_LEN(name_len); - size_t rest = (void *)last - (void *)s->here + sizeof(__u32); - memmove((void *)s->here + size, s->here, rest); - memset(s->here, 0, size); - s->here->e_name_index = i->name_index; - s->here->e_name_len = name_len; - memcpy(s->here->e_name, i->name, name_len); - } else { - if (!s->here->e_value_block && s->here->e_value_size) { - void *first_val = s->base + min_offs; - size_t offs = le16_to_cpu(s->here->e_value_offs); - void *val = s->base + offs; - size_t size = EXT3_XATTR_SIZE( - le32_to_cpu(s->here->e_value_size)); - - if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) { - /* The old and the new value have the same - size. Just replace. */ - s->here->e_value_size = - cpu_to_le32(i->value_len); - memset(val + size - EXT3_XATTR_PAD, 0, - EXT3_XATTR_PAD); /* Clear pad bytes. */ - memcpy(val, i->value, i->value_len); - return 0; - } - - /* Remove the old value. */ - memmove(first_val + size, first_val, val - first_val); - memset(first_val, 0, size); - s->here->e_value_size = 0; - s->here->e_value_offs = 0; - min_offs += size; - - /* Adjust all value offsets. 
*/ - last = s->first; - while (!IS_LAST_ENTRY(last)) { - size_t o = le16_to_cpu(last->e_value_offs); - if (!last->e_value_block && - last->e_value_size && o < offs) - last->e_value_offs = - cpu_to_le16(o + size); - last = EXT3_XATTR_NEXT(last); - } - } - if (!i->value) { - /* Remove the old name. */ - size_t size = EXT3_XATTR_LEN(name_len); - last = ENTRY((void *)last - size); - memmove(s->here, (void *)s->here + size, - (void *)last - (void *)s->here + sizeof(__u32)); - memset(last, 0, size); - } - } - - if (i->value) { - /* Insert the new value. */ - s->here->e_value_size = cpu_to_le32(i->value_len); - if (i->value_len) { - size_t size = EXT3_XATTR_SIZE(i->value_len); - void *val = s->base + min_offs - size; - s->here->e_value_offs = cpu_to_le16(min_offs - size); - memset(val + size - EXT3_XATTR_PAD, 0, - EXT3_XATTR_PAD); /* Clear the pad bytes. */ - memcpy(val, i->value, i->value_len); - } - } - return 0; -} - -struct ext3_xattr_block_find { - struct ext3_xattr_search s; - struct buffer_head *bh; -}; - -static int -ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i, - struct ext3_xattr_block_find *bs) -{ - struct super_block *sb = inode->i_sb; - int error; - - ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", - i->name_index, i->name, i->value, (long)i->value_len); - - if (EXT3_I(inode)->i_file_acl) { - /* The inode already has an extended attribute block. */ - bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); - error = -EIO; - if (!bs->bh) - goto cleanup; - ea_bdebug(bs->bh, "b_count=%d, refcount=%d", - atomic_read(&(bs->bh->b_count)), - le32_to_cpu(BHDR(bs->bh)->h_refcount)); - if (ext3_xattr_check_block(bs->bh)) { - ext3_error(sb, __func__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); - error = -EIO; - goto cleanup; - } - /* Find the named attribute. 
*/ - bs->s.base = BHDR(bs->bh); - bs->s.first = BFIRST(bs->bh); - bs->s.end = bs->bh->b_data + bs->bh->b_size; - bs->s.here = bs->s.first; - error = ext3_xattr_find_entry(&bs->s.here, i->name_index, - i->name, bs->bh->b_size, 1); - if (error && error != -ENODATA) - goto cleanup; - bs->s.not_found = error; - } - error = 0; - -cleanup: - return error; -} - -static int -ext3_xattr_block_set(handle_t *handle, struct inode *inode, - struct ext3_xattr_info *i, - struct ext3_xattr_block_find *bs) -{ - struct super_block *sb = inode->i_sb; - struct buffer_head *new_bh = NULL; - struct ext3_xattr_search *s = &bs->s; - struct mb_cache_entry *ce = NULL; - int error = 0; - -#define header(x) ((struct ext3_xattr_header *)(x)) - - if (i->value && i->value_len > sb->s_blocksize) - return -ENOSPC; - if (s->base) { - ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev, - bs->bh->b_blocknr); - error = ext3_journal_get_write_access(handle, bs->bh); - if (error) - goto cleanup; - lock_buffer(bs->bh); - - if (header(s->base)->h_refcount == cpu_to_le32(1)) { - if (ce) { - mb_cache_entry_free(ce); - ce = NULL; - } - ea_bdebug(bs->bh, "modifying in-place"); - error = ext3_xattr_set_entry(i, s); - if (!error) { - if (!IS_LAST_ENTRY(s->first)) - ext3_xattr_rehash(header(s->base), - s->here); - ext3_xattr_cache_insert(bs->bh); - } - unlock_buffer(bs->bh); - if (error == -EIO) - goto bad_block; - if (!error) - error = ext3_journal_dirty_metadata(handle, - bs->bh); - if (error) - goto cleanup; - goto inserted; - } else { - int offset = (char *)s->here - bs->bh->b_data; - - unlock_buffer(bs->bh); - journal_release_buffer(handle, bs->bh); - - if (ce) { - mb_cache_entry_release(ce); - ce = NULL; - } - ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_NOFS); - error = -ENOMEM; - if (s->base == NULL) - goto cleanup; - memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); - s->first = ENTRY(header(s->base)+1); - header(s->base)->h_refcount = cpu_to_le32(1); - s->here = 
ENTRY(s->base + offset); - s->end = s->base + bs->bh->b_size; - } - } else { - /* Allocate a buffer where we construct the new block. */ - s->base = kzalloc(sb->s_blocksize, GFP_NOFS); - /* assert(header == s->base) */ - error = -ENOMEM; - if (s->base == NULL) - goto cleanup; - header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); - header(s->base)->h_blocks = cpu_to_le32(1); - header(s->base)->h_refcount = cpu_to_le32(1); - s->first = ENTRY(header(s->base)+1); - s->here = ENTRY(header(s->base)+1); - s->end = s->base + sb->s_blocksize; - } - - error = ext3_xattr_set_entry(i, s); - if (error == -EIO) - goto bad_block; - if (error) - goto cleanup; - if (!IS_LAST_ENTRY(s->first)) - ext3_xattr_rehash(header(s->base), s->here); - -inserted: - if (!IS_LAST_ENTRY(s->first)) { - new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce); - if (new_bh) { - /* We found an identical block in the cache. */ - if (new_bh == bs->bh) - ea_bdebug(new_bh, "keeping"); - else { - /* The old block is released after updating - the inode. */ - error = dquot_alloc_block(inode, 1); - if (error) - goto cleanup; - error = ext3_journal_get_write_access(handle, - new_bh); - if (error) - goto cleanup_dquot; - lock_buffer(new_bh); - le32_add_cpu(&BHDR(new_bh)->h_refcount, 1); - ea_bdebug(new_bh, "reusing; refcount now=%d", - le32_to_cpu(BHDR(new_bh)->h_refcount)); - unlock_buffer(new_bh); - error = ext3_journal_dirty_metadata(handle, - new_bh); - if (error) - goto cleanup_dquot; - } - mb_cache_entry_release(ce); - ce = NULL; - } else if (bs->bh && s->base == bs->bh->b_data) { - /* We were modifying this block in-place. */ - ea_bdebug(bs->bh, "keeping this block"); - new_bh = bs->bh; - get_bh(new_bh); - } else { - /* We need to allocate a new block */ - ext3_fsblk_t goal = ext3_group_first_block_no(sb, - EXT3_I(inode)->i_block_group); - ext3_fsblk_t block; - - /* - * Protect us agaist concurrent allocations to the - * same inode from ext3_..._writepage(). 
Reservation - * code does not expect racing allocations. - */ - mutex_lock(&EXT3_I(inode)->truncate_mutex); - block = ext3_new_block(handle, inode, goal, &error); - mutex_unlock(&EXT3_I(inode)->truncate_mutex); - if (error) - goto cleanup; - ea_idebug(inode, "creating block %d", block); - - new_bh = sb_getblk(sb, block); - if (unlikely(!new_bh)) { -getblk_failed: - ext3_free_blocks(handle, inode, block, 1); - error = -ENOMEM; - goto cleanup; - } - lock_buffer(new_bh); - error = ext3_journal_get_create_access(handle, new_bh); - if (error) { - unlock_buffer(new_bh); - goto getblk_failed; - } - memcpy(new_bh->b_data, s->base, new_bh->b_size); - set_buffer_uptodate(new_bh); - unlock_buffer(new_bh); - ext3_xattr_cache_insert(new_bh); - error = ext3_journal_dirty_metadata(handle, new_bh); - if (error) - goto cleanup; - } - } - - /* Update the inode. */ - EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; - - /* Drop the previous xattr block. */ - if (bs->bh && bs->bh != new_bh) - ext3_xattr_release_block(handle, inode, bs->bh); - error = 0; - -cleanup: - if (ce) - mb_cache_entry_release(ce); - brelse(new_bh); - if (!(bs->bh && s->base == bs->bh->b_data)) - kfree(s->base); - - return error; - -cleanup_dquot: - dquot_free_block(inode, 1); - goto cleanup; - -bad_block: - ext3_error(inode->i_sb, __func__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); - goto cleanup; - -#undef header -} - -struct ext3_xattr_ibody_find { - struct ext3_xattr_search s; - struct ext3_iloc iloc; -}; - -static int -ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i, - struct ext3_xattr_ibody_find *is) -{ - struct ext3_xattr_ibody_header *header; - struct ext3_inode *raw_inode; - int error; - - if (EXT3_I(inode)->i_extra_isize == 0) - return 0; - raw_inode = ext3_raw_inode(&is->iloc); - header = IHDR(inode, raw_inode); - is->s.base = is->s.first = IFIRST(header); - is->s.here = is->s.first; - is->s.end = (void *)raw_inode + 
EXT3_SB(inode->i_sb)->s_inode_size; - if (ext3_test_inode_state(inode, EXT3_STATE_XATTR)) { - error = ext3_xattr_check_names(IFIRST(header), is->s.end); - if (error) - return error; - /* Find the named attribute. */ - error = ext3_xattr_find_entry(&is->s.here, i->name_index, - i->name, is->s.end - - (void *)is->s.base, 0); - if (error && error != -ENODATA) - return error; - is->s.not_found = error; - } - return 0; -} - -static int -ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, - struct ext3_xattr_info *i, - struct ext3_xattr_ibody_find *is) -{ - struct ext3_xattr_ibody_header *header; - struct ext3_xattr_search *s = &is->s; - int error; - - if (EXT3_I(inode)->i_extra_isize == 0) - return -ENOSPC; - error = ext3_xattr_set_entry(i, s); - if (error) - return error; - header = IHDR(inode, ext3_raw_inode(&is->iloc)); - if (!IS_LAST_ENTRY(s->first)) { - header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); - ext3_set_inode_state(inode, EXT3_STATE_XATTR); - } else { - header->h_magic = cpu_to_le32(0); - ext3_clear_inode_state(inode, EXT3_STATE_XATTR); - } - return 0; -} - -/* - * ext3_xattr_set_handle() - * - * Create, replace or remove an extended attribute for this inode. Value - * is NULL to remove an existing extended attribute, and non-NULL to - * either replace an existing extended attribute, or create a new extended - * attribute. The flags XATTR_REPLACE and XATTR_CREATE - * specify that an extended attribute must exist and must not exist - * previous to the call, respectively. - * - * Returns 0, or a negative error number on failure. 
- */ -int -ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - int flags) -{ - struct ext3_xattr_info i = { - .name_index = name_index, - .name = name, - .value = value, - .value_len = value_len, - - }; - struct ext3_xattr_ibody_find is = { - .s = { .not_found = -ENODATA, }, - }; - struct ext3_xattr_block_find bs = { - .s = { .not_found = -ENODATA, }, - }; - int error; - - if (!name) - return -EINVAL; - if (strlen(name) > 255) - return -ERANGE; - down_write(&EXT3_I(inode)->xattr_sem); - error = ext3_get_inode_loc(inode, &is.iloc); - if (error) - goto cleanup; - - error = ext3_journal_get_write_access(handle, is.iloc.bh); - if (error) - goto cleanup; - - if (ext3_test_inode_state(inode, EXT3_STATE_NEW)) { - struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); - memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); - ext3_clear_inode_state(inode, EXT3_STATE_NEW); - } - - error = ext3_xattr_ibody_find(inode, &i, &is); - if (error) - goto cleanup; - if (is.s.not_found) - error = ext3_xattr_block_find(inode, &i, &bs); - if (error) - goto cleanup; - if (is.s.not_found && bs.s.not_found) { - error = -ENODATA; - if (flags & XATTR_REPLACE) - goto cleanup; - error = 0; - if (!value) - goto cleanup; - } else { - error = -EEXIST; - if (flags & XATTR_CREATE) - goto cleanup; - } - if (!value) { - if (!is.s.not_found) - error = ext3_xattr_ibody_set(handle, inode, &i, &is); - else if (!bs.s.not_found) - error = ext3_xattr_block_set(handle, inode, &i, &bs); - } else { - error = ext3_xattr_ibody_set(handle, inode, &i, &is); - if (!error && !bs.s.not_found) { - i.value = NULL; - error = ext3_xattr_block_set(handle, inode, &i, &bs); - } else if (error == -ENOSPC) { - if (EXT3_I(inode)->i_file_acl && !bs.s.base) { - error = ext3_xattr_block_find(inode, &i, &bs); - if (error) - goto cleanup; - } - error = ext3_xattr_block_set(handle, inode, &i, &bs); - if (error) - goto cleanup; - if 
(!is.s.not_found) { - i.value = NULL; - error = ext3_xattr_ibody_set(handle, inode, &i, - &is); - } - } - } - if (!error) { - ext3_xattr_update_super_block(handle, inode->i_sb); - inode->i_ctime = CURRENT_TIME_SEC; - error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); - /* - * The bh is consumed by ext3_mark_iloc_dirty, even with - * error != 0. - */ - is.iloc.bh = NULL; - if (IS_SYNC(inode)) - handle->h_sync = 1; - } - -cleanup: - brelse(is.iloc.bh); - brelse(bs.bh); - up_write(&EXT3_I(inode)->xattr_sem); - return error; -} - -/* - * ext3_xattr_set() - * - * Like ext3_xattr_set_handle, but start from an inode. This extended - * attribute modification is a filesystem transaction by itself. - * - * Returns 0, or a negative error number on failure. - */ -int -ext3_xattr_set(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, int flags) -{ - handle_t *handle; - int error, retries = 0; - -retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - } else { - int error2; - - error = ext3_xattr_set_handle(handle, inode, name_index, name, - value, value_len, flags); - error2 = ext3_journal_stop(handle); - if (error == -ENOSPC && - ext3_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - if (error == 0) - error = error2; - } - - return error; -} - -/* - * ext3_xattr_delete_inode() - * - * Free extended attribute resources associated with this inode. This - * is called immediately before an inode is freed. We have exclusive - * access to the inode. 
- */ -void -ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -{ - struct buffer_head *bh = NULL; - - if (!EXT3_I(inode)->i_file_acl) - goto cleanup; - bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); - if (!bh) { - ext3_error(inode->i_sb, __func__, - "inode %lu: block "E3FSBLK" read error", inode->i_ino, - EXT3_I(inode)->i_file_acl); - goto cleanup; - } - if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || - BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext3_error(inode->i_sb, __func__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); - goto cleanup; - } - ext3_xattr_release_block(handle, inode, bh); - EXT3_I(inode)->i_file_acl = 0; - -cleanup: - brelse(bh); -} - -/* - * ext3_xattr_put_super() - * - * This is called when a file system is unmounted. - */ -void -ext3_xattr_put_super(struct super_block *sb) -{ - mb_cache_shrink(sb->s_bdev); -} - -/* - * ext3_xattr_cache_insert() - * - * Create a new entry in the extended attribute cache, and insert - * it unless such an entry is already in the cache. - * - * Returns 0, or a negative error number on failure. - */ -static void -ext3_xattr_cache_insert(struct buffer_head *bh) -{ - __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); - struct mb_cache_entry *ce; - int error; - - ce = mb_cache_entry_alloc(ext3_xattr_cache, GFP_NOFS); - if (!ce) { - ea_bdebug(bh, "out of memory"); - return; - } - error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash); - if (error) { - mb_cache_entry_free(ce); - if (error == -EBUSY) { - ea_bdebug(bh, "already in cache"); - error = 0; - } - } else { - ea_bdebug(bh, "inserting [%x]", (int)hash); - mb_cache_entry_release(ce); - } -} - -/* - * ext3_xattr_cmp() - * - * Compare two extended attribute blocks for equality. - * - * Returns 0 if the blocks are equal, 1 if they differ, and - * a negative error number on errors. 
- */ -static int -ext3_xattr_cmp(struct ext3_xattr_header *header1, - struct ext3_xattr_header *header2) -{ - struct ext3_xattr_entry *entry1, *entry2; - - entry1 = ENTRY(header1+1); - entry2 = ENTRY(header2+1); - while (!IS_LAST_ENTRY(entry1)) { - if (IS_LAST_ENTRY(entry2)) - return 1; - if (entry1->e_hash != entry2->e_hash || - entry1->e_name_index != entry2->e_name_index || - entry1->e_name_len != entry2->e_name_len || - entry1->e_value_size != entry2->e_value_size || - memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) - return 1; - if (entry1->e_value_block != 0 || entry2->e_value_block != 0) - return -EIO; - if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), - (char *)header2 + le16_to_cpu(entry2->e_value_offs), - le32_to_cpu(entry1->e_value_size))) - return 1; - - entry1 = EXT3_XATTR_NEXT(entry1); - entry2 = EXT3_XATTR_NEXT(entry2); - } - if (!IS_LAST_ENTRY(entry2)) - return 1; - return 0; -} - -/* - * ext3_xattr_cache_find() - * - * Find an identical extended attribute block. - * - * Returns a pointer to the block found, or NULL if such a block was - * not found or an error occurred. 
- */ -static struct buffer_head * -ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header, - struct mb_cache_entry **pce) -{ - __u32 hash = le32_to_cpu(header->h_hash); - struct mb_cache_entry *ce; - - if (!header->h_hash) - return NULL; /* never share */ - ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -again: - ce = mb_cache_entry_find_first(ext3_xattr_cache, inode->i_sb->s_bdev, - hash); - while (ce) { - struct buffer_head *bh; - - if (IS_ERR(ce)) { - if (PTR_ERR(ce) == -EAGAIN) - goto again; - break; - } - bh = sb_bread(inode->i_sb, ce->e_block); - if (!bh) { - ext3_error(inode->i_sb, __func__, - "inode %lu: block %lu read error", - inode->i_ino, (unsigned long) ce->e_block); - } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= - EXT3_XATTR_REFCOUNT_MAX) { - ea_idebug(inode, "block %lu refcount %d>=%d", - (unsigned long) ce->e_block, - le32_to_cpu(BHDR(bh)->h_refcount), - EXT3_XATTR_REFCOUNT_MAX); - } else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) { - *pce = ce; - return bh; - } - brelse(bh); - ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash); - } - return NULL; -} - -#define NAME_HASH_SHIFT 5 -#define VALUE_HASH_SHIFT 16 - -/* - * ext3_xattr_hash_entry() - * - * Compute the hash of an extended attribute. 
- */ -static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, - struct ext3_xattr_entry *entry) -{ - __u32 hash = 0; - char *name = entry->e_name; - int n; - - for (n=0; n < entry->e_name_len; n++) { - hash = (hash << NAME_HASH_SHIFT) ^ - (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ - *name++; - } - - if (entry->e_value_block == 0 && entry->e_value_size != 0) { - __le32 *value = (__le32 *)((char *)header + - le16_to_cpu(entry->e_value_offs)); - for (n = (le32_to_cpu(entry->e_value_size) + - EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { - hash = (hash << VALUE_HASH_SHIFT) ^ - (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ - le32_to_cpu(*value++); - } - } - entry->e_hash = cpu_to_le32(hash); -} - -#undef NAME_HASH_SHIFT -#undef VALUE_HASH_SHIFT - -#define BLOCK_HASH_SHIFT 16 - -/* - * ext3_xattr_rehash() - * - * Re-compute the extended attribute hash value after an entry has changed. - */ -static void ext3_xattr_rehash(struct ext3_xattr_header *header, - struct ext3_xattr_entry *entry) -{ - struct ext3_xattr_entry *here; - __u32 hash = 0; - - ext3_xattr_hash_entry(header, entry); - here = ENTRY(header+1); - while (!IS_LAST_ENTRY(here)) { - if (!here->e_hash) { - /* Block is not shared if an entry's hash value == 0 */ - hash = 0; - break; - } - hash = (hash << BLOCK_HASH_SHIFT) ^ - (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ - le32_to_cpu(here->e_hash); - here = EXT3_XATTR_NEXT(here); - } - header->h_hash = cpu_to_le32(hash); -} - -#undef BLOCK_HASH_SHIFT - -int __init -init_ext3_xattr(void) -{ - ext3_xattr_cache = mb_cache_create("ext3_xattr", 6); - if (!ext3_xattr_cache) - return -ENOMEM; - return 0; -} - -void -exit_ext3_xattr(void) -{ - if (ext3_xattr_cache) - mb_cache_destroy(ext3_xattr_cache); - ext3_xattr_cache = NULL; -} diff --git a/fs/ext3/xattr.h b/fs/ext3/xattr.h deleted file mode 100644 index 32e93ebf8031..000000000000 --- a/fs/ext3/xattr.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - File: fs/ext3/xattr.h - - On-disk format of 
extended attributes for the ext3 filesystem. - - (C) 2001 Andreas Gruenbacher, -*/ - -#include - -/* Magic value in attribute blocks */ -#define EXT3_XATTR_MAGIC 0xEA020000 - -/* Maximum number of references to one attribute block */ -#define EXT3_XATTR_REFCOUNT_MAX 1024 - -/* Name indexes */ -#define EXT3_XATTR_INDEX_USER 1 -#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -#define EXT3_XATTR_INDEX_TRUSTED 4 -#define EXT3_XATTR_INDEX_LUSTRE 5 -#define EXT3_XATTR_INDEX_SECURITY 6 - -struct ext3_xattr_header { - __le32 h_magic; /* magic number for identification */ - __le32 h_refcount; /* reference count */ - __le32 h_blocks; /* number of disk blocks used */ - __le32 h_hash; /* hash value of all attributes */ - __u32 h_reserved[4]; /* zero right now */ -}; - -struct ext3_xattr_ibody_header { - __le32 h_magic; /* magic number for identification */ -}; - -struct ext3_xattr_entry { - __u8 e_name_len; /* length of name */ - __u8 e_name_index; /* attribute name index */ - __le16 e_value_offs; /* offset in disk block of value */ - __le32 e_value_block; /* disk block attribute is stored on (n/i) */ - __le32 e_value_size; /* size of attribute value */ - __le32 e_hash; /* hash value of name and value */ - char e_name[0]; /* attribute name */ -}; - -#define EXT3_XATTR_PAD_BITS 2 -#define EXT3_XATTR_PAD (1<e_name_len)) ) -#define EXT3_XATTR_SIZE(size) \ - (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) - -# ifdef CONFIG_EXT3_FS_XATTR - -extern const struct xattr_handler ext3_xattr_user_handler; -extern const struct xattr_handler ext3_xattr_trusted_handler; -extern const struct xattr_handler ext3_xattr_security_handler; - -extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); - -extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, 
const void *, size_t, int); - -extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -extern void ext3_xattr_put_super(struct super_block *); - -extern int init_ext3_xattr(void); -extern void exit_ext3_xattr(void); - -extern const struct xattr_handler *ext3_xattr_handlers[]; - -# else /* CONFIG_EXT3_FS_XATTR */ - -static inline int -ext3_xattr_get(struct inode *inode, int name_index, const char *name, - void *buffer, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static inline int -ext3_xattr_set(struct inode *inode, int name_index, const char *name, - const void *value, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static inline int -ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, - const char *name, const void *value, size_t size, int flags) -{ - return -EOPNOTSUPP; -} - -static inline void -ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -{ -} - -static inline void -ext3_xattr_put_super(struct super_block *sb) -{ -} - -static inline int -init_ext3_xattr(void) -{ - return 0; -} - -static inline void -exit_ext3_xattr(void) -{ -} - -#define ext3_xattr_handlers NULL - -# endif /* CONFIG_EXT3_FS_XATTR */ - -#ifdef CONFIG_EXT3_FS_SECURITY -extern int ext3_init_security(handle_t *handle, struct inode *inode, - struct inode *dir, const struct qstr *qstr); -#else -static inline int ext3_init_security(handle_t *handle, struct inode *inode, - struct inode *dir, const struct qstr *qstr) -{ - return 0; -} -#endif diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c deleted file mode 100644 index c9506d5e3b13..000000000000 --- a/fs/ext3/xattr_security.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * linux/fs/ext3/xattr_security.c - * Handler for storing security labels as extended attributes. 
- */ - -#include -#include "ext3.h" -#include "xattr.h" - -static size_t -ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - - if (list && total_len <= list_size) { - memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int -ext3_xattr_security_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_SECURITY, - name, buffer, size); -} - -static int -ext3_xattr_security_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_SECURITY, - name, value, size, flags); -} - -static int ext3_initxattrs(struct inode *inode, - const struct xattr *xattr_array, - void *fs_info) -{ - const struct xattr *xattr; - handle_t *handle = fs_info; - int err = 0; - - for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = ext3_xattr_set_handle(handle, inode, - EXT3_XATTR_INDEX_SECURITY, - xattr->name, xattr->value, - xattr->value_len, 0); - if (err < 0) - break; - } - return err; -} - -int -ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir, - const struct qstr *qstr) -{ - return security_inode_init_security(inode, dir, qstr, - &ext3_initxattrs, handle); -} - -const struct xattr_handler ext3_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .list = ext3_xattr_security_list, - .get = ext3_xattr_security_get, - .set = ext3_xattr_security_set, -}; diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c deleted file mode 100644 index 
206cc66dc285..000000000000 --- a/fs/ext3/xattr_trusted.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * linux/fs/ext3/xattr_trusted.c - * Handler for trusted extended attributes. - * - * Copyright (C) 2003 by Andreas Gruenbacher, - */ - -#include "ext3.h" -#include "xattr.h" - -static size_t -ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (!capable(CAP_SYS_ADMIN)) - return 0; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int -ext3_xattr_trusted_get(struct dentry *dentry, const char *name, - void *buffer, size_t size, int type) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_TRUSTED, - name, buffer, size); -} - -static int -ext3_xattr_trusted_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_TRUSTED, name, - value, size, flags); -} - -const struct xattr_handler ext3_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .list = ext3_xattr_trusted_list, - .get = ext3_xattr_trusted_get, - .set = ext3_xattr_trusted_set, -}; diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c deleted file mode 100644 index 021508ad1616..000000000000 --- a/fs/ext3/xattr_user.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * linux/fs/ext3/xattr_user.c - * Handler for extended user attributes. 
- * - * Copyright (C) 2001 by Andreas Gruenbacher, - */ - -#include "ext3.h" -#include "xattr.h" - -static size_t -ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int type) -{ - const size_t prefix_len = XATTR_USER_PREFIX_LEN; - const size_t total_len = prefix_len + name_len + 1; - - if (!test_opt(dentry->d_sb, XATTR_USER)) - return 0; - - if (list && total_len <= list_size) { - memcpy(list, XATTR_USER_PREFIX, prefix_len); - memcpy(list+prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; - } - return total_len; -} - -static int -ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer, - size_t size, int type) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - if (!test_opt(dentry->d_sb, XATTR_USER)) - return -EOPNOTSUPP; - return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_USER, - name, buffer, size); -} - -static int -ext3_xattr_user_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - if (strcmp(name, "") == 0) - return -EINVAL; - if (!test_opt(dentry->d_sb, XATTR_USER)) - return -EOPNOTSUPP; - return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_USER, - name, value, size, flags); -} - -const struct xattr_handler ext3_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .list = ext3_xattr_user_list, - .get = ext3_xattr_user_get, - .set = ext3_xattr_user_set, -}; diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index bf8bc8aba471..219a190ccae9 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -1,5 +1,38 @@ +# Ext3 configs are here for backward compatibility with old configs which may +# have EXT3_FS set but not EXT4_FS set and thus would result in non-bootable +# kernels after the removal of ext3 driver. +config EXT3_FS + tristate "The Extended 3 (ext3) filesystem" + # These must match EXT4_FS selects... 
+ select EXT4_FS + select JBD2 + select CRC16 + select CRYPTO + select CRYPTO_CRC32C + help + This config option is here only for backward compatibility. ext3 + filesystem is now handled by the ext4 driver. + +config EXT3_FS_POSIX_ACL + bool "Ext3 POSIX Access Control Lists" + depends on EXT3_FS + select EXT4_FS_POSIX_ACL + select FS_POSIX_ACL + help + This config option is here only for backward compatibility. ext3 + filesystem is now handled by the ext4 driver. + +config EXT3_FS_SECURITY + bool "Ext3 Security Labels" + depends on EXT3_FS + select EXT4_FS_SECURITY + help + This config option is here only for backward compatibility. ext3 + filesystem is now handled by the ext4 driver. + config EXT4_FS tristate "The Extended 4 (ext4) filesystem" + # Please update EXT3_FS selects when changing these select JBD2 select CRC16 select CRYPTO @@ -28,14 +61,14 @@ config EXT4_FS If unsure, say N. -config EXT4_USE_FOR_EXT23 +config EXT4_USE_FOR_EXT2 bool "Use ext4 for ext2/ext3 file systems" depends on EXT4_FS - depends on EXT3_FS=n || EXT2_FS=n + depends on EXT2_FS=n default y help - Allow the ext4 file system driver code to be used for ext2 or - ext3 file system mounts. This allows users to reduce their + Allow the ext4 file system driver code to be used for ext2 + file system mounts. This allows users to reduce their compiled kernel size by using one file system driver for ext2, ext3, and ext4 file systems. 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 58987b5c514b..06b4b14e8aa0 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -84,7 +84,7 @@ static void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); -#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static struct file_system_type ext2_fs_type = { .owner = THIS_MODULE, .name = "ext2", @@ -100,7 +100,6 @@ MODULE_ALIAS("ext2"); #endif -#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, .name = "ext3", @@ -111,9 +110,6 @@ static struct file_system_type ext3_fs_type = { MODULE_ALIAS_FS("ext3"); MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) -#else -#define IS_EXT3_SB(sb) (0) -#endif static int ext4_verify_csum_type(struct super_block *sb, struct ext4_super_block *es) @@ -5500,7 +5496,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super); } -#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2) static inline void register_as_ext2(void) { int err = register_filesystem(&ext2_fs_type); @@ -5530,7 +5526,6 @@ static inline void unregister_as_ext2(void) { } static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } #endif -#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static inline void register_as_ext3(void) { int err = register_filesystem(&ext3_fs_type); @@ -5556,11 +5551,6 @@ 
static inline int ext3_feature_set_ok(struct super_block *sb) return 0; return 1; } -#else -static inline void register_as_ext3(void) { } -static inline void unregister_as_ext3(void) { } -static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } -#endif static struct file_system_type ext4_fs_type = { .owner = THIS_MODULE, diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig deleted file mode 100644 index 4e28beeed157..000000000000 --- a/fs/jbd/Kconfig +++ /dev/null @@ -1,30 +0,0 @@ -config JBD - tristate - help - This is a generic journalling layer for block devices. It is - currently used by the ext3 file system, but it could also be - used to add journal support to other file systems or block - devices such as RAID or LVM. - - If you are using the ext3 file system, you need to say Y here. - If you are not using ext3 then you will probably want to say N. - - To compile this device as a module, choose M here: the module will be - called jbd. If you are compiling ext3 into the kernel, you - cannot compile this code as a module. - -config JBD_DEBUG - bool "JBD (ext3) debugging support" - depends on JBD && DEBUG_FS - help - If you are using the ext3 journaled file system (or potentially any - other file system/device using JBD), this option allows you to - enable debugging output while the system is running, in order to - help track down any problems you are having. By default the - debugging output will be turned off. - - If you select Y here, then you will be able to turn on debugging - with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a - number between 1 and 5, the higher the number, the more debugging - output is generated. To turn debugging off again, do - "echo 0 > /sys/kernel/debug/jbd/jbd-debug". diff --git a/fs/jbd/Makefile b/fs/jbd/Makefile deleted file mode 100644 index 54aca4868a36..000000000000 --- a/fs/jbd/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the linux journaling routines. 
-# - -obj-$(CONFIG_JBD) += jbd.o - -jbd-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c deleted file mode 100644 index 08c03044abdd..000000000000 --- a/fs/jbd/checkpoint.c +++ /dev/null @@ -1,782 +0,0 @@ -/* - * linux/fs/jbd/checkpoint.c - * - * Written by Stephen C. Tweedie , 1999 - * - * Copyright 1999 Red Hat Software --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Checkpoint routines for the generic filesystem journaling code. - * Part of the ext2fs journaling system. - * - * Checkpointing is the process of ensuring that a section of the log is - * committed fully to disk, so that that portion of the log can be - * reused. - */ - -#include -#include -#include -#include -#include -#include -#include - -/* - * Unlink a buffer from a transaction checkpoint list. - * - * Called with j_list_lock held. - */ -static inline void __buffer_unlink_first(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - jh->b_cpnext->b_cpprev = jh->b_cpprev; - jh->b_cpprev->b_cpnext = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) { - transaction->t_checkpoint_list = jh->b_cpnext; - if (transaction->t_checkpoint_list == jh) - transaction->t_checkpoint_list = NULL; - } -} - -/* - * Unlink a buffer from a transaction checkpoint(io) list. - * - * Called with j_list_lock held. 
- */ -static inline void __buffer_unlink(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - if (transaction->t_checkpoint_io_list == jh) { - transaction->t_checkpoint_io_list = jh->b_cpnext; - if (transaction->t_checkpoint_io_list == jh) - transaction->t_checkpoint_io_list = NULL; - } -} - -/* - * Move a buffer from the checkpoint list to the checkpoint io list - * - * Called with j_list_lock held - */ -static inline void __buffer_relink_io(struct journal_head *jh) -{ - transaction_t *transaction = jh->b_cp_transaction; - - __buffer_unlink_first(jh); - - if (!transaction->t_checkpoint_io_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_io_list; - jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_io_list = jh; -} - -/* - * Try to release a checkpointed buffer from its transaction. - * Returns 1 if we released it and 2 if we also released the - * whole transaction. - * - * Requires j_list_lock - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it - */ -static int __try_to_free_cp_buf(struct journal_head *jh) -{ - int ret = 0; - struct buffer_head *bh = jh2bh(jh); - - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && - !buffer_dirty(bh) && !buffer_write_io_error(bh)) { - /* - * Get our reference so that bh cannot be freed before - * we unlock it - */ - get_bh(bh); - JBUFFER_TRACE(jh, "remove from checkpoint list"); - ret = __journal_remove_checkpoint(jh) + 1; - jbd_unlock_bh_state(bh); - BUFFER_TRACE(bh, "release"); - __brelse(bh); - } else { - jbd_unlock_bh_state(bh); - } - return ret; -} - -/* - * __log_wait_for_space: wait until there is space in the journal. - * - * Called under j-state_lock *only*. It will be unlocked if we have to wait - * for a checkpoint to free up some space in the log. 
- */ -void __log_wait_for_space(journal_t *journal) -{ - int nblocks, space_left; - assert_spin_locked(&journal->j_state_lock); - - nblocks = jbd_space_needed(journal); - while (__log_space_left(journal) < nblocks) { - if (journal->j_flags & JFS_ABORT) - return; - spin_unlock(&journal->j_state_lock); - mutex_lock(&journal->j_checkpoint_mutex); - - /* - * Test again, another process may have checkpointed while we - * were waiting for the checkpoint lock. If there are no - * transactions ready to be checkpointed, try to recover - * journal space by calling cleanup_journal_tail(), and if - * that doesn't work, by waiting for the currently committing - * transaction to complete. If there is absolutely no way - * to make progress, this is either a BUG or corrupted - * filesystem, so abort the journal and leave a stack - * trace for forensic evidence. - */ - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - nblocks = jbd_space_needed(journal); - space_left = __log_space_left(journal); - if (space_left < nblocks) { - int chkpt = journal->j_checkpoint_transactions != NULL; - tid_t tid = 0; - - if (journal->j_committing_transaction) - tid = journal->j_committing_transaction->t_tid; - spin_unlock(&journal->j_list_lock); - spin_unlock(&journal->j_state_lock); - if (chkpt) { - log_do_checkpoint(journal); - } else if (cleanup_journal_tail(journal) == 0) { - /* We were able to recover space; yay! */ - ; - } else if (tid) { - log_wait_commit(journal, tid); - } else { - printk(KERN_ERR "%s: needed %d blocks and " - "only had %d space available\n", - __func__, nblocks, space_left); - printk(KERN_ERR "%s: no way to get more " - "journal space\n", __func__); - WARN_ON(1); - journal_abort(journal, 0); - } - spin_lock(&journal->j_state_lock); - } else { - spin_unlock(&journal->j_list_lock); - } - mutex_unlock(&journal->j_checkpoint_mutex); - } -} - -/* - * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. 
- * The caller must restart a list walk. Wait for someone else to run - * jbd_unlock_bh_state(). - */ -static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) - __releases(journal->j_list_lock) -{ - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_lock_bh_state(bh); - jbd_unlock_bh_state(bh); - put_bh(bh); -} - -/* - * Clean up transaction's list of buffers submitted for io. - * We wait for any pending IO to complete and remove any clean - * buffers. Note that we take the buffers in the opposite ordering - * from the one in which they were submitted for IO. - * - * Return 0 on success, and return <0 if some buffers have failed - * to be written out. - * - * Called with j_list_lock held. - */ -static int __wait_cp_io(journal_t *journal, transaction_t *transaction) -{ - struct journal_head *jh; - struct buffer_head *bh; - tid_t this_tid; - int released = 0; - int ret = 0; - - this_tid = transaction->t_tid; -restart: - /* Did somebody clean up the transaction in the meanwhile? 
*/ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - return ret; - while (!released && transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; - bh = jh2bh(jh); - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - spin_lock(&journal->j_list_lock); - goto restart; - } - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart; - } - if (unlikely(buffer_write_io_error(bh))) - ret = -EIO; - - /* - * Now in whatever state the buffer currently is, we know that - * it has been written out and so we can drop it from the list - */ - released = __journal_remove_checkpoint(jh); - jbd_unlock_bh_state(bh); - __brelse(bh); - } - - return ret; -} - -#define NR_BATCH 64 - -static void -__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) -{ - int i; - struct blk_plug plug; - - blk_start_plug(&plug); - for (i = 0; i < *batch_count; i++) - write_dirty_buffer(bhs[i], WRITE_SYNC); - blk_finish_plug(&plug); - - for (i = 0; i < *batch_count; i++) { - struct buffer_head *bh = bhs[i]; - clear_buffer_jwrite(bh); - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - } - *batch_count = 0; -} - -/* - * Try to flush one buffer from the checkpoint list to disk. - * - * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. Return <0 if the buffer has failed to - * be written out. 
- * - * Called with j_list_lock held and drops it if 1 is returned - * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it - */ -static int __process_buffer(journal_t *journal, struct journal_head *jh, - struct buffer_head **bhs, int *batch_count) -{ - struct buffer_head *bh = jh2bh(jh); - int ret = 0; - - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - ret = 1; - } else if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); - ret = 1; - } else if (!buffer_dirty(bh)) { - ret = 1; - if (unlikely(buffer_write_io_error(bh))) - ret = -EIO; - get_bh(bh); - J_ASSERT_JH(jh, !buffer_jbddirty(bh)); - BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __brelse(bh); - } else { - /* - * Important: we are about to write the buffer, and - * possibly block, while still holding the journal lock. - * We cannot afford to let the transaction logic start - * messing around with this buffer before we write it to - * disk, as that would break recoverability. - */ - BUFFER_TRACE(bh, "queue"); - get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); - set_buffer_jwrite(bh); - bhs[*batch_count] = bh; - __buffer_relink_io(jh); - jbd_unlock_bh_state(bh); - (*batch_count)++; - if (*batch_count == NR_BATCH) { - spin_unlock(&journal->j_list_lock); - __flush_batch(journal, bhs, batch_count); - ret = 1; - } - } - return ret; -} - -/* - * Perform an actual checkpoint. We take the first transaction on the - * list of transactions to be checkpointed and send all its buffers - * to disk. We submit larger chunks of data at once. 
- * - * The journal should be locked before calling this function. - * Called with j_checkpoint_mutex held. - */ -int log_do_checkpoint(journal_t *journal) -{ - transaction_t *transaction; - tid_t this_tid; - int result; - - jbd_debug(1, "Start checkpoint\n"); - - /* - * First thing: if there are any transactions in the log which - * don't need checkpointing, just eliminate them from the - * journal straight away. - */ - result = cleanup_journal_tail(journal); - trace_jbd_checkpoint(journal, result); - jbd_debug(1, "cleanup_journal_tail returned %d\n", result); - if (result <= 0) - return result; - - /* - * OK, we need to start writing disk blocks. Take one transaction - * and write it. - */ - result = 0; - spin_lock(&journal->j_list_lock); - if (!journal->j_checkpoint_transactions) - goto out; - transaction = journal->j_checkpoint_transactions; - this_tid = transaction->t_tid; -restart: - /* - * If someone cleaned up this transaction while we slept, we're - * done (maybe it's a new transaction, but it fell at the same - * address). 
- */ - if (journal->j_checkpoint_transactions == transaction && - transaction->t_tid == this_tid) { - int batch_count = 0; - struct buffer_head *bhs[NR_BATCH]; - struct journal_head *jh; - int retry = 0, err; - - while (!retry && transaction->t_checkpoint_list) { - struct buffer_head *bh; - - jh = transaction->t_checkpoint_list; - bh = jh2bh(jh); - if (!jbd_trylock_bh_state(bh)) { - jbd_sync_bh(journal, bh); - retry = 1; - break; - } - retry = __process_buffer(journal, jh, bhs,&batch_count); - if (retry < 0 && !result) - result = retry; - if (!retry && (need_resched() || - spin_needbreak(&journal->j_list_lock))) { - spin_unlock(&journal->j_list_lock); - retry = 1; - break; - } - } - - if (batch_count) { - if (!retry) { - spin_unlock(&journal->j_list_lock); - retry = 1; - } - __flush_batch(journal, bhs, &batch_count); - } - - if (retry) { - spin_lock(&journal->j_list_lock); - goto restart; - } - /* - * Now we have cleaned up the first transaction's checkpoint - * list. Let's clean up the second one - */ - err = __wait_cp_io(journal, transaction); - if (!result) - result = err; - } -out: - spin_unlock(&journal->j_list_lock); - if (result < 0) - journal_abort(journal, result); - else - result = cleanup_journal_tail(journal); - - return (result < 0) ? result : 0; -} - -/* - * Check the list of checkpoint transactions for the journal to see if - * we have already got rid of any since the last update of the log tail - * in the journal superblock. If so, we can instantly roll the - * superblock forward to remove those transactions from the log. - * - * Return <0 on error, 0 on success, 1 if there was nothing to clean up. - * - * This is the only part of the journaling code which really needs to be - * aware of transaction aborts. Checkpointing involves writing to the - * main filesystem area rather than to the journal, so it can proceed - * even in abort state, but we must not update the super block if - * checkpointing may have failed. 
Otherwise, we would lose some metadata - * buffers which should be written-back to the filesystem. - */ - -int cleanup_journal_tail(journal_t *journal) -{ - transaction_t * transaction; - tid_t first_tid; - unsigned int blocknr, freed; - - if (is_journal_aborted(journal)) - return 1; - - /* - * OK, work out the oldest transaction remaining in the log, and - * the log block it starts at. - * - * If the log is now empty, we need to work out which is the - * next transaction ID we will write, and where it will - * start. - */ - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - transaction = journal->j_checkpoint_transactions; - if (transaction) { - first_tid = transaction->t_tid; - blocknr = transaction->t_log_start; - } else if ((transaction = journal->j_committing_transaction) != NULL) { - first_tid = transaction->t_tid; - blocknr = transaction->t_log_start; - } else if ((transaction = journal->j_running_transaction) != NULL) { - first_tid = transaction->t_tid; - blocknr = journal->j_head; - } else { - first_tid = journal->j_transaction_sequence; - blocknr = journal->j_head; - } - spin_unlock(&journal->j_list_lock); - J_ASSERT(blocknr != 0); - - /* If the oldest pinned transaction is at the tail of the log - already then there's not much we can do right now. */ - if (journal->j_tail_sequence == first_tid) { - spin_unlock(&journal->j_state_lock); - return 1; - } - spin_unlock(&journal->j_state_lock); - - /* - * We need to make sure that any blocks that were recently written out - * --- perhaps by log_do_checkpoint() --- are flushed out before we - * drop the transactions from the journal. Similarly we need to be sure - * superblock makes it to disk before next transaction starts reusing - * freed space (otherwise we could replay some blocks of the new - * transaction thinking they belong to the old one). So we use - * WRITE_FLUSH_FUA. 
It's unlikely this will be necessary, especially - * with an appropriately sized journal, but we need this to guarantee - * correctness. Fortunately cleanup_journal_tail() doesn't get called - * all that often. - */ - journal_update_sb_log_tail(journal, first_tid, blocknr, - WRITE_FLUSH_FUA); - - spin_lock(&journal->j_state_lock); - /* OK, update the superblock to recover the freed space. - * Physical blocks come first: have we wrapped beyond the end of - * the log? */ - freed = blocknr - journal->j_tail; - if (blocknr < journal->j_tail) - freed = freed + journal->j_last - journal->j_first; - - trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed); - jbd_debug(1, - "Cleaning journal tail from %d to %d (offset %u), " - "freeing %u\n", - journal->j_tail_sequence, first_tid, blocknr, freed); - - journal->j_free += freed; - journal->j_tail_sequence = first_tid; - journal->j_tail = blocknr; - spin_unlock(&journal->j_state_lock); - return 0; -} - - -/* Checkpoint list management */ - -/* - * journal_clean_one_cp_list - * - * Find all the written-back checkpoint buffers in the given list and release - * them. - * - * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) - */ - -static int journal_clean_one_cp_list(struct journal_head *jh, int *released) -{ - struct journal_head *last_jh; - struct journal_head *next_jh = jh; - int ret, freed = 0; - - *released = 0; - if (!jh) - return 0; - - last_jh = jh->b_cpprev; - do { - jh = next_jh; - next_jh = jh->b_cpnext; - /* Use trylock because of the ranking */ - if (jbd_trylock_bh_state(jh2bh(jh))) { - ret = __try_to_free_cp_buf(jh); - if (ret) { - freed++; - if (ret == 2) { - *released = 1; - return freed; - } - } - } - /* - * This function only frees up some memory - * if possible so we dont have an obligation - * to finish processing. 
Bail out if preemption - * requested: - */ - if (need_resched()) - return freed; - } while (jh != last_jh); - - return freed; -} - -/* - * journal_clean_checkpoint_list - * - * Find all the written-back checkpoint buffers in the journal and release them. - * - * Called with the journal locked. - * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) - */ - -int __journal_clean_checkpoint_list(journal_t *journal) -{ - transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; - int released; - - transaction = journal->j_checkpoint_transactions; - if (!transaction) - goto out; - - last_transaction = transaction->t_cpprev; - next_transaction = transaction; - do { - transaction = next_transaction; - next_transaction = transaction->t_cpnext; - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_list, &released); - /* - * This function only frees up some memory if possible so we - * dont have an obligation to finish processing. Bail out if - * preemption requested: - */ - if (need_resched()) - goto out; - if (released) - continue; - /* - * It is essential that we are as careful as in the case of - * t_checkpoint_list with removing the buffer from the list as - * we can possibly see not yet submitted buffers on io_list - */ - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, &released); - if (need_resched()) - goto out; - } while (transaction != last_transaction); -out: - return ret; -} - -/* - * journal_remove_checkpoint: called after a buffer has been committed - * to disk (either by being write-back flushed to disk, or being - * committed to the log). - * - * We cannot safely clean a transaction out of the log until all of the - * buffer updates committed in that transaction have safely been stored - * elsewhere on disk. 
To achieve this, all of the buffers in a - * transaction need to be maintained on the transaction's checkpoint - * lists until they have been rewritten, at which point this function is - * called to remove the buffer from the existing transaction's - * checkpoint lists. - * - * The function returns 1 if it frees the transaction, 0 otherwise. - * The function can free jh and bh. - * - * This function is called with j_list_lock held. - * This function is called with jbd_lock_bh_state(jh2bh(jh)) - */ - -int __journal_remove_checkpoint(struct journal_head *jh) -{ - transaction_t *transaction; - journal_t *journal; - int ret = 0; - - JBUFFER_TRACE(jh, "entry"); - - if ((transaction = jh->b_cp_transaction) == NULL) { - JBUFFER_TRACE(jh, "not on transaction"); - goto out; - } - journal = transaction->t_journal; - - JBUFFER_TRACE(jh, "removing from transaction"); - __buffer_unlink(jh); - jh->b_cp_transaction = NULL; - journal_put_journal_head(jh); - - if (transaction->t_checkpoint_list != NULL || - transaction->t_checkpoint_io_list != NULL) - goto out; - - /* - * There is one special case to worry about: if we have just pulled the - * buffer off a running or committing transaction's checkpoing list, - * then even if the checkpoint list is empty, the transaction obviously - * cannot be dropped! - * - * The locking here around t_state is a bit sleazy. - * See the comment at the end of journal_commit_transaction(). - */ - if (transaction->t_state != T_FINISHED) - goto out; - - /* OK, that was the last buffer for the transaction: we can now - safely remove this transaction from the log */ - - __journal_drop_transaction(journal, transaction); - - /* Just in case anybody was waiting for more transactions to be - checkpointed... */ - wake_up(&journal->j_wait_logspace); - ret = 1; -out: - return ret; -} - -/* - * journal_insert_checkpoint: put a committed buffer onto a checkpoint - * list so that we know when it is safe to clean the transaction out of - * the log. 
- * - * Called with the journal locked. - * Called with j_list_lock held. - */ -void __journal_insert_checkpoint(struct journal_head *jh, - transaction_t *transaction) -{ - JBUFFER_TRACE(jh, "entry"); - J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); - J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); - - /* Get reference for checkpointing transaction */ - journal_grab_journal_head(jh2bh(jh)); - jh->b_cp_transaction = transaction; - - if (!transaction->t_checkpoint_list) { - jh->b_cpnext = jh->b_cpprev = jh; - } else { - jh->b_cpnext = transaction->t_checkpoint_list; - jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev; - jh->b_cpprev->b_cpnext = jh; - jh->b_cpnext->b_cpprev = jh; - } - transaction->t_checkpoint_list = jh; -} - -/* - * We've finished with this transaction structure: adios... - * - * The transaction must have no links except for the checkpoint by this - * point. - * - * Called with the journal locked. - * Called with j_list_lock held. - */ - -void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) -{ - assert_spin_locked(&journal->j_list_lock); - if (transaction->t_cpnext) { - transaction->t_cpnext->t_cpprev = transaction->t_cpprev; - transaction->t_cpprev->t_cpnext = transaction->t_cpnext; - if (journal->j_checkpoint_transactions == transaction) - journal->j_checkpoint_transactions = - transaction->t_cpnext; - if (journal->j_checkpoint_transactions == transaction) - journal->j_checkpoint_transactions = NULL; - } - - J_ASSERT(transaction->t_state == T_FINISHED); - J_ASSERT(transaction->t_buffers == NULL); - J_ASSERT(transaction->t_sync_datalist == NULL); - J_ASSERT(transaction->t_forget == NULL); - J_ASSERT(transaction->t_iobuf_list == NULL); - J_ASSERT(transaction->t_shadow_list == NULL); - J_ASSERT(transaction->t_log_list == NULL); - J_ASSERT(transaction->t_checkpoint_list == NULL); - J_ASSERT(transaction->t_checkpoint_io_list == NULL); - J_ASSERT(transaction->t_updates == 0); - 
J_ASSERT(journal->j_committing_transaction != transaction); - J_ASSERT(journal->j_running_transaction != transaction); - - trace_jbd_drop_transaction(journal, transaction); - jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); - kfree(transaction); -} diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c deleted file mode 100644 index bb217dcb41af..000000000000 --- a/fs/jbd/commit.c +++ /dev/null @@ -1,1021 +0,0 @@ -/* - * linux/fs/jbd/commit.c - * - * Written by Stephen C. Tweedie , 1998 - * - * Copyright 1998 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Journal commit routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Default IO end handler for temporary BJ_IO buffer_heads. - */ -static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) -{ - BUFFER_TRACE(bh, ""); - if (uptodate) - set_buffer_uptodate(bh); - else - clear_buffer_uptodate(bh); - unlock_buffer(bh); -} - -/* - * When an ext3-ordered file is truncated, it is possible that many pages are - * not successfully freed, because they are attached to a committing transaction. - * After the transaction commits, these pages are left on the LRU, with no - * ->mapping, and with attached buffers. These pages are trivially reclaimable - * by the VM, but their apparent absence upsets the VM accounting, and it makes - * the numbers in /proc/meminfo look odd. - * - * So here, we have a buffer which has just come off the forget list. Look to - * see if we can strip all buffers from the backing page. - * - * Called under journal->j_list_lock. The caller provided us with a ref - * against the buffer, and we drop that here. 
- */ -static void release_buffer_page(struct buffer_head *bh) -{ - struct page *page; - - if (buffer_dirty(bh)) - goto nope; - if (atomic_read(&bh->b_count) != 1) - goto nope; - page = bh->b_page; - if (!page) - goto nope; - if (page->mapping) - goto nope; - - /* OK, it's a truncated page */ - if (!trylock_page(page)) - goto nope; - - page_cache_get(page); - __brelse(bh); - try_to_free_buffers(page); - unlock_page(page); - page_cache_release(page); - return; - -nope: - __brelse(bh); -} - -/* - * Decrement reference counter for data buffer. If it has been marked - * 'BH_Freed', release it and the page to which it belongs if possible. - */ -static void release_data_buffer(struct buffer_head *bh) -{ - if (buffer_freed(bh)) { - WARN_ON_ONCE(buffer_dirty(bh)); - clear_buffer_freed(bh); - clear_buffer_mapped(bh); - clear_buffer_new(bh); - clear_buffer_req(bh); - bh->b_bdev = NULL; - release_buffer_page(bh); - } else - put_bh(bh); -} - -/* - * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is - * held. For ranking reasons we must trylock. If we lose, schedule away and - * return 0. j_list_lock is dropped in this case. - */ -static int inverted_lock(journal_t *journal, struct buffer_head *bh) -{ - if (!jbd_trylock_bh_state(bh)) { - spin_unlock(&journal->j_list_lock); - schedule(); - return 0; - } - return 1; -} - -/* Done it all: now write the commit record. We should have - * cleaned up our previous buffers by now, so if we are in abort - * mode we can now just skip the rest of the journal write - * entirely. 
- * - * Returns 1 if the journal needs to be aborted or 0 on success - */ -static int journal_write_commit_record(journal_t *journal, - transaction_t *commit_transaction) -{ - struct journal_head *descriptor; - struct buffer_head *bh; - journal_header_t *header; - int ret; - - if (is_journal_aborted(journal)) - return 0; - - descriptor = journal_get_descriptor_buffer(journal); - if (!descriptor) - return 1; - - bh = jh2bh(descriptor); - - header = (journal_header_t *)(bh->b_data); - header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); - header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - - JBUFFER_TRACE(descriptor, "write commit block"); - set_buffer_dirty(bh); - - if (journal->j_flags & JFS_BARRIER) - ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA); - else - ret = sync_dirty_buffer(bh); - - put_bh(bh); /* One for getblk() */ - journal_put_journal_head(descriptor); - - return (ret == -EIO); -} - -static void journal_do_submit_data(struct buffer_head **wbuf, int bufs, - int write_op) -{ - int i; - - for (i = 0; i < bufs; i++) { - wbuf[i]->b_end_io = end_buffer_write_sync; - /* - * Here we write back pagecache data that may be mmaped. Since - * we cannot afford to clean the page and set PageWriteback - * here due to lock ordering (page lock ranks above transaction - * start), the data can change while IO is in flight. Tell the - * block layer it should bounce the bio pages if stable data - * during write is required. - * - * We use up our safety reference in submit_bh(). 
- */ - _submit_bh(write_op, wbuf[i], 1 << BIO_SNAP_STABLE); - } -} - -/* - * Submit all the data buffers to disk - */ -static int journal_submit_data_buffers(journal_t *journal, - transaction_t *commit_transaction, - int write_op) -{ - struct journal_head *jh; - struct buffer_head *bh; - int locked; - int bufs = 0; - struct buffer_head **wbuf = journal->j_wbuf; - int err = 0; - - /* - * Whenever we unlock the journal and sleep, things can get added - * onto ->t_sync_datalist, so we have to keep looping back to - * write_out_data until we *know* that the list is empty. - * - * Cleanup any flushed data buffers from the data list. Even in - * abort mode, we want to flush this out as soon as possible. - */ -write_out_data: - cond_resched(); - spin_lock(&journal->j_list_lock); - - while (commit_transaction->t_sync_datalist) { - jh = commit_transaction->t_sync_datalist; - bh = jh2bh(jh); - locked = 0; - - /* Get reference just to make sure buffer does not disappear - * when we are forced to drop various locks */ - get_bh(bh); - /* If the buffer is dirty, we need to submit IO and hence - * we need the buffer lock. We try to lock the buffer without - * blocking. If we fail, we need to drop j_list_lock and do - * blocking lock_buffer(). - */ - if (buffer_dirty(bh)) { - if (!trylock_buffer(bh)) { - BUFFER_TRACE(bh, "needs blocking lock"); - spin_unlock(&journal->j_list_lock); - trace_jbd_do_submit_data(journal, - commit_transaction); - /* Write out all data to prevent deadlocks */ - journal_do_submit_data(wbuf, bufs, write_op); - bufs = 0; - lock_buffer(bh); - spin_lock(&journal->j_list_lock); - } - locked = 1; - } - /* We have to get bh_state lock. Again out of order, sigh. */ - if (!inverted_lock(journal, bh)) { - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - } - /* Someone already cleaned up the buffer? 
*/ - if (!buffer_jbd(bh) || bh2jh(bh) != jh - || jh->b_transaction != commit_transaction - || jh->b_jlist != BJ_SyncData) { - jbd_unlock_bh_state(bh); - if (locked) - unlock_buffer(bh); - BUFFER_TRACE(bh, "already cleaned up"); - release_data_buffer(bh); - continue; - } - if (locked && test_clear_buffer_dirty(bh)) { - BUFFER_TRACE(bh, "needs writeout, adding to array"); - wbuf[bufs++] = bh; - __journal_file_buffer(jh, commit_transaction, - BJ_Locked); - jbd_unlock_bh_state(bh); - if (bufs == journal->j_wbufsize) { - spin_unlock(&journal->j_list_lock); - trace_jbd_do_submit_data(journal, - commit_transaction); - journal_do_submit_data(wbuf, bufs, write_op); - bufs = 0; - goto write_out_data; - } - } else if (!locked && buffer_locked(bh)) { - __journal_file_buffer(jh, commit_transaction, - BJ_Locked); - jbd_unlock_bh_state(bh); - put_bh(bh); - } else { - BUFFER_TRACE(bh, "writeout complete: unfile"); - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - if (locked) - unlock_buffer(bh); - release_data_buffer(bh); - } - - if (need_resched() || spin_needbreak(&journal->j_list_lock)) { - spin_unlock(&journal->j_list_lock); - goto write_out_data; - } - } - spin_unlock(&journal->j_list_lock); - trace_jbd_do_submit_data(journal, commit_transaction); - journal_do_submit_data(wbuf, bufs, write_op); - - return err; -} - -/* - * journal_commit_transaction - * - * The primary function for committing a transaction to the log. This - * function is called by the journal thread to begin a complete commit. 
- */ -void journal_commit_transaction(journal_t *journal) -{ - transaction_t *commit_transaction; - struct journal_head *jh, *new_jh, *descriptor; - struct buffer_head **wbuf = journal->j_wbuf; - int bufs; - int flags; - int err; - unsigned int blocknr; - ktime_t start_time; - u64 commit_time; - char *tagp = NULL; - journal_header_t *header; - journal_block_tag_t *tag = NULL; - int space_left = 0; - int first_tag = 0; - int tag_flag; - int i; - struct blk_plug plug; - int write_op = WRITE; - - /* - * First job: lock down the current transaction and wait for - * all outstanding updates to complete. - */ - - /* Do we need to erase the effects of a prior journal_flush? */ - if (journal->j_flags & JFS_FLUSHED) { - jbd_debug(3, "super block updated\n"); - mutex_lock(&journal->j_checkpoint_mutex); - /* - * We hold j_checkpoint_mutex so tail cannot change under us. - * We don't need any special data guarantees for writing sb - * since journal is empty and it is ok for write to be - * flushed only with transaction commit. 
- */ - journal_update_sb_log_tail(journal, journal->j_tail_sequence, - journal->j_tail, WRITE_SYNC); - mutex_unlock(&journal->j_checkpoint_mutex); - } else { - jbd_debug(3, "superblock not updated\n"); - } - - J_ASSERT(journal->j_running_transaction != NULL); - J_ASSERT(journal->j_committing_transaction == NULL); - - commit_transaction = journal->j_running_transaction; - - trace_jbd_start_commit(journal, commit_transaction); - jbd_debug(1, "JBD: starting commit of transaction %d\n", - commit_transaction->t_tid); - - spin_lock(&journal->j_state_lock); - J_ASSERT(commit_transaction->t_state == T_RUNNING); - commit_transaction->t_state = T_LOCKED; - - trace_jbd_commit_locking(journal, commit_transaction); - spin_lock(&commit_transaction->t_handle_lock); - while (commit_transaction->t_updates) { - DEFINE_WAIT(wait); - - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (commit_transaction->t_updates) { - spin_unlock(&commit_transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - schedule(); - spin_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); - } - finish_wait(&journal->j_wait_updates, &wait); - } - spin_unlock(&commit_transaction->t_handle_lock); - - J_ASSERT (commit_transaction->t_outstanding_credits <= - journal->j_max_transaction_buffers); - - /* - * First thing we are allowed to do is to discard any remaining - * BJ_Reserved buffers. Note, it is _not_ permissible to assume - * that there are no such buffers: if a large filesystem - * operation like a truncate needs to split itself over multiple - * transactions, then it may try to do a journal_restart() while - * there are still BJ_Reserved buffers outstanding. These must - * be released cleanly from the current transaction. - * - * In this case, the filesystem must still reserve write access - * again before modifying the buffer in the new transaction, but - * we do not require it to remember exactly which old buffers it - * has reserved. 
This is consistent with the existing behaviour - * that multiple journal_get_write_access() calls to the same - * buffer are perfectly permissible. - */ - while (commit_transaction->t_reserved_list) { - jh = commit_transaction->t_reserved_list; - JBUFFER_TRACE(jh, "reserved, unused: refile"); - /* - * A journal_get_undo_access()+journal_release_buffer() may - * leave undo-committed data. - */ - if (jh->b_committed_data) { - struct buffer_head *bh = jh2bh(jh); - - jbd_lock_bh_state(bh); - jbd_free(jh->b_committed_data, bh->b_size); - jh->b_committed_data = NULL; - jbd_unlock_bh_state(bh); - } - journal_refile_buffer(journal, jh); - } - - /* - * Now try to drop any written-back buffers from the journal's - * checkpoint lists. We do this *before* commit because it potentially - * frees some memory - */ - spin_lock(&journal->j_list_lock); - __journal_clean_checkpoint_list(journal); - spin_unlock(&journal->j_list_lock); - - jbd_debug (3, "JBD: commit phase 1\n"); - - /* - * Clear revoked flag to reflect there is no revoked buffers - * in the next transaction which is going to be started. - */ - journal_clear_buffer_revoked_flags(journal); - - /* - * Switch to a new revoke table. - */ - journal_switch_revoke_table(journal); - - trace_jbd_commit_flushing(journal, commit_transaction); - commit_transaction->t_state = T_FLUSH; - journal->j_committing_transaction = commit_transaction; - journal->j_running_transaction = NULL; - start_time = ktime_get(); - commit_transaction->t_log_start = journal->j_head; - wake_up(&journal->j_wait_transaction_locked); - spin_unlock(&journal->j_state_lock); - - jbd_debug (3, "JBD: commit phase 2\n"); - - if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid)) - write_op = WRITE_SYNC; - - /* - * Now start flushing things to disk, in the order they appear - * on the transaction lists. Data blocks go first. 
- */ - blk_start_plug(&plug); - err = journal_submit_data_buffers(journal, commit_transaction, - write_op); - blk_finish_plug(&plug); - - /* - * Wait for all previously submitted IO to complete. - */ - spin_lock(&journal->j_list_lock); - while (commit_transaction->t_locked_list) { - struct buffer_head *bh; - - jh = commit_transaction->t_locked_list->b_tprev; - bh = jh2bh(jh); - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - spin_lock(&journal->j_list_lock); - } - if (unlikely(!buffer_uptodate(bh))) { - if (!trylock_page(bh->b_page)) { - spin_unlock(&journal->j_list_lock); - lock_page(bh->b_page); - spin_lock(&journal->j_list_lock); - } - if (bh->b_page->mapping) - set_bit(AS_EIO, &bh->b_page->mapping->flags); - - unlock_page(bh->b_page); - SetPageError(bh->b_page); - err = -EIO; - } - if (!inverted_lock(journal, bh)) { - put_bh(bh); - spin_lock(&journal->j_list_lock); - continue; - } - if (buffer_jbd(bh) && bh2jh(bh) == jh && - jh->b_transaction == commit_transaction && - jh->b_jlist == BJ_Locked) - __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - release_data_buffer(bh); - cond_resched_lock(&journal->j_list_lock); - } - spin_unlock(&journal->j_list_lock); - - if (err) { - char b[BDEVNAME_SIZE]; - - printk(KERN_WARNING - "JBD: Detected IO errors while flushing file data " - "on %s\n", bdevname(journal->j_fs_dev, b)); - if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR) - journal_abort(journal, err); - err = 0; - } - - blk_start_plug(&plug); - - journal_write_revoke_records(journal, commit_transaction, write_op); - - /* - * If we found any dirty or locked buffers, then we should have - * looped back up to the write_out_data label. If there weren't - * any then journal_clean_data_list should have wiped the list - * clean by now, so check that it is in fact empty. 
- */ - J_ASSERT (commit_transaction->t_sync_datalist == NULL); - - jbd_debug (3, "JBD: commit phase 3\n"); - - /* - * Way to go: we have now written out all of the data for a - * transaction! Now comes the tricky part: we need to write out - * metadata. Loop over the transaction's entire buffer list: - */ - spin_lock(&journal->j_state_lock); - commit_transaction->t_state = T_COMMIT; - spin_unlock(&journal->j_state_lock); - - trace_jbd_commit_logging(journal, commit_transaction); - J_ASSERT(commit_transaction->t_nr_buffers <= - commit_transaction->t_outstanding_credits); - - descriptor = NULL; - bufs = 0; - while (commit_transaction->t_buffers) { - - /* Find the next buffer to be journaled... */ - - jh = commit_transaction->t_buffers; - - /* If we're in abort mode, we just un-journal the buffer and - release it. */ - - if (is_journal_aborted(journal)) { - clear_buffer_jbddirty(jh2bh(jh)); - JBUFFER_TRACE(jh, "journal is aborting: refile"); - journal_refile_buffer(journal, jh); - /* If that was the last one, we need to clean up - * any descriptor buffers which may have been - * already allocated, even if we are now - * aborting. */ - if (!commit_transaction->t_buffers) - goto start_journal_io; - continue; - } - - /* Make sure we have a descriptor block in which to - record the metadata buffer. 
*/ - - if (!descriptor) { - struct buffer_head *bh; - - J_ASSERT (bufs == 0); - - jbd_debug(4, "JBD: get descriptor\n"); - - descriptor = journal_get_descriptor_buffer(journal); - if (!descriptor) { - journal_abort(journal, -EIO); - continue; - } - - bh = jh2bh(descriptor); - jbd_debug(4, "JBD: got buffer %llu (%p)\n", - (unsigned long long)bh->b_blocknr, bh->b_data); - header = (journal_header_t *)&bh->b_data[0]; - header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK); - header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - - tagp = &bh->b_data[sizeof(journal_header_t)]; - space_left = bh->b_size - sizeof(journal_header_t); - first_tag = 1; - set_buffer_jwrite(bh); - set_buffer_dirty(bh); - wbuf[bufs++] = bh; - - /* Record it so that we can wait for IO - completion later */ - BUFFER_TRACE(bh, "ph3: file as descriptor"); - journal_file_buffer(descriptor, commit_transaction, - BJ_LogCtl); - } - - /* Where is the buffer to be written? */ - - err = journal_next_log_block(journal, &blocknr); - /* If the block mapping failed, just abandon the buffer - and repeat this loop: we'll fall into the - refile-on-abort condition above. */ - if (err) { - journal_abort(journal, err); - continue; - } - - /* - * start_this_handle() uses t_outstanding_credits to determine - * the free space in the log, but this counter is changed - * by journal_next_log_block() also. - */ - commit_transaction->t_outstanding_credits--; - - /* Bump b_count to prevent truncate from stumbling over - the shadowed buffer! @@@ This can go if we ever get - rid of the BJ_IO/BJ_Shadow pairing of buffers. */ - get_bh(jh2bh(jh)); - - /* Make a temporary IO buffer with which to write it out - (this will requeue both the metadata buffer and the - temporary IO buffer). new_bh goes on BJ_IO*/ - - set_buffer_jwrite(jh2bh(jh)); - /* - * akpm: journal_write_metadata_buffer() sets - * new_bh->b_transaction to commit_transaction. 
- * We need to clean this up before we release new_bh - * (which is of type BJ_IO) - */ - JBUFFER_TRACE(jh, "ph3: write metadata"); - flags = journal_write_metadata_buffer(commit_transaction, - jh, &new_jh, blocknr); - set_buffer_jwrite(jh2bh(new_jh)); - wbuf[bufs++] = jh2bh(new_jh); - - /* Record the new block's tag in the current descriptor - buffer */ - - tag_flag = 0; - if (flags & 1) - tag_flag |= JFS_FLAG_ESCAPE; - if (!first_tag) - tag_flag |= JFS_FLAG_SAME_UUID; - - tag = (journal_block_tag_t *) tagp; - tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); - tag->t_flags = cpu_to_be32(tag_flag); - tagp += sizeof(journal_block_tag_t); - space_left -= sizeof(journal_block_tag_t); - - if (first_tag) { - memcpy (tagp, journal->j_uuid, 16); - tagp += 16; - space_left -= 16; - first_tag = 0; - } - - /* If there's no more to do, or if the descriptor is full, - let the IO rip! */ - - if (bufs == journal->j_wbufsize || - commit_transaction->t_buffers == NULL || - space_left < sizeof(journal_block_tag_t) + 16) { - - jbd_debug(4, "JBD: Submit %d IOs\n", bufs); - - /* Write an end-of-descriptor marker before - submitting the IOs. "tag" still points to - the last tag we set up. */ - - tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG); - -start_journal_io: - for (i = 0; i < bufs; i++) { - struct buffer_head *bh = wbuf[i]; - lock_buffer(bh); - clear_buffer_dirty(bh); - set_buffer_uptodate(bh); - bh->b_end_io = journal_end_buffer_io_sync; - /* - * In data=journal mode, here we can end up - * writing pagecache data that might be - * mmapped. Since we can't afford to clean the - * page and set PageWriteback (see the comment - * near the other use of _submit_bh()), the - * data can change while the write is in - * flight. Tell the block layer to bounce the - * bio pages if stable pages are required. - */ - _submit_bh(write_op, bh, 1 << BIO_SNAP_STABLE); - } - cond_resched(); - - /* Force a new descriptor to be generated next - time round the loop. 
*/ - descriptor = NULL; - bufs = 0; - } - } - - blk_finish_plug(&plug); - - /* Lo and behold: we have just managed to send a transaction to - the log. Before we can commit it, wait for the IO so far to - complete. Control buffers being written are on the - transaction's t_log_list queue, and metadata buffers are on - the t_iobuf_list queue. - - Wait for the buffers in reverse order. That way we are - less likely to be woken up until all IOs have completed, and - so we incur less scheduling load. - */ - - jbd_debug(3, "JBD: commit phase 4\n"); - - /* - * akpm: these are BJ_IO, and j_list_lock is not needed. - * See __journal_try_to_free_buffer. - */ -wait_for_iobuf: - while (commit_transaction->t_iobuf_list != NULL) { - struct buffer_head *bh; - - jh = commit_transaction->t_iobuf_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_iobuf; - } - if (cond_resched()) - goto wait_for_iobuf; - - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - - clear_buffer_jwrite(bh); - - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); - journal_unfile_buffer(journal, jh); - - /* - * ->t_iobuf_list should contain only dummy buffer_heads - * which were created by journal_write_metadata_buffer(). - */ - BUFFER_TRACE(bh, "dumping temporary bh"); - journal_put_journal_head(jh); - __brelse(bh); - J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); - free_buffer_head(bh); - - /* We also have to unlock and free the corresponding - shadowed buffer */ - jh = commit_transaction->t_shadow_list->b_tprev; - bh = jh2bh(jh); - clear_buffer_jwrite(bh); - J_ASSERT_BH(bh, buffer_jbddirty(bh)); - - /* The metadata is now released for reuse, but we need - to remember it against this transaction so that when - we finally commit, we can do any checkpointing - required. */ - JBUFFER_TRACE(jh, "file as BJ_Forget"); - journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* - * Wake up any transactions which were waiting for this - * IO to complete. 
The barrier must be here so that changes - * by journal_file_buffer() take effect before wake_up_bit() - * does the waitqueue check. - */ - smp_mb(); - wake_up_bit(&bh->b_state, BH_Unshadow); - JBUFFER_TRACE(jh, "brelse shadowed buffer"); - __brelse(bh); - } - - J_ASSERT (commit_transaction->t_shadow_list == NULL); - - jbd_debug(3, "JBD: commit phase 5\n"); - - /* Here we wait for the revoke record and descriptor record buffers */ - wait_for_ctlbuf: - while (commit_transaction->t_log_list != NULL) { - struct buffer_head *bh; - - jh = commit_transaction->t_log_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_ctlbuf; - } - if (cond_resched()) - goto wait_for_ctlbuf; - - if (unlikely(!buffer_uptodate(bh))) - err = -EIO; - - BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); - clear_buffer_jwrite(bh); - journal_unfile_buffer(journal, jh); - journal_put_journal_head(jh); - __brelse(bh); /* One for getblk */ - /* AKPM: bforget here */ - } - - if (err) - journal_abort(journal, err); - - jbd_debug(3, "JBD: commit phase 6\n"); - - /* All metadata is written, now write commit record and do cleanup */ - spin_lock(&journal->j_state_lock); - J_ASSERT(commit_transaction->t_state == T_COMMIT); - commit_transaction->t_state = T_COMMIT_RECORD; - spin_unlock(&journal->j_state_lock); - - if (journal_write_commit_record(journal, commit_transaction)) - err = -EIO; - - if (err) - journal_abort(journal, err); - - /* End of a transaction! Finally, we can do checkpoint - processing: any buffers committed as a result of this - transaction can be removed from any checkpoint list it was on - before. 
*/ - - jbd_debug(3, "JBD: commit phase 7\n"); - - J_ASSERT(commit_transaction->t_sync_datalist == NULL); - J_ASSERT(commit_transaction->t_buffers == NULL); - J_ASSERT(commit_transaction->t_checkpoint_list == NULL); - J_ASSERT(commit_transaction->t_iobuf_list == NULL); - J_ASSERT(commit_transaction->t_shadow_list == NULL); - J_ASSERT(commit_transaction->t_log_list == NULL); - -restart_loop: - /* - * As there are other places (journal_unmap_buffer()) adding buffers - * to this list we have to be careful and hold the j_list_lock. - */ - spin_lock(&journal->j_list_lock); - while (commit_transaction->t_forget) { - transaction_t *cp_transaction; - struct buffer_head *bh; - int try_to_free = 0; - - jh = commit_transaction->t_forget; - spin_unlock(&journal->j_list_lock); - bh = jh2bh(jh); - /* - * Get a reference so that bh cannot be freed before we are - * done with it. - */ - get_bh(bh); - jbd_lock_bh_state(bh); - J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || - jh->b_transaction == journal->j_running_transaction); - - /* - * If there is undo-protected committed data against - * this buffer, then we can remove it now. If it is a - * buffer needing such protection, the old frozen_data - * field now points to a committed version of the - * buffer, so rotate that field to the new committed - * data. - * - * Otherwise, we can just throw away the frozen data now. - */ - if (jh->b_committed_data) { - jbd_free(jh->b_committed_data, bh->b_size); - jh->b_committed_data = NULL; - if (jh->b_frozen_data) { - jh->b_committed_data = jh->b_frozen_data; - jh->b_frozen_data = NULL; - } - } else if (jh->b_frozen_data) { - jbd_free(jh->b_frozen_data, bh->b_size); - jh->b_frozen_data = NULL; - } - - spin_lock(&journal->j_list_lock); - cp_transaction = jh->b_cp_transaction; - if (cp_transaction) { - JBUFFER_TRACE(jh, "remove from old cp transaction"); - __journal_remove_checkpoint(jh); - } - - /* Only re-checkpoint the buffer_head if it is marked - * dirty. 
If the buffer was added to the BJ_Forget list - * by journal_forget, it may no longer be dirty and - * there's no point in keeping a checkpoint record for - * it. */ - - /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { - /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. We are rather throughout in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. - */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); - clear_buffer_mapped(bh); - clear_buffer_new(bh); - clear_buffer_req(bh); - bh->b_bdev = NULL; - } - } - - if (buffer_jbddirty(bh)) { - JBUFFER_TRACE(jh, "add to new checkpointing trans"); - __journal_insert_checkpoint(jh, commit_transaction); - if (is_journal_aborted(journal)) - clear_buffer_jbddirty(bh); - } else { - J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* - * The buffer on BJ_Forget list and not jbddirty means - * it has been freed by this transaction and hence it - * could not have been reallocated until this - * transaction has committed. *BUT* it could be - * reallocated once we have written all the data to - * disk and before we process the buffer on BJ_Forget - * list. 
- */ - if (!jh->b_next_transaction) - try_to_free = 1; - } - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); - if (try_to_free) - release_buffer_page(bh); - else - __brelse(bh); - cond_resched_lock(&journal->j_list_lock); - } - spin_unlock(&journal->j_list_lock); - /* - * This is a bit sleazy. We use j_list_lock to protect transition - * of a transaction into T_FINISHED state and calling - * __journal_drop_transaction(). Otherwise we could race with - * other checkpointing code processing the transaction... - */ - spin_lock(&journal->j_state_lock); - spin_lock(&journal->j_list_lock); - /* - * Now recheck if some buffers did not get attached to the transaction - * while the lock was dropped... - */ - if (commit_transaction->t_forget) { - spin_unlock(&journal->j_list_lock); - spin_unlock(&journal->j_state_lock); - goto restart_loop; - } - - /* Done with this transaction! */ - - jbd_debug(3, "JBD: commit phase 8\n"); - - J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD); - - commit_transaction->t_state = T_FINISHED; - J_ASSERT(commit_transaction == journal->j_committing_transaction); - journal->j_commit_sequence = commit_transaction->t_tid; - journal->j_committing_transaction = NULL; - commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time)); - - /* - * weight the commit time higher than the average time so we don't - * react too strongly to vast changes in commit time - */ - if (likely(journal->j_average_commit_time)) - journal->j_average_commit_time = (commit_time*3 + - journal->j_average_commit_time) / 4; - else - journal->j_average_commit_time = commit_time; - - spin_unlock(&journal->j_state_lock); - - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { - __journal_drop_transaction(journal, commit_transaction); - } else { - if (journal->j_checkpoint_transactions == NULL) { - journal->j_checkpoint_transactions = commit_transaction; - 
commit_transaction->t_cpnext = commit_transaction; - commit_transaction->t_cpprev = commit_transaction; - } else { - commit_transaction->t_cpnext = - journal->j_checkpoint_transactions; - commit_transaction->t_cpprev = - commit_transaction->t_cpnext->t_cpprev; - commit_transaction->t_cpnext->t_cpprev = - commit_transaction; - commit_transaction->t_cpprev->t_cpnext = - commit_transaction; - } - } - spin_unlock(&journal->j_list_lock); - - trace_jbd_end_commit(journal, commit_transaction); - jbd_debug(1, "JBD: commit %d complete, head %d\n", - journal->j_commit_sequence, journal->j_tail_sequence); - - wake_up(&journal->j_wait_done_commit); -} diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c deleted file mode 100644 index c46a79adb6ad..000000000000 --- a/fs/jbd/journal.c +++ /dev/null @@ -1,2145 +0,0 @@ -/* - * linux/fs/jbd/journal.c - * - * Written by Stephen C. Tweedie , 1998 - * - * Copyright 1998 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Generic filesystem journal-writing code; part of the ext2fs - * journaling system. - * - * This file manages journals: areas of disk reserved for logging - * transactional updates. This includes the kernel journaling thread - * which is responsible for scheduling updates to the log. - * - * We do not actually manage the physical storage of the journal in this - * file: that is left to a per-journal policy function, which allows us - * to store the journal within a filesystem-specified area for ext2 - * journaling (ext2 can use a reserved inode for storing the log). 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CREATE_TRACE_POINTS -#include - -#include -#include - -EXPORT_SYMBOL(journal_start); -EXPORT_SYMBOL(journal_restart); -EXPORT_SYMBOL(journal_extend); -EXPORT_SYMBOL(journal_stop); -EXPORT_SYMBOL(journal_lock_updates); -EXPORT_SYMBOL(journal_unlock_updates); -EXPORT_SYMBOL(journal_get_write_access); -EXPORT_SYMBOL(journal_get_create_access); -EXPORT_SYMBOL(journal_get_undo_access); -EXPORT_SYMBOL(journal_dirty_data); -EXPORT_SYMBOL(journal_dirty_metadata); -EXPORT_SYMBOL(journal_release_buffer); -EXPORT_SYMBOL(journal_forget); -#if 0 -EXPORT_SYMBOL(journal_sync_buffer); -#endif -EXPORT_SYMBOL(journal_flush); -EXPORT_SYMBOL(journal_revoke); - -EXPORT_SYMBOL(journal_init_dev); -EXPORT_SYMBOL(journal_init_inode); -EXPORT_SYMBOL(journal_update_format); -EXPORT_SYMBOL(journal_check_used_features); -EXPORT_SYMBOL(journal_check_available_features); -EXPORT_SYMBOL(journal_set_features); -EXPORT_SYMBOL(journal_create); -EXPORT_SYMBOL(journal_load); -EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_abort); -EXPORT_SYMBOL(journal_errno); -EXPORT_SYMBOL(journal_ack_err); -EXPORT_SYMBOL(journal_clear_err); -EXPORT_SYMBOL(log_wait_commit); -EXPORT_SYMBOL(log_start_commit); -EXPORT_SYMBOL(journal_start_commit); -EXPORT_SYMBOL(journal_force_commit_nested); -EXPORT_SYMBOL(journal_wipe); -EXPORT_SYMBOL(journal_blocks_per_page); -EXPORT_SYMBOL(journal_invalidatepage); -EXPORT_SYMBOL(journal_try_to_free_buffers); -EXPORT_SYMBOL(journal_force_commit); - -static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); -static void __journal_abort_soft (journal_t *journal, int errno); -static const char *journal_dev_name(journal_t *journal, char *buffer); - -#ifdef CONFIG_JBD_DEBUG -void __jbd_debug(int level, const char *file, const char *func, - unsigned int line, const char *fmt, ...) 
-{ - struct va_format vaf; - va_list args; - - if (level > journal_enable_debug) - return; - va_start(args, fmt); - vaf.fmt = fmt; - vaf.va = &args; - printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); - va_end(args); -} -EXPORT_SYMBOL(__jbd_debug); -#endif - -/* - * Helper function used to manage commit timeouts - */ - -static void commit_timeout(unsigned long __data) -{ - struct task_struct * p = (struct task_struct *) __data; - - wake_up_process(p); -} - -/* - * kjournald: The main thread function used to manage a logging device - * journal. - * - * This kernel thread is responsible for two things: - * - * 1) COMMIT: Every so often we need to commit the current state of the - * filesystem to disk. The journal thread is responsible for writing - * all of the metadata buffers to disk. - * - * 2) CHECKPOINT: We cannot reuse a used section of the log file until all - * of the data in that part of the log has been rewritten elsewhere on - * the disk. Flushing these old buffers to reclaim space in the log is - * known as checkpointing, and this thread is responsible for that job. - */ - -static int kjournald(void *arg) -{ - journal_t *journal = arg; - transaction_t *transaction; - - /* - * Set up an interval timer which can be used to trigger a commit wakeup - * after the commit interval expires - */ - setup_timer(&journal->j_commit_timer, commit_timeout, - (unsigned long)current); - - set_freezable(); - - /* Record that the journal thread is running */ - journal->j_task = current; - wake_up(&journal->j_wait_done_commit); - - printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n", - journal->j_commit_interval / HZ); - - /* - * And now, wait forever for commit wakeup events. 
- */ - spin_lock(&journal->j_state_lock); - -loop: - if (journal->j_flags & JFS_UNMOUNT) - goto end_loop; - - jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", - journal->j_commit_sequence, journal->j_commit_request); - - if (journal->j_commit_sequence != journal->j_commit_request) { - jbd_debug(1, "OK, requests differ\n"); - spin_unlock(&journal->j_state_lock); - del_timer_sync(&journal->j_commit_timer); - journal_commit_transaction(journal); - spin_lock(&journal->j_state_lock); - goto loop; - } - - wake_up(&journal->j_wait_done_commit); - if (freezing(current)) { - /* - * The simpler the better. Flushing journal isn't a - * good idea, because that depends on threads that may - * be already stopped. - */ - jbd_debug(1, "Now suspending kjournald\n"); - spin_unlock(&journal->j_state_lock); - try_to_freeze(); - spin_lock(&journal->j_state_lock); - } else { - /* - * We assume on resume that commits are already there, - * so we don't sleep - */ - DEFINE_WAIT(wait); - int should_sleep = 1; - - prepare_to_wait(&journal->j_wait_commit, &wait, - TASK_INTERRUPTIBLE); - if (journal->j_commit_sequence != journal->j_commit_request) - should_sleep = 0; - transaction = journal->j_running_transaction; - if (transaction && time_after_eq(jiffies, - transaction->t_expires)) - should_sleep = 0; - if (journal->j_flags & JFS_UNMOUNT) - should_sleep = 0; - if (should_sleep) { - spin_unlock(&journal->j_state_lock); - schedule(); - spin_lock(&journal->j_state_lock); - } - finish_wait(&journal->j_wait_commit, &wait); - } - - jbd_debug(1, "kjournald wakes\n"); - - /* - * Were we woken up by a commit wakeup event? 
- */ - transaction = journal->j_running_transaction; - if (transaction && time_after_eq(jiffies, transaction->t_expires)) { - journal->j_commit_request = transaction->t_tid; - jbd_debug(1, "woke because of timeout\n"); - } - goto loop; - -end_loop: - spin_unlock(&journal->j_state_lock); - del_timer_sync(&journal->j_commit_timer); - journal->j_task = NULL; - wake_up(&journal->j_wait_done_commit); - jbd_debug(1, "Journal thread exiting.\n"); - return 0; -} - -static int journal_start_thread(journal_t *journal) -{ - struct task_struct *t; - - t = kthread_run(kjournald, journal, "kjournald"); - if (IS_ERR(t)) - return PTR_ERR(t); - - wait_event(journal->j_wait_done_commit, journal->j_task != NULL); - return 0; -} - -static void journal_kill_thread(journal_t *journal) -{ - spin_lock(&journal->j_state_lock); - journal->j_flags |= JFS_UNMOUNT; - - while (journal->j_task) { - wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_done_commit, - journal->j_task == NULL); - spin_lock(&journal->j_state_lock); - } - spin_unlock(&journal->j_state_lock); -} - -/* - * journal_write_metadata_buffer: write a metadata buffer to the journal. - * - * Writes a metadata buffer to a given disk block. The actual IO is not - * performed but a new buffer_head is constructed which labels the data - * to be written with the correct destination disk block. - * - * Any magic-number escaping which needs to be done will cause a - * copy-out here. If the buffer happens to start with the - * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the - * magic number is only written to the log for descripter blocks. In - * this case, we copy the data and replace the first word with 0, and we - * return a result code which indicates that this buffer needs to be - * marked as an escaped buffer in the corresponding log descriptor - * block. The missing word can then be restored when the block is read - * during recovery. 
- * - * If the source buffer has already been modified by a new transaction - * since we took the last commit snapshot, we use the frozen copy of - * that data for IO. If we end up using the existing buffer_head's data - * for the write, then we *have* to lock the buffer to prevent anyone - * else from using and possibly modifying it while the IO is in - * progress. - * - * The function returns a pointer to the buffer_heads to be used for IO. - * - * We assume that the journal has already been locked in this function. - * - * Return value: - * <0: Error - * >=0: Finished OK - * - * On success: - * Bit 0 set == escape performed on the data - * Bit 1 set == buffer copy-out performed (kfree the data after IO) - */ - -int journal_write_metadata_buffer(transaction_t *transaction, - struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned int blocknr) -{ - int need_copy_out = 0; - int done_copy_out = 0; - int do_escape = 0; - char *mapped_data; - struct buffer_head *new_bh; - struct journal_head *new_jh; - struct page *new_page; - unsigned int new_offset; - struct buffer_head *bh_in = jh2bh(jh_in); - journal_t *journal = transaction->t_journal; - - /* - * The buffer really shouldn't be locked: only the current committing - * transaction is allowed to write it, so nobody else is allowed - * to do any IO. - * - * akpm: except if we're journalling data, and write() output is - * also part of a shared mapping, and another thread has - * decided to launch a writepage() against this buffer. - */ - J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); - - new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); - /* keep subsequent assertions sane */ - atomic_set(&new_bh->b_count, 1); - new_jh = journal_add_journal_head(new_bh); /* This sleeps */ - - /* - * If a new transaction has already done a buffer copy-out, then - * we use that version of the data for the commit. 
- */ - jbd_lock_bh_state(bh_in); -repeat: - if (jh_in->b_frozen_data) { - done_copy_out = 1; - new_page = virt_to_page(jh_in->b_frozen_data); - new_offset = offset_in_page(jh_in->b_frozen_data); - } else { - new_page = jh2bh(jh_in)->b_page; - new_offset = offset_in_page(jh2bh(jh_in)->b_data); - } - - mapped_data = kmap_atomic(new_page); - /* - * Check for escaping - */ - if (*((__be32 *)(mapped_data + new_offset)) == - cpu_to_be32(JFS_MAGIC_NUMBER)) { - need_copy_out = 1; - do_escape = 1; - } - kunmap_atomic(mapped_data); - - /* - * Do we need to do a data copy? - */ - if (need_copy_out && !done_copy_out) { - char *tmp; - - jbd_unlock_bh_state(bh_in); - tmp = jbd_alloc(bh_in->b_size, GFP_NOFS); - jbd_lock_bh_state(bh_in); - if (jh_in->b_frozen_data) { - jbd_free(tmp, bh_in->b_size); - goto repeat; - } - - jh_in->b_frozen_data = tmp; - mapped_data = kmap_atomic(new_page); - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); - kunmap_atomic(mapped_data); - - new_page = virt_to_page(tmp); - new_offset = offset_in_page(tmp); - done_copy_out = 1; - } - - /* - * Did we need to do an escaping? Now we've done all the - * copying, we can finally do so. - */ - if (do_escape) { - mapped_data = kmap_atomic(new_page); - *((unsigned int *)(mapped_data + new_offset)) = 0; - kunmap_atomic(mapped_data); - } - - set_bh_page(new_bh, new_page, new_offset); - new_jh->b_transaction = NULL; - new_bh->b_size = jh2bh(jh_in)->b_size; - new_bh->b_bdev = transaction->t_journal->j_dev; - new_bh->b_blocknr = blocknr; - set_buffer_mapped(new_bh); - set_buffer_dirty(new_bh); - - *jh_out = new_jh; - - /* - * The to-be-written buffer needs to get moved to the io queue, - * and the original buffer whose contents we are shadowing or - * copying is moved to the transaction's shadow queue. 
- */ - JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); - spin_lock(&journal->j_list_lock); - __journal_file_buffer(jh_in, transaction, BJ_Shadow); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh_in); - - JBUFFER_TRACE(new_jh, "file as BJ_IO"); - journal_file_buffer(new_jh, transaction, BJ_IO); - - return do_escape | (done_copy_out << 1); -} - -/* - * Allocation code for the journal file. Manage the space left in the - * journal, so that we can begin checkpointing when appropriate. - */ - -/* - * __log_space_left: Return the number of free blocks left in the journal. - * - * Called with the journal already locked. - * - * Called under j_state_lock - */ - -int __log_space_left(journal_t *journal) -{ - int left = journal->j_free; - - assert_spin_locked(&journal->j_state_lock); - - /* - * Be pessimistic here about the number of those free blocks which - * might be required for log descriptor control blocks. - */ - -#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ - - left -= MIN_LOG_RESERVED_BLOCKS; - - if (left <= 0) - return 0; - left -= (left >> 3); - return left; -} - -/* - * Called under j_state_lock. Returns true if a transaction commit was started. - */ -int __log_start_commit(journal_t *journal, tid_t target) -{ - /* - * The only transaction we can possibly wait upon is the - * currently running transaction (if it exists). Otherwise, - * the target tid must be an old one. - */ - if (journal->j_commit_request != target && - journal->j_running_transaction && - journal->j_running_transaction->t_tid == target) { - /* - * We want a new commit: OK, mark the request and wakeup the - * commit thread. We do _not_ do the commit ourselves. 
- */ - - journal->j_commit_request = target; - jbd_debug(1, "JBD: requesting commit %d/%d\n", - journal->j_commit_request, - journal->j_commit_sequence); - wake_up(&journal->j_wait_commit); - return 1; - } else if (!tid_geq(journal->j_commit_request, target)) - /* This should never happen, but if it does, preserve - the evidence before kjournald goes into a loop and - increments j_commit_sequence beyond all recognition. */ - WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", - journal->j_commit_request, journal->j_commit_sequence, - target, journal->j_running_transaction ? - journal->j_running_transaction->t_tid : 0); - return 0; -} - -int log_start_commit(journal_t *journal, tid_t tid) -{ - int ret; - - spin_lock(&journal->j_state_lock); - ret = __log_start_commit(journal, tid); - spin_unlock(&journal->j_state_lock); - return ret; -} - -/* - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. - * - * We can only force the running transaction if we don't have an active handle; - * otherwise, we will deadlock. - * - * Returns true if a transaction was started. - */ -int journal_force_commit_nested(journal_t *journal) -{ - transaction_t *transaction = NULL; - tid_t tid; - - spin_lock(&journal->j_state_lock); - if (journal->j_running_transaction && !current->journal_info) { - transaction = journal->j_running_transaction; - __log_start_commit(journal, transaction->t_tid); - } else if (journal->j_committing_transaction) - transaction = journal->j_committing_transaction; - - if (!transaction) { - spin_unlock(&journal->j_state_lock); - return 0; /* Nothing to retry */ - } - - tid = transaction->t_tid; - spin_unlock(&journal->j_state_lock); - log_wait_commit(journal, tid); - return 1; -} - -/* - * Start a commit of the current running transaction (if any). 
Returns true - * if a transaction is going to be committed (or is currently already - * committing), and fills its tid in at *ptid - */ -int journal_start_commit(journal_t *journal, tid_t *ptid) -{ - int ret = 0; - - spin_lock(&journal->j_state_lock); - if (journal->j_running_transaction) { - tid_t tid = journal->j_running_transaction->t_tid; - - __log_start_commit(journal, tid); - /* There's a running transaction and we've just made sure - * it's commit has been scheduled. */ - if (ptid) - *ptid = tid; - ret = 1; - } else if (journal->j_committing_transaction) { - /* - * If commit has been started, then we have to wait for - * completion of that transaction. - */ - if (ptid) - *ptid = journal->j_committing_transaction->t_tid; - ret = 1; - } - spin_unlock(&journal->j_state_lock); - return ret; -} - -/* - * Wait for a specified commit to complete. - * The caller may not hold the journal lock. - */ -int log_wait_commit(journal_t *journal, tid_t tid) -{ - int err = 0; - -#ifdef CONFIG_JBD_DEBUG - spin_lock(&journal->j_state_lock); - if (!tid_geq(journal->j_commit_request, tid)) { - printk(KERN_ERR - "%s: error: j_commit_request=%d, tid=%d\n", - __func__, journal->j_commit_request, tid); - } - spin_unlock(&journal->j_state_lock); -#endif - spin_lock(&journal->j_state_lock); - /* - * Not running or committing trans? Must be already committed. This - * saves us from waiting for a *long* time when tid overflows. 
- */ - if (!((journal->j_running_transaction && - journal->j_running_transaction->t_tid == tid) || - (journal->j_committing_transaction && - journal->j_committing_transaction->t_tid == tid))) - goto out_unlock; - - if (!tid_geq(journal->j_commit_waited, tid)) - journal->j_commit_waited = tid; - while (tid_gt(tid, journal->j_commit_sequence)) { - jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", - tid, journal->j_commit_sequence); - wake_up(&journal->j_wait_commit); - spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_done_commit, - !tid_gt(tid, journal->j_commit_sequence)); - spin_lock(&journal->j_state_lock); - } -out_unlock: - spin_unlock(&journal->j_state_lock); - - if (unlikely(is_journal_aborted(journal))) - err = -EIO; - return err; -} - -/* - * Return 1 if a given transaction has not yet sent barrier request - * connected with a transaction commit. If 0 is returned, transaction - * may or may not have sent the barrier. Used to avoid sending barrier - * twice in common cases. - */ -int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid) -{ - int ret = 0; - transaction_t *commit_trans; - - if (!(journal->j_flags & JFS_BARRIER)) - return 0; - spin_lock(&journal->j_state_lock); - /* Transaction already committed? */ - if (tid_geq(journal->j_commit_sequence, tid)) - goto out; - /* - * Transaction is being committed and we already proceeded to - * writing commit record? 
- */ - commit_trans = journal->j_committing_transaction; - if (commit_trans && commit_trans->t_tid == tid && - commit_trans->t_state >= T_COMMIT_RECORD) - goto out; - ret = 1; -out: - spin_unlock(&journal->j_state_lock); - return ret; -} -EXPORT_SYMBOL(journal_trans_will_send_data_barrier); - -/* - * Log buffer allocation routines: - */ - -int journal_next_log_block(journal_t *journal, unsigned int *retp) -{ - unsigned int blocknr; - - spin_lock(&journal->j_state_lock); - J_ASSERT(journal->j_free > 1); - - blocknr = journal->j_head; - journal->j_head++; - journal->j_free--; - if (journal->j_head == journal->j_last) - journal->j_head = journal->j_first; - spin_unlock(&journal->j_state_lock); - return journal_bmap(journal, blocknr, retp); -} - -/* - * Conversion of logical to physical block numbers for the journal - * - * On external journals the journal blocks are identity-mapped, so - * this is a no-op. If needed, we can use j_blk_offset - everything is - * ready. - */ -int journal_bmap(journal_t *journal, unsigned int blocknr, - unsigned int *retp) -{ - int err = 0; - unsigned int ret; - - if (journal->j_inode) { - ret = bmap(journal->j_inode, blocknr); - if (ret) - *retp = ret; - else { - char b[BDEVNAME_SIZE]; - - printk(KERN_ALERT "%s: journal block not found " - "at offset %u on %s\n", - __func__, - blocknr, - bdevname(journal->j_dev, b)); - err = -EIO; - __journal_abort_soft(journal, err); - } - } else { - *retp = blocknr; /* +journal->j_blk_offset */ - } - return err; -} - -/* - * We play buffer_head aliasing tricks to write data/metadata blocks to - * the journal without copying their contents, but for journal - * descriptor blocks we do need to generate bona fide buffers. - * - * After the caller of journal_get_descriptor_buffer() has finished modifying - * the buffer's contents they really should run flush_dcache_page(bh->b_page). 
- * But we don't bother doing that, so there will be coherency problems with - * mmaps of blockdevs which hold live JBD-controlled filesystems. - */ -struct journal_head *journal_get_descriptor_buffer(journal_t *journal) -{ - struct buffer_head *bh; - unsigned int blocknr; - int err; - - err = journal_next_log_block(journal, &blocknr); - - if (err) - return NULL; - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) - return NULL; - lock_buffer(bh); - memset(bh->b_data, 0, journal->j_blocksize); - set_buffer_uptodate(bh); - unlock_buffer(bh); - BUFFER_TRACE(bh, "return this buffer"); - return journal_add_journal_head(bh); -} - -/* - * Management for journal control blocks: functions to create and - * destroy journal_t structures, and to initialise and read existing - * journal blocks from disk. */ - -/* First: create and setup a journal_t object in memory. We initialise - * very few fields yet: that has to wait until we have created the - * journal structures from from scratch, or loaded them from disk. */ - -static journal_t * journal_init_common (void) -{ - journal_t *journal; - int err; - - journal = kzalloc(sizeof(*journal), GFP_KERNEL); - if (!journal) - goto fail; - - init_waitqueue_head(&journal->j_wait_transaction_locked); - init_waitqueue_head(&journal->j_wait_logspace); - init_waitqueue_head(&journal->j_wait_done_commit); - init_waitqueue_head(&journal->j_wait_checkpoint); - init_waitqueue_head(&journal->j_wait_commit); - init_waitqueue_head(&journal->j_wait_updates); - mutex_init(&journal->j_checkpoint_mutex); - spin_lock_init(&journal->j_revoke_lock); - spin_lock_init(&journal->j_list_lock); - spin_lock_init(&journal->j_state_lock); - - journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); - - /* The journal is marked for error until we succeed with recovery! */ - journal->j_flags = JFS_ABORT; - - /* Set up a default-sized revoke table for the new mount. 
*/ - err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); - if (err) { - kfree(journal); - goto fail; - } - return journal; -fail: - return NULL; -} - -/* journal_init_dev and journal_init_inode: - * - * Create a journal structure assigned some fixed set of disk blocks to - * the journal. We don't actually touch those disk blocks yet, but we - * need to set up all of the mapping information to tell the journaling - * system where the journal blocks are. - * - */ - -/** - * journal_t * journal_init_dev() - creates and initialises a journal structure - * @bdev: Block device on which to create the journal - * @fs_dev: Device which hold journalled filesystem for this journal. - * @start: Block nr Start of journal. - * @len: Length of the journal in blocks. - * @blocksize: blocksize of journalling device - * - * Returns: a newly created journal_t * - * - * journal_init_dev creates a journal which maps a fixed contiguous - * range of blocks on an arbitrary block device. - * - */ -journal_t * journal_init_dev(struct block_device *bdev, - struct block_device *fs_dev, - int start, int len, int blocksize) -{ - journal_t *journal = journal_init_common(); - struct buffer_head *bh; - int n; - - if (!journal) - return NULL; - - /* journal descriptor can store up to n blocks -bzzz */ - journal->j_blocksize = blocksize; - n = journal->j_blocksize / sizeof(journal_block_tag_t); - journal->j_wbufsize = n; - journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); - if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", - __func__); - goto out_err; - } - journal->j_dev = bdev; - journal->j_fs_dev = fs_dev; - journal->j_blk_offset = start; - journal->j_maxlen = len; - - bh = __getblk(journal->j_dev, start, journal->j_blocksize); - if (!bh) { - printk(KERN_ERR - "%s: Cannot get buffer for journal superblock\n", - __func__); - goto out_err; - } - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t 
*)bh->b_data; - - return journal; -out_err: - kfree(journal->j_wbuf); - kfree(journal); - return NULL; -} - -/** - * journal_t * journal_init_inode () - creates a journal which maps to a inode. - * @inode: An inode to create the journal in - * - * journal_init_inode creates a journal which maps an on-disk inode as - * the journal. The inode must exist already, must support bmap() and - * must have all data blocks preallocated. - */ -journal_t * journal_init_inode (struct inode *inode) -{ - struct buffer_head *bh; - journal_t *journal = journal_init_common(); - int err; - int n; - unsigned int blocknr; - - if (!journal) - return NULL; - - journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; - journal->j_inode = inode; - jbd_debug(1, - "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", - journal, inode->i_sb->s_id, inode->i_ino, - (long long) inode->i_size, - inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize); - - journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; - journal->j_blocksize = inode->i_sb->s_blocksize; - - /* journal descriptor can store up to n blocks -bzzz */ - n = journal->j_blocksize / sizeof(journal_block_tag_t); - journal->j_wbufsize = n; - journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); - if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", - __func__); - goto out_err; - } - - err = journal_bmap(journal, 0, &blocknr); - /* If that failed, give up */ - if (err) { - printk(KERN_ERR "%s: Cannot locate journal superblock\n", - __func__); - goto out_err; - } - - bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) { - printk(KERN_ERR - "%s: Cannot get buffer for journal superblock\n", - __func__); - goto out_err; - } - journal->j_sb_buffer = bh; - journal->j_superblock = (journal_superblock_t *)bh->b_data; - - return journal; -out_err: - kfree(journal->j_wbuf); - kfree(journal); - return NULL; -} - -/* - * If the journal init or 
create aborts, we need to mark the journal - * superblock as being NULL to prevent the journal destroy from writing - * back a bogus superblock. - */ -static void journal_fail_superblock (journal_t *journal) -{ - struct buffer_head *bh = journal->j_sb_buffer; - brelse(bh); - journal->j_sb_buffer = NULL; -} - -/* - * Given a journal_t structure, initialise the various fields for - * startup of a new journaling session. We use this both when creating - * a journal, and after recovering an old journal to reset it for - * subsequent use. - */ - -static int journal_reset(journal_t *journal) -{ - journal_superblock_t *sb = journal->j_superblock; - unsigned int first, last; - - first = be32_to_cpu(sb->s_first); - last = be32_to_cpu(sb->s_maxlen); - if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { - printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n", - first, last); - journal_fail_superblock(journal); - return -EINVAL; - } - - journal->j_first = first; - journal->j_last = last; - - journal->j_head = first; - journal->j_tail = first; - journal->j_free = last - first; - - journal->j_tail_sequence = journal->j_transaction_sequence; - journal->j_commit_sequence = journal->j_transaction_sequence - 1; - journal->j_commit_request = journal->j_commit_sequence; - - journal->j_max_transaction_buffers = journal->j_maxlen / 4; - - /* - * As a special case, if the on-disk copy is already marked as needing - * no recovery (s_start == 0), then we can safely defer the superblock - * update until the next commit by setting JFS_FLUSHED. This avoids - * attempting a write to a potential-readonly device. - */ - if (sb->s_start == 0) { - jbd_debug(1,"JBD: Skipping superblock update on recovered sb " - "(start %u, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, - journal->j_errno); - journal->j_flags |= JFS_FLUSHED; - } else { - /* Lock here to make assertions happy... */ - mutex_lock(&journal->j_checkpoint_mutex); - /* - * Update log tail information. 
We use WRITE_FUA since new - * transaction will start reusing journal space and so we - * must make sure information about current log tail is on - * disk before that. - */ - journal_update_sb_log_tail(journal, - journal->j_tail_sequence, - journal->j_tail, - WRITE_FUA); - mutex_unlock(&journal->j_checkpoint_mutex); - } - return journal_start_thread(journal); -} - -/** - * int journal_create() - Initialise the new journal file - * @journal: Journal to create. This structure must have been initialised - * - * Given a journal_t structure which tells us which disk blocks we can - * use, create a new journal superblock and initialise all of the - * journal fields from scratch. - **/ -int journal_create(journal_t *journal) -{ - unsigned int blocknr; - struct buffer_head *bh; - journal_superblock_t *sb; - int i, err; - - if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) { - printk (KERN_ERR "Journal length (%d blocks) too short.\n", - journal->j_maxlen); - journal_fail_superblock(journal); - return -EINVAL; - } - - if (journal->j_inode == NULL) { - /* - * We don't know what block to start at! - */ - printk(KERN_EMERG - "%s: creation of journal on external device!\n", - __func__); - BUG(); - } - - /* Zero out the entire journal on disk. We cannot afford to - have any blocks on disk beginning with JFS_MAGIC_NUMBER. 
*/ - jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); - for (i = 0; i < journal->j_maxlen; i++) { - err = journal_bmap(journal, i, &blocknr); - if (err) - return err; - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (unlikely(!bh)) - return -ENOMEM; - lock_buffer(bh); - memset (bh->b_data, 0, journal->j_blocksize); - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - BUFFER_TRACE(bh, "marking uptodate"); - set_buffer_uptodate(bh); - unlock_buffer(bh); - __brelse(bh); - } - - sync_blockdev(journal->j_dev); - jbd_debug(1, "JBD: journal cleared.\n"); - - /* OK, fill in the initial static fields in the new superblock */ - sb = journal->j_superblock; - - sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); - - sb->s_blocksize = cpu_to_be32(journal->j_blocksize); - sb->s_maxlen = cpu_to_be32(journal->j_maxlen); - sb->s_first = cpu_to_be32(1); - - journal->j_transaction_sequence = 1; - - journal->j_flags &= ~JFS_ABORT; - journal->j_format_version = 2; - - return journal_reset(journal); -} - -static void journal_write_superblock(journal_t *journal, int write_op) -{ - struct buffer_head *bh = journal->j_sb_buffer; - int ret; - - trace_journal_write_superblock(journal, write_op); - if (!(journal->j_flags & JFS_BARRIER)) - write_op &= ~(REQ_FUA | REQ_FLUSH); - lock_buffer(bh); - if (buffer_write_io_error(bh)) { - char b[BDEVNAME_SIZE]; - /* - * Oh, dear. A previous attempt to write the journal - * superblock failed. This could happen because the - * USB device was yanked out. Or it could happen to - * be a transient write error and maybe the block will - * be remapped. Nothing we can do but to retry the - * write and hope for the best. 
- */ - printk(KERN_ERR "JBD: previous I/O error detected " - "for journal superblock update for %s.\n", - journal_dev_name(journal, b)); - clear_buffer_write_io_error(bh); - set_buffer_uptodate(bh); - } - - get_bh(bh); - bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(write_op, bh); - wait_on_buffer(bh); - if (buffer_write_io_error(bh)) { - clear_buffer_write_io_error(bh); - set_buffer_uptodate(bh); - ret = -EIO; - } - if (ret) { - char b[BDEVNAME_SIZE]; - printk(KERN_ERR "JBD: Error %d detected " - "when updating journal superblock for %s.\n", - ret, journal_dev_name(journal, b)); - } -} - -/** - * journal_update_sb_log_tail() - Update log tail in journal sb on disk. - * @journal: The journal to update. - * @tail_tid: TID of the new transaction at the tail of the log - * @tail_block: The first block of the transaction at the tail of the log - * @write_op: With which operation should we write the journal sb - * - * Update a journal's superblock information about log tail and write it to - * disk, waiting for the IO to complete. - */ -void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, - unsigned int tail_block, int write_op) -{ - journal_superblock_t *sb = journal->j_superblock; - - BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); - jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n", - tail_block, tail_tid); - - sb->s_sequence = cpu_to_be32(tail_tid); - sb->s_start = cpu_to_be32(tail_block); - - journal_write_superblock(journal, write_op); - - /* Log is no longer empty */ - spin_lock(&journal->j_state_lock); - WARN_ON(!sb->s_sequence); - journal->j_flags &= ~JFS_FLUSHED; - spin_unlock(&journal->j_state_lock); -} - -/** - * mark_journal_empty() - Mark on disk journal as empty. - * @journal: The journal to update. - * - * Update a journal's dynamic superblock fields to show that journal is empty. - * Write updated superblock to disk waiting for IO to complete. 
- */ -static void mark_journal_empty(journal_t *journal) -{ - journal_superblock_t *sb = journal->j_superblock; - - BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); - spin_lock(&journal->j_state_lock); - /* Is it already empty? */ - if (sb->s_start == 0) { - spin_unlock(&journal->j_state_lock); - return; - } - jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", - journal->j_tail_sequence); - - sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); - sb->s_start = cpu_to_be32(0); - spin_unlock(&journal->j_state_lock); - - journal_write_superblock(journal, WRITE_FUA); - - spin_lock(&journal->j_state_lock); - /* Log is empty */ - journal->j_flags |= JFS_FLUSHED; - spin_unlock(&journal->j_state_lock); -} - -/** - * journal_update_sb_errno() - Update error in the journal. - * @journal: The journal to update. - * - * Update a journal's errno. Write updated superblock to disk waiting for IO - * to complete. - */ -static void journal_update_sb_errno(journal_t *journal) -{ - journal_superblock_t *sb = journal->j_superblock; - - spin_lock(&journal->j_state_lock); - jbd_debug(1, "JBD: updating superblock error (errno %d)\n", - journal->j_errno); - sb->s_errno = cpu_to_be32(journal->j_errno); - spin_unlock(&journal->j_state_lock); - - journal_write_superblock(journal, WRITE_SYNC); -} - -/* - * Read the superblock for a given journal, performing initial - * validation of the format. 
- */ - -static int journal_get_superblock(journal_t *journal) -{ - struct buffer_head *bh; - journal_superblock_t *sb; - int err = -EIO; - - bh = journal->j_sb_buffer; - - J_ASSERT(bh != NULL); - if (!buffer_uptodate(bh)) { - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - printk (KERN_ERR - "JBD: IO error reading journal superblock\n"); - goto out; - } - } - - sb = journal->j_superblock; - - err = -EINVAL; - - if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) || - sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { - printk(KERN_WARNING "JBD: no valid journal superblock found\n"); - goto out; - } - - switch(be32_to_cpu(sb->s_header.h_blocktype)) { - case JFS_SUPERBLOCK_V1: - journal->j_format_version = 1; - break; - case JFS_SUPERBLOCK_V2: - journal->j_format_version = 2; - break; - default: - printk(KERN_WARNING "JBD: unrecognised superblock format ID\n"); - goto out; - } - - if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = be32_to_cpu(sb->s_maxlen); - else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) { - printk (KERN_WARNING "JBD: journal file too short\n"); - goto out; - } - - if (be32_to_cpu(sb->s_first) == 0 || - be32_to_cpu(sb->s_first) >= journal->j_maxlen) { - printk(KERN_WARNING - "JBD: Invalid start block of journal: %u\n", - be32_to_cpu(sb->s_first)); - goto out; - } - - return 0; - -out: - journal_fail_superblock(journal); - return err; -} - -/* - * Load the on-disk journal superblock and read the key fields into the - * journal_t. 
- */ - -static int load_superblock(journal_t *journal) -{ - int err; - journal_superblock_t *sb; - - err = journal_get_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); - journal->j_tail = be32_to_cpu(sb->s_start); - journal->j_first = be32_to_cpu(sb->s_first); - journal->j_last = be32_to_cpu(sb->s_maxlen); - journal->j_errno = be32_to_cpu(sb->s_errno); - - return 0; -} - - -/** - * int journal_load() - Read journal from disk. - * @journal: Journal to act on. - * - * Given a journal_t structure which tells us which disk blocks contain - * a journal, read the journal from disk to initialise the in-memory - * structures. - */ -int journal_load(journal_t *journal) -{ - int err; - journal_superblock_t *sb; - - err = load_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - /* If this is a V2 superblock, then we have to check the - * features flags on it. */ - - if (journal->j_format_version >= 2) { - if ((sb->s_feature_ro_compat & - ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || - (sb->s_feature_incompat & - ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) { - printk (KERN_WARNING - "JBD: Unrecognised features on journal\n"); - return -EINVAL; - } - } - - /* Let the recovery code check whether it needs to recover any - * data from the journal. */ - if (journal_recover(journal)) - goto recovery_error; - - /* OK, we've finished with the dynamic journal bits: - * reinitialise the dynamic contents of the superblock in memory - * and reset them on disk. */ - if (journal_reset(journal)) - goto recovery_error; - - journal->j_flags &= ~JFS_ABORT; - journal->j_flags |= JFS_LOADED; - return 0; - -recovery_error: - printk (KERN_WARNING "JBD: recovery failed\n"); - return -EIO; -} - -/** - * void journal_destroy() - Release a journal_t structure. - * @journal: Journal to act on. - * - * Release a journal_t structure once it is no longer in use by the - * journaled object. 
- * Return <0 if we couldn't clean up the journal. - */ -int journal_destroy(journal_t *journal) -{ - int err = 0; - - - /* Wait for the commit thread to wake up and die. */ - journal_kill_thread(journal); - - /* Force a final log commit */ - if (journal->j_running_transaction) - journal_commit_transaction(journal); - - /* Force any old transactions to disk */ - - /* We cannot race with anybody but must keep assertions happy */ - mutex_lock(&journal->j_checkpoint_mutex); - /* Totally anal locking here... */ - spin_lock(&journal->j_list_lock); - while (journal->j_checkpoint_transactions != NULL) { - spin_unlock(&journal->j_list_lock); - log_do_checkpoint(journal); - spin_lock(&journal->j_list_lock); - } - - J_ASSERT(journal->j_running_transaction == NULL); - J_ASSERT(journal->j_committing_transaction == NULL); - J_ASSERT(journal->j_checkpoint_transactions == NULL); - spin_unlock(&journal->j_list_lock); - - if (journal->j_sb_buffer) { - if (!is_journal_aborted(journal)) { - journal->j_tail_sequence = - ++journal->j_transaction_sequence; - mark_journal_empty(journal); - } else - err = -EIO; - brelse(journal->j_sb_buffer); - } - mutex_unlock(&journal->j_checkpoint_mutex); - - iput(journal->j_inode); - if (journal->j_revoke) - journal_destroy_revoke(journal); - kfree(journal->j_wbuf); - kfree(journal); - - return err; -} - - -/** - *int journal_check_used_features () - Check if features specified are used. - * @journal: Journal to check. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Check whether the journal uses all of a given set of - * features. Return true (non-zero) if it does. 
- **/ - -int journal_check_used_features (journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - journal_superblock_t *sb; - - if (!compat && !ro && !incompat) - return 1; - if (journal->j_format_version == 1) - return 0; - - sb = journal->j_superblock; - - if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && - ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && - ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) - return 1; - - return 0; -} - -/** - * int journal_check_available_features() - Check feature set in journalling layer - * @journal: Journal to check. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Check whether the journaling code supports the use of - * all of a given set of features on this journal. Return true - * (non-zero) if it can. */ - -int journal_check_available_features (journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - if (!compat && !ro && !incompat) - return 1; - - /* We can support any known requested features iff the - * superblock is in version 2. Otherwise we fail to support any - * extended sb features. */ - - if (journal->j_format_version != 2) - return 0; - - if ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat && - (ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro && - (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat) - return 1; - - return 0; -} - -/** - * int journal_set_features () - Mark a given journal feature in the superblock - * @journal: Journal to act on. - * @compat: bitmask of compatible features - * @ro: bitmask of features that force read-only mount - * @incompat: bitmask of incompatible features - * - * Mark a given journal feature as present on the - * superblock. Returns true if the requested features could be set. 
- * - */ - -int journal_set_features (journal_t *journal, unsigned long compat, - unsigned long ro, unsigned long incompat) -{ - journal_superblock_t *sb; - - if (journal_check_used_features(journal, compat, ro, incompat)) - return 1; - - if (!journal_check_available_features(journal, compat, ro, incompat)) - return 0; - - jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", - compat, ro, incompat); - - sb = journal->j_superblock; - - sb->s_feature_compat |= cpu_to_be32(compat); - sb->s_feature_ro_compat |= cpu_to_be32(ro); - sb->s_feature_incompat |= cpu_to_be32(incompat); - - return 1; -} - - -/** - * int journal_update_format () - Update on-disk journal structure. - * @journal: Journal to act on. - * - * Given an initialised but unloaded journal struct, poke about in the - * on-disk structure to update it to the most recent supported version. - */ -int journal_update_format (journal_t *journal) -{ - journal_superblock_t *sb; - int err; - - err = journal_get_superblock(journal); - if (err) - return err; - - sb = journal->j_superblock; - - switch (be32_to_cpu(sb->s_header.h_blocktype)) { - case JFS_SUPERBLOCK_V2: - return 0; - case JFS_SUPERBLOCK_V1: - return journal_convert_superblock_v1(journal, sb); - default: - break; - } - return -EINVAL; -} - -static int journal_convert_superblock_v1(journal_t *journal, - journal_superblock_t *sb) -{ - int offset, blocksize; - struct buffer_head *bh; - - printk(KERN_WARNING - "JBD: Converting superblock from version 1 to 2.\n"); - - /* Pre-initialise new fields to zero */ - offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); - blocksize = be32_to_cpu(sb->s_blocksize); - memset(&sb->s_feature_compat, 0, blocksize-offset); - - sb->s_nr_users = cpu_to_be32(1); - sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); - journal->j_format_version = 2; - - bh = journal->j_sb_buffer; - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - sync_dirty_buffer(bh); - return 0; -} - - -/** - * int journal_flush 
() - Flush journal - * @journal: Journal to act on. - * - * Flush all data for a given journal to disk and empty the journal. - * Filesystems can use this when remounting readonly to ensure that - * recovery does not need to happen on remount. - */ - -int journal_flush(journal_t *journal) -{ - int err = 0; - transaction_t *transaction = NULL; - - spin_lock(&journal->j_state_lock); - - /* Force everything buffered to the log... */ - if (journal->j_running_transaction) { - transaction = journal->j_running_transaction; - __log_start_commit(journal, transaction->t_tid); - } else if (journal->j_committing_transaction) - transaction = journal->j_committing_transaction; - - /* Wait for the log commit to complete... */ - if (transaction) { - tid_t tid = transaction->t_tid; - - spin_unlock(&journal->j_state_lock); - log_wait_commit(journal, tid); - } else { - spin_unlock(&journal->j_state_lock); - } - - /* ...and flush everything in the log out to disk. */ - spin_lock(&journal->j_list_lock); - while (!err && journal->j_checkpoint_transactions != NULL) { - spin_unlock(&journal->j_list_lock); - mutex_lock(&journal->j_checkpoint_mutex); - err = log_do_checkpoint(journal); - mutex_unlock(&journal->j_checkpoint_mutex); - spin_lock(&journal->j_list_lock); - } - spin_unlock(&journal->j_list_lock); - - if (is_journal_aborted(journal)) - return -EIO; - - mutex_lock(&journal->j_checkpoint_mutex); - cleanup_journal_tail(journal); - - /* Finally, mark the journal as really needing no recovery. - * This sets s_start==0 in the underlying superblock, which is - * the magic code for a fully-recovered superblock. Any future - * commits of data to the journal will restore the current - * s_start value. 
*/ - mark_journal_empty(journal); - mutex_unlock(&journal->j_checkpoint_mutex); - spin_lock(&journal->j_state_lock); - J_ASSERT(!journal->j_running_transaction); - J_ASSERT(!journal->j_committing_transaction); - J_ASSERT(!journal->j_checkpoint_transactions); - J_ASSERT(journal->j_head == journal->j_tail); - J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); - spin_unlock(&journal->j_state_lock); - return 0; -} - -/** - * int journal_wipe() - Wipe journal contents - * @journal: Journal to act on. - * @write: flag (see below) - * - * Wipe out all of the contents of a journal, safely. This will produce - * a warning if the journal contains any valid recovery information. - * Must be called between journal_init_*() and journal_load(). - * - * If 'write' is non-zero, then we wipe out the journal on disk; otherwise - * we merely suppress recovery. - */ - -int journal_wipe(journal_t *journal, int write) -{ - int err = 0; - - J_ASSERT (!(journal->j_flags & JFS_LOADED)); - - err = load_superblock(journal); - if (err) - return err; - - if (!journal->j_tail) - goto no_recovery; - - printk (KERN_WARNING "JBD: %s recovery information on journal\n", - write ? "Clearing" : "Ignoring"); - - err = journal_skip_recovery(journal); - if (write) { - /* Lock to make assertions happy... */ - mutex_lock(&journal->j_checkpoint_mutex); - mark_journal_empty(journal); - mutex_unlock(&journal->j_checkpoint_mutex); - } - - no_recovery: - return err; -} - -/* - * journal_dev_name: format a character string to describe on what - * device this journal is present. - */ - -static const char *journal_dev_name(journal_t *journal, char *buffer) -{ - struct block_device *bdev; - - if (journal->j_inode) - bdev = journal->j_inode->i_sb->s_bdev; - else - bdev = journal->j_dev; - - return bdevname(bdev, buffer); -} - -/* - * Journal abort has very specific semantics, which we describe - * for journal abort. 
- * - * Two internal function, which provide abort to te jbd layer - * itself are here. - */ - -/* - * Quick version for internal journal use (doesn't lock the journal). - * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, - * and don't attempt to make any other journal updates. - */ -static void __journal_abort_hard(journal_t *journal) -{ - transaction_t *transaction; - char b[BDEVNAME_SIZE]; - - if (journal->j_flags & JFS_ABORT) - return; - - printk(KERN_ERR "Aborting journal on device %s.\n", - journal_dev_name(journal, b)); - - spin_lock(&journal->j_state_lock); - journal->j_flags |= JFS_ABORT; - transaction = journal->j_running_transaction; - if (transaction) - __log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); -} - -/* Soft abort: record the abort error status in the journal superblock, - * but don't do any other IO. */ -static void __journal_abort_soft (journal_t *journal, int errno) -{ - if (journal->j_flags & JFS_ABORT) - return; - - if (!journal->j_errno) - journal->j_errno = errno; - - __journal_abort_hard(journal); - - if (errno) - journal_update_sb_errno(journal); -} - -/** - * void journal_abort () - Shutdown the journal immediately. - * @journal: the journal to shutdown. - * @errno: an error number to record in the journal indicating - * the reason for the shutdown. - * - * Perform a complete, immediate shutdown of the ENTIRE - * journal (not of a single transaction). This operation cannot be - * undone without closing and reopening the journal. - * - * The journal_abort function is intended to support higher level error - * recovery mechanisms such as the ext2/ext3 remount-readonly error - * mode. - * - * Journal abort has very specific semantics. Any existing dirty, - * unjournaled buffers in the main filesystem will still be written to - * disk by bdflush, but the journaling mechanism will be suspended - * immediately and no further transaction commits will be honoured. 
- * - * Any dirty, journaled buffers will be written back to disk without - * hitting the journal. Atomicity cannot be guaranteed on an aborted - * filesystem, but we _do_ attempt to leave as much data as possible - * behind for fsck to use for cleanup. - * - * Any attempt to get a new transaction handle on a journal which is in - * ABORT state will just result in an -EROFS error return. A - * journal_stop on an existing handle will return -EIO if we have - * entered abort state during the update. - * - * Recursive transactions are not disturbed by journal abort until the - * final journal_stop, which will receive the -EIO error. - * - * Finally, the journal_abort call allows the caller to supply an errno - * which will be recorded (if possible) in the journal superblock. This - * allows a client to record failure conditions in the middle of a - * transaction without having to complete the transaction to record the - * failure to disk. ext3_error, for example, now uses this - * functionality. - * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). - * - */ - -void journal_abort(journal_t *journal, int errno) -{ - __journal_abort_soft(journal, errno); -} - -/** - * int journal_errno () - returns the journal's error state. - * @journal: journal to examine. - * - * This is the errno numbet set with journal_abort(), the last - * time the journal was mounted - if the journal was stopped - * without calling abort this will be 0. - * - * If the journal has been aborted on this mount time -EROFS will - * be returned. 
- */ -int journal_errno(journal_t *journal) -{ - int err; - - spin_lock(&journal->j_state_lock); - if (journal->j_flags & JFS_ABORT) - err = -EROFS; - else - err = journal->j_errno; - spin_unlock(&journal->j_state_lock); - return err; -} - -/** - * int journal_clear_err () - clears the journal's error state - * @journal: journal to act on. - * - * An error must be cleared or Acked to take a FS out of readonly - * mode. - */ -int journal_clear_err(journal_t *journal) -{ - int err = 0; - - spin_lock(&journal->j_state_lock); - if (journal->j_flags & JFS_ABORT) - err = -EROFS; - else - journal->j_errno = 0; - spin_unlock(&journal->j_state_lock); - return err; -} - -/** - * void journal_ack_err() - Ack journal err. - * @journal: journal to act on. - * - * An error must be cleared or Acked to take a FS out of readonly - * mode. - */ -void journal_ack_err(journal_t *journal) -{ - spin_lock(&journal->j_state_lock); - if (journal->j_errno) - journal->j_flags |= JFS_ACK_ERR; - spin_unlock(&journal->j_state_lock); -} - -int journal_blocks_per_page(struct inode *inode) -{ - return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); -} - -/* - * Journal_head storage management - */ -static struct kmem_cache *journal_head_cache; -#ifdef CONFIG_JBD_DEBUG -static atomic_t nr_journal_heads = ATOMIC_INIT(0); -#endif - -static int journal_init_journal_head_cache(void) -{ - int retval; - - J_ASSERT(journal_head_cache == NULL); - journal_head_cache = kmem_cache_create("journal_head", - sizeof(struct journal_head), - 0, /* offset */ - SLAB_TEMPORARY, /* flags */ - NULL); /* ctor */ - retval = 0; - if (!journal_head_cache) { - retval = -ENOMEM; - printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); - } - return retval; -} - -static void journal_destroy_journal_head_cache(void) -{ - if (journal_head_cache) { - kmem_cache_destroy(journal_head_cache); - journal_head_cache = NULL; - } -} - -/* - * journal_head splicing and dicing - */ -static struct journal_head 
*journal_alloc_journal_head(void) -{ - struct journal_head *ret; - -#ifdef CONFIG_JBD_DEBUG - atomic_inc(&nr_journal_heads); -#endif - ret = kmem_cache_zalloc(journal_head_cache, GFP_NOFS); - if (ret == NULL) { - jbd_debug(1, "out of memory for journal_head\n"); - printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n", - __func__); - - while (ret == NULL) { - yield(); - ret = kmem_cache_zalloc(journal_head_cache, GFP_NOFS); - } - } - return ret; -} - -static void journal_free_journal_head(struct journal_head *jh) -{ -#ifdef CONFIG_JBD_DEBUG - atomic_dec(&nr_journal_heads); - memset(jh, JBD_POISON_FREE, sizeof(*jh)); -#endif - kmem_cache_free(journal_head_cache, jh); -} - -/* - * A journal_head is attached to a buffer_head whenever JBD has an - * interest in the buffer. - * - * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit - * is set. This bit is tested in core kernel code where we need to take - * JBD-specific actions. Testing the zeroness of ->b_private is not reliable - * there. - * - * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. - * - * When a buffer has its BH_JBD bit set it is immune from being released by - * core kernel code, mainly via ->b_count. - * - * A journal_head is detached from its buffer_head when the journal_head's - * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint - * transaction (b_cp_transaction) hold their references to b_jcount. - * - * Various places in the kernel want to attach a journal_head to a buffer_head - * _before_ attaching the journal_head to a transaction. To protect the - * journal_head in this situation, journal_add_journal_head elevates the - * journal_head's b_jcount refcount by one. The caller must call - * journal_put_journal_head() to undo this. - * - * So the typical usage would be: - * - * (Attach a journal_head if needed. Increments b_jcount) - * struct journal_head *jh = journal_add_journal_head(bh); - * ... 
- * (Get another reference for transaction) - * journal_grab_journal_head(bh); - * jh->b_transaction = xxx; - * (Put original reference) - * journal_put_journal_head(jh); - */ - -/* - * Give a buffer_head a journal_head. - * - * May sleep. - */ -struct journal_head *journal_add_journal_head(struct buffer_head *bh) -{ - struct journal_head *jh; - struct journal_head *new_jh = NULL; - -repeat: - if (!buffer_jbd(bh)) - new_jh = journal_alloc_journal_head(); - - jbd_lock_bh_journal_head(bh); - if (buffer_jbd(bh)) { - jh = bh2jh(bh); - } else { - J_ASSERT_BH(bh, - (atomic_read(&bh->b_count) > 0) || - (bh->b_page && bh->b_page->mapping)); - - if (!new_jh) { - jbd_unlock_bh_journal_head(bh); - goto repeat; - } - - jh = new_jh; - new_jh = NULL; /* We consumed it */ - set_buffer_jbd(bh); - bh->b_private = jh; - jh->b_bh = bh; - get_bh(bh); - BUFFER_TRACE(bh, "added journal_head"); - } - jh->b_jcount++; - jbd_unlock_bh_journal_head(bh); - if (new_jh) - journal_free_journal_head(new_jh); - return bh->b_private; -} - -/* - * Grab a ref against this buffer_head's journal_head. 
If it ended up not - * having a journal_head, return NULL - */ -struct journal_head *journal_grab_journal_head(struct buffer_head *bh) -{ - struct journal_head *jh = NULL; - - jbd_lock_bh_journal_head(bh); - if (buffer_jbd(bh)) { - jh = bh2jh(bh); - jh->b_jcount++; - } - jbd_unlock_bh_journal_head(bh); - return jh; -} - -static void __journal_remove_journal_head(struct buffer_head *bh) -{ - struct journal_head *jh = bh2jh(bh); - - J_ASSERT_JH(jh, jh->b_jcount >= 0); - J_ASSERT_JH(jh, jh->b_transaction == NULL); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); - jbd_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); - jbd_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - journal_free_journal_head(jh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then - * release the journal_head from the buffer_head. 
- */ -void journal_put_journal_head(struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - jbd_lock_bh_journal_head(bh); - J_ASSERT_JH(jh, jh->b_jcount > 0); - --jh->b_jcount; - if (!jh->b_jcount) { - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); - __brelse(bh); - } else - jbd_unlock_bh_journal_head(bh); -} - -/* - * debugfs tunables - */ -#ifdef CONFIG_JBD_DEBUG - -u8 journal_enable_debug __read_mostly; -EXPORT_SYMBOL(journal_enable_debug); - -static struct dentry *jbd_debugfs_dir; -static struct dentry *jbd_debug; - -static void __init jbd_create_debugfs_entry(void) -{ - jbd_debugfs_dir = debugfs_create_dir("jbd", NULL); - if (jbd_debugfs_dir) - jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR, - jbd_debugfs_dir, - &journal_enable_debug); -} - -static void __exit jbd_remove_debugfs_entry(void) -{ - debugfs_remove(jbd_debug); - debugfs_remove(jbd_debugfs_dir); -} - -#else - -static inline void jbd_create_debugfs_entry(void) -{ -} - -static inline void jbd_remove_debugfs_entry(void) -{ -} - -#endif - -struct kmem_cache *jbd_handle_cache; - -static int __init journal_init_handle_cache(void) -{ - jbd_handle_cache = kmem_cache_create("journal_handle", - sizeof(handle_t), - 0, /* offset */ - SLAB_TEMPORARY, /* flags */ - NULL); /* ctor */ - if (jbd_handle_cache == NULL) { - printk(KERN_EMERG "JBD: failed to create handle cache\n"); - return -ENOMEM; - } - return 0; -} - -static void journal_destroy_handle_cache(void) -{ - if (jbd_handle_cache) - kmem_cache_destroy(jbd_handle_cache); -} - -/* - * Module startup and shutdown - */ - -static int __init journal_init_caches(void) -{ - int ret; - - ret = journal_init_revoke_caches(); - if (ret == 0) - ret = journal_init_journal_head_cache(); - if (ret == 0) - ret = journal_init_handle_cache(); - return ret; -} - -static void journal_destroy_caches(void) -{ - journal_destroy_revoke_caches(); - journal_destroy_journal_head_cache(); - journal_destroy_handle_cache(); -} - -static 
int __init journal_init(void) -{ - int ret; - - BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); - - ret = journal_init_caches(); - if (ret != 0) - journal_destroy_caches(); - jbd_create_debugfs_entry(); - return ret; -} - -static void __exit journal_exit(void) -{ -#ifdef CONFIG_JBD_DEBUG - int n = atomic_read(&nr_journal_heads); - if (n) - printk(KERN_ERR "JBD: leaked %d journal_heads!\n", n); -#endif - jbd_remove_debugfs_entry(); - journal_destroy_caches(); -} - -MODULE_LICENSE("GPL"); -module_init(journal_init); -module_exit(journal_exit); - diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c deleted file mode 100644 index a748fe21465a..000000000000 --- a/fs/jbd/recovery.c +++ /dev/null @@ -1,594 +0,0 @@ -/* - * linux/fs/jbd/recovery.c - * - * Written by Stephen C. Tweedie , 1999 - * - * Copyright 1999-2000 Red Hat Software --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Journal recovery routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. - */ - -#ifndef __KERNEL__ -#include "jfs_user.h" -#else -#include -#include -#include -#include -#include -#endif - -/* - * Maintain information about the progress of the recovery job, so that - * the different passes can carry information between them. 
- */ -struct recovery_info -{ - tid_t start_transaction; - tid_t end_transaction; - - int nr_replays; - int nr_revokes; - int nr_revoke_hits; -}; - -enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; -static int do_one_pass(journal_t *journal, - struct recovery_info *info, enum passtype pass); -static int scan_revoke_records(journal_t *, struct buffer_head *, - tid_t, struct recovery_info *); - -#ifdef __KERNEL__ - -/* Release readahead buffers after use */ -static void journal_brelse_array(struct buffer_head *b[], int n) -{ - while (--n >= 0) - brelse (b[n]); -} - - -/* - * When reading from the journal, we are going through the block device - * layer directly and so there is no readahead being done for us. We - * need to implement any readahead ourselves if we want it to happen at - * all. Recovery is basically one long sequential read, so make sure we - * do the IO in reasonably large chunks. - * - * This is not so critical that we need to be enormously clever about - * the readahead size, though. 128K is a purely arbitrary, good-enough - * fixed value. - */ - -#define MAXBUF 8 -static int do_readahead(journal_t *journal, unsigned int start) -{ - int err; - unsigned int max, nbufs, next; - unsigned int blocknr; - struct buffer_head *bh; - - struct buffer_head * bufs[MAXBUF]; - - /* Do up to 128K of readahead */ - max = start + (128 * 1024 / journal->j_blocksize); - if (max > journal->j_maxlen) - max = journal->j_maxlen; - - /* Do the readahead itself. We'll submit MAXBUF buffer_heads at - * a time to the block device IO layer. 
*/ - - nbufs = 0; - - for (next = start; next < max; next++) { - err = journal_bmap(journal, next, &blocknr); - - if (err) { - printk (KERN_ERR "JBD: bad block at offset %u\n", - next); - goto failed; - } - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) { - err = -ENOMEM; - goto failed; - } - - if (!buffer_uptodate(bh) && !buffer_locked(bh)) { - bufs[nbufs++] = bh; - if (nbufs == MAXBUF) { - ll_rw_block(READ, nbufs, bufs); - journal_brelse_array(bufs, nbufs); - nbufs = 0; - } - } else - brelse(bh); - } - - if (nbufs) - ll_rw_block(READ, nbufs, bufs); - err = 0; - -failed: - if (nbufs) - journal_brelse_array(bufs, nbufs); - return err; -} - -#endif /* __KERNEL__ */ - - -/* - * Read a block from the journal - */ - -static int jread(struct buffer_head **bhp, journal_t *journal, - unsigned int offset) -{ - int err; - unsigned int blocknr; - struct buffer_head *bh; - - *bhp = NULL; - - if (offset >= journal->j_maxlen) { - printk(KERN_ERR "JBD: corrupted journal superblock\n"); - return -EIO; - } - - err = journal_bmap(journal, offset, &blocknr); - - if (err) { - printk (KERN_ERR "JBD: bad block at offset %u\n", - offset); - return err; - } - - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - if (!bh) - return -ENOMEM; - - if (!buffer_uptodate(bh)) { - /* If this is a brand new buffer, start readahead. - Otherwise, we assume we are already reading it. */ - if (!buffer_req(bh)) - do_readahead(journal, offset); - wait_on_buffer(bh); - } - - if (!buffer_uptodate(bh)) { - printk (KERN_ERR "JBD: Failed to read block at offset %u\n", - offset); - brelse(bh); - return -EIO; - } - - *bhp = bh; - return 0; -} - - -/* - * Count the number of in-use tags in a journal descriptor block. 
- */ - -static int count_tags(struct buffer_head *bh, int size) -{ - char * tagp; - journal_block_tag_t * tag; - int nr = 0; - - tagp = &bh->b_data[sizeof(journal_header_t)]; - - while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { - tag = (journal_block_tag_t *) tagp; - - nr++; - tagp += sizeof(journal_block_tag_t); - if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) - tagp += 16; - - if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG)) - break; - } - - return nr; -} - - -/* Make sure we wrap around the log correctly! */ -#define wrap(journal, var) \ -do { \ - if (var >= (journal)->j_last) \ - var -= ((journal)->j_last - (journal)->j_first); \ -} while (0) - -/** - * journal_recover - recovers a on-disk journal - * @journal: the journal to recover - * - * The primary function for recovering the log contents when mounting a - * journaled device. - * - * Recovery is done in three passes. In the first pass, we look for the - * end of the log. In the second, we assemble the list of revoke - * blocks. In the third and final pass, we replay any un-revoked blocks - * in the log. - */ -int journal_recover(journal_t *journal) -{ - int err, err2; - journal_superblock_t * sb; - - struct recovery_info info; - - memset(&info, 0, sizeof(info)); - sb = journal->j_superblock; - - /* - * The journal superblock's s_start field (the current log head) - * is always zero if, and only if, the journal was cleanly - * unmounted. 
- */ - - if (!sb->s_start) { - jbd_debug(1, "No recovery required, last transaction %d\n", - be32_to_cpu(sb->s_sequence)); - journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; - return 0; - } - - err = do_one_pass(journal, &info, PASS_SCAN); - if (!err) - err = do_one_pass(journal, &info, PASS_REVOKE); - if (!err) - err = do_one_pass(journal, &info, PASS_REPLAY); - - jbd_debug(1, "JBD: recovery, exit status %d, " - "recovered transactions %u to %u\n", - err, info.start_transaction, info.end_transaction); - jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", - info.nr_replays, info.nr_revoke_hits, info.nr_revokes); - - /* Restart the log at the next transaction ID, thus invalidating - * any existing commit records in the log. */ - journal->j_transaction_sequence = ++info.end_transaction; - - journal_clear_revoke(journal); - err2 = sync_blockdev(journal->j_fs_dev); - if (!err) - err = err2; - /* Flush disk caches to get replayed data on the permanent storage */ - if (journal->j_flags & JFS_BARRIER) { - err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); - if (!err) - err = err2; - } - - return err; -} - -/** - * journal_skip_recovery - Start journal and wipe exiting records - * @journal: journal to startup - * - * Locate any valid recovery information from the journal and set up the - * journal structures in memory to ignore it (presumably because the - * caller has evidence that it is out of date). - * This function does'nt appear to be exorted.. - * - * We perform one pass over the journal to allow us to tell the user how - * much recovery information is being erased, and to let us initialise - * the journal transaction sequence numbers to the next unused ID. 
- */ -int journal_skip_recovery(journal_t *journal) -{ - int err; - struct recovery_info info; - - memset (&info, 0, sizeof(info)); - - err = do_one_pass(journal, &info, PASS_SCAN); - - if (err) { - printk(KERN_ERR "JBD: error %d scanning journal\n", err); - ++journal->j_transaction_sequence; - } else { -#ifdef CONFIG_JBD_DEBUG - int dropped = info.end_transaction - - be32_to_cpu(journal->j_superblock->s_sequence); - jbd_debug(1, - "JBD: ignoring %d transaction%s from the journal.\n", - dropped, (dropped == 1) ? "" : "s"); -#endif - journal->j_transaction_sequence = ++info.end_transaction; - } - - journal->j_tail = 0; - return err; -} - -static int do_one_pass(journal_t *journal, - struct recovery_info *info, enum passtype pass) -{ - unsigned int first_commit_ID, next_commit_ID; - unsigned int next_log_block; - int err, success = 0; - journal_superblock_t * sb; - journal_header_t * tmp; - struct buffer_head * bh; - unsigned int sequence; - int blocktype; - - /* - * First thing is to establish what we expect to find in the log - * (in terms of transaction IDs), and where (in terms of log - * block offsets): query the superblock. - */ - - sb = journal->j_superblock; - next_commit_ID = be32_to_cpu(sb->s_sequence); - next_log_block = be32_to_cpu(sb->s_start); - - first_commit_ID = next_commit_ID; - if (pass == PASS_SCAN) - info->start_transaction = first_commit_ID; - - jbd_debug(1, "Starting recovery pass %d\n", pass); - - /* - * Now we walk through the log, transaction by transaction, - * making sure that each transaction has a commit block in the - * expected place. Each complete transaction gets replayed back - * into the main filesystem. - */ - - while (1) { - int flags; - char * tagp; - journal_block_tag_t * tag; - struct buffer_head * obh; - struct buffer_head * nbh; - - cond_resched(); - - /* If we already know where to stop the log traversal, - * check right now that we haven't gone past the end of - * the log. 
*/ - - if (pass != PASS_SCAN) - if (tid_geq(next_commit_ID, info->end_transaction)) - break; - - jbd_debug(2, "Scanning for sequence ID %u at %u/%u\n", - next_commit_ID, next_log_block, journal->j_last); - - /* Skip over each chunk of the transaction looking - * either the next descriptor block or the final commit - * record. */ - - jbd_debug(3, "JBD: checking block %u\n", next_log_block); - err = jread(&bh, journal, next_log_block); - if (err) - goto failed; - - next_log_block++; - wrap(journal, next_log_block); - - /* What kind of buffer is it? - * - * If it is a descriptor block, check that it has the - * expected sequence number. Otherwise, we're all done - * here. */ - - tmp = (journal_header_t *)bh->b_data; - - if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) { - brelse(bh); - break; - } - - blocktype = be32_to_cpu(tmp->h_blocktype); - sequence = be32_to_cpu(tmp->h_sequence); - jbd_debug(3, "Found magic %d, sequence %d\n", - blocktype, sequence); - - if (sequence != next_commit_ID) { - brelse(bh); - break; - } - - /* OK, we have a valid descriptor block which matches - * all of the sequence number checks. What are we going - * to do with it? That depends on the pass... */ - - switch(blocktype) { - case JFS_DESCRIPTOR_BLOCK: - /* If it is a valid descriptor block, replay it - * in pass REPLAY; otherwise, just skip over the - * blocks it describes. */ - if (pass != PASS_REPLAY) { - next_log_block += - count_tags(bh, journal->j_blocksize); - wrap(journal, next_log_block); - brelse(bh); - continue; - } - - /* A descriptor block: we can now write all of - * the data blocks. Yay, useful work is finally - * getting done here! 
*/ - - tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) - <= journal->j_blocksize) { - unsigned int io_block; - - tag = (journal_block_tag_t *) tagp; - flags = be32_to_cpu(tag->t_flags); - - io_block = next_log_block++; - wrap(journal, next_log_block); - err = jread(&obh, journal, io_block); - if (err) { - /* Recover what we can, but - * report failure at the end. */ - success = err; - printk (KERN_ERR - "JBD: IO error %d recovering " - "block %u in log\n", - err, io_block); - } else { - unsigned int blocknr; - - J_ASSERT(obh != NULL); - blocknr = be32_to_cpu(tag->t_blocknr); - - /* If the block has been - * revoked, then we're all done - * here. */ - if (journal_test_revoke - (journal, blocknr, - next_commit_ID)) { - brelse(obh); - ++info->nr_revoke_hits; - goto skip_write; - } - - /* Find a buffer for the new - * data being restored */ - nbh = __getblk(journal->j_fs_dev, - blocknr, - journal->j_blocksize); - if (nbh == NULL) { - printk(KERN_ERR - "JBD: Out of memory " - "during recovery.\n"); - err = -ENOMEM; - brelse(bh); - brelse(obh); - goto failed; - } - - lock_buffer(nbh); - memcpy(nbh->b_data, obh->b_data, - journal->j_blocksize); - if (flags & JFS_FLAG_ESCAPE) { - *((__be32 *)nbh->b_data) = - cpu_to_be32(JFS_MAGIC_NUMBER); - } - - BUFFER_TRACE(nbh, "marking dirty"); - set_buffer_uptodate(nbh); - mark_buffer_dirty(nbh); - BUFFER_TRACE(nbh, "marking uptodate"); - ++info->nr_replays; - /* ll_rw_block(WRITE, 1, &nbh); */ - unlock_buffer(nbh); - brelse(obh); - brelse(nbh); - } - - skip_write: - tagp += sizeof(journal_block_tag_t); - if (!(flags & JFS_FLAG_SAME_UUID)) - tagp += 16; - - if (flags & JFS_FLAG_LAST_TAG) - break; - } - - brelse(bh); - continue; - - case JFS_COMMIT_BLOCK: - /* Found an expected commit block: not much to - * do other than move on to the next sequence - * number. 
*/ - brelse(bh); - next_commit_ID++; - continue; - - case JFS_REVOKE_BLOCK: - /* If we aren't in the REVOKE pass, then we can - * just skip over this block. */ - if (pass != PASS_REVOKE) { - brelse(bh); - continue; - } - - err = scan_revoke_records(journal, bh, - next_commit_ID, info); - brelse(bh); - if (err) - goto failed; - continue; - - default: - jbd_debug(3, "Unrecognised magic %d, end of scan.\n", - blocktype); - brelse(bh); - goto done; - } - } - - done: - /* - * We broke out of the log scan loop: either we came to the - * known end of the log or we found an unexpected block in the - * log. If the latter happened, then we know that the "current" - * transaction marks the end of the valid log. - */ - - if (pass == PASS_SCAN) - info->end_transaction = next_commit_ID; - else { - /* It's really bad news if different passes end up at - * different places (but possible due to IO errors). */ - if (info->end_transaction != next_commit_ID) { - printk (KERN_ERR "JBD: recovery pass %d ended at " - "transaction %u, expected %u\n", - pass, next_commit_ID, info->end_transaction); - if (!success) - success = -EIO; - } - } - - return success; - - failed: - return err; -} - - -/* Scan a revoke record, marking all blocks mentioned as revoked. 
*/ - -static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, - tid_t sequence, struct recovery_info *info) -{ - journal_revoke_header_t *header; - int offset, max; - - header = (journal_revoke_header_t *) bh->b_data; - offset = sizeof(journal_revoke_header_t); - max = be32_to_cpu(header->r_count); - - while (offset < max) { - unsigned int blocknr; - int err; - - blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); - offset += 4; - err = journal_set_revoke(journal, blocknr, sequence); - if (err) - return err; - ++info->nr_revokes; - } - return 0; -} diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c deleted file mode 100644 index dcead636c33b..000000000000 --- a/fs/jbd/revoke.c +++ /dev/null @@ -1,733 +0,0 @@ -/* - * linux/fs/jbd/revoke.c - * - * Written by Stephen C. Tweedie , 2000 - * - * Copyright 2000 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Journal revoke routines for the generic filesystem journaling code; - * part of the ext2fs journaling system. - * - * Revoke is the mechanism used to prevent old log records for deleted - * metadata from being replayed on top of newer data using the same - * blocks. The revoke mechanism is used in two separate places: - * - * + Commit: during commit we write the entire list of the current - * transaction's revoked blocks to the journal - * - * + Recovery: during recovery we record the transaction ID of all - * revoked blocks. If there are multiple revoke records in the log - * for a single block, only the last one counts, and if there is a log - * entry for a block beyond the last revoke, then that log entry still - * gets replayed. 
- * - * We can get interactions between revokes and new log data within a - * single transaction: - * - * Block is revoked and then journaled: - * The desired end result is the journaling of the new block, so we - * cancel the revoke before the transaction commits. - * - * Block is journaled and then revoked: - * The revoke must take precedence over the write of the block, so we - * need either to cancel the journal entry or to write the revoke - * later in the log than the log block. In this case, we choose the - * latter: journaling a block cancels any revoke record for that block - * in the current transaction, so any revoke for that block in the - * transaction must have happened after the block was journaled and so - * the revoke must take precedence. - * - * Block is revoked and then written as data: - * The data write is allowed to succeed, but the revoke is _not_ - * cancelled. We still need to prevent old log records from - * overwriting the new data. We don't even need to clear the revoke - * bit here. - * - * We cache revoke status of a buffer in the current transaction in b_states - * bits. As the name says, revokevalid flag indicates that the cached revoke - * status of a buffer is valid and we can rely on the cached status. - * - * Revoke information on buffers is a tri-state value: - * - * RevokeValid clear: no cached revoke status, need to look it up - * RevokeValid set, Revoked clear: - * buffer has not been revoked, and cancel_revoke - * need do nothing. - * RevokeValid set, Revoked set: - * buffer has been revoked. - * - * Locking rules: - * We keep two hash tables of revoke records. One hashtable belongs to the - * running transaction (is pointed to by journal->j_revoke), the other one - * belongs to the committing transaction. Accesses to the second hash table - * happen only from the kjournald and no other thread touches this table. 
Also - * journal_switch_revoke_table() which switches which hashtable belongs to the - * running and which to the committing transaction is called only from - * kjournald. Therefore we need no locks when accessing the hashtable belonging - * to the committing transaction. - * - * All users operating on the hash table belonging to the running transaction - * have a handle to the transaction. Therefore they are safe from kjournald - * switching hash tables under them. For operations on the lists of entries in - * the hash table j_revoke_lock is used. - * - * Finally, also replay code uses the hash tables but at this moment no one else - * can touch them (filesystem isn't mounted yet) and hence no locking is - * needed. - */ - -#ifndef __KERNEL__ -#include "jfs_user.h" -#else -#include -#include -#include -#include -#include -#include -#include -#include -#endif -#include -#include - -static struct kmem_cache *revoke_record_cache; -static struct kmem_cache *revoke_table_cache; - -/* Each revoke record represents one single revoked block. During - journal replay, this involves recording the transaction ID of the - last transaction to revoke this block. */ - -struct jbd_revoke_record_s -{ - struct list_head hash; - tid_t sequence; /* Used for recovery only */ - unsigned int blocknr; -}; - - -/* The revoke table is just a simple hash table of revoke records. */ -struct jbd_revoke_table_s -{ - /* It is conceivable that we might want a larger hash table - * for recovery. Must be a power of two. 
*/ - int hash_size; - int hash_shift; - struct list_head *hash_table; -}; - - -#ifdef __KERNEL__ -static void write_one_revoke_record(journal_t *, transaction_t *, - struct journal_head **, int *, - struct jbd_revoke_record_s *, int); -static void flush_descriptor(journal_t *, struct journal_head *, int, int); -#endif - -/* Utility functions to maintain the revoke table */ - -static inline int hash(journal_t *journal, unsigned int block) -{ - struct jbd_revoke_table_s *table = journal->j_revoke; - - return hash_32(block, table->hash_shift); -} - -static int insert_revoke_hash(journal_t *journal, unsigned int blocknr, - tid_t seq) -{ - struct list_head *hash_list; - struct jbd_revoke_record_s *record; - -repeat: - record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS); - if (!record) - goto oom; - - record->sequence = seq; - record->blocknr = blocknr; - hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; - spin_lock(&journal->j_revoke_lock); - list_add(&record->hash, hash_list); - spin_unlock(&journal->j_revoke_lock); - return 0; - -oom: - if (!journal_oom_retry) - return -ENOMEM; - jbd_debug(1, "ENOMEM in %s, retrying\n", __func__); - yield(); - goto repeat; -} - -/* Find a revoke record in the journal's hash table. 
*/ - -static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, - unsigned int blocknr) -{ - struct list_head *hash_list; - struct jbd_revoke_record_s *record; - - hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; - - spin_lock(&journal->j_revoke_lock); - record = (struct jbd_revoke_record_s *) hash_list->next; - while (&(record->hash) != hash_list) { - if (record->blocknr == blocknr) { - spin_unlock(&journal->j_revoke_lock); - return record; - } - record = (struct jbd_revoke_record_s *) record->hash.next; - } - spin_unlock(&journal->j_revoke_lock); - return NULL; -} - -void journal_destroy_revoke_caches(void) -{ - if (revoke_record_cache) { - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - } - if (revoke_table_cache) { - kmem_cache_destroy(revoke_table_cache); - revoke_table_cache = NULL; - } -} - -int __init journal_init_revoke_caches(void) -{ - J_ASSERT(!revoke_record_cache); - J_ASSERT(!revoke_table_cache); - - revoke_record_cache = kmem_cache_create("revoke_record", - sizeof(struct jbd_revoke_record_s), - 0, - SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, - NULL); - if (!revoke_record_cache) - goto record_cache_failure; - - revoke_table_cache = kmem_cache_create("revoke_table", - sizeof(struct jbd_revoke_table_s), - 0, SLAB_TEMPORARY, NULL); - if (!revoke_table_cache) - goto table_cache_failure; - - return 0; - -table_cache_failure: - journal_destroy_revoke_caches(); -record_cache_failure: - return -ENOMEM; -} - -static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size) -{ - int i; - struct jbd_revoke_table_s *table; - - table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); - if (!table) - goto out; - - table->hash_size = hash_size; - table->hash_shift = ilog2(hash_size); - table->hash_table = - kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); - if (!table->hash_table) { - kmem_cache_free(revoke_table_cache, table); - table = NULL; - goto out; - } - - for (i = 0; i < hash_size; i++) 
- INIT_LIST_HEAD(&table->hash_table[i]); - -out: - return table; -} - -static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table) -{ - int i; - struct list_head *hash_list; - - for (i = 0; i < table->hash_size; i++) { - hash_list = &table->hash_table[i]; - J_ASSERT(list_empty(hash_list)); - } - - kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); -} - -/* Initialise the revoke table for a given journal to a given size. */ -int journal_init_revoke(journal_t *journal, int hash_size) -{ - J_ASSERT(journal->j_revoke_table[0] == NULL); - J_ASSERT(is_power_of_2(hash_size)); - - journal->j_revoke_table[0] = journal_init_revoke_table(hash_size); - if (!journal->j_revoke_table[0]) - goto fail0; - - journal->j_revoke_table[1] = journal_init_revoke_table(hash_size); - if (!journal->j_revoke_table[1]) - goto fail1; - - journal->j_revoke = journal->j_revoke_table[1]; - - spin_lock_init(&journal->j_revoke_lock); - - return 0; - -fail1: - journal_destroy_revoke_table(journal->j_revoke_table[0]); -fail0: - return -ENOMEM; -} - -/* Destroy a journal's revoke table. The table must already be empty! */ -void journal_destroy_revoke(journal_t *journal) -{ - journal->j_revoke = NULL; - if (journal->j_revoke_table[0]) - journal_destroy_revoke_table(journal->j_revoke_table[0]); - if (journal->j_revoke_table[1]) - journal_destroy_revoke_table(journal->j_revoke_table[1]); -} - - -#ifdef __KERNEL__ - -/* - * journal_revoke: revoke a given buffer_head from the journal. This - * prevents the block from being replayed during recovery if we take a - * crash after this current transaction commits. Any subsequent - * metadata writes of the buffer in this transaction cancel the - * revoke. - * - * Note that this call may block --- it is up to the caller to make - * sure that there are no further calls to journal_write_metadata - * before the revoke is complete. 
In ext3, this implies calling the - * revoke before clearing the block bitmap when we are deleting - * metadata. - * - * Revoke performs a journal_forget on any buffer_head passed in as a - * parameter, but does _not_ forget the buffer_head if the bh was only - * found implicitly. - * - * bh_in may not be a journalled buffer - it may have come off - * the hash tables without an attached journal_head. - * - * If bh_in is non-zero, journal_revoke() will decrement its b_count - * by one. - */ - -int journal_revoke(handle_t *handle, unsigned int blocknr, - struct buffer_head *bh_in) -{ - struct buffer_head *bh = NULL; - journal_t *journal; - struct block_device *bdev; - int err; - - might_sleep(); - if (bh_in) - BUFFER_TRACE(bh_in, "enter"); - - journal = handle->h_transaction->t_journal; - if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){ - J_ASSERT (!"Cannot set revoke feature!"); - return -EINVAL; - } - - bdev = journal->j_fs_dev; - bh = bh_in; - - if (!bh) { - bh = __find_get_block(bdev, blocknr, journal->j_blocksize); - if (bh) - BUFFER_TRACE(bh, "found on hash"); - } -#ifdef JBD_EXPENSIVE_CHECKING - else { - struct buffer_head *bh2; - - /* If there is a different buffer_head lying around in - * memory anywhere... */ - bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); - if (bh2) { - /* ... and it has RevokeValid status... */ - if (bh2 != bh && buffer_revokevalid(bh2)) - /* ...then it better be revoked too, - * since it's illegal to create a revoke - * record against a buffer_head which is - * not marked revoked --- that would - * risk missing a subsequent revoke - * cancel. */ - J_ASSERT_BH(bh2, buffer_revoked(bh2)); - put_bh(bh2); - } - } -#endif - - /* We really ought not ever to revoke twice in a row without - first having the revoke cancelled: it's illegal to free a - block twice without allocating it in between! 
*/ - if (bh) { - if (!J_EXPECT_BH(bh, !buffer_revoked(bh), - "inconsistent data on disk")) { - if (!bh_in) - brelse(bh); - return -EIO; - } - set_buffer_revoked(bh); - set_buffer_revokevalid(bh); - if (bh_in) { - BUFFER_TRACE(bh_in, "call journal_forget"); - journal_forget(handle, bh_in); - } else { - BUFFER_TRACE(bh, "call brelse"); - __brelse(bh); - } - } - - jbd_debug(2, "insert revoke for block %u, bh_in=%p\n", blocknr, bh_in); - err = insert_revoke_hash(journal, blocknr, - handle->h_transaction->t_tid); - BUFFER_TRACE(bh_in, "exit"); - return err; -} - -/* - * Cancel an outstanding revoke. For use only internally by the - * journaling code (called from journal_get_write_access). - * - * We trust buffer_revoked() on the buffer if the buffer is already - * being journaled: if there is no revoke pending on the buffer, then we - * don't do anything here. - * - * This would break if it were possible for a buffer to be revoked and - * discarded, and then reallocated within the same transaction. In such - * a case we would have lost the revoked bit, but when we arrived here - * the second time we would still have a pending revoke to cancel. So, - * do not trust the Revoked bit on buffers unless RevokeValid is also - * set. - */ -int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) -{ - struct jbd_revoke_record_s *record; - journal_t *journal = handle->h_transaction->t_journal; - int need_cancel; - int did_revoke = 0; /* akpm: debug */ - struct buffer_head *bh = jh2bh(jh); - - jbd_debug(4, "journal_head %p, cancelling revoke\n", jh); - - /* Is the existing Revoke bit valid? If so, we trust it, and - * only perform the full cancel if the revoke bit is set. If - * not, we can't trust the revoke bit, and we need to do the - * full search for a revoke record. 
*/ - if (test_set_buffer_revokevalid(bh)) { - need_cancel = test_clear_buffer_revoked(bh); - } else { - need_cancel = 1; - clear_buffer_revoked(bh); - } - - if (need_cancel) { - record = find_revoke_record(journal, bh->b_blocknr); - if (record) { - jbd_debug(4, "cancelled existing revoke on " - "blocknr %llu\n", (unsigned long long)bh->b_blocknr); - spin_lock(&journal->j_revoke_lock); - list_del(&record->hash); - spin_unlock(&journal->j_revoke_lock); - kmem_cache_free(revoke_record_cache, record); - did_revoke = 1; - } - } - -#ifdef JBD_EXPENSIVE_CHECKING - /* There better not be one left behind by now! */ - record = find_revoke_record(journal, bh->b_blocknr); - J_ASSERT_JH(jh, record == NULL); -#endif - - /* Finally, have we just cleared revoke on an unhashed - * buffer_head? If so, we'd better make sure we clear the - * revoked status on any hashed alias too, otherwise the revoke - * state machine will get very upset later on. */ - if (need_cancel) { - struct buffer_head *bh2; - bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); - if (bh2) { - if (bh2 != bh) - clear_buffer_revoked(bh2); - __brelse(bh2); - } - } - return did_revoke; -} - -/* - * journal_clear_revoked_flags clears revoked flag of buffers in - * revoke table to reflect there is no revoked buffer in the next - * transaction which is going to be started. 
- */ -void journal_clear_buffer_revoked_flags(journal_t *journal) -{ - struct jbd_revoke_table_s *revoke = journal->j_revoke; - int i = 0; - - for (i = 0; i < revoke->hash_size; i++) { - struct list_head *hash_list; - struct list_head *list_entry; - hash_list = &revoke->hash_table[i]; - - list_for_each(list_entry, hash_list) { - struct jbd_revoke_record_s *record; - struct buffer_head *bh; - record = (struct jbd_revoke_record_s *)list_entry; - bh = __find_get_block(journal->j_fs_dev, - record->blocknr, - journal->j_blocksize); - if (bh) { - clear_buffer_revoked(bh); - __brelse(bh); - } - } - } -} - -/* journal_switch_revoke table select j_revoke for next transaction - * we do not want to suspend any processing until all revokes are - * written -bzzz - */ -void journal_switch_revoke_table(journal_t *journal) -{ - int i; - - if (journal->j_revoke == journal->j_revoke_table[0]) - journal->j_revoke = journal->j_revoke_table[1]; - else - journal->j_revoke = journal->j_revoke_table[0]; - - for (i = 0; i < journal->j_revoke->hash_size; i++) - INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); -} - -/* - * Write revoke records to the journal for all entries in the current - * revoke hash, deleting the entries as we go. - */ -void journal_write_revoke_records(journal_t *journal, - transaction_t *transaction, int write_op) -{ - struct journal_head *descriptor; - struct jbd_revoke_record_s *record; - struct jbd_revoke_table_s *revoke; - struct list_head *hash_list; - int i, offset, count; - - descriptor = NULL; - offset = 0; - count = 0; - - /* select revoke table for committing transaction */ - revoke = journal->j_revoke == journal->j_revoke_table[0] ? 
- journal->j_revoke_table[1] : journal->j_revoke_table[0]; - - for (i = 0; i < revoke->hash_size; i++) { - hash_list = &revoke->hash_table[i]; - - while (!list_empty(hash_list)) { - record = (struct jbd_revoke_record_s *) - hash_list->next; - write_one_revoke_record(journal, transaction, - &descriptor, &offset, - record, write_op); - count++; - list_del(&record->hash); - kmem_cache_free(revoke_record_cache, record); - } - } - if (descriptor) - flush_descriptor(journal, descriptor, offset, write_op); - jbd_debug(1, "Wrote %d revoke records\n", count); -} - -/* - * Write out one revoke record. We need to create a new descriptor - * block if the old one is full or if we have not already created one. - */ - -static void write_one_revoke_record(journal_t *journal, - transaction_t *transaction, - struct journal_head **descriptorp, - int *offsetp, - struct jbd_revoke_record_s *record, - int write_op) -{ - struct journal_head *descriptor; - int offset; - journal_header_t *header; - - /* If we are already aborting, this all becomes a noop. We - still need to go round the loop in - journal_write_revoke_records in order to free all of the - revoke records: only the IO to the journal is omitted. 
*/ - if (is_journal_aborted(journal)) - return; - - descriptor = *descriptorp; - offset = *offsetp; - - /* Make sure we have a descriptor with space left for the record */ - if (descriptor) { - if (offset == journal->j_blocksize) { - flush_descriptor(journal, descriptor, offset, write_op); - descriptor = NULL; - } - } - - if (!descriptor) { - descriptor = journal_get_descriptor_buffer(journal); - if (!descriptor) - return; - header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; - header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK); - header->h_sequence = cpu_to_be32(transaction->t_tid); - - /* Record it so that we can wait for IO completion later */ - JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - journal_file_buffer(descriptor, transaction, BJ_LogCtl); - - offset = sizeof(journal_revoke_header_t); - *descriptorp = descriptor; - } - - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = - cpu_to_be32(record->blocknr); - offset += 4; - *offsetp = offset; -} - -/* - * Flush a revoke descriptor out to the journal. If we are aborting, - * this is a noop; otherwise we are generating a buffer which needs to - * be waited for during commit, so it has to go onto the appropriate - * journal buffer list. - */ - -static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, - int offset, int write_op) -{ - journal_revoke_header_t *header; - struct buffer_head *bh = jh2bh(descriptor); - - if (is_journal_aborted(journal)) { - put_bh(bh); - return; - } - - header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data; - header->r_count = cpu_to_be32(offset); - set_buffer_jwrite(bh); - BUFFER_TRACE(bh, "write"); - set_buffer_dirty(bh); - write_dirty_buffer(bh, write_op); -} -#endif - -/* - * Revoke support for recovery. 
- * - * Recovery needs to be able to: - * - * record all revoke records, including the tid of the latest instance - * of each revoke in the journal - * - * check whether a given block in a given transaction should be replayed - * (ie. has not been revoked by a revoke record in that or a subsequent - * transaction) - * - * empty the revoke table after recovery. - */ - -/* - * First, setting revoke records. We create a new revoke record for - * every block ever revoked in the log as we scan it for recovery, and - * we update the existing records if we find multiple revokes for a - * single block. - */ - -int journal_set_revoke(journal_t *journal, - unsigned int blocknr, - tid_t sequence) -{ - struct jbd_revoke_record_s *record; - - record = find_revoke_record(journal, blocknr); - if (record) { - /* If we have multiple occurrences, only record the - * latest sequence number in the hashed record */ - if (tid_gt(sequence, record->sequence)) - record->sequence = sequence; - return 0; - } - return insert_revoke_hash(journal, blocknr, sequence); -} - -/* - * Test revoke records. For a given block referenced in the log, has - * that block been revoked? A revoke record with a given transaction - * sequence number revokes all blocks in that transaction and earlier - * ones, but later transactions still need replayed. - */ - -int journal_test_revoke(journal_t *journal, - unsigned int blocknr, - tid_t sequence) -{ - struct jbd_revoke_record_s *record; - - record = find_revoke_record(journal, blocknr); - if (!record) - return 0; - if (tid_gt(sequence, record->sequence)) - return 0; - return 1; -} - -/* - * Finally, once recovery is over, we need to clear the revoke table so - * that it can be reused by the running filesystem. 
- */ - -void journal_clear_revoke(journal_t *journal) -{ - int i; - struct list_head *hash_list; - struct jbd_revoke_record_s *record; - struct jbd_revoke_table_s *revoke; - - revoke = journal->j_revoke; - - for (i = 0; i < revoke->hash_size; i++) { - hash_list = &revoke->hash_table[i]; - while (!list_empty(hash_list)) { - record = (struct jbd_revoke_record_s*) hash_list->next; - list_del(&record->hash); - kmem_cache_free(revoke_record_cache, record); - } - } -} diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c deleted file mode 100644 index 1695ba8334a2..000000000000 --- a/fs/jbd/transaction.c +++ /dev/null @@ -1,2237 +0,0 @@ -/* - * linux/fs/jbd/transaction.c - * - * Written by Stephen C. Tweedie , 1998 - * - * Copyright 1998 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Generic filesystem transaction handling code; part of the ext2fs - * journaling system. - * - * This file manages transactions (compound commits managed by the - * journaling code) and handles (individual atomic operations by the - * filesystem). - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static void __journal_temp_unlink_buffer(struct journal_head *jh); - -/* - * get_transaction: obtain a new transaction_t object. - * - * Simply allocate and initialise a new transaction. Create it in - * RUNNING state and add it to the current journal (which should not - * have an existing running transaction: we only make a new transaction - * once we have started to commit the old one). - * - * Preconditions: - * The journal MUST be locked. We don't perform atomic mallocs on the - * new transaction and we can't block without protecting against other - * processes trying to touch the journal while it is in transition. 
- * - * Called under j_state_lock - */ - -static transaction_t * -get_transaction(journal_t *journal, transaction_t *transaction) -{ - transaction->t_journal = journal; - transaction->t_state = T_RUNNING; - transaction->t_start_time = ktime_get(); - transaction->t_tid = journal->j_transaction_sequence++; - transaction->t_expires = jiffies + journal->j_commit_interval; - spin_lock_init(&transaction->t_handle_lock); - - /* Set up the commit timer for the new transaction. */ - journal->j_commit_timer.expires = - round_jiffies_up(transaction->t_expires); - add_timer(&journal->j_commit_timer); - - J_ASSERT(journal->j_running_transaction == NULL); - journal->j_running_transaction = transaction; - - return transaction; -} - -/* - * Handle management. - * - * A handle_t is an object which represents a single atomic update to a - * filesystem, and which tracks all of the modifications which form part - * of that one update. - */ - -/* - * start_this_handle: Given a handle, deal with any locking or stalling - * needed to make sure that there is enough journal space for the handle - * to begin. Attach the handle to a transaction and set up the - * transaction's buffer credits. 
- */ - -static int start_this_handle(journal_t *journal, handle_t *handle) -{ - transaction_t *transaction; - int needed; - int nblocks = handle->h_buffer_credits; - transaction_t *new_transaction = NULL; - int ret = 0; - - if (nblocks > journal->j_max_transaction_buffers) { - printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", - current->comm, nblocks, - journal->j_max_transaction_buffers); - ret = -ENOSPC; - goto out; - } - -alloc_transaction: - if (!journal->j_running_transaction) { - new_transaction = kzalloc(sizeof(*new_transaction), - GFP_NOFS|__GFP_NOFAIL); - if (!new_transaction) { - ret = -ENOMEM; - goto out; - } - } - - jbd_debug(3, "New handle %p going live.\n", handle); - -repeat: - - /* - * We need to hold j_state_lock until t_updates has been incremented, - * for proper journal barrier handling - */ - spin_lock(&journal->j_state_lock); -repeat_locked: - if (is_journal_aborted(journal) || - (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { - spin_unlock(&journal->j_state_lock); - ret = -EROFS; - goto out; - } - - /* Wait on the journal's transaction barrier if necessary */ - if (journal->j_barrier_count) { - spin_unlock(&journal->j_state_lock); - wait_event(journal->j_wait_transaction_locked, - journal->j_barrier_count == 0); - goto repeat; - } - - if (!journal->j_running_transaction) { - if (!new_transaction) { - spin_unlock(&journal->j_state_lock); - goto alloc_transaction; - } - get_transaction(journal, new_transaction); - new_transaction = NULL; - } - - transaction = journal->j_running_transaction; - - /* - * If the current transaction is locked down for commit, wait for the - * lock to be released. 
- */ - if (transaction->t_state == T_LOCKED) { - DEFINE_WAIT(wait); - - prepare_to_wait(&journal->j_wait_transaction_locked, - &wait, TASK_UNINTERRUPTIBLE); - spin_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * If there is not enough space left in the log to write all potential - * buffers requested by this operation, we need to stall pending a log - * checkpoint to free some more log space. - */ - spin_lock(&transaction->t_handle_lock); - needed = transaction->t_outstanding_credits + nblocks; - - if (needed > journal->j_max_transaction_buffers) { - /* - * If the current transaction is already too large, then start - * to commit it: we can then go back and attach this handle to - * a new transaction. - */ - DEFINE_WAIT(wait); - - jbd_debug(2, "Handle %p starting new commit...\n", handle); - spin_unlock(&transaction->t_handle_lock); - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, - TASK_UNINTERRUPTIBLE); - __log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * The commit code assumes that it can get enough log space - * without forcing a checkpoint. This is *critical* for - * correctness: a checkpoint of a buffer which is also - * associated with a committing transaction creates a deadlock, - * so commit simply cannot force through checkpoints. - * - * We must therefore ensure the necessary space in the journal - * *before* starting to dirty potentially checkpointed buffers - * in the new transaction. - * - * The worst part is, any transaction currently committing can - * reduce the free space arbitrarily. Be careful to account for - * those buffers when checkpointing. - */ - - /* - * @@@ AKPM: This seems rather over-defensive. 
We're giving commit - * a _lot_ of headroom: 1/4 of the journal plus the size of - * the committing transaction. Really, we only need to give it - * committing_transaction->t_outstanding_credits plus "enough" for - * the log control blocks. - * Also, this test is inconsistent with the matching one in - * journal_extend(). - */ - if (__log_space_left(journal) < jbd_space_needed(journal)) { - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); - spin_unlock(&transaction->t_handle_lock); - __log_wait_for_space(journal); - goto repeat_locked; - } - - /* OK, account for the buffers that this operation expects to - * use and add the handle to the running transaction. */ - - handle->h_transaction = transaction; - transaction->t_outstanding_credits += nblocks; - transaction->t_updates++; - transaction->t_handle_count++; - jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", - handle, nblocks, transaction->t_outstanding_credits, - __log_space_left(journal)); - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - - lock_map_acquire(&handle->h_lockdep_map); -out: - if (unlikely(new_transaction)) /* It's usually NULL */ - kfree(new_transaction); - return ret; -} - -static struct lock_class_key jbd_handle_key; - -/* Allocate a new handle. This should probably be in a slab... */ -static handle_t *new_handle(int nblocks) -{ - handle_t *handle = jbd_alloc_handle(GFP_NOFS); - if (!handle) - return NULL; - handle->h_buffer_credits = nblocks; - handle->h_ref = 1; - - lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0); - - return handle; -} - -/** - * handle_t *journal_start() - Obtain a new handle. - * @journal: Journal to start transaction on. - * @nblocks: number of block buffer we might modify - * - * We make sure that the transaction can guarantee at least nblocks of - * modified buffers in the log. We block until the log can guarantee - * that much space. 
- * - * This function is visible to journal users (like ext3fs), so is not - * called with the journal already locked. - * - * Return a pointer to a newly allocated handle, or an ERR_PTR() value - * on failure. - */ -handle_t *journal_start(journal_t *journal, int nblocks) -{ - handle_t *handle = journal_current_handle(); - int err; - - if (!journal) - return ERR_PTR(-EROFS); - - if (handle) { - J_ASSERT(handle->h_transaction->t_journal == journal); - handle->h_ref++; - return handle; - } - - handle = new_handle(nblocks); - if (!handle) - return ERR_PTR(-ENOMEM); - - current->journal_info = handle; - - err = start_this_handle(journal, handle); - if (err < 0) { - jbd_free_handle(handle); - current->journal_info = NULL; - handle = ERR_PTR(err); - } - return handle; -} - -/** - * int journal_extend() - extend buffer credits. - * @handle: handle to 'extend' - * @nblocks: nr blocks to try to extend by. - * - * Some transactions, such as large extends and truncates, can be done - * atomically all at once or in several stages. The operation requests - * a credit for a number of buffer modications in advance, but can - * extend its credit if it needs more. - * - * journal_extend tries to give the running handle more buffer credits. - * It does not guarantee that allocation - this is a best-effort only. - * The calling process MUST be able to deal cleanly with a failure to - * extend here. - * - * Return 0 on success, non-zero on failure. - * - * return code < 0 implies an error - * return code > 0 implies normal transaction-full status. - */ -int journal_extend(handle_t *handle, int nblocks) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int result; - int wanted; - - result = -EIO; - if (is_handle_aborted(handle)) - goto out; - - result = 1; - - spin_lock(&journal->j_state_lock); - - /* Don't extend a locked-down transaction! 
*/ - if (handle->h_transaction->t_state != T_RUNNING) { - jbd_debug(3, "denied handle %p %d blocks: " - "transaction not running\n", handle, nblocks); - goto error_out; - } - - spin_lock(&transaction->t_handle_lock); - wanted = transaction->t_outstanding_credits + nblocks; - - if (wanted > journal->j_max_transaction_buffers) { - jbd_debug(3, "denied handle %p %d blocks: " - "transaction too large\n", handle, nblocks); - goto unlock; - } - - if (wanted > __log_space_left(journal)) { - jbd_debug(3, "denied handle %p %d blocks: " - "insufficient log space\n", handle, nblocks); - goto unlock; - } - - handle->h_buffer_credits += nblocks; - transaction->t_outstanding_credits += nblocks; - result = 0; - - jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); -unlock: - spin_unlock(&transaction->t_handle_lock); -error_out: - spin_unlock(&journal->j_state_lock); -out: - return result; -} - - -/** - * int journal_restart() - restart a handle. - * @handle: handle to restart - * @nblocks: nr credits requested - * - * Restart a handle for a multi-transaction filesystem - * operation. - * - * If the journal_extend() call above fails to grant new buffer credits - * to a running handle, a call to journal_restart will commit the - * handle's transaction so far and reattach the handle to a new - * transaction capabable of guaranteeing the requested number of - * credits. - */ - -int journal_restart(handle_t *handle, int nblocks) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int ret; - - /* If we've had an abort of any type, don't even think about - * actually doing the restart! */ - if (is_handle_aborted(handle)) - return 0; - - /* - * First unlink the handle from its current transaction, and start the - * commit on that. 
- */ - J_ASSERT(transaction->t_updates > 0); - J_ASSERT(journal_current_handle() == handle); - - spin_lock(&journal->j_state_lock); - spin_lock(&transaction->t_handle_lock); - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; - - if (!transaction->t_updates) - wake_up(&journal->j_wait_updates); - spin_unlock(&transaction->t_handle_lock); - - jbd_debug(2, "restarting handle %p\n", handle); - __log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); - - lock_map_release(&handle->h_lockdep_map); - handle->h_buffer_credits = nblocks; - ret = start_this_handle(journal, handle); - return ret; -} - - -/** - * void journal_lock_updates () - establish a transaction barrier. - * @journal: Journal to establish a barrier on. - * - * This locks out any further updates from being started, and blocks until all - * existing updates have completed, returning only once the journal is in a - * quiescent state with no updates running. - * - * We do not use simple mutex for synchronization as there are syscalls which - * want to return with filesystem locked and that trips up lockdep. Also - * hibernate needs to lock filesystem but locked mutex then blocks hibernation. - * Since locking filesystem is rare operation, we use simple counter and - * waitqueue for locking. 
- */ -void journal_lock_updates(journal_t *journal) -{ - DEFINE_WAIT(wait); - -wait: - /* Wait for previous locked operation to finish */ - wait_event(journal->j_wait_transaction_locked, - journal->j_barrier_count == 0); - - spin_lock(&journal->j_state_lock); - /* - * Check reliably under the lock whether we are the ones winning the race - * and locking the journal - */ - if (journal->j_barrier_count > 0) { - spin_unlock(&journal->j_state_lock); - goto wait; - } - ++journal->j_barrier_count; - - /* Wait until there are no running updates */ - while (1) { - transaction_t *transaction = journal->j_running_transaction; - - if (!transaction) - break; - - spin_lock(&transaction->t_handle_lock); - if (!transaction->t_updates) { - spin_unlock(&transaction->t_handle_lock); - break; - } - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_updates, &wait); - spin_lock(&journal->j_state_lock); - } - spin_unlock(&journal->j_state_lock); -} - -/** - * void journal_unlock_updates (journal_t* journal) - release barrier - * @journal: Journal to release the barrier on. - * - * Release a transaction barrier obtained with journal_lock_updates(). - */ -void journal_unlock_updates (journal_t *journal) -{ - J_ASSERT(journal->j_barrier_count != 0); - - spin_lock(&journal->j_state_lock); - --journal->j_barrier_count; - spin_unlock(&journal->j_state_lock); - wake_up(&journal->j_wait_transaction_locked); -} - -static void warn_dirty_buffer(struct buffer_head *bh) -{ - char b[BDEVNAME_SIZE]; - - printk(KERN_WARNING - "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " - "There's a risk of filesystem corruption in case of system " - "crash.\n", - bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); -} - -/* - * If the buffer is already part of the current transaction, then there - * is nothing we need to do. 
If it is already part of a prior - * transaction which we are still committing to disk, then we need to - * make sure that we do not overwrite the old copy: we do copy-out to - * preserve the copy going to disk. We also account the buffer against - * the handle's metadata buffer credits (unless the buffer is already - * part of the transaction, that is). - * - */ -static int -do_get_write_access(handle_t *handle, struct journal_head *jh, - int force_copy) -{ - struct buffer_head *bh; - transaction_t *transaction; - journal_t *journal; - int error; - char *frozen_buffer = NULL; - int need_copy = 0; - - if (is_handle_aborted(handle)) - return -EROFS; - - transaction = handle->h_transaction; - journal = transaction->t_journal; - - jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); - - JBUFFER_TRACE(jh, "entry"); -repeat: - bh = jh2bh(jh); - - /* @@@ Need to check for errors here at some point. */ - - lock_buffer(bh); - jbd_lock_bh_state(bh); - - /* We now hold the buffer lock so it is safe to query the buffer - * state. Is the buffer dirty? - * - * If so, there are two possibilities. The buffer may be - * non-journaled, and undergoing a quite legitimate writeback. - * Otherwise, it is journaled, and we don't expect dirty buffers - * in that state (the buffers should be marked JBD_Dirty - * instead.) So either the IO is being done under our own - * control and this is a bug, or it's a third party IO such as - * dump(8) (which may leave the buffer scheduled for read --- - * ie. locked but not dirty) or tune2fs (which may actually have - * the buffer dirtied, ugh.) */ - - if (buffer_dirty(bh)) { - /* - * First question: is this buffer already part of the current - * transaction or the existing committing transaction? 
- */ - if (jh->b_transaction) { - J_ASSERT_JH(jh, - jh->b_transaction == transaction || - jh->b_transaction == - journal->j_committing_transaction); - if (jh->b_next_transaction) - J_ASSERT_JH(jh, jh->b_next_transaction == - transaction); - warn_dirty_buffer(bh); - } - /* - * In any case we need to clean the dirty flag and we must - * do it under the buffer lock to be sure we don't race - * with running write-out. - */ - JBUFFER_TRACE(jh, "Journalling dirty buffer"); - clear_buffer_dirty(bh); - set_buffer_jbddirty(bh); - } - - unlock_buffer(bh); - - error = -EROFS; - if (is_handle_aborted(handle)) { - jbd_unlock_bh_state(bh); - goto out; - } - error = 0; - - /* - * The buffer is already part of this transaction if b_transaction or - * b_next_transaction points to it - */ - if (jh->b_transaction == transaction || - jh->b_next_transaction == transaction) - goto done; - - /* - * this is the first time this transaction is touching this buffer, - * reset the modified flag - */ - jh->b_modified = 0; - - /* - * If there is already a copy-out version of this buffer, then we don't - * need to make another one - */ - if (jh->b_frozen_data) { - JBUFFER_TRACE(jh, "has frozen data"); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - jh->b_next_transaction = transaction; - goto done; - } - - /* Is there data here we need to preserve? */ - - if (jh->b_transaction && jh->b_transaction != transaction) { - JBUFFER_TRACE(jh, "owned by older transaction"); - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_committing_transaction); - - /* There is one case we have to be very careful about. - * If the committing transaction is currently writing - * this buffer out to disk and has NOT made a copy-out, - * then we cannot modify the buffer contents at all - * right now. The essence of copy-out is that it is the - * extra copy, not the primary copy, which gets - * journaled. 
If the primary copy is already going to - * disk then we cannot do copy-out here. */ - - if (jh->b_jlist == BJ_Shadow) { - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); - wait_queue_head_t *wqh; - - wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); - - JBUFFER_TRACE(jh, "on shadow: sleep"); - jbd_unlock_bh_state(bh); - /* commit wakes up all shadow buffers after IO */ - for ( ; ; ) { - prepare_to_wait(wqh, &wait.wait, - TASK_UNINTERRUPTIBLE); - if (jh->b_jlist != BJ_Shadow) - break; - schedule(); - } - finish_wait(wqh, &wait.wait); - goto repeat; - } - - /* Only do the copy if the currently-owning transaction - * still needs it. If it is on the Forget list, the - * committing transaction is past that stage. The - * buffer had better remain locked during the kmalloc, - * but that should be true --- we hold the journal lock - * still and the buffer is already on the BUF_JOURNAL - * list so won't be flushed. - * - * Subtle point, though: if this is a get_undo_access, - * then we will be relying on the frozen_data to contain - * the new value of the committed_data record after the - * transaction, so we HAVE to force the frozen_data copy - * in that case. 
*/ - - if (jh->b_jlist != BJ_Forget || force_copy) { - JBUFFER_TRACE(jh, "generate frozen data"); - if (!frozen_buffer) { - JBUFFER_TRACE(jh, "allocate memory for buffer"); - jbd_unlock_bh_state(bh); - frozen_buffer = - jbd_alloc(jh2bh(jh)->b_size, - GFP_NOFS); - if (!frozen_buffer) { - printk(KERN_ERR - "%s: OOM for frozen_buffer\n", - __func__); - JBUFFER_TRACE(jh, "oom!"); - error = -ENOMEM; - jbd_lock_bh_state(bh); - goto done; - } - goto repeat; - } - jh->b_frozen_data = frozen_buffer; - frozen_buffer = NULL; - need_copy = 1; - } - jh->b_next_transaction = transaction; - } - - - /* - * Finally, if the buffer is not journaled right now, we need to make - * sure it doesn't get written to disk before the caller actually - * commits the new data - */ - if (!jh->b_transaction) { - JBUFFER_TRACE(jh, "no transaction"); - J_ASSERT_JH(jh, !jh->b_next_transaction); - JBUFFER_TRACE(jh, "file as BJ_Reserved"); - spin_lock(&journal->j_list_lock); - __journal_file_buffer(jh, transaction, BJ_Reserved); - spin_unlock(&journal->j_list_lock); - } - -done: - if (need_copy) { - struct page *page; - int offset; - char *source; - - J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), - "Possible IO failure.\n"); - page = jh2bh(jh)->b_page; - offset = offset_in_page(jh2bh(jh)->b_data); - source = kmap_atomic(page); - memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); - kunmap_atomic(source); - } - jbd_unlock_bh_state(bh); - - /* - * If we are about to journal a buffer, then any revoke pending on it is - * no longer valid - */ - journal_cancel_revoke(handle, jh); - -out: - if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd_free(frozen_buffer, bh->b_size); - - JBUFFER_TRACE(jh, "exit"); - return error; -} - -/** - * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. - * @handle: transaction to add buffer modifications to - * @bh: bh to be used for metadata writes - * - * Returns an error code or 0 on success. 
- * - * In full data journalling mode the buffer may be of type BJ_AsyncData, - * because we're write()ing a buffer which is also part of a shared mapping. - */ - -int journal_get_write_access(handle_t *handle, struct buffer_head *bh) -{ - struct journal_head *jh = journal_add_journal_head(bh); - int rc; - - /* We do not want to get caught playing with fields which the - * log thread also manipulates. Make sure that the buffer - * completes any outstanding IO before proceeding. */ - rc = do_get_write_access(handle, jh, 0); - journal_put_journal_head(jh); - return rc; -} - - -/* - * When the user wants to journal a newly created buffer_head - * (ie. getblk() returned a new buffer and we are going to populate it - * manually rather than reading off disk), then we need to keep the - * buffer_head locked until it has been completely filled with new - * data. In this case, we should be able to make the assertion that - * the bh is not already part of an existing transaction. - * - * The buffer should already be locked by the caller by this point. - * There is no lock ranking violation: it was a newly created, - * unlocked buffer beforehand. */ - -/** - * int journal_get_create_access () - notify intent to use newly created bh - * @handle: transaction to new buffer to - * @bh: new buffer. - * - * Call this if you create a new bh. - */ -int journal_get_create_access(handle_t *handle, struct buffer_head *bh) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = journal_add_journal_head(bh); - int err; - - jbd_debug(5, "journal_head %p\n", jh); - err = -EROFS; - if (is_handle_aborted(handle)) - goto out; - err = 0; - - JBUFFER_TRACE(jh, "entry"); - /* - * The buffer may already belong to this transaction due to pre-zeroing - * in the filesystem's new_block code. 
It may also be on the previous, - * committing transaction's lists, but it HAS to be in Forget state in - * that case: the transaction must have deleted the buffer for it to be - * reused here. - */ - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - J_ASSERT_JH(jh, (jh->b_transaction == transaction || - jh->b_transaction == NULL || - (jh->b_transaction == journal->j_committing_transaction && - jh->b_jlist == BJ_Forget))); - - J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); - - if (jh->b_transaction == NULL) { - /* - * Previous journal_forget() could have left the buffer - * with jbddirty bit set because it was being committed. When - * the commit finished, we've filed the buffer for - * checkpointing and marked it dirty. Now we are reallocating - * the buffer so the transaction freeing it must have - * committed and so it's safe to clear the dirty bit. - */ - clear_buffer_dirty(jh2bh(jh)); - - /* first access by this transaction */ - jh->b_modified = 0; - - JBUFFER_TRACE(jh, "file as BJ_Reserved"); - __journal_file_buffer(jh, transaction, BJ_Reserved); - } else if (jh->b_transaction == journal->j_committing_transaction) { - /* first access by this transaction */ - jh->b_modified = 0; - - JBUFFER_TRACE(jh, "set next transaction"); - jh->b_next_transaction = transaction; - } - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - - /* - * akpm: I added this. ext3_alloc_branch can pick up new indirect - * blocks which contain freed but then revoked metadata. We need - * to cancel the revoke in case we end up freeing it yet again - * and the reallocating as data - this would cause a second revoke, - * which hits an assertion error. 
- */ - JBUFFER_TRACE(jh, "cancelling revoke"); - journal_cancel_revoke(handle, jh); -out: - journal_put_journal_head(jh); - return err; -} - -/** - * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences - * @handle: transaction - * @bh: buffer to undo - * - * Sometimes there is a need to distinguish between metadata which has - * been committed to disk and that which has not. The ext3fs code uses - * this for freeing and allocating space, we have to make sure that we - * do not reuse freed space until the deallocation has been committed, - * since if we overwrote that space we would make the delete - * un-rewindable in case of a crash. - * - * To deal with that, journal_get_undo_access requests write access to a - * buffer for parts of non-rewindable operations such as delete - * operations on the bitmaps. The journaling code must keep a copy of - * the buffer's contents prior to the undo_access call until such time - * as we know that the buffer has definitely been committed to disk. - * - * We never need to know which transaction the committed data is part - * of, buffers touched here are guaranteed to be dirtied later and so - * will be committed to a new transaction in due course, at which point - * we can discard the old committed data pointer. - * - * Returns error number or 0 on success. - */ -int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) -{ - int err; - struct journal_head *jh = journal_add_journal_head(bh); - char *committed_data = NULL; - - JBUFFER_TRACE(jh, "entry"); - - /* - * Do this first --- it can drop the journal lock, so we want to - * make sure that obtaining the committed_data is done - * atomically wrt. completion of any outstanding commits. 
- */ - err = do_get_write_access(handle, jh, 1); - if (err) - goto out; - -repeat: - if (!jh->b_committed_data) { - committed_data = jbd_alloc(jh2bh(jh)->b_size, GFP_NOFS); - if (!committed_data) { - printk(KERN_ERR "%s: No memory for committed data\n", - __func__); - err = -ENOMEM; - goto out; - } - } - - jbd_lock_bh_state(bh); - if (!jh->b_committed_data) { - /* Copy out the current buffer contents into the - * preserved, committed copy. */ - JBUFFER_TRACE(jh, "generate b_committed data"); - if (!committed_data) { - jbd_unlock_bh_state(bh); - goto repeat; - } - - jh->b_committed_data = committed_data; - committed_data = NULL; - memcpy(jh->b_committed_data, bh->b_data, bh->b_size); - } - jbd_unlock_bh_state(bh); -out: - journal_put_journal_head(jh); - if (unlikely(committed_data)) - jbd_free(committed_data, bh->b_size); - return err; -} - -/** - * int journal_dirty_data() - mark a buffer as containing dirty data to be flushed - * @handle: transaction - * @bh: bufferhead to mark - * - * Description: - * Mark a buffer as containing dirty data which needs to be flushed before - * we can commit the current transaction. - * - * The buffer is placed on the transaction's data list and is marked as - * belonging to the transaction. - * - * Returns error number or 0 on success. - * - * journal_dirty_data() can be called via page_launder->ext3_writepage - * by kswapd. - */ -int journal_dirty_data(handle_t *handle, struct buffer_head *bh) -{ - journal_t *journal = handle->h_transaction->t_journal; - int need_brelse = 0; - struct journal_head *jh; - int ret = 0; - - if (is_handle_aborted(handle)) - return ret; - - jh = journal_add_journal_head(bh); - JBUFFER_TRACE(jh, "entry"); - - /* - * The buffer could *already* be dirty. Writeout can start - * at any time. - */ - jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); - - /* - * What if the buffer is already part of a running transaction? 
- * - * There are two cases: - * 1) It is part of the current running transaction. Refile it, - * just in case we have allocated it as metadata, deallocated - * it, then reallocated it as data. - * 2) It is part of the previous, still-committing transaction. - * If all we want to do is to guarantee that the buffer will be - * written to disk before this new transaction commits, then - * being sure that the *previous* transaction has this same - * property is sufficient for us! Just leave it on its old - * transaction. - * - * In case (2), the buffer must not already exist as metadata - * --- that would violate write ordering (a transaction is free - * to write its data at any point, even before the previous - * committing transaction has committed). The caller must - * never, ever allow this to happen: there's nothing we can do - * about it in this layer. - */ - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - - /* Now that we have bh_state locked, are we really still mapped? */ - if (!buffer_mapped(bh)) { - JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); - goto no_journal; - } - - if (jh->b_transaction) { - JBUFFER_TRACE(jh, "has transaction"); - if (jh->b_transaction != handle->h_transaction) { - JBUFFER_TRACE(jh, "belongs to older transaction"); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_committing_transaction); - - /* @@@ IS THIS TRUE ? */ - /* - * Not any more. Scenario: someone does a write() - * in data=journal mode. The buffer's transaction has - * moved into commit. Then someone does another - * write() to the file. We do the frozen data copyout - * and set b_next_transaction to point to j_running_t. - * And while we're in that state, someone does a - * writepage() in an attempt to pageout the same area - * of the file via a shared mapping. At present that - * calls journal_dirty_data(), and we get right here. - * It may be too late to journal the data. 
Simply - * falling through to the next test will suffice: the - * data will be dirty and wil be checkpointed. The - * ordering comments in the next comment block still - * apply. - */ - //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); - - /* - * If we're journalling data, and this buffer was - * subject to a write(), it could be metadata, forget - * or shadow against the committing transaction. Now, - * someone has dirtied the same darn page via a mapping - * and it is being writepage()'d. - * We *could* just steal the page from commit, with some - * fancy locking there. Instead, we just skip it - - * don't tie the page's buffers to the new transaction - * at all. - * Implication: if we crash before the writepage() data - * is written into the filesystem, recovery will replay - * the write() data. - */ - if (jh->b_jlist != BJ_None && - jh->b_jlist != BJ_SyncData && - jh->b_jlist != BJ_Locked) { - JBUFFER_TRACE(jh, "Not stealing"); - goto no_journal; - } - - /* - * This buffer may be undergoing writeout in commit. We - * can't return from here and let the caller dirty it - * again because that can cause the write-out loop in - * commit to never terminate. - */ - if (buffer_dirty(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - need_brelse = 1; - sync_dirty_buffer(bh); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - /* Since we dropped the lock... */ - if (!buffer_mapped(bh)) { - JBUFFER_TRACE(jh, "buffer got unmapped"); - goto no_journal; - } - /* The buffer may become locked again at any - time if it is redirtied */ - } - - /* - * We cannot remove the buffer with io error from the - * committing transaction, because otherwise it would - * miss the error and the commit would not abort. 
- */ - if (unlikely(!buffer_uptodate(bh))) { - ret = -EIO; - goto no_journal; - } - /* We might have slept so buffer could be refiled now */ - if (jh->b_transaction != NULL && - jh->b_transaction != handle->h_transaction) { - JBUFFER_TRACE(jh, "unfile from commit"); - __journal_temp_unlink_buffer(jh); - /* It still points to the committing - * transaction; move it to this one so - * that the refile assert checks are - * happy. */ - jh->b_transaction = handle->h_transaction; - } - /* The buffer will be refiled below */ - - } - /* - * Special case --- the buffer might actually have been - * allocated and then immediately deallocated in the previous, - * committing transaction, so might still be left on that - * transaction's metadata lists. - */ - if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { - JBUFFER_TRACE(jh, "not on correct data list: unfile"); - J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); - JBUFFER_TRACE(jh, "file as data"); - __journal_file_buffer(jh, handle->h_transaction, - BJ_SyncData); - } - } else { - JBUFFER_TRACE(jh, "not on a transaction"); - __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); - } -no_journal: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - if (need_brelse) { - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - } - JBUFFER_TRACE(jh, "exit"); - journal_put_journal_head(jh); - return ret; -} - -/** - * int journal_dirty_metadata() - mark a buffer as containing dirty metadata - * @handle: transaction to add buffer to. - * @bh: buffer to mark - * - * Mark dirty metadata which needs to be journaled as part of the current - * transaction. - * - * The buffer is placed on the transaction's metadata list and is marked - * as belonging to the transaction. - * - * Returns error number or 0 on success. - * - * Special care needs to be taken if the buffer already belongs to the - * current committing transaction (in which case we should have frozen - * data present for that commit). 
In that case, we don't relink the - * buffer: that only gets done when the old transaction finally - * completes its commit. - */ -int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); - - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); - if (is_handle_aborted(handle)) - goto out; - - jbd_lock_bh_state(bh); - - if (jh->b_modified == 0) { - /* - * This buffer's got modified and becoming part - * of the transaction. This needs to be done - * once a transaction -bzzz - */ - jh->b_modified = 1; - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); - handle->h_buffer_credits--; - } - - /* - * fastpath, to avoid expensive locking. If this buffer is already - * on the running transaction's metadata list there is nothing to do. - * Nobody can take it off again because there is a handle open. - * I _think_ we're OK here with SMP barriers - a mistaken decision will - * result in this test being false, so we go in and take the locks. - */ - if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { - JBUFFER_TRACE(jh, "fastpath"); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_running_transaction); - goto out_unlock_bh; - } - - set_buffer_jbddirty(bh); - - /* - * Metadata already on the current transaction list doesn't - * need to be filed. Metadata on another transaction's list must - * be committing, and will be refiled once the commit completes: - * leave it alone for now. - */ - if (jh->b_transaction != transaction) { - JBUFFER_TRACE(jh, "already on other transaction"); - J_ASSERT_JH(jh, jh->b_transaction == - journal->j_committing_transaction); - J_ASSERT_JH(jh, jh->b_next_transaction == transaction); - /* And this case is illegal: we can't reuse another - * transaction's data buffer, ever. 
*/ - goto out_unlock_bh; - } - - /* That test should have eliminated the following case: */ - J_ASSERT_JH(jh, jh->b_frozen_data == NULL); - - JBUFFER_TRACE(jh, "file as BJ_Metadata"); - spin_lock(&journal->j_list_lock); - __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); - spin_unlock(&journal->j_list_lock); -out_unlock_bh: - jbd_unlock_bh_state(bh); -out: - JBUFFER_TRACE(jh, "exit"); - return 0; -} - -/* - * journal_release_buffer: undo a get_write_access without any buffer - * updates, if the update decided in the end that it didn't need access. - * - */ -void -journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - BUFFER_TRACE(bh, "entry"); -} - -/** - * void journal_forget() - bforget() for potentially-journaled buffers. - * @handle: transaction handle - * @bh: bh to 'forget' - * - * We can only do the bforget if there are no commits pending against the - * buffer. If the buffer is dirty in the current running transaction we - * can safely unlink it. - * - * bh may not be a journalled buffer at all - it may be a non-JBD - * buffer which came off the hashtable. Check for this. - * - * Decrements bh->b_count by one. - * - * Allow this call even if the handle has aborted --- it may be part of - * the caller's cleanup after an abort. - */ -int journal_forget (handle_t *handle, struct buffer_head *bh) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh; - int drop_reserve = 0; - int err = 0; - int was_modified = 0; - - BUFFER_TRACE(bh, "entry"); - - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - - if (!buffer_jbd(bh)) - goto not_jbd; - jh = bh2jh(bh); - - /* Critical error: attempting to delete a bitmap buffer, maybe? - * Don't do any jbd operations, and return an error. 
*/ - if (!J_EXPECT_JH(jh, !jh->b_committed_data, - "inconsistent data on disk")) { - err = -EIO; - goto not_jbd; - } - - /* keep track of whether or not this transaction modified us */ - was_modified = jh->b_modified; - - /* - * The buffer's going from the transaction, we must drop - * all references -bzzz - */ - jh->b_modified = 0; - - if (jh->b_transaction == handle->h_transaction) { - J_ASSERT_JH(jh, !jh->b_frozen_data); - - /* If we are forgetting a buffer which is already part - * of this transaction, then we can just drop it from - * the transaction immediately. */ - clear_buffer_dirty(bh); - clear_buffer_jbddirty(bh); - - JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); - - /* - * we only want to drop a reference if this transaction - * modified the buffer - */ - if (was_modified) - drop_reserve = 1; - - /* - * We are no longer going to journal this buffer. - * However, the commit of this transaction is still - * important to the buffer: the delete that we are now - * processing might obsolete an old log entry, so by - * committing, we can satisfy the buffer's checkpoint. - * - * So, if we have a checkpoint on the buffer, we should - * now refile the buffer on our BJ_Forget list so that - * we know to remove the checkpoint after we commit. - */ - - if (jh->b_cp_transaction) { - __journal_temp_unlink_buffer(jh); - __journal_file_buffer(jh, transaction, BJ_Forget); - } else { - __journal_unfile_buffer(jh); - if (!buffer_jbd(bh)) { - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __bforget(bh); - goto drop; - } - } - } else if (jh->b_transaction) { - J_ASSERT_JH(jh, (jh->b_transaction == - journal->j_committing_transaction)); - /* However, if the buffer is still owned by a prior - * (committing) transaction, we can't drop it yet... */ - JBUFFER_TRACE(jh, "belongs to older transaction"); - /* ... but we CAN drop it from the new transaction if we - * have also modified it since the original commit. 
*/ - - if (jh->b_next_transaction) { - J_ASSERT(jh->b_next_transaction == transaction); - jh->b_next_transaction = NULL; - - /* - * only drop a reference if this transaction modified - * the buffer - */ - if (was_modified) - drop_reserve = 1; - } - } - -not_jbd: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __brelse(bh); -drop: - if (drop_reserve) { - /* no need to reserve log space for this block -bzzz */ - handle->h_buffer_credits++; - } - return err; -} - -/** - * int journal_stop() - complete a transaction - * @handle: tranaction to complete. - * - * All done for a particular handle. - * - * There is not much action needed here. We just return any remaining - * buffer credits to the transaction and remove the handle. The only - * complication is that we need to start a commit operation if the - * filesystem is marked for synchronous update. - * - * journal_stop itself will not usually return an error, but it may - * do so in unusual circumstances. In particular, expect it to - * return -EIO if a journal_abort has been executed since the - * transaction began. - */ -int journal_stop(handle_t *handle) -{ - transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int err; - pid_t pid; - - J_ASSERT(journal_current_handle() == handle); - - if (is_handle_aborted(handle)) - err = -EIO; - else { - J_ASSERT(transaction->t_updates > 0); - err = 0; - } - - if (--handle->h_ref > 0) { - jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, - handle->h_ref); - return err; - } - - jbd_debug(4, "Handle %p going down\n", handle); - - /* - * Implement synchronous transaction batching. If the handle - * was synchronous, don't force a commit immediately. Let's - * yield and let another thread piggyback onto this transaction. - * Keep doing that while new threads continue to arrive. - * It doesn't cost much - we're about to run a commit and sleep - * on IO anyway. 
Speeds up many-threaded, many-dir operations - * by 30x or more... - * - * We try and optimize the sleep time against what the underlying disk - * can do, instead of having a static sleep time. This is useful for - * the case where our storage is so fast that it is more optimal to go - * ahead and force a flush and wait for the transaction to be committed - * than it is to wait for an arbitrary amount of time for new writers to - * join the transaction. We achieve this by measuring how long it takes - * to commit a transaction, and compare it with how long this - * transaction has been running, and if run time < commit time then we - * sleep for the delta and commit. This greatly helps super fast disks - * that would see slowdowns as more threads started doing fsyncs. - * - * But don't do this if this process was the most recent one to - * perform a synchronous write. We do this to detect the case where a - * single process is doing a stream of sync writes. No point in waiting - * for joiners in that case. 
- */ - pid = current->pid; - if (handle->h_sync && journal->j_last_sync_writer != pid) { - u64 commit_time, trans_time; - - journal->j_last_sync_writer = pid; - - spin_lock(&journal->j_state_lock); - commit_time = journal->j_average_commit_time; - spin_unlock(&journal->j_state_lock); - - trans_time = ktime_to_ns(ktime_sub(ktime_get(), - transaction->t_start_time)); - - commit_time = min_t(u64, commit_time, - 1000*jiffies_to_usecs(1)); - - if (trans_time < commit_time) { - ktime_t expires = ktime_add_ns(ktime_get(), - commit_time); - set_current_state(TASK_UNINTERRUPTIBLE); - schedule_hrtimeout(&expires, HRTIMER_MODE_ABS); - } - } - - current->journal_info = NULL; - spin_lock(&journal->j_state_lock); - spin_lock(&transaction->t_handle_lock); - transaction->t_outstanding_credits -= handle->h_buffer_credits; - transaction->t_updates--; - if (!transaction->t_updates) { - wake_up(&journal->j_wait_updates); - if (journal->j_barrier_count) - wake_up(&journal->j_wait_transaction_locked); - } - - /* - * If the handle is marked SYNC, we need to set another commit - * going! We also want to force a commit if the current - * transaction is occupying too much of the log, or if the - * transaction is too old now. - */ - if (handle->h_sync || - transaction->t_outstanding_credits > - journal->j_max_transaction_buffers || - time_after_eq(jiffies, transaction->t_expires)) { - /* Do this even for aborted journals: an abort still - * completes the commit thread, it just doesn't write - * anything to disk. */ - tid_t tid = transaction->t_tid; - - spin_unlock(&transaction->t_handle_lock); - jbd_debug(2, "transaction too old, requesting commit for " - "handle %p\n", handle); - /* This is non-blocking */ - __log_start_commit(journal, transaction->t_tid); - spin_unlock(&journal->j_state_lock); - - /* - * Special case: JFS_SYNC synchronous updates require us - * to wait for the commit to complete. 
- */ - if (handle->h_sync && !(current->flags & PF_MEMALLOC)) - err = log_wait_commit(journal, tid); - } else { - spin_unlock(&transaction->t_handle_lock); - spin_unlock(&journal->j_state_lock); - } - - lock_map_release(&handle->h_lockdep_map); - - jbd_free_handle(handle); - return err; -} - -/** - * int journal_force_commit() - force any uncommitted transactions - * @journal: journal to force - * - * For synchronous operations: force any uncommitted transactions - * to disk. May seem kludgy, but it reuses all the handle batching - * code in a very simple manner. - */ -int journal_force_commit(journal_t *journal) -{ - handle_t *handle; - int ret; - - handle = journal_start(journal, 1); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - } else { - handle->h_sync = 1; - ret = journal_stop(handle); - } - return ret; -} - -/* - * - * List management code snippets: various functions for manipulating the - * transaction buffer lists. - * - */ - -/* - * Append a buffer to a transaction list, given the transaction's list head - * pointer. - * - * j_list_lock is held. - * - * jbd_lock_bh_state(jh2bh(jh)) is held. - */ - -static inline void -__blist_add_buffer(struct journal_head **list, struct journal_head *jh) -{ - if (!*list) { - jh->b_tnext = jh->b_tprev = jh; - *list = jh; - } else { - /* Insert at the tail of the list to preserve order */ - struct journal_head *first = *list, *last = first->b_tprev; - jh->b_tprev = last; - jh->b_tnext = first; - last->b_tnext = first->b_tprev = jh; - } -} - -/* - * Remove a buffer from a transaction list, given the transaction's list - * head pointer. - * - * Called with j_list_lock held, and the journal may not be locked. - * - * jbd_lock_bh_state(jh2bh(jh)) is held. 
- */ - -static inline void -__blist_del_buffer(struct journal_head **list, struct journal_head *jh) -{ - if (*list == jh) { - *list = jh->b_tnext; - if (*list == jh) - *list = NULL; - } - jh->b_tprev->b_tnext = jh->b_tnext; - jh->b_tnext->b_tprev = jh->b_tprev; -} - -/* - * Remove a buffer from the appropriate transaction list. - * - * Note that this function can *change* the value of - * bh->b_transaction->t_sync_datalist, t_buffers, t_forget, - * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller - * is holding onto a copy of one of thee pointers, it could go bad. - * Generally the caller needs to re-read the pointer from the transaction_t. - * - * Called under j_list_lock. The journal may not be locked. - */ -static void __journal_temp_unlink_buffer(struct journal_head *jh) -{ - struct journal_head **list = NULL; - transaction_t *transaction; - struct buffer_head *bh = jh2bh(jh); - - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); - transaction = jh->b_transaction; - if (transaction) - assert_spin_locked(&transaction->t_journal->j_list_lock); - - J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); - if (jh->b_jlist != BJ_None) - J_ASSERT_JH(jh, transaction != NULL); - - switch (jh->b_jlist) { - case BJ_None: - return; - case BJ_SyncData: - list = &transaction->t_sync_datalist; - break; - case BJ_Metadata: - transaction->t_nr_buffers--; - J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); - list = &transaction->t_buffers; - break; - case BJ_Forget: - list = &transaction->t_forget; - break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; - case BJ_Shadow: - list = &transaction->t_shadow_list; - break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; - case BJ_Reserved: - list = &transaction->t_reserved_list; - break; - case BJ_Locked: - list = &transaction->t_locked_list; - break; - } - - __blist_del_buffer(list, jh); - jh->b_jlist = BJ_None; - if (test_clear_buffer_jbddirty(bh)) - mark_buffer_dirty(bh); /* Expose it to the VM */ -} 
- -/* - * Remove buffer from all transactions. - * - * Called with bh_state lock and j_list_lock - * - * jh and bh may be already freed when this function returns. - */ -void __journal_unfile_buffer(struct journal_head *jh) -{ - __journal_temp_unlink_buffer(jh); - jh->b_transaction = NULL; - journal_put_journal_head(jh); -} - -void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - /* Get reference so that buffer cannot be freed before we unlock it */ - get_bh(bh); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - __journal_unfile_buffer(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - __brelse(bh); -} - -/* - * Called from journal_try_to_free_buffers(). - * - * Called under jbd_lock_bh_state(bh) - */ -static void -__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) -{ - struct journal_head *jh; - - jh = bh2jh(bh); - - if (buffer_locked(bh) || buffer_dirty(bh)) - goto out; - - if (jh->b_next_transaction != NULL) - goto out; - - spin_lock(&journal->j_list_lock); - if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) { - if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { - /* A written-back ordered data buffer */ - JBUFFER_TRACE(jh, "release data"); - __journal_unfile_buffer(jh); - } - } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { - /* written-back checkpointed metadata buffer */ - if (jh->b_jlist == BJ_None) { - JBUFFER_TRACE(jh, "remove from checkpoint list"); - __journal_remove_checkpoint(jh); - } - } - spin_unlock(&journal->j_list_lock); -out: - return; -} - -/** - * int journal_try_to_free_buffers() - try to free page buffers. - * @journal: journal for operation - * @page: to try and free - * @gfp_mask: we use the mask to detect how hard should we try to release - * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to - * release the buffers. 
- * - * - * For all the buffers on this page, - * if they are fully written out ordered data, move them onto BUF_CLEAN - * so try_to_free_buffers() can reap them. - * - * This function returns non-zero if we wish try_to_free_buffers() - * to be called. We do this if the page is releasable by try_to_free_buffers(). - * We also do it if the page has locked or dirty buffers and the caller wants - * us to perform sync or async writeout. - * - * This complicates JBD locking somewhat. We aren't protected by the - * BKL here. We wish to remove the buffer from its committing or - * running transaction's ->t_datalist via __journal_unfile_buffer. - * - * This may *change* the value of transaction_t->t_datalist, so anyone - * who looks at t_datalist needs to lock against this function. - * - * Even worse, someone may be doing a journal_dirty_data on this - * buffer. So we need to lock against that. journal_dirty_data() - * will come out of the lock with the buffer dirty, which makes it - * ineligible for release here. - * - * Who else is affected by this? hmm... Really the only contender - * is do_get_write_access() - it could be looking at the buffer while - * journal_try_to_free_buffer() is changing its state. But that - * cannot happen because we never reallocate freed data as metadata - * while the data is part of a transaction. Yes? - * - * Return 0 on failure, 1 on success - */ -int journal_try_to_free_buffers(journal_t *journal, - struct page *page, gfp_t gfp_mask) -{ - struct buffer_head *head; - struct buffer_head *bh; - int ret = 0; - - J_ASSERT(PageLocked(page)); - - head = page_buffers(page); - bh = head; - do { - struct journal_head *jh; - - /* - * We take our own ref against the journal_head here to avoid - * having to add tons of locking around each instance of - * journal_put_journal_head(). 
- */ - jh = journal_grab_journal_head(bh); - if (!jh) - continue; - - jbd_lock_bh_state(bh); - __journal_try_to_free_buffer(journal, bh); - journal_put_journal_head(jh); - jbd_unlock_bh_state(bh); - if (buffer_jbd(bh)) - goto busy; - } while ((bh = bh->b_this_page) != head); - - ret = try_to_free_buffers(page); - -busy: - return ret; -} - -/* - * This buffer is no longer needed. If it is on an older transaction's - * checkpoint list we need to record it on this transaction's forget list - * to pin this buffer (and hence its checkpointing transaction) down until - * this transaction commits. If the buffer isn't on a checkpoint list, we - * release it. - * Returns non-zero if JBD no longer has an interest in the buffer. - * - * Called under j_list_lock. - * - * Called under jbd_lock_bh_state(bh). - */ -static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) -{ - int may_free = 1; - struct buffer_head *bh = jh2bh(jh); - - if (jh->b_cp_transaction) { - JBUFFER_TRACE(jh, "on running+cp transaction"); - __journal_temp_unlink_buffer(jh); - /* - * We don't want to write the buffer anymore, clear the - * bit so that we don't confuse checks in - * __journal_file_buffer - */ - clear_buffer_dirty(bh); - __journal_file_buffer(jh, transaction, BJ_Forget); - may_free = 0; - } else { - JBUFFER_TRACE(jh, "on running transaction"); - __journal_unfile_buffer(jh); - } - return may_free; -} - -/* - * journal_invalidatepage - * - * This code is tricky. It has a number of cases to deal with. - * - * There are two invariants which this code relies on: - * - * i_size must be updated on disk before we start calling invalidatepage on the - * data. - * - * This is done in ext3 by defining an ext3_setattr method which - * updates i_size before truncate gets going. By maintaining this - * invariant, we can be sure that it is safe to throw away any buffers - * attached to the current transaction: once the transaction commits, - * we know that the data will not be needed. 
- * - * Note however that we can *not* throw away data belonging to the - * previous, committing transaction! - * - * Any disk blocks which *are* part of the previous, committing - * transaction (and which therefore cannot be discarded immediately) are - * not going to be reused in the new running transaction - * - * The bitmap committed_data images guarantee this: any block which is - * allocated in one transaction and removed in the next will be marked - * as in-use in the committed_data bitmap, so cannot be reused until - * the next transaction to delete the block commits. This means that - * leaving committing buffers dirty is quite safe: the disk blocks - * cannot be reallocated to a different file and so buffer aliasing is - * not possible. - * - * - * The above applies mainly to ordered data mode. In writeback mode we - * don't make guarantees about the order in which data hits disk --- in - * particular we don't guarantee that new dirty data is flushed before - * transaction commit --- so it is always safe just to discard data - * immediately in that mode. --sct - */ - -/* - * The journal_unmap_buffer helper function returns zero if the buffer - * concerned remains pinned as an anonymous buffer belonging to an older - * transaction. - * - * We're outside-transaction here. Either or both of j_running_transaction - * and j_committing_transaction may be NULL. - */ -static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, - int partial_page) -{ - transaction_t *transaction; - struct journal_head *jh; - int may_free = 1; - - BUFFER_TRACE(bh, "entry"); - -retry: - /* - * It is safe to proceed here without the j_list_lock because the - * buffers cannot be stolen by try_to_free_buffers as long as we are - * holding the page lock. 
--sct - */ - - if (!buffer_jbd(bh)) - goto zap_buffer_unlocked; - - spin_lock(&journal->j_state_lock); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - - jh = journal_grab_journal_head(bh); - if (!jh) - goto zap_buffer_no_jh; - - /* - * We cannot remove the buffer from checkpoint lists until the - * transaction adding inode to orphan list (let's call it T) - * is committed. Otherwise if the transaction changing the - * buffer would be cleaned from the journal before T is - * committed, a crash will cause that the correct contents of - * the buffer will be lost. On the other hand we have to - * clear the buffer dirty bit at latest at the moment when the - * transaction marking the buffer as freed in the filesystem - * structures is committed because from that moment on the - * block can be reallocated and used by a different page. - * Since the block hasn't been freed yet but the inode has - * already been added to orphan list, it is safe for us to add - * the buffer to BJ_Forget list of the newest transaction. - * - * Also we have to clear buffer_mapped flag of a truncated buffer - * because the buffer_head may be attached to the page straddling - * i_size (can happen only when blocksize < pagesize) and thus the - * buffer_head can be reused when the file is extended again. So we end - * up keeping around invalidated buffers attached to transactions' - * BJ_Forget list just to stop checkpointing code from cleaning up - * the transaction this buffer was modified in. - */ - transaction = jh->b_transaction; - if (transaction == NULL) { - /* First case: not on any transaction. If it - * has no checkpoint link, then we can zap it: - * it's a writeback-mode buffer so we don't care - * if it hits disk safely. */ - if (!jh->b_cp_transaction) { - JBUFFER_TRACE(jh, "not on any transaction: zap"); - goto zap_buffer; - } - - if (!buffer_dirty(bh)) { - /* bdflush has written it. 
We can drop it now */ - goto zap_buffer; - } - - /* OK, it must be in the journal but still not - * written fully to disk: it's metadata or - * journaled data... */ - - if (journal->j_running_transaction) { - /* ... and once the current transaction has - * committed, the buffer won't be needed any - * longer. */ - JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); - may_free = __dispose_buffer(jh, - journal->j_running_transaction); - goto zap_buffer; - } else { - /* There is no currently-running transaction. So the - * orphan record which we wrote for this file must have - * passed into commit. We must attach this buffer to - * the committing transaction, if it exists. */ - if (journal->j_committing_transaction) { - JBUFFER_TRACE(jh, "give to committing trans"); - may_free = __dispose_buffer(jh, - journal->j_committing_transaction); - goto zap_buffer; - } else { - /* The orphan record's transaction has - * committed. We can cleanse this buffer */ - clear_buffer_jbddirty(bh); - goto zap_buffer; - } - } - } else if (transaction == journal->j_committing_transaction) { - JBUFFER_TRACE(jh, "on committing transaction"); - if (jh->b_jlist == BJ_Locked) { - /* - * The buffer is on the committing transaction's locked - * list. We have the buffer locked, so I/O has - * completed. So we can nail the buffer now. - */ - may_free = __dispose_buffer(jh, transaction); - goto zap_buffer; - } - /* - * The buffer is committing, we simply cannot touch - * it. If the page is straddling i_size we have to wait - * for commit and try again. - */ - if (partial_page) { - tid_t tid = journal->j_committing_transaction->t_tid; - - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - unlock_buffer(bh); - log_wait_commit(journal, tid); - lock_buffer(bh); - goto retry; - } - /* - * OK, buffer won't be reachable after truncate. 
We just set - * j_next_transaction to the running transaction (if there is - * one) and mark buffer as freed so that commit code knows it - * should clear dirty bits when it is done with the buffer. - */ - set_buffer_freed(bh); - if (journal->j_running_transaction && buffer_jbddirty(bh)) - jh->b_next_transaction = journal->j_running_transaction; - journal_put_journal_head(jh); - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); - return 0; - } else { - /* Good, the buffer belongs to the running transaction. - * We are writing our own transaction's data, not any - * previous one's, so it is safe to throw it away - * (remember that we expect the filesystem to have set - * i_size already for this truncate so recovery will not - * expose the disk blocks we are discarding here.) */ - J_ASSERT_JH(jh, transaction == journal->j_running_transaction); - JBUFFER_TRACE(jh, "on running transaction"); - may_free = __dispose_buffer(jh, transaction); - } - -zap_buffer: - /* - * This is tricky. Although the buffer is truncated, it may be reused - * if blocksize < pagesize and it is attached to the page straddling - * EOF. Since the buffer might have been added to BJ_Forget list of the - * running transaction, journal_get_write_access() won't clear - * b_modified and credit accounting gets confused. So clear b_modified - * here. 
*/ - jh->b_modified = 0; - journal_put_journal_head(jh); -zap_buffer_no_jh: - spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_state_lock); -zap_buffer_unlocked: - clear_buffer_dirty(bh); - J_ASSERT_BH(bh, !buffer_jbddirty(bh)); - clear_buffer_mapped(bh); - clear_buffer_req(bh); - clear_buffer_new(bh); - bh->b_bdev = NULL; - return may_free; -} - -/** - * void journal_invalidatepage() - invalidate a journal page - * @journal: journal to use for flush - * @page: page to flush - * @offset: offset of the range to invalidate - * @length: length of the range to invalidate - * - * Reap page buffers containing data in specified range in page. - */ -void journal_invalidatepage(journal_t *journal, - struct page *page, - unsigned int offset, - unsigned int length) -{ - struct buffer_head *head, *bh, *next; - unsigned int stop = offset + length; - unsigned int curr_off = 0; - int partial_page = (offset || length < PAGE_CACHE_SIZE); - int may_free = 1; - - if (!PageLocked(page)) - BUG(); - if (!page_has_buffers(page)) - return; - - BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); - - /* We will potentially be playing with lists other than just the - * data lists (especially for journaled data mode), so be - * cautious in our locking. */ - - head = bh = page_buffers(page); - do { - unsigned int next_off = curr_off + bh->b_size; - next = bh->b_this_page; - - if (next_off > stop) - return; - - if (offset <= curr_off) { - /* This block is wholly outside the truncation point */ - lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh, - partial_page); - unlock_buffer(bh); - } - curr_off = next_off; - bh = next; - - } while (bh != head); - - if (!partial_page) { - if (may_free && try_to_free_buffers(page)) - J_ASSERT(!page_has_buffers(page)); - } -} - -/* - * File a buffer on the given transaction list. 
- */ -void __journal_file_buffer(struct journal_head *jh, - transaction_t *transaction, int jlist) -{ - struct journal_head **list = NULL; - int was_dirty = 0; - struct buffer_head *bh = jh2bh(jh); - - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); - assert_spin_locked(&transaction->t_journal->j_list_lock); - - J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); - J_ASSERT_JH(jh, jh->b_transaction == transaction || - jh->b_transaction == NULL); - - if (jh->b_transaction && jh->b_jlist == jlist) - return; - - if (jlist == BJ_Metadata || jlist == BJ_Reserved || - jlist == BJ_Shadow || jlist == BJ_Forget) { - /* - * For metadata buffers, we track dirty bit in buffer_jbddirty - * instead of buffer_dirty. We should not see a dirty bit set - * here because we clear it in do_get_write_access but e.g. - * tune2fs can modify the sb and set the dirty bit at any time - * so we try to gracefully handle that. - */ - if (buffer_dirty(bh)) - warn_dirty_buffer(bh); - if (test_clear_buffer_dirty(bh) || - test_clear_buffer_jbddirty(bh)) - was_dirty = 1; - } - - if (jh->b_transaction) - __journal_temp_unlink_buffer(jh); - else - journal_grab_journal_head(bh); - jh->b_transaction = transaction; - - switch (jlist) { - case BJ_None: - J_ASSERT_JH(jh, !jh->b_committed_data); - J_ASSERT_JH(jh, !jh->b_frozen_data); - return; - case BJ_SyncData: - list = &transaction->t_sync_datalist; - break; - case BJ_Metadata: - transaction->t_nr_buffers++; - list = &transaction->t_buffers; - break; - case BJ_Forget: - list = &transaction->t_forget; - break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; - case BJ_Shadow: - list = &transaction->t_shadow_list; - break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; - case BJ_Reserved: - list = &transaction->t_reserved_list; - break; - case BJ_Locked: - list = &transaction->t_locked_list; - break; - } - - __blist_add_buffer(list, jh); - jh->b_jlist = jlist; - - if (was_dirty) - set_buffer_jbddirty(bh); -} - -void journal_file_buffer(struct 
journal_head *jh, - transaction_t *transaction, int jlist) -{ - jbd_lock_bh_state(jh2bh(jh)); - spin_lock(&transaction->t_journal->j_list_lock); - __journal_file_buffer(jh, transaction, jlist); - spin_unlock(&transaction->t_journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); -} - -/* - * Remove a buffer from its current buffer list in preparation for - * dropping it from its current transaction entirely. If the buffer has - * already started to be used by a subsequent transaction, refile the - * buffer on that transaction's metadata list. - * - * Called under j_list_lock - * Called under jbd_lock_bh_state(jh2bh(jh)) - * - * jh and bh may be already free when this function returns - */ -void __journal_refile_buffer(struct journal_head *jh) -{ - int was_dirty, jlist; - struct buffer_head *bh = jh2bh(jh); - - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); - if (jh->b_transaction) - assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); - - /* If the buffer is now unused, just drop it. */ - if (jh->b_next_transaction == NULL) { - __journal_unfile_buffer(jh); - return; - } - - /* - * It has been modified by a later transaction: add it to the new - * transaction's metadata list. - */ - - was_dirty = test_clear_buffer_jbddirty(bh); - __journal_temp_unlink_buffer(jh); - /* - * We set b_transaction here because b_next_transaction will inherit - * our jh reference and thus __journal_file_buffer() must not take a - * new one. - */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; - if (buffer_freed(bh)) - jlist = BJ_Forget; - else if (jh->b_modified) - jlist = BJ_Metadata; - else - jlist = BJ_Reserved; - __journal_file_buffer(jh, jh->b_transaction, jlist); - J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); - - if (was_dirty) - set_buffer_jbddirty(bh); -} - -/* - * __journal_refile_buffer() with necessary locking added. We take our bh - * reference so that we can safely unlock bh. 
- * - * The jh and bh may be freed by this call. - */ -void journal_refile_buffer(journal_t *journal, struct journal_head *jh) -{ - struct buffer_head *bh = jh2bh(jh); - - /* Get reference so that buffer cannot be freed before we unlock it */ - get_bh(bh); - jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); - __journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); - spin_unlock(&journal->j_list_lock); - __brelse(bh); -} diff --git a/include/linux/jbd.h b/include/linux/jbd.h deleted file mode 100644 index d32615280be9..000000000000 --- a/include/linux/jbd.h +++ /dev/null @@ -1,1047 +0,0 @@ -/* - * linux/include/linux/jbd.h - * - * Written by Stephen C. Tweedie - * - * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Definitions for transaction data structures for the buffer cache - * filesystem journaling support. - */ - -#ifndef _LINUX_JBD_H -#define _LINUX_JBD_H - -/* Allow this file to be included directly into e2fsprogs */ -#ifndef __KERNEL__ -#include "jfs_compat.h" -#define JFS_DEBUG -#define jfs_debug jbd_debug -#else - -#include -#include -#include -#include -#include -#include -#include -#include - -#define journal_oom_retry 1 - -/* - * Define JBD_PARANOID_IOFAIL to cause a kernel BUG() if ext3 finds - * certain classes of error which can occur due to failed IOs. Under - * normal use we want ext3 to continue after such errors, because - * hardware _can_ fail, but for debugging purposes when running tests on - * known-good hardware we may want to trap these errors. - */ -#undef JBD_PARANOID_IOFAIL - -/* - * The default maximum commit age, in seconds. - */ -#define JBD_DEFAULT_MAX_COMMIT_AGE 5 - -#ifdef CONFIG_JBD_DEBUG -/* - * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal - * consistency checks. 
By default we don't do this unless - * CONFIG_JBD_DEBUG is on. - */ -#define JBD_EXPENSIVE_CHECKING -extern u8 journal_enable_debug; - -void __jbd_debug(int level, const char *file, const char *func, - unsigned int line, const char *fmt, ...); - -#define jbd_debug(n, fmt, a...) \ - __jbd_debug((n), __FILE__, __func__, __LINE__, (fmt), ##a) -#else -#define jbd_debug(n, fmt, a...) /**/ -#endif - -static inline void *jbd_alloc(size_t size, gfp_t flags) -{ - return (void *)__get_free_pages(flags, get_order(size)); -} - -static inline void jbd_free(void *ptr, size_t size) -{ - free_pages((unsigned long)ptr, get_order(size)); -} - -#define JFS_MIN_JOURNAL_BLOCKS 1024 - - -/** - * typedef handle_t - The handle_t type represents a single atomic update being performed by some process. - * - * All filesystem modifications made by the process go - * through this handle. Recursive operations (such as quota operations) - * are gathered into a single update. - * - * The buffer credits field is used to account for journaled buffers - * being modified by the running process. To ensure that there is - * enough log space for all outstanding operations, we need to limit the - * number of outstanding buffers possible at any time. When the - * operation completes, any buffer credits not used are credited back to - * the transaction, so that at all times we know how many buffers the - * outstanding updates on a transaction might possibly touch. - * - * This is an opaque datatype. - **/ -typedef struct handle_s handle_t; /* Atomic operation type */ - - -/** - * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem. - * - * journal_t is linked to from the fs superblock structure. - * - * We use the journal_t to keep track of all outstanding transaction - * activity on the filesystem, and to manage the state of the log - * writing process. - * - * This is an opaque datatype. 
- **/ -typedef struct journal_s journal_t; /* Journal control structure */ -#endif - -/* - * Internal structures used by the logging mechanism: - */ - -#define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ - -/* - * On-disk structures - */ - -/* - * Descriptor block types: - */ - -#define JFS_DESCRIPTOR_BLOCK 1 -#define JFS_COMMIT_BLOCK 2 -#define JFS_SUPERBLOCK_V1 3 -#define JFS_SUPERBLOCK_V2 4 -#define JFS_REVOKE_BLOCK 5 - -/* - * Standard header for all descriptor blocks: - */ -typedef struct journal_header_s -{ - __be32 h_magic; - __be32 h_blocktype; - __be32 h_sequence; -} journal_header_t; - - -/* - * The block tag: used to describe a single buffer in the journal - */ -typedef struct journal_block_tag_s -{ - __be32 t_blocknr; /* The on-disk block number */ - __be32 t_flags; /* See below */ -} journal_block_tag_t; - -/* - * The revoke descriptor: used on disk to describe a series of blocks to - * be revoked from the log - */ -typedef struct journal_revoke_header_s -{ - journal_header_t r_header; - __be32 r_count; /* Count of bytes used in the block */ -} journal_revoke_header_t; - - -/* Definitions for the journal tag flags word: */ -#define JFS_FLAG_ESCAPE 1 /* on-disk block is escaped */ -#define JFS_FLAG_SAME_UUID 2 /* block has same uuid as previous */ -#define JFS_FLAG_DELETED 4 /* block deleted by this transaction */ -#define JFS_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ - - -/* - * The journal superblock. All fields are in big-endian byte order. 
- */ -typedef struct journal_superblock_s -{ -/* 0x0000 */ - journal_header_t s_header; - -/* 0x000C */ - /* Static information describing the journal */ - __be32 s_blocksize; /* journal device blocksize */ - __be32 s_maxlen; /* total blocks in journal file */ - __be32 s_first; /* first block of log information */ - -/* 0x0018 */ - /* Dynamic information describing the current state of the log */ - __be32 s_sequence; /* first commit ID expected in log */ - __be32 s_start; /* blocknr of start of log */ - -/* 0x0020 */ - /* Error value, as set by journal_abort(). */ - __be32 s_errno; - -/* 0x0024 */ - /* Remaining fields are only valid in a version-2 superblock */ - __be32 s_feature_compat; /* compatible feature set */ - __be32 s_feature_incompat; /* incompatible feature set */ - __be32 s_feature_ro_compat; /* readonly-compatible feature set */ -/* 0x0030 */ - __u8 s_uuid[16]; /* 128-bit uuid for journal */ - -/* 0x0040 */ - __be32 s_nr_users; /* Nr of filesystems sharing log */ - - __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ - -/* 0x0048 */ - __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ - __be32 s_max_trans_data; /* Limit of data blocks per trans. 
*/ - -/* 0x0050 */ - __u32 s_padding[44]; - -/* 0x0100 */ - __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ -/* 0x0400 */ -} journal_superblock_t; - -#define JFS_HAS_COMPAT_FEATURE(j,mask) \ - ((j)->j_format_version >= 2 && \ - ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) -#define JFS_HAS_RO_COMPAT_FEATURE(j,mask) \ - ((j)->j_format_version >= 2 && \ - ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask)))) -#define JFS_HAS_INCOMPAT_FEATURE(j,mask) \ - ((j)->j_format_version >= 2 && \ - ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) - -#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001 - -/* Features known to this kernel version: */ -#define JFS_KNOWN_COMPAT_FEATURES 0 -#define JFS_KNOWN_ROCOMPAT_FEATURES 0 -#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE - -#ifdef __KERNEL__ - -#include -#include - -enum jbd_state_bits { - BH_JBD /* Has an attached ext3 journal_head */ - = BH_PrivateStart, - BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ - BH_Freed, /* Has been freed (truncated) */ - BH_Revoked, /* Has been revoked from the log */ - BH_RevokeValid, /* Revoked flag is valid */ - BH_JBDDirty, /* Is dirty but journaled */ - BH_State, /* Pins most journal_head state */ - BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ - BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ - BH_JBDPrivateStart, /* First bit available for private use by FS */ -}; - -BUFFER_FNS(JBD, jbd) -BUFFER_FNS(JWrite, jwrite) -BUFFER_FNS(JBDDirty, jbddirty) -TAS_BUFFER_FNS(JBDDirty, jbddirty) -BUFFER_FNS(Revoked, revoked) -TAS_BUFFER_FNS(Revoked, revoked) -BUFFER_FNS(RevokeValid, revokevalid) -TAS_BUFFER_FNS(RevokeValid, revokevalid) -BUFFER_FNS(Freed, freed) - -#include - -#define J_ASSERT(assert) BUG_ON(!(assert)) - -#define J_ASSERT_BH(bh, expr) J_ASSERT(expr) -#define J_ASSERT_JH(jh, expr) J_ASSERT(expr) - -#if defined(JBD_PARANOID_IOFAIL) -#define J_EXPECT(expr, why...) 
J_ASSERT(expr) -#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) -#define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) -#else -#define __journal_expect(expr, why...) \ - ({ \ - int val = (expr); \ - if (!val) { \ - printk(KERN_ERR \ - "EXT3-fs unexpected failure: %s;\n",# expr); \ - printk(KERN_ERR why "\n"); \ - } \ - val; \ - }) -#define J_EXPECT(expr, why...) __journal_expect(expr, ## why) -#define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) -#define J_EXPECT_JH(jh, expr, why...) __journal_expect(expr, ## why) -#endif - -struct jbd_revoke_table_s; - -/** - * struct handle_s - this is the concrete type associated with handle_t. - * @h_transaction: Which compound transaction is this update a part of? - * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. - * @h_ref: Reference count on this handle - * @h_err: Field for caller's use to track errors through large fs operations - * @h_sync: flag for sync-on-close - * @h_jdata: flag to force data journaling - * @h_aborted: flag indicating fatal error on handle - * @h_lockdep_map: lockdep info for debugging lock problems - */ -struct handle_s -{ - /* Which compound transaction is this update a part of? */ - transaction_t *h_transaction; - - /* Number of remaining buffers we are allowed to dirty: */ - int h_buffer_credits; - - /* Reference count on this handle */ - int h_ref; - - /* Field for caller's use to track errors through large fs */ - /* operations */ - int h_err; - - /* Flags [no locking] */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ - unsigned int h_aborted: 1; /* fatal error on handle */ - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map h_lockdep_map; -#endif -}; - - -/* The transaction_t type is the guts of the journaling mechanism. 
It - * tracks a compound transaction through its various states: - * - * RUNNING: accepting new updates - * LOCKED: Updates still running but we don't accept new ones - * RUNDOWN: Updates are tidying up but have finished requesting - * new buffers to modify (state not used for now) - * FLUSH: All updates complete, but we are still writing to disk - * COMMIT: All data on disk, writing commit record - * FINISHED: We still have to keep the transaction for checkpointing. - * - * The transaction keeps track of all of the buffers modified by a - * running transaction, and all of the buffers committed but not yet - * flushed to home for finished transactions. - */ - -/* - * Lock ranking: - * - * j_list_lock - * ->jbd_lock_bh_journal_head() (This is "innermost") - * - * j_state_lock - * ->jbd_lock_bh_state() - * - * jbd_lock_bh_state() - * ->j_list_lock - * - * j_state_lock - * ->t_handle_lock - * - * j_state_lock - * ->j_list_lock (journal_unmap_buffer) - * - */ - -struct transaction_s -{ - /* Pointer to the journal for this transaction. [no locking] */ - journal_t *t_journal; - - /* Sequence number for this transaction [no locking] */ - tid_t t_tid; - - /* - * Transaction's current state - * [no locking - only kjournald alters this] - * [j_list_lock] guards transition of a transaction into T_FINISHED - * state and subsequent call of __journal_drop_transaction() - * FIXME: needs barriers - * KLUDGE: [use j_state_lock] - */ - enum { - T_RUNNING, - T_LOCKED, - T_FLUSH, - T_COMMIT, - T_COMMIT_RECORD, - T_FINISHED - } t_state; - - /* - * Where in the log does this transaction's commit start? 
[no locking] - */ - unsigned int t_log_start; - - /* Number of buffers on the t_buffers list [j_list_lock] */ - int t_nr_buffers; - - /* - * Doubly-linked circular list of all buffers reserved but not yet - * modified by this transaction [j_list_lock] - */ - struct journal_head *t_reserved_list; - - /* - * Doubly-linked circular list of all buffers under writeout during - * commit [j_list_lock] - */ - struct journal_head *t_locked_list; - - /* - * Doubly-linked circular list of all metadata buffers owned by this - * transaction [j_list_lock] - */ - struct journal_head *t_buffers; - - /* - * Doubly-linked circular list of all data buffers still to be - * flushed before this transaction can be committed [j_list_lock] - */ - struct journal_head *t_sync_datalist; - - /* - * Doubly-linked circular list of all forget buffers (superseded - * buffers which we can un-checkpoint once this transaction commits) - * [j_list_lock] - */ - struct journal_head *t_forget; - - /* - * Doubly-linked circular list of all buffers still to be flushed before - * this transaction can be checkpointed. [j_list_lock] - */ - struct journal_head *t_checkpoint_list; - - /* - * Doubly-linked circular list of all buffers submitted for IO while - * checkpointing. [j_list_lock] - */ - struct journal_head *t_checkpoint_io_list; - - /* - * Doubly-linked circular list of temporary buffers currently undergoing - * IO in the log [j_list_lock] - */ - struct journal_head *t_iobuf_list; - - /* - * Doubly-linked circular list of metadata buffers being shadowed by log - * IO. The IO buffers on the iobuf list and the shadow buffers on this - * list match each other one for one at all times. [j_list_lock] - */ - struct journal_head *t_shadow_list; - - /* - * Doubly-linked circular list of control buffers being written to the - * log. 
[j_list_lock] - */ - struct journal_head *t_log_list; - - /* - * Protects info related to handles - */ - spinlock_t t_handle_lock; - - /* - * Number of outstanding updates running on this transaction - * [t_handle_lock] - */ - int t_updates; - - /* - * Number of buffers reserved for use by all handles in this transaction - * handle but not yet modified. [t_handle_lock] - */ - int t_outstanding_credits; - - /* - * Forward and backward links for the circular list of all transactions - * awaiting checkpoint. [j_list_lock] - */ - transaction_t *t_cpnext, *t_cpprev; - - /* - * When will the transaction expire (become due for commit), in jiffies? - * [no locking] - */ - unsigned long t_expires; - - /* - * When this transaction started, in nanoseconds [no locking] - */ - ktime_t t_start_time; - - /* - * How many handles used this transaction? [t_handle_lock] - */ - int t_handle_count; -}; - -/** - * struct journal_s - this is the concrete type associated with journal_t. - * @j_flags: General journaling state flags - * @j_errno: Is there an outstanding uncleared error on the journal (from a - * prior abort)? - * @j_sb_buffer: First part of superblock buffer - * @j_superblock: Second part of superblock buffer - * @j_format_version: Version of the superblock format - * @j_state_lock: Protect the various scalars in the journal - * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_running_transaction: The current running transaction.. 
- * @j_committing_transaction: the transaction we are pushing to disk - * @j_checkpoint_transactions: a linked circular list of all transactions - * waiting for checkpointing - * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction - * to start committing, or for a barrier lock to be released - * @j_wait_logspace: Wait queue for waiting for checkpointing to complete - * @j_wait_done_commit: Wait queue for waiting for commit to complete - * @j_wait_checkpoint: Wait queue to trigger checkpointing - * @j_wait_commit: Wait queue to trigger commit - * @j_wait_updates: Wait queue to wait for updates to complete - * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints - * @j_head: Journal head - identifies the first unused block in the journal - * @j_tail: Journal tail - identifies the oldest still-used block in the - * journal. - * @j_free: Journal free - how many free blocks are there in the journal? - * @j_first: The block number of the first usable block - * @j_last: The block number one beyond the last usable block - * @j_dev: Device where we store the journal - * @j_blocksize: blocksize for the location where we store the journal. - * @j_blk_offset: starting block offset for into the device where we store the - * journal - * @j_fs_dev: Device which holds the client fs. For internal journal this will - * be equal to j_dev - * @j_maxlen: Total maximum capacity of the journal region on disk. - * @j_list_lock: Protects the buffer lists and internal buffer state. - * @j_inode: Optional inode where we store the journal. If present, all journal - * block numbers are mapped into this inode via bmap(). 
- * @j_tail_sequence: Sequence number of the oldest transaction in the log - * @j_transaction_sequence: Sequence number of the next transaction to grant - * @j_commit_sequence: Sequence number of the most recently committed - * transaction - * @j_commit_request: Sequence number of the most recent transaction wanting - * commit - * @j_commit_waited: Sequence number of the most recent transaction someone - * is waiting for to commit. - * @j_uuid: Uuid of client object. - * @j_task: Pointer to the current commit thread for this journal - * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a - * single compound commit transaction - * @j_commit_interval: What is the maximum transaction lifetime before we begin - * a commit? - * @j_commit_timer: The timer used to wakeup the commit thread - * @j_revoke_lock: Protect the revoke table - * @j_revoke: The revoke table - maintains the list of revoked blocks in the - * current transaction. - * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for journal_commit_transaction - * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the - * number that will fit in j_blocksize - * @j_last_sync_writer: most recent pid which did a synchronous write - * @j_average_commit_time: the average amount of time in nanoseconds it - * takes to commit a transaction to the disk. - * @j_private: An opaque pointer to fs-private information. - */ - -struct journal_s -{ - /* General journaling state flags [j_state_lock] */ - unsigned long j_flags; - - /* - * Is there an outstanding uncleared error on the journal (from a prior - * abort)? 
[j_state_lock] - */ - int j_errno; - - /* The superblock buffer */ - struct buffer_head *j_sb_buffer; - journal_superblock_t *j_superblock; - - /* Version of the superblock format */ - int j_format_version; - - /* - * Protect the various scalars in the journal - */ - spinlock_t j_state_lock; - - /* - * Number of processes waiting to create a barrier lock [j_state_lock] - */ - int j_barrier_count; - - /* - * Transactions: The current running transaction... - * [j_state_lock] [caller holding open handle] - */ - transaction_t *j_running_transaction; - - /* - * the transaction we are pushing to disk - * [j_state_lock] [caller holding open handle] - */ - transaction_t *j_committing_transaction; - - /* - * ... and a linked circular list of all transactions waiting for - * checkpointing. [j_list_lock] - */ - transaction_t *j_checkpoint_transactions; - - /* - * Wait queue for waiting for a locked transaction to start committing, - * or for a barrier lock to be released - */ - wait_queue_head_t j_wait_transaction_locked; - - /* Wait queue for waiting for checkpointing to complete */ - wait_queue_head_t j_wait_logspace; - - /* Wait queue for waiting for commit to complete */ - wait_queue_head_t j_wait_done_commit; - - /* Wait queue to trigger checkpointing */ - wait_queue_head_t j_wait_checkpoint; - - /* Wait queue to trigger commit */ - wait_queue_head_t j_wait_commit; - - /* Wait queue to wait for updates to complete */ - wait_queue_head_t j_wait_updates; - - /* Semaphore for locking against concurrent checkpoints */ - struct mutex j_checkpoint_mutex; - - /* - * Journal head: identifies the first unused block in the journal. - * [j_state_lock] - */ - unsigned int j_head; - - /* - * Journal tail: identifies the oldest still-used block in the journal. - * [j_state_lock] - */ - unsigned int j_tail; - - /* - * Journal free: how many free blocks are there in the journal? 
- * [j_state_lock] - */ - unsigned int j_free; - - /* - * Journal start and end: the block numbers of the first usable block - * and one beyond the last usable block in the journal. [j_state_lock] - */ - unsigned int j_first; - unsigned int j_last; - - /* - * Device, blocksize and starting block offset for the location where we - * store the journal. - */ - struct block_device *j_dev; - int j_blocksize; - unsigned int j_blk_offset; - - /* - * Device which holds the client fs. For internal journal this will be - * equal to j_dev. - */ - struct block_device *j_fs_dev; - - /* Total maximum capacity of the journal region on disk. */ - unsigned int j_maxlen; - - /* - * Protects the buffer lists and internal buffer state. - */ - spinlock_t j_list_lock; - - /* Optional inode where we store the journal. If present, all */ - /* journal block numbers are mapped into this inode via */ - /* bmap(). */ - struct inode *j_inode; - - /* - * Sequence number of the oldest transaction in the log [j_state_lock] - */ - tid_t j_tail_sequence; - - /* - * Sequence number of the next transaction to grant [j_state_lock] - */ - tid_t j_transaction_sequence; - - /* - * Sequence number of the most recently committed transaction - * [j_state_lock]. - */ - tid_t j_commit_sequence; - - /* - * Sequence number of the most recent transaction wanting commit - * [j_state_lock] - */ - tid_t j_commit_request; - - /* - * Sequence number of the most recent transaction someone is waiting - * for to commit. - * [j_state_lock] - */ - tid_t j_commit_waited; - - /* - * Journal uuid: identifies the object (filesystem, LVM volume etc) - * backed by this journal. This will eventually be replaced by an array - * of uuids, allowing us to index multiple devices within a single - * journal and to perform atomic updates across them. 
- */ - __u8 j_uuid[16]; - - /* Pointer to the current commit thread for this journal */ - struct task_struct *j_task; - - /* - * Maximum number of metadata buffers to allow in a single compound - * commit transaction - */ - int j_max_transaction_buffers; - - /* - * What is the maximum transaction lifetime before we begin a commit? - */ - unsigned long j_commit_interval; - - /* The timer used to wakeup the commit thread: */ - struct timer_list j_commit_timer; - - /* - * The revoke table: maintains the list of revoked blocks in the - * current transaction. [j_revoke_lock] - */ - spinlock_t j_revoke_lock; - struct jbd_revoke_table_s *j_revoke; - struct jbd_revoke_table_s *j_revoke_table[2]; - - /* - * array of bhs for journal_commit_transaction - */ - struct buffer_head **j_wbuf; - int j_wbufsize; - - /* - * this is the pid of the last person to run a synchronous operation - * through the journal. - */ - pid_t j_last_sync_writer; - - /* - * the average amount of time in nanoseconds it takes to commit a - * transaction to the disk. [j_state_lock] - */ - u64 j_average_commit_time; - - /* - * An opaque pointer to fs-private information. ext3 puts its - * superblock pointer here - */ - void *j_private; -}; - -/* - * Journal flag definitions - */ -#define JFS_UNMOUNT 0x001 /* Journal thread is being destroyed */ -#define JFS_ABORT 0x002 /* Journaling has been aborted for errors. 
*/ -#define JFS_ACK_ERR 0x004 /* The errno in the sb has been acked */ -#define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */ -#define JFS_LOADED 0x010 /* The journal superblock has been loaded */ -#define JFS_BARRIER 0x020 /* Use IDE barriers */ -#define JFS_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file - * data write error in ordered - * mode */ - -/* - * Function declarations for the journaling transaction and buffer - * management - */ - -/* Filing buffers */ -extern void journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __journal_unfile_buffer(struct journal_head *); -extern void __journal_refile_buffer(struct journal_head *); -extern void journal_refile_buffer(journal_t *, struct journal_head *); -extern void __journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_free_buffer(struct journal_head *bh); -extern void journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_clean_data_list(transaction_t *transaction); - -/* Log buffer allocation */ -extern struct journal_head * journal_get_descriptor_buffer(journal_t *); -int journal_next_log_block(journal_t *, unsigned int *); - -/* Commit management */ -extern void journal_commit_transaction(journal_t *); - -/* Checkpoint list management */ -int __journal_clean_checkpoint_list(journal_t *journal); -int __journal_remove_checkpoint(struct journal_head *); -void __journal_insert_checkpoint(struct journal_head *, transaction_t *); - -/* Buffer IO */ -extern int -journal_write_metadata_buffer(transaction_t *transaction, - struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned int blocknr); - -/* Transaction locking */ -extern void __wait_on_journal (journal_t *); - -/* - * Journal locking. 
- * - * We need to lock the journal during transaction state changes so that nobody - * ever tries to take a handle on the running transaction while we are in the - * middle of moving it to the commit phase. j_state_lock does this. - * - * Note that the locking is completely interrupt unsafe. We never touch - * journal structures from interrupts. - */ - -static inline handle_t *journal_current_handle(void) -{ - return current->journal_info; -} - -/* The journaling code user interface: - * - * Create and destroy handles - * Register buffer modifications against the current transaction. - */ - -extern handle_t *journal_start(journal_t *, int nblocks); -extern int journal_restart (handle_t *, int nblocks); -extern int journal_extend (handle_t *, int nblocks); -extern int journal_get_write_access(handle_t *, struct buffer_head *); -extern int journal_get_create_access (handle_t *, struct buffer_head *); -extern int journal_get_undo_access(handle_t *, struct buffer_head *); -extern int journal_dirty_data (handle_t *, struct buffer_head *); -extern int journal_dirty_metadata (handle_t *, struct buffer_head *); -extern void journal_release_buffer (handle_t *, struct buffer_head *); -extern int journal_forget (handle_t *, struct buffer_head *); -extern void journal_sync_buffer (struct buffer_head *); -extern void journal_invalidatepage(journal_t *, - struct page *, unsigned int, unsigned int); -extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); -extern int journal_stop(handle_t *); -extern int journal_flush (journal_t *); -extern void journal_lock_updates (journal_t *); -extern void journal_unlock_updates (journal_t *); - -extern journal_t * journal_init_dev(struct block_device *bdev, - struct block_device *fs_dev, - int start, int len, int bsize); -extern journal_t * journal_init_inode (struct inode *); -extern int journal_update_format (journal_t *); -extern int journal_check_used_features - (journal_t *, unsigned long, unsigned long, unsigned 
long); -extern int journal_check_available_features - (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_set_features - (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_create (journal_t *); -extern int journal_load (journal_t *journal); -extern int journal_destroy (journal_t *); -extern int journal_recover (journal_t *journal); -extern int journal_wipe (journal_t *, int); -extern int journal_skip_recovery (journal_t *); -extern void journal_update_sb_log_tail (journal_t *, tid_t, unsigned int, - int); -extern void journal_abort (journal_t *, int); -extern int journal_errno (journal_t *); -extern void journal_ack_err (journal_t *); -extern int journal_clear_err (journal_t *); -extern int journal_bmap(journal_t *, unsigned int, unsigned int *); -extern int journal_force_commit(journal_t *); - -/* - * journal_head management - */ -struct journal_head *journal_add_journal_head(struct buffer_head *bh); -struct journal_head *journal_grab_journal_head(struct buffer_head *bh); -void journal_put_journal_head(struct journal_head *jh); - -/* - * handle management - */ -extern struct kmem_cache *jbd_handle_cache; - -static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) -{ - return kmem_cache_zalloc(jbd_handle_cache, gfp_flags); -} - -static inline void jbd_free_handle(handle_t *handle) -{ - kmem_cache_free(jbd_handle_cache, handle); -} - -/* Primary revoke support */ -#define JOURNAL_REVOKE_DEFAULT_HASH 256 -extern int journal_init_revoke(journal_t *, int); -extern void journal_destroy_revoke_caches(void); -extern int journal_init_revoke_caches(void); - -extern void journal_destroy_revoke(journal_t *); -extern int journal_revoke (handle_t *, - unsigned int, struct buffer_head *); -extern int journal_cancel_revoke(handle_t *, struct journal_head *); -extern void journal_write_revoke_records(journal_t *, - transaction_t *, int); - -/* Recovery revoke support */ -extern int journal_set_revoke(journal_t *, unsigned 
int, tid_t); -extern int journal_test_revoke(journal_t *, unsigned int, tid_t); -extern void journal_clear_revoke(journal_t *); -extern void journal_switch_revoke_table(journal_t *journal); -extern void journal_clear_buffer_revoked_flags(journal_t *journal); - -/* - * The log thread user interface: - * - * Request space in the current transaction, and force transaction commit - * transitions on demand. - */ - -int __log_space_left(journal_t *); /* Called with journal locked */ -int log_start_commit(journal_t *journal, tid_t tid); -int __log_start_commit(journal_t *journal, tid_t tid); -int journal_start_commit(journal_t *journal, tid_t *tid); -int journal_force_commit_nested(journal_t *journal); -int log_wait_commit(journal_t *journal, tid_t tid); -int log_do_checkpoint(journal_t *journal); -int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid); - -void __log_wait_for_space(journal_t *journal); -extern void __journal_drop_transaction(journal_t *, transaction_t *); -extern int cleanup_journal_tail(journal_t *); - -/* - * is_journal_abort - * - * Simple test wrapper function to test the JFS_ABORT state flag. This - * bit, when set, indicates that we have had a fatal error somewhere, - * either inside the journaling layer or indicated to us by the client - * (eg. ext3), and that we and should not commit any further - * transactions. - */ - -static inline int is_journal_aborted(journal_t *journal) -{ - return journal->j_flags & JFS_ABORT; -} - -static inline int is_handle_aborted(handle_t *handle) -{ - if (handle->h_aborted) - return 1; - return is_journal_aborted(handle->h_transaction->t_journal); -} - -static inline void journal_abort_handle(handle_t *handle) -{ - handle->h_aborted = 1; -} - -#endif /* __KERNEL__ */ - -/* Comparison functions for transaction IDs: perform comparisons using - * modulo arithmetic so that they work over sequence number wraps. 
*/ - -static inline int tid_gt(tid_t x, tid_t y) -{ - int difference = (x - y); - return (difference > 0); -} - -static inline int tid_geq(tid_t x, tid_t y) -{ - int difference = (x - y); - return (difference >= 0); -} - -extern int journal_blocks_per_page(struct inode *inode); - -/* - * Return the minimum number of blocks which must be free in the journal - * before a new transaction may be started. Must be called under j_state_lock. - */ -static inline int jbd_space_needed(journal_t *journal) -{ - int nblocks = journal->j_max_transaction_buffers; - if (journal->j_committing_transaction) - nblocks += journal->j_committing_transaction-> - t_outstanding_credits; - return nblocks; -} - -/* - * Definitions which augment the buffer_head layer - */ - -/* journaling buffer types */ -#define BJ_None 0 /* Not journaled */ -#define BJ_SyncData 1 /* Normal data: flush before commit */ -#define BJ_Metadata 2 /* Normal journaled metadata */ -#define BJ_Forget 3 /* Buffer superseded by this transaction */ -#define BJ_IO 4 /* Buffer is for temporary IO use */ -#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 6 /* Buffer contains log descriptors */ -#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ -#define BJ_Locked 8 /* Locked for I/O during commit */ -#define BJ_Types 9 - -extern int jbd_blocks_per_page(struct inode *inode); - -#ifdef __KERNEL__ - -#define buffer_trace_init(bh) do {} while (0) -#define print_buffer_fields(bh) do {} while (0) -#define print_buffer_trace(bh) do {} while (0) -#define BUFFER_TRACE(bh, info) do {} while (0) -#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) -#define JBUFFER_TRACE(jh, info) do {} while (0) - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_JBD_H */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index edb640ae9a94..ad4b28647298 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -29,6 +29,7 @@ #include #include #include +#include #include #endif @@ -336,7 
+337,45 @@ BUFFER_FNS(Freed, freed) BUFFER_FNS(Shadow, shadow) BUFFER_FNS(Verified, verified) -#include +static inline struct buffer_head *jh2bh(struct journal_head *jh) +{ + return jh->b_bh; +} + +static inline struct journal_head *bh2jh(struct buffer_head *bh) +{ + return bh->b_private; +} + +static inline void jbd_lock_bh_state(struct buffer_head *bh) +{ + bit_spin_lock(BH_State, &bh->b_state); +} + +static inline int jbd_trylock_bh_state(struct buffer_head *bh) +{ + return bit_spin_trylock(BH_State, &bh->b_state); +} + +static inline int jbd_is_locked_bh_state(struct buffer_head *bh) +{ + return bit_spin_is_locked(BH_State, &bh->b_state); +} + +static inline void jbd_unlock_bh_state(struct buffer_head *bh) +{ + bit_spin_unlock(BH_State, &bh->b_state); +} + +static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) +{ + bit_spin_lock(BH_JournalHead, &bh->b_state); +} + +static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) +{ + bit_spin_unlock(BH_JournalHead, &bh->b_state); +} #define J_ASSERT(assert) BUG_ON(!(assert)) diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h deleted file mode 100644 index 3dc53432355f..000000000000 --- a/include/linux/jbd_common.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef _LINUX_JBD_STATE_H -#define _LINUX_JBD_STATE_H - -#include - -static inline struct buffer_head *jh2bh(struct journal_head *jh) -{ - return jh->b_bh; -} - -static inline struct journal_head *bh2jh(struct buffer_head *bh) -{ - return bh->b_private; -} - -static inline void jbd_lock_bh_state(struct buffer_head *bh) -{ - bit_spin_lock(BH_State, &bh->b_state); -} - -static inline int jbd_trylock_bh_state(struct buffer_head *bh) -{ - return bit_spin_trylock(BH_State, &bh->b_state); -} - -static inline int jbd_is_locked_bh_state(struct buffer_head *bh) -{ - return bit_spin_is_locked(BH_State, &bh->b_state); -} - -static inline void jbd_unlock_bh_state(struct buffer_head *bh) -{ - bit_spin_unlock(BH_State, &bh->b_state); -} - -static 
inline void jbd_lock_bh_journal_head(struct buffer_head *bh) -{ - bit_spin_lock(BH_JournalHead, &bh->b_state); -} - -static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) -{ - bit_spin_unlock(BH_JournalHead, &bh->b_state); -} - -#endif diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h deleted file mode 100644 index fc733d28117a..000000000000 --- a/include/trace/events/ext3.h +++ /dev/null @@ -1,866 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ext3 - -#if !defined(_TRACE_EXT3_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_EXT3_H - -#include - -TRACE_EVENT(ext3_free_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( umode_t, mode ) - __field( uid_t, uid ) - __field( gid_t, gid ) - __field( blkcnt_t, blocks ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->mode = inode->i_mode; - __entry->uid = i_uid_read(inode); - __entry->gid = i_gid_read(inode); - __entry->blocks = inode->i_blocks; - ), - - TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->mode, __entry->uid, __entry->gid, - (unsigned long) __entry->blocks) -); - -TRACE_EVENT(ext3_request_inode, - TP_PROTO(struct inode *dir, int mode), - - TP_ARGS(dir, mode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, dir ) - __field( umode_t, mode ) - ), - - TP_fast_assign( - __entry->dev = dir->i_sb->s_dev; - __entry->dir = dir->i_ino; - __entry->mode = mode; - ), - - TP_printk("dev %d,%d dir %lu mode 0%o", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->dir, __entry->mode) -); - -TRACE_EVENT(ext3_allocate_inode, - TP_PROTO(struct inode *inode, struct inode *dir, int mode), - - TP_ARGS(inode, dir, mode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( ino_t, 
dir ) - __field( umode_t, mode ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->dir = dir->i_ino; - __entry->mode = mode; - ), - - TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long) __entry->dir, __entry->mode) -); - -TRACE_EVENT(ext3_evict_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( int, nlink ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->nlink = inode->i_nlink; - ), - - TP_printk("dev %d,%d ino %lu nlink %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->nlink) -); - -TRACE_EVENT(ext3_drop_inode, - TP_PROTO(struct inode *inode, int drop), - - TP_ARGS(inode, drop), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( int, drop ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->drop = drop; - ), - - TP_printk("dev %d,%d ino %lu drop %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->drop) -); - -TRACE_EVENT(ext3_mark_inode_dirty, - TP_PROTO(struct inode *inode, unsigned long IP), - - TP_ARGS(inode, IP), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field(unsigned long, ip ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->ip = IP; - ), - - TP_printk("dev %d,%d ino %lu caller %pS", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, (void *)__entry->ip) -); - -TRACE_EVENT(ext3_write_begin, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int flags), - - TP_ARGS(inode, pos, len, flags), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - 
__field( unsigned int, len ) - __field( unsigned int, flags ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->flags = flags; - ), - - TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, - __entry->flags) -); - -DECLARE_EVENT_CLASS(ext3__write_end, - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( loff_t, pos ) - __field( unsigned int, len ) - __field( unsigned int, copied ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->pos = pos; - __entry->len = len; - __entry->copied = copied; - ), - - TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, - __entry->copied) -); - -DEFINE_EVENT(ext3__write_end, ext3_ordered_write_end, - - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied) -); - -DEFINE_EVENT(ext3__write_end, ext3_writeback_write_end, - - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied) -); - -DEFINE_EVENT(ext3__write_end, ext3_journalled_write_end, - - TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, - unsigned int copied), - - TP_ARGS(inode, pos, len, copied) -); - -DECLARE_EVENT_CLASS(ext3__page_op, - TP_PROTO(struct page *page), - - TP_ARGS(page), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( pgoff_t, index ) - - ), - - TP_fast_assign( - __entry->index = page->index; - __entry->ino = page->mapping->host->i_ino; - 
__entry->dev = page->mapping->host->i_sb->s_dev; - ), - - TP_printk("dev %d,%d ino %lu page_index %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->index) -); - -DEFINE_EVENT(ext3__page_op, ext3_ordered_writepage, - - TP_PROTO(struct page *page), - - TP_ARGS(page) -); - -DEFINE_EVENT(ext3__page_op, ext3_writeback_writepage, - - TP_PROTO(struct page *page), - - TP_ARGS(page) -); - -DEFINE_EVENT(ext3__page_op, ext3_journalled_writepage, - - TP_PROTO(struct page *page), - - TP_ARGS(page) -); - -DEFINE_EVENT(ext3__page_op, ext3_readpage, - - TP_PROTO(struct page *page), - - TP_ARGS(page) -); - -DEFINE_EVENT(ext3__page_op, ext3_releasepage, - - TP_PROTO(struct page *page), - - TP_ARGS(page) -); - -TRACE_EVENT(ext3_invalidatepage, - TP_PROTO(struct page *page, unsigned int offset, unsigned int length), - - TP_ARGS(page, offset, length), - - TP_STRUCT__entry( - __field( pgoff_t, index ) - __field( unsigned int, offset ) - __field( unsigned int, length ) - __field( ino_t, ino ) - __field( dev_t, dev ) - - ), - - TP_fast_assign( - __entry->index = page->index; - __entry->offset = offset; - __entry->length = length; - __entry->ino = page->mapping->host->i_ino; - __entry->dev = page->mapping->host->i_sb->s_dev; - ), - - TP_printk("dev %d,%d ino %lu page_index %lu offset %u length %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->index, __entry->offset, __entry->length) -); - -TRACE_EVENT(ext3_discard_blocks, - TP_PROTO(struct super_block *sb, unsigned long blk, - unsigned long count), - - TP_ARGS(sb, blk, count), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( unsigned long, blk ) - __field( unsigned long, count ) - - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->blk = blk; - __entry->count = count; - ), - - TP_printk("dev %d,%d blk %lu count %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->blk, __entry->count) -); - -TRACE_EVENT(ext3_request_blocks, - 
TP_PROTO(struct inode *inode, unsigned long goal, - unsigned long count), - - TP_ARGS(inode, goal, count), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( unsigned long, count ) - __field( unsigned long, goal ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->count = count; - __entry->goal = goal; - ), - - TP_printk("dev %d,%d ino %lu count %lu goal %lu ", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->count, __entry->goal) -); - -TRACE_EVENT(ext3_allocate_blocks, - TP_PROTO(struct inode *inode, unsigned long goal, - unsigned long count, unsigned long block), - - TP_ARGS(inode, goal, count, block), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( unsigned long, block ) - __field( unsigned long, count ) - __field( unsigned long, goal ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->block = block; - __entry->count = count; - __entry->goal = goal; - ), - - TP_printk("dev %d,%d ino %lu count %lu block %lu goal %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->count, __entry->block, - __entry->goal) -); - -TRACE_EVENT(ext3_free_blocks, - TP_PROTO(struct inode *inode, unsigned long block, - unsigned long count), - - TP_ARGS(inode, block, count), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( umode_t, mode ) - __field( unsigned long, block ) - __field( unsigned long, count ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->mode = inode->i_mode; - __entry->block = block; - __entry->count = count; - ), - - TP_printk("dev %d,%d ino %lu mode 0%o block %lu count %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->mode, __entry->block, __entry->count) -); - -TRACE_EVENT(ext3_sync_file_enter, - 
TP_PROTO(struct file *file, int datasync), - - TP_ARGS(file, datasync), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( ino_t, parent ) - __field( int, datasync ) - ), - - TP_fast_assign( - struct dentry *dentry = file->f_path.dentry; - - __entry->dev = d_inode(dentry)->i_sb->s_dev; - __entry->ino = d_inode(dentry)->i_ino; - __entry->datasync = datasync; - __entry->parent = d_inode(dentry->d_parent)->i_ino; - ), - - TP_printk("dev %d,%d ino %lu parent %ld datasync %d ", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long) __entry->parent, __entry->datasync) -); - -TRACE_EVENT(ext3_sync_file_exit, - TP_PROTO(struct inode *inode, int ret), - - TP_ARGS(inode, ret), - - TP_STRUCT__entry( - __field( int, ret ) - __field( ino_t, ino ) - __field( dev_t, dev ) - ), - - TP_fast_assign( - __entry->ret = ret; - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - ), - - TP_printk("dev %d,%d ino %lu ret %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->ret) -); - -TRACE_EVENT(ext3_sync_fs, - TP_PROTO(struct super_block *sb, int wait), - - TP_ARGS(sb, wait), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, wait ) - - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->wait = wait; - ), - - TP_printk("dev %d,%d wait %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->wait) -); - -TRACE_EVENT(ext3_rsv_window_add, - TP_PROTO(struct super_block *sb, - struct ext3_reserve_window_node *rsv_node), - - TP_ARGS(sb, rsv_node), - - TP_STRUCT__entry( - __field( unsigned long, start ) - __field( unsigned long, end ) - __field( dev_t, dev ) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->start = rsv_node->rsv_window._rsv_start; - __entry->end = rsv_node->rsv_window._rsv_end; - ), - - TP_printk("dev %d,%d start %lu end %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->start, __entry->end) -); - 
-TRACE_EVENT(ext3_discard_reservation, - TP_PROTO(struct inode *inode, - struct ext3_reserve_window_node *rsv_node), - - TP_ARGS(inode, rsv_node), - - TP_STRUCT__entry( - __field( unsigned long, start ) - __field( unsigned long, end ) - __field( ino_t, ino ) - __field( dev_t, dev ) - ), - - TP_fast_assign( - __entry->start = rsv_node->rsv_window._rsv_start; - __entry->end = rsv_node->rsv_window._rsv_end; - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - ), - - TP_printk("dev %d,%d ino %lu start %lu end %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long)__entry->ino, __entry->start, - __entry->end) -); - -TRACE_EVENT(ext3_alloc_new_reservation, - TP_PROTO(struct super_block *sb, unsigned long goal), - - TP_ARGS(sb, goal), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( unsigned long, goal ) - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->goal = goal; - ), - - TP_printk("dev %d,%d goal %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->goal) -); - -TRACE_EVENT(ext3_reserved, - TP_PROTO(struct super_block *sb, unsigned long block, - struct ext3_reserve_window_node *rsv_node), - - TP_ARGS(sb, block, rsv_node), - - TP_STRUCT__entry( - __field( unsigned long, block ) - __field( unsigned long, start ) - __field( unsigned long, end ) - __field( dev_t, dev ) - ), - - TP_fast_assign( - __entry->block = block; - __entry->start = rsv_node->rsv_window._rsv_start; - __entry->end = rsv_node->rsv_window._rsv_end; - __entry->dev = sb->s_dev; - ), - - TP_printk("dev %d,%d block %lu, start %lu end %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->block, __entry->start, __entry->end) -); - -TRACE_EVENT(ext3_forget, - TP_PROTO(struct inode *inode, int is_metadata, unsigned long block), - - TP_ARGS(inode, is_metadata, block), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( umode_t, mode ) - __field( int, is_metadata ) - __field( unsigned long, block ) - ), - - 
TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->mode = inode->i_mode; - __entry->is_metadata = is_metadata; - __entry->block = block; - ), - - TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->mode, __entry->is_metadata, __entry->block) -); - -TRACE_EVENT(ext3_read_block_bitmap, - TP_PROTO(struct super_block *sb, unsigned int group), - - TP_ARGS(sb, group), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( __u32, group ) - - ), - - TP_fast_assign( - __entry->dev = sb->s_dev; - __entry->group = group; - ), - - TP_printk("dev %d,%d group %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->group) -); - -TRACE_EVENT(ext3_direct_IO_enter, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), - - TP_ARGS(inode, offset, len, rw), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - __field( loff_t, pos ) - __field( unsigned long, len ) - __field( int, rw ) - ), - - TP_fast_assign( - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - __entry->pos = offset; - __entry->len = len; - __entry->rw = rw; - ), - - TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, - __entry->rw) -); - -TRACE_EVENT(ext3_direct_IO_exit, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, - int rw, int ret), - - TP_ARGS(inode, offset, len, rw, ret), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - __field( loff_t, pos ) - __field( unsigned long, len ) - __field( int, rw ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - __entry->pos = offset; - __entry->len = len; - __entry->rw = rw; - __entry->ret = ret; - ), - - TP_printk("dev %d,%d ino %lu pos %llu 
len %lu rw %d ret %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long) __entry->pos, __entry->len, - __entry->rw, __entry->ret) -); - -TRACE_EVENT(ext3_unlink_enter, - TP_PROTO(struct inode *parent, struct dentry *dentry), - - TP_ARGS(parent, dentry), - - TP_STRUCT__entry( - __field( ino_t, parent ) - __field( ino_t, ino ) - __field( loff_t, size ) - __field( dev_t, dev ) - ), - - TP_fast_assign( - __entry->parent = parent->i_ino; - __entry->ino = d_inode(dentry)->i_ino; - __entry->size = d_inode(dentry)->i_size; - __entry->dev = d_inode(dentry)->i_sb->s_dev; - ), - - TP_printk("dev %d,%d ino %lu size %lld parent %ld", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - (unsigned long long)__entry->size, - (unsigned long) __entry->parent) -); - -TRACE_EVENT(ext3_unlink_exit, - TP_PROTO(struct dentry *dentry, int ret), - - TP_ARGS(dentry, ret), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ino = d_inode(dentry)->i_ino; - __entry->dev = d_inode(dentry)->i_sb->s_dev; - __entry->ret = ret; - ), - - TP_printk("dev %d,%d ino %lu ret %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->ret) -); - -DECLARE_EVENT_CLASS(ext3__truncate, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - __field( blkcnt_t, blocks ) - ), - - TP_fast_assign( - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - __entry->blocks = inode->i_blocks; - ), - - TP_printk("dev %d,%d ino %lu blocks %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, (unsigned long) __entry->blocks) -); - -DEFINE_EVENT(ext3__truncate, ext3_truncate_enter, - - TP_PROTO(struct inode *inode), - - TP_ARGS(inode) -); - -DEFINE_EVENT(ext3__truncate, ext3_truncate_exit, - - TP_PROTO(struct inode *inode), - - 
TP_ARGS(inode) -); - -TRACE_EVENT(ext3_get_blocks_enter, - TP_PROTO(struct inode *inode, unsigned long lblk, - unsigned long len, int create), - - TP_ARGS(inode, lblk, len, create), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - __field( unsigned long, lblk ) - __field( unsigned long, len ) - __field( int, create ) - ), - - TP_fast_assign( - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - __entry->lblk = lblk; - __entry->len = len; - __entry->create = create; - ), - - TP_printk("dev %d,%d ino %lu lblk %lu len %lu create %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->lblk, __entry->len, __entry->create) -); - -TRACE_EVENT(ext3_get_blocks_exit, - TP_PROTO(struct inode *inode, unsigned long lblk, - unsigned long pblk, unsigned long len, int ret), - - TP_ARGS(inode, lblk, pblk, len, ret), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - __field( unsigned long, lblk ) - __field( unsigned long, pblk ) - __field( unsigned long, len ) - __field( int, ret ) - ), - - TP_fast_assign( - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - __entry->lblk = lblk; - __entry->pblk = pblk; - __entry->len = len; - __entry->ret = ret; - ), - - TP_printk("dev %d,%d ino %lu lblk %lu pblk %lu len %lu ret %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, - __entry->lblk, __entry->pblk, - __entry->len, __entry->ret) -); - -TRACE_EVENT(ext3_load_inode, - TP_PROTO(struct inode *inode), - - TP_ARGS(inode), - - TP_STRUCT__entry( - __field( ino_t, ino ) - __field( dev_t, dev ) - ), - - TP_fast_assign( - __entry->ino = inode->i_ino; - __entry->dev = inode->i_sb->s_dev; - ), - - TP_printk("dev %d,%d ino %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino) -); - -#endif /* _TRACE_EXT3_H */ - -/* This part must be outside protection */ -#include diff --git a/include/trace/events/jbd.h b/include/trace/events/jbd.h 
deleted file mode 100644 index da6f2591c25e..000000000000 --- a/include/trace/events/jbd.h +++ /dev/null @@ -1,194 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM jbd - -#if !defined(_TRACE_JBD_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_JBD_H - -#include -#include - -TRACE_EVENT(jbd_checkpoint, - - TP_PROTO(journal_t *journal, int result), - - TP_ARGS(journal, result), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, result ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->result = result; - ), - - TP_printk("dev %d,%d result %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->result) -); - -DECLARE_EVENT_CLASS(jbd_commit, - - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %d,%d transaction %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction) -); - -DEFINE_EVENT(jbd_commit, jbd_start_commit, - - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction) -); - -DEFINE_EVENT(jbd_commit, jbd_commit_locking, - - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction) -); - -DEFINE_EVENT(jbd_commit, jbd_commit_flushing, - - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction) -); - -DEFINE_EVENT(jbd_commit, jbd_commit_logging, - - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction) -); - -TRACE_EVENT(jbd_drop_transaction, - - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, transaction ) - 
), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %d,%d transaction %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction) -); - -TRACE_EVENT(jbd_end_commit, - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, transaction ) - __field( int, head ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->transaction = commit_transaction->t_tid; - __entry->head = journal->j_tail_sequence; - ), - - TP_printk("dev %d,%d transaction %d head %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction, __entry->head) -); - -TRACE_EVENT(jbd_do_submit_data, - TP_PROTO(journal_t *journal, transaction_t *commit_transaction), - - TP_ARGS(journal, commit_transaction), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, transaction ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->transaction = commit_transaction->t_tid; - ), - - TP_printk("dev %d,%d transaction %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->transaction) -); - -TRACE_EVENT(jbd_cleanup_journal_tail, - - TP_PROTO(journal_t *journal, tid_t first_tid, - unsigned long block_nr, unsigned long freed), - - TP_ARGS(journal, first_tid, block_nr, freed), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( tid_t, tail_sequence ) - __field( tid_t, first_tid ) - __field(unsigned long, block_nr ) - __field(unsigned long, freed ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->tail_sequence = journal->j_tail_sequence; - __entry->first_tid = first_tid; - __entry->block_nr = block_nr; - __entry->freed = freed; - ), - - TP_printk("dev %d,%d from %u to %u offset %lu freed %lu", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tail_sequence, __entry->first_tid, - 
__entry->block_nr, __entry->freed) -); - -TRACE_EVENT(journal_write_superblock, - TP_PROTO(journal_t *journal, int write_op), - - TP_ARGS(journal, write_op), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( int, write_op ) - ), - - TP_fast_assign( - __entry->dev = journal->j_fs_dev->bd_dev; - __entry->write_op = write_op; - ), - - TP_printk("dev %d,%d write_op %x", MAJOR(__entry->dev), - MINOR(__entry->dev), __entry->write_op) -); - -#endif /* _TRACE_JBD_H */ - -/* This part must be outside protection */ -#include -- cgit v1.2.3-70-g09d2 From a3ad0a9da863fa554fc17fa8345a07adcdd27d3c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 18 Jun 2015 17:19:14 +0200 Subject: block: Remove forced page bouncing under IO JBD layer wrote back data buffers without setting PageWriteback bit. Thus standard mechanism for guaranteeing stable pages under IO did not work. Since JBD is gone now and there is no other user of the functionality, just remove it. Acked-by: Jens Axboe Signed-off-by: Jan Kara --- block/bounce.c | 31 ++++--------------------------- include/linux/blk_types.h | 5 ++--- 2 files changed, 6 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/block/bounce.c b/block/bounce.c index b17311227c12..31cad13a0c9d 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -176,26 +176,8 @@ static void bounce_end_io_read_isa(struct bio *bio, int err) __bounce_end_io_read(bio, isa_page_pool, err); } -#ifdef CONFIG_NEED_BOUNCE_POOL -static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) -{ - if (bio_data_dir(bio) != WRITE) - return 0; - - if (!bdi_cap_stable_pages_required(&q->backing_dev_info)) - return 0; - - return test_bit(BIO_SNAP_STABLE, &bio->bi_flags); -} -#else -static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) -{ - return 0; -} -#endif /* CONFIG_NEED_BOUNCE_POOL */ - static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, - mempool_t *pool, int force) + mempool_t 
*pool) { struct bio *bio; int rw = bio_data_dir(*bio_orig); @@ -203,8 +185,6 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, struct bvec_iter iter; unsigned i; - if (force) - goto bounce; bio_for_each_segment(from, *bio_orig, iter) if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q)) goto bounce; @@ -216,7 +196,7 @@ bounce: bio_for_each_segment_all(to, bio, i) { struct page *page = to->bv_page; - if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force) + if (page_to_pfn(page) <= queue_bounce_pfn(q)) continue; to->bv_page = mempool_alloc(pool, q->bounce_gfp); @@ -254,7 +234,6 @@ bounce: void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) { - int must_bounce; mempool_t *pool; /* @@ -263,15 +242,13 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) if (!bio_has_data(*bio_orig)) return; - must_bounce = must_snapshot_stable_pages(q, *bio_orig); - /* * for non-isa bounce case, just check if the bounce pfn is equal * to or bigger than the highest pfn in the system -- in that case, * don't waste time iterating over bio segments */ if (!(q->bounce_gfp & GFP_DMA)) { - if (queue_bounce_pfn(q) >= blk_max_pfn && !must_bounce) + if (queue_bounce_pfn(q) >= blk_max_pfn) return; pool = page_pool; } else { @@ -282,7 +259,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) /* * slow path */ - __blk_queue_bounce(q, bio_orig, pool, must_bounce); + __blk_queue_bounce(q, bio_orig, pool); } EXPORT_SYMBOL(blk_queue_bounce); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7303b3405520..89fd49184b48 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -118,9 +118,8 @@ struct bio { #define BIO_USER_MAPPED 4 /* contains user pages */ #define BIO_NULL_MAPPED 5 /* contains invalid user pages */ #define BIO_QUIET 6 /* Make BIO Quiet */ -#define BIO_SNAP_STABLE 7 /* bio data must be snapshotted during write */ -#define BIO_CHAIN 8 /* chained bio, 
->bi_remaining in effect */ -#define BIO_REFFED 9 /* bio has elevated ->bi_cnt */ +#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3-70-g09d2 From bc27381edbeb654d819b7e1464091c456a0d3e64 Mon Sep 17 00:00:00 2001 From: Giuseppe Barba Date: Tue, 21 Jul 2015 10:35:41 +0200 Subject: iio: st-sensors: add configuration for WhoAmI address This patch permits to configure the WhoAmI register address because some device could have not a standard address for this register. Signed-off-by: Giuseppe Barba Reviewed-by: Denis Ciocca Acked-by: Denis Ciocca Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_core.c | 5 +++ drivers/iio/common/st_sensors/st_sensors_core.c | 49 ++++++++++++------------- drivers/iio/gyro/st_gyro_core.c | 3 ++ drivers/iio/magnetometer/st_magn_core.c | 3 ++ drivers/iio/pressure/st_pressure_core.c | 3 ++ include/linux/iio/common/st_sensors.h | 2 + 6 files changed, 39 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 4002e6410444..12b42f6e70ab 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -226,6 +226,7 @@ static const struct iio_chan_spec st_accel_16bit_channels[] = { static const struct st_sensor_settings st_accel_sensors_settings[] = { { .wai = ST_ACCEL_1_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LIS3DH_ACCEL_DEV_NAME, [1] = LSM303DLHC_ACCEL_DEV_NAME, @@ -297,6 +298,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { }, { .wai = ST_ACCEL_2_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LIS331DLH_ACCEL_DEV_NAME, [1] = LSM303DL_ACCEL_DEV_NAME, @@ -359,6 +361,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { }, { .wai = 
ST_ACCEL_3_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LSM330_ACCEL_DEV_NAME, }, @@ -437,6 +440,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { }, { .wai = ST_ACCEL_4_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LIS3LV02DL_ACCEL_DEV_NAME, }, @@ -494,6 +498,7 @@ static const struct st_sensor_settings st_accel_sensors_settings[] = { }, { .wai = ST_ACCEL_5_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LIS331DL_ACCEL_DEV_NAME, }, diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c index 8086cbcff87d..d44bf1680859 100644 --- a/drivers/iio/common/st_sensors/st_sensors_core.c +++ b/drivers/iio/common/st_sensors/st_sensors_core.c @@ -479,46 +479,43 @@ int st_sensors_check_device_support(struct iio_dev *indio_dev, int num_sensors_list, const struct st_sensor_settings *sensor_settings) { - u8 wai; int i, n, err; + u8 wai; struct st_sensor_data *sdata = iio_priv(indio_dev); - err = sdata->tf->read_byte(&sdata->tb, sdata->dev, - ST_SENSORS_DEFAULT_WAI_ADDRESS, &wai); - if (err < 0) { - dev_err(&indio_dev->dev, "failed to read Who-Am-I register.\n"); - goto read_wai_error; - } - for (i = 0; i < num_sensors_list; i++) { - if (sensor_settings[i].wai == wai) + for (n = 0; n < ST_SENSORS_MAX_4WAI; n++) { + if (strcmp(indio_dev->name, + sensor_settings[i].sensors_supported[n]) == 0) { + break; + } + } + if (n < ST_SENSORS_MAX_4WAI) break; } - if (i == num_sensors_list) - goto device_not_supported; + if (i == num_sensors_list) { + dev_err(&indio_dev->dev, "device name %s not recognized.\n", + indio_dev->name); + return -ENODEV; + } - for (n = 0; n < ARRAY_SIZE(sensor_settings[i].sensors_supported); n++) { - if (strcmp(indio_dev->name, - &sensor_settings[i].sensors_supported[n][0]) == 0) - break; + err = sdata->tf->read_byte(&sdata->tb, sdata->dev, + sensor_settings[i].wai_addr, 
&wai); + if (err < 0) { + dev_err(&indio_dev->dev, "failed to read Who-Am-I register.\n"); + return err; } - if (n == ARRAY_SIZE(sensor_settings[i].sensors_supported)) { - dev_err(&indio_dev->dev, "device name \"%s\" and WhoAmI (0x%02x) mismatch", - indio_dev->name, wai); - goto sensor_name_mismatch; + + if (sensor_settings[i].wai != wai) { + dev_err(&indio_dev->dev, "%s: WhoAmI mismatch (0x%x).\n", + indio_dev->name, wai); + return -EINVAL; } sdata->sensor_settings = (struct st_sensor_settings *)&sensor_settings[i]; return i; - -device_not_supported: - dev_err(&indio_dev->dev, "device not supported: WhoAmI (0x%x).\n", wai); -sensor_name_mismatch: - err = -ENODEV; -read_wai_error: - return err; } EXPORT_SYMBOL(st_sensors_check_device_support); diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index ffe96642b6d0..4b993a5bc9a1 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -131,6 +131,7 @@ static const struct iio_chan_spec st_gyro_16bit_channels[] = { static const struct st_sensor_settings st_gyro_sensors_settings[] = { { .wai = ST_GYRO_1_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = L3G4200D_GYRO_DEV_NAME, [1] = LSM330DL_GYRO_DEV_NAME, @@ -190,6 +191,7 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { }, { .wai = ST_GYRO_2_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = L3GD20_GYRO_DEV_NAME, [1] = LSM330D_GYRO_DEV_NAME, @@ -252,6 +254,7 @@ static const struct st_sensor_settings st_gyro_sensors_settings[] = { }, { .wai = ST_GYRO_3_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = L3GD20_GYRO_DEV_NAME, }, diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index b4bcfb790f49..8d7d3a172874 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -192,6 +192,7 @@ static const struct 
iio_chan_spec st_magn_2_16bit_channels[] = { static const struct st_sensor_settings st_magn_sensors_settings[] = { { .wai = 0, /* This sensor has no valid WhoAmI report 0 */ + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LSM303DLH_MAGN_DEV_NAME, }, @@ -268,6 +269,7 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { }, { .wai = ST_MAGN_1_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LSM303DLHC_MAGN_DEV_NAME, [1] = LSM303DLM_MAGN_DEV_NAME, @@ -346,6 +348,7 @@ static const struct st_sensor_settings st_magn_sensors_settings[] = { }, { .wai = ST_MAGN_2_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LIS3MDL_MAGN_DEV_NAME, }, diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index e881fa6291e9..eb41d2b92c24 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -178,6 +178,7 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = { static const struct st_sensor_settings st_press_sensors_settings[] = { { .wai = ST_PRESS_LPS331AP_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LPS331AP_PRESS_DEV_NAME, }, @@ -225,6 +226,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { }, { .wai = ST_PRESS_LPS001WP_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LPS001WP_PRESS_DEV_NAME, }, @@ -260,6 +262,7 @@ static const struct st_sensor_settings st_press_sensors_settings[] = { }, { .wai = ST_PRESS_LPS25H_WAI_EXP, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = { [0] = LPS25H_PRESS_DEV_NAME, }, diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 2c476acb87d9..3c17cd7fdf06 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -166,6 +166,7 @@ struct 
st_sensor_transfer_function { /** * struct st_sensor_settings - ST specific sensor settings * @wai: Contents of WhoAmI register. + * @wai_addr: The address of WhoAmI register. * @sensors_supported: List of supported sensors by struct itself. * @ch: IIO channels for the sensor. * @odr: Output data rate register and ODR list available. @@ -179,6 +180,7 @@ struct st_sensor_transfer_function { */ struct st_sensor_settings { u8 wai; + u8 wai_addr; char sensors_supported[ST_SENSORS_MAX_4WAI][ST_SENSORS_MAX_NAME]; struct iio_chan_spec *ch; int num_ch; -- cgit v1.2.3-70-g09d2 From b6830f6df8914faae9561bb245860c21af9b9e9b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 27 Jun 2015 09:19:00 -0400 Subject: serial: 8250: Split base port operations from universal driver Refactor base port operations into new file; 8250_port.c. Legacy irq handling, RSA port support, port storage for universal driver, driver definition, module parameters and linkage remain in 8250_core.c The source file split and resulting modules is diagrammed below: 8250_core.c ====> 8250_core.c __ \ \ \ +-- 8250.ko (alias 8250_core) \ 8250_pnp.c __/ (universal driver) \ => 8250_port.c __ \ +-- 8250_base.ko 8250_dma.c __/ (port operations) Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 11 + drivers/tty/serial/8250/8250_core.c | 3260 +++-------------------------------- drivers/tty/serial/8250/8250_port.c | 2898 +++++++++++++++++++++++++++++++ drivers/tty/serial/8250/Makefile | 5 +- include/linux/serial_8250.h | 5 + 5 files changed, 3116 insertions(+), 3063 deletions(-) create mode 100644 drivers/tty/serial/8250/8250_port.c (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index c43f74c53cd9..dd233108ec07 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -211,3 +211,14 @@ static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) } return 1; } + +static 
inline int serial_index(struct uart_port *port) +{ + return port->minor - 64; +} + +#if 0 +#define DEBUG_INTR(fmt...) printk(fmt) +#else +#define DEBUG_INTR(fmt...) do { } while (0) +#endif diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 37fff12dd4d0..cfbb9d728e31 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1,25 +1,23 @@ /* - * Driver for 8250/16550-type serial ports + * Universal/legacy driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King. * + * Supports: ISA-compatible 8250/16550 ports + * PNP 8250/16550 ports + * early_serial_setup() ports + * userspace-configurable "phantom" ports + * "serial8250" platform devices + * serial8250_register_8250_port() ports + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * - * A note about mapbase / membase - * - * mapbase is the physical address of the IO port. - * membase is an 'ioremapped' cookie. */ -#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) -#define SUPPORT_SYSRQ -#endif - #include #include #include @@ -58,33 +56,10 @@ static unsigned int nr_uarts = CONFIG_SERIAL_8250_RUNTIME_UARTS; static struct uart_driver serial8250_reg; -static int serial_index(struct uart_port *port) -{ - return port->minor - 64; -} - static unsigned int skip_txen_test; /* force skip of txen test at init time */ -/* - * Debugging. - */ -#if 0 -#define DEBUG_AUTOCONF(fmt...) printk(fmt) -#else -#define DEBUG_AUTOCONF(fmt...) do { } while (0) -#endif - -#if 0 -#define DEBUG_INTR(fmt...) printk(fmt) -#else -#define DEBUG_INTR(fmt...) 
do { } while (0) -#endif - #define PASS_LIMIT 512 -#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE) - - #include /* * SERIAL_PORT_DFNS tells us about built-in ports that have no @@ -120,2695 +95,268 @@ static struct hlist_head irq_lists[NR_IRQ_HASH]; static DEFINE_MUTEX(hash_mutex); /* Used to walk the hash */ /* - * Here we define the default xmit fifo size used for each type of UART. + * This is the serial driver's interrupt routine. + * + * Arjan thinks the old way was overly complex, so it got simplified. + * Alan disagrees, saying that need the complexity to handle the weird + * nature of ISA shared interrupts. (This is a special exception.) + * + * In order to handle ISA shared interrupts properly, we need to check + * that all ports have been serviced, and therefore the ISA interrupt + * line has been de-asserted. + * + * This means we need to loop through all ports. checking that they + * don't have an interrupt pending. */ -static const struct serial8250_config uart_config[] = { - [PORT_UNKNOWN] = { - .name = "unknown", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_8250] = { - .name = "8250", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16450] = { - .name = "16450", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16550] = { - .name = "16550", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16550A] = { - .name = "16550A", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .rxtrig_bytes = {1, 4, 8, 14}, - .flags = UART_CAP_FIFO, - }, - [PORT_CIRRUS] = { - .name = "Cirrus", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16650] = { - .name = "ST16650", - .fifo_size = 1, - .tx_loadsz = 1, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_16650V2] = { - .name = "ST16650V2", - .fifo_size = 32, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | - UART_FCR_T_TRIG_00, - .rxtrig_bytes = {8, 16, 24, 28}, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - 
[PORT_16750] = { - .name = "TI16750", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | - UART_FCR7_64BYTE, - .rxtrig_bytes = {1, 16, 32, 56}, - .flags = UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE, - }, - [PORT_STARTECH] = { - .name = "Startech", - .fifo_size = 1, - .tx_loadsz = 1, - }, - [PORT_16C950] = { - .name = "16C950/954", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - /* UART_CAP_EFR breaks billionon CF bluetooth card. */ - .flags = UART_CAP_FIFO | UART_CAP_SLEEP, - }, - [PORT_16654] = { - .name = "ST16654", - .fifo_size = 64, - .tx_loadsz = 32, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | - UART_FCR_T_TRIG_10, - .rxtrig_bytes = {8, 16, 56, 60}, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_16850] = { - .name = "XR16850", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP, - }, - [PORT_RSA] = { - .name = "RSA", - .fifo_size = 2048, - .tx_loadsz = 2048, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11, - .flags = UART_CAP_FIFO, - }, - [PORT_NS16550A] = { - .name = "NS16550A", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_NATSEMI, - }, - [PORT_XSCALE] = { - .name = "XScale", - .fifo_size = 32, - .tx_loadsz = 32, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE, - }, - [PORT_OCTEON] = { - .name = "OCTEON", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO, - }, - [PORT_AR7] = { - .name = "AR7", - .fifo_size = 16, - .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_U6_16550A] = { - .name = "U6_16550A", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | 
UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_TEGRA] = { - .name = "Tegra", - .fifo_size = 32, - .tx_loadsz = 8, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 | - UART_FCR_T_TRIG_01, - .rxtrig_bytes = {1, 4, 8, 14}, - .flags = UART_CAP_FIFO | UART_CAP_RTOIE, - }, - [PORT_XR17D15X] = { - .name = "XR17D15X", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP, - }, - [PORT_XR17V35X] = { - .name = "XR17V35X", - .fifo_size = 256, - .tx_loadsz = 256, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | - UART_FCR_T_TRIG_11, - .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP, - }, - [PORT_LPC3220] = { - .name = "LPC3220", - .fifo_size = 64, - .tx_loadsz = 32, - .fcr = UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO | - UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00, - .flags = UART_CAP_FIFO, - }, - [PORT_BRCM_TRUMANAGE] = { - .name = "TruManage", - .fifo_size = 1, - .tx_loadsz = 1024, - .flags = UART_CAP_HFIFO, - }, - [PORT_8250_CIR] = { - .name = "CIR port" - }, - [PORT_ALTR_16550_F32] = { - .name = "Altera 16550 FIFO32", - .fifo_size = 32, - .tx_loadsz = 32, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_ALTR_16550_F64] = { - .name = "Altera 16550 FIFO64", - .fifo_size = 64, - .tx_loadsz = 64, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, - [PORT_ALTR_16550_F128] = { - .name = "Altera 16550 FIFO128", - .fifo_size = 128, - .tx_loadsz = 128, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, - .flags = UART_CAP_FIFO | UART_CAP_AFE, - }, -/* tx_loadsz is set to 63-bytes instead of 64-bytes to implement -workaround of errata A-008006 which states that tx_loadsz should be -configured less than Maximum supported fifo bytes */ - [PORT_16550A_FSL64] = { - .name = "16550A_FSL64", - .fifo_size = 64, - .tx_loadsz = 
63, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | - UART_FCR7_64BYTE, - .flags = UART_CAP_FIFO, - }, -}; - -/* Uart divisor latch read */ -static int default_serial_dl_read(struct uart_8250_port *up) -{ - return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8; -} - -/* Uart divisor latch write */ -static void default_serial_dl_write(struct uart_8250_port *up, int value) -{ - serial_out(up, UART_DLL, value & 0xff); - serial_out(up, UART_DLM, value >> 8 & 0xff); -} - -#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X) - -/* Au1x00/RT288x UART hardware has a weird register layout */ -static const s8 au_io_in_map[8] = { - 0, /* UART_RX */ - 2, /* UART_IER */ - 3, /* UART_IIR */ - 5, /* UART_LCR */ - 6, /* UART_MCR */ - 7, /* UART_LSR */ - 8, /* UART_MSR */ - -1, /* UART_SCR (unmapped) */ -}; - -static const s8 au_io_out_map[8] = { - 1, /* UART_TX */ - 2, /* UART_IER */ - 4, /* UART_FCR */ - 5, /* UART_LCR */ - 6, /* UART_MCR */ - -1, /* UART_LSR (unmapped) */ - -1, /* UART_MSR (unmapped) */ - -1, /* UART_SCR (unmapped) */ -}; - -static unsigned int au_serial_in(struct uart_port *p, int offset) -{ - if (offset >= ARRAY_SIZE(au_io_in_map)) - return UINT_MAX; - offset = au_io_in_map[offset]; - if (offset < 0) - return UINT_MAX; - return __raw_readl(p->membase + (offset << p->regshift)); -} - -static void au_serial_out(struct uart_port *p, int offset, int value) -{ - if (offset >= ARRAY_SIZE(au_io_out_map)) - return; - offset = au_io_out_map[offset]; - if (offset < 0) - return; - __raw_writel(value, p->membase + (offset << p->regshift)); -} - -/* Au1x00 haven't got a standard divisor latch */ -static int au_serial_dl_read(struct uart_8250_port *up) +static irqreturn_t serial8250_interrupt(int irq, void *dev_id) { - return __raw_readl(up->port.membase + 0x28); -} + struct irq_info *i = dev_id; + struct list_head *l, *end = NULL; + int pass_counter = 0, handled = 0; -static void au_serial_dl_write(struct uart_8250_port *up, int value) -{ - 
__raw_writel(value, up->port.membase + 0x28); -} + DEBUG_INTR("serial8250_interrupt(%d)...", irq); -#endif + spin_lock(&i->lock); -static unsigned int hub6_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - outb(p->hub6 - 1 + offset, p->iobase); - return inb(p->iobase + 1); -} + l = i->head; + do { + struct uart_8250_port *up; + struct uart_port *port; -static void hub6_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - outb(p->hub6 - 1 + offset, p->iobase); - outb(value, p->iobase + 1); -} + up = list_entry(l, struct uart_8250_port, list); + port = &up->port; -static unsigned int mem_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return readb(p->membase + offset); -} + if (port->handle_irq(port)) { + handled = 1; + end = NULL; + } else if (end == NULL) + end = l; -static void mem_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - writeb(value, p->membase + offset); -} + l = l->next; -static void mem32_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - writel(value, p->membase + offset); -} + if (l == i->head && pass_counter++ > PASS_LIMIT) { + /* If we hit this, we're dead. 
*/ + printk_ratelimited(KERN_ERR + "serial8250: too much work for irq%d\n", irq); + break; + } + } while (l != end); -static unsigned int mem32_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return readl(p->membase + offset); -} + spin_unlock(&i->lock); -static void mem32be_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - iowrite32be(value, p->membase + offset); -} + DEBUG_INTR("end.\n"); -static unsigned int mem32be_serial_in(struct uart_port *p, int offset) -{ - offset = offset << p->regshift; - return ioread32be(p->membase + offset); + return IRQ_RETVAL(handled); } -static unsigned int io_serial_in(struct uart_port *p, int offset) +/* + * To support ISA shared interrupts, we need to have one interrupt + * handler that ensures that the IRQ line has been deasserted + * before returning. Failing to do this will result in the IRQ + * line being stuck active, and, since ISA irqs are edge triggered, + * no more IRQs will be seen. 
+ */ +static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up) { - offset = offset << p->regshift; - return inb(p->iobase + offset); -} + spin_lock_irq(&i->lock); -static void io_serial_out(struct uart_port *p, int offset, int value) -{ - offset = offset << p->regshift; - outb(value, p->iobase + offset); + if (!list_empty(i->head)) { + if (i->head == &up->list) + i->head = i->head->next; + list_del(&up->list); + } else { + BUG_ON(i->head != &up->list); + i->head = NULL; + } + spin_unlock_irq(&i->lock); + /* List empty so throw away the hash node */ + if (i->head == NULL) { + hlist_del(&i->node); + kfree(i); + } } -static int serial8250_default_handle_irq(struct uart_port *port); -static int exar_handle_irq(struct uart_port *port); - -static void set_io_from_upio(struct uart_port *p) +static int serial_link_irq_chain(struct uart_8250_port *up) { - struct uart_8250_port *up = up_to_u8250p(p); - - up->dl_read = default_serial_dl_read; - up->dl_write = default_serial_dl_write; + struct hlist_head *h; + struct hlist_node *n; + struct irq_info *i; + int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? 
IRQF_SHARED : 0; - switch (p->iotype) { - case UPIO_HUB6: - p->serial_in = hub6_serial_in; - p->serial_out = hub6_serial_out; - break; + mutex_lock(&hash_mutex); - case UPIO_MEM: - p->serial_in = mem_serial_in; - p->serial_out = mem_serial_out; - break; + h = &irq_lists[up->port.irq % NR_IRQ_HASH]; - case UPIO_MEM32: - p->serial_in = mem32_serial_in; - p->serial_out = mem32_serial_out; - break; + hlist_for_each(n, h) { + i = hlist_entry(n, struct irq_info, node); + if (i->irq == up->port.irq) + break; + } - case UPIO_MEM32BE: - p->serial_in = mem32be_serial_in; - p->serial_out = mem32be_serial_out; - break; + if (n == NULL) { + i = kzalloc(sizeof(struct irq_info), GFP_KERNEL); + if (i == NULL) { + mutex_unlock(&hash_mutex); + return -ENOMEM; + } + spin_lock_init(&i->lock); + i->irq = up->port.irq; + hlist_add_head(&i->node, h); + } + mutex_unlock(&hash_mutex); -#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X) - case UPIO_AU: - p->serial_in = au_serial_in; - p->serial_out = au_serial_out; - up->dl_read = au_serial_dl_read; - up->dl_write = au_serial_dl_write; - break; -#endif + spin_lock_irq(&i->lock); - default: - p->serial_in = io_serial_in; - p->serial_out = io_serial_out; - break; - } - /* Remember loaded iotype */ - up->cur_iotype = p->iotype; - p->handle_irq = serial8250_default_handle_irq; -} + if (i->head) { + list_add(&up->list, i->head); + spin_unlock_irq(&i->lock); -static void -serial_port_out_sync(struct uart_port *p, int offset, int value) -{ - switch (p->iotype) { - case UPIO_MEM: - case UPIO_MEM32: - case UPIO_MEM32BE: - case UPIO_AU: - p->serial_out(p, offset, value); - p->serial_in(p, UART_LCR); /* safe, no side-effects */ - break; - default: - p->serial_out(p, offset, value); + ret = 0; + } else { + INIT_LIST_HEAD(&up->list); + i->head = &up->list; + spin_unlock_irq(&i->lock); + irq_flags |= up->port.irqflags; + ret = request_irq(up->port.irq, serial8250_interrupt, + irq_flags, "serial", i); + if (ret < 0) + 
serial_do_unlink(i, up); } -} -/* - * For the 16C950 - */ -static void serial_icr_write(struct uart_8250_port *up, int offset, int value) -{ - serial_out(up, UART_SCR, offset); - serial_out(up, UART_ICR, value); + return ret; } -static unsigned int serial_icr_read(struct uart_8250_port *up, int offset) +static void serial_unlink_irq_chain(struct uart_8250_port *up) { - unsigned int value; + /* + * yes, some broken gcc emit "warning: 'i' may be used uninitialized" + * but no, we are not going to take a patch that assigns NULL below. + */ + struct irq_info *i; + struct hlist_node *n; + struct hlist_head *h; - serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); - serial_out(up, UART_SCR, offset); - value = serial_in(up, UART_ICR); - serial_icr_write(up, UART_ACR, up->acr); + mutex_lock(&hash_mutex); - return value; -} + h = &irq_lists[up->port.irq % NR_IRQ_HASH]; -/* - * FIFO support. - */ -static void serial8250_clear_fifos(struct uart_8250_port *p) -{ - if (p->capabilities & UART_CAP_FIFO) { - serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(p, UART_FCR, 0); + hlist_for_each(n, h) { + i = hlist_entry(n, struct irq_info, node); + if (i->irq == up->port.irq) + break; } -} -void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p) -{ - serial8250_clear_fifos(p); - serial_out(p, UART_FCR, p->fcr); -} -EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); + BUG_ON(n == NULL); + BUG_ON(i->head == NULL); -void serial8250_rpm_get(struct uart_8250_port *p) -{ - if (!(p->capabilities & UART_CAP_RPM)) - return; - pm_runtime_get_sync(p->port.dev); -} -EXPORT_SYMBOL_GPL(serial8250_rpm_get); + if (list_empty(i->head)) + free_irq(up->port.irq, i); -void serial8250_rpm_put(struct uart_8250_port *p) -{ - if (!(p->capabilities & UART_CAP_RPM)) - return; - pm_runtime_mark_last_busy(p->port.dev); - pm_runtime_put_autosuspend(p->port.dev); + serial_do_unlink(i, up); + 
mutex_unlock(&hash_mutex); } -EXPORT_SYMBOL_GPL(serial8250_rpm_put); /* - * These two wrappers ensure that enable_runtime_pm_tx() can be called more than - * once and disable_runtime_pm_tx() will still disable RPM because the fifo is - * empty and the HW can idle again. + * This function is used to handle ports that do not have an + * interrupt. This doesn't work very well for 16450's, but gives + * barely passable results for a 16550A. (Although at the expense + * of much CPU overhead). */ -static void serial8250_rpm_get_tx(struct uart_8250_port *p) +static void serial8250_timeout(unsigned long data) { - unsigned char rpm_active; - - if (!(p->capabilities & UART_CAP_RPM)) - return; + struct uart_8250_port *up = (struct uart_8250_port *)data; - rpm_active = xchg(&p->rpm_tx_active, 1); - if (rpm_active) - return; - pm_runtime_get_sync(p->port.dev); + up->port.handle_irq(&up->port); + mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port)); } -static void serial8250_rpm_put_tx(struct uart_8250_port *p) +static void serial8250_backup_timeout(unsigned long data) { - unsigned char rpm_active; + struct uart_8250_port *up = (struct uart_8250_port *)data; + unsigned int iir, ier = 0, lsr; + unsigned long flags; - if (!(p->capabilities & UART_CAP_RPM)) - return; - - rpm_active = xchg(&p->rpm_tx_active, 0); - if (!rpm_active) - return; - pm_runtime_mark_last_busy(p->port.dev); - pm_runtime_put_autosuspend(p->port.dev); -} - -/* - * IER sleep support. UARTs which have EFRs need the "extended - * capability" bit enabled. Note that on XR16C850s, we need to - * reset LCR to write to IER. - */ -static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) -{ - unsigned char lcr = 0, efr = 0; - /* - * Exar UARTs have a SLEEP register that enables or disables - * each UART to enter sleep mode separately. On the XR17V35x the - * register is accessible to each UART at the UART_EXAR_SLEEP - * offset but the UART channel may only write to the corresponding - * bit. 
- */ - serial8250_rpm_get(p); - if ((p->port.type == PORT_XR17V35X) || - (p->port.type == PORT_XR17D15X)) { - serial_out(p, UART_EXAR_SLEEP, sleep ? 0xff : 0); - goto out; - } - - if (p->capabilities & UART_CAP_SLEEP) { - if (p->capabilities & UART_CAP_EFR) { - lcr = serial_in(p, UART_LCR); - efr = serial_in(p, UART_EFR); - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(p, UART_EFR, UART_EFR_ECB); - serial_out(p, UART_LCR, 0); - } - serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0); - if (p->capabilities & UART_CAP_EFR) { - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(p, UART_EFR, efr); - serial_out(p, UART_LCR, lcr); - } - } -out: - serial8250_rpm_put(p); -} - -#ifdef CONFIG_SERIAL_8250_RSA -/* - * Attempts to turn on the RSA FIFO. Returns zero on failure. - * We set the port uart clock rate if we succeed. - */ -static int __enable_rsa(struct uart_8250_port *up) -{ - unsigned char mode; - int result; - - mode = serial_in(up, UART_RSA_MSR); - result = mode & UART_RSA_MSR_FIFO; - - if (!result) { - serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO); - mode = serial_in(up, UART_RSA_MSR); - result = mode & UART_RSA_MSR_FIFO; - } - - if (result) - up->port.uartclk = SERIAL_RSA_BAUD_BASE * 16; - - return result; -} - -static void enable_rsa(struct uart_8250_port *up) -{ - if (up->port.type == PORT_RSA) { - if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) { - spin_lock_irq(&up->port.lock); - __enable_rsa(up); - spin_unlock_irq(&up->port.lock); - } - if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) - serial_out(up, UART_RSA_FRR, 0); - } -} - -/* - * Attempts to turn off the RSA FIFO. Returns zero on failure. - * It is unknown why interrupts were disabled in here. However, - * the caller is expected to preserve this behaviour by grabbing - * the spinlock before calling this function. 
- */ -static void disable_rsa(struct uart_8250_port *up) -{ - unsigned char mode; - int result; - - if (up->port.type == PORT_RSA && - up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) { - spin_lock_irq(&up->port.lock); - - mode = serial_in(up, UART_RSA_MSR); - result = !(mode & UART_RSA_MSR_FIFO); - - if (!result) { - serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO); - mode = serial_in(up, UART_RSA_MSR); - result = !(mode & UART_RSA_MSR_FIFO); - } - - if (result) - up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16; - spin_unlock_irq(&up->port.lock); - } -} -#endif /* CONFIG_SERIAL_8250_RSA */ - -/* - * This is a quickie test to see how big the FIFO is. - * It doesn't work at all the time, more's the pity. - */ -static int size_fifo(struct uart_8250_port *up) -{ - unsigned char old_fcr, old_mcr, old_lcr; - unsigned short old_dl; - int count; - - old_lcr = serial_in(up, UART_LCR); - serial_out(up, UART_LCR, 0); - old_fcr = serial_in(up, UART_FCR); - old_mcr = serial_in(up, UART_MCR); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT); - serial_out(up, UART_MCR, UART_MCR_LOOP); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - old_dl = serial_dl_read(up); - serial_dl_write(up, 0x0001); - serial_out(up, UART_LCR, 0x03); - for (count = 0; count < 256; count++) - serial_out(up, UART_TX, count); - mdelay(20);/* FIXME - schedule_timeout */ - for (count = 0; (serial_in(up, UART_LSR) & UART_LSR_DR) && - (count < 256); count++) - serial_in(up, UART_RX); - serial_out(up, UART_FCR, old_fcr); - serial_out(up, UART_MCR, old_mcr); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial_dl_write(up, old_dl); - serial_out(up, UART_LCR, old_lcr); - - return count; -} - -/* - * Read UART ID using the divisor method - set DLL and DLM to zero - * and the revision will be in DLL and device type in DLM. We - * preserve the device state across this. 
- */ -static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p) -{ - unsigned char old_dll, old_dlm, old_lcr; - unsigned int id; - - old_lcr = serial_in(p, UART_LCR); - serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A); - - old_dll = serial_in(p, UART_DLL); - old_dlm = serial_in(p, UART_DLM); - - serial_out(p, UART_DLL, 0); - serial_out(p, UART_DLM, 0); - - id = serial_in(p, UART_DLL) | serial_in(p, UART_DLM) << 8; - - serial_out(p, UART_DLL, old_dll); - serial_out(p, UART_DLM, old_dlm); - serial_out(p, UART_LCR, old_lcr); - - return id; -} - -/* - * This is a helper routine to autodetect StarTech/Exar/Oxsemi UART's. - * When this function is called we know it is at least a StarTech - * 16650 V2, but it might be one of several StarTech UARTs, or one of - * its clones. (We treat the broken original StarTech 16650 V1 as a - * 16550, and why not? Startech doesn't seem to even acknowledge its - * existence.) - * - * What evil have men's minds wrought... - */ -static void autoconfig_has_efr(struct uart_8250_port *up) -{ - unsigned int id1, id2, id3, rev; - - /* - * Everything with an EFR has SLEEP - */ - up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; - - /* - * First we check to see if it's an Oxford Semiconductor UART. - * - * If we have to do this here because some non-National - * Semiconductor clone chips lock up if you try writing to the - * LSR register (which serial_icr_read does) - */ - - /* - * Check for Oxford Semiconductor 16C950. - * - * EFR [4] must be set else this test fails. - * - * This shouldn't be necessary, but Mike Hudson (Exoray@isys.ca) - * claims that it's needed for 952 dual UART's (which are not - * recommended for new designs). 
- */ - up->acr = 0; - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, UART_EFR_ECB); - serial_out(up, UART_LCR, 0x00); - id1 = serial_icr_read(up, UART_ID1); - id2 = serial_icr_read(up, UART_ID2); - id3 = serial_icr_read(up, UART_ID3); - rev = serial_icr_read(up, UART_REV); - - DEBUG_AUTOCONF("950id=%02x:%02x:%02x:%02x ", id1, id2, id3, rev); - - if (id1 == 0x16 && id2 == 0xC9 && - (id3 == 0x50 || id3 == 0x52 || id3 == 0x54)) { - up->port.type = PORT_16C950; - - /* - * Enable work around for the Oxford Semiconductor 952 rev B - * chip which causes it to seriously miscalculate baud rates - * when DLL is 0. - */ - if (id3 == 0x52 && rev == 0x01) - up->bugs |= UART_BUG_QUOT; - return; - } - - /* - * We check for a XR16C850 by setting DLL and DLM to 0, and then - * reading back DLL and DLM. The chip type depends on the DLM - * value read back: - * 0x10 - XR16C850 and the DLL contains the chip revision. - * 0x12 - XR16C2850. - * 0x14 - XR16C854. - */ - id1 = autoconfig_read_divisor_id(up); - DEBUG_AUTOCONF("850id=%04x ", id1); - - id2 = id1 >> 8; - if (id2 == 0x10 || id2 == 0x12 || id2 == 0x14) { - up->port.type = PORT_16850; - return; - } - - /* - * It wasn't an XR16C850. - * - * We distinguish between the '654 and the '650 by counting - * how many bytes are in the FIFO. I'm using this for now, - * since that's the technique that was sent to me in the - * serial driver update, but I'm not convinced this works. - * I've had problems doing this in the past. -TYT - */ - if (size_fifo(up) == 64) - up->port.type = PORT_16654; - else - up->port.type = PORT_16650V2; -} - -/* - * We detected a chip without a FIFO. Only two fall into - * this category - the original 8250 and the 16450. 
The - * 16450 has a scratch register (accessible with LCR=0) - */ -static void autoconfig_8250(struct uart_8250_port *up) -{ - unsigned char scratch, status1, status2; - - up->port.type = PORT_8250; - - scratch = serial_in(up, UART_SCR); - serial_out(up, UART_SCR, 0xa5); - status1 = serial_in(up, UART_SCR); - serial_out(up, UART_SCR, 0x5a); - status2 = serial_in(up, UART_SCR); - serial_out(up, UART_SCR, scratch); - - if (status1 == 0xa5 && status2 == 0x5a) - up->port.type = PORT_16450; -} - -static int broken_efr(struct uart_8250_port *up) -{ - /* - * Exar ST16C2550 "A2" devices incorrectly detect as - * having an EFR, and report an ID of 0x0201. See - * http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-11/4812.html - */ - if (autoconfig_read_divisor_id(up) == 0x0201 && size_fifo(up) == 16) - return 1; - - return 0; -} - -/* - * We know that the chip has FIFOs. Does it have an EFR? The - * EFR is located in the same register position as the IIR and - * we know the top two bits of the IIR are currently set. The - * EFR should contain zero. Try to read the EFR. - */ -static void autoconfig_16550a(struct uart_8250_port *up) -{ - unsigned char status1, status2; - unsigned int iersave; - - up->port.type = PORT_16550A; - up->capabilities |= UART_CAP_FIFO; - - /* - * XR17V35x UARTs have an extra divisor register, DLD - * that gets enabled with when DLAB is set which will - * cause the device to incorrectly match and assign - * port type to PORT_16650. The EFR for this UART is - * found at offset 0x09. Instead check the Deice ID (DVID) - * register for a 2, 4 or 8 port UART. - */ - if (up->port.flags & UPF_EXAR_EFR) { - status1 = serial_in(up, UART_EXAR_DVID); - if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) { - DEBUG_AUTOCONF("Exar XR17V35x "); - up->port.type = PORT_XR17V35X; - up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP; - - return; - } - - } - - /* - * Check for presence of the EFR when DLAB is set. 
- * Only ST16C650V1 UARTs pass this test. - */ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - if (serial_in(up, UART_EFR) == 0) { - serial_out(up, UART_EFR, 0xA8); - if (serial_in(up, UART_EFR) != 0) { - DEBUG_AUTOCONF("EFRv1 "); - up->port.type = PORT_16650; - up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP; - } else { - serial_out(up, UART_LCR, 0); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | - UART_FCR7_64BYTE); - status1 = serial_in(up, UART_IIR) >> 5; - serial_out(up, UART_FCR, 0); - serial_out(up, UART_LCR, 0); - - if (status1 == 7) - up->port.type = PORT_16550A_FSL64; - else - DEBUG_AUTOCONF("Motorola 8xxx DUART "); - } - serial_out(up, UART_EFR, 0); - return; - } - - /* - * Maybe it requires 0xbf to be written to the LCR. - * (other ST16C650V2 UARTs, TI16C752A, etc) - */ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) { - DEBUG_AUTOCONF("EFRv2 "); - autoconfig_has_efr(up); - return; - } - - /* - * Check for a National Semiconductor SuperIO chip. - * Attempt to switch to bank 2, read the value of the LOOP bit - * from EXCR1. Switch back to bank 0, change it in MCR. Then - * switch back to bank 2, read it from EXCR1 again and check - * it's changed. If so, set baud_base in EXCR2 to 921600. 
-- dwmw2 - */ - serial_out(up, UART_LCR, 0); - status1 = serial_in(up, UART_MCR); - serial_out(up, UART_LCR, 0xE0); - status2 = serial_in(up, 0x02); /* EXCR1 */ - - if (!((status2 ^ status1) & UART_MCR_LOOP)) { - serial_out(up, UART_LCR, 0); - serial_out(up, UART_MCR, status1 ^ UART_MCR_LOOP); - serial_out(up, UART_LCR, 0xE0); - status2 = serial_in(up, 0x02); /* EXCR1 */ - serial_out(up, UART_LCR, 0); - serial_out(up, UART_MCR, status1); - - if ((status2 ^ status1) & UART_MCR_LOOP) { - unsigned short quot; - - serial_out(up, UART_LCR, 0xE0); - - quot = serial_dl_read(up); - quot <<= 3; - - if (ns16550a_goto_highspeed(up)) - serial_dl_write(up, quot); - - serial_out(up, UART_LCR, 0); - - up->port.uartclk = 921600*16; - up->port.type = PORT_NS16550A; - up->capabilities |= UART_NATSEMI; - return; - } - } - - /* - * No EFR. Try to detect a TI16750, which only sets bit 5 of - * the IIR when 64 byte FIFO mode is enabled when DLAB is set. - * Try setting it with and without DLAB set. Cheap clones - * set bit 5 without DLAB set. - */ - serial_out(up, UART_LCR, 0); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); - status1 = serial_in(up, UART_IIR) >> 5; - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A); - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE); - status2 = serial_in(up, UART_IIR) >> 5; - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_out(up, UART_LCR, 0); - - DEBUG_AUTOCONF("iir1=%d iir2=%d ", status1, status2); - - if (status1 == 6 && status2 == 7) { - up->port.type = PORT_16750; - up->capabilities |= UART_CAP_AFE | UART_CAP_SLEEP; - return; - } - - /* - * Try writing and reading the UART_IER_UUE bit (b6). - * If it works, this is probably one of the Xscale platform's - * internal UARTs. 
- * We're going to explicitly set the UUE bit to 0 before - * trying to write and read a 1 just to make sure it's not - * already a 1 and maybe locked there before we even start start. - */ - iersave = serial_in(up, UART_IER); - serial_out(up, UART_IER, iersave & ~UART_IER_UUE); - if (!(serial_in(up, UART_IER) & UART_IER_UUE)) { - /* - * OK it's in a known zero state, try writing and reading - * without disturbing the current state of the other bits. - */ - serial_out(up, UART_IER, iersave | UART_IER_UUE); - if (serial_in(up, UART_IER) & UART_IER_UUE) { - /* - * It's an Xscale. - * We'll leave the UART_IER_UUE bit set to 1 (enabled). - */ - DEBUG_AUTOCONF("Xscale "); - up->port.type = PORT_XSCALE; - up->capabilities |= UART_CAP_UUE | UART_CAP_RTOIE; - return; - } - } else { - /* - * If we got here we couldn't force the IER_UUE bit to 0. - * Log it and continue. - */ - DEBUG_AUTOCONF("Couldn't force IER_UUE to 0 "); - } - serial_out(up, UART_IER, iersave); - - /* - * Exar uarts have EFR in a weird location - */ - if (up->port.flags & UPF_EXAR_EFR) { - DEBUG_AUTOCONF("Exar XR17D15x "); - up->port.type = PORT_XR17D15X; - up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | - UART_CAP_SLEEP; - - return; - } - - /* - * We distinguish between 16550A and U6 16550A by counting - * how many bytes are in the FIFO. - */ - if (up->port.type == PORT_16550A && size_fifo(up) == 64) { - up->port.type = PORT_U6_16550A; - up->capabilities |= UART_CAP_AFE; - } -} - -/* - * This routine is called by rs_init() to initialize a specific serial - * port. It determines what type of UART chip this serial port is - * using: 8250, 16450, 16550, 16550A. The important question is - * whether or not this UART is a 16550A or not, since this will - * determine whether or not we can use its FIFO features or not. 
- */ -static void autoconfig(struct uart_8250_port *up) -{ - unsigned char status1, scratch, scratch2, scratch3; - unsigned char save_lcr, save_mcr; - struct uart_port *port = &up->port; - unsigned long flags; - unsigned int old_capabilities; - - if (!port->iobase && !port->mapbase && !port->membase) - return; - - DEBUG_AUTOCONF("ttyS%d: autoconf (0x%04lx, 0x%p): ", - serial_index(port), port->iobase, port->membase); - - /* - * We really do need global IRQs disabled here - we're going to - * be frobbing the chips IRQ enable register to see if it exists. - */ - spin_lock_irqsave(&port->lock, flags); - - up->capabilities = 0; - up->bugs = 0; - - if (!(port->flags & UPF_BUGGY_UART)) { - /* - * Do a simple existence test first; if we fail this, - * there's no point trying anything else. - * - * 0x80 is used as a nonsense port to prevent against - * false positives due to ISA bus float. The - * assumption is that 0x80 is a non-existent port; - * which should be safe since include/asm/io.h also - * makes this assumption. - * - * Note: this is safe as long as MCR bit 4 is clear - * and the device is in "PC" mode. - */ - scratch = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); -#ifdef __i386__ - outb(0xff, 0x080); -#endif - /* - * Mask out IER[7:4] bits for test as some UARTs (e.g. TL - * 16C754B) allow only to modify them if an EFR bit is set. - */ - scratch2 = serial_in(up, UART_IER) & 0x0f; - serial_out(up, UART_IER, 0x0F); -#ifdef __i386__ - outb(0, 0x080); -#endif - scratch3 = serial_in(up, UART_IER) & 0x0f; - serial_out(up, UART_IER, scratch); - if (scratch2 != 0 || scratch3 != 0x0F) { - /* - * We failed; there's nothing here - */ - spin_unlock_irqrestore(&port->lock, flags); - DEBUG_AUTOCONF("IER test failed (%02x, %02x) ", - scratch2, scratch3); - goto out; - } - } - - save_mcr = serial_in(up, UART_MCR); - save_lcr = serial_in(up, UART_LCR); - - /* - * Check to see if a UART is really there. 
Certain broken - * internal modems based on the Rockwell chipset fail this - * test, because they apparently don't implement the loopback - * test mode. So this test is skipped on the COM 1 through - * COM 4 ports. This *should* be safe, since no board - * manufacturer would be stupid enough to design a board - * that conflicts with COM 1-4 --- we hope! - */ - if (!(port->flags & UPF_SKIP_TEST)) { - serial_out(up, UART_MCR, UART_MCR_LOOP | 0x0A); - status1 = serial_in(up, UART_MSR) & 0xF0; - serial_out(up, UART_MCR, save_mcr); - if (status1 != 0x90) { - spin_unlock_irqrestore(&port->lock, flags); - DEBUG_AUTOCONF("LOOP test failed (%02x) ", - status1); - goto out; - } - } - - /* - * We're pretty sure there's a port here. Lets find out what - * type of port it is. The IIR top two bits allows us to find - * out if it's 8250 or 16450, 16550, 16550A or later. This - * determines what we test for next. - * - * We also initialise the EFR (if any) to zero for later. The - * EFR occupies the same register location as the FCR and IIR. - */ - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - serial_out(up, UART_EFR, 0); - serial_out(up, UART_LCR, 0); - - serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO); - scratch = serial_in(up, UART_IIR) >> 6; - - switch (scratch) { - case 0: - autoconfig_8250(up); - break; - case 1: - port->type = PORT_UNKNOWN; - break; - case 2: - port->type = PORT_16550; - break; - case 3: - autoconfig_16550a(up); - break; - } - -#ifdef CONFIG_SERIAL_8250_RSA - /* - * Only probe for RSA ports if we got the region. - */ - if (port->type == PORT_16550A && up->probe & UART_PROBE_RSA && - __enable_rsa(up)) - port->type = PORT_RSA; -#endif - - serial_out(up, UART_LCR, save_lcr); - - port->fifosize = uart_config[up->port.type].fifo_size; - old_capabilities = up->capabilities; - up->capabilities = uart_config[port->type].flags; - up->tx_loadsz = uart_config[port->type].tx_loadsz; - - if (port->type == PORT_UNKNOWN) - goto out_lock; - - /* - * Reset the UART. 
- */ -#ifdef CONFIG_SERIAL_8250_RSA - if (port->type == PORT_RSA) - serial_out(up, UART_RSA_FRR, 0); -#endif - serial_out(up, UART_MCR, save_mcr); - serial8250_clear_fifos(up); - serial_in(up, UART_RX); - if (up->capabilities & UART_CAP_UUE) - serial_out(up, UART_IER, UART_IER_UUE); - else - serial_out(up, UART_IER, 0); - -out_lock: - spin_unlock_irqrestore(&port->lock, flags); - if (up->capabilities != old_capabilities) { - printk(KERN_WARNING - "ttyS%d: detected caps %08x should be %08x\n", - serial_index(port), old_capabilities, - up->capabilities); - } -out: - DEBUG_AUTOCONF("iir=%d ", scratch); - DEBUG_AUTOCONF("type=%s\n", uart_config[port->type].name); -} - -static void autoconfig_irq(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - unsigned char save_mcr, save_ier; - unsigned char save_ICP = 0; - unsigned int ICP = 0; - unsigned long irqs; - int irq; - - if (port->flags & UPF_FOURPORT) { - ICP = (port->iobase & 0xfe0) | 0x1f; - save_ICP = inb_p(ICP); - outb_p(0x80, ICP); - inb_p(ICP); - } - - /* forget possible initially masked and pending IRQ */ - probe_irq_off(probe_irq_on()); - save_mcr = serial_in(up, UART_MCR); - save_ier = serial_in(up, UART_IER); - serial_out(up, UART_MCR, UART_MCR_OUT1 | UART_MCR_OUT2); - - irqs = probe_irq_on(); - serial_out(up, UART_MCR, 0); - udelay(10); - if (port->flags & UPF_FOURPORT) { - serial_out(up, UART_MCR, - UART_MCR_DTR | UART_MCR_RTS); - } else { - serial_out(up, UART_MCR, - UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2); - } - serial_out(up, UART_IER, 0x0f); /* enable all intrs */ - serial_in(up, UART_LSR); - serial_in(up, UART_RX); - serial_in(up, UART_IIR); - serial_in(up, UART_MSR); - serial_out(up, UART_TX, 0xFF); - udelay(20); - irq = probe_irq_off(irqs); - - serial_out(up, UART_MCR, save_mcr); - serial_out(up, UART_IER, save_ier); - - if (port->flags & UPF_FOURPORT) - outb_p(save_ICP, ICP); - - port->irq = (irq > 0) ? 
irq : 0; -} - -static inline void __stop_tx(struct uart_8250_port *p) -{ - if (p->ier & UART_IER_THRI) { - p->ier &= ~UART_IER_THRI; - serial_out(p, UART_IER, p->ier); - serial8250_rpm_put_tx(p); - } -} - -static void serial8250_stop_tx(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - serial8250_rpm_get(up); - __stop_tx(up); - - /* - * We really want to stop the transmitter from sending. - */ - if (port->type == PORT_16C950) { - up->acr |= UART_ACR_TXDIS; - serial_icr_write(up, UART_ACR, up->acr); - } - serial8250_rpm_put(up); -} - -static void serial8250_start_tx(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - serial8250_rpm_get_tx(up); - - if (up->dma && !up->dma->tx_dma(up)) - return; - - if (!(up->ier & UART_IER_THRI)) { - up->ier |= UART_IER_THRI; - serial_port_out(port, UART_IER, up->ier); - - if (up->bugs & UART_BUG_TXEN) { - unsigned char lsr; - lsr = serial_in(up, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - if (lsr & UART_LSR_THRE) - serial8250_tx_chars(up); - } - } - - /* - * Re-enable the transmitter if we disabled it. 
- */ - if (port->type == PORT_16C950 && up->acr & UART_ACR_TXDIS) { - up->acr &= ~UART_ACR_TXDIS; - serial_icr_write(up, UART_ACR, up->acr); - } -} - -static void serial8250_throttle(struct uart_port *port) -{ - port->throttle(port); -} - -static void serial8250_unthrottle(struct uart_port *port) -{ - port->unthrottle(port); -} - -static void serial8250_stop_rx(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - serial8250_rpm_get(up); - - up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); - up->port.read_status_mask &= ~UART_LSR_DR; - serial_port_out(port, UART_IER, up->ier); - - serial8250_rpm_put(up); -} - -static void serial8250_disable_ms(struct uart_port *port) -{ - struct uart_8250_port *up = - container_of(port, struct uart_8250_port, port); - - /* no MSR capabilities */ - if (up->bugs & UART_BUG_NOMSR) - return; - - up->ier &= ~UART_IER_MSI; - serial_port_out(port, UART_IER, up->ier); -} - -static void serial8250_enable_ms(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - /* no MSR capabilities */ - if (up->bugs & UART_BUG_NOMSR) - return; - - up->ier |= UART_IER_MSI; - - serial8250_rpm_get(up); - serial_port_out(port, UART_IER, up->ier); - serial8250_rpm_put(up); -} - -/* - * serial8250_rx_chars: processes according to the passed in LSR - * value, and returns the remaining LSR bits not handled - * by this Rx routine. - */ -unsigned char -serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr) -{ - struct uart_port *port = &up->port; - unsigned char ch; - int max_count = 256; - char flag; - - do { - if (likely(lsr & UART_LSR_DR)) - ch = serial_in(up, UART_RX); - else - /* - * Intel 82571 has a Serial Over Lan device that will - * set UART_LSR_BI without setting UART_LSR_DR when - * it receives a break. 
To avoid reading from the - * receive buffer without UART_LSR_DR bit set, we - * just force the read character to be 0 - */ - ch = 0; - - flag = TTY_NORMAL; - port->icount.rx++; - - lsr |= up->lsr_saved_flags; - up->lsr_saved_flags = 0; - - if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) { - if (lsr & UART_LSR_BI) { - lsr &= ~(UART_LSR_FE | UART_LSR_PE); - port->icount.brk++; - /* - * We do the SysRQ and SAK checking - * here because otherwise the break - * may get masked by ignore_status_mask - * or read_status_mask. - */ - if (uart_handle_break(port)) - goto ignore_char; - } else if (lsr & UART_LSR_PE) - port->icount.parity++; - else if (lsr & UART_LSR_FE) - port->icount.frame++; - if (lsr & UART_LSR_OE) - port->icount.overrun++; - - /* - * Mask off conditions which should be ignored. - */ - lsr &= port->read_status_mask; - - if (lsr & UART_LSR_BI) { - DEBUG_INTR("handling break...."); - flag = TTY_BREAK; - } else if (lsr & UART_LSR_PE) - flag = TTY_PARITY; - else if (lsr & UART_LSR_FE) - flag = TTY_FRAME; - } - if (uart_handle_sysrq_char(port, ch)) - goto ignore_char; - - uart_insert_char(port, lsr, UART_LSR_OE, ch, flag); - -ignore_char: - lsr = serial_in(up, UART_LSR); - } while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (--max_count > 0)); - spin_unlock(&port->lock); - tty_flip_buffer_push(&port->state->port); - spin_lock(&port->lock); - return lsr; -} -EXPORT_SYMBOL_GPL(serial8250_rx_chars); - -void serial8250_tx_chars(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - struct circ_buf *xmit = &port->state->xmit; - int count; - - if (port->x_char) { - serial_out(up, UART_TX, port->x_char); - port->icount.tx++; - port->x_char = 0; - return; - } - if (uart_tx_stopped(port)) { - serial8250_stop_tx(port); - return; - } - if (uart_circ_empty(xmit)) { - __stop_tx(up); - return; - } - - count = up->tx_loadsz; - do { - serial_out(up, UART_TX, xmit->buf[xmit->tail]); - xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); - port->icount.tx++; - if 
(uart_circ_empty(xmit)) - break; - if (up->capabilities & UART_CAP_HFIFO) { - if ((serial_port_in(port, UART_LSR) & BOTH_EMPTY) != - BOTH_EMPTY) - break; - } - } while (--count > 0); - - if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) - uart_write_wakeup(port); - - DEBUG_INTR("THRE..."); - - /* - * With RPM enabled, we have to wait until the FIFO is empty before the - * HW can go idle. So we get here once again with empty FIFO and disable - * the interrupt and RPM in __stop_tx() - */ - if (uart_circ_empty(xmit) && !(up->capabilities & UART_CAP_RPM)) - __stop_tx(up); -} -EXPORT_SYMBOL_GPL(serial8250_tx_chars); - -/* Caller holds uart port lock */ -unsigned int serial8250_modem_status(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - unsigned int status = serial_in(up, UART_MSR); - - status |= up->msr_saved_flags; - up->msr_saved_flags = 0; - if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && - port->state != NULL) { - if (status & UART_MSR_TERI) - port->icount.rng++; - if (status & UART_MSR_DDSR) - port->icount.dsr++; - if (status & UART_MSR_DDCD) - uart_handle_dcd_change(port, status & UART_MSR_DCD); - if (status & UART_MSR_DCTS) - uart_handle_cts_change(port, status & UART_MSR_CTS); - - wake_up_interruptible(&port->state->port.delta_msr_wait); - } - - return status; -} -EXPORT_SYMBOL_GPL(serial8250_modem_status); - -/* - * This handles the interrupt from one port. 
- */ -int serial8250_handle_irq(struct uart_port *port, unsigned int iir) -{ - unsigned char status; - unsigned long flags; - struct uart_8250_port *up = up_to_u8250p(port); - int dma_err = 0; - - if (iir & UART_IIR_NO_INT) - return 0; - - spin_lock_irqsave(&port->lock, flags); - - status = serial_port_in(port, UART_LSR); - - DEBUG_INTR("status = %x...", status); - - if (status & (UART_LSR_DR | UART_LSR_BI)) { - if (up->dma) - dma_err = up->dma->rx_dma(up, iir); - - if (!up->dma || dma_err) - status = serial8250_rx_chars(up, status); - } - serial8250_modem_status(up); - if ((!up->dma || (up->dma && up->dma->tx_err)) && - (status & UART_LSR_THRE)) - serial8250_tx_chars(up); - - spin_unlock_irqrestore(&port->lock, flags); - return 1; -} -EXPORT_SYMBOL_GPL(serial8250_handle_irq); - -static int serial8250_default_handle_irq(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned int iir; - int ret; - - serial8250_rpm_get(up); - - iir = serial_port_in(port, UART_IIR); - ret = serial8250_handle_irq(port, iir); - - serial8250_rpm_put(up); - return ret; -} - -/* - * These Exar UARTs have an extra interrupt indicator that could - * fire for a few unimplemented interrupts. One of which is a - * wakeup event when coming out of sleep. Put this here just - * to be on the safe side that these interrupts don't go unhandled. - */ -static int exar_handle_irq(struct uart_port *port) -{ - unsigned char int0, int1, int2, int3; - unsigned int iir = serial_port_in(port, UART_IIR); - int ret; - - ret = serial8250_handle_irq(port, iir); - - if ((port->type == PORT_XR17V35X) || - (port->type == PORT_XR17D15X)) { - int0 = serial_port_in(port, 0x80); - int1 = serial_port_in(port, 0x81); - int2 = serial_port_in(port, 0x82); - int3 = serial_port_in(port, 0x83); - } - - return ret; -} - -/* - * This is the serial driver's interrupt routine. - * - * Arjan thinks the old way was overly complex, so it got simplified. 
- * Alan disagrees, saying that need the complexity to handle the weird - * nature of ISA shared interrupts. (This is a special exception.) - * - * In order to handle ISA shared interrupts properly, we need to check - * that all ports have been serviced, and therefore the ISA interrupt - * line has been de-asserted. - * - * This means we need to loop through all ports. checking that they - * don't have an interrupt pending. - */ -static irqreturn_t serial8250_interrupt(int irq, void *dev_id) -{ - struct irq_info *i = dev_id; - struct list_head *l, *end = NULL; - int pass_counter = 0, handled = 0; - - DEBUG_INTR("serial8250_interrupt(%d)...", irq); - - spin_lock(&i->lock); - - l = i->head; - do { - struct uart_8250_port *up; - struct uart_port *port; - - up = list_entry(l, struct uart_8250_port, list); - port = &up->port; - - if (port->handle_irq(port)) { - handled = 1; - end = NULL; - } else if (end == NULL) - end = l; - - l = l->next; - - if (l == i->head && pass_counter++ > PASS_LIMIT) { - /* If we hit this, we're dead. */ - printk_ratelimited(KERN_ERR - "serial8250: too much work for irq%d\n", irq); - break; - } - } while (l != end); - - spin_unlock(&i->lock); - - DEBUG_INTR("end.\n"); - - return IRQ_RETVAL(handled); -} - -/* - * To support ISA shared interrupts, we need to have one interrupt - * handler that ensures that the IRQ line has been deasserted - * before returning. Failing to do this will result in the IRQ - * line being stuck active, and, since ISA irqs are edge triggered, - * no more IRQs will be seen. 
- */ -static void serial_do_unlink(struct irq_info *i, struct uart_8250_port *up) -{ - spin_lock_irq(&i->lock); - - if (!list_empty(i->head)) { - if (i->head == &up->list) - i->head = i->head->next; - list_del(&up->list); - } else { - BUG_ON(i->head != &up->list); - i->head = NULL; - } - spin_unlock_irq(&i->lock); - /* List empty so throw away the hash node */ - if (i->head == NULL) { - hlist_del(&i->node); - kfree(i); - } -} - -static int serial_link_irq_chain(struct uart_8250_port *up) -{ - struct hlist_head *h; - struct hlist_node *n; - struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0; - - mutex_lock(&hash_mutex); - - h = &irq_lists[up->port.irq % NR_IRQ_HASH]; - - hlist_for_each(n, h) { - i = hlist_entry(n, struct irq_info, node); - if (i->irq == up->port.irq) - break; - } - - if (n == NULL) { - i = kzalloc(sizeof(struct irq_info), GFP_KERNEL); - if (i == NULL) { - mutex_unlock(&hash_mutex); - return -ENOMEM; - } - spin_lock_init(&i->lock); - i->irq = up->port.irq; - hlist_add_head(&i->node, h); - } - mutex_unlock(&hash_mutex); - - spin_lock_irq(&i->lock); - - if (i->head) { - list_add(&up->list, i->head); - spin_unlock_irq(&i->lock); - - ret = 0; - } else { - INIT_LIST_HEAD(&up->list); - i->head = &up->list; - spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; - ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, "serial", i); - if (ret < 0) - serial_do_unlink(i, up); - } - - return ret; -} - -static void serial_unlink_irq_chain(struct uart_8250_port *up) -{ - /* - * yes, some broken gcc emit "warning: 'i' may be used uninitialized" - * but no, we are not going to take a patch that assigns NULL below. 
- */ - struct irq_info *i; - struct hlist_node *n; - struct hlist_head *h; - - mutex_lock(&hash_mutex); - - h = &irq_lists[up->port.irq % NR_IRQ_HASH]; - - hlist_for_each(n, h) { - i = hlist_entry(n, struct irq_info, node); - if (i->irq == up->port.irq) - break; - } - - BUG_ON(n == NULL); - BUG_ON(i->head == NULL); - - if (list_empty(i->head)) - free_irq(up->port.irq, i); - - serial_do_unlink(i, up); - mutex_unlock(&hash_mutex); -} - -/* - * This function is used to handle ports that do not have an - * interrupt. This doesn't work very well for 16450's, but gives - * barely passable results for a 16550A. (Although at the expense - * of much CPU overhead). - */ -static void serial8250_timeout(unsigned long data) -{ - struct uart_8250_port *up = (struct uart_8250_port *)data; - - up->port.handle_irq(&up->port); - mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port)); -} - -static void serial8250_backup_timeout(unsigned long data) -{ - struct uart_8250_port *up = (struct uart_8250_port *)data; - unsigned int iir, ier = 0, lsr; - unsigned long flags; - - spin_lock_irqsave(&up->port.lock, flags); - - /* - * Must disable interrupts or else we risk racing with the interrupt - * based handler. - */ - if (up->port.irq) { - ier = serial_in(up, UART_IER); - serial_out(up, UART_IER, 0); - } - - iir = serial_in(up, UART_IIR); - - /* - * This should be a safe test for anyone who doesn't trust the - * IIR bits on their UART, but it's specifically designed for - * the "Diva" UART used on the management processor on many HP - * ia64 and parisc boxes. 
- */ - lsr = serial_in(up, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && - (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && - (lsr & UART_LSR_THRE)) { - iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); - iir |= UART_IIR_THRI; - } - - if (!(iir & UART_IIR_NO_INT)) - serial8250_tx_chars(up); - - if (up->port.irq) - serial_out(up, UART_IER, ier); - - spin_unlock_irqrestore(&up->port.lock, flags); - - /* Standard timer interval plus 0.2s to keep the port running */ - mod_timer(&up->timer, - jiffies + uart_poll_timeout(&up->port) + HZ / 5); -} - -static int univ8250_setup_irq(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - int retval = 0; - - /* - * The above check will only give an accurate result the first time - * the port is opened so this value needs to be preserved. - */ - if (up->bugs & UART_BUG_THRE) { - pr_debug("ttyS%d - using backup timer\n", serial_index(port)); - - up->timer.function = serial8250_backup_timeout; - up->timer.data = (unsigned long)up; - mod_timer(&up->timer, jiffies + - uart_poll_timeout(port) + HZ / 5); - } - - /* - * If the "interrupt" for this port doesn't correspond with any - * hardware interrupt, we use a timer-based system. The original - * driver used to do this with IRQ0. 
- */ - if (!port->irq) { - up->timer.data = (unsigned long)up; - mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); - } else - retval = serial_link_irq_chain(up); - - return retval; -} - -static void univ8250_release_irq(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - - del_timer_sync(&up->timer); - up->timer.function = serial8250_timeout; - if (port->irq) - serial_unlink_irq_chain(up); -} - -static unsigned int serial8250_tx_empty(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned long flags; - unsigned int lsr; - - serial8250_rpm_get(up); - - spin_lock_irqsave(&port->lock, flags); - lsr = serial_port_in(port, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - spin_unlock_irqrestore(&port->lock, flags); - - serial8250_rpm_put(up); - - return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0; -} - -static unsigned int serial8250_get_mctrl(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned int status; - unsigned int ret; - - serial8250_rpm_get(up); - status = serial8250_modem_status(up); - serial8250_rpm_put(up); - - ret = 0; - if (status & UART_MSR_DCD) - ret |= TIOCM_CAR; - if (status & UART_MSR_RI) - ret |= TIOCM_RNG; - if (status & UART_MSR_DSR) - ret |= TIOCM_DSR; - if (status & UART_MSR_CTS) - ret |= TIOCM_CTS; - return ret; -} - -void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned char mcr = 0; - - if (mctrl & TIOCM_RTS) - mcr |= UART_MCR_RTS; - if (mctrl & TIOCM_DTR) - mcr |= UART_MCR_DTR; - if (mctrl & TIOCM_OUT1) - mcr |= UART_MCR_OUT1; - if (mctrl & TIOCM_OUT2) - mcr |= UART_MCR_OUT2; - if (mctrl & TIOCM_LOOP) - mcr |= UART_MCR_LOOP; - - mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr; - - serial_port_out(port, UART_MCR, mcr); -} -EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl); - -static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl) -{ 
- if (port->set_mctrl) - port->set_mctrl(port, mctrl); - else - serial8250_do_set_mctrl(port, mctrl); -} - -static void serial8250_break_ctl(struct uart_port *port, int break_state) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned long flags; - - serial8250_rpm_get(up); - spin_lock_irqsave(&port->lock, flags); - if (break_state == -1) - up->lcr |= UART_LCR_SBC; - else - up->lcr &= ~UART_LCR_SBC; - serial_port_out(port, UART_LCR, up->lcr); - spin_unlock_irqrestore(&port->lock, flags); - serial8250_rpm_put(up); -} - -/* - * Wait for transmitter & holding register to empty - */ -static void wait_for_xmitr(struct uart_8250_port *up, int bits) -{ - unsigned int status, tmout = 10000; - - /* Wait up to 10ms for the character(s) to be sent. */ - for (;;) { - status = serial_in(up, UART_LSR); - - up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; - - if ((status & bits) == bits) - break; - if (--tmout == 0) - break; - udelay(1); - } - - /* Wait up to 1s for flow control if necessary */ - if (up->port.flags & UPF_CONS_FLOW) { - unsigned int tmout; - for (tmout = 1000000; tmout; tmout--) { - unsigned int msr = serial_in(up, UART_MSR); - up->msr_saved_flags |= msr & MSR_SAVE_FLAGS; - if (msr & UART_MSR_CTS) - break; - udelay(1); - touch_nmi_watchdog(); - } - } -} - -#ifdef CONFIG_CONSOLE_POLL -/* - * Console polling routines for writing and reading from the uart while - * in an interrupt or debug context. 
- */ - -static int serial8250_get_poll_char(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned char lsr; - int status; - - serial8250_rpm_get(up); - - lsr = serial_port_in(port, UART_LSR); - - if (!(lsr & UART_LSR_DR)) { - status = NO_POLL_CHAR; - goto out; - } - - status = serial_port_in(port, UART_RX); -out: - serial8250_rpm_put(up); - return status; -} - - -static void serial8250_put_poll_char(struct uart_port *port, - unsigned char c) -{ - unsigned int ier; - struct uart_8250_port *up = up_to_u8250p(port); - - serial8250_rpm_get(up); - /* - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); - if (up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); - - wait_for_xmitr(up, BOTH_EMPTY); - /* - * Send the character out. - */ - serial_port_out(port, UART_TX, c); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); - serial_port_out(port, UART_IER, ier); - serial8250_rpm_put(up); -} - -#endif /* CONFIG_CONSOLE_POLL */ - -int serial8250_do_startup(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned long flags; - unsigned char lsr, iir; - int retval; - - if (port->type == PORT_8250_CIR) - return -ENODEV; - - if (!port->fifosize) - port->fifosize = uart_config[port->type].fifo_size; - if (!up->tx_loadsz) - up->tx_loadsz = uart_config[port->type].tx_loadsz; - if (!up->capabilities) - up->capabilities = uart_config[port->type].flags; - up->mcr = 0; - - if (port->iotype != up->cur_iotype) - set_io_from_upio(port); - - serial8250_rpm_get(up); - if (port->type == PORT_16C950) { - /* Wake up and initialize UART */ - up->acr = 0; - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_IER, 0); - serial_port_out(port, UART_LCR, 0); - 
serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - serial_port_out(port, UART_EFR, UART_EFR_ECB); - serial_port_out(port, UART_LCR, 0); - } - -#ifdef CONFIG_SERIAL_8250_RSA - /* - * If this is an RSA port, see if we can kick it up to the - * higher speed clock. - */ - enable_rsa(up); -#endif - /* - * Clear the FIFO buffers and disable them. - * (they will be reenabled in set_termios()) - */ - serial8250_clear_fifos(up); - - /* - * Clear the interrupt registers. - */ - serial_port_in(port, UART_LSR); - serial_port_in(port, UART_RX); - serial_port_in(port, UART_IIR); - serial_port_in(port, UART_MSR); - - /* - * At this point, there's no way the LSR could still be 0xff; - * if it is, then bail out, because there's likely no UART - * here. - */ - if (!(port->flags & UPF_BUGGY_UART) && - (serial_port_in(port, UART_LSR) == 0xff)) { - printk_ratelimited(KERN_INFO "ttyS%d: LSR safety check engaged!\n", - serial_index(port)); - retval = -ENODEV; - goto out; - } - - /* - * For a XR16C850, we need to set the trigger levels - */ - if (port->type == PORT_16850) { - unsigned char fctr; - - serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); - - fctr = serial_in(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); - serial_port_out(port, UART_FCTR, - fctr | UART_FCTR_TRGD | UART_FCTR_RX); - serial_port_out(port, UART_TRG, UART_TRG_96); - serial_port_out(port, UART_FCTR, - fctr | UART_FCTR_TRGD | UART_FCTR_TX); - serial_port_out(port, UART_TRG, UART_TRG_96); - - serial_port_out(port, UART_LCR, 0); - } - - if (port->irq) { - unsigned char iir1; - /* - * Test for UARTs that do not reassert THRE when the - * transmitter is idle and the interrupt has already - * been cleared. Real 16550s should always reassert - * this interrupt whenever the transmitter is idle and - * the interrupt is enabled. Delays are necessary to - * allow register changes to become visible. 
- */ - spin_lock_irqsave(&port->lock, flags); - if (up->port.irqflags & IRQF_SHARED) - disable_irq_nosync(port->irq); - - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out_sync(port, UART_IER, UART_IER_THRI); - udelay(1); /* allow THRE to set */ - iir1 = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - serial_port_out_sync(port, UART_IER, UART_IER_THRI); - udelay(1); /* allow a working UART time to re-assert THRE */ - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - - if (port->irqflags & IRQF_SHARED) - enable_irq(port->irq); - spin_unlock_irqrestore(&port->lock, flags); - - /* - * If the interrupt is not reasserted, or we otherwise - * don't trust the iir, setup a timer to kick the UART - * on a regular basis. - */ - if ((!(iir1 & UART_IIR_NO_INT) && (iir & UART_IIR_NO_INT)) || - up->port.flags & UPF_BUG_THRE) { - up->bugs |= UART_BUG_THRE; - } - } - - retval = up->ops->setup_irq(up); - if (retval) - goto out; - - /* - * Now, initialize the UART - */ - serial_port_out(port, UART_LCR, UART_LCR_WLEN8); - - spin_lock_irqsave(&port->lock, flags); - if (up->port.flags & UPF_FOURPORT) { - if (!up->port.irq) - up->port.mctrl |= TIOCM_OUT1; - } else - /* - * Most PC uarts need OUT2 raised to enable interrupts. - */ - if (port->irq) - up->port.mctrl |= TIOCM_OUT2; - - serial8250_set_mctrl(port, port->mctrl); - - /* Serial over Lan (SoL) hack: - Intel 8257x Gigabit ethernet chips have a - 16550 emulation, to be used for Serial Over Lan. - Those chips take a longer time than a normal - serial device to signalize that a transmission - data was queued. Due to that, the above test generally - fails. One solution would be to delay the reading of - iir. However, this is not reliable, since the timeout - is variable. So, let's just don't test if we receive - TX irq. This way, we'll never enable UART_BUG_TXEN. 
- */ - if (up->port.flags & UPF_NO_TXEN_TEST) - goto dont_test_tx_en; - - /* - * Do a quick test to see if we receive an - * interrupt when we enable the TX irq. - */ - serial_port_out(port, UART_IER, UART_IER_THRI); - lsr = serial_port_in(port, UART_LSR); - iir = serial_port_in(port, UART_IIR); - serial_port_out(port, UART_IER, 0); - - if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { - if (!(up->bugs & UART_BUG_TXEN)) { - up->bugs |= UART_BUG_TXEN; - pr_debug("ttyS%d - enabling bad tx status workarounds\n", - serial_index(port)); - } - } else { - up->bugs &= ~UART_BUG_TXEN; - } - -dont_test_tx_en: - spin_unlock_irqrestore(&port->lock, flags); - - /* - * Clear the interrupt registers again for luck, and clear the - * saved flags to avoid getting false values from polling - * routines or the previous session. - */ - serial_port_in(port, UART_LSR); - serial_port_in(port, UART_RX); - serial_port_in(port, UART_IIR); - serial_port_in(port, UART_MSR); - up->lsr_saved_flags = 0; - up->msr_saved_flags = 0; - - /* - * Request DMA channels for both RX and TX. - */ - if (up->dma) { - retval = serial8250_request_dma(up); - if (retval) { - pr_warn_ratelimited("ttyS%d - failed to request DMA\n", - serial_index(port)); - up->dma = NULL; - } - } - - /* - * Finally, enable interrupts. Note: Modem status interrupts - * are set via set_termios(), which will be occurring imminently - * anyway, so we don't enable them here. 
- */ - up->ier = UART_IER_RLSI | UART_IER_RDI; - serial_port_out(port, UART_IER, up->ier); - - if (port->flags & UPF_FOURPORT) { - unsigned int icp; - /* - * Enable interrupts on the AST Fourport board - */ - icp = (port->iobase & 0xfe0) | 0x01f; - outb_p(0x80, icp); - inb_p(icp); - } - retval = 0; -out: - serial8250_rpm_put(up); - return retval; -} -EXPORT_SYMBOL_GPL(serial8250_do_startup); - -static int serial8250_startup(struct uart_port *port) -{ - if (port->startup) - return port->startup(port); - return serial8250_do_startup(port); -} - -void serial8250_do_shutdown(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned long flags; - - serial8250_rpm_get(up); - /* - * Disable interrupts from this port - */ - up->ier = 0; - serial_port_out(port, UART_IER, 0); - - if (up->dma) - serial8250_release_dma(up); - - spin_lock_irqsave(&port->lock, flags); - if (port->flags & UPF_FOURPORT) { - /* reset interrupts on the AST Fourport board */ - inb((port->iobase & 0xfe0) | 0x1f); - port->mctrl |= TIOCM_OUT1; - } else - port->mctrl &= ~TIOCM_OUT2; - - serial8250_set_mctrl(port, port->mctrl); - spin_unlock_irqrestore(&port->lock, flags); - - /* - * Disable break condition and FIFOs - */ - serial_port_out(port, UART_LCR, - serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); - serial8250_clear_fifos(up); - -#ifdef CONFIG_SERIAL_8250_RSA - /* - * Reset the RSA board back to 115kbps compat mode. - */ - disable_rsa(up); -#endif - - /* - * Read data port to reset things, and then unlink from - * the IRQ chain. 
- */ - serial_port_in(port, UART_RX); - serial8250_rpm_put(up); - - up->ops->release_irq(up); -} -EXPORT_SYMBOL_GPL(serial8250_do_shutdown); - -static void serial8250_shutdown(struct uart_port *port) -{ - if (port->shutdown) - port->shutdown(port); - else - serial8250_do_shutdown(port); -} - -/* - * XR17V35x UARTs have an extra fractional divisor register (DLD) - * Calculate divisor with extra 4-bit fractional portion - */ -static unsigned int xr17v35x_get_divisor(struct uart_8250_port *up, - unsigned int baud, - unsigned int *frac) -{ - struct uart_port *port = &up->port; - unsigned int quot_16; - - quot_16 = DIV_ROUND_CLOSEST(port->uartclk, baud); - *frac = quot_16 & 0x0f; - - return quot_16 >> 4; -} - -static unsigned int serial8250_get_divisor(struct uart_8250_port *up, - unsigned int baud, - unsigned int *frac) -{ - struct uart_port *port = &up->port; - unsigned int quot; - - /* - * Handle magic divisors for baud rates above baud_base on - * SMSC SuperIO chips. - * - */ - if ((port->flags & UPF_MAGIC_MULTIPLIER) && - baud == (port->uartclk/4)) - quot = 0x8001; - else if ((port->flags & UPF_MAGIC_MULTIPLIER) && - baud == (port->uartclk/8)) - quot = 0x8002; - else if (up->port.type == PORT_XR17V35X) - quot = xr17v35x_get_divisor(up, baud, frac); - else - quot = uart_get_divisor(port, baud); - - /* - * Oxford Semi 952 rev B workaround - */ - if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0) - quot++; - - return quot; -} - -static unsigned char serial8250_compute_lcr(struct uart_8250_port *up, - tcflag_t c_cflag) -{ - unsigned char cval; - - switch (c_cflag & CSIZE) { - case CS5: - cval = UART_LCR_WLEN5; - break; - case CS6: - cval = UART_LCR_WLEN6; - break; - case CS7: - cval = UART_LCR_WLEN7; - break; - default: - case CS8: - cval = UART_LCR_WLEN8; - break; - } - - if (c_cflag & CSTOPB) - cval |= UART_LCR_STOP; - if (c_cflag & PARENB) { - cval |= UART_LCR_PARITY; - if (up->bugs & UART_BUG_PARITY) - up->fifo_bug = true; - } - if (!(c_cflag & PARODD)) - cval |= 
UART_LCR_EPAR; -#ifdef CMSPAR - if (c_cflag & CMSPAR) - cval |= UART_LCR_SPAR; -#endif - - return cval; -} - -static void serial8250_set_divisor(struct uart_port *port, unsigned int baud, - unsigned int quot, unsigned int quot_frac) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - /* Workaround to enable 115200 baud on OMAP1510 internal ports */ - if (is_omap1510_8250(up)) { - if (baud == 115200) { - quot = 1; - serial_port_out(port, UART_OMAP_OSC_12M_SEL, 1); - } else - serial_port_out(port, UART_OMAP_OSC_12M_SEL, 0); - } - - /* - * For NatSemi, switch to bank 2 not bank 1, to avoid resetting EXCR2, - * otherwise just set DLAB - */ - if (up->capabilities & UART_NATSEMI) - serial_port_out(port, UART_LCR, 0xe0); - else - serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); - - serial_dl_write(up, quot); - - /* XR17V35x UARTs have an extra fractional divisor register (DLD) */ - if (up->port.type == PORT_XR17V35X) - serial_port_out(port, 0x2, quot_frac); -} - -void -serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - struct uart_8250_port *up = up_to_u8250p(port); - unsigned char cval; - unsigned long flags; - unsigned int baud, quot, frac = 0; - - cval = serial8250_compute_lcr(up, termios->c_cflag); - - /* - * Ask the core to calculate the divisor for us. - */ - baud = uart_get_baud_rate(port, termios, old, - port->uartclk / 16 / 0xffff, - port->uartclk / 16); - quot = serial8250_get_divisor(up, baud, &frac); - - /* - * Ok, we're now changing the port state. Do it with - * interrupts disabled. - */ - serial8250_rpm_get(up); - spin_lock_irqsave(&port->lock, flags); - - up->lcr = cval; /* Save computed LCR */ - - if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { - /* NOTE: If fifo_bug is not set, a user can set RX_trigger. */ - if ((baud < 2400 && !up->dma) || up->fifo_bug) { - up->fcr &= ~UART_FCR_TRIGGER_MASK; - up->fcr |= UART_FCR_TRIGGER_1; - } - } - - /* - * MCR-based auto flow control. 
When AFE is enabled, RTS will be - * deasserted when the receive FIFO contains more characters than - * the trigger, or the MCR RTS bit is cleared. In the case where - * the remote UART is not using CTS auto flow control, we must - * have sufficient FIFO entries for the latency of the remote - * UART to respond. IOW, at least 32 bytes of FIFO. - */ - if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) { - up->mcr &= ~UART_MCR_AFE; - if (termios->c_cflag & CRTSCTS) - up->mcr |= UART_MCR_AFE; - } - - /* - * Update the per-port timeout. - */ - uart_update_timeout(port, termios->c_cflag, baud); - - port->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; - if (termios->c_iflag & INPCK) - port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; - if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - port->read_status_mask |= UART_LSR_BI; - - /* - * Characteres to ignore - */ - port->ignore_status_mask = 0; - if (termios->c_iflag & IGNPAR) - port->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; - if (termios->c_iflag & IGNBRK) { - port->ignore_status_mask |= UART_LSR_BI; - /* - * If we're ignoring parity and break indicators, - * ignore overruns too (for real raw support). - */ - if (termios->c_iflag & IGNPAR) - port->ignore_status_mask |= UART_LSR_OE; - } - - /* - * ignore all characters if CREAD is not set - */ - if ((termios->c_cflag & CREAD) == 0) - port->ignore_status_mask |= UART_LSR_DR; - - /* - * CTS flow control flag and modem status interrupts - */ - up->ier &= ~UART_IER_MSI; - if (!(up->bugs & UART_BUG_NOMSR) && - UART_ENABLE_MS(&up->port, termios->c_cflag)) - up->ier |= UART_IER_MSI; - if (up->capabilities & UART_CAP_UUE) - up->ier |= UART_IER_UUE; - if (up->capabilities & UART_CAP_RTOIE) - up->ier |= UART_IER_RTOIE; - - serial_port_out(port, UART_IER, up->ier); - - if (up->capabilities & UART_CAP_EFR) { - unsigned char efr = 0; - /* - * TI16C752/Startech hardware flow control. FIXME: - * - TI16C752 requires control thresholds to be set. 
- * - UART_MCR_RTS is ineffective if auto-RTS mode is enabled. - */ - if (termios->c_cflag & CRTSCTS) - efr |= UART_EFR_CTS; - - serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); - if (port->flags & UPF_EXAR_EFR) - serial_port_out(port, UART_XR_EFR, efr); - else - serial_port_out(port, UART_EFR, efr); - } - - serial8250_set_divisor(port, baud, quot, frac); + spin_lock_irqsave(&up->port.lock, flags); /* - * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR - * is written without DLAB set, this mode will be disabled. + * Must disable interrupts or else we risk racing with the interrupt + * based handler. */ - if (port->type == PORT_16750) - serial_port_out(port, UART_FCR, up->fcr); - - serial_port_out(port, UART_LCR, up->lcr); /* reset DLAB */ - if (port->type != PORT_16750) { - /* emulated UARTs (Lucent Venus 167x) need two steps */ - if (up->fcr & UART_FCR_ENABLE_FIFO) - serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); - serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ - } - serial8250_set_mctrl(port, port->mctrl); - spin_unlock_irqrestore(&port->lock, flags); - serial8250_rpm_put(up); - - /* Don't rewrite B0 */ - if (tty_termios_baud_rate(termios)) - tty_termios_encode_baud_rate(termios, baud, baud); -} -EXPORT_SYMBOL(serial8250_do_set_termios); - -static void -serial8250_set_termios(struct uart_port *port, struct ktermios *termios, - struct ktermios *old) -{ - if (port->set_termios) - port->set_termios(port, termios, old); - else - serial8250_do_set_termios(port, termios, old); -} - -static void -serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios) -{ - if (termios->c_line == N_PPS) { - port->flags |= UPF_HARDPPS_CD; - spin_lock_irq(&port->lock); - serial8250_enable_ms(port); - spin_unlock_irq(&port->lock); - } else { - port->flags &= ~UPF_HARDPPS_CD; - if (!UART_ENABLE_MS(port, termios->c_cflag)) { - spin_lock_irq(&port->lock); - serial8250_disable_ms(port); - spin_unlock_irq(&port->lock); - } + if (up->port.irq) { 
+ ier = serial_in(up, UART_IER); + serial_out(up, UART_IER, 0); } -} + iir = serial_in(up, UART_IIR); -void serial8250_do_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ - struct uart_8250_port *p = up_to_u8250p(port); + /* + * This should be a safe test for anyone who doesn't trust the + * IIR bits on their UART, but it's specifically designed for + * the "Diva" UART used on the management processor on many HP + * ia64 and parisc boxes. + */ + lsr = serial_in(up, UART_LSR); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && + (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && + (lsr & UART_LSR_THRE)) { + iir &= ~(UART_IIR_ID | UART_IIR_NO_INT); + iir |= UART_IIR_THRI; + } - serial8250_set_sleep(p, state != 0); -} -EXPORT_SYMBOL(serial8250_do_pm); + if (!(iir & UART_IIR_NO_INT)) + serial8250_tx_chars(up); -static void -serial8250_pm(struct uart_port *port, unsigned int state, - unsigned int oldstate) -{ - if (port->pm) - port->pm(port, state, oldstate); - else - serial8250_do_pm(port, state, oldstate); -} + if (up->port.irq) + serial_out(up, UART_IER, ier); -static unsigned int serial8250_port_size(struct uart_8250_port *pt) -{ - if (pt->port.mapsize) - return pt->port.mapsize; - if (pt->port.iotype == UPIO_AU) { - if (pt->port.type == PORT_RT2880) - return 0x100; - return 0x1000; - } - if (is_omap1_8250(pt)) - return 0x16 << pt->port.regshift; + spin_unlock_irqrestore(&up->port.lock, flags); - return 8 << pt->port.regshift; + /* Standard timer interval plus 0.2s to keep the port running */ + mod_timer(&up->timer, + jiffies + uart_poll_timeout(&up->port) + HZ / 5); } -/* - * Resource handling. 
- */ -static int serial8250_request_std_resource(struct uart_8250_port *up) +static int univ8250_setup_irq(struct uart_8250_port *up) { - unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; - int ret = 0; + int retval = 0; - switch (port->iotype) { - case UPIO_AU: - case UPIO_TSI: - case UPIO_MEM32: - case UPIO_MEM32BE: - case UPIO_MEM: - if (!port->mapbase) - break; + /* + * The above check will only give an accurate result the first time + * the port is opened so this value needs to be preserved. + */ + if (up->bugs & UART_BUG_THRE) { + pr_debug("ttyS%d - using backup timer\n", serial_index(port)); - if (!request_mem_region(port->mapbase, size, "serial")) { - ret = -EBUSY; - break; - } + up->timer.function = serial8250_backup_timeout; + up->timer.data = (unsigned long)up; + mod_timer(&up->timer, jiffies + + uart_poll_timeout(port) + HZ / 5); + } - if (port->flags & UPF_IOREMAP) { - port->membase = ioremap_nocache(port->mapbase, size); - if (!port->membase) { - release_mem_region(port->mapbase, size); - ret = -ENOMEM; - } - } - break; + /* + * If the "interrupt" for this port doesn't correspond with any + * hardware interrupt, we use a timer-based system. The original + * driver used to do this with IRQ0. 
+ */ + if (!port->irq) { + up->timer.data = (unsigned long)up; + mod_timer(&up->timer, jiffies + uart_poll_timeout(port)); + } else + retval = serial_link_irq_chain(up); - case UPIO_HUB6: - case UPIO_PORT: - if (!request_region(port->iobase, size, "serial")) - ret = -EBUSY; - break; - } - return ret; + return retval; } -static void serial8250_release_std_resource(struct uart_8250_port *up) +static void univ8250_release_irq(struct uart_8250_port *up) { - unsigned int size = serial8250_port_size(up); struct uart_port *port = &up->port; - switch (port->iotype) { - case UPIO_AU: - case UPIO_TSI: - case UPIO_MEM32: - case UPIO_MEM32BE: - case UPIO_MEM: - if (!port->mapbase) - break; - - if (port->flags & UPF_IOREMAP) { - iounmap(port->membase); - port->membase = NULL; - } - - release_mem_region(port->mapbase, size); - break; - - case UPIO_HUB6: - case UPIO_PORT: - release_region(port->iobase, size); - break; - } + del_timer_sync(&up->timer); + up->timer.function = serial8250_timeout; + if (port->irq) + serial_unlink_irq_chain(up); } #ifdef CONFIG_SERIAL_8250_RSA @@ -2848,259 +396,6 @@ static void serial8250_release_rsa_resource(struct uart_8250_port *up) } #endif -static void serial8250_release_port(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - serial8250_release_std_resource(up); -} - -static int serial8250_request_port(struct uart_port *port) -{ - struct uart_8250_port *up = up_to_u8250p(port); - int ret; - - if (port->type == PORT_8250_CIR) - return -ENODEV; - - ret = serial8250_request_std_resource(up); - - return ret; -} - -static int fcr_get_rxtrig_bytes(struct uart_8250_port *up) -{ - const struct serial8250_config *conf_type = &uart_config[up->port.type]; - unsigned char bytes; - - bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)]; - - return bytes ? 
bytes : -EOPNOTSUPP; -} - -static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes) -{ - const struct serial8250_config *conf_type = &uart_config[up->port.type]; - int i; - - if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)]) - return -EOPNOTSUPP; - - for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) { - if (bytes < conf_type->rxtrig_bytes[i]) - /* Use the nearest lower value */ - return (--i) << UART_FCR_R_TRIG_SHIFT; - } - - return UART_FCR_R_TRIG_11; -} - -static int do_get_rxtrig(struct tty_port *port) -{ - struct uart_state *state = container_of(port, struct uart_state, port); - struct uart_port *uport = state->uart_port; - struct uart_8250_port *up = - container_of(uport, struct uart_8250_port, port); - - if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) - return -EINVAL; - - return fcr_get_rxtrig_bytes(up); -} - -static int do_serial8250_get_rxtrig(struct tty_port *port) -{ - int rxtrig_bytes; - - mutex_lock(&port->mutex); - rxtrig_bytes = do_get_rxtrig(port); - mutex_unlock(&port->mutex); - - return rxtrig_bytes; -} - -static ssize_t serial8250_get_attr_rx_trig_bytes(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct tty_port *port = dev_get_drvdata(dev); - int rxtrig_bytes; - - rxtrig_bytes = do_serial8250_get_rxtrig(port); - if (rxtrig_bytes < 0) - return rxtrig_bytes; - - return snprintf(buf, PAGE_SIZE, "%d\n", rxtrig_bytes); -} - -static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) -{ - struct uart_state *state = container_of(port, struct uart_state, port); - struct uart_port *uport = state->uart_port; - struct uart_8250_port *up = - container_of(uport, struct uart_8250_port, port); - int rxtrig; - - if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 || - up->fifo_bug) - return -EINVAL; - - rxtrig = bytes_to_fcr_rxtrig(up, bytes); - if (rxtrig < 0) - return rxtrig; - - serial8250_clear_fifos(up); - up->fcr &= ~UART_FCR_TRIGGER_MASK; - up->fcr 
|= (unsigned char)rxtrig; - serial_out(up, UART_FCR, up->fcr); - return 0; -} - -static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes) -{ - int ret; - - mutex_lock(&port->mutex); - ret = do_set_rxtrig(port, bytes); - mutex_unlock(&port->mutex); - - return ret; -} - -static ssize_t serial8250_set_attr_rx_trig_bytes(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct tty_port *port = dev_get_drvdata(dev); - unsigned char bytes; - int ret; - - if (!count) - return -EINVAL; - - ret = kstrtou8(buf, 10, &bytes); - if (ret < 0) - return ret; - - ret = do_serial8250_set_rxtrig(port, bytes); - if (ret < 0) - return ret; - - return count; -} - -static DEVICE_ATTR(rx_trig_bytes, S_IRUSR | S_IWUSR | S_IRGRP, - serial8250_get_attr_rx_trig_bytes, - serial8250_set_attr_rx_trig_bytes); - -static struct attribute *serial8250_dev_attrs[] = { - &dev_attr_rx_trig_bytes.attr, - NULL, - }; - -static struct attribute_group serial8250_dev_attr_group = { - .attrs = serial8250_dev_attrs, - }; - -static void register_dev_spec_attr_grp(struct uart_8250_port *up) -{ - const struct serial8250_config *conf_type = &uart_config[up->port.type]; - - if (conf_type->rxtrig_bytes[0]) - up->port.attr_group = &serial8250_dev_attr_group; -} - -static void serial8250_config_port(struct uart_port *port, int flags) -{ - struct uart_8250_port *up = up_to_u8250p(port); - int ret; - - if (port->type == PORT_8250_CIR) - return; - - /* - * Find the region that we can probe for. This in turn - * tells us whether we can probe for the type of port. 
- */ - ret = serial8250_request_std_resource(up); - if (ret < 0) - return; - - if (port->iotype != up->cur_iotype) - set_io_from_upio(port); - - if (flags & UART_CONFIG_TYPE) - autoconfig(up); - - /* if access method is AU, it is a 16550 with a quirk */ - if (port->type == PORT_16550A && port->iotype == UPIO_AU) - up->bugs |= UART_BUG_NOMSR; - - /* HW bugs may trigger IRQ while IIR == NO_INT */ - if (port->type == PORT_TEGRA) - up->bugs |= UART_BUG_NOMSR; - - if (port->type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) - autoconfig_irq(up); - - if (port->type == PORT_UNKNOWN) - serial8250_release_std_resource(up); - - /* Fixme: probably not the best place for this */ - if ((port->type == PORT_XR17V35X) || - (port->type == PORT_XR17D15X)) - port->handle_irq = exar_handle_irq; - - register_dev_spec_attr_grp(up); - up->fcr = uart_config[up->port.type].fcr; -} - -static int -serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) -{ - if (ser->irq >= nr_irqs || ser->irq < 0 || - ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || - ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || - ser->type == PORT_STARTECH) - return -EINVAL; - return 0; -} - -static const char * -serial8250_type(struct uart_port *port) -{ - int type = port->type; - - if (type >= ARRAY_SIZE(uart_config)) - type = 0; - return uart_config[type].name; -} - -static const struct uart_ops serial8250_pops = { - .tx_empty = serial8250_tx_empty, - .set_mctrl = serial8250_set_mctrl, - .get_mctrl = serial8250_get_mctrl, - .stop_tx = serial8250_stop_tx, - .start_tx = serial8250_start_tx, - .throttle = serial8250_throttle, - .unthrottle = serial8250_unthrottle, - .stop_rx = serial8250_stop_rx, - .enable_ms = serial8250_enable_ms, - .break_ctl = serial8250_break_ctl, - .startup = serial8250_startup, - .shutdown = serial8250_shutdown, - .set_termios = serial8250_set_termios, - .set_ldisc = serial8250_set_ldisc, - .pm = serial8250_pm, - .type = serial8250_type, - .release_port = 
serial8250_release_port, - .request_port = serial8250_request_port, - .config_port = serial8250_config_port, - .verify_port = serial8250_verify_port, -#ifdef CONFIG_CONSOLE_POLL - .poll_get_char = serial8250_get_poll_char, - .poll_put_char = serial8250_put_poll_char, -#endif -}; - static const struct uart_ops *base_ops; static struct uart_ops univ8250_port_ops; @@ -3139,42 +434,6 @@ void serial8250_set_isa_configurator( } EXPORT_SYMBOL(serial8250_set_isa_configurator); -static void serial8250_init_port(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - - spin_lock_init(&port->lock); - port->ops = &serial8250_pops; - - up->cur_iotype = 0xFF; -} - -static void serial8250_set_defaults(struct uart_8250_port *up) -{ - struct uart_port *port = &up->port; - - if (up->port.flags & UPF_FIXED_TYPE) { - unsigned int type = up->port.type; - - if (!up->port.fifosize) - up->port.fifosize = uart_config[type].fifo_size; - if (!up->tx_loadsz) - up->tx_loadsz = uart_config[type].tx_loadsz; - if (!up->capabilities) - up->capabilities = uart_config[type].flags; - } - - set_io_from_upio(port); - - /* default dma handlers */ - if (up->dma) { - if (!up->dma->tx_dma) - up->dma->tx_dma = serial8250_tx_dma; - if (!up->dma->rx_dma) - up->dma->rx_dma = serial8250_rx_dma; - } -} - #ifdef CONFIG_SERIAL_8250_RSA static void univ8250_config_port(struct uart_port *port, int flags) @@ -3324,94 +583,6 @@ serial8250_register_ports(struct uart_driver *drv, struct device *dev) #ifdef CONFIG_SERIAL_8250_CONSOLE -static void serial8250_console_putchar(struct uart_port *port, int ch) -{ - struct uart_8250_port *up = up_to_u8250p(port); - - wait_for_xmitr(up, UART_LSR_THRE); - serial_port_out(port, UART_TX, ch); -} - -/* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... - * - * The console_lock must be held when we get here. 
- */ -static void serial8250_console_write(struct uart_8250_port *up, const char *s, - unsigned int count) -{ - struct uart_port *port = &up->port; - unsigned long flags; - unsigned int ier; - int locked = 1; - - touch_nmi_watchdog(); - - serial8250_rpm_get(up); - - if (port->sysrq) - locked = 0; - else if (oops_in_progress) - locked = spin_trylock_irqsave(&port->lock, flags); - else - spin_lock_irqsave(&port->lock, flags); - - /* - * First save the IER then disable the interrupts - */ - ier = serial_port_in(port, UART_IER); - - if (up->capabilities & UART_CAP_UUE) - serial_port_out(port, UART_IER, UART_IER_UUE); - else - serial_port_out(port, UART_IER, 0); - - /* check scratch reg to see if port powered off during system sleep */ - if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { - struct ktermios termios; - unsigned int baud, quot, frac = 0; - - termios.c_cflag = port->cons->cflag; - if (port->state->port.tty && termios.c_cflag == 0) - termios.c_cflag = port->state->port.tty->termios.c_cflag; - - baud = uart_get_baud_rate(port, &termios, NULL, - port->uartclk / 16 / 0xffff, - port->uartclk / 16); - quot = serial8250_get_divisor(up, baud, &frac); - - serial8250_set_divisor(port, baud, quot, frac); - serial_port_out(port, UART_LCR, up->lcr); - serial_port_out(port, UART_MCR, UART_MCR_DTR | UART_MCR_RTS); - - up->canary = 0; - } - - uart_console_write(port, s, count, serial8250_console_putchar); - - /* - * Finally, wait for transmitter to become empty - * and restore the IER - */ - wait_for_xmitr(up, BOTH_EMPTY); - serial_port_out(port, UART_IER, ier); - - /* - * The receive handling will happen properly because the - * receive ready bit will still be set; it is not cleared - * on read. However, modem control will not, we must - * call it if we have saved something in the saved flags - * while processing with interrupts off. 
- */ - if (up->msr_saved_flags) - serial8250_modem_status(up); - - if (locked) - spin_unlock_irqrestore(&port->lock, flags); - serial8250_rpm_put(up); -} - static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { @@ -3420,39 +591,6 @@ static void univ8250_console_write(struct console *co, const char *s, serial8250_console_write(up, s, count); } -static unsigned int probe_baud(struct uart_port *port) -{ - unsigned char lcr, dll, dlm; - unsigned int quot; - - lcr = serial_port_in(port, UART_LCR); - serial_port_out(port, UART_LCR, lcr | UART_LCR_DLAB); - dll = serial_port_in(port, UART_DLL); - dlm = serial_port_in(port, UART_DLM); - serial_port_out(port, UART_LCR, lcr); - - quot = (dlm << 8) | dll; - return (port->uartclk / 16) / quot; -} - -static int serial8250_console_setup(struct uart_port *port, char *options, bool probe) -{ - int baud = 9600; - int bits = 8; - int parity = 'n'; - int flow = 'n'; - - if (!port->iobase && !port->membase) - return -ENODEV; - - if (options) - uart_parse_options(options, &baud, &parity, &bits, &flow); - else if (probe) - baud = probe_baud(port); - - return uart_set_options(port, port->cons, baud, parity, bits, flow); -} - static int univ8250_console_setup(struct console *co, char *options) { struct uart_port *port; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c new file mode 100644 index 000000000000..4dc143eee086 --- /dev/null +++ b/drivers/tty/serial/8250/8250_port.c @@ -0,0 +1,2898 @@ +/* + * Base port operations for 8250/16550-type serial ports + * + * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. + * Split from 8250_core.c, Copyright (C) 2001 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ *
+ * A note about mapbase / membase
+ *
+ *  mapbase is the physical address of the IO port.
+ *  membase is an 'ioremapped' cookie.
+ */
+
+#if defined(CONFIG_SERIAL_8250_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "8250.h"
+
+/*
+ * Debugging.
+ */
+#if 0
+#define DEBUG_AUTOCONF(fmt...)	printk(fmt)
+#else
+#define DEBUG_AUTOCONF(fmt...)	do { } while (0)
+#endif
+
+#define BOTH_EMPTY	(UART_LSR_TEMT | UART_LSR_THRE)
+
+/*
+ * Here we define the default xmit fifo size used for each type of UART.
+ *
+ * The table is indexed by the PORT_* type.  autoconfig() copies
+ * fifo_size, flags and tx_loadsz from the entry of the detected type.
+ */
+static const struct serial8250_config uart_config[] = {
+	[PORT_UNKNOWN] = {
+		.name		= "unknown",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+	},
+	[PORT_8250] = {
+		.name		= "8250",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+	},
+	[PORT_16450] = {
+		.name		= "16450",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+	},
+	[PORT_16550] = {
+		.name		= "16550",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+	},
+	[PORT_16550A] = {
+		.name		= "16550A",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.rxtrig_bytes	= {1, 4, 8, 14},
+		.flags		= UART_CAP_FIFO,
+	},
+	[PORT_CIRRUS] = {
+		.name		= "Cirrus",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+	},
+	[PORT_16650] = {
+		.name		= "ST16650",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+		.flags		= UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP,
+	},
+	[PORT_16650V2] = {
+		.name		= "ST16650V2",
+		.fifo_size	= 32,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 |
+				  UART_FCR_T_TRIG_00,
+		.rxtrig_bytes	= {8, 16, 24, 28},
+		.flags		= UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP,
+	},
+	[PORT_16750] = {
+		.name		= "TI16750",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 |
+				  UART_FCR7_64BYTE,
+		.rxtrig_bytes	= {1, 16, 32, 56},
+		.flags		= UART_CAP_FIFO | UART_CAP_SLEEP | UART_CAP_AFE,
+	},
+	[PORT_STARTECH] = {
+		.name		= "Startech",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1,
+	},
+	[PORT_16C950] = {
+		.name		= "16C950/954",
+		.fifo_size	= 128,
+		.tx_loadsz	= 128,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		/* UART_CAP_EFR breaks billionon CF bluetooth card. */
+		.flags		= UART_CAP_FIFO | UART_CAP_SLEEP,
+	},
+	[PORT_16654] = {
+		.name		= "ST16654",
+		.fifo_size	= 64,
+		.tx_loadsz	= 32,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 |
+				  UART_FCR_T_TRIG_10,
+		.rxtrig_bytes	= {8, 16, 56, 60},
+		.flags		= UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP,
+	},
+	[PORT_16850] = {
+		.name		= "XR16850",
+		.fifo_size	= 128,
+		.tx_loadsz	= 128,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_EFR | UART_CAP_SLEEP,
+	},
+	[PORT_RSA] = {
+		.name		= "RSA",
+		.fifo_size	= 2048,
+		.tx_loadsz	= 2048,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11,
+		.flags		= UART_CAP_FIFO,
+	},
+	[PORT_NS16550A] = {
+		.name		= "NS16550A",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_NATSEMI,
+	},
+	[PORT_XSCALE] = {
+		.name		= "XScale",
+		.fifo_size	= 32,
+		.tx_loadsz	= 32,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_UUE | UART_CAP_RTOIE,
+	},
+	[PORT_OCTEON] = {
+		.name		= "OCTEON",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO,
+	},
+	[PORT_AR7] = {
+		.name		= "AR7",
+		.fifo_size	= 16,
+		.tx_loadsz	= 16,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
+	[PORT_U6_16550A] = {
+		.name		= "U6_16550A",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
+	[PORT_TEGRA] = {
+		.name		= "Tegra",
+		.fifo_size	= 32,
+		.tx_loadsz	= 8,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_01 |
+				  UART_FCR_T_TRIG_01,
+		.rxtrig_bytes	= {1, 4, 8, 14},
+		.flags		= UART_CAP_FIFO | UART_CAP_RTOIE,
+	},
+	[PORT_XR17D15X] = {
+		.name		= "XR17D15X",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR |
+				  UART_CAP_SLEEP,
+	},
+	[PORT_XR17V35X] = {
+		.name		= "XR17V35X",
+		.fifo_size	= 256,
+		.tx_loadsz	= 256,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 |
+				  UART_FCR_T_TRIG_11,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR |
+				  UART_CAP_SLEEP,
+	},
+	[PORT_LPC3220] = {
+		.name		= "LPC3220",
+		.fifo_size	= 64,
+		.tx_loadsz	= 32,
+		.fcr		= UART_FCR_DMA_SELECT | UART_FCR_ENABLE_FIFO |
+				  UART_FCR_R_TRIG_00 | UART_FCR_T_TRIG_00,
+		.flags		= UART_CAP_FIFO,
+	},
+	[PORT_BRCM_TRUMANAGE] = {
+		.name		= "TruManage",
+		.fifo_size	= 1,
+		.tx_loadsz	= 1024,
+		.flags		= UART_CAP_HFIFO,
+	},
+	[PORT_8250_CIR] = {
+		.name		= "CIR port"
+	},
+	[PORT_ALTR_16550_F32] = {
+		.name		= "Altera 16550 FIFO32",
+		.fifo_size	= 32,
+		.tx_loadsz	= 32,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
+	[PORT_ALTR_16550_F64] = {
+		.name		= "Altera 16550 FIFO64",
+		.fifo_size	= 64,
+		.tx_loadsz	= 64,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
+	[PORT_ALTR_16550_F128] = {
+		.name		= "Altera 16550 FIFO128",
+		.fifo_size	= 128,
+		.tx_loadsz	= 128,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
+		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+	},
+/*
+ * tx_loadsz is set to 63 bytes instead of 64 bytes to implement the
+ * workaround for erratum A-008006, which states that tx_loadsz should
+ * be configured to less than the maximum supported FIFO size.
+ */
+	[PORT_16550A_FSL64] = {
+		.name		= "16550A_FSL64",
+		.fifo_size	= 64,
+		.tx_loadsz	= 63,
+		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 |
+				  UART_FCR7_64BYTE,
+		.flags		= UART_CAP_FIFO,
+	},
+};
+
+/* Uart divisor latch read: DLL supplies the low byte, DLM the next byte */
+static int default_serial_dl_read(struct uart_8250_port *up)
+{
+	return serial_in(up, UART_DLL) | serial_in(up, UART_DLM) << 8;
+}
+
+/* Uart divisor latch write: low byte goes to DLL, the next byte to DLM */
+static void default_serial_dl_write(struct uart_8250_port *up, int value)
+{
+	serial_out(up, UART_DLL, value & 0xff);
+	serial_out(up, UART_DLM, value >> 8 & 0xff);
+}
+
+#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X)
+
+/*
+ * Au1x00/RT288x UART hardware has a weird register layout.
+ *
+ * The two tables below translate a standard UART_* register index into
+ * the hardware register index, separately for reads and writes.  An
+ * entry of -1 marks a register without a mapping: au_serial_in() then
+ * returns UINT_MAX and au_serial_out() silently drops the write.
+ */
+static const s8 au_io_in_map[8] = {
+	 0,	/* UART_RX  */
+	 2,	/* UART_IER */
+	 3,	/* UART_IIR */
+	 5,	/* UART_LCR */
+	 6,	/* UART_MCR */
+	 7,	/* UART_LSR */
+	 8,	/* UART_MSR */
+	-1,	/* UART_SCR (unmapped) */
+};
+
+static const s8 au_io_out_map[8] = {
+	 1,	/* UART_TX  */
+	 2,	/* UART_IER */
+	 4,	/* UART_FCR */
+	 5,	/* UART_LCR */
+	 6,	/* UART_MCR */
+	-1,	/* UART_LSR (unmapped) */
+	-1,	/* UART_MSR (unmapped) */
+	-1,	/* UART_SCR (unmapped) */
+};
+
+static unsigned int au_serial_in(struct uart_port *p, int offset)
+{
+	if (offset >= ARRAY_SIZE(au_io_in_map))
+		return UINT_MAX;
+	offset = au_io_in_map[offset];
+	if (offset < 0)
+		return UINT_MAX;
+	return __raw_readl(p->membase + (offset << p->regshift));
+}
+
+static void au_serial_out(struct uart_port *p, int offset, int value)
+{
+	if (offset >= ARRAY_SIZE(au_io_out_map))
+		return;
+	offset = au_io_out_map[offset];
+	if (offset < 0)
+		return;
+	__raw_writel(value, p->membase + (offset << p->regshift));
+}
+
+/* Au1x00 haven't got a standard divisor latch; it is accessed at membase + 0x28 */
+static int au_serial_dl_read(struct uart_8250_port *up)
+{
+	return __raw_readl(up->port.membase + 0x28);
+}
+
+static void au_serial_dl_write(struct uart_8250_port *up, int value)
+{
+	__raw_writel(value, up->port.membase + 0x28);
+}
+
+#endif
+
+static unsigned int hub6_serial_in(struct uart_port *p, int offset)
+{
+	offset = offset << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);	/* presumably selects the register; data is then read at iobase + 1 */
+	return inb(p->iobase + 1);
+}
+
+static void hub6_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	outb(p->hub6 - 1 + offset, p->iobase);	/* presumably selects the register; data is then written at iobase + 1 */
+	outb(value, p->iobase + 1);
+}
+
+static unsigned int mem_serial_in(struct uart_port *p, int offset)
+{
+	offset = offset << p->regshift;
+	return readb(p->membase + offset);
+}
+
+static void mem_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	writeb(value, p->membase + offset);
+}
+
+static void mem32_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	writel(value, p->membase + offset);
+}
+
+static unsigned int mem32_serial_in(struct uart_port *p, int offset)
+{
+	offset = offset << p->regshift;
+	return readl(p->membase + offset);
+}
+
+static void mem32be_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	iowrite32be(value, p->membase + offset);
+}
+
+static unsigned int mem32be_serial_in(struct uart_port *p, int offset)
+{
+	offset = offset << p->regshift;
+	return ioread32be(p->membase + offset);
+}
+
+static unsigned int io_serial_in(struct uart_port *p, int offset)
+{
+	offset = offset << p->regshift;
+	return inb(p->iobase + offset);
+}
+
+static void io_serial_out(struct uart_port *p, int offset, int value)
+{
+	offset = offset << p->regshift;
+	outb(value, p->iobase + offset);
+}
+
+static int serial8250_default_handle_irq(struct uart_port *port);
+static int exar_handle_irq(struct uart_port *port);
+
+static void set_io_from_upio(struct uart_port *p)
+{
+	struct uart_8250_port *up = up_to_u8250p(p);
+
+	up->dl_read = default_serial_dl_read;	/* defaults; only the UPIO_AU case below overrides them */
+	up->dl_write = default_serial_dl_write;
+
+	switch (p->iotype) {
+	case UPIO_HUB6:
+		p->serial_in = hub6_serial_in;
+		p->serial_out = hub6_serial_out;
+		break;
+
+	case UPIO_MEM:
+		p->serial_in = mem_serial_in;
+		p->serial_out = mem_serial_out;
+		break;
+
+	case UPIO_MEM32:
+		p->serial_in = mem32_serial_in;
+		p->serial_out = mem32_serial_out;
+		break;
+
+	case UPIO_MEM32BE:
+		p->serial_in = mem32be_serial_in;
+		p->serial_out = mem32be_serial_out;
+		break;
+
+#if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X)
+	case UPIO_AU:
+		p->serial_in = au_serial_in;
+		p->serial_out = au_serial_out;
+		up->dl_read = au_serial_dl_read;
+		up->dl_write = au_serial_dl_write;
+		break;
+#endif
+
+	default:	/* every other iotype falls back to the port I/O accessors */
+		p->serial_in = io_serial_in;
+		p->serial_out = io_serial_out;
+		break;
+	}
+	/* Remember loaded iotype */
+	up->cur_iotype = p->iotype;
+	p->handle_irq = serial8250_default_handle_irq;
+}
+
+static void
+serial_port_out_sync(struct uart_port *p, int offset, int value)
+{
+	switch (p->iotype) {
+	case UPIO_MEM:
+	case UPIO_MEM32:
+	case UPIO_MEM32BE:
+	case UPIO_AU:
+		p->serial_out(p, offset, value);
+		p->serial_in(p, UART_LCR);	/* safe, no side-effects; read-back after the write, presumably to make sure it has reached the device */
+		break;
+	default:
+		p->serial_out(p, offset, value);
+	}
+}
+
+/*
+ * For the 16C950
+ *
+ * An indexed control register is accessed by writing its index to
+ * UART_SCR and then reading or writing UART_ICR.
+ */
+static void serial_icr_write(struct uart_8250_port *up, int offset, int value)
+{
+	serial_out(up, UART_SCR, offset);
+	serial_out(up, UART_ICR, value);
+}
+
+static unsigned int serial_icr_read(struct uart_8250_port *up, int offset)
+{
+	unsigned int value;
+
+	serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD);	/* temporarily set ICRRD in ACR for the read */
+	serial_out(up, UART_SCR, offset);
+	value = serial_in(up, UART_ICR);
+	serial_icr_write(up, UART_ACR, up->acr);	/* put the cached ACR value back */
+
+	return value;
+}
+
+/*
+ * FIFO support.
+ */
+static void serial8250_clear_fifos(struct uart_8250_port *p)
+{
+	if (p->capabilities & UART_CAP_FIFO) {
+		serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO);
+		serial_out(p, UART_FCR, UART_FCR_ENABLE_FIFO |
+			       UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
+		serial_out(p, UART_FCR, 0);	/* FCR ends up as 0; callers that want FIFOs re-program it */
+	}
+}
+
+void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p)
+{
+	serial8250_clear_fifos(p);
+	serial_out(p, UART_FCR, p->fcr);	/* re-apply the FCR value cached in the port */
+}
+EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos);
+
+void serial8250_rpm_get(struct uart_8250_port *p)
+{
+	if (!(p->capabilities & UART_CAP_RPM))	/* no-op for ports without UART_CAP_RPM */
+		return;
+	pm_runtime_get_sync(p->port.dev);
+}
+EXPORT_SYMBOL_GPL(serial8250_rpm_get);
+
+void serial8250_rpm_put(struct uart_8250_port *p)
+{
+	if (!(p->capabilities & UART_CAP_RPM))	/* no-op for ports without UART_CAP_RPM */
+		return;
+	pm_runtime_mark_last_busy(p->port.dev);
+	pm_runtime_put_autosuspend(p->port.dev);
+}
+EXPORT_SYMBOL_GPL(serial8250_rpm_put);
+
+/*
+ * These two wrappers ensure that serial8250_rpm_get_tx() can be called
+ * more than once and serial8250_rpm_put_tx() will still disable RPM
+ * because the fifo is empty and the HW can idle again.
+ *
+ * rpm_tx_active is swapped with xchg(): only the call that actually
+ * changes the flag (0 -> 1 on get, 1 -> 0 on put) goes on to take or
+ * release the runtime PM reference, every other call returns early.
+ */
+static void serial8250_rpm_get_tx(struct uart_8250_port *p)
+{
+	unsigned char rpm_active;
+
+	if (!(p->capabilities & UART_CAP_RPM))
+		return;
+
+	rpm_active = xchg(&p->rpm_tx_active, 1);
+	if (rpm_active)	/* reference already held for TX */
+		return;
+	pm_runtime_get_sync(p->port.dev);
+}
+
+static void serial8250_rpm_put_tx(struct uart_8250_port *p)
+{
+	unsigned char rpm_active;
+
+	if (!(p->capabilities & UART_CAP_RPM))
+		return;
+
+	rpm_active = xchg(&p->rpm_tx_active, 0);
+	if (!rpm_active)	/* nothing was held for TX */
+		return;
+	pm_runtime_mark_last_busy(p->port.dev);
+	pm_runtime_put_autosuspend(p->port.dev);
+}
+
+/*
+ * IER sleep support.  UARTs which have EFRs need the "extended
+ * capability" bit enabled.  Note that on XR16C850s, we need to
+ * reset LCR to write to IER.
+ */
+static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
+{
+	unsigned char lcr = 0, efr = 0;
+	/*
+	 * Exar UARTs have a SLEEP register that enables or disables
+	 * each UART to enter sleep mode separately.  On the XR17V35x the
+	 * register is accessible to each UART at the UART_EXAR_SLEEP
+	 * offset but the UART channel may only write to the corresponding
+	 * bit.
+	 */
+	serial8250_rpm_get(p);
+	if ((p->port.type == PORT_XR17V35X) ||
+	   (p->port.type == PORT_XR17D15X)) {
+		serial_out(p, UART_EXAR_SLEEP, sleep ? 0xff : 0);
+		goto out;
+	}
+
+	if (p->capabilities & UART_CAP_SLEEP) {
+		if (p->capabilities & UART_CAP_EFR) {
+			lcr = serial_in(p, UART_LCR);	/* saved so it can be restored below */
+			efr = serial_in(p, UART_EFR);	/* NOTE(review): read before LCR is switched to CONF_MODE_B - confirm EFR is readable here */
+			serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
+			serial_out(p, UART_EFR, UART_EFR_ECB);	/* enable the "extended capability" bit */
+			serial_out(p, UART_LCR, 0);	/* LCR reset before the IER write */
+		}
+		serial_out(p, UART_IER, sleep ? UART_IERX_SLEEP : 0);
+		if (p->capabilities & UART_CAP_EFR) {
+			serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B);
+			serial_out(p, UART_EFR, efr);	/* restore the saved EFR ... */
+			serial_out(p, UART_LCR, lcr);	/* ... and the saved LCR */
+		}
+	}
+out:	/* common exit: drop the reference taken by serial8250_rpm_get() above */
+	serial8250_rpm_put(p);
+}
+
+#ifdef CONFIG_SERIAL_8250_RSA
+/*
+ * Attempts to turn on the RSA FIFO.  Returns zero on failure.
+ * We set the port uart clock rate if we succeed.
+ */
+static int __enable_rsa(struct uart_8250_port *up)
+{
+	unsigned char mode;
+	int result;
+
+	mode = serial_in(up, UART_RSA_MSR);
+	result = mode & UART_RSA_MSR_FIFO;
+
+	if (!result) {	/* FIFO bit not set yet: set it and read back to verify */
+		serial_out(up, UART_RSA_MSR, mode | UART_RSA_MSR_FIFO);
+		mode = serial_in(up, UART_RSA_MSR);
+		result = mode & UART_RSA_MSR_FIFO;
+	}
+
+	if (result)
+		up->port.uartclk = SERIAL_RSA_BAUD_BASE * 16;
+
+	return result;
+}
+
+static void enable_rsa(struct uart_8250_port *up)
+{
+	if (up->port.type == PORT_RSA) {
+		if (up->port.uartclk != SERIAL_RSA_BAUD_BASE * 16) {
+			spin_lock_irq(&up->port.lock);
+			__enable_rsa(up);
+			spin_unlock_irq(&up->port.lock);
+		}
+		if (up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16)	/* clock at the RSA rate, i.e. the FIFO is on */
+			serial_out(up, UART_RSA_FRR, 0);
+	}
+}
+
+/*
+ * Attempts to turn off the RSA FIFO.  Nothing is returned; on success
+ * the port uart clock rate is set back to SERIAL_RSA_BAUD_BASE_LO * 16.
+ * It only acts on PORT_RSA ports whose clock is currently the RSA rate.
+ *
+ * It is unknown why interrupts were disabled in here.  This function
+ * preserves that behaviour by taking the port spinlock itself with
+ * spin_lock_irq(), so the caller must not already hold that lock.
+ */
+static void disable_rsa(struct uart_8250_port *up)
+{
+	unsigned char mode;
+	int result;
+
+	if (up->port.type == PORT_RSA &&
+	    up->port.uartclk == SERIAL_RSA_BAUD_BASE * 16) {
+		spin_lock_irq(&up->port.lock);
+
+		mode = serial_in(up, UART_RSA_MSR);
+		result = !(mode & UART_RSA_MSR_FIFO);
+
+		if (!result) {	/* FIFO bit still set: clear it and read back to verify */
+			serial_out(up, UART_RSA_MSR, mode & ~UART_RSA_MSR_FIFO);
+			mode = serial_in(up, UART_RSA_MSR);
+			result = !(mode & UART_RSA_MSR_FIFO);
+		}
+
+		if (result)
+			up->port.uartclk = SERIAL_RSA_BAUD_BASE_LO * 16;
+		spin_unlock_irq(&up->port.lock);
+	}
+}
+#endif /* CONFIG_SERIAL_8250_RSA */
+
+/*
+ * This is a quickie test to see how big the FIFO is.
+ * It doesn't work at all the time, more's the pity.
+ */
+static int size_fifo(struct uart_8250_port *up)
+{
+	unsigned char old_fcr, old_mcr, old_lcr;
+	unsigned short old_dl;
+	int count;
+
+	old_lcr = serial_in(up, UART_LCR);
+	serial_out(up, UART_LCR, 0);
+	old_fcr = serial_in(up, UART_FCR);
+	old_mcr = serial_in(up, UART_MCR);
+	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO |
+		    UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
+	serial_out(up, UART_MCR, UART_MCR_LOOP);	/* loop mode, so the bytes written below come back on the receive side */
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
+	old_dl = serial_dl_read(up);
+	serial_dl_write(up, 0x0001);	/* divisor 1 for the duration of the test */
+	serial_out(up, UART_LCR, 0x03);
+	for (count = 0; count < 256; count++)
+		serial_out(up, UART_TX, count);
+	mdelay(20);/* FIXME - schedule_timeout */
+	for (count = 0; (serial_in(up, UART_LSR) & UART_LSR_DR) &&
+	     (count < 256); count++)	/* count how many bytes can be read back */
+		serial_in(up, UART_RX);
+	serial_out(up, UART_FCR, old_fcr);	/* restore FCR, MCR, divisor and LCR */
+	serial_out(up, UART_MCR, old_mcr);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
+	serial_dl_write(up, old_dl);
+	serial_out(up, UART_LCR, old_lcr);
+
+	return count;	/* number of bytes read back, taken as the FIFO size */
+}
+
+/*
+ * Read UART ID using the divisor method - set DLL and DLM to zero
+ * and the revision will be in DLL and device type in DLM.  We
+ * preserve the device state across this.
+ *
+ * The value returned is DLL | DLM << 8 as read back after the zero
+ * write; LCR, DLL and DLM are restored before returning.
+ */
+static unsigned int autoconfig_read_divisor_id(struct uart_8250_port *p)
+{
+	unsigned char old_dll, old_dlm, old_lcr;
+	unsigned int id;
+
+	old_lcr = serial_in(p, UART_LCR);
+	serial_out(p, UART_LCR, UART_LCR_CONF_MODE_A);
+
+	old_dll = serial_in(p, UART_DLL);
+	old_dlm = serial_in(p, UART_DLM);
+
+	serial_out(p, UART_DLL, 0);
+	serial_out(p, UART_DLM, 0);
+
+	id = serial_in(p, UART_DLL) | serial_in(p, UART_DLM) << 8;
+
+	serial_out(p, UART_DLL, old_dll);
+	serial_out(p, UART_DLM, old_dlm);
+	serial_out(p, UART_LCR, old_lcr);
+
+	return id;
+}
+
+/*
+ * This is a helper routine to autodetect StarTech/Exar/Oxsemi UART's.
+ * When this function is called we know it is at least a StarTech
+ * 16650 V2, but it might be one of several StarTech UARTs, or one of
+ * its clones.
(We treat the broken original StarTech 16650 V1 as a
+ * 16550, and why not?  Startech doesn't seem to even acknowledge its
+ * existence.)
+ *
+ * What evil have men's minds wrought...
+ */
+static void autoconfig_has_efr(struct uart_8250_port *up)
+{
+	unsigned int id1, id2, id3, rev;
+
+	/*
+	 * Everything with an EFR has SLEEP
+	 */
+	up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP;
+
+	/*
+	 * First we check to see if it's an Oxford Semiconductor UART.
+	 *
+	 * We have to do this here because some non-National
+	 * Semiconductor clone chips lock up if you try writing to the
+	 * LSR register (which serial_icr_read does)
+	 */
+
+	/*
+	 * Check for Oxford Semiconductor 16C950.
+	 *
+	 * EFR [4] must be set else this test fails.
+	 *
+	 * This shouldn't be necessary, but Mike Hudson (Exoray@isys.ca)
+	 * claims that it's needed for 952 dual UART's (which are not
+	 * recommended for new designs).
+	 */
+	up->acr = 0;
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+	serial_out(up, UART_EFR, UART_EFR_ECB);
+	serial_out(up, UART_LCR, 0x00);
+	id1 = serial_icr_read(up, UART_ID1);
+	id2 = serial_icr_read(up, UART_ID2);
+	id3 = serial_icr_read(up, UART_ID3);
+	rev = serial_icr_read(up, UART_REV);
+
+	DEBUG_AUTOCONF("950id=%02x:%02x:%02x:%02x ", id1, id2, id3, rev);
+
+	if (id1 == 0x16 && id2 == 0xC9 &&
+	    (id3 == 0x50 || id3 == 0x52 || id3 == 0x54)) {
+		up->port.type = PORT_16C950;
+
+		/*
+		 * Enable work around for the Oxford Semiconductor 952 rev B
+		 * chip which causes it to seriously miscalculate baud rates
+		 * when DLL is 0.
+		 */
+		if (id3 == 0x52 && rev == 0x01)
+			up->bugs |= UART_BUG_QUOT;
+		return;
+	}
+
+	/*
+	 * We check for a XR16C850 by setting DLL and DLM to 0, and then
+	 * reading back DLL and DLM.  The chip type depends on the DLM
+	 * value read back:
+	 *  0x10 - XR16C850 and the DLL contains the chip revision.
+	 *  0x12 - XR16C2850.
+	 *  0x14 - XR16C854.
+	 */
+	id1 = autoconfig_read_divisor_id(up);
+	DEBUG_AUTOCONF("850id=%04x ", id1);
+
+	id2 = id1 >> 8;	/* the DLM half of the ID */
+	if (id2 == 0x10 || id2 == 0x12 || id2 == 0x14) {
+		up->port.type = PORT_16850;
+		return;
+	}
+
+	/*
+	 * It wasn't an XR16C850.
+	 *
+	 * We distinguish between the '654 and the '650 by counting
+	 * how many bytes are in the FIFO.  I'm using this for now,
+	 * since that's the technique that was sent to me in the
+	 * serial driver update, but I'm not convinced this works.
+	 * I've had problems doing this in the past.  -TYT
+	 */
+	if (size_fifo(up) == 64)
+		up->port.type = PORT_16654;
+	else
+		up->port.type = PORT_16650V2;
+}
+
+/*
+ * We detected a chip without a FIFO.  Only two fall into
+ * this category - the original 8250 and the 16450.  The
+ * 16450 has a scratch register (accessible with LCR=0)
+ *
+ * The port starts out as PORT_8250 and is promoted to PORT_16450 only
+ * if both test patterns written to UART_SCR read back unchanged.  The
+ * previous UART_SCR value is written back afterwards.
+ */
+static void autoconfig_8250(struct uart_8250_port *up)
+{
+	unsigned char scratch, status1, status2;
+
+	up->port.type = PORT_8250;
+
+	scratch = serial_in(up, UART_SCR);
+	serial_out(up, UART_SCR, 0xa5);
+	status1 = serial_in(up, UART_SCR);
+	serial_out(up, UART_SCR, 0x5a);
+	status2 = serial_in(up, UART_SCR);
+	serial_out(up, UART_SCR, scratch);
+
+	if (status1 == 0xa5 && status2 == 0x5a)
+		up->port.type = PORT_16450;
+}
+
+static int broken_efr(struct uart_8250_port *up)
+{
+	/*
+	 * Exar ST16C2550 "A2" devices incorrectly detect as
+	 * having an EFR, and report an ID of 0x0201.  See
+	 * http://linux.derkeiler.com/Mailing-Lists/Kernel/2004-11/4812.html
+	 *
+	 * Returns 1 when the divisor ID is 0x0201 and size_fifo()
+	 * reports 16, and 0 otherwise.
+	 */
+	if (autoconfig_read_divisor_id(up) == 0x0201 && size_fifo(up) == 16)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * We know that the chip has FIFOs.  Does it have an EFR?  The
+ * EFR is located in the same register position as the IIR and
+ * we know the top two bits of the IIR are currently set.  The
+ * EFR should contain zero.  Try to read the EFR.
+ */
+static void autoconfig_16550a(struct uart_8250_port *up)
+{
+	unsigned char status1, status2;
+	unsigned int iersave;
+
+	up->port.type = PORT_16550A;	/* starting assumption, refined by the probes below */
+	up->capabilities |= UART_CAP_FIFO;
+
+	/*
+	 * XR17V35x UARTs have an extra divisor register, DLD
+	 * that gets enabled when DLAB is set which will
+	 * cause the device to incorrectly match and assign
+	 * port type to PORT_16650.  The EFR for this UART is
+	 * found at offset 0x09. Instead check the Device ID (DVID)
+	 * register for a 2, 4 or 8 port UART.
+	 */
+	if (up->port.flags & UPF_EXAR_EFR) {
+		status1 = serial_in(up, UART_EXAR_DVID);
+		if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) {
+			DEBUG_AUTOCONF("Exar XR17V35x ");
+			up->port.type = PORT_XR17V35X;
+			up->capabilities |= UART_CAP_AFE | UART_CAP_EFR |
+						UART_CAP_SLEEP;
+
+			return;
+		}
+
+	}
+
+	/*
+	 * Check for presence of the EFR when DLAB is set.
+	 * Only ST16C650V1 UARTs pass this test.
+	 */
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
+	if (serial_in(up, UART_EFR) == 0) {
+		serial_out(up, UART_EFR, 0xA8);
+		if (serial_in(up, UART_EFR) != 0) {
+			DEBUG_AUTOCONF("EFRv1 ");
+			up->port.type = PORT_16650;
+			up->capabilities |= UART_CAP_EFR | UART_CAP_SLEEP;
+		} else {
+			serial_out(up, UART_LCR, 0);
+			serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO |
+				   UART_FCR7_64BYTE);
+			status1 = serial_in(up, UART_IIR) >> 5;
+			serial_out(up, UART_FCR, 0);
+			serial_out(up, UART_LCR, 0);
+
+			if (status1 == 7)	/* IIR bits 7:5 all set with 64-byte mode requested */
+				up->port.type = PORT_16550A_FSL64;
+			else
+				DEBUG_AUTOCONF("Motorola 8xxx DUART ");
+		}
+		serial_out(up, UART_EFR, 0);
+		return;
+	}
+
+	/*
+	 * Maybe it requires 0xbf to be written to the LCR.
+	 * (other ST16C650V2 UARTs, TI16C752A, etc)
+	 */
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+	if (serial_in(up, UART_EFR) == 0 && !broken_efr(up)) {
+		DEBUG_AUTOCONF("EFRv2 ");
+		autoconfig_has_efr(up);
+		return;
+	}
+
+	/*
+	 * Check for a National Semiconductor SuperIO chip.
+	 * Attempt to switch to bank 2, read the value of the LOOP bit
+	 * from EXCR1. Switch back to bank 0, change it in MCR. Then
+	 * switch back to bank 2, read it from EXCR1 again and check
+	 * it's changed. If so, set baud_base in EXCR2 to 921600. -- dwmw2
+	 */
+	serial_out(up, UART_LCR, 0);
+	status1 = serial_in(up, UART_MCR);
+	serial_out(up, UART_LCR, 0xE0);
+	status2 = serial_in(up, 0x02); /* EXCR1 */
+
+	if (!((status2 ^ status1) & UART_MCR_LOOP)) {
+		serial_out(up, UART_LCR, 0);
+		serial_out(up, UART_MCR, status1 ^ UART_MCR_LOOP);
+		serial_out(up, UART_LCR, 0xE0);
+		status2 = serial_in(up, 0x02); /* EXCR1 */
+		serial_out(up, UART_LCR, 0);
+		serial_out(up, UART_MCR, status1);	/* put the original MCR value back */
+
+		if ((status2 ^ status1) & UART_MCR_LOOP) {
+			unsigned short quot;
+
+			serial_out(up, UART_LCR, 0xE0);
+
+			quot = serial_dl_read(up);
+			quot <<= 3;	/* divisor multiplied by 8 for the higher clock set below */
+
+			if (ns16550a_goto_highspeed(up))
+				serial_dl_write(up, quot);
+
+			serial_out(up, UART_LCR, 0);
+
+			up->port.uartclk = 921600*16;
+			up->port.type = PORT_NS16550A;
+			up->capabilities |= UART_NATSEMI;
+			return;
+		}
+	}
+
+	/*
+	 * No EFR.  Try to detect a TI16750, which only sets bit 5 of
+	 * the IIR when 64 byte FIFO mode is enabled when DLAB is set.
+	 * Try setting it with and without DLAB set.  Cheap clones
+	 * set bit 5 without DLAB set.
+	 */
+	serial_out(up, UART_LCR, 0);
+	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE);
+	status1 = serial_in(up, UART_IIR) >> 5;	/* IIR bits 7:5 without DLAB */
+	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
+	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO | UART_FCR7_64BYTE);
+	status2 = serial_in(up, UART_IIR) >> 5;	/* IIR bits 7:5 with DLAB */
+	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO);
+	serial_out(up, UART_LCR, 0);
+
+	DEBUG_AUTOCONF("iir1=%d iir2=%d ", status1, status2);
+
+	if (status1 == 6 && status2 == 7) {
+		up->port.type = PORT_16750;
+		up->capabilities |= UART_CAP_AFE | UART_CAP_SLEEP;
+		return;
+	}
+
+	/*
+	 * Try writing and reading the UART_IER_UUE bit (b6).
+	 * If it works, this is probably one of the Xscale platform's
+	 * internal UARTs.
+	 * We're going to explicitly set the UUE bit to 0 before
+	 * trying to write and read a 1 just to make sure it's not
+	 * already a 1 and maybe locked there before we even start.
+	 */
+	iersave = serial_in(up, UART_IER);
+	serial_out(up, UART_IER, iersave & ~UART_IER_UUE);
+	if (!(serial_in(up, UART_IER) & UART_IER_UUE)) {
+		/*
+		 * OK it's in a known zero state, try writing and reading
+		 * without disturbing the current state of the other bits.
+		 */
+		serial_out(up, UART_IER, iersave | UART_IER_UUE);
+		if (serial_in(up, UART_IER) & UART_IER_UUE) {
+			/*
+			 * It's an Xscale.
+			 * We'll leave the UART_IER_UUE bit set to 1 (enabled).
+			 */
+			DEBUG_AUTOCONF("Xscale ");
+			up->port.type = PORT_XSCALE;
+			up->capabilities |= UART_CAP_UUE | UART_CAP_RTOIE;
+			return;
+		}
+	} else {
+		/*
+		 * If we got here we couldn't force the IER_UUE bit to 0.
+		 * Log it and continue.
+		 */
+		DEBUG_AUTOCONF("Couldn't force IER_UUE to 0 ");
+	}
+	serial_out(up, UART_IER, iersave);	/* not an Xscale: restore the saved IER */
+
+	/*
+	 * Exar uarts have EFR in a weird location
+	 */
+	if (up->port.flags & UPF_EXAR_EFR) {
+		DEBUG_AUTOCONF("Exar XR17D15x ");
+		up->port.type = PORT_XR17D15X;
+		up->capabilities |= UART_CAP_AFE | UART_CAP_EFR |
+				    UART_CAP_SLEEP;
+
+		return;
+	}
+
+	/*
+	 * We distinguish between 16550A and U6 16550A by counting
+	 * how many bytes are in the FIFO.
+	 */
+	if (up->port.type == PORT_16550A && size_fifo(up) == 64) {
+		up->port.type = PORT_U6_16550A;
+		up->capabilities |= UART_CAP_AFE;
+	}
+}
+
+/*
+ * This routine is called by rs_init() to initialize a specific serial
+ * port.  It determines what type of UART chip this serial port is
+ * using: 8250, 16450, 16550, 16550A.  The important question is
+ * whether or not this UART is a 16550A or not, since this will
+ * determine whether or not we can use its FIFO features or not.
+ */
+static void autoconfig(struct uart_8250_port *up)
+{
+	unsigned char status1, scratch, scratch2, scratch3;
+	unsigned char save_lcr, save_mcr;
+	struct uart_port *port = &up->port;
+	unsigned long flags;
+	unsigned int old_capabilities;
+
+	if (!port->iobase && !port->mapbase && !port->membase)
+		return;
+
+	DEBUG_AUTOCONF("ttyS%d: autoconf (0x%04lx, 0x%p): ",
+		       serial_index(port), port->iobase, port->membase);
+
+	/*
+	 * We really do need global IRQs disabled here - we're going to
+	 * be frobbing the chips IRQ enable register to see if it exists.
+	 */
+	spin_lock_irqsave(&port->lock, flags);
+
+	up->capabilities = 0;
+	up->bugs = 0;
+
+	if (!(port->flags & UPF_BUGGY_UART)) {
+		/*
+		 * Do a simple existence test first; if we fail this,
+		 * there's no point trying anything else.
+		 *
+		 * 0x80 is used as a nonsense port to prevent against
+		 * false positives due to ISA bus float.  The
+		 * assumption is that 0x80 is a non-existent port;
+		 * which should be safe since include/asm/io.h also
+		 * makes this assumption.
+		 *
+		 * Note: this is safe as long as MCR bit 4 is clear
+		 * and the device is in "PC" mode.
+		 */
+		scratch = serial_in(up, UART_IER);
+		serial_out(up, UART_IER, 0);
+#ifdef __i386__
+		outb(0xff, 0x080);
+#endif
+		/*
+		 * Mask out IER[7:4] bits for test as some UARTs (e.g. TL
+		 * 16C754B) allow only to modify them if an EFR bit is set.
+		 */
+		scratch2 = serial_in(up, UART_IER) & 0x0f;
+		serial_out(up, UART_IER, 0x0F);
+#ifdef __i386__
+		outb(0, 0x080);
+#endif
+		scratch3 = serial_in(up, UART_IER) & 0x0f;
+		serial_out(up, UART_IER, scratch);	/* restore the original IER */
+		if (scratch2 != 0 || scratch3 != 0x0F) {
+			/*
+			 * We failed; there's nothing here
+			 */
+			spin_unlock_irqrestore(&port->lock, flags);
+			DEBUG_AUTOCONF("IER test failed (%02x, %02x) ",
+				       scratch2, scratch3);
+			goto out;
+		}
+	}
+
+	save_mcr = serial_in(up, UART_MCR);
+	save_lcr = serial_in(up, UART_LCR);
+
+	/*
+	 * Check to see if a UART is really there.  Certain broken
+	 * internal modems based on the Rockwell chipset fail this
+	 * test, because they apparently don't implement the loopback
+	 * test mode.  So this test is skipped on the COM 1 through
+	 * COM 4 ports.  This *should* be safe, since no board
+	 * manufacturer would be stupid enough to design a board
+	 * that conflicts with COM 1-4 --- we hope!
+	 */
+	if (!(port->flags & UPF_SKIP_TEST)) {
+		serial_out(up, UART_MCR, UART_MCR_LOOP | 0x0A);
+		status1 = serial_in(up, UART_MSR) & 0xF0;
+		serial_out(up, UART_MCR, save_mcr);
+		if (status1 != 0x90) {
+			spin_unlock_irqrestore(&port->lock, flags);
+			DEBUG_AUTOCONF("LOOP test failed (%02x) ",
+				       status1);
+			goto out;
+		}
+	}
+
+	/*
+	 * We're pretty sure there's a port here.  Lets find out what
+	 * type of port it is.  The IIR top two bits allows us to find
+	 * out if it's 8250 or 16450, 16550, 16550A or later.  This
+	 * determines what we test for next.
+	 *
+	 * We also initialise the EFR (if any) to zero for later.  The
+	 * EFR occupies the same register location as the FCR and IIR.
+	 */
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+	serial_out(up, UART_EFR, 0);
+	serial_out(up, UART_LCR, 0);
+
+	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO);
+	scratch = serial_in(up, UART_IIR) >> 6;	/* top two IIR bits select the probe below */
+
+	switch (scratch) {
+	case 0:
+		autoconfig_8250(up);
+		break;
+	case 1:
+		port->type = PORT_UNKNOWN;
+		break;
+	case 2:
+		port->type = PORT_16550;
+		break;
+	case 3:
+		autoconfig_16550a(up);
+		break;
+	}
+
+#ifdef CONFIG_SERIAL_8250_RSA
+	/*
+	 * Only probe for RSA ports if we got the region.
+	 */
+	if (port->type == PORT_16550A && up->probe & UART_PROBE_RSA &&
+	    __enable_rsa(up))
+		port->type = PORT_RSA;
+#endif
+
+	serial_out(up, UART_LCR, save_lcr);
+
+	port->fifosize = uart_config[up->port.type].fifo_size;
+	old_capabilities = up->capabilities;	/* what the probing above detected */
+	up->capabilities = uart_config[port->type].flags;	/* replaced by the table entry for the detected type */
+	up->tx_loadsz = uart_config[port->type].tx_loadsz;
+
+	if (port->type == PORT_UNKNOWN)
+		goto out_lock;
+
+	/*
+	 * Reset the UART.
+	 */
+#ifdef CONFIG_SERIAL_8250_RSA
+	if (port->type == PORT_RSA)
+		serial_out(up, UART_RSA_FRR, 0);
+#endif
+	serial_out(up, UART_MCR, save_mcr);
+	serial8250_clear_fifos(up);
+	serial_in(up, UART_RX);
+	if (up->capabilities & UART_CAP_UUE)
+		serial_out(up, UART_IER, UART_IER_UUE);
+	else
+		serial_out(up, UART_IER, 0);
+
+out_lock:
+	spin_unlock_irqrestore(&port->lock, flags);
+	if (up->capabilities != old_capabilities) {
+		printk(KERN_WARNING
+		       "ttyS%d: detected caps %08x should be %08x\n",
+		       serial_index(port), old_capabilities,
+		       up->capabilities);	/* first value: probed caps, second: caps from uart_config[] */
+	}
+out:
+	DEBUG_AUTOCONF("iir=%d ", scratch);
+	DEBUG_AUTOCONF("type=%s\n", uart_config[port->type].name);
+}
+
+static void autoconfig_irq(struct uart_8250_port *up)
+{
+	struct uart_port *port = &up->port;
+	unsigned char save_mcr, save_ier;
+	unsigned char save_ICP = 0;
+	unsigned int ICP = 0;
+	unsigned long irqs;
+	int irq;
+
+	if (port->flags & UPF_FOURPORT) {
+		ICP = (port->iobase & 0xfe0) | 0x1f;
+		save_ICP = inb_p(ICP);
+		outb_p(0x80, ICP);
+		inb_p(ICP);
+	}
+
+	/* forget possible initially masked and pending IRQ */
+	probe_irq_off(probe_irq_on());
+	save_mcr = serial_in(up, UART_MCR);
+	save_ier = serial_in(up, UART_IER);
+	serial_out(up, UART_MCR, UART_MCR_OUT1 | UART_MCR_OUT2);
+
+	irqs = probe_irq_on();
+	serial_out(up, UART_MCR, 0);
+	udelay(10);
+	if (port->flags & UPF_FOURPORT) {
+		serial_out(up, UART_MCR,
+			    UART_MCR_DTR | UART_MCR_RTS);
+	} else {
+		serial_out(up, UART_MCR,
+			    UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2);
+	}
+	serial_out(up, UART_IER, 0x0f);	/* enable all intrs */
+	serial_in(up, UART_LSR);
+	serial_in(up, UART_RX);
+	serial_in(up, UART_IIR);
+	serial_in(up, UART_MSR);
+	serial_out(up, UART_TX, 0xFF);
+	udelay(20);
+	irq = probe_irq_off(irqs);
+
+	serial_out(up, UART_MCR, save_mcr);
+	serial_out(up, UART_IER, save_ier);
+
+	if (port->flags & UPF_FOURPORT)
+		outb_p(save_ICP, ICP);
+
+	port->irq = (irq > 0) ? irq : 0;	/* a non-positive probe result is stored as 0 */
+}
+
+static inline void __stop_tx(struct uart_8250_port *p)
+{
+	if (p->ier & UART_IER_THRI) {
+		p->ier &= ~UART_IER_THRI;
+		serial_out(p, UART_IER, p->ier);
+		serial8250_rpm_put_tx(p);
+	}
+}
+
+static void serial8250_stop_tx(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+
+	serial8250_rpm_get(up);
+	__stop_tx(up);
+
+	/*
+	 * We really want to stop the transmitter from sending.
+	 */
+	if (port->type == PORT_16C950) {
+		up->acr |= UART_ACR_TXDIS;
+		serial_icr_write(up, UART_ACR, up->acr);
+	}
+	serial8250_rpm_put(up);
+}
+
+static void serial8250_start_tx(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+
+	serial8250_rpm_get_tx(up);
+
+	if (up->dma && !up->dma->tx_dma(up))	/* tx_dma() returned 0: nothing more to do here */
+		return;
+
+	if (!(up->ier & UART_IER_THRI)) {
+		up->ier |= UART_IER_THRI;
+		serial_port_out(port, UART_IER, up->ier);
+
+		if (up->bugs & UART_BUG_TXEN) {
+			unsigned char lsr;
+			lsr = serial_in(up, UART_LSR);
+			up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
+			if (lsr & UART_LSR_THRE)
+				serial8250_tx_chars(up);
+		}
+	}
+
+	/*
+	 * Re-enable the transmitter if we disabled it.
+	 */
+	if (port->type == PORT_16C950 && up->acr & UART_ACR_TXDIS) {
+		up->acr &= ~UART_ACR_TXDIS;
+		serial_icr_write(up, UART_ACR, up->acr);
+	}
+}
+
+static void serial8250_throttle(struct uart_port *port)
+{
+	port->throttle(port);
+}
+
+static void serial8250_unthrottle(struct uart_port *port)
+{
+	port->unthrottle(port);
+}
+
+static void serial8250_stop_rx(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+
+	serial8250_rpm_get(up);
+
+	up->ier &= ~(UART_IER_RLSI | UART_IER_RDI);
+	up->port.read_status_mask &= ~UART_LSR_DR;
+	serial_port_out(port, UART_IER, up->ier);
+
+	serial8250_rpm_put(up);
+}
+
+static void serial8250_disable_ms(struct uart_port *port)
+{
+	struct uart_8250_port *up =
+		container_of(port, struct uart_8250_port, port);
+
+	/* no MSR capabilities */
+	if (up->bugs & UART_BUG_NOMSR)
+		return;
+
+	up->ier &= ~UART_IER_MSI;
+	serial_port_out(port, UART_IER, up->ier);
+}
+
+static void serial8250_enable_ms(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+
+	/* no MSR capabilities */
+	if (up->bugs & UART_BUG_NOMSR)
+		return;
+
+	up->ier |= UART_IER_MSI;
+
+	serial8250_rpm_get(up);
+	serial_port_out(port, UART_IER, up->ier);
+	serial8250_rpm_put(up);
+}
+
+/*
+ * serial8250_rx_chars: processes according to the passed in LSR
+ * value, and returns the remaining LSR bits not handled
+ * by this Rx routine.
+ *
+ * At most 256 characters are handled per call.  port->lock is
+ * released around tty_flip_buffer_push() and re-taken before
+ * returning.
+ */
+unsigned char
+serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr)
+{
+	struct uart_port *port = &up->port;
+	unsigned char ch;
+	int max_count = 256;
+	char flag;
+
+	do {
+		if (likely(lsr & UART_LSR_DR))
+			ch = serial_in(up, UART_RX);
+		else
+			/*
+			 * Intel 82571 has a Serial Over Lan device that will
+			 * set UART_LSR_BI without setting UART_LSR_DR when
+			 * it receives a break. To avoid reading from the
+			 * receive buffer without UART_LSR_DR bit set, we
+			 * just force the read character to be 0
+			 */
+			ch = 0;
+
+		flag = TTY_NORMAL;
+		port->icount.rx++;
+
+		lsr |= up->lsr_saved_flags;
+		up->lsr_saved_flags = 0;
+
+		if (unlikely(lsr & UART_LSR_BRK_ERROR_BITS)) {
+			if (lsr & UART_LSR_BI) {
+				lsr &= ~(UART_LSR_FE | UART_LSR_PE);
+				port->icount.brk++;
+				/*
+				 * We do the SysRQ and SAK checking
+				 * here because otherwise the break
+				 * may get masked by ignore_status_mask
+				 * or read_status_mask.
+				 */
+				if (uart_handle_break(port))
+					goto ignore_char;
+			} else if (lsr & UART_LSR_PE)
+				port->icount.parity++;
+			else if (lsr & UART_LSR_FE)
+				port->icount.frame++;
+			if (lsr & UART_LSR_OE)
+				port->icount.overrun++;
+
+			/*
+			 * Mask off conditions which should be ignored.
+			 */
+			lsr &= port->read_status_mask;
+
+			if (lsr & UART_LSR_BI) {
+				DEBUG_INTR("handling break....");
+				flag = TTY_BREAK;
+			} else if (lsr & UART_LSR_PE)
+				flag = TTY_PARITY;
+			else if (lsr & UART_LSR_FE)
+				flag = TTY_FRAME;
+		}
+		if (uart_handle_sysrq_char(port, ch))
+			goto ignore_char;
+
+		uart_insert_char(port, lsr, UART_LSR_OE, ch, flag);
+
+ignore_char:
+		lsr = serial_in(up, UART_LSR);
+	} while ((lsr & (UART_LSR_DR | UART_LSR_BI)) && (--max_count > 0));
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(&port->state->port);
+	spin_lock(&port->lock);
+	return lsr;
+}
+EXPORT_SYMBOL_GPL(serial8250_rx_chars);
+
+void serial8250_tx_chars(struct uart_8250_port *up)
+{
+	struct uart_port *port = &up->port;
+	struct circ_buf *xmit = &port->state->xmit;
+	int count;
+
+	if (port->x_char) {
+		serial_out(up, UART_TX, port->x_char);
+		port->icount.tx++;
+		port->x_char = 0;
+		return;
+	}
+	if (uart_tx_stopped(port)) {
+		serial8250_stop_tx(port);
+		return;
+	}
+	if (uart_circ_empty(xmit)) {
+		__stop_tx(up);
+		return;
+	}
+
+	count = up->tx_loadsz;
+	do {
+		serial_out(up, UART_TX, xmit->buf[xmit->tail]);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+		if
(uart_circ_empty(xmit)) + break; + if (up->capabilities & UART_CAP_HFIFO) { + if ((serial_port_in(port, UART_LSR) & BOTH_EMPTY) != + BOTH_EMPTY) + break; + } + } while (--count > 0); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + + DEBUG_INTR("THRE..."); + + /* + * With RPM enabled, we have to wait until the FIFO is empty before the + * HW can go idle. So we get here once again with empty FIFO and disable + * the interrupt and RPM in __stop_tx() + */ + if (uart_circ_empty(xmit) && !(up->capabilities & UART_CAP_RPM)) + __stop_tx(up); +} +EXPORT_SYMBOL_GPL(serial8250_tx_chars); + +/* Caller holds uart port lock */ +unsigned int serial8250_modem_status(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + unsigned int status = serial_in(up, UART_MSR); + + status |= up->msr_saved_flags; + up->msr_saved_flags = 0; + if (status & UART_MSR_ANY_DELTA && up->ier & UART_IER_MSI && + port->state != NULL) { + if (status & UART_MSR_TERI) + port->icount.rng++; + if (status & UART_MSR_DDSR) + port->icount.dsr++; + if (status & UART_MSR_DDCD) + uart_handle_dcd_change(port, status & UART_MSR_DCD); + if (status & UART_MSR_DCTS) + uart_handle_cts_change(port, status & UART_MSR_CTS); + + wake_up_interruptible(&port->state->port.delta_msr_wait); + } + + return status; +} +EXPORT_SYMBOL_GPL(serial8250_modem_status); + +/* + * This handles the interrupt from one port. 
+ *
+ * Returns 0 straight away, before taking the lock or reading any other
+ * register, when @iir has UART_IIR_NO_INT set.  Otherwise the port is
+ * serviced under port->lock (irqsave) and 1 is returned.
+ */
+int serial8250_handle_irq(struct uart_port *port, unsigned int iir)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned long irqflags;
+	unsigned char lsr;
+
+	if (iir & UART_IIR_NO_INT)
+		return 0;
+
+	spin_lock_irqsave(&port->lock, irqflags);
+
+	lsr = serial_port_in(port, UART_LSR);
+
+	DEBUG_INTR("status = %x...", lsr);
+
+	if (lsr & (UART_LSR_DR | UART_LSR_BI)) {
+		/*
+		 * Receive by PIO when the port has no DMA, or when rx_dma()
+		 * returns non-zero.  rx_dma() is only called if up->dma is
+		 * set.
+		 */
+		if (!up->dma || up->dma->rx_dma(up, iir))
+			lsr = serial8250_rx_chars(up, lsr);
+	}
+
+	serial8250_modem_status(up);
+
+	/* Transmit by PIO on THRE unless DMA is in use and has no tx_err. */
+	if ((lsr & UART_LSR_THRE) && (!up->dma || up->dma->tx_err))
+		serial8250_tx_chars(up);
+
+	spin_unlock_irqrestore(&port->lock, irqflags);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(serial8250_handle_irq);
+
+/*
+ * Read IIR and hand it to serial8250_handle_irq(), with both steps
+ * bracketed by serial8250_rpm_get()/serial8250_rpm_put().  Returns
+ * whatever serial8250_handle_irq() returned.
+ */
+static int serial8250_default_handle_irq(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	int handled;
+
+	serial8250_rpm_get(up);
+	handled = serial8250_handle_irq(port, serial_port_in(port, UART_IIR));
+	serial8250_rpm_put(up);
+
+	return handled;
+}
+
+/*
+ * These Exar UARTs have an extra interrupt indicator that could
+ * fire for a few unimplemented interrupts. One of which is a
+ * wakeup event when coming out of sleep. Put this here just
+ * to be on the safe side that these interrupts don't go unhandled.
+ */
+static int exar_handle_irq(struct uart_port *port)
+{
+	unsigned int iir = serial_port_in(port, UART_IIR);
+	int ret;
+
+	ret = serial8250_handle_irq(port, iir);
+
+	if ((port->type == PORT_XR17V35X) ||
+	   (port->type == PORT_XR17D15X)) {
+		/*
+		 * Offsets 0x80-0x83 are read and the values deliberately
+		 * discarded.  NOTE(review): presumably the read itself
+		 * acknowledges the extra interrupt sources - confirm against
+		 * the Exar datasheet.
+		 */
+		serial_port_in(port, 0x80);
+		serial_port_in(port, 0x81);
+		serial_port_in(port, 0x82);
+		serial_port_in(port, 0x83);
+	}
+
+	return ret;
+}
+
+/*
+ * Report transmitter state: TIOCSER_TEMT when every BOTH_EMPTY bit is set
+ * in LSR, 0 otherwise.  The LSR bits selected by LSR_SAVE_FLAGS are
+ * accumulated into up->lsr_saved_flags.  LSR is read under port->lock,
+ * inside a serial8250_rpm_get()/serial8250_rpm_put() pair.
+ */
+static unsigned int serial8250_tx_empty(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned long flags;
+	unsigned int lsr;
+
+	serial8250_rpm_get(up);
+
+	spin_lock_irqsave(&port->lock, flags);
+	lsr = serial_port_in(port, UART_LSR);
+	up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	serial8250_rpm_put(up);
+
+	return (lsr & BOTH_EMPTY) == BOTH_EMPTY ? TIOCSER_TEMT : 0;
+}
+
+/*
+ * Translate the modem status returned by serial8250_modem_status() into
+ * TIOCM_* bits: DCD -> TIOCM_CAR, RI -> TIOCM_RNG, DSR -> TIOCM_DSR,
+ * CTS -> TIOCM_CTS.
+ */
+static unsigned int serial8250_get_mctrl(struct uart_port *port)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned int status;
+	unsigned int ret;
+
+	serial8250_rpm_get(up);
+	status = serial8250_modem_status(up);
+	serial8250_rpm_put(up);
+
+	ret = 0;
+	if (status & UART_MSR_DCD)
+		ret |= TIOCM_CAR;
+	if (status & UART_MSR_RI)
+		ret |= TIOCM_RNG;
+	if (status & UART_MSR_DSR)
+		ret |= TIOCM_DSR;
+	if (status & UART_MSR_CTS)
+		ret |= TIOCM_CTS;
+	return ret;
+}
+
+/*
+ * Convert the TIOCM_* bits in @mctrl (RTS, DTR, OUT1, OUT2, LOOP) to their
+ * UART_MCR_* counterparts and write the result to the MCR register.
+ */
+void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned char mcr = 0;
+
+	if (mctrl & TIOCM_RTS)
+		mcr |= UART_MCR_RTS;
+	if (mctrl & TIOCM_DTR)
+		mcr |= UART_MCR_DTR;
+	if (mctrl & TIOCM_OUT1)
+		mcr |= UART_MCR_OUT1;
+	if (mctrl & TIOCM_OUT2)
+		mcr |= UART_MCR_OUT2;
+	if (mctrl & TIOCM_LOOP)
+		mcr |= UART_MCR_LOOP;
+
+	/*
+	 * Requested bits are filtered through mcr_mask; mcr_force and the
+	 * driver-owned bits kept in up->mcr are always OR-ed in.
+	 */
+	mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr;
+
+	serial_port_out(port, UART_MCR, mcr);
+}
+EXPORT_SYMBOL_GPL(serial8250_do_set_mctrl);
+
+/*
+ * Dispatch to the port's own set_mctrl() hook when one is installed,
+ * otherwise fall back to serial8250_do_set_mctrl().
+ */
+static void serial8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	if (port->set_mctrl)
+		port->set_mctrl(port, mctrl);
+	else
+		serial8250_do_set_mctrl(port, mctrl);
+}
+
+/*
+ * Set (@break_state == -1) or clear (any other value) UART_LCR_SBC in the
+ * cached up->lcr and write it to the LCR register, under port->lock.
+ */
+static void serial8250_break_ctl(struct uart_port *port, int break_state)
+{
+	struct uart_8250_port *up = up_to_u8250p(port);
+	unsigned long flags;
+
+	serial8250_rpm_get(up);
+	spin_lock_irqsave(&port->lock, flags);
+	if (break_state == -1)
+		up->lcr |= UART_LCR_SBC;
+	else
+		up->lcr &= ~UART_LCR_SBC;
+	serial_port_out(port, UART_LCR, up->lcr);
+	spin_unlock_irqrestore(&port->lock, flags);
+	serial8250_rpm_put(up);
+}
+
+/*
+ *	Wait for transmitter & holding register to empty
+ *
+ *	Polls LSR until every bit in @bits is set, giving up after 10000
+ *	polls spaced by udelay(1).  LSR bits selected by LSR_SAVE_FLAGS are
+ *	accumulated in up->lsr_saved_flags.  When the port has
+ *	UPF_CONS_FLOW set, MSR is then polled until CTS is seen, for at
+ *	most 1000000 polls; MSR bits selected by MSR_SAVE_FLAGS are
+ *	accumulated in up->msr_saved_flags.
+ */
+static void wait_for_xmitr(struct uart_8250_port *up, int bits)
+{
+	unsigned int status, tmout = 10000;
+
+	/* Wait up to 10ms for the character(s) to be sent. */
+	for (;;) {
+		status = serial_in(up, UART_LSR);
+
+		up->lsr_saved_flags |= status & LSR_SAVE_FLAGS;
+
+		if ((status & bits) == bits)
+			break;
+		if (--tmout == 0)
+			break;
+		udelay(1);
+	}
+
+	/* Wait up to 1s for flow control if necessary */
+	if (up->port.flags & UPF_CONS_FLOW) {
+		/* Reuse the outer counter; a second declaration would shadow it. */
+		for (tmout = 1000000; tmout; tmout--) {
+			unsigned int msr = serial_in(up, UART_MSR);
+			up->msr_saved_flags |= msr & MSR_SAVE_FLAGS;
+			if (msr & UART_MSR_CTS)
+				break;
+			udelay(1);
+			touch_nmi_watchdog();
+		}
+	}
+}
+
+#ifdef CONFIG_CONSOLE_POLL
+/*
+ * Console polling routines for writing and reading from the uart while
+ * in an interrupt or debug context.
+ */ + +static int serial8250_get_poll_char(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + unsigned char lsr; + int status; + + serial8250_rpm_get(up); + + lsr = serial_port_in(port, UART_LSR); + + if (!(lsr & UART_LSR_DR)) { + status = NO_POLL_CHAR; + goto out; + } + + status = serial_port_in(port, UART_RX); +out: + serial8250_rpm_put(up); + return status; +} + + +static void serial8250_put_poll_char(struct uart_port *port, + unsigned char c) +{ + unsigned int ier; + struct uart_8250_port *up = up_to_u8250p(port); + + serial8250_rpm_get(up); + /* + * First save the IER then disable the interrupts + */ + ier = serial_port_in(port, UART_IER); + if (up->capabilities & UART_CAP_UUE) + serial_port_out(port, UART_IER, UART_IER_UUE); + else + serial_port_out(port, UART_IER, 0); + + wait_for_xmitr(up, BOTH_EMPTY); + /* + * Send the character out. + */ + serial_port_out(port, UART_TX, c); + + /* + * Finally, wait for transmitter to become empty + * and restore the IER + */ + wait_for_xmitr(up, BOTH_EMPTY); + serial_port_out(port, UART_IER, ier); + serial8250_rpm_put(up); +} + +#endif /* CONFIG_CONSOLE_POLL */ + +int serial8250_do_startup(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + unsigned long flags; + unsigned char lsr, iir; + int retval; + + if (port->type == PORT_8250_CIR) + return -ENODEV; + + if (!port->fifosize) + port->fifosize = uart_config[port->type].fifo_size; + if (!up->tx_loadsz) + up->tx_loadsz = uart_config[port->type].tx_loadsz; + if (!up->capabilities) + up->capabilities = uart_config[port->type].flags; + up->mcr = 0; + + if (port->iotype != up->cur_iotype) + set_io_from_upio(port); + + serial8250_rpm_get(up); + if (port->type == PORT_16C950) { + /* Wake up and initialize UART */ + up->acr = 0; + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_IER, 0); + serial_port_out(port, UART_LCR, 0); + 
serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + serial_port_out(port, UART_EFR, UART_EFR_ECB); + serial_port_out(port, UART_LCR, 0); + } + +#ifdef CONFIG_SERIAL_8250_RSA + /* + * If this is an RSA port, see if we can kick it up to the + * higher speed clock. + */ + enable_rsa(up); +#endif + /* + * Clear the FIFO buffers and disable them. + * (they will be reenabled in set_termios()) + */ + serial8250_clear_fifos(up); + + /* + * Clear the interrupt registers. + */ + serial_port_in(port, UART_LSR); + serial_port_in(port, UART_RX); + serial_port_in(port, UART_IIR); + serial_port_in(port, UART_MSR); + + /* + * At this point, there's no way the LSR could still be 0xff; + * if it is, then bail out, because there's likely no UART + * here. + */ + if (!(port->flags & UPF_BUGGY_UART) && + (serial_port_in(port, UART_LSR) == 0xff)) { + printk_ratelimited(KERN_INFO "ttyS%d: LSR safety check engaged!\n", + serial_index(port)); + retval = -ENODEV; + goto out; + } + + /* + * For a XR16C850, we need to set the trigger levels + */ + if (port->type == PORT_16850) { + unsigned char fctr; + + serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B); + + fctr = serial_in(up, UART_FCTR) & ~(UART_FCTR_RX|UART_FCTR_TX); + serial_port_out(port, UART_FCTR, + fctr | UART_FCTR_TRGD | UART_FCTR_RX); + serial_port_out(port, UART_TRG, UART_TRG_96); + serial_port_out(port, UART_FCTR, + fctr | UART_FCTR_TRGD | UART_FCTR_TX); + serial_port_out(port, UART_TRG, UART_TRG_96); + + serial_port_out(port, UART_LCR, 0); + } + + if (port->irq) { + unsigned char iir1; + /* + * Test for UARTs that do not reassert THRE when the + * transmitter is idle and the interrupt has already + * been cleared. Real 16550s should always reassert + * this interrupt whenever the transmitter is idle and + * the interrupt is enabled. Delays are necessary to + * allow register changes to become visible. 
+ */ + spin_lock_irqsave(&port->lock, flags); + if (up->port.irqflags & IRQF_SHARED) + disable_irq_nosync(port->irq); + + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow THRE to set */ + iir1 = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + serial_port_out_sync(port, UART_IER, UART_IER_THRI); + udelay(1); /* allow a working UART time to re-assert THRE */ + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + + if (port->irqflags & IRQF_SHARED) + enable_irq(port->irq); + spin_unlock_irqrestore(&port->lock, flags); + + /* + * If the interrupt is not reasserted, or we otherwise + * don't trust the iir, setup a timer to kick the UART + * on a regular basis. + */ + if ((!(iir1 & UART_IIR_NO_INT) && (iir & UART_IIR_NO_INT)) || + up->port.flags & UPF_BUG_THRE) { + up->bugs |= UART_BUG_THRE; + } + } + + retval = up->ops->setup_irq(up); + if (retval) + goto out; + + /* + * Now, initialize the UART + */ + serial_port_out(port, UART_LCR, UART_LCR_WLEN8); + + spin_lock_irqsave(&port->lock, flags); + if (up->port.flags & UPF_FOURPORT) { + if (!up->port.irq) + up->port.mctrl |= TIOCM_OUT1; + } else + /* + * Most PC uarts need OUT2 raised to enable interrupts. + */ + if (port->irq) + up->port.mctrl |= TIOCM_OUT2; + + serial8250_set_mctrl(port, port->mctrl); + + /* Serial over Lan (SoL) hack: + Intel 8257x Gigabit ethernet chips have a + 16550 emulation, to be used for Serial Over Lan. + Those chips take a longer time than a normal + serial device to signalize that a transmission + data was queued. Due to that, the above test generally + fails. One solution would be to delay the reading of + iir. However, this is not reliable, since the timeout + is variable. So, let's just don't test if we receive + TX irq. This way, we'll never enable UART_BUG_TXEN. 
+ */ + if (up->port.flags & UPF_NO_TXEN_TEST) + goto dont_test_tx_en; + + /* + * Do a quick test to see if we receive an + * interrupt when we enable the TX irq. + */ + serial_port_out(port, UART_IER, UART_IER_THRI); + lsr = serial_port_in(port, UART_LSR); + iir = serial_port_in(port, UART_IIR); + serial_port_out(port, UART_IER, 0); + + if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { + if (!(up->bugs & UART_BUG_TXEN)) { + up->bugs |= UART_BUG_TXEN; + pr_debug("ttyS%d - enabling bad tx status workarounds\n", + serial_index(port)); + } + } else { + up->bugs &= ~UART_BUG_TXEN; + } + +dont_test_tx_en: + spin_unlock_irqrestore(&port->lock, flags); + + /* + * Clear the interrupt registers again for luck, and clear the + * saved flags to avoid getting false values from polling + * routines or the previous session. + */ + serial_port_in(port, UART_LSR); + serial_port_in(port, UART_RX); + serial_port_in(port, UART_IIR); + serial_port_in(port, UART_MSR); + up->lsr_saved_flags = 0; + up->msr_saved_flags = 0; + + /* + * Request DMA channels for both RX and TX. + */ + if (up->dma) { + retval = serial8250_request_dma(up); + if (retval) { + pr_warn_ratelimited("ttyS%d - failed to request DMA\n", + serial_index(port)); + up->dma = NULL; + } + } + + /* + * Finally, enable interrupts. Note: Modem status interrupts + * are set via set_termios(), which will be occurring imminently + * anyway, so we don't enable them here. 
+ */ + up->ier = UART_IER_RLSI | UART_IER_RDI; + serial_port_out(port, UART_IER, up->ier); + + if (port->flags & UPF_FOURPORT) { + unsigned int icp; + /* + * Enable interrupts on the AST Fourport board + */ + icp = (port->iobase & 0xfe0) | 0x01f; + outb_p(0x80, icp); + inb_p(icp); + } + retval = 0; +out: + serial8250_rpm_put(up); + return retval; +} +EXPORT_SYMBOL_GPL(serial8250_do_startup); + +static int serial8250_startup(struct uart_port *port) +{ + if (port->startup) + return port->startup(port); + return serial8250_do_startup(port); +} + +void serial8250_do_shutdown(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + unsigned long flags; + + serial8250_rpm_get(up); + /* + * Disable interrupts from this port + */ + up->ier = 0; + serial_port_out(port, UART_IER, 0); + + if (up->dma) + serial8250_release_dma(up); + + spin_lock_irqsave(&port->lock, flags); + if (port->flags & UPF_FOURPORT) { + /* reset interrupts on the AST Fourport board */ + inb((port->iobase & 0xfe0) | 0x1f); + port->mctrl |= TIOCM_OUT1; + } else + port->mctrl &= ~TIOCM_OUT2; + + serial8250_set_mctrl(port, port->mctrl); + spin_unlock_irqrestore(&port->lock, flags); + + /* + * Disable break condition and FIFOs + */ + serial_port_out(port, UART_LCR, + serial_port_in(port, UART_LCR) & ~UART_LCR_SBC); + serial8250_clear_fifos(up); + +#ifdef CONFIG_SERIAL_8250_RSA + /* + * Reset the RSA board back to 115kbps compat mode. + */ + disable_rsa(up); +#endif + + /* + * Read data port to reset things, and then unlink from + * the IRQ chain. 
+ */ + serial_port_in(port, UART_RX); + serial8250_rpm_put(up); + + up->ops->release_irq(up); +} +EXPORT_SYMBOL_GPL(serial8250_do_shutdown); + +static void serial8250_shutdown(struct uart_port *port) +{ + if (port->shutdown) + port->shutdown(port); + else + serial8250_do_shutdown(port); +} + +/* + * XR17V35x UARTs have an extra fractional divisor register (DLD) + * Calculate divisor with extra 4-bit fractional portion + */ +static unsigned int xr17v35x_get_divisor(struct uart_8250_port *up, + unsigned int baud, + unsigned int *frac) +{ + struct uart_port *port = &up->port; + unsigned int quot_16; + + quot_16 = DIV_ROUND_CLOSEST(port->uartclk, baud); + *frac = quot_16 & 0x0f; + + return quot_16 >> 4; +} + +static unsigned int serial8250_get_divisor(struct uart_8250_port *up, + unsigned int baud, + unsigned int *frac) +{ + struct uart_port *port = &up->port; + unsigned int quot; + + /* + * Handle magic divisors for baud rates above baud_base on + * SMSC SuperIO chips. + * + */ + if ((port->flags & UPF_MAGIC_MULTIPLIER) && + baud == (port->uartclk/4)) + quot = 0x8001; + else if ((port->flags & UPF_MAGIC_MULTIPLIER) && + baud == (port->uartclk/8)) + quot = 0x8002; + else if (up->port.type == PORT_XR17V35X) + quot = xr17v35x_get_divisor(up, baud, frac); + else + quot = uart_get_divisor(port, baud); + + /* + * Oxford Semi 952 rev B workaround + */ + if (up->bugs & UART_BUG_QUOT && (quot & 0xff) == 0) + quot++; + + return quot; +} + +static unsigned char serial8250_compute_lcr(struct uart_8250_port *up, + tcflag_t c_cflag) +{ + unsigned char cval; + + switch (c_cflag & CSIZE) { + case CS5: + cval = UART_LCR_WLEN5; + break; + case CS6: + cval = UART_LCR_WLEN6; + break; + case CS7: + cval = UART_LCR_WLEN7; + break; + default: + case CS8: + cval = UART_LCR_WLEN8; + break; + } + + if (c_cflag & CSTOPB) + cval |= UART_LCR_STOP; + if (c_cflag & PARENB) { + cval |= UART_LCR_PARITY; + if (up->bugs & UART_BUG_PARITY) + up->fifo_bug = true; + } + if (!(c_cflag & PARODD)) + cval |= 
UART_LCR_EPAR; +#ifdef CMSPAR + if (c_cflag & CMSPAR) + cval |= UART_LCR_SPAR; +#endif + + return cval; +} + +static void serial8250_set_divisor(struct uart_port *port, unsigned int baud, + unsigned int quot, unsigned int quot_frac) +{ + struct uart_8250_port *up = up_to_u8250p(port); + + /* Workaround to enable 115200 baud on OMAP1510 internal ports */ + if (is_omap1510_8250(up)) { + if (baud == 115200) { + quot = 1; + serial_port_out(port, UART_OMAP_OSC_12M_SEL, 1); + } else + serial_port_out(port, UART_OMAP_OSC_12M_SEL, 0); + } + + /* + * For NatSemi, switch to bank 2 not bank 1, to avoid resetting EXCR2, + * otherwise just set DLAB + */ + if (up->capabilities & UART_NATSEMI) + serial_port_out(port, UART_LCR, 0xe0); + else + serial_port_out(port, UART_LCR, up->lcr | UART_LCR_DLAB); + + serial_dl_write(up, quot); + + /* XR17V35x UARTs have an extra fractional divisor register (DLD) */ + if (up->port.type == PORT_XR17V35X) + serial_port_out(port, 0x2, quot_frac); +} + +void +serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + struct uart_8250_port *up = up_to_u8250p(port); + unsigned char cval; + unsigned long flags; + unsigned int baud, quot, frac = 0; + + cval = serial8250_compute_lcr(up, termios->c_cflag); + + /* + * Ask the core to calculate the divisor for us. + */ + baud = uart_get_baud_rate(port, termios, old, + port->uartclk / 16 / 0xffff, + port->uartclk / 16); + quot = serial8250_get_divisor(up, baud, &frac); + + /* + * Ok, we're now changing the port state. Do it with + * interrupts disabled. + */ + serial8250_rpm_get(up); + spin_lock_irqsave(&port->lock, flags); + + up->lcr = cval; /* Save computed LCR */ + + if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { + /* NOTE: If fifo_bug is not set, a user can set RX_trigger. */ + if ((baud < 2400 && !up->dma) || up->fifo_bug) { + up->fcr &= ~UART_FCR_TRIGGER_MASK; + up->fcr |= UART_FCR_TRIGGER_1; + } + } + + /* + * MCR-based auto flow control. 
When AFE is enabled, RTS will be + * deasserted when the receive FIFO contains more characters than + * the trigger, or the MCR RTS bit is cleared. In the case where + * the remote UART is not using CTS auto flow control, we must + * have sufficient FIFO entries for the latency of the remote + * UART to respond. IOW, at least 32 bytes of FIFO. + */ + if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) { + up->mcr &= ~UART_MCR_AFE; + if (termios->c_cflag & CRTSCTS) + up->mcr |= UART_MCR_AFE; + } + + /* + * Update the per-port timeout. + */ + uart_update_timeout(port, termios->c_cflag, baud); + + port->read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR; + if (termios->c_iflag & INPCK) + port->read_status_mask |= UART_LSR_FE | UART_LSR_PE; + if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) + port->read_status_mask |= UART_LSR_BI; + + /* + * Characteres to ignore + */ + port->ignore_status_mask = 0; + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART_LSR_PE | UART_LSR_FE; + if (termios->c_iflag & IGNBRK) { + port->ignore_status_mask |= UART_LSR_BI; + /* + * If we're ignoring parity and break indicators, + * ignore overruns too (for real raw support). + */ + if (termios->c_iflag & IGNPAR) + port->ignore_status_mask |= UART_LSR_OE; + } + + /* + * ignore all characters if CREAD is not set + */ + if ((termios->c_cflag & CREAD) == 0) + port->ignore_status_mask |= UART_LSR_DR; + + /* + * CTS flow control flag and modem status interrupts + */ + up->ier &= ~UART_IER_MSI; + if (!(up->bugs & UART_BUG_NOMSR) && + UART_ENABLE_MS(&up->port, termios->c_cflag)) + up->ier |= UART_IER_MSI; + if (up->capabilities & UART_CAP_UUE) + up->ier |= UART_IER_UUE; + if (up->capabilities & UART_CAP_RTOIE) + up->ier |= UART_IER_RTOIE; + + serial_port_out(port, UART_IER, up->ier); + + if (up->capabilities & UART_CAP_EFR) { + unsigned char efr = 0; + /* + * TI16C752/Startech hardware flow control. FIXME: + * - TI16C752 requires control thresholds to be set. 
+ * - UART_MCR_RTS is ineffective if auto-RTS mode is enabled. + */ + if (termios->c_cflag & CRTSCTS) + efr |= UART_EFR_CTS; + + serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); + if (port->flags & UPF_EXAR_EFR) + serial_port_out(port, UART_XR_EFR, efr); + else + serial_port_out(port, UART_EFR, efr); + } + + serial8250_set_divisor(port, baud, quot, frac); + + /* + * LCR DLAB must be set to enable 64-byte FIFO mode. If the FCR + * is written without DLAB set, this mode will be disabled. + */ + if (port->type == PORT_16750) + serial_port_out(port, UART_FCR, up->fcr); + + serial_port_out(port, UART_LCR, up->lcr); /* reset DLAB */ + if (port->type != PORT_16750) { + /* emulated UARTs (Lucent Venus 167x) need two steps */ + if (up->fcr & UART_FCR_ENABLE_FIFO) + serial_port_out(port, UART_FCR, UART_FCR_ENABLE_FIFO); + serial_port_out(port, UART_FCR, up->fcr); /* set fcr */ + } + serial8250_set_mctrl(port, port->mctrl); + spin_unlock_irqrestore(&port->lock, flags); + serial8250_rpm_put(up); + + /* Don't rewrite B0 */ + if (tty_termios_baud_rate(termios)) + tty_termios_encode_baud_rate(termios, baud, baud); +} +EXPORT_SYMBOL(serial8250_do_set_termios); + +static void +serial8250_set_termios(struct uart_port *port, struct ktermios *termios, + struct ktermios *old) +{ + if (port->set_termios) + port->set_termios(port, termios, old); + else + serial8250_do_set_termios(port, termios, old); +} + +static void +serial8250_set_ldisc(struct uart_port *port, struct ktermios *termios) +{ + if (termios->c_line == N_PPS) { + port->flags |= UPF_HARDPPS_CD; + spin_lock_irq(&port->lock); + serial8250_enable_ms(port); + spin_unlock_irq(&port->lock); + } else { + port->flags &= ~UPF_HARDPPS_CD; + if (!UART_ENABLE_MS(port, termios->c_cflag)) { + spin_lock_irq(&port->lock); + serial8250_disable_ms(port); + spin_unlock_irq(&port->lock); + } + } +} + + +void serial8250_do_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate) +{ + struct uart_8250_port *p = 
up_to_u8250p(port); + + serial8250_set_sleep(p, state != 0); +} +EXPORT_SYMBOL(serial8250_do_pm); + +static void +serial8250_pm(struct uart_port *port, unsigned int state, + unsigned int oldstate) +{ + if (port->pm) + port->pm(port, state, oldstate); + else + serial8250_do_pm(port, state, oldstate); +} + +static unsigned int serial8250_port_size(struct uart_8250_port *pt) +{ + if (pt->port.mapsize) + return pt->port.mapsize; + if (pt->port.iotype == UPIO_AU) { + if (pt->port.type == PORT_RT2880) + return 0x100; + return 0x1000; + } + if (is_omap1_8250(pt)) + return 0x16 << pt->port.regshift; + + return 8 << pt->port.regshift; +} + +/* + * Resource handling. + */ +static int serial8250_request_std_resource(struct uart_8250_port *up) +{ + unsigned int size = serial8250_port_size(up); + struct uart_port *port = &up->port; + int ret = 0; + + switch (port->iotype) { + case UPIO_AU: + case UPIO_TSI: + case UPIO_MEM32: + case UPIO_MEM32BE: + case UPIO_MEM: + if (!port->mapbase) + break; + + if (!request_mem_region(port->mapbase, size, "serial")) { + ret = -EBUSY; + break; + } + + if (port->flags & UPF_IOREMAP) { + port->membase = ioremap_nocache(port->mapbase, size); + if (!port->membase) { + release_mem_region(port->mapbase, size); + ret = -ENOMEM; + } + } + break; + + case UPIO_HUB6: + case UPIO_PORT: + if (!request_region(port->iobase, size, "serial")) + ret = -EBUSY; + break; + } + return ret; +} + +static void serial8250_release_std_resource(struct uart_8250_port *up) +{ + unsigned int size = serial8250_port_size(up); + struct uart_port *port = &up->port; + + switch (port->iotype) { + case UPIO_AU: + case UPIO_TSI: + case UPIO_MEM32: + case UPIO_MEM32BE: + case UPIO_MEM: + if (!port->mapbase) + break; + + if (port->flags & UPF_IOREMAP) { + iounmap(port->membase); + port->membase = NULL; + } + + release_mem_region(port->mapbase, size); + break; + + case UPIO_HUB6: + case UPIO_PORT: + release_region(port->iobase, size); + break; + } +} + +static void 
serial8250_release_port(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + + serial8250_release_std_resource(up); +} + +static int serial8250_request_port(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + int ret; + + if (port->type == PORT_8250_CIR) + return -ENODEV; + + ret = serial8250_request_std_resource(up); + + return ret; +} + +static int fcr_get_rxtrig_bytes(struct uart_8250_port *up) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + unsigned char bytes; + + bytes = conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(up->fcr)]; + + return bytes ? bytes : -EOPNOTSUPP; +} + +static int bytes_to_fcr_rxtrig(struct uart_8250_port *up, unsigned char bytes) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + int i; + + if (!conf_type->rxtrig_bytes[UART_FCR_R_TRIG_BITS(UART_FCR_R_TRIG_00)]) + return -EOPNOTSUPP; + + for (i = 1; i < UART_FCR_R_TRIG_MAX_STATE; i++) { + if (bytes < conf_type->rxtrig_bytes[i]) + /* Use the nearest lower value */ + return (--i) << UART_FCR_R_TRIG_SHIFT; + } + + return UART_FCR_R_TRIG_11; +} + +static int do_get_rxtrig(struct tty_port *port) +{ + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport = state->uart_port; + struct uart_8250_port *up = + container_of(uport, struct uart_8250_port, port); + + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) + return -EINVAL; + + return fcr_get_rxtrig_bytes(up); +} + +static int do_serial8250_get_rxtrig(struct tty_port *port) +{ + int rxtrig_bytes; + + mutex_lock(&port->mutex); + rxtrig_bytes = do_get_rxtrig(port); + mutex_unlock(&port->mutex); + + return rxtrig_bytes; +} + +static ssize_t serial8250_get_attr_rx_trig_bytes(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tty_port *port = dev_get_drvdata(dev); + int rxtrig_bytes; + + rxtrig_bytes = do_serial8250_get_rxtrig(port); + if (rxtrig_bytes < 0) 
+ return rxtrig_bytes; + + return snprintf(buf, PAGE_SIZE, "%d\n", rxtrig_bytes); +} + +static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) +{ + struct uart_state *state = container_of(port, struct uart_state, port); + struct uart_port *uport = state->uart_port; + struct uart_8250_port *up = + container_of(uport, struct uart_8250_port, port); + int rxtrig; + + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 || + up->fifo_bug) + return -EINVAL; + + rxtrig = bytes_to_fcr_rxtrig(up, bytes); + if (rxtrig < 0) + return rxtrig; + + serial8250_clear_fifos(up); + up->fcr &= ~UART_FCR_TRIGGER_MASK; + up->fcr |= (unsigned char)rxtrig; + serial_out(up, UART_FCR, up->fcr); + return 0; +} + +static int do_serial8250_set_rxtrig(struct tty_port *port, unsigned char bytes) +{ + int ret; + + mutex_lock(&port->mutex); + ret = do_set_rxtrig(port, bytes); + mutex_unlock(&port->mutex); + + return ret; +} + +static ssize_t serial8250_set_attr_rx_trig_bytes(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct tty_port *port = dev_get_drvdata(dev); + unsigned char bytes; + int ret; + + if (!count) + return -EINVAL; + + ret = kstrtou8(buf, 10, &bytes); + if (ret < 0) + return ret; + + ret = do_serial8250_set_rxtrig(port, bytes); + if (ret < 0) + return ret; + + return count; +} + +static DEVICE_ATTR(rx_trig_bytes, S_IRUSR | S_IWUSR | S_IRGRP, + serial8250_get_attr_rx_trig_bytes, + serial8250_set_attr_rx_trig_bytes); + +static struct attribute *serial8250_dev_attrs[] = { + &dev_attr_rx_trig_bytes.attr, + NULL, + }; + +static struct attribute_group serial8250_dev_attr_group = { + .attrs = serial8250_dev_attrs, + }; + +static void register_dev_spec_attr_grp(struct uart_8250_port *up) +{ + const struct serial8250_config *conf_type = &uart_config[up->port.type]; + + if (conf_type->rxtrig_bytes[0]) + up->port.attr_group = &serial8250_dev_attr_group; +} + +static void serial8250_config_port(struct uart_port *port, int 
flags) +{ + struct uart_8250_port *up = up_to_u8250p(port); + int ret; + + if (port->type == PORT_8250_CIR) + return; + + /* + * Find the region that we can probe for. This in turn + * tells us whether we can probe for the type of port. + */ + ret = serial8250_request_std_resource(up); + if (ret < 0) + return; + + if (port->iotype != up->cur_iotype) + set_io_from_upio(port); + + if (flags & UART_CONFIG_TYPE) + autoconfig(up); + + /* if access method is AU, it is a 16550 with a quirk */ + if (port->type == PORT_16550A && port->iotype == UPIO_AU) + up->bugs |= UART_BUG_NOMSR; + + /* HW bugs may trigger IRQ while IIR == NO_INT */ + if (port->type == PORT_TEGRA) + up->bugs |= UART_BUG_NOMSR; + + if (port->type != PORT_UNKNOWN && flags & UART_CONFIG_IRQ) + autoconfig_irq(up); + + if (port->type == PORT_UNKNOWN) + serial8250_release_std_resource(up); + + /* Fixme: probably not the best place for this */ + if ((port->type == PORT_XR17V35X) || + (port->type == PORT_XR17D15X)) + port->handle_irq = exar_handle_irq; + + register_dev_spec_attr_grp(up); + up->fcr = uart_config[up->port.type].fcr; +} + +static int +serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + if (ser->irq >= nr_irqs || ser->irq < 0 || + ser->baud_base < 9600 || ser->type < PORT_UNKNOWN || + ser->type >= ARRAY_SIZE(uart_config) || ser->type == PORT_CIRRUS || + ser->type == PORT_STARTECH) + return -EINVAL; + return 0; +} + +static const char * +serial8250_type(struct uart_port *port) +{ + int type = port->type; + + if (type >= ARRAY_SIZE(uart_config)) + type = 0; + return uart_config[type].name; +} + +static const struct uart_ops serial8250_pops = { + .tx_empty = serial8250_tx_empty, + .set_mctrl = serial8250_set_mctrl, + .get_mctrl = serial8250_get_mctrl, + .stop_tx = serial8250_stop_tx, + .start_tx = serial8250_start_tx, + .throttle = serial8250_throttle, + .unthrottle = serial8250_unthrottle, + .stop_rx = serial8250_stop_rx, + .enable_ms = serial8250_enable_ms, + .break_ctl = 
serial8250_break_ctl, + .startup = serial8250_startup, + .shutdown = serial8250_shutdown, + .set_termios = serial8250_set_termios, + .set_ldisc = serial8250_set_ldisc, + .pm = serial8250_pm, + .type = serial8250_type, + .release_port = serial8250_release_port, + .request_port = serial8250_request_port, + .config_port = serial8250_config_port, + .verify_port = serial8250_verify_port, +#ifdef CONFIG_CONSOLE_POLL + .poll_get_char = serial8250_get_poll_char, + .poll_put_char = serial8250_put_poll_char, +#endif +}; + +void serial8250_init_port(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + + spin_lock_init(&port->lock); + port->ops = &serial8250_pops; + + up->cur_iotype = 0xFF; +} +EXPORT_SYMBOL_GPL(serial8250_init_port); + +void serial8250_set_defaults(struct uart_8250_port *up) +{ + struct uart_port *port = &up->port; + + if (up->port.flags & UPF_FIXED_TYPE) { + unsigned int type = up->port.type; + + if (!up->port.fifosize) + up->port.fifosize = uart_config[type].fifo_size; + if (!up->tx_loadsz) + up->tx_loadsz = uart_config[type].tx_loadsz; + if (!up->capabilities) + up->capabilities = uart_config[type].flags; + } + + set_io_from_upio(port); + + /* default dma handlers */ + if (up->dma) { + if (!up->dma->tx_dma) + up->dma->tx_dma = serial8250_tx_dma; + if (!up->dma->rx_dma) + up->dma->rx_dma = serial8250_rx_dma; + } +} +EXPORT_SYMBOL_GPL(serial8250_set_defaults); + +#ifdef CONFIG_SERIAL_8250_CONSOLE + +static void serial8250_console_putchar(struct uart_port *port, int ch) +{ + struct uart_8250_port *up = up_to_u8250p(port); + + wait_for_xmitr(up, UART_LSR_THRE); + serial_port_out(port, UART_TX, ch); +} + +/* + * Print a string to the serial port trying not to disturb + * any possible real use of the port... + * + * The console_lock must be held when we get here. 
+ */ +void serial8250_console_write(struct uart_8250_port *up, const char *s, + unsigned int count) +{ + struct uart_port *port = &up->port; + unsigned long flags; + unsigned int ier; + int locked = 1; + + touch_nmi_watchdog(); + + serial8250_rpm_get(up); + + if (port->sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock_irqsave(&port->lock, flags); + else + spin_lock_irqsave(&port->lock, flags); + + /* + * First save the IER then disable the interrupts + */ + ier = serial_port_in(port, UART_IER); + + if (up->capabilities & UART_CAP_UUE) + serial_port_out(port, UART_IER, UART_IER_UUE); + else + serial_port_out(port, UART_IER, 0); + + /* check scratch reg to see if port powered off during system sleep */ + if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { + struct ktermios termios; + unsigned int baud, quot, frac = 0; + + termios.c_cflag = port->cons->cflag; + if (port->state->port.tty && termios.c_cflag == 0) + termios.c_cflag = port->state->port.tty->termios.c_cflag; + + baud = uart_get_baud_rate(port, &termios, NULL, + port->uartclk / 16 / 0xffff, + port->uartclk / 16); + quot = serial8250_get_divisor(up, baud, &frac); + + serial8250_set_divisor(port, baud, quot, frac); + serial_port_out(port, UART_LCR, up->lcr); + serial_port_out(port, UART_MCR, UART_MCR_DTR | UART_MCR_RTS); + + up->canary = 0; + } + + uart_console_write(port, s, count, serial8250_console_putchar); + + /* + * Finally, wait for transmitter to become empty + * and restore the IER + */ + wait_for_xmitr(up, BOTH_EMPTY); + serial_port_out(port, UART_IER, ier); + + /* + * The receive handling will happen properly because the + * receive ready bit will still be set; it is not cleared + * on read. However, modem control will not, we must + * call it if we have saved something in the saved flags + * while processing with interrupts off. 
+ */ + if (up->msr_saved_flags) + serial8250_modem_status(up); + + if (locked) + spin_unlock_irqrestore(&port->lock, flags); + serial8250_rpm_put(up); +} + +static unsigned int probe_baud(struct uart_port *port) +{ + unsigned char lcr, dll, dlm; + unsigned int quot; + + lcr = serial_port_in(port, UART_LCR); + serial_port_out(port, UART_LCR, lcr | UART_LCR_DLAB); + dll = serial_port_in(port, UART_DLL); + dlm = serial_port_in(port, UART_DLM); + serial_port_out(port, UART_LCR, lcr); + + quot = (dlm << 8) | dll; + return (port->uartclk / 16) / quot; +} + +int serial8250_console_setup(struct uart_port *port, char *options, bool probe) +{ + int baud = 9600; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (!port->iobase && !port->membase) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + else if (probe) + baud = probe_baud(port); + + return uart_set_options(port, port->cons, baud, parity, bits, flow); +} + +#endif /* CONFIG_SERIAL_8250_CONSOLE */ diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile index 706295913c34..39c6d2277570 100644 --- a/drivers/tty/serial/8250/Makefile +++ b/drivers/tty/serial/8250/Makefile @@ -2,10 +2,11 @@ # Makefile for the 8250 serial device drivers. 
# -obj-$(CONFIG_SERIAL_8250) += 8250.o +obj-$(CONFIG_SERIAL_8250) += 8250.o 8250_base.o 8250-y := 8250_core.o 8250-$(CONFIG_SERIAL_8250_PNP) += 8250_pnp.o -8250-$(CONFIG_SERIAL_8250_DMA) += 8250_dma.o +8250_base-y := 8250_port.o +8250_base-$(CONFIG_SERIAL_8250_DMA) += 8250_dma.o obj-$(CONFIG_SERIAL_8250_GSC) += 8250_gsc.o obj-$(CONFIG_SERIAL_8250_PCI) += 8250_pci.o obj-$(CONFIG_SERIAL_8250_HP300) += 8250_hp300.o diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index ba82c07feb95..7f156bde38d9 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -152,6 +152,11 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir); unsigned char serial8250_rx_chars(struct uart_8250_port *up, unsigned char lsr); void serial8250_tx_chars(struct uart_8250_port *up); unsigned int serial8250_modem_status(struct uart_8250_port *up); +void serial8250_init_port(struct uart_8250_port *up); +void serial8250_set_defaults(struct uart_8250_port *up); +void serial8250_console_write(struct uart_8250_port *up, const char *s, + unsigned int count); +int serial8250_console_setup(struct uart_port *port, char *options, bool probe); extern void serial8250_set_isa_configurator(void (*v) (int port, struct uart_port *up, -- cgit v1.2.3-70-g09d2 From f3fb7ef3981abdca871d65e8c7d9a827225eb2ba Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 9 Jul 2015 11:50:38 +0530 Subject: tty/early: make serial8250_early_{in,out} static again Commit ed71871bed719 ("tty/8250_early: Turn serial_in/serial_out into weak symbols") made these routines weak to allow platform specific Big endian override. However recent updates to core, specifically ebc5e20082 ("serial: of_serial: Support big-endian register accesses") and 6e63be3fee14 ("serial: earlycon: Add support for big-endian MMIO accesses") means that roundabout way to override the early serial accessors is no longer needed.
Cc: Jiri Slaby Cc: Peter Hurley Cc: Rob Herring Cc: Kevin Cernekee Acked-by: Noam Camus Signed-off-by: Vineet Gupta Reviewed-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_early.c | 4 ++-- include/linux/serial_8250.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c index 771dda29a0f8..faed05f25bc2 100644 --- a/drivers/tty/serial/8250/8250_early.c +++ b/drivers/tty/serial/8250/8250_early.c @@ -35,7 +35,7 @@ #include #include -unsigned int __weak __init serial8250_early_in(struct uart_port *port, int offset) +static unsigned int __init serial8250_early_in(struct uart_port *port, int offset) { switch (port->iotype) { case UPIO_MEM: @@ -51,7 +51,7 @@ unsigned int __weak __init serial8250_early_in(struct uart_port *port, int offse } } -void __weak __init serial8250_early_out(struct uart_port *port, int offset, int value) +static void __init serial8250_early_out(struct uart_port *port, int offset, int value) { switch (port->iotype) { case UPIO_MEM: diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 7f156bde38d9..faa0e0370ce7 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -136,8 +136,6 @@ void serial8250_resume_port(int line); extern int early_serial_setup(struct uart_port *port); -extern unsigned int serial8250_early_in(struct uart_port *port, int offset); -extern void serial8250_early_out(struct uart_port *port, int offset, int value); extern int early_serial8250_setup(struct earlycon_device *device, const char *options); extern void serial8250_do_set_termios(struct uart_port *port, -- cgit v1.2.3-70-g09d2 From 3fad386014ddc34513647a3e49d9fc9db2990cbc Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 2 Jul 2015 15:18:10 +0200 Subject: tty/serial: at91: fix some macro definitions to fit coding style This patch updates macro definitions in 
atmel_serial.h to fit the 80 column rule. Please note that some deprecated comments such as "[AT91SAM9261 only]" are removed as the corresponding bits also exist in some later chips. Signed-off-by: Cyrille Pitchen Acked-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- include/linux/atmel_serial.h | 204 +++++++++++++++++++++---------------------- 1 file changed, 102 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h index 00beddf6be20..c384c21d65f0 100644 --- a/include/linux/atmel_serial.h +++ b/include/linux/atmel_serial.h @@ -16,115 +16,115 @@ #ifndef ATMEL_SERIAL_H #define ATMEL_SERIAL_H -#define ATMEL_US_CR 0x00 /* Control Register */ -#define ATMEL_US_RSTRX (1 << 2) /* Reset Receiver */ -#define ATMEL_US_RSTTX (1 << 3) /* Reset Transmitter */ -#define ATMEL_US_RXEN (1 << 4) /* Receiver Enable */ -#define ATMEL_US_RXDIS (1 << 5) /* Receiver Disable */ -#define ATMEL_US_TXEN (1 << 6) /* Transmitter Enable */ -#define ATMEL_US_TXDIS (1 << 7) /* Transmitter Disable */ -#define ATMEL_US_RSTSTA (1 << 8) /* Reset Status Bits */ -#define ATMEL_US_STTBRK (1 << 9) /* Start Break */ -#define ATMEL_US_STPBRK (1 << 10) /* Stop Break */ -#define ATMEL_US_STTTO (1 << 11) /* Start Time-out */ -#define ATMEL_US_SENDA (1 << 12) /* Send Address */ -#define ATMEL_US_RSTIT (1 << 13) /* Reset Iterations */ -#define ATMEL_US_RSTNACK (1 << 14) /* Reset Non Acknowledge */ -#define ATMEL_US_RETTO (1 << 15) /* Rearm Time-out */ -#define ATMEL_US_DTREN (1 << 16) /* Data Terminal Ready Enable [AT91RM9200 only] */ -#define ATMEL_US_DTRDIS (1 << 17) /* Data Terminal Ready Disable [AT91RM9200 only] */ -#define ATMEL_US_RTSEN (1 << 18) /* Request To Send Enable */ -#define ATMEL_US_RTSDIS (1 << 19) /* Request To Send Disable */ +#define ATMEL_US_CR 0x00 /* Control Register */ +#define ATMEL_US_RSTRX BIT(2) /* Reset Receiver */ +#define ATMEL_US_RSTTX BIT(3) /* Reset Transmitter 
*/ +#define ATMEL_US_RXEN BIT(4) /* Receiver Enable */ +#define ATMEL_US_RXDIS BIT(5) /* Receiver Disable */ +#define ATMEL_US_TXEN BIT(6) /* Transmitter Enable */ +#define ATMEL_US_TXDIS BIT(7) /* Transmitter Disable */ +#define ATMEL_US_RSTSTA BIT(8) /* Reset Status Bits */ +#define ATMEL_US_STTBRK BIT(9) /* Start Break */ +#define ATMEL_US_STPBRK BIT(10) /* Stop Break */ +#define ATMEL_US_STTTO BIT(11) /* Start Time-out */ +#define ATMEL_US_SENDA BIT(12) /* Send Address */ +#define ATMEL_US_RSTIT BIT(13) /* Reset Iterations */ +#define ATMEL_US_RSTNACK BIT(14) /* Reset Non Acknowledge */ +#define ATMEL_US_RETTO BIT(15) /* Rearm Time-out */ +#define ATMEL_US_DTREN BIT(16) /* Data Terminal Ready Enable */ +#define ATMEL_US_DTRDIS BIT(17) /* Data Terminal Ready Disable */ +#define ATMEL_US_RTSEN BIT(18) /* Request To Send Enable */ +#define ATMEL_US_RTSDIS BIT(19) /* Request To Send Disable */ -#define ATMEL_US_MR 0x04 /* Mode Register */ -#define ATMEL_US_USMODE (0xf << 0) /* Mode of the USART */ -#define ATMEL_US_USMODE_NORMAL 0 -#define ATMEL_US_USMODE_RS485 1 -#define ATMEL_US_USMODE_HWHS 2 -#define ATMEL_US_USMODE_MODEM 3 -#define ATMEL_US_USMODE_ISO7816_T0 4 -#define ATMEL_US_USMODE_ISO7816_T1 6 -#define ATMEL_US_USMODE_IRDA 8 -#define ATMEL_US_USCLKS (3 << 4) /* Clock Selection */ -#define ATMEL_US_USCLKS_MCK (0 << 4) -#define ATMEL_US_USCLKS_MCK_DIV8 (1 << 4) -#define ATMEL_US_USCLKS_SCK (3 << 4) -#define ATMEL_US_CHRL (3 << 6) /* Character Length */ -#define ATMEL_US_CHRL_5 (0 << 6) -#define ATMEL_US_CHRL_6 (1 << 6) -#define ATMEL_US_CHRL_7 (2 << 6) -#define ATMEL_US_CHRL_8 (3 << 6) -#define ATMEL_US_SYNC (1 << 8) /* Synchronous Mode Select */ -#define ATMEL_US_PAR (7 << 9) /* Parity Type */ -#define ATMEL_US_PAR_EVEN (0 << 9) -#define ATMEL_US_PAR_ODD (1 << 9) -#define ATMEL_US_PAR_SPACE (2 << 9) -#define ATMEL_US_PAR_MARK (3 << 9) -#define ATMEL_US_PAR_NONE (4 << 9) -#define ATMEL_US_PAR_MULTI_DROP (6 << 9) -#define ATMEL_US_NBSTOP (3 << 12) /* Number of 
Stop Bits */ -#define ATMEL_US_NBSTOP_1 (0 << 12) -#define ATMEL_US_NBSTOP_1_5 (1 << 12) -#define ATMEL_US_NBSTOP_2 (2 << 12) -#define ATMEL_US_CHMODE (3 << 14) /* Channel Mode */ -#define ATMEL_US_CHMODE_NORMAL (0 << 14) -#define ATMEL_US_CHMODE_ECHO (1 << 14) -#define ATMEL_US_CHMODE_LOC_LOOP (2 << 14) -#define ATMEL_US_CHMODE_REM_LOOP (3 << 14) -#define ATMEL_US_MSBF (1 << 16) /* Bit Order */ -#define ATMEL_US_MODE9 (1 << 17) /* 9-bit Character Length */ -#define ATMEL_US_CLKO (1 << 18) /* Clock Output Select */ -#define ATMEL_US_OVER (1 << 19) /* Oversampling Mode */ -#define ATMEL_US_INACK (1 << 20) /* Inhibit Non Acknowledge */ -#define ATMEL_US_DSNACK (1 << 21) /* Disable Successive NACK */ -#define ATMEL_US_MAX_ITER (7 << 24) /* Max Iterations */ -#define ATMEL_US_FILTER (1 << 28) /* Infrared Receive Line Filter */ +#define ATMEL_US_MR 0x04 /* Mode Register */ +#define ATMEL_US_USMODE GENMASK(3, 0) /* Mode of the USART */ +#define ATMEL_US_USMODE_NORMAL 0 +#define ATMEL_US_USMODE_RS485 1 +#define ATMEL_US_USMODE_HWHS 2 +#define ATMEL_US_USMODE_MODEM 3 +#define ATMEL_US_USMODE_ISO7816_T0 4 +#define ATMEL_US_USMODE_ISO7816_T1 6 +#define ATMEL_US_USMODE_IRDA 8 +#define ATMEL_US_USCLKS GENMASK(5, 4) /* Clock Selection */ +#define ATMEL_US_USCLKS_MCK (0 << 4) +#define ATMEL_US_USCLKS_MCK_DIV8 (1 << 4) +#define ATMEL_US_USCLKS_SCK (3 << 4) +#define ATMEL_US_CHRL GENMASK(7, 6) /* Character Length */ +#define ATMEL_US_CHRL_5 (0 << 6) +#define ATMEL_US_CHRL_6 (1 << 6) +#define ATMEL_US_CHRL_7 (2 << 6) +#define ATMEL_US_CHRL_8 (3 << 6) +#define ATMEL_US_SYNC BIT(8) /* Synchronous Mode Select */ +#define ATMEL_US_PAR GENMASK(11, 9) /* Parity Type */ +#define ATMEL_US_PAR_EVEN (0 << 9) +#define ATMEL_US_PAR_ODD (1 << 9) +#define ATMEL_US_PAR_SPACE (2 << 9) +#define ATMEL_US_PAR_MARK (3 << 9) +#define ATMEL_US_PAR_NONE (4 << 9) +#define ATMEL_US_PAR_MULTI_DROP (6 << 9) +#define ATMEL_US_NBSTOP GENMASK(13, 12) /* Number of Stop Bits */ +#define ATMEL_US_NBSTOP_1 (0 << 
12) +#define ATMEL_US_NBSTOP_1_5 (1 << 12) +#define ATMEL_US_NBSTOP_2 (2 << 12) +#define ATMEL_US_CHMODE GENMASK(15, 14) /* Channel Mode */ +#define ATMEL_US_CHMODE_NORMAL (0 << 14) +#define ATMEL_US_CHMODE_ECHO (1 << 14) +#define ATMEL_US_CHMODE_LOC_LOOP (2 << 14) +#define ATMEL_US_CHMODE_REM_LOOP (3 << 14) +#define ATMEL_US_MSBF BIT(16) /* Bit Order */ +#define ATMEL_US_MODE9 BIT(17) /* 9-bit Character Length */ +#define ATMEL_US_CLKO BIT(18) /* Clock Output Select */ +#define ATMEL_US_OVER BIT(19) /* Oversampling Mode */ +#define ATMEL_US_INACK BIT(20) /* Inhibit Non Acknowledge */ +#define ATMEL_US_DSNACK BIT(21) /* Disable Successive NACK */ +#define ATMEL_US_MAX_ITER GENMASK(26, 24) /* Max Iterations */ +#define ATMEL_US_FILTER BIT(28) /* Infrared Receive Line Filter */ -#define ATMEL_US_IER 0x08 /* Interrupt Enable Register */ -#define ATMEL_US_RXRDY (1 << 0) /* Receiver Ready */ -#define ATMEL_US_TXRDY (1 << 1) /* Transmitter Ready */ -#define ATMEL_US_RXBRK (1 << 2) /* Break Received / End of Break */ -#define ATMEL_US_ENDRX (1 << 3) /* End of Receiver Transfer */ -#define ATMEL_US_ENDTX (1 << 4) /* End of Transmitter Transfer */ -#define ATMEL_US_OVRE (1 << 5) /* Overrun Error */ -#define ATMEL_US_FRAME (1 << 6) /* Framing Error */ -#define ATMEL_US_PARE (1 << 7) /* Parity Error */ -#define ATMEL_US_TIMEOUT (1 << 8) /* Receiver Time-out */ -#define ATMEL_US_TXEMPTY (1 << 9) /* Transmitter Empty */ -#define ATMEL_US_ITERATION (1 << 10) /* Max number of Repetitions Reached */ -#define ATMEL_US_TXBUFE (1 << 11) /* Transmission Buffer Empty */ -#define ATMEL_US_RXBUFF (1 << 12) /* Reception Buffer Full */ -#define ATMEL_US_NACK (1 << 13) /* Non Acknowledge */ -#define ATMEL_US_RIIC (1 << 16) /* Ring Indicator Input Change [AT91RM9200 only] */ -#define ATMEL_US_DSRIC (1 << 17) /* Data Set Ready Input Change [AT91RM9200 only] */ -#define ATMEL_US_DCDIC (1 << 18) /* Data Carrier Detect Input Change [AT91RM9200 only] */ -#define ATMEL_US_CTSIC (1 << 19) /* Clear 
to Send Input Change */ -#define ATMEL_US_RI (1 << 20) /* RI */ -#define ATMEL_US_DSR (1 << 21) /* DSR */ -#define ATMEL_US_DCD (1 << 22) /* DCD */ -#define ATMEL_US_CTS (1 << 23) /* CTS */ +#define ATMEL_US_IER 0x08 /* Interrupt Enable Register */ +#define ATMEL_US_RXRDY BIT(0) /* Receiver Ready */ +#define ATMEL_US_TXRDY BIT(1) /* Transmitter Ready */ +#define ATMEL_US_RXBRK BIT(2) /* Break Received / End of Break */ +#define ATMEL_US_ENDRX BIT(3) /* End of Receiver Transfer */ +#define ATMEL_US_ENDTX BIT(4) /* End of Transmitter Transfer */ +#define ATMEL_US_OVRE BIT(5) /* Overrun Error */ +#define ATMEL_US_FRAME BIT(6) /* Framing Error */ +#define ATMEL_US_PARE BIT(7) /* Parity Error */ +#define ATMEL_US_TIMEOUT BIT(8) /* Receiver Time-out */ +#define ATMEL_US_TXEMPTY BIT(9) /* Transmitter Empty */ +#define ATMEL_US_ITERATION BIT(10) /* Max number of Repetitions Reached */ +#define ATMEL_US_TXBUFE BIT(11) /* Transmission Buffer Empty */ +#define ATMEL_US_RXBUFF BIT(12) /* Reception Buffer Full */ +#define ATMEL_US_NACK BIT(13) /* Non Acknowledge */ +#define ATMEL_US_RIIC BIT(16) /* Ring Indicator Input Change */ +#define ATMEL_US_DSRIC BIT(17) /* Data Set Ready Input Change */ +#define ATMEL_US_DCDIC BIT(18) /* Data Carrier Detect Input Change */ +#define ATMEL_US_CTSIC BIT(19) /* Clear to Send Input Change */ +#define ATMEL_US_RI BIT(20) /* RI */ +#define ATMEL_US_DSR BIT(21) /* DSR */ +#define ATMEL_US_DCD BIT(22) /* DCD */ +#define ATMEL_US_CTS BIT(23) /* CTS */ -#define ATMEL_US_IDR 0x0c /* Interrupt Disable Register */ -#define ATMEL_US_IMR 0x10 /* Interrupt Mask Register */ -#define ATMEL_US_CSR 0x14 /* Channel Status Register */ -#define ATMEL_US_RHR 0x18 /* Receiver Holding Register */ -#define ATMEL_US_THR 0x1c /* Transmitter Holding Register */ -#define ATMEL_US_SYNH (1 << 15) /* Transmit/Receive Sync [AT91SAM9261 only] */ +#define ATMEL_US_IDR 0x0c /* Interrupt Disable Register */ +#define ATMEL_US_IMR 0x10 /* Interrupt Mask Register */ +#define 
ATMEL_US_CSR 0x14 /* Channel Status Register */ +#define ATMEL_US_RHR 0x18 /* Receiver Holding Register */ +#define ATMEL_US_THR 0x1c /* Transmitter Holding Register */ +#define ATMEL_US_SYNH BIT(15) /* Transmit/Receive Sync */ -#define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */ -#define ATMEL_US_CD (0xffff << 0) /* Clock Divider */ +#define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */ +#define ATMEL_US_CD GENMASK(15, 0) /* Clock Divider */ -#define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register */ -#define ATMEL_US_TO (0xffff << 0) /* Time-out Value */ +#define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register */ +#define ATMEL_US_TO GENMASK(15, 0) /* Time-out Value */ -#define ATMEL_US_TTGR 0x28 /* Transmitter Timeguard Register */ -#define ATMEL_US_TG (0xff << 0) /* Timeguard Value */ +#define ATMEL_US_TTGR 0x28 /* Transmitter Timeguard Register */ +#define ATMEL_US_TG GENMASK(7, 0) /* Timeguard Value */ -#define ATMEL_US_FIDI 0x40 /* FI DI Ratio Register */ -#define ATMEL_US_NER 0x44 /* Number of Errors Register */ -#define ATMEL_US_IF 0x4c /* IrDA Filter Register */ +#define ATMEL_US_FIDI 0x40 /* FI DI Ratio Register */ +#define ATMEL_US_NER 0x44 /* Number of Errors Register */ +#define ATMEL_US_IF 0x4c /* IrDA Filter Register */ -#define ATMEL_US_NAME 0xf0 /* Ip Name */ -#define ATMEL_US_VERSION 0xfc /* Ip Version */ +#define ATMEL_US_NAME 0xf0 /* Ip Name */ +#define ATMEL_US_VERSION 0xfc /* Ip Version */ #endif -- cgit v1.2.3-70-g09d2 From b5199d46817766c95ef759684658cd8e359c6d27 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Thu, 2 Jul 2015 15:18:12 +0200 Subject: tty/serial: at91: add support to FIFOs Depending on the hardware, TX and RX FIFOs may be available. The RX FIFO can avoid receive overruns, especially when DMA transfers are not used to read data from the Receive Holding Register. For heavy system load, The CPU is likely not be able to fetch data fast enough from the RHR. 
In addition, the RX FIFO can supersede the DMA/PDC to control the RTS line when the Hardware Handshaking mode is enabled. Two thresholds are to be set for that purpose: - When the number of data in the RX FIFO crosses and becomes lower than or equal to the low threshold, the RTS line is set to low level: the remote peer is requested to send data. - When the number of data in the RX FIFO crosses and becomes greater than or equal to the high threshold, the RTS line is set to high level: the remote peer should stop sending new data. - low threshold <= high threshold Once these two thresholds are set properly, this new feature is enabled by setting the FIFO RTS Control bit of the FIFO Mode Register. FIFOs also introduce a new multiple data mode: the USART works either in multiple data mode or in single data (legacy) mode. If MODE9 bit is set into the Mode Register or if USMODE is set to either LIN_MASTER, LIN_SLAVE or LON_MODE, FIFOs operate in single data mode. Otherwise, they operate in multiple data mode. In this new multiple data mode, accesses to the Receive Holding Register or Transmit Holding Register slightly change. Since this driver implements neither the 9bit data feature (MODE9 bit set into the Mode Register) nor LIN modes, the USART works in multiple data mode whenever FIFOs are available and enabled. We also assume that data are 8bit wide. In single data mode, 32bit access CAN be used to read a single data from RHR or write a single data into THR. However, in multiple data mode, a 32bit access to RHR now allows us to read four consecutive data from RX FIFO. Also a 32bit access to THR now allows us to write four consecutive data into TX FIFO. So we MUST use 8bit access whenever only one data has to be read/written at a time.
Signed-off-by: Cyrille Pitchen Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 100 +++++++++++++++++++++++++++++++++++--- include/linux/atmel_serial.h | 36 ++++++++++++++ 2 files changed, 130 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index e7c337de31d1..87de21f0c7a3 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -56,6 +56,15 @@ /* Revisit: We should calculate this based on the actual port settings */ #define PDC_RX_TIMEOUT (3 * 10) /* 3 bytes */ +/* The minium number of data FIFOs should be able to contain */ +#define ATMEL_MIN_FIFO_SIZE 8 +/* + * These two offsets are substracted from the RX FIFO size to define the RTS + * high and low thresholds + */ +#define ATMEL_RTS_HIGH_OFFSET 16 +#define ATMEL_RTS_LOW_OFFSET 20 + #if defined(CONFIG_SERIAL_ATMEL_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) #define SUPPORT_SYSRQ #endif @@ -141,6 +150,9 @@ struct atmel_uart_port { struct mctrl_gpios *gpios; int gpio_irq[UART_GPIO_MAX]; unsigned int tx_done_mask; + u32 fifo_size; + u32 rts_high; + u32 rts_low; bool ms_irq_enabled; bool is_usart; /* usart or uart */ struct timer_list uart_timer; /* uart timer */ @@ -191,6 +203,16 @@ static inline void atmel_uart_writel(struct uart_port *port, u32 reg, u32 value) __raw_writel(value, port->membase + reg); } +static inline u8 atmel_uart_readb(struct uart_port *port, u32 reg) +{ + return __raw_readb(port->membase + reg); +} + +static inline void atmel_uart_writeb(struct uart_port *port, u32 reg, u8 value) +{ + __raw_writeb(value, port->membase + reg); +} + #ifdef CONFIG_SERIAL_ATMEL_PDC static bool atmel_use_pdc_rx(struct uart_port *port) { @@ -635,7 +657,7 @@ static void atmel_rx_chars(struct uart_port *port) status = atmel_uart_readl(port, ATMEL_US_CSR); while (status & ATMEL_US_RXRDY) { - ch = atmel_uart_readl(port, ATMEL_US_RHR); + ch = atmel_uart_readb(port, 
ATMEL_US_RHR); /* * note that the error handling code is @@ -686,7 +708,7 @@ static void atmel_tx_chars(struct uart_port *port) if (port->x_char && (atmel_uart_readl(port, ATMEL_US_CSR) & atmel_port->tx_done_mask)) { - atmel_uart_writel(port, ATMEL_US_THR, port->x_char); + atmel_uart_writeb(port, ATMEL_US_THR, port->x_char); port->icount.tx++; port->x_char = 0; } @@ -695,7 +717,7 @@ static void atmel_tx_chars(struct uart_port *port) while (atmel_uart_readl(port, ATMEL_US_CSR) & atmel_port->tx_done_mask) { - atmel_uart_writel(port, ATMEL_US_THR, xmit->buf[xmit->tail]); + atmel_uart_writeb(port, ATMEL_US_THR, xmit->buf[xmit->tail]); xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); port->icount.tx++; if (uart_circ_empty(xmit)) @@ -1796,6 +1818,29 @@ static int atmel_startup(struct uart_port *port) atmel_set_ops(port); } + /* + * Enable FIFO when available + */ + if (atmel_port->fifo_size) { + unsigned int txrdym = ATMEL_US_ONE_DATA; + unsigned int rxrdym = ATMEL_US_ONE_DATA; + unsigned int fmr; + + atmel_uart_writel(port, ATMEL_US_CR, + ATMEL_US_FIFOEN | + ATMEL_US_RXFCLR | + ATMEL_US_TXFLCLR); + + fmr = ATMEL_US_TXRDYM(txrdym) | ATMEL_US_RXRDYM(rxrdym); + if (atmel_port->rts_high && + atmel_port->rts_low) + fmr |= ATMEL_US_FRTSC | + ATMEL_US_RXFTHRES(atmel_port->rts_high) | + ATMEL_US_RXFTHRES2(atmel_port->rts_low); + + atmel_uart_writel(port, ATMEL_US_FMR, fmr); + } + /* Save current CSR for comparison in atmel_tasklet_func() */ atmel_port->irq_status_prev = atmel_get_lines_status(port); atmel_port->irq_status = atmel_port->irq_status_prev; @@ -2213,7 +2258,7 @@ static int atmel_poll_get_char(struct uart_port *port) while (!(atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_RXRDY)) cpu_relax(); - return atmel_uart_readl(port, ATMEL_US_RHR); + return atmel_uart_readb(port, ATMEL_US_RHR); } static void atmel_poll_put_char(struct uart_port *port, unsigned char ch) @@ -2221,7 +2266,7 @@ static void atmel_poll_put_char(struct uart_port *port, unsigned char ch) while 
(!(atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY)) cpu_relax(); - atmel_uart_writel(port, ATMEL_US_THR, ch); + atmel_uart_writeb(port, ATMEL_US_THR, ch); } #endif @@ -2328,7 +2373,7 @@ static void atmel_console_putchar(struct uart_port *port, int ch) { while (!(atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY)) cpu_relax(); - atmel_uart_writel(port, ATMEL_US_THR, ch); + atmel_uart_writeb(port, ATMEL_US_THR, ch); } /* @@ -2603,6 +2648,48 @@ static int atmel_init_gpios(struct atmel_uart_port *p, struct device *dev) return 0; } +static void atmel_serial_probe_fifos(struct atmel_uart_port *port, + struct platform_device *pdev) +{ + port->fifo_size = 0; + port->rts_low = 0; + port->rts_high = 0; + + if (of_property_read_u32(pdev->dev.of_node, + "atmel,fifo-size", + &port->fifo_size)) + return; + + if (!port->fifo_size) + return; + + if (port->fifo_size < ATMEL_MIN_FIFO_SIZE) { + port->fifo_size = 0; + dev_err(&pdev->dev, "Invalid FIFO size\n"); + return; + } + + /* + * 0 <= rts_low <= rts_high <= fifo_size + * Once their CTS line asserted by the remote peer, some x86 UARTs tend + * to flush their internal TX FIFO, commonly up to 16 data, before + * actually stopping to send new data. So we try to set the RTS High + * Threshold to a reasonably high value respecting this 16 data + * empirical rule when possible. 
+ */ + port->rts_high = max_t(int, port->fifo_size >> 1, + port->fifo_size - ATMEL_RTS_HIGH_OFFSET); + port->rts_low = max_t(int, port->fifo_size >> 2, + port->fifo_size - ATMEL_RTS_LOW_OFFSET); + + dev_info(&pdev->dev, "Using FIFO (%u data)\n", + port->fifo_size); + dev_dbg(&pdev->dev, "RTS High Threshold : %2u data\n", + port->rts_high); + dev_dbg(&pdev->dev, "RTS Low Threshold : %2u data\n", + port->rts_low); +} + static int atmel_serial_probe(struct platform_device *pdev) { struct atmel_uart_port *port; @@ -2639,6 +2726,7 @@ static int atmel_serial_probe(struct platform_device *pdev) port = &atmel_ports[ret]; port->backup_imr = 0; port->uart.line = ret; + atmel_serial_probe_fifos(port, pdev); spin_lock_init(&port->lock_suspended); diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h index c384c21d65f0..ee696d7e8a43 100644 --- a/include/linux/atmel_serial.h +++ b/include/linux/atmel_serial.h @@ -35,6 +35,11 @@ #define ATMEL_US_DTRDIS BIT(17) /* Data Terminal Ready Disable */ #define ATMEL_US_RTSEN BIT(18) /* Request To Send Enable */ #define ATMEL_US_RTSDIS BIT(19) /* Request To Send Disable */ +#define ATMEL_US_TXFCLR BIT(24) /* Transmit FIFO Clear */ +#define ATMEL_US_RXFCLR BIT(25) /* Receive FIFO Clear */ +#define ATMEL_US_TXFLCLR BIT(26) /* Transmit FIFO Lock Clear */ +#define ATMEL_US_FIFOEN BIT(30) /* FIFO enable */ +#define ATMEL_US_FIFODIS BIT(31) /* FIFO disable */ #define ATMEL_US_MR 0x04 /* Mode Register */ #define ATMEL_US_USMODE GENMASK(3, 0) /* Mode of the USART */ @@ -124,6 +129,37 @@ #define ATMEL_US_NER 0x44 /* Number of Errors Register */ #define ATMEL_US_IF 0x4c /* IrDA Filter Register */ +#define ATMEL_US_CMPR 0x90 /* Comparaison Register */ +#define ATMEL_US_FMR 0xa0 /* FIFO Mode Register */ +#define ATMEL_US_TXRDYM(data) (((data) & 0x3) << 0) /* TX Ready Mode */ +#define ATMEL_US_RXRDYM(data) (((data) & 0x3) << 4) /* RX Ready Mode */ +#define ATMEL_US_ONE_DATA 0x0 +#define ATMEL_US_TWO_DATA 0x1 +#define 
ATMEL_US_FOUR_DATA 0x2 +#define ATMEL_US_FRTSC BIT(7) /* FIFO RTS pin Control */ +#define ATMEL_US_TXFTHRES(thr) (((thr) & 0x3f) << 8) /* TX FIFO Threshold */ +#define ATMEL_US_RXFTHRES(thr) (((thr) & 0x3f) << 16) /* RX FIFO Threshold */ +#define ATMEL_US_RXFTHRES2(thr) (((thr) & 0x3f) << 24) /* RX FIFO Threshold2 */ + +#define ATMEL_US_FLR 0xa4 /* FIFO Level Register */ +#define ATMEL_US_TXFL(reg) (((reg) >> 0) & 0x3f) /* TX FIFO Level */ +#define ATMEL_US_RXFL(reg) (((reg) >> 16) & 0x3f) /* RX FIFO Level */ + +#define ATMEL_US_FIER 0xa8 /* FIFO Interrupt Enable Register */ +#define ATMEL_US_FIDR 0xac /* FIFO Interrupt Disable Register */ +#define ATMEL_US_FIMR 0xb0 /* FIFO Interrupt Mask Register */ +#define ATMEL_US_FESR 0xb4 /* FIFO Event Status Register */ +#define ATMEL_US_TXFEF BIT(0) /* Transmit FIFO Empty Flag */ +#define ATMEL_US_TXFFF BIT(1) /* Transmit FIFO Full Flag */ +#define ATMEL_US_TXFTHF BIT(2) /* Transmit FIFO Threshold Flag */ +#define ATMEL_US_RXFEF BIT(3) /* Receive FIFO Empty Flag */ +#define ATMEL_US_RXFFF BIT(4) /* Receive FIFO Full Flag */ +#define ATMEL_US_RXFTHF BIT(5) /* Receive FIFO Threshold Flag */ +#define ATMEL_US_TXFPTEF BIT(6) /* Transmit FIFO Pointer Error Flag */ +#define ATMEL_US_RXFPTEF BIT(7) /* Receive FIFO Pointer Error Flag */ +#define ATMEL_US_TXFLOCK BIT(8) /* Transmit FIFO Lock (FESR only) */ +#define ATMEL_US_RXFTHF2 BIT(9) /* Receive FIFO Threshold Flag 2 */ + #define ATMEL_US_NAME 0xf0 /* Ip Name */ #define ATMEL_US_VERSION 0xfc /* Ip Version */ -- cgit v1.2.3-70-g09d2 From e2dfa3d38797058fa03478b08bab3d3c4b081615 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 12 Jul 2015 22:49:08 -0400 Subject: tty: core: Add tty_debug() for printk(KERN_DEBUG) messages Introduce tty_debug() macro to output uniform debug information for tty core debug messages (function name and tty name). Note: printk(KERN_DEBUG) is retained here over pr_debug() since messages can be enabled in non-DEBUG builds. 
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 41 +++++++++++++++++------------------------ include/linux/tty.h | 6 ++++++ 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 1738fcaea891..9537979c9c51 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -531,8 +531,8 @@ static void __proc_set_tty(struct tty_struct *tty) spin_unlock_irqrestore(&tty->ctrl_lock, flags); tty->session = get_pid(task_session(current)); if (current->signal->tty) { - printk(KERN_DEBUG "%s: %s: current tty %s not NULL!!\n", - __func__, tty->name, current->signal->tty->name); + tty_debug(tty, "current tty %s not NULL!!\n", + current->signal->tty->name); tty_kref_put(current->signal->tty); } put_pid(current->signal->tty_old_pgrp); @@ -775,7 +775,7 @@ static void do_tty_hangup(struct work_struct *work) void tty_hangup(struct tty_struct *tty) { #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s hangup...\n", tty_name(tty)); + tty_debug(tty, "\n"); #endif schedule_work(&tty->hangup_work); } @@ -794,7 +794,7 @@ EXPORT_SYMBOL(tty_hangup); void tty_vhangup(struct tty_struct *tty) { #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty)); + tty_debug(tty, "\n"); #endif __tty_hangup(tty, 0); } @@ -833,7 +833,7 @@ void tty_vhangup_self(void) static void tty_vhangup_session(struct tty_struct *tty) { #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s vhangup session...\n", tty_name(tty)); + tty_debug(tty, "\n"); #endif __tty_hangup(tty, 1); } @@ -930,7 +930,7 @@ void disassociate_ctty(int on_exit) tty_kref_put(tty); } else { #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s: no current tty\n", __func__); + tty_debug(tty, "no current tty\n"); #endif } @@ -1712,8 +1712,7 @@ static int tty_release_checks(struct tty_struct *tty, int idx) { #ifdef TTY_PARANOIA_CHECK if (idx < 0 || idx >= tty->driver->num) { - printk(KERN_DEBUG "%s: %s: bad idx %d\n", -
__func__, tty->name, idx); + tty_debug(tty, "bad idx %d\n", idx); return -1; } @@ -1722,22 +1721,20 @@ static int tty_release_checks(struct tty_struct *tty, int idx) return 0; if (tty != tty->driver->ttys[idx]) { - printk(KERN_DEBUG "%s: %s: bad driver table[%d] = %p\n", - __func__, tty->name, idx, tty->driver->ttys[idx]); + tty_debug(tty, "bad driver table[%d] = %p\n", + idx, tty->driver->ttys[idx]); return -1; } if (tty->driver->other) { struct tty_struct *o_tty = tty->link; if (o_tty != tty->driver->other->ttys[idx]) { - printk(KERN_DEBUG "%s: %s: bad other table[%d] = %p\n", - __func__, tty->name, idx, - tty->driver->other->ttys[idx]); + tty_debug(tty, "bad other table[%d] = %p\n", + idx, tty->driver->other->ttys[idx]); return -1; } if (o_tty->link != tty) { - printk(KERN_DEBUG "%s: %s: bad link = %p\n", - __func__, tty->name, o_tty->link); + tty_debug(tty, "bad link = %p\n", o_tty->link); return -1; } } @@ -1792,8 +1789,7 @@ int tty_release(struct inode *inode, struct file *filp) } #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s: %s (tty count=%d)...\n", __func__, - tty_name(tty), tty->count); + tty_debug(tty, "(tty count=%d)...\n", tty->count); #endif if (tty->ops->close) @@ -1905,7 +1901,7 @@ int tty_release(struct inode *inode, struct file *filp) return 0; #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s: %s: final close\n", __func__, tty_name(tty)); + tty_debug(tty, "final close\n"); #endif /* * Ask the line discipline code to release its structures @@ -1916,8 +1912,7 @@ int tty_release(struct inode *inode, struct file *filp) tty_flush_works(tty); #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s: %s: freeing structure...\n", __func__, - tty_name(tty)); + tty_debug(tty, "freeing structure...\n"); #endif /* * The release_tty function takes care of the details of clearing @@ -2108,8 +2103,7 @@ retry_open: tty->driver->subtype == PTY_TYPE_MASTER) noctty = 1; #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s: %s: (tty count=%d)\n", __func__, tty->name, - 
tty->count); + tty_debug(tty, "(tty count=%d)\n", tty->count); #endif if (tty->ops->open) retval = tty->ops->open(tty, filp); @@ -2119,8 +2113,7 @@ retry_open: if (retval) { #ifdef TTY_DEBUG_HANGUP - printk(KERN_DEBUG "%s: %s: error %d, releasing...\n", __func__, - tty->name, retval); + tty_debug(tty, "error %d, releasing...\n", retval); #endif tty_unlock(tty); /* need to call tty_release without BTM */ tty_release(inode, filp); diff --git a/include/linux/tty.h b/include/linux/tty.h index ad6c8913aa3e..d072ded41678 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -709,4 +709,10 @@ static inline void proc_tty_register_driver(struct tty_driver *d) {} static inline void proc_tty_unregister_driver(struct tty_driver *d) {} #endif +#define tty_debug(tty, f, args...) \ + do { \ + printk(KERN_DEBUG "%s: %s: " f, __func__, \ + tty_name(tty), ##args); \ + } while (0) + #endif -- cgit v1.2.3-70-g09d2 From a3bd4f989f532694337dd30538b635d5213ab86a Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Wed, 22 Jul 2015 20:53:09 +0800 Subject: mmc: sdhci-esdhc-imx: clear f_max in boarddata After commit 8d86e4fcccf6 ("mmc: sdhci-esdhc-imx: Call mmc_of_parse()"), it's not used anymore. 
Signed-off-by: Dong Aisheng Reviewed-by: Johan Derycke Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 7 +------ include/linux/platform_data/mmc-esdhc-imx.h | 1 - 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 1b0e61847e73..c6b9f6492e1a 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -581,13 +581,8 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) static unsigned int esdhc_pltfm_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct pltfm_imx_data *imx_data = pltfm_host->priv; - struct esdhc_platform_data *boarddata = &imx_data->boarddata; - if (boarddata->f_max && (boarddata->f_max < pltfm_host->clock)) - return boarddata->f_max; - else - return pltfm_host->clock; + return pltfm_host->clock; } static unsigned int esdhc_pltfm_get_min_clock(struct sdhci_host *host) diff --git a/include/linux/platform_data/mmc-esdhc-imx.h b/include/linux/platform_data/mmc-esdhc-imx.h index 75f70f6ac137..e1571efa3f2b 100644 --- a/include/linux/platform_data/mmc-esdhc-imx.h +++ b/include/linux/platform_data/mmc-esdhc-imx.h @@ -43,7 +43,6 @@ struct esdhc_platform_data { enum wp_types wp_type; enum cd_types cd_type; int max_bus_width; - unsigned int f_max; bool support_vsel; unsigned int delay_line; }; -- cgit v1.2.3-70-g09d2 From 8766018b6ef73ca124d13b0d0a06dec906726cc8 Mon Sep 17 00:00:00 2001 From: Henry Chen Date: Fri, 24 Jul 2015 13:24:41 +0800 Subject: regulator: mt6311: Add support for mt6311 regulator Add regulator support for mt6311. It has 2 regulators - Buck and LDO, provide the related buck/ldo voltage data to the driver, and creates the regulator_desc table. Supported operations for Buck are enabled/disabled and voltage change, only enabled/disabled for LDO.
Signed-off-by: Henry Chen Reviewed-by: Javier Martinez Canillas Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 ++ drivers/regulator/Makefile | 1 + drivers/regulator/mt6311-regulator.c | 180 +++++++++++++++++++++++++++++++++++ drivers/regulator/mt6311-regulator.h | 65 +++++++++++++ include/linux/regulator/mt6311.h | 29 ++++++ 5 files changed, 284 insertions(+) create mode 100644 drivers/regulator/mt6311-regulator.c create mode 100644 drivers/regulator/mt6311-regulator.h create mode 100644 include/linux/regulator/mt6311.h (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index bef3bde6971b..aab09ac499a0 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -451,6 +451,15 @@ config REGULATOR_MC13892 Say y here to support the regulators found on the Freescale MC13892 PMIC. +config REGULATOR_MT6311 + tristate "MediaTek MT6311 PMIC" + depends on I2C + help + Say y here to select this option to enable the power regulator of + MediaTek MT6311 PMIC. + This driver supports the control of different power rails of device + through regulator interface. 
+ config REGULATOR_MT6397 tristate "MediaTek MT6397 PMIC" depends on MFD_MT6397 diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 91bf76267404..45e790f92715 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_REGULATOR_MAX77843) += max77843.o obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o obj-$(CONFIG_REGULATOR_MC13XXX_CORE) += mc13xxx-regulator-core.o +obj-$(CONFIG_REGULATOR_MT6311) += mt6311-regulator.o obj-$(CONFIG_REGULATOR_MT6397) += mt6397-regulator.o obj-$(CONFIG_REGULATOR_QCOM_RPM) += qcom_rpm-regulator.o obj-$(CONFIG_REGULATOR_QCOM_SPMI) += qcom_spmi-regulator.o diff --git a/drivers/regulator/mt6311-regulator.c b/drivers/regulator/mt6311-regulator.c new file mode 100644 index 000000000000..096e6202be1c --- /dev/null +++ b/drivers/regulator/mt6311-regulator.c @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2015 MediaTek Inc. + * Author: Henry Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mt6311-regulator.h" + +static const struct regmap_config mt6311_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .max_register = MT6311_FQMTR_CON4, +}; + +/* Default limits measured in millivolts and milliamps */ +#define MT6311_MIN_UV 600000 +#define MT6311_MAX_UV 1400000 +#define MT6311_STEP_UV 6250 + +static const struct regulator_linear_range buck_volt_range[] = { + REGULATOR_LINEAR_RANGE(MT6311_MIN_UV, 0, 0x7f, MT6311_STEP_UV), +}; + +static struct regulator_ops mt6311_buck_ops = { + .list_voltage = regulator_list_voltage_linear_range, + .map_voltage = regulator_map_voltage_linear_range, + .set_voltage_sel = regulator_set_voltage_sel_regmap, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, +}; + +static struct regulator_ops mt6311_ldo_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, +}; + +#define MT6311_BUCK(_id) \ +{\ + .name = #_id,\ + .ops = &mt6311_buck_ops,\ + .of_match = of_match_ptr(#_id),\ + .regulators_node = of_match_ptr("regulators"),\ + .type = REGULATOR_VOLTAGE,\ + .id = MT6311_ID_##_id,\ + .n_voltages = (MT6311_MAX_UV - MT6311_MIN_UV) / MT6311_STEP_UV + 1,\ + .min_uV = MT6311_MIN_UV,\ + .uV_step = MT6311_STEP_UV,\ + .owner = THIS_MODULE,\ + .linear_ranges = buck_volt_range, \ + .n_linear_ranges = ARRAY_SIZE(buck_volt_range), \ + .enable_reg = MT6311_VDVFS11_CON9,\ + .enable_mask = MT6311_PMIC_VDVFS11_EN_MASK,\ + .vsel_reg = MT6311_VDVFS11_CON12,\ + .vsel_mask = MT6311_PMIC_VDVFS11_VOSEL_MASK,\ +} + +#define MT6311_LDO(_id) \ +{\ + .name = #_id,\ + .ops = &mt6311_ldo_ops,\ + .of_match = of_match_ptr(#_id),\ + .regulators_node = 
of_match_ptr("regulators"),\ + .type = REGULATOR_VOLTAGE,\ + .id = MT6311_ID_##_id,\ + .owner = THIS_MODULE,\ + .enable_reg = MT6311_LDO_CON3,\ + .enable_mask = MT6311_PMIC_RG_VBIASN_EN_MASK,\ +} + +static struct regulator_desc mt6311_regulators[] = { + MT6311_BUCK(VDVFS), + MT6311_LDO(VBIASN), +}; + +/* + * I2C driver interface functions + */ +static int mt6311_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct regulator_config config = { }; + struct regulator_dev *rdev; + struct regmap *regmap; + int error, i, ret; + unsigned int data; + + regmap = devm_regmap_init_i2c(i2c, &mt6311_regmap_config); + if (IS_ERR(regmap)) { + error = PTR_ERR(regmap); + dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + error); + return error; + } + + ret = regmap_read(regmap, MT6311_SWCID, &data); + if (ret < 0) { + dev_err(&i2c->dev, "Failed to read DEVICE_ID reg: %d\n", ret); + return ret; + } + + switch (data) { + case MT6311_E1_CID_CODE: + case MT6311_E2_CID_CODE: + case MT6311_E3_CID_CODE: + break; + default: + dev_err(&i2c->dev, "Unsupported device id = 0x%x.\n", data); + return -ENODEV; + } + + for (i = 0; i < MT6311_MAX_REGULATORS; i++) { + config.dev = &i2c->dev; + config.regmap = regmap; + + rdev = devm_regulator_register(&i2c->dev, + &mt6311_regulators[i], &config); + if (IS_ERR(rdev)) { + dev_err(&i2c->dev, + "Failed to register MT6311 regulator\n"); + return PTR_ERR(rdev); + } + } + + return 0; +} + +static const struct i2c_device_id mt6311_i2c_id[] = { + {"mt6311", 0}, + {}, +}; +MODULE_DEVICE_TABLE(i2c, mt6311_i2c_id); + +#ifdef CONFIG_OF +static const struct of_device_id mt6311_dt_ids[] = { + { .compatible = "mediatek,mt6311-regulator", + .data = &mt6311_i2c_id[0] }, + {}, +}; +MODULE_DEVICE_TABLE(of, mt6311_dt_ids); +#endif + +static struct i2c_driver mt6311_regulator_driver = { + .driver = { + .name = "mt6311", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(mt6311_dt_ids), + }, + .probe = mt6311_i2c_probe, + 
.id_table = mt6311_i2c_id, +}; + +module_i2c_driver(mt6311_regulator_driver); + +MODULE_AUTHOR("Henry Chen "); +MODULE_DESCRIPTION("Regulator device driver for Mediatek MT6311"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/mt6311-regulator.h b/drivers/regulator/mt6311-regulator.h new file mode 100644 index 000000000000..5218db46a798 --- /dev/null +++ b/drivers/regulator/mt6311-regulator.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 MediaTek Inc. + * Author: Henry Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MT6311_REGULATOR_H__ +#define __MT6311_REGULATOR_H__ + +#define MT6311_SWCID 0x01 + +#define MT6311_TOP_INT_CON 0x18 +#define MT6311_TOP_INT_MON 0x19 + +#define MT6311_VDVFS11_CON0 0x87 +#define MT6311_VDVFS11_CON7 0x88 +#define MT6311_VDVFS11_CON8 0x89 +#define MT6311_VDVFS11_CON9 0x8A +#define MT6311_VDVFS11_CON10 0x8B +#define MT6311_VDVFS11_CON11 0x8C +#define MT6311_VDVFS11_CON12 0x8D +#define MT6311_VDVFS11_CON13 0x8E +#define MT6311_VDVFS11_CON14 0x8F +#define MT6311_VDVFS11_CON15 0x90 +#define MT6311_VDVFS11_CON16 0x91 +#define MT6311_VDVFS11_CON17 0x92 +#define MT6311_VDVFS11_CON18 0x93 +#define MT6311_VDVFS11_CON19 0x94 + +#define MT6311_LDO_CON0 0xCC +#define MT6311_LDO_OCFB0 0xCD +#define MT6311_LDO_CON2 0xCE +#define MT6311_LDO_CON3 0xCF +#define MT6311_LDO_CON4 0xD0 +#define MT6311_FQMTR_CON0 0xD1 +#define MT6311_FQMTR_CON1 0xD2 +#define MT6311_FQMTR_CON2 0xD3 +#define MT6311_FQMTR_CON3 0xD4 +#define MT6311_FQMTR_CON4 0xD5 + +#define MT6311_PMIC_RG_INT_POL_MASK 0x1 +#define MT6311_PMIC_RG_INT_EN_MASK 0x2 +#define 
MT6311_PMIC_RG_BUCK_OC_INT_STATUS_MASK 0x10 + +#define MT6311_PMIC_VDVFS11_EN_CTRL_MASK 0x1 +#define MT6311_PMIC_VDVFS11_VOSEL_CTRL_MASK 0x2 +#define MT6311_PMIC_VDVFS11_EN_SEL_MASK 0x3 +#define MT6311_PMIC_VDVFS11_VOSEL_SEL_MASK 0xc +#define MT6311_PMIC_VDVFS11_EN_MASK 0x1 +#define MT6311_PMIC_VDVFS11_VOSEL_MASK 0x7F +#define MT6311_PMIC_VDVFS11_VOSEL_ON_MASK 0x7F +#define MT6311_PMIC_VDVFS11_VOSEL_SLEEP_MASK 0x7F +#define MT6311_PMIC_NI_VDVFS11_VOSEL_MASK 0x7F + +#define MT6311_PMIC_RG_VBIASN_EN_MASK 0x1 + +#endif diff --git a/include/linux/regulator/mt6311.h b/include/linux/regulator/mt6311.h new file mode 100644 index 000000000000..8473259395b6 --- /dev/null +++ b/include/linux/regulator/mt6311.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2015 MediaTek Inc. + * Author: Henry Chen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_REGULATOR_MT6311_H +#define __LINUX_REGULATOR_MT6311_H + +#define MT6311_MAX_REGULATORS 2 + +enum { + MT6311_ID_VDVFS = 0, + MT6311_ID_VBIASN, +}; + +#define MT6311_E1_CID_CODE 0x10 +#define MT6311_E2_CID_CODE 0x20 +#define MT6311_E3_CID_CODE 0x30 + +#endif /* __LINUX_REGULATOR_MT6311_H */ -- cgit v1.2.3-70-g09d2 From fa466c91970a0207d9384016cc7884a7f61834b6 Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Wed, 22 Jul 2015 07:32:22 -0500 Subject: spi: davinci: Choose correct pre-scaler limit based on SOC Currently the pre-scaler limit is incorrect. The value differs slightly for various devices so a single value can't be used. Using the compatible field select the correct pre-scaler limit. 
Add new compatible field value for Keystone devices to support their unique pre-scaler limit value. Signed-off-by: Franklin S Cooper Jr Reviewed-by: Sekhar Nori Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-davinci.txt | 2 + drivers/spi/spi-davinci.c | 43 ++++++++++++++++++---- include/linux/platform_data/spi-davinci.h | 1 + 3 files changed, 39 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/spi/spi-davinci.txt b/Documentation/devicetree/bindings/spi/spi-davinci.txt index 12ecfe9e3599..d1e914adcf6e 100644 --- a/Documentation/devicetree/bindings/spi/spi-davinci.txt +++ b/Documentation/devicetree/bindings/spi/spi-davinci.txt @@ -12,6 +12,8 @@ Required properties: - compatible: - "ti,dm6441-spi" for SPI used similar to that on DM644x SoC family - "ti,da830-spi" for SPI used similar to that on DA8xx SoC family + - "ti,keystone-spi" for SPI used similar to that on Keystone2 SoC + family - reg: Offset and length of SPI controller register space - num-cs: Number of chip selects. This includes internal as well as GPIO chip selects. diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c index b4605c4158f4..3cf9faa6cc3f 100644 --- a/drivers/spi/spi-davinci.c +++ b/drivers/spi/spi-davinci.c @@ -139,6 +139,8 @@ struct davinci_spi { u32 (*get_tx)(struct davinci_spi *); u8 *bytes_per_word; + + u8 prescaler_limit; }; static struct davinci_spi_config davinci_spi_default_cfg; @@ -266,7 +268,7 @@ static inline int davinci_spi_get_prescale(struct davinci_spi *dspi, /* Subtract 1 to match what will be programmed into SPI register. 
*/ ret = DIV_ROUND_UP(clk_get_rate(dspi->clk), max_speed_hz) - 1; - if (ret < 0 || ret > 255) + if (ret < dspi->prescaler_limit || ret > 255) return -EINVAL; return ret; @@ -833,13 +835,40 @@ rx_dma_failed: } #if defined(CONFIG_OF) + +/* OF SPI data structure */ +struct davinci_spi_of_data { + u8 version; + u8 prescaler_limit; +}; + +static const struct davinci_spi_of_data dm6441_spi_data = { + .version = SPI_VERSION_1, + .prescaler_limit = 2, +}; + +static const struct davinci_spi_of_data da830_spi_data = { + .version = SPI_VERSION_2, + .prescaler_limit = 2, +}; + +static const struct davinci_spi_of_data keystone_spi_data = { + .version = SPI_VERSION_1, + .prescaler_limit = 0, +}; + static const struct of_device_id davinci_spi_of_match[] = { { .compatible = "ti,dm6441-spi", + .data = &dm6441_spi_data, }, { .compatible = "ti,da830-spi", - .data = (void *)SPI_VERSION_2, + .data = &da830_spi_data, + }, + { + .compatible = "ti,keystone-spi", + .data = &keystone_spi_data, }, { }, }; @@ -858,21 +887,21 @@ static int spi_davinci_get_pdata(struct platform_device *pdev, struct davinci_spi *dspi) { struct device_node *node = pdev->dev.of_node; + struct davinci_spi_of_data *spi_data; struct davinci_spi_platform_data *pdata; unsigned int num_cs, intr_line = 0; const struct of_device_id *match; pdata = &dspi->pdata; - pdata->version = SPI_VERSION_1; match = of_match_device(davinci_spi_of_match, &pdev->dev); if (!match) return -ENODEV; - /* match data has the SPI version number for SPI_VERSION_2 */ - if (match->data == (void *)SPI_VERSION_2) - pdata->version = SPI_VERSION_2; + spi_data = (struct davinci_spi_of_data *)match->data; + pdata->version = spi_data->version; + pdata->prescaler_limit = spi_data->prescaler_limit; /* * default num_cs is 1 and all chipsel are internal to the chip * indicated by chip_sel being NULL or cs_gpios being NULL or @@ -992,7 +1021,7 @@ static int davinci_spi_probe(struct platform_device *pdev) dspi->bitbang.chipselect = davinci_spi_chipselect; 
dspi->bitbang.setup_transfer = davinci_spi_setup_transfer; - + dspi->prescaler_limit = pdata->prescaler_limit; dspi->version = pdata->version; dspi->bitbang.flags = SPI_NO_CS | SPI_LSB_FIRST | SPI_LOOP; diff --git a/include/linux/platform_data/spi-davinci.h b/include/linux/platform_data/spi-davinci.h index 8dc2fa47a2aa..f4edcb03c40c 100644 --- a/include/linux/platform_data/spi-davinci.h +++ b/include/linux/platform_data/spi-davinci.h @@ -49,6 +49,7 @@ struct davinci_spi_platform_data { u8 num_chipselect; u8 intr_line; u8 *chip_sel; + u8 prescaler_limit; bool cshold_bug; enum dma_event_q dma_event_q; }; -- cgit v1.2.3-70-g09d2 From 3a003baeec246f604ed1d2e0087560d7f15edcc6 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 17 Jul 2015 14:41:54 -0700 Subject: regulator: Add over current protection (OCP) support Some regulators can automatically shut down when they detect an over current event. Add an op (set_over_current_protection) and a DT property + constraint to support this capability. Signed-off-by: Stephen Boyd Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/regulator/regulator.txt | 1 + drivers/regulator/core.c | 9 +++++++++ drivers/regulator/of_regulator.c | 3 +++ include/linux/regulator/driver.h | 1 + include/linux/regulator/machine.h | 1 + 5 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt index db88feb28c03..24bd422cecd5 100644 --- a/Documentation/devicetree/bindings/regulator/regulator.txt +++ b/Documentation/devicetree/bindings/regulator/regulator.txt @@ -42,6 +42,7 @@ Optional properties: - regulator-system-load: Load in uA present on regulator that is not captured by any consumer request. - regulator-pull-down: Enable pull down resistor when the regulator is disabled. +- regulator-over-current-protection: Enable over current protection. 
Deprecated properties: - regulator-compatible: If a regulator chip contains multiple diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index c9f72019bd68..520413e2bca0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1081,6 +1081,15 @@ static int set_machine_constraints(struct regulator_dev *rdev, } } + if (rdev->constraints->over_current_protection + && ops->set_over_current_protection) { + ret = ops->set_over_current_protection(rdev); + if (ret < 0) { + rdev_err(rdev, "failed to set over current protection\n"); + goto out; + } + } + print_constraints(rdev); return 0; out: diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index b1c485b24ab2..250700c853bf 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -107,6 +107,9 @@ static void of_get_regulation_constraints(struct device_node *np, if (!of_property_read_u32(np, "regulator-system-load", &pval)) constraints->system_load = pval; + constraints->over_current_protection = of_property_read_bool(np, + "regulator-over-current-protection"); + for (i = 0; i < ARRAY_SIZE(regulator_states); i++) { switch (i) { case PM_SUSPEND_MEM: diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4db9fbe4889d..45932228cbf5 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -148,6 +148,7 @@ struct regulator_ops { int (*get_current_limit) (struct regulator_dev *); int (*set_input_current_limit) (struct regulator_dev *, int lim_uA); + int (*set_over_current_protection) (struct regulator_dev *); /* enable/disable regulator */ int (*enable) (struct regulator_dev *); diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index b11be1260129..a1067d0b3991 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -147,6 +147,7 @@ struct regulation_constraints { unsigned ramp_disable:1; /* disable ramp delay */ 
unsigned soft_start:1; /* ramp voltage slowly */ unsigned pull_down:1; /* pull down resistor when regulator off */ + unsigned over_current_protection:1; /* auto disable on over current */ }; /** -- cgit v1.2.3-70-g09d2 From e3eea1404f5ff7a2ceb7b5e7ba412a6fd94f2935 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 24 Jul 2015 10:38:12 -0400 Subject: ftrace: Fix breakage of set_ftrace_pid Commit 4104d326b670 ("ftrace: Remove global function list and call function directly") simplified the ftrace code by removing the global_ops list with a new design. But this cleanup also broke the filtering of PIDs that are added to the set_ftrace_pid file. Add back the proper hooks to have pid filtering working once again. Cc: stable@vger.kernel.org # 3.16+ Reported-by: Matt Fleming Reported-by: Richard Weinberger Tested-by: Matt Fleming Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 3 +++ kernel/trace/ftrace.c | 52 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1da602982cf9..6cd8c0ee4b6f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -116,6 +116,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * SAVE_REGS. If another ops with this flag set is already registered * for any of the functions that this ops will be registered for, then * this ops will fail to register or set_filter_ip. 
+ * PID - Is affected by set_ftrace_pid (allows filtering on those pids) */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -132,6 +133,7 @@ enum { FTRACE_OPS_FL_MODIFYING = 1 << 11, FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, FTRACE_OPS_FL_IPMODIFY = 1 << 13, + FTRACE_OPS_FL_PID = 1 << 14, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -159,6 +161,7 @@ struct ftrace_ops { struct ftrace_ops *next; unsigned long flags; void *private; + ftrace_func_t saved_func; int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE int nr_trampolines; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4a99ea..eb11011b5292 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -98,6 +98,13 @@ struct ftrace_pid { struct pid *pid; }; +static bool ftrace_pids_enabled(void) +{ + return !list_empty(&ftrace_pids); +} + +static void ftrace_update_trampoline(struct ftrace_ops *ops); + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op, regs); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! 
*/ - if (func != ftrace_pid_func) - ftrace_pid_function = func; + op->saved_func(ip, parent_ip, op, regs); } /** @@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; } static void control_ops_disable_all(struct ftrace_ops *ops) @@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) } else add_ftrace_ops(&ftrace_ops_list, ops); + /* Always save the function, and reset at unregistering */ + ops->saved_func = ops->func; + + if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled()) + ops->func = ftrace_pid_func; + ftrace_update_trampoline(ops); if (ftrace_enabled) @@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + ops->func = ops->saved_func; + return 0; } static void ftrace_update_pid_func(void) { + bool enabled = ftrace_pids_enabled(); + struct ftrace_ops *op; + /* Only do something if we are tracing something */ if (ftrace_trace_function == ftrace_stub) return; + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PID) { + op->func = enabled ? 
ftrace_pid_func : + op->saved_func; + ftrace_update_trampoline(op); + } + } while_for_each_ftrace_op(op); + update_ftrace_function(); } @@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = { .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED, + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; /* @@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) static struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; static int __init ftrace_nodyn_init(void) @@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) if (WARN_ON(tr->ops->func != ftrace_stub)) printk("ftrace ops had %pS for function\n", tr->ops->func); - /* Only the top level instance does pid tracing */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } } tr->ops->func = func; tr->ops->private = tr; @@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos) { mutex_lock(&ftrace_lock); - if (list_empty(&ftrace_pids) && (!*pos)) + if (!ftrace_pids_enabled() && (!*pos)) return (void *) 1; return seq_list_start(&ftrace_pids, *pos); @@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, -- cgit v1.2.3-70-g09d2 From e0910bace663b78c026b73bbd711a24ccf410531 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 23 Jul 2015 15:43:56 +0200 Subject: lwtunnel: export linux/lwtunnel.h to userspace Note also that include/linux/lwtunnel.h is not needed. 
CC: Thomas Graf CC: Roopa Prabhu Fixes: 499a24256862 ("lwtunnel: infrastructure for handling light weight tunnels like mpls") Signed-off-by: Nicolas Dichtel Acked-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/lwtunnel.h | 6 ------ include/uapi/linux/Kbuild | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) delete mode 100644 include/linux/lwtunnel.h (limited to 'include/linux') diff --git a/include/linux/lwtunnel.h b/include/linux/lwtunnel.h deleted file mode 100644 index 97f32f8b4ae1..000000000000 --- a/include/linux/lwtunnel.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _LINUX_LWTUNNEL_H_ -#define _LINUX_LWTUNNEL_H_ - -#include - -#endif /* _LINUX_LWTUNNEL_H_ */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 1ff9942718fe..aafb9937b162 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -243,6 +243,7 @@ header-y += limits.h header-y += llc.h header-y += loop.h header-y += lp.h +header-y += lwtunnel.h header-y += magic.h header-y += major.h header-y += map_to_7segment.h -- cgit v1.2.3-70-g09d2 From 0d3f2c92e004c67404fabea19728c1962b777bd6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 15 Jul 2015 15:38:29 +0100 Subject: irqchip/gic: Remove redundant gic_set_irqchip_flags Now that the GIC chip implementation enables IRQCHIP_SKIP_SET_WAKE and IRQCHIP_MASK_ON_SUSPEND by default, the platforms requiring them need not override the irqchip flags as before. This patch removes all the users of gic_set_irqchip_flags and the function itself. 
Signed-off-by: Sudeep Holla Acked-by: Linus Walleij Cc: Marc Zyngier Cc: Simon Horman Cc: Jason Cooper Cc: Michal Simek Cc: Magnus Damm Cc: Gregory CLEMENT Cc: Geert Uytterhoeven Cc: Lorenzo Pieralisi Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1436971109-20189-2-git-send-email-sudeep.holla@arm.com Signed-off-by: Thomas Gleixner --- arch/arm/mach-shmobile/intc-sh73a0.c | 1 - arch/arm/mach-shmobile/setup-r8a7779.c | 1 - arch/arm/mach-ux500/cpu.c | 1 - arch/arm/mach-zynq/common.c | 1 - drivers/irqchip/irq-gic.c | 5 ----- include/linux/irqchip/arm-gic.h | 1 - 6 files changed, 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/intc-sh73a0.c b/arch/arm/mach-shmobile/intc-sh73a0.c index fd63ae6532fc..151a71a41fe3 100644 --- a/arch/arm/mach-shmobile/intc-sh73a0.c +++ b/arch/arm/mach-shmobile/intc-sh73a0.c @@ -313,7 +313,6 @@ void __init sh73a0_init_irq(void) void __iomem *gic_cpu_base = IOMEM(0xf0000100); void __iomem *intevtsa = ioremap_nocache(0xffd20100, PAGE_SIZE); - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE); gic_init(0, 29, gic_dist_base, gic_cpu_base); register_intc_controller(&intcs_desc); diff --git a/arch/arm/mach-shmobile/setup-r8a7779.c b/arch/arm/mach-shmobile/setup-r8a7779.c index c03e562be12b..aea5cff9495d 100644 --- a/arch/arm/mach-shmobile/setup-r8a7779.c +++ b/arch/arm/mach-shmobile/setup-r8a7779.c @@ -719,7 +719,6 @@ void __init r8a7779_init_irq_dt(void) void __iomem *gic_dist_base = ioremap_nocache(0xf0001000, 0x1000); void __iomem *gic_cpu_base = ioremap_nocache(0xf0000100, 0x1000); #endif - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE); #ifdef CONFIG_ARCH_SHMOBILE_LEGACY gic_init(0, 29, gic_dist_base, gic_cpu_base); diff --git a/arch/arm/mach-ux500/cpu.c b/arch/arm/mach-ux500/cpu.c index e31d3d61c998..6cb10c77afd8 100644 --- a/arch/arm/mach-ux500/cpu.c +++ b/arch/arm/mach-ux500/cpu.c @@ -56,7 +56,6 @@ void __init ux500_init_irq(void) struct device_node *np; struct resource r; - 
gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND); irqchip_init(); np = of_find_compatible_node(NULL, NULL, "stericsson,db8500-prcmu"); of_address_to_resource(np, 0, &r); diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index 616d5840fc2e..2ad1accfba35 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -186,7 +186,6 @@ static void __init zynq_map_io(void) static void __init zynq_irq_init(void) { - gic_set_irqchip_flags(IRQCHIP_SKIP_SET_WAKE | IRQCHIP_MASK_ON_SUSPEND); irqchip_init(); } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 39ff8df8cf64..80fde37076c4 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -881,11 +881,6 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .xlate = gic_irq_domain_xlate, }; -void gic_set_irqchip_flags(unsigned long flags) -{ - gic_chip.flags |= flags; -} - void __init gic_init_bases(unsigned int gic_nr, int irq_start, void __iomem *dist_base, void __iomem *cpu_base, u32 percpu_offset, struct device_node *node) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 9de976b4f9a7..61a2007eb49a 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -95,7 +95,6 @@ struct device_node; -void gic_set_irqchip_flags(unsigned long flags); void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); -- cgit v1.2.3-70-g09d2 From be9b22b6a7e6725162c64155a08b71f0654b675c Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 22 Jul 2015 16:21:39 -0700 Subject: genirq: Add chip_[suspend|resume] PM support to irq_chip Some (admittedly odd) irqchips perform functions that are not directly related to any of their child IRQ lines, and therefore need to perform some tasks during suspend/resume regardless of whether there are any "installed" interrupts for the 
irqchip. However, the current generic-chip framework does not call the chip's irq_{suspend,resume} when there are no interrupts installed (this makes sense, because there are no irq_data objects for such a call to be made). More specifically, irq-bcm7120-l2 configures both a forwarding mask (which affects other top-level GIC IRQs) and a second-level interrupt mask (for managing its own child interrupts). The former must be saved/restored on suspend/resume, even when there's nothing to do for the latter. This patch adds a new set of suspend/resume hooks to irq_chip_generic, to help represent *chip* suspend/resume, rather than IRQ suspend/resume. These callbacks will always be called for an IRQ chip (regardless of the installed interrupts) and are based on the per-chip irq_chip_generic struct, rather than the per-IRQ irq_data struct. The original problem report is described in extra detail here: http://lkml.kernel.org/g/20150619224123.GL4917@ld-irv-0074 Signed-off-by: Brian Norris Tested-by: Florian Fainelli Cc: Gregory Fong Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-mips@linux-mips.org Cc: Kevin Cernekee Cc: Jason Cooper Link: http://lkml.kernel.org/r/1437607300-40858-1-git-send-email-computersforpeace@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 14 ++++++++++++-- kernel/irq/generic-chip.c | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 5284cb166d90..2c8730a108be 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -324,8 +324,10 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_bus_sync_unlock:function to sync and unlock slow bus (i2c) chips * @irq_cpu_online: configure an interrupt source for a secondary CPU * @irq_cpu_offline: un-configure an interrupt source for a secondary CPU - * @irq_suspend: function called from core code on suspend once per chip - * @irq_resume: function called from core code 
on resume once per chip + * @irq_suspend: function called from core code on suspend once per + * chip, when one or more interrupts are installed + * @irq_resume: function called from core code on resume once per chip, + * when one or more interrupts are installed * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_calc_mask: Optional function to set irq_data.mask for special cases * @irq_print_chip: optional to print special chip info in show_interrupts @@ -760,6 +762,12 @@ struct irq_chip_type { * @reg_base: Register base address (virtual) * @reg_readl: Alternate I/O accessor (defaults to readl if NULL) * @reg_writel: Alternate I/O accessor (defaults to writel if NULL) + * @suspend: Function called from core code on suspend once per + * chip; can be useful instead of irq_chip::irq_suspend to + * handle chip details even when no interrupts are in use + * @resume: Function called from core code on resume once per chip; + * can be useful instead of irq_chip::irq_resume to handle + * chip details even when no interrupts are in use * @irq_base: Interrupt base nr for this chip * @irq_cnt: Number of interrupts handled by this chip * @mask_cache: Cached mask register shared between all chip types @@ -786,6 +794,8 @@ struct irq_chip_generic { void __iomem *reg_base; u32 (*reg_readl)(void __iomem *addr); void (*reg_writel)(u32 val, void __iomem *addr); + void (*suspend)(struct irq_chip_generic *gc); + void (*resume)(struct irq_chip_generic *gc); unsigned int irq_base; unsigned int irq_cnt; u32 mask_cache; diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 15b370daf234..abd286afbd27 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -553,6 +553,9 @@ static int irq_gc_suspend(void) if (data) ct->chip.irq_suspend(data); } + + if (gc->suspend) + gc->suspend(gc); } return 0; } @@ -564,6 +567,9 @@ static void irq_gc_resume(void) list_for_each_entry(gc, &gc_list, list) { struct irq_chip_type *ct =
gc->chip_types; + if (gc->resume) + gc->resume(gc); + if (ct->chip.irq_resume) { struct irq_data *data = irq_gc_get_irq_data(gc); -- cgit v1.2.3-70-g09d2 From 2be6967cdbc95a9960b620defedbf5e02e2af619 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 23 Jul 2015 23:35:56 +0300 Subject: net/mlx5e: Support ETH_RSS_HASH_XOR The ConnectX-4 HW implements inverted XOR8. To make it act as XOR we re-order the HW RSS indirection table. Set XOR to be the default RSS hash function and add ethtool API to control it. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 39 ++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 46 +++++++++++++++++----- include/linux/mlx5/mlx5_ifc.h | 6 +-- 4 files changed, 79 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3d23bd657e3c..61d8433392aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -195,6 +195,7 @@ struct mlx5e_params { u16 rx_hash_log_tbl_sz; bool lro_en; u32 lro_wqe_sz; + u8 rss_hfunc; }; enum { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 388938482ff9..cb2853570504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -662,6 +662,43 @@ out: return err; } +static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (hfunc) + *hfunc = priv->params.rss_hfunc; + + return 0; +} + +static int mlx5e_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + 
if (hfunc == ETH_RSS_HASH_NO_CHANGE) + return 0; + + if ((hfunc != ETH_RSS_HASH_XOR) && + (hfunc != ETH_RSS_HASH_TOP)) + return -EINVAL; + + mutex_lock(&priv->state_lock); + + priv->params.rss_hfunc = hfunc; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + mlx5e_close_locked(priv->netdev); + err = mlx5e_open_locked(priv->netdev); + } + + mutex_unlock(&priv->state_lock); + + return err; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -676,4 +713,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_coalesce = mlx5e_set_coalesce, .get_settings = mlx5e_get_settings, .set_settings = mlx5e_set_settings, + .get_rxfh = mlx5e_get_rxfh, + .set_rxfh = mlx5e_set_rxfh, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 40206da1f9d7..07d36275021e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1158,6 +1158,24 @@ static void mlx5e_close_tises(struct mlx5e_priv *priv) mlx5e_close_tis(priv, tc); } +static int mlx5e_rx_hash_fn(int hfunc) +{ + return (hfunc == ETH_RSS_HASH_TOP) ? + MLX5_RX_HASH_FN_TOEPLITZ : + MLX5_RX_HASH_FN_INVERTED_XOR8; +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 
1 : 0) << i; + + return inv; +} + static int mlx5e_open_rqt(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1166,11 +1184,10 @@ static int mlx5e_open_rqt(struct mlx5e_priv *priv) void *rqtc; int inlen; int err; - int sz; + int log_tbl_sz = priv->params.rx_hash_log_tbl_sz; + int sz = 1 << log_tbl_sz; int i; - sz = 1 << priv->params.rx_hash_log_tbl_sz; - inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (!in) @@ -1182,8 +1199,12 @@ static int mlx5e_open_rqt(struct mlx5e_priv *priv) MLX5_SET(rqtc, rqtc, rqt_max_size, sz); for (i = 0; i < sz; i++) { - int ix = i % priv->params.num_channels; + int ix = i; + + if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, log_tbl_sz); + ix = ix % priv->params.num_channels; MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn); } @@ -1254,12 +1275,16 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) MLX5_SET(tirc, tirc, indirect_table, priv->rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, - MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); - MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - netdev_rss_key_fill(MLX5_ADDR_OF(tirc, tirc, - rx_hash_toeplitz_key), - MLX5_FLD_SZ_BYTES(tirc, - rx_hash_toeplitz_key)); + mlx5e_rx_hash_fn(priv->params.rss_hfunc)); + if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { + void *rss_key = MLX5_ADDR_OF(tirc, tirc, + rx_hash_toeplitz_key); + size_t len = MLX5_FLD_SZ_BYTES(tirc, + rx_hash_toeplitz_key); + + MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); + netdev_rss_key_fill(rss_key, len); + } break; } @@ -1700,6 +1725,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; priv->params.num_tc = 1; priv->params.default_vlan_prio = 0; + priv->params.rss_hfunc = ETH_RSS_HASH_XOR; priv->params.lro_en = false && !!MLX5_CAP_ETH(priv->mdev, lro_cap); priv->params.lro_wqe_sz = diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 
6d2f6fee041c..c60a62bba652 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1936,9 +1936,9 @@ enum { }; enum { - MLX5_TIRC_RX_HASH_FN_HASH_NONE = 0x0, - MLX5_TIRC_RX_HASH_FN_HASH_INVERTED_XOR8 = 0x1, - MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ = 0x2, + MLX5_RX_HASH_FN_NONE = 0x0, + MLX5_RX_HASH_FN_INVERTED_XOR8 = 0x1, + MLX5_RX_HASH_FN_TOEPLITZ = 0x2, }; enum { -- cgit v1.2.3-70-g09d2 From 311c7c71c9bb8786c96fee353fe9886c08b017fe Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 23 Jul 2015 23:35:57 +0300 Subject: net/mlx5e: Allocate DMA coherent memory on reader NUMA node By affinity hints and XPS, each mlx5e channel is assigned a CPU core. Channel DMA coherent memory that is written by the NIC and read by SW (e.g CQ buffer) is allocated on the NUMA node of the CPU core assigned for the channel. Channel DMA coherent memory that is written by SW and read by the NIC (e.g SQ/RQ buffer) is allocated on the NUMA node of the NIC. Doorbell record (written by SW and read by the NIC) is an exception since it is accessed by SW more frequently. Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/alloc.c | 48 +++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 11 ++++-- drivers/net/ethernet/mellanox/mlx5/core/main.c | 6 ++- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 12 +++--- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 3 +- include/linux/mlx5/driver.h | 8 ++++ 6 files changed, 70 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 0715b497511f..6cb38304669f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -45,15 +45,34 @@ * register it in a memory region at HCA virtual address 0. 
*/ -int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) +static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev, + size_t size, dma_addr_t *dma_handle, + int node) +{ + struct mlx5_priv *priv = &dev->priv; + int original_node; + void *cpu_handle; + + mutex_lock(&priv->alloc_mutex); + original_node = dev_to_node(&dev->pdev->dev); + set_dev_node(&dev->pdev->dev, node); + cpu_handle = dma_zalloc_coherent(&dev->pdev->dev, size, + dma_handle, GFP_KERNEL); + set_dev_node(&dev->pdev->dev, original_node); + mutex_unlock(&priv->alloc_mutex); + return cpu_handle; +} + +int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_buf *buf, int node) { dma_addr_t t; buf->size = size; buf->npages = 1; buf->page_shift = (u8)get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_zalloc_coherent(&dev->pdev->dev, - size, &t, GFP_KERNEL); + buf->direct.buf = mlx5_dma_zalloc_coherent_node(dev, size, + &t, node); if (!buf->direct.buf) return -ENOMEM; @@ -66,6 +85,11 @@ int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) return 0; } + +int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf) +{ + return mlx5_buf_alloc_node(dev, size, buf, dev->priv.numa_node); +} EXPORT_SYMBOL_GPL(mlx5_buf_alloc); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) @@ -75,7 +99,8 @@ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf) } EXPORT_SYMBOL_GPL(mlx5_buf_free); -static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device) +static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, + int node) { struct mlx5_db_pgdir *pgdir; @@ -84,8 +109,9 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct device *dma_device) return NULL; bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE); - pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE, - &pgdir->db_dma, GFP_KERNEL); + + pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, + 
&pgdir->db_dma, node); if (!pgdir->db_page) { kfree(pgdir); return NULL; @@ -118,7 +144,7 @@ static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, return 0; } -int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, int node) { struct mlx5_db_pgdir *pgdir; int ret = 0; @@ -129,7 +155,7 @@ int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) if (!mlx5_alloc_db_from_pgdir(pgdir, db)) goto out; - pgdir = mlx5_alloc_db_pgdir(&(dev->pdev->dev)); + pgdir = mlx5_alloc_db_pgdir(dev, node); if (!pgdir) { ret = -ENOMEM; goto out; @@ -145,6 +171,12 @@ out: return ret; } +EXPORT_SYMBOL_GPL(mlx5_db_alloc_node); + +int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db) +{ + return mlx5_db_alloc_node(dev, db, dev->priv.numa_node); +} EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 07d36275021e..57cc8960b73b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -272,6 +272,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, int err; int i; + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) @@ -502,6 +504,8 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) return err; + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) @@ -702,7 +706,8 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, int err; u32 i; - param->wq.numa = cpu_to_node(c->cpu); + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); param->eq_ix = c->ix; err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, @@ -1000,7 +1005,7 @@ static void
mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); - param->wq.numa = dev_to_node(&priv->mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; } @@ -1014,7 +1019,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); - param->wq.numa = dev_to_node(&priv->mdev->pdev->dev); + param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index afad529838de..c34eafbf1c04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -455,7 +455,7 @@ static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i) struct mlx5_priv *priv = &mdev->priv; struct msix_entry *msix = priv->msix_arr; int irq = msix[i + MLX5_EQ_VEC_COMP_BASE].vector; - int numa_node = dev_to_node(&mdev->pdev->dev); + int numa_node = priv->numa_node; int err; if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) { @@ -668,6 +668,10 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) INIT_LIST_HEAD(&priv->pgdir_list); spin_lock_init(&priv->mkey_lock); + mutex_init(&priv->alloc_mutex); + + priv->numa_node = dev_to_node(&dev->pdev->dev); + priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); if (!priv->dbg_root) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index 8388411582cf..ce21ee5b2357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -73,13 +73,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, 
wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride); wq->sz_m1 = (1 << MLX5_GET(wq, wqc, log_wq_sz)) - 1; - err = mlx5_db_alloc(mdev, &wq_ctrl->db); + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); return err; } - err = mlx5_buf_alloc(mdev, mlx5_wq_cyc_get_byte_size(wq), &wq_ctrl->buf); + err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); goto err_db_free; @@ -108,13 +109,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size); wq->sz_m1 = (1 << wq->log_sz) - 1; - err = mlx5_db_alloc(mdev, &wq_ctrl->db); + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); return err; } - err = mlx5_buf_alloc(mdev, mlx5_cqwq_get_byte_size(wq), &wq_ctrl->buf); + err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); goto err_db_free; @@ -144,7 +146,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, wq->log_stride = MLX5_GET(wq, wqc, log_wq_stride); wq->sz_m1 = (1 << MLX5_GET(wq, wqc, log_wq_sz)) - 1; - err = mlx5_db_alloc(mdev, &wq_ctrl->db); + err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index e0ddd69fb429..6c2a8f95093c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -37,7 +37,8 @@ struct mlx5_wq_param { int linear; - int numa; + int buf_numa_node; + int db_numa_node; }; struct mlx5_wq_ctrl { diff --git 
a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5722d88c2429..1c0d5d062d7c 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -463,6 +463,10 @@ struct mlx5_priv { /* end: mr staff */ /* start: alloc staff */ + /* protect buffer allocation according to numa node */ + struct mutex alloc_mutex; + int numa_node; + struct mutex pgdir_mutex; struct list_head pgdir_list; /* end: alloc staff */ @@ -672,6 +676,8 @@ void mlx5_health_cleanup(void); void __init mlx5_health_init(void); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, + struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf); struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, @@ -773,6 +779,8 @@ void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db); +int mlx5_db_alloc_node(struct mlx5_core_dev *dev, struct mlx5_db *db, + int node); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db); const char *mlx5_command_str(int command); -- cgit v1.2.3-70-g09d2 From 88a85f99e51fb2373259ab83c8bb130a9bbf3804 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Thu, 23 Jul 2015 23:35:59 +0300 Subject: net/mlx5e: TX latency optimization to save DMA reads A regular TX WQE execution involves two or more DMA reads - one to fetch the WQE, and another one per WQE gather entry. These DMA reads obviously increase the TX latency. There are two mlx5 mechanisms to bypass these DMA reads: 1) Inline WQE 2) Blue Flame (BF) An inline WQE contains a whole packet, thus saves the DMA read/s of the regular WQE gather entry/s.
Inline WQE support was already added in the previous commit. A BF WQE is written directly to the device I/O mapped memory, thus enables saving the DMA read that fetches the WQE. The BF WQE I/O write must be in cache line granularity, thus uses the CPU write combining mechanism. A BF WQE I/O write acts also as a TX doorbell for notifying the device of new TX WQEs. A BF WQE is written to the same I/O mapped address as the regular TX doorbell, thus this address is being mapped twice - once by ioremap() and once by io_mapping_map_wc(). While both mechanisms reduce the TX latency, they both consume more CPU cycles than a regular WQE: - A BF WQE must still be written to host memory, in addition to being written directly to the device I/O mapped memory. - An inline WQE involves copying the SKB data into it. To handle this tradeoff, we introduce here a heuristic algorithm that strives to avoid using these two mechanisms in case the TX queue is being back-pressured by the device, and limit their usage rate otherwise. An inline WQE will always be "Blue Flamed" (written directly to the device I/O mapped memory) while a BF WQE may not be inlined (may contain gather entries). Preliminary testing using netperf UDP_RR shows that the latency goes down from 17.5us to 16.9us, while the message rate (tested with pktgen) stays the same. Signed-off-by: Achiad Shochat Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 24 +++++++++++++++------ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 12 ++++++----- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 26 ++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 26 +++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/uar.c | 6 ++++++ include/linux/mlx5/driver.h | 4 +++- 6 files changed, 79 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d9dc506188c8..b66edd2c5a61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -60,6 +60,7 @@ #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ +#define MLX5E_SQ_BF_BUDGET 16 static const char vport_strings[][ETH_GSTRING_LEN] = { /* vport statistics */ @@ -268,7 +269,9 @@ struct mlx5e_sq { /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; u32 dma_fifo_pc; - u32 bf_offset; + u16 bf_offset; + u16 prev_cc; + u8 bf_budget; struct mlx5e_sq_stats stats; struct mlx5e_cq cq; @@ -281,9 +284,10 @@ struct mlx5e_sq { struct mlx5_wq_cyc wq; u32 dma_fifo_mask; void __iomem *uar_map; + void __iomem *uar_bf_map; struct netdev_queue *txq; u32 sqn; - u32 bf_buf_size; + u16 bf_buf_size; u16 max_inline; u16 edge; struct device *pdev; @@ -493,8 +497,10 @@ int mlx5e_update_priv_params(struct mlx5e_priv *priv, struct mlx5e_params *new_params); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5e_tx_wqe *wqe) + struct mlx5e_tx_wqe *wqe, int bf_sz) { + u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; + /* ensure wqe is visible to device before updating doorbell record */ dma_wmb(); @@ -505,9 +511,15 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, */ wmb(); - mlx5_write64((__be32 *)&wqe->ctrl, - sq->uar_map + MLX5_BF_OFFSET + sq->bf_offset, - NULL); + if (bf_sz) { + 
__iowrite64_copy(sq->uar_bf_map + ofst, &wqe->ctrl, bf_sz); + + /* flush the write-combining mapped buffer */ + wmb(); + + } else { + mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + } sq->bf_offset ^= sq->bf_buf_size; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c55fad431cbf..4a87e9dcf52c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -514,6 +514,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; sq->uar_map = sq->uar.map; + sq->uar_bf_map = sq->uar.bf_map; sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; sq->max_inline = param->max_inline; @@ -524,11 +525,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, txq_ix = c->ix + tc * priv->params.num_channels; sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); - sq->pdev = c->pdev; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + sq->bf_budget = MLX5E_SQ_BF_BUDGET; priv->txq_to_sq_map[txq_ix] = sq; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 351ac6982e22..64380bc0cd6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -57,7 +57,7 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) if (notify_hw) { cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe); + mlx5e_tx_notify_hw(sq, wqe, 0); } } @@ -110,7 +110,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, } static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, - struct sk_buff *skb) + struct sk_buff *skb, bool bf) { /* 
Some NIC TX decisions, e.g loopback, are based on the packet * headers and occur before the data gather. @@ -118,7 +118,7 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, */ #define MLX5E_MIN_INLINE (ETH_HLEN + 2/*vlan tag*/) - if (skb_headlen(skb) <= sq->max_inline) + if (bf && (skb_headlen(skb) <= sq->max_inline)) return skb_headlen(skb); return MLX5E_MIN_INLINE; @@ -137,6 +137,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u8 opcode = MLX5_OPCODE_SEND; dma_addr_t dma_addr = 0; + bool bf = false; u16 headlen; u16 ds_cnt; u16 ihs; @@ -149,6 +150,11 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) else sq->stats.csum_offload_none++; + if (sq->cc != sq->prev_cc) { + sq->prev_cc = sq->cc; + sq->bf_budget = (sq->cc == sq->pc) ? MLX5E_SQ_BF_BUDGET : 0; + } + if (skb_is_gso(skb)) { u32 payload_len; @@ -161,7 +167,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) sq->stats.tso_packets++; sq->stats.tso_bytes += payload_len; } else { - ihs = mlx5e_get_inline_hdr_size(sq, skb); + bf = sq->bf_budget && + !skb->xmit_more && + !skb_shinfo(skb)->nr_frags; + ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); MLX5E_TX_SKB_CB(skb)->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } @@ -233,14 +242,21 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { + int bf_sz = 0; + + if (bf && sq->uar_bf_map) + bf_sz = MLX5E_TX_SKB_CB(skb)->num_wqebbs << 3; + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe); + mlx5e_tx_notify_hw(sq, wqe, bf_sz); } /* fill sq edge with nops to avoid wqe wrap around */ while ((sq->pc & wq->sz_m1) > sq->edge) mlx5e_send_nop(sq, false); + sq->bf_budget = bf ? 
sq->bf_budget - 1 : 0; + sq->stats.packets++; return NETDEV_TX_OK; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c34eafbf1c04..603a8b0908ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -654,6 +654,22 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) } #endif +static int map_bf_area(struct mlx5_core_dev *dev) +{ + resource_size_t bf_start = pci_resource_start(dev->pdev, 0); + resource_size_t bf_len = pci_resource_len(dev->pdev, 0); + + dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len); + + return dev->priv.bf_mapping ? 0 : -ENOMEM; +} + +static void unmap_bf_area(struct mlx5_core_dev *dev) +{ + if (dev->priv.bf_mapping) + io_mapping_free(dev->priv.bf_mapping); +} + static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) { struct mlx5_priv *priv = &dev->priv; @@ -808,10 +824,13 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) goto err_stop_eqs; } + if (map_bf_area(dev)) + dev_err(&pdev->dev, "Failed to map blue flame area\n"); + err = mlx5_irq_set_affinity_hints(dev); if (err) { dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - goto err_free_comp_eqs; + goto err_unmap_bf_area; } MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); @@ -823,7 +842,9 @@ static int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev) return 0; -err_free_comp_eqs: +err_unmap_bf_area: + unmap_bf_area(dev); + free_comp_eqs(dev); err_stop_eqs: @@ -881,6 +902,7 @@ static void mlx5_dev_cleanup(struct mlx5_core_dev *dev) mlx5_cleanup_qp_table(dev); mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); + unmap_bf_area(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 9ef85873ceea..eb05c845ece9 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -32,6 +32,7 @@ #include #include +#include #include #include #include "mlx5_core.h" @@ -246,6 +247,10 @@ int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) goto err_free_uar; } + if (mdev->priv.bf_mapping) + uar->bf_map = io_mapping_map_wc(mdev->priv.bf_mapping, + uar->index << PAGE_SHIFT); + return 0; err_free_uar: @@ -257,6 +262,7 @@ EXPORT_SYMBOL(mlx5_alloc_map_uar); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar) { + io_mapping_unmap(uar->bf_map); iounmap(uar->map); mlx5_cmd_free_uar(mdev, uar->index); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1c0d5d062d7c..5fe0cae1a515 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -380,7 +380,7 @@ struct mlx5_uar { u32 index; struct list_head bf_list; unsigned free_bf_bmap; - void __iomem *wc_map; + void __iomem *bf_map; void __iomem *map; }; @@ -435,6 +435,8 @@ struct mlx5_priv { struct mlx5_uuar_info uuari; MLX5_DECLARE_DOORBELL_LOCK(cq_uar_lock); + struct io_mapping *bf_mapping; + /* pages stuff */ struct workqueue_struct *pg_wq; struct rb_root page_root; -- cgit v1.2.3-70-g09d2 From e018a0cce3d849bc73e72686c571420adc40bad2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Jul 2015 21:24:04 +0300 Subject: net/macb: convert to kernel doc This patch converts struct description to the kernel doc format. There is no functional change. Signed-off-by: Andy Shevchenko Signed-off-by: David S. 
Miller --- include/linux/platform_data/macb.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/macb.h b/include/linux/platform_data/macb.h index 044a124bfbbc..21b15f6fee25 100644 --- a/include/linux/platform_data/macb.h +++ b/include/linux/platform_data/macb.h @@ -8,11 +8,19 @@ #ifndef __MACB_PDATA_H__ #define __MACB_PDATA_H__ +/** + * struct macb_platform_data - platform data for MACB Ethernet + * @phy_mask: phy mask passed when register the MDIO bus + * within the driver + * @phy_irq_pin: PHY IRQ + * @is_rmii: using RMII interface? + * @rev_eth_addr: reverse Ethernet address byte order + */ struct macb_platform_data { u32 phy_mask; - int phy_irq_pin; /* PHY IRQ */ - u8 is_rmii; /* using RMII interface? */ - u8 rev_eth_addr; /* reverse Ethernet address byte order */ + int phy_irq_pin; + u8 is_rmii; + u8 rev_eth_addr; }; #endif /* __MACB_PDATA_H__ */ -- cgit v1.2.3-70-g09d2 From e6942b7de2dfe44ebde9bae57dadece5abca9de8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 23 Apr 2014 19:32:50 +0200 Subject: atomic: Provide atomic_{or,xor,and} Implement atomic logic ops -- atomic_{or,xor,and}. These will replace the atomic_{set,clear}_mask functions that are available on some archs. 
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- arch/alpha/include/asm/atomic.h | 1 - arch/arc/include/asm/atomic.h | 1 - arch/arm/include/asm/atomic.h | 1 - arch/arm64/include/asm/atomic.h | 1 - arch/avr32/include/asm/atomic.h | 2 -- arch/blackfin/include/asm/atomic.h | 2 -- arch/frv/include/asm/atomic.h | 2 -- arch/h8300/include/asm/atomic.h | 2 -- arch/hexagon/include/asm/atomic.h | 2 -- arch/ia64/include/asm/atomic.h | 2 -- arch/m32r/include/asm/atomic.h | 2 -- arch/m68k/include/asm/atomic.h | 2 -- arch/metag/include/asm/atomic_lnkget.h | 2 -- arch/mips/include/asm/atomic.h | 2 -- arch/mn10300/include/asm/atomic.h | 2 -- arch/parisc/include/asm/atomic.h | 2 -- arch/powerpc/include/asm/atomic.h | 2 -- arch/s390/include/asm/atomic.h | 2 -- arch/sh/include/asm/atomic-grb.h | 2 -- arch/sparc/include/asm/atomic_32.h | 2 -- arch/sparc/include/asm/atomic_64.h | 2 -- arch/tile/include/asm/atomic_32.h | 2 -- arch/tile/include/asm/atomic_64.h | 2 -- arch/x86/include/asm/atomic.h | 2 -- arch/xtensa/include/asm/atomic.h | 2 -- include/asm-generic/atomic.h | 21 ++++++++++++--------- include/asm-generic/atomic64.h | 4 ++++ include/linux/atomic.h | 13 ------------- lib/atomic64.c | 3 +++ 29 files changed, 19 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 0eff853398d2..e8c956098424 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -110,7 +110,6 @@ static __inline__ long atomic64_##op##_return(long i, atomic64_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot #define atomic64_andnot atomic64_andnot diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index e90b701fc6a8..2a847821dee1 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -144,7 +144,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ 
ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot ATOMIC_OP(and, &=, and) diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index ff214bac9cb4..82b75a7cb762 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -194,7 +194,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot ATOMIC_OP(and, &=, and) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 2876173397b2..866a71fca9a3 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -85,7 +85,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add, add) ATOMIC_OPS(sub, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR #define atomic_andnot atomic_andnot ATOMIC_OP(and, and) diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 115d3005e4bc..97c9bdf83409 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -51,8 +51,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ (void)__atomic_##op##_return(i, v); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, eor) diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index eafa55b81a7b..2d6a7a3823c3 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -28,8 +28,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_add_return(i, v) __raw_atomic_add_asm(&(v)->counter, i) #define atomic_sub_return(i, v) __raw_atomic_add_asm(&(v)->counter, -(i)) -#define CONFIG_ARCH_HAS_ATOMIC_OR - #define atomic_or(i, v) (void)__raw_atomic_or_asm(&(v)->counter, i) #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define 
atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index 74d22454d7c6..fc48bea26b40 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -192,8 +192,6 @@ static inline void atomic64_##op(long long i, atomic64_t *v) \ (void)__atomic64_fetch_##op(i, &v->counter); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(or) ATOMIC_OP(and) ATOMIC_OP(xor) diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index f181f820be33..c4d061f09c44 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ ATOMIC_OP_RETURN(add, +=) ATOMIC_OP_RETURN(sub, -=) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=) ATOMIC_OP(or, |=) ATOMIC_OP(xor, ^=) diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index 4efe2c7c0dd8..811d61f6422d 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -132,8 +132,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 0809ef5d6b9a..be4beeb77d57 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -69,8 +69,6 @@ ATOMIC_OP(sub, -) : ia64_atomic_sub(__ia64_asr_i, v); \ }) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &) ATOMIC_OP(or, |) ATOMIC_OP(xor, ^) diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index 7245463c1e98..b2a13fbd5be0 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -94,8 +94,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) 
ATOMIC_OP(xor) diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index c30e43ea49a3..93ebd96aa494 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -77,8 +77,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, add) ATOMIC_OPS(sub, -=, sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=, and) ATOMIC_OP(or, |=, or) ATOMIC_OP(xor, ^=, eor) diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 930c12cb8d37..0642606de901 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -74,8 +74,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 0430ba6ab762..4c42fd9af777 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -137,8 +137,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t * v) \ ATOMIC_OPS(add, +=, addu) ATOMIC_OPS(sub, -=, subu) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=, and) ATOMIC_OP(or, |=, or) ATOMIC_OP(xor, ^=, xor) diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 03eea8158cf9..f5a63f0bda46 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -89,8 +89,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index be2c50ddebd6..2536965d00ea 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -126,8 +126,6 @@ static __inline__ int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add, +=) 
ATOMIC_OPS(sub, -=) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, &=) ATOMIC_OP(or, |=) ATOMIC_OP(xor, ^=) diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 6ca89e2aca15..55f106ed12bf 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -67,8 +67,6 @@ static __inline__ int atomic_##op##_return(int a, atomic_t *v) \ ATOMIC_OPS(add, add) ATOMIC_OPS(sub, subf) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and, and) ATOMIC_OP(or, or) ATOMIC_OP(xor, xor) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index b3859d8e001f..d761aeff72da 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -282,8 +282,6 @@ static inline void atomic64_##op(long i, atomic64_t *v) \ __ATOMIC64_LOOP(v, i, __ATOMIC64_##OP, __ATOMIC64_NO_BARRIER); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC64_OP(and, AND) ATOMIC64_OP(or, OR) ATOMIC64_OP(xor, XOR) diff --git a/arch/sh/include/asm/atomic-grb.h b/arch/sh/include/asm/atomic-grb.h index 4b03830d48c7..b94df40e5f2d 100644 --- a/arch/sh/include/asm/atomic-grb.h +++ b/arch/sh/include/asm/atomic-grb.h @@ -48,8 +48,6 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index e19d8880b146..7dcbebbcaec6 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -17,8 +17,6 @@ #include #include -#define CONFIG_ARCH_HAS_ATOMIC_OR - #define ATOMIC_INIT(i) { (i) } int atomic_add_return(int, atomic_t *); diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index d6af27c93450..917084ace49d 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -33,8 +33,6 @@ long atomic64_##op##_return(long, atomic64_t *); 
ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 94237922f0dd..d320ce253d86 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -41,8 +41,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ _atomic_##op((unsigned long *)&v->counter, i); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index d07d9fc6e2a1..096a56d6ead4 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -58,8 +58,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return oldval; } -#define CONFIG_ARCH_HAS_ATOMIC_OR - static inline void atomic_and(int i, atomic_t *v) { __insn_fetchand4((void *)&v->counter, i); diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index f3a3ec040694..b3493023efda 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -191,8 +191,6 @@ static inline void atomic_##op(int i, atomic_t *v) \ : "memory"); \ } -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 4dd2450300a6..31371f43c23b 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -145,8 +145,6 @@ static inline int atomic_##op##_return(int i, atomic_t * v) \ ATOMIC_OPS(add) ATOMIC_OPS(sub) -#define CONFIG_ARCH_HAS_ATOMIC_OR - ATOMIC_OP(and) ATOMIC_OP(or) ATOMIC_OP(xor) diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 92947e0a532a..a41b0b8f7404 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -102,24 +102,27 @@ ATOMIC_OP_RETURN(sub, -) ATOMIC_OP(and, &) #endif -#ifndef atomic_clear_mask -#define 
atomic_clear_mask(i, v) atomic_and(~(i), (v)) -#endif - #ifndef atomic_or -#ifndef CONFIG_ARCH_HAS_ATOMIC_OR -#define CONFIG_ARCH_HAS_ATOMIC_OR -#endif ATOMIC_OP(or, |) #endif -#ifndef atomic_set_mask -#define atomic_set_mask(i, v) atomic_or((i), (v)) +#ifndef atomic_xor +ATOMIC_OP(xor, ^) #endif #undef ATOMIC_OP_RETURN #undef ATOMIC_OP +static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + atomic_and(~mask, v); +} + +static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + atomic_or(mask, v); +} + /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 30ad9c86cebb..d48e78ccad3d 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -32,6 +32,10 @@ extern long long atomic64_##op##_return(long long a, atomic64_t *v); ATOMIC64_OPS(add) ATOMIC64_OPS(sub) +ATOMIC64_OP(and) +ATOMIC64_OP(or) +ATOMIC64_OP(xor) + #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 5b08a8540ecf..7d6279012a1f 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -111,19 +111,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -#ifndef CONFIG_ARCH_HAS_ATOMIC_OR -static inline void atomic_or(int i, atomic_t *v) -{ - int old; - int new; - - do { - old = atomic_read(v); - new = old | i; - } while (atomic_cmpxchg(v, old, new) != old); -} -#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */ - #include #ifdef CONFIG_GENERIC_ATOMIC64 #include diff --git a/lib/atomic64.c b/lib/atomic64.c index 1298c05ef528..2886ebac6567 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -102,6 +102,9 @@ EXPORT_SYMBOL(atomic64_##op##_return); ATOMIC64_OPS(add, +=) ATOMIC64_OPS(sub, -=) +ATOMIC64_OP(and, &=) +ATOMIC64_OP(or, |=) +ATOMIC64_OP(xor, ^=) #undef ATOMIC64_OPS #undef ATOMIC64_OP_RETURN -- cgit 
v1.2.3-70-g09d2 From de9e432cb5de1bf2952919dc0b22e4bec0ed8d53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Apr 2015 01:12:32 +0200 Subject: atomic: Collapse all atomic_{set,clear}_mask definitions Move the now generic definitions of atomic_{set,clear}_mask() into linux/atomic.h to avoid endless and pointless repetition. Also, provide an atomic_andnot() wrapper for those few archs that can implement that. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner --- arch/arc/include/asm/atomic.h | 10 ---------- arch/blackfin/include/asm/atomic.h | 10 ---------- arch/frv/include/asm/atomic.h | 10 ---------- arch/h8300/include/asm/atomic.h | 10 ---------- arch/m32r/include/asm/atomic.h | 11 ----------- arch/m68k/include/asm/atomic.h | 10 ---------- arch/metag/include/asm/atomic_lnkget.h | 10 ---------- arch/metag/include/asm/atomic_lock1.h | 10 ---------- arch/mn10300/include/asm/atomic.h | 24 ------------------------ arch/powerpc/kernel/misc_32.S | 19 ------------------- arch/s390/include/asm/atomic.h | 10 ---------- arch/sh/include/asm/atomic.h | 10 ---------- arch/x86/include/asm/atomic.h | 10 ---------- arch/xtensa/include/asm/atomic.h | 10 ---------- include/asm-generic/atomic.h | 10 ---------- include/linux/atomic.h | 25 +++++++++++++++++++++++++ 16 files changed, 25 insertions(+), 174 deletions(-) (limited to 'include/linux') diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h index 2a847821dee1..d8a85e706fba 100644 --- a/arch/arc/include/asm/atomic.h +++ b/arch/arc/include/asm/atomic.h @@ -155,16 +155,6 @@ ATOMIC_OP(xor, ^=, xor) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - /** * __atomic_add_unless - add unless the number is a given value * @v: pointer of type atomic_t diff --git 
a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 2d6a7a3823c3..1c1c42330c99 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -32,16 +32,6 @@ asmlinkage int __raw_atomic_test_asm(const volatile int *ptr, int value); #define atomic_and(i, v) (void)__raw_atomic_and_asm(&(v)->counter, i) #define atomic_xor(i, v) (void)__raw_atomic_xor_asm(&(v)->counter, i) -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #endif #include diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index fc48bea26b40..0da689def4cc 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -198,14 +198,4 @@ ATOMIC_OP(xor) #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #endif /* _ASM_ATOMIC_H */ diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index c4d061f09c44..702ee539f87d 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -89,14 +89,4 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) return ret; } -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #endif /* __ARCH_H8300_ATOMIC __ */ diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index b2a13fbd5be0..025e2a170493 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -243,15 +243,4 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) 
return c; } - -static __inline__ __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static __inline__ __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #endif /* _ASM_M32R_ATOMIC_H */ diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 93ebd96aa494..039fac120cc0 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -174,16 +174,6 @@ static inline int atomic_add_negative(int i, atomic_t *v) return c != 0; } -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h index 0642606de901..21c4c268b86c 100644 --- a/arch/metag/include/asm/atomic_lnkget.h +++ b/arch/metag/include/asm/atomic_lnkget.h @@ -82,16 +82,6 @@ ATOMIC_OP(xor) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int result, temp; diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h index 7d88725a85da..f8efe380fe8b 100644 --- a/arch/metag/include/asm/atomic_lock1.h +++ b/arch/metag/include/asm/atomic_lock1.h @@ -76,16 +76,6 @@ ATOMIC_OP(xor, ^=) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, 
v); -} - static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index f5a63f0bda46..375e59140c9c 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -131,30 +131,6 @@ static inline void atomic_dec(atomic_t *v) #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) -/** - * atomic_clear_mask - Atomically clear bits in memory - * @mask: Mask of the bits to be cleared - * @v: pointer to word in memory - * - * Atomically clears the bits set in mask from the memory word specified. - */ -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -/** - * atomic_set_mask - Atomically set bits in memory - * @mask: Mask of the bits to be set - * @v: pointer to word in memory - * - * Atomically sets the bits set in mask from the memory word specified. - */ -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #endif /* __KERNEL__ */ #endif /* CONFIG_SMP */ #endif /* _ASM_ATOMIC_H */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 7c6bb4b17b49..ed3ab509faca 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -595,25 +595,6 @@ _GLOBAL(copy_page) li r11,4 b 2b -/* - * void atomic_clear_mask(atomic_t mask, atomic_t *addr) - * void atomic_set_mask(atomic_t mask, atomic_t *addr); - */ -_GLOBAL(atomic_clear_mask) -10: lwarx r5,0,r4 - andc r5,r5,r3 - PPC405_ERR77(0,r4) - stwcx. r5,0,r4 - bne- 10b - blr -_GLOBAL(atomic_set_mask) -10: lwarx r5,0,r4 - or r5,r5,r3 - PPC405_ERR77(0,r4) - stwcx. r5,0,r4 - bne- 10b - blr - /* * Extended precision shifts. 
* diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index d761aeff72da..117fa5c921c1 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -132,16 +132,6 @@ ATOMIC_OP(xor, XOR) #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) static inline int atomic_cmpxchg(atomic_t *v, int old, int new) diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index cee0245257e1..05b9f74ce2d5 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -25,16 +25,6 @@ #include #endif -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index b3493023efda..fb52aa644aab 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -234,16 +234,6 @@ static __always_inline short int atomic_inc_short(short int *v) return *v; } -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - #ifdef CONFIG_X86_32 # include #else diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index 31371f43c23b..e0be67936990 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -153,16 +153,6 @@ 
ATOMIC_OP(xor) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - /** * atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index a41b0b8f7404..d4d7e337fdcb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -113,16 +113,6 @@ ATOMIC_OP(xor, ^) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) -{ - atomic_and(~mask, v); -} - -static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) -{ - atomic_or(mask, v); -} - /* * Atomic operations that C can't guarantee us. Useful for * resource counting etc.. diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 7d6279012a1f..8b98b423388f 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -28,6 +28,23 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #endif +#ifndef atomic_andnot +static inline void atomic_andnot(int i, atomic_t *v) +{ + atomic_and(~i, v); +} +#endif + +static inline __deprecated void atomic_clear_mask(unsigned int mask, atomic_t *v) +{ + atomic_andnot(mask, v); +} + +static inline __deprecated void atomic_set_mask(unsigned int mask, atomic_t *v) +{ + atomic_or(mask, v); +} + /** * atomic_inc_not_zero_hint - increment if not null * @v: pointer of type atomic_t @@ -115,4 +132,12 @@ static inline int atomic_dec_if_positive(atomic_t *v) #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif + +#ifndef atomic64_andnot +static inline void atomic64_andnot(long long i, atomic64_t *v) +{ + atomic64_and(~i, v); +} +#endif + #endif /* _LINUX_ATOMIC_H */ -- cgit 
v1.2.3-70-g09d2 From 91492a44b998cf762150de8f1b40bda1902e8ea7 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Wed, 22 Jul 2015 15:05:18 +0200 Subject: gpio: generic: support input-only chips Allow chips to indicate that they are input-only and thus cannot set the output value. This will be used by the gpio-etraxfs driver. Signed-off-by: Rabin Vincent Signed-off-by: Linus Walleij --- drivers/gpio/gpio-generic.c | 23 ++++++++++++++++++++--- include/linux/basic_mmio_gpio.h | 1 + 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpio-generic.c b/drivers/gpio/gpio-generic.c index 802e6d2c64e9..a3f07537fe62 100644 --- a/drivers/gpio/gpio-generic.c +++ b/drivers/gpio/gpio-generic.c @@ -153,6 +153,10 @@ static int bgpio_get(struct gpio_chip *gc, unsigned int gpio) return !!(bgc->read_reg(bgc->reg_dat) & bgc->pin2mask(bgc, gpio)); } +static void bgpio_set_none(struct gpio_chip *gc, unsigned int gpio, int val) +{ +} + static void bgpio_set(struct gpio_chip *gc, unsigned int gpio, int val) { struct bgpio_chip *bgc = to_bgpio_chip(gc); @@ -279,6 +283,12 @@ static int bgpio_simple_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } +static int bgpio_dir_out_err(struct gpio_chip *gc, unsigned int gpio, + int val) +{ + return -EINVAL; +} + static int bgpio_simple_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { @@ -460,6 +470,9 @@ static int bgpio_setup_io(struct bgpio_chip *bgc, bgc->reg_set = set; bgc->gc.set = bgpio_set_set; bgc->gc.set_multiple = bgpio_set_multiple_set; + } else if (flags & BGPIOF_NO_OUTPUT) { + bgc->gc.set = bgpio_set_none; + bgc->gc.set_multiple = NULL; } else { bgc->gc.set = bgpio_set; bgc->gc.set_multiple = bgpio_set_multiple; @@ -476,7 +489,8 @@ static int bgpio_setup_io(struct bgpio_chip *bgc, static int bgpio_setup_direction(struct bgpio_chip *bgc, void __iomem *dirout, - void __iomem *dirin) + void __iomem *dirin, + unsigned long flags) { if (dirout && dirin) { return 
-EINVAL; @@ -491,7 +505,10 @@ static int bgpio_setup_direction(struct bgpio_chip *bgc, bgc->gc.direction_input = bgpio_dir_in_inv; bgc->gc.get_direction = bgpio_get_dir_inv; } else { - bgc->gc.direction_output = bgpio_simple_dir_out; + if (flags & BGPIOF_NO_OUTPUT) + bgc->gc.direction_output = bgpio_dir_out_err; + else + bgc->gc.direction_output = bgpio_simple_dir_out; bgc->gc.direction_input = bgpio_simple_dir_in; } @@ -543,7 +560,7 @@ int bgpio_init(struct bgpio_chip *bgc, struct device *dev, if (ret) return ret; - ret = bgpio_setup_direction(bgc, dirout, dirin); + ret = bgpio_setup_direction(bgc, dirout, dirin, flags); if (ret) return ret; diff --git a/include/linux/basic_mmio_gpio.h b/include/linux/basic_mmio_gpio.h index 14eea946e640..ed3768f4ecc7 100644 --- a/include/linux/basic_mmio_gpio.h +++ b/include/linux/basic_mmio_gpio.h @@ -75,5 +75,6 @@ int bgpio_init(struct bgpio_chip *bgc, struct device *dev, #define BGPIOF_UNREADABLE_REG_DIR BIT(2) /* reg_dir is unreadable */ #define BGPIOF_BIG_ENDIAN_BYTE_ORDER BIT(3) #define BGPIOF_READ_OUTPUT_REG_SET BIT(4) /* reg_set stores output value */ +#define BGPIOF_NO_OUTPUT BIT(5) /* only input */ #endif /* __BASIC_MMIO_GPIO_H */ -- cgit v1.2.3-70-g09d2 From 35068ce8cbf1749ef1a4b9b1493af83b8488c37b Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Wed, 1 Jul 2015 09:10:43 +0200 Subject: of: constify drv arg of of_driver_match_device stub With this change the stub has the same signature as the actual function, preventing this compiler warning when building without CONFIG_OF: drivers/base/property.c: In function 'fwnode_driver_match_device': >> drivers/base/property.c:608:38: warning: passing argument 2 of 'of_driver_match_device' discards 'const' qualifier from pointer target type return of_driver_match_device(dev, drv); ^ In file included from drivers/base/property.c:18:0: include/linux/of_device.h:61:19: note: expected 'struct device_driver *' but argument is of type 'const struct device_driver *' static inline int 
of_driver_match_device(struct device *dev, ^ Signed-off-by: Tomeu Vizoso Signed-off-by: Rob Herring --- include/linux/of_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 4c508549833a..cc7dd687a89d 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -59,7 +59,7 @@ void of_dma_configure(struct device *dev, struct device_node *np); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, - struct device_driver *drv) + const struct device_driver *drv) { return 0; } -- cgit v1.2.3-70-g09d2 From 77fc29c4bbbbd01ee22c50ce8260fd0f2e08c124 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 27 Jul 2015 14:46:31 +0300 Subject: net/mlx4_core: Preparations for 802.1ad VLAN support mlx4_core preparation to support hardware accelerated 802.1ad VLAN device. To allow 802.1ad accelerated device, "packet has vlan" (phv) Firmware capability should be available. Firmware without the phv capability won't behave properly and can't support 802.1ad device acceleration. The driver checks the Firmware capability and sets the phv bit accordingly in SET_PORT command. Signed-off-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 82 +++++++++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 15 ++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 ++ include/linux/mlx4/device.h | 5 ++ 5 files changed, 106 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index e30bf57ad7a1..5a1c3d249530 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -154,6 +154,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [26] = "Port ETS Scheduler support", [27] = "Port beacon support", [28] = "RX-ALL support", + [29] = "802.1ad offload support", }; int i; @@ -307,6 +308,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 #define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) +#define QUERY_FUNC_CAP_PHV_BIT 0x40 if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = @@ -351,6 +353,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); + if (dev->caps.phv_bit[port]) { + field = QUERY_FUNC_CAP_PHV_BIT; + MLX4_PUT(outbox->buf, field, + QUERY_FUNC_CAP_FLAGS0_OFFSET); + } + } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); @@ -600,6 +608,9 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); + func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index @@ -700,6 +711,7 @@ int 
mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_D_MPT_ENTRY_SZ_OFFSET 0x92 #define QUERY_DEV_CAP_BMME_FLAGS_OFFSET 0x94 #define QUERY_DEV_CAP_CONFIG_DEV_OFFSET 0x94 +#define QUERY_DEV_CAP_PHV_EN_OFFSET 0x96 #define QUERY_DEV_CAP_RSVD_LKEY_OFFSET 0x98 #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_ETH_BACKPL_OFFSET 0x9c @@ -898,6 +910,12 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; if (field & (1 << 2)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_IGNORE_FCS; + MLX4_GET(field, outbox, QUERY_DEV_CAP_PHV_EN_OFFSET); + if (field & 0x80) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PHV_EN; + if (field & 0x40) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN; + MLX4_GET(dev_cap->reserved_lkey, outbox, QUERY_DEV_CAP_RSVD_LKEY_OFFSET); MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); @@ -1992,6 +2010,10 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET); MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET); + /* phv_check enable */ + MLX4_GET(byte_field, outbox, INIT_HCA_CACHELINE_SZ_OFFSET); + if (byte_field & 0x2) + param->phv_check_en = 1; out: mlx4_free_cmd_mailbox(dev, mailbox); @@ -2758,3 +2780,63 @@ int mlx4_ACCESS_REG_wrapper(struct mlx4_dev *dev, int slave, 0, MLX4_CMD_ACCESS_REG, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } + +static int mlx4_SET_PORT_phv_bit(struct mlx4_dev *dev, u8 port, u8 phv_bit) +{ +#define SET_PORT_GEN_PHV_VALID 0x10 +#define SET_PORT_GEN_PHV_EN 0x80 + + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + + context->v_ignore_fcs |= SET_PORT_GEN_PHV_VALID; + if (phv_bit) + context->phv_en |= SET_PORT_GEN_PHV_EN; + + in_mod = MLX4_SET_PORT_GENERAL << 
8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + +int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) +{ + int err; + struct mlx4_func_cap func_cap; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, 1, &func_cap); + if (!err) + *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + return err; +} +EXPORT_SYMBOL(get_phv_bit); + +int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) +{ + int ret; + + if (mlx4_is_slave(dev)) + return -EPERM; + + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { + ret = mlx4_SET_PORT_phv_bit(dev, port, new_val); + if (!ret) + dev->caps.phv_bit[port] = new_val; + return ret; + } + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(set_phv_bit); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 07cb7c2461ad..08de5555c2f4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -204,6 +204,7 @@ struct mlx4_init_hca_param { u16 cqe_size; /* For use only when CQE stride feature enabled */ u16 eqe_size; /* For use only when EQE stride feature enabled */ u8 rss_ip_frags; + u8 phv_check_en; /* for QUERY_HCA */ }; struct mlx4_init_ib_param { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d76f4257e305..6f35b6c06193 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -405,6 +405,21 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.max_gso_sz = dev_cap->max_gso_sz; dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) { + struct mlx4_init_hca_param hca_param; + + memset(&hca_param, 0, sizeof(hca_param)); + err = 
mlx4_QUERY_HCA(dev, &hca_param); + /* Turn off PHV_EN flag in case phv_check_en is set. + * phv_check_en is a HW check that parse the packet and verify + * phv bit was reported correctly in the wqe. To allow QinQ + * PHV_EN flag should be set and phv_check_en must be cleared + * otherwise QinQ packets will be drop by the HW. + */ + if (err || hca_param.phv_check_en) + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN; + } + /* Sense port always allowed on supported devices for ConnectX-1 and -2 */ if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT) dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index a092c5c34d43..232b2b55f23b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -787,6 +787,9 @@ struct mlx4_set_port_general_context { u8 pprx; u8 pfcrx; u16 reserved4; + u32 reserved5; + u8 phv_en; + u8 reserved6[3]; }; struct mlx4_set_port_rqp_calc_context { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index fd13c1ce3b4a..bcbf8c72a77b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -211,6 +211,8 @@ enum { MLX4_DEV_CAP_FLAG2_ETS_CFG = 1LL << 26, MLX4_DEV_CAP_FLAG2_PORT_BEACON = 1LL << 27, MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, + MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29, + MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, }; enum { @@ -581,6 +583,7 @@ struct mlx4_caps { u64 phys_port_id[MLX4_MAX_PORTS + 1]; int tunnel_offload_mode; u8 rx_checksum_flags_port[MLX4_MAX_PORTS + 1]; + u8 phv_bit[MLX4_MAX_PORTS + 1]; u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; @@ -1332,6 +1335,8 @@ int mlx4_SET_PORT_BEACON(struct mlx4_dev *dev, u8 port, u16 time); int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value); int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); +int 
set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); +int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); -- cgit v1.2.3-70-g09d2 From e802f8e4c54e6adf4215ef9fa3d6eea8fcb10bf9 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 27 Jul 2015 14:46:33 +0300 Subject: net/mlx4: Prepare VLAN macros for 802.1ad Hardware accelerated support To add Hardware accelerated support in 802.1ad vlan, replace Current VLAN macros to CVLAN. Replace: MLX4_WQE_CTRL_INS_VLAN MLX4_CQE_VLAN_PRESENT_MASK With: MLX4_WQE_CTRL_INS_CVLAN MLX4_CQE_CVLAN_PRESENT_MASK Signed-off-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cq.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 2 +- include/linux/mlx4/cq.h | 2 +- include/linux/mlx4/qp.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 36eb3d012b6d..180a8f7ec82d 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -871,7 +871,7 @@ repoll: if (is_eth) { wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; if (be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_VLAN_PRESENT_MASK) { + MLX4_CQE_CVLAN_PRESENT_MASK) { wc->vlan_id = be16_to_cpu(cqe->sl_vid) & MLX4_CQE_VID_MASK; } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 12c65e1ad6a9..10f6c2f1d5a0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -726,7 +726,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, hw_checksum = csum_unfold((__force 
__sum16)cqe->checksum); - if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK) && + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) { hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr); hdr += sizeof(struct vlan_hdr); @@ -907,7 +907,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud gro_skb->csum_level = 1; if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) && + cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { u16 vid = be16_to_cpu(cqe->sl_vid); @@ -970,7 +970,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud PKT_HASH_TYPE_L3); if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_VLAN_PRESENT_MASK) && + MLX4_CQE_CVLAN_PRESENT_MASK) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index c10d98f6ad96..7c858f67ef28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -958,7 +958,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->bf.offset ^= ring->bf.buf_size; } else { tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * + tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN * !!skb_vlan_tag_present(skb); tx_desc->ctrl.fence_size = real_size; diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index e7ecc12a1163..899a97b20d27 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -88,7 +88,7 @@ struct mlx4_ts_cqe { enum { MLX4_CQE_L2_TUNNEL_IPOK = 1 << 31, - MLX4_CQE_VLAN_PRESENT_MASK = 1 << 29, + MLX4_CQE_CVLAN_PRESENT_MASK = 1 << 29, MLX4_CQE_L2_TUNNEL = 1 << 27, MLX4_CQE_L2_TUNNEL_CSUM = 1 << 26, MLX4_CQE_L2_TUNNEL_IPV4 = 1 << 25, diff --git 
a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 6fed539e5456..6c619006c21f 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -272,7 +272,7 @@ enum { MLX4_WQE_CTRL_SOLICITED = 1 << 1, MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, - MLX4_WQE_CTRL_INS_VLAN = 1 << 6, + MLX4_WQE_CTRL_INS_CVLAN = 1 << 6, MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; -- cgit v1.2.3-70-g09d2 From e38af4faf01d0b35df6995fb395e5fa4a4898289 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Mon, 27 Jul 2015 14:46:34 +0300 Subject: net/mlx4_en: Add support for hardware accelerated 802.1ad vlan To enable device support in accelerated 802.1ad vlan, the port capability "packet has vlan enable" (phv_en) should be set. Firmware won't work properly, in case phv_en is not set. The user can enable "phv_en" port capability with the new ethtool private flag phv-bit. The phv-bit private flag default value is OFF, users who are interested in 802.1ad hardware acceleration should turn ON the phv-bit private flag: $ ethtool --set-priv-flags eth1 phv-bit on Once the private flag is set, the device is ready for 802.1ad vlan acceleration. The user should also change the interface device features and turn on "tx-vlan-stag-hw-insert" which is off by default: $ ethtool -K eth1 tx-vlan-stag-hw-insert on "phv-bit" private flag setting is available only for Physical Functions(PF), the Virtual Function (VF) will be able to use the feature by setting "tx-vlan-stag-hw-insert" ethtool device feature only if the feature was enabled by the Hypervisor. Signed-off-by: Hadar Hen Zion Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 16 +++++++++ drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 46 +++++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/en_rx.c | 16 ++++++++- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 13 ++++--- drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 1 + include/linux/mlx4/cq.h | 1 + include/linux/mlx4/qp.h | 1 + 7 files changed, 89 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 70f65534e786..f79d8124321e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -102,6 +102,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) static const char mlx4_en_priv_flags[][ETH_GSTRING_LEN] = { "blueflame", + "phv-bit" }; static const char main_strings[][ETH_GSTRING_LEN] = { @@ -1797,9 +1798,13 @@ static int mlx4_en_get_ts_info(struct net_device *dev, static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) { struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; bool bf_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_BLUEFLAME); bool bf_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_BLUEFLAME); + bool phv_enabled_new = !!(flags & MLX4_EN_PRIV_FLAGS_PHV); + bool phv_enabled_old = !!(priv->pflags & MLX4_EN_PRIV_FLAGS_PHV); int i; + int ret = 0; if (bf_enabled_new != bf_enabled_old) { if (bf_enabled_new) { @@ -1825,6 +1830,17 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags) bf_enabled_new ? "Enabled" : "Disabled"); } + if (phv_enabled_new != phv_enabled_old) { + ret = set_phv_bit(mdev->dev, priv->port, (int)phv_enabled_new); + if (ret) + return ret; + else if (phv_enabled_new) + priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; + else + priv->pflags &= ~MLX4_EN_PRIV_FLAGS_PHV; + en_info(priv, "PHV bit %s\n", + phv_enabled_new ? 
"Enabled" : "Disabled"); + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e0de2fd1ce12..4726122ea76b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2184,6 +2184,25 @@ static int mlx4_en_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } +static netdev_features_t mlx4_en_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct mlx4_en_priv *en_priv = netdev_priv(netdev); + struct mlx4_en_dev *mdev = en_priv->mdev; + + /* Since there is no support for separate RX C-TAG/S-TAG vlan accel + * enable/disable make sure S-TAG flag is always in same state as + * C-TAG. + */ + if (features & NETIF_F_HW_VLAN_CTAG_RX && + !(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) + features |= NETIF_F_HW_VLAN_STAG_RX; + else + features &= ~NETIF_F_HW_VLAN_STAG_RX; + + return features; +} + static int mlx4_en_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2218,6 +2237,10 @@ static int mlx4_en_set_features(struct net_device *netdev, en_info(priv, "Turn %s TX vlan strip offload\n", (features & NETIF_F_HW_VLAN_CTAG_TX) ? "ON" : "OFF"); + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_HW_VLAN_STAG_TX)) + en_info(priv, "Turn %s TX S-VLAN strip offload\n", + (features & NETIF_F_HW_VLAN_STAG_TX) ? "ON" : "OFF"); + if (DEV_FEATURE_CHANGED(netdev, features, NETIF_F_LOOPBACK)) { en_info(priv, "Turn %s loopback\n", (features & NETIF_F_LOOPBACK) ? 
"ON" : "OFF"); @@ -2460,6 +2483,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_poll_controller = mlx4_en_netpoll, #endif .ndo_set_features = mlx4_en_set_features, + .ndo_fix_features = mlx4_en_fix_features, .ndo_setup_tc = mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, @@ -2500,6 +2524,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_poll_controller = mlx4_en_netpoll, #endif .ndo_set_features = mlx4_en_set_features, + .ndo_fix_features = mlx4_en_fix_features, .ndo_setup_tc = mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, @@ -2931,6 +2956,27 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) { + dev->features |= NETIF_F_HW_VLAN_STAG_RX | + NETIF_F_HW_VLAN_STAG_FILTER; + dev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; + } + + if (mlx4_is_slave(mdev->dev)) { + int phv; + + err = get_phv_bit(mdev->dev, port, &phv); + if (!err && phv) { + dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; + } + } else { + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && + !(mdev->dev->caps.flags2 & + MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN)) + dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + } + if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) dev->hw_features |= NETIF_F_RXFCS; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 10f6c2f1d5a0..a67fbb90d69e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -912,6 +912,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud u16 vid = be16_to_cpu(cqe->sl_vid); __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); + } else if ((be32_to_cpu(cqe->vlan_my_qpn) & + 
MLX4_CQE_SVLAN_PRESENT_MASK) && + (dev->features & NETIF_F_HW_VLAN_STAG_RX)) { + __vlan_hwaccel_put_tag(gro_skb, + htons(ETH_P_8021AD), + be16_to_cpu(cqe->sl_vid)); } if (dev->features & NETIF_F_RXHASH) @@ -973,6 +979,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud MLX4_CQE_CVLAN_PRESENT_MASK) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); + else if ((be32_to_cpu(cqe->vlan_my_qpn) & + MLX4_CQE_SVLAN_PRESENT_MASK) && + (dev->features & NETIF_F_HW_VLAN_STAG_RX)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), + be16_to_cpu(cqe->sl_vid)); if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { timestamp = mlx4_en_get_cqe_ts(cqe); @@ -1070,7 +1081,10 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); - int eff_mtu = dev->mtu + ETH_HLEN + VLAN_HLEN; + /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). 
+ */ + int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN); int buf_size = 0; int i = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 7c858f67ef28..494e7762fdb1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -718,6 +718,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) u32 index, bf_index; __be32 op_own; u16 vlan_tag = 0; + u16 vlan_proto = 0; int i_frag; int lso_header_size; void *fragptr = NULL; @@ -750,9 +751,10 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { vlan_tag = skb_vlan_tag_get(skb); - + vlan_proto = be16_to_cpu(skb->vlan_proto); + } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -958,8 +960,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->bf.offset ^= ring->bf.buf_size; } else { tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN * - !!skb_vlan_tag_present(skb); + if (vlan_proto == ETH_P_8021AD) + tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + tx_desc->ctrl.fence_size = real_size; /* Ensure new descriptor hits memory diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 666d1669eb52..defcf8c395bf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -95,6 +95,7 @@ */ #define MLX4_EN_PRIV_FLAGS_BLUEFLAME 1 +#define MLX4_EN_PRIV_FLAGS_PHV 2 #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ) diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 899a97b20d27..09cebe528488 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -89,6 +89,7 @@ struct mlx4_ts_cqe { enum { MLX4_CQE_L2_TUNNEL_IPOK 
= 1 << 31, MLX4_CQE_CVLAN_PRESENT_MASK = 1 << 29, + MLX4_CQE_SVLAN_PRESENT_MASK = 1 << 30, MLX4_CQE_L2_TUNNEL = 1 << 27, MLX4_CQE_L2_TUNNEL_CSUM = 1 << 26, MLX4_CQE_L2_TUNNEL_IPV4 = 1 << 25, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 6c619006c21f..de45a51b3f04 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -273,6 +273,7 @@ enum { MLX4_WQE_CTRL_IP_CSUM = 1 << 4, MLX4_WQE_CTRL_TCP_UDP_CSUM = 1 << 5, MLX4_WQE_CTRL_INS_CVLAN = 1 << 6, + MLX4_WQE_CTRL_INS_SVLAN = 1 << 7, MLX4_WQE_CTRL_STRONG_ORDER = 1 << 7, MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; -- cgit v1.2.3-70-g09d2 From 0817b62cc037a56c5e4238c7eb7522299ea27aef Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 7 Jul 2015 20:48:08 +0200 Subject: clk: change clk_ops' ->determine_rate() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clock rates are stored in an unsigned long field, but ->determine_rate() (which returns a rounded rate from a requested one) returns a long value (errors are reported using negative error codes), which can lead to long overflow if the clock rate exceeds 2GHz. Change ->determine_rate() prototype to return 0 or an error code, and pass a pointer to a clk_rate_request structure containing the expected target rate and the rate constraints imposed by clk users. The clk_rate_request structure might be extended in the future to contain other kinds of constraints like the rounding policy, the maximum clock inaccuracy or other things that are not yet supported by the CCF (power consumption constraints?). 
Signed-off-by: Boris Brezillon CC: Jonathan Corbet CC: Tony Lindgren CC: Ralf Baechle CC: "Emilio López" CC: Maxime Ripard Acked-by: Tero Kristo CC: Peter De Schrijver CC: Prashant Gaikwad CC: Stephen Warren CC: Thierry Reding CC: Alexandre Courbot CC: linux-doc@vger.kernel.org CC: linux-kernel@vger.kernel.org CC: linux-arm-kernel@lists.infradead.org CC: linux-omap@vger.kernel.org CC: linux-mips@linux-mips.org CC: linux-tegra@vger.kernel.org [sboyd@codeaurora.org: Fix parent dereference problem in __clk_determine_rate()] Signed-off-by: Stephen Boyd Tested-by: Romain Perier Signed-off-by: Heiko Stuebner [sboyd@codeaurora.org: Folded in fix from Heiko for fixed-rate clocks without parents or a rate determining op] Signed-off-by: Stephen Boyd --- Documentation/clk.txt | 8 +- arch/arm/mach-omap2/dpll3xxx.c | 29 +++--- arch/arm/mach-omap2/dpll44xx.c | 30 +++--- arch/mips/alchemy/common/clock.c | 61 +++++-------- drivers/clk/at91/clk-programmable.c | 25 ++--- drivers/clk/at91/clk-usb.c | 28 +++--- drivers/clk/bcm/clk-kona.c | 34 ++++--- drivers/clk/clk-composite.c | 48 +++++----- drivers/clk/clk.c | 176 ++++++++++++++++++++---------------- drivers/clk/hisilicon/clk-hi3620.c | 39 ++++---- drivers/clk/mmp/clk-mix.c | 20 ++-- drivers/clk/qcom/clk-pll.c | 18 ++-- drivers/clk/qcom/clk-rcg.c | 44 ++++----- drivers/clk/qcom/clk-rcg2.c | 78 ++++++++-------- drivers/clk/sunxi/clk-factors.c | 21 ++--- drivers/clk/sunxi/clk-sun6i-ar100.c | 21 ++--- drivers/clk/sunxi/clk-sunxi.c | 20 ++-- drivers/clk/tegra/clk-emc.c | 28 +++--- include/linux/clk-provider.h | 49 ++++++---- include/linux/clk/ti.h | 16 +--- 20 files changed, 392 insertions(+), 401 deletions(-) (limited to 'include/linux') diff --git a/Documentation/clk.txt b/Documentation/clk.txt index f463bdc37f88..5c4bc4d01d0c 100644 --- a/Documentation/clk.txt +++ b/Documentation/clk.txt @@ -71,12 +71,8 @@ the operations defined in clk.h: long (*round_rate)(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate); - long 
(*determine_rate)(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk); + int (*determine_rate)(struct clk_hw *hw, + struct clk_rate_request *req); int (*set_parent)(struct clk_hw *hw, u8 index); u8 (*get_parent)(struct clk_hw *hw); int (*set_rate)(struct clk_hw *hw, diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index 44e57ec225d4..8c57ace30421 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -462,43 +462,38 @@ void omap3_noncore_dpll_disable(struct clk_hw *hw) /** * omap3_noncore_dpll_determine_rate - determine rate for a DPLL * @hw: pointer to the clock to determine rate for - * @rate: target rate for the DPLL - * @best_parent_rate: pointer for returning best parent rate - * @best_parent_clk: pointer for returning best parent clock + * @req: target rate request * * Determines which DPLL mode to use for reaching a desired target rate. * Checks whether the DPLL shall be in bypass or locked mode, and if * locked, calculates the M,N values for the DPLL via round-rate. - * Returns a positive clock rate with success, negative error value - * in failure. + * Returns a 0 on success, negative error value in failure. 
*/ -long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +int omap3_noncore_dpll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_hw_omap *clk = to_clk_hw_omap(hw); struct dpll_data *dd; - if (!hw || !rate) + if (!req->rate) return -EINVAL; dd = clk->dpll_data; if (!dd) return -EINVAL; - if (__clk_get_rate(dd->clk_bypass) == rate && + if (__clk_get_rate(dd->clk_bypass) == req->rate && (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - *best_parent_clk = __clk_get_hw(dd->clk_bypass); + req->best_parent_hw = __clk_get_hw(dd->clk_bypass); } else { - rate = omap2_dpll_round_rate(hw, rate, best_parent_rate); - *best_parent_clk = __clk_get_hw(dd->clk_ref); + req->rate = omap2_dpll_round_rate(hw, req->rate, + &req->best_parent_rate); + req->best_parent_hw = __clk_get_hw(dd->clk_ref); } - *best_parent_rate = rate; + req->best_parent_rate = req->rate; - return rate; + return 0; } /** diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c index f231be05b9a6..446a4e0d5a6a 100644 --- a/arch/arm/mach-omap2/dpll44xx.c +++ b/arch/arm/mach-omap2/dpll44xx.c @@ -191,42 +191,36 @@ out: /** * omap4_dpll_regm4xen_determine_rate - determine rate for a DPLL * @hw: pointer to the clock to determine rate for - * @rate: target rate for the DPLL - * @best_parent_rate: pointer for returning best parent rate - * @best_parent_clk: pointer for returning best parent clock + * @req: target rate request * * Determines which DPLL mode to use for reaching a desired rate. * Checks whether the DPLL shall be in bypass or locked mode, and if * locked, calculates the M,N values for the DPLL via round-rate. - * Returns a positive clock rate with success, negative error value - * in failure. + * Returns 0 on success and a negative error value otherwise. 
*/ -long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_hw_omap *clk = to_clk_hw_omap(hw); struct dpll_data *dd; - if (!hw || !rate) + if (!req->rate) return -EINVAL; dd = clk->dpll_data; if (!dd) return -EINVAL; - if (__clk_get_rate(dd->clk_bypass) == rate && + if (__clk_get_rate(dd->clk_bypass) == req->rate && (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) { - *best_parent_clk = __clk_get_hw(dd->clk_bypass); + req->best_parent_hw = __clk_get_hw(dd->clk_bypass); } else { - rate = omap4_dpll_regm4xen_round_rate(hw, rate, - best_parent_rate); - *best_parent_clk = __clk_get_hw(dd->clk_ref); + req->rate = omap4_dpll_regm4xen_round_rate(hw, req->rate, + &req->best_parent_rate); + req->best_parent_hw = __clk_get_hw(dd->clk_ref); } - *best_parent_rate = rate; + req->best_parent_rate = req->rate; - return rate; + return 0; } diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index 6e46abe0dac6..0b4cf3e9f005 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -389,10 +389,9 @@ static long alchemy_calc_div(unsigned long rate, unsigned long prate, return div1; } -static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk, - int scale, int maxdiv) +static int alchemy_clk_fgcs_detr(struct clk_hw *hw, + struct clk_rate_request *req, + int scale, int maxdiv) { struct clk *pc, *bpc, *free; long tdv, tpr, pr, nr, br, bpr, diff, lastdiff; @@ -422,14 +421,14 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate, } pr = clk_get_rate(pc); - if (pr < rate) + if (pr < req->rate) continue; /* what can hardware actually provide */ - tdv = alchemy_calc_div(rate, pr, scale, 
maxdiv, NULL); + tdv = alchemy_calc_div(req->rate, pr, scale, maxdiv, NULL); nr = pr / tdv; - diff = rate - nr; - if (nr > rate) + diff = req->rate - nr; + if (nr > req->rate) continue; if (diff < lastdiff) { @@ -448,15 +447,16 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate, */ if (lastdiff && free) { for (j = (maxdiv == 4) ? 1 : scale; j <= maxdiv; j += scale) { - tpr = rate * j; + tpr = req->rate * j; if (tpr < 0) break; pr = clk_round_rate(free, tpr); - tdv = alchemy_calc_div(rate, pr, scale, maxdiv, NULL); + tdv = alchemy_calc_div(req->rate, pr, scale, maxdiv, + NULL); nr = pr / tdv; - diff = rate - nr; - if (nr > rate) + diff = req->rate - nr; + if (nr > req->rate) continue; if (diff < lastdiff) { lastdiff = diff; @@ -469,9 +469,10 @@ static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate, } } - *best_parent_rate = bpr; - *best_parent_clk = __clk_get_hw(bpc); - return br; + req->best_parent_rate = bpr; + req->best_parent_hw = __clk_get_hw(bpc); + req->rate = br; + return 0; } static int alchemy_clk_fgv1_en(struct clk_hw *hw) @@ -562,14 +563,10 @@ static unsigned long alchemy_clk_fgv1_recalc(struct clk_hw *hw, return parent_rate / v; } -static long alchemy_clk_fgv1_detr(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +static int alchemy_clk_fgv1_detr(struct clk_hw *hw, + struct clk_rate_request *req) { - return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate, - best_parent_clk, 2, 512); + return alchemy_clk_fgcs_detr(hw, req, 2, 512); } /* Au1000, Au1100, Au15x0, Au12x0 */ @@ -696,11 +693,8 @@ static unsigned long alchemy_clk_fgv2_recalc(struct clk_hw *hw, return t; } -static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +static int alchemy_clk_fgv2_detr(struct 
clk_hw *hw, + struct clk_rate_request *req) { struct alchemy_fgcs_clk *c = to_fgcs_clk(hw); int scale, maxdiv; @@ -713,8 +707,7 @@ static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate, maxdiv = 512; } - return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate, - best_parent_clk, scale, maxdiv); + return alchemy_clk_fgcs_detr(hw, req, scale, maxdiv); } /* Au1300 larger input mux, no separate disable bit, flexible divider */ @@ -917,17 +910,13 @@ static int alchemy_clk_csrc_setr(struct clk_hw *hw, unsigned long rate, return 0; } -static long alchemy_clk_csrc_detr(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +static int alchemy_clk_csrc_detr(struct clk_hw *hw, + struct clk_rate_request *req) { struct alchemy_fgcs_clk *c = to_fgcs_clk(hw); int scale = c->dt[2] == 3 ? 1 : 2; /* au1300 check */ - return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate, - best_parent_clk, scale, 4); + return alchemy_clk_fgcs_detr(hw, req, scale, 4); } static struct clk_ops alchemy_clkops_csrc = { diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 8c86c0f7847a..43dacad5c96d 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -54,12 +54,8 @@ static unsigned long clk_programmable_recalc_rate(struct clk_hw *hw, return parent_rate >> pres; } -static long clk_programmable_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_hw) +static int clk_programmable_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk *parent = NULL; long best_rate = -EINVAL; @@ -76,24 +72,29 @@ static long clk_programmable_determine_rate(struct clk_hw *hw, parent_rate = __clk_get_rate(parent); for (shift = 0; shift < PROG_PRES_MASK; shift++) { tmp_rate = 
parent_rate >> shift; - if (tmp_rate <= rate) + if (tmp_rate <= req->rate) break; } - if (tmp_rate > rate) + if (tmp_rate > req->rate) continue; - if (best_rate < 0 || (rate - tmp_rate) < (rate - best_rate)) { + if (best_rate < 0 || + (req->rate - tmp_rate) < (req->rate - best_rate)) { best_rate = tmp_rate; - *best_parent_rate = parent_rate; - *best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = parent_rate; + req->best_parent_hw = __clk_get_hw(parent); } if (!best_rate) break; } - return best_rate; + if (best_rate < 0) + return best_rate; + + req->rate = best_rate; + return 0; } static int clk_programmable_set_parent(struct clk_hw *hw, u8 index) diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c index b0cbd2b1ff59..24747df97742 100644 --- a/drivers/clk/at91/clk-usb.c +++ b/drivers/clk/at91/clk-usb.c @@ -56,12 +56,8 @@ static unsigned long at91sam9x5_clk_usb_recalc_rate(struct clk_hw *hw, return DIV_ROUND_CLOSEST(parent_rate, (usbdiv + 1)); } -static long at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_hw) +static int at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk *parent = NULL; long best_rate = -EINVAL; @@ -80,23 +76,23 @@ static long at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, for (div = 1; div < SAM9X5_USB_MAX_DIV + 2; div++) { unsigned long tmp_parent_rate; - tmp_parent_rate = rate * div; + tmp_parent_rate = req->rate * div; tmp_parent_rate = __clk_round_rate(parent, tmp_parent_rate); tmp_rate = DIV_ROUND_CLOSEST(tmp_parent_rate, div); - if (tmp_rate < rate) - tmp_diff = rate - tmp_rate; + if (tmp_rate < req->rate) + tmp_diff = req->rate - tmp_rate; else - tmp_diff = tmp_rate - rate; + tmp_diff = tmp_rate - req->rate; if (best_diff < 0 || best_diff > tmp_diff) { best_rate = tmp_rate; best_diff = tmp_diff; - *best_parent_rate = 
tmp_parent_rate; - *best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = tmp_parent_rate; + req->best_parent_hw = __clk_get_hw(parent); } - if (!best_diff || tmp_rate < rate) + if (!best_diff || tmp_rate < req->rate) break; } @@ -104,7 +100,11 @@ static long at91sam9x5_clk_usb_determine_rate(struct clk_hw *hw, break; } - return best_rate; + if (best_rate < 0) + return best_rate; + + req->rate = best_rate; + return 0; } static int at91sam9x5_clk_usb_set_parent(struct clk_hw *hw, u8 index) diff --git a/drivers/clk/bcm/clk-kona.c b/drivers/clk/bcm/clk-kona.c index 79a98506c433..d9c039c1902c 100644 --- a/drivers/clk/bcm/clk-kona.c +++ b/drivers/clk/bcm/clk-kona.c @@ -1017,10 +1017,8 @@ static long kona_peri_clk_round_rate(struct clk_hw *hw, unsigned long rate, rate ? rate : 1, *parent_rate, NULL); } -static long kona_peri_clk_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, struct clk_hw **best_parent) +static int kona_peri_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct kona_clk *bcm_clk = to_kona_clk(hw); struct clk *clk = hw->clk; @@ -1029,6 +1027,7 @@ static long kona_peri_clk_determine_rate(struct clk_hw *hw, unsigned long rate, unsigned long best_delta; unsigned long best_rate; u32 parent_count; + long rate; u32 which; /* @@ -1037,14 +1036,21 @@ static long kona_peri_clk_determine_rate(struct clk_hw *hw, unsigned long rate, */ WARN_ON_ONCE(bcm_clk->init_data.flags & CLK_SET_RATE_NO_REPARENT); parent_count = (u32)bcm_clk->init_data.num_parents; - if (parent_count < 2) - return kona_peri_clk_round_rate(hw, rate, best_parent_rate); + if (parent_count < 2) { + rate = kona_peri_clk_round_rate(hw, req->rate, + &req->best_parent_rate); + if (rate < 0) + return rate; + + req->rate = rate; + return 0; + } /* Unless we can do better, stick with current parent */ current_parent = clk_get_parent(clk); parent_rate = 
__clk_get_rate(current_parent); - best_rate = kona_peri_clk_round_rate(hw, rate, &parent_rate); - best_delta = abs(best_rate - rate); + best_rate = kona_peri_clk_round_rate(hw, req->rate, &parent_rate); + best_delta = abs(best_rate - req->rate); /* Check whether any other parent clock can produce a better result */ for (which = 0; which < parent_count; which++) { @@ -1058,17 +1064,19 @@ static long kona_peri_clk_determine_rate(struct clk_hw *hw, unsigned long rate, /* We don't support CLK_SET_RATE_PARENT */ parent_rate = __clk_get_rate(parent); - other_rate = kona_peri_clk_round_rate(hw, rate, &parent_rate); - delta = abs(other_rate - rate); + other_rate = kona_peri_clk_round_rate(hw, req->rate, + &parent_rate); + delta = abs(other_rate - req->rate); if (delta < best_delta) { best_delta = delta; best_rate = other_rate; - *best_parent = __clk_get_hw(parent); - *best_parent_rate = parent_rate; + req->best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = parent_rate; } } - return best_rate; + req->rate = best_rate; + return 0; } static int kona_peri_clk_set_parent(struct clk_hw *hw, u8 index) diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c index 616f5aef3c26..9e69f346ecc6 100644 --- a/drivers/clk/clk-composite.c +++ b/drivers/clk/clk-composite.c @@ -55,11 +55,8 @@ static unsigned long clk_composite_recalc_rate(struct clk_hw *hw, return rate_ops->recalc_rate(rate_hw, parent_rate); } -static long clk_composite_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p) +static int clk_composite_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_composite *composite = to_clk_composite(hw); const struct clk_ops *rate_ops = composite->rate_ops; @@ -71,25 +68,28 @@ static long clk_composite_determine_rate(struct clk_hw *hw, unsigned long rate, long tmp_rate, best_rate = 0; unsigned long rate_diff; 
unsigned long best_rate_diff = ULONG_MAX; + long rate; int i; if (rate_hw && rate_ops && rate_ops->determine_rate) { __clk_hw_set_clk(rate_hw, hw); - return rate_ops->determine_rate(rate_hw, rate, min_rate, - max_rate, - best_parent_rate, - best_parent_p); + return rate_ops->determine_rate(rate_hw, req); } else if (rate_hw && rate_ops && rate_ops->round_rate && mux_hw && mux_ops && mux_ops->set_parent) { - *best_parent_p = NULL; + req->best_parent_hw = NULL; if (__clk_get_flags(hw->clk) & CLK_SET_RATE_NO_REPARENT) { parent = clk_get_parent(mux_hw->clk); - *best_parent_p = __clk_get_hw(parent); - *best_parent_rate = __clk_get_rate(parent); + req->best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = __clk_get_rate(parent); - return rate_ops->round_rate(rate_hw, rate, - best_parent_rate); + rate = rate_ops->round_rate(rate_hw, req->rate, + &req->best_parent_rate); + if (rate < 0) + return rate; + + req->rate = rate; + return 0; } for (i = 0; i < __clk_get_num_parents(mux_hw->clk); i++) { @@ -99,33 +99,33 @@ static long clk_composite_determine_rate(struct clk_hw *hw, unsigned long rate, parent_rate = __clk_get_rate(parent); - tmp_rate = rate_ops->round_rate(rate_hw, rate, + tmp_rate = rate_ops->round_rate(rate_hw, req->rate, &parent_rate); if (tmp_rate < 0) continue; - rate_diff = abs(rate - tmp_rate); + rate_diff = abs(req->rate - tmp_rate); - if (!rate_diff || !*best_parent_p + if (!rate_diff || !req->best_parent_hw || best_rate_diff > rate_diff) { - *best_parent_p = __clk_get_hw(parent); - *best_parent_rate = parent_rate; + req->best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = parent_rate; best_rate_diff = rate_diff; best_rate = tmp_rate; } if (!rate_diff) - return rate; + return 0; } - return best_rate; + req->rate = best_rate; + return 0; } else if (mux_hw && mux_ops && mux_ops->determine_rate) { __clk_hw_set_clk(mux_hw, hw); - return mux_ops->determine_rate(mux_hw, rate, min_rate, - max_rate, best_parent_rate, - best_parent_p); + 
return mux_ops->determine_rate(mux_hw, req); } else { pr_err("clk: clk_composite_determine_rate function called, but no mux or rate callback set!\n"); + req->rate = 0; return 0; } } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index ddb4b541016f..4e9ff928ef88 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -436,28 +436,31 @@ static bool mux_is_better_rate(unsigned long rate, unsigned long now, return now <= rate && now > best; } -static long -clk_mux_determine_rate_flags(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p, +static int +clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req, unsigned long flags) { struct clk_core *core = hw->core, *parent, *best_parent = NULL; - int i, num_parents; - unsigned long parent_rate, best = 0; + int i, num_parents, ret; + unsigned long best = 0; + struct clk_rate_request parent_req = *req; /* if NO_REPARENT flag set, pass through to current parent */ if (core->flags & CLK_SET_RATE_NO_REPARENT) { parent = core->parent; - if (core->flags & CLK_SET_RATE_PARENT) - best = __clk_determine_rate(parent ? parent->hw : NULL, - rate, min_rate, max_rate); - else if (parent) + if (core->flags & CLK_SET_RATE_PARENT) { + ret = __clk_determine_rate(parent ? 
parent->hw : NULL, + &parent_req); + if (ret) + return ret; + + best = parent_req.rate; + } else if (parent) { best = clk_core_get_rate_nolock(parent); - else + } else { best = clk_core_get_rate_nolock(core); + } + goto out; } @@ -467,24 +470,30 @@ clk_mux_determine_rate_flags(struct clk_hw *hw, unsigned long rate, parent = clk_core_get_parent_by_index(core, i); if (!parent) continue; - if (core->flags & CLK_SET_RATE_PARENT) - parent_rate = __clk_determine_rate(parent->hw, rate, - min_rate, - max_rate); - else - parent_rate = clk_core_get_rate_nolock(parent); - if (mux_is_better_rate(rate, parent_rate, best, flags)) { + + if (core->flags & CLK_SET_RATE_PARENT) { + parent_req = *req; + ret = __clk_determine_rate(parent->hw, &parent_req); + if (ret) + continue; + } else { + parent_req.rate = clk_core_get_rate_nolock(parent); + } + + if (mux_is_better_rate(req->rate, parent_req.rate, + best, flags)) { best_parent = parent; - best = parent_rate; + best = parent_req.rate; } } out: if (best_parent) - *best_parent_p = best_parent->hw; - *best_parent_rate = best; + req->best_parent_hw = best_parent->hw; + req->best_parent_rate = best; + req->rate = best; - return best; + return 0; } struct clk *__clk_lookup(const char *name) @@ -515,28 +524,17 @@ static void clk_core_get_boundaries(struct clk_core *core, * directly as a determine_rate callback (e.g. for a mux), or from a more * complex clock that may combine a mux with other operations. 
*/ -long __clk_mux_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p) +int __clk_mux_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { - return clk_mux_determine_rate_flags(hw, rate, min_rate, max_rate, - best_parent_rate, - best_parent_p, 0); + return clk_mux_determine_rate_flags(hw, req, 0); } EXPORT_SYMBOL_GPL(__clk_mux_determine_rate); -long __clk_mux_determine_rate_closest(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p) +int __clk_mux_determine_rate_closest(struct clk_hw *hw, + struct clk_rate_request *req) { - return clk_mux_determine_rate_flags(hw, rate, min_rate, max_rate, - best_parent_rate, - best_parent_p, - CLK_MUX_ROUND_CLOSEST); + return clk_mux_determine_rate_flags(hw, req, CLK_MUX_ROUND_CLOSEST); } EXPORT_SYMBOL_GPL(__clk_mux_determine_rate_closest); @@ -759,14 +757,11 @@ int clk_enable(struct clk *clk) } EXPORT_SYMBOL_GPL(clk_enable); -static unsigned long clk_core_round_rate_nolock(struct clk_core *core, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate) +static int clk_core_round_rate_nolock(struct clk_core *core, + struct clk_rate_request *req) { - unsigned long parent_rate = 0; struct clk_core *parent; - struct clk_hw *parent_hw; + long rate; lockdep_assert_held(&prepare_lock); @@ -774,21 +769,30 @@ static unsigned long clk_core_round_rate_nolock(struct clk_core *core, return 0; parent = core->parent; - if (parent) - parent_rate = parent->rate; + if (parent) { + req->best_parent_hw = parent->hw; + req->best_parent_rate = parent->rate; + } else { + req->best_parent_hw = NULL; + req->best_parent_rate = 0; + } if (core->ops->determine_rate) { - parent_hw = parent ? 
parent->hw : NULL; - return core->ops->determine_rate(core->hw, rate, - min_rate, max_rate, - &parent_rate, &parent_hw); - } else if (core->ops->round_rate) - return core->ops->round_rate(core->hw, rate, &parent_rate); - else if (core->flags & CLK_SET_RATE_PARENT) - return clk_core_round_rate_nolock(core->parent, rate, min_rate, - max_rate); - else - return core->rate; + return core->ops->determine_rate(core->hw, req); + } else if (core->ops->round_rate) { + rate = core->ops->round_rate(core->hw, req->rate, + &req->best_parent_rate); + if (rate < 0) + return rate; + + req->rate = rate; + } else if (core->flags & CLK_SET_RATE_PARENT) { + return clk_core_round_rate_nolock(parent, req); + } else { + req->rate = core->rate; + } + + return 0; } /** @@ -800,15 +804,14 @@ static unsigned long clk_core_round_rate_nolock(struct clk_core *core, * * Useful for clk_ops such as .set_rate and .determine_rate. */ -unsigned long __clk_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate) +int __clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { - if (!hw) + if (!hw) { + req->rate = 0; return 0; + } - return clk_core_round_rate_nolock(hw->core, rate, min_rate, max_rate); + return clk_core_round_rate_nolock(hw->core, req); } EXPORT_SYMBOL_GPL(__clk_determine_rate); @@ -821,15 +824,20 @@ EXPORT_SYMBOL_GPL(__clk_determine_rate); */ unsigned long __clk_round_rate(struct clk *clk, unsigned long rate) { - unsigned long min_rate; - unsigned long max_rate; + struct clk_rate_request req; + int ret; if (!clk) return 0; - clk_core_get_boundaries(clk->core, &min_rate, &max_rate); + clk_core_get_boundaries(clk->core, &req.min_rate, &req.max_rate); + req.rate = rate; + + ret = clk_core_round_rate_nolock(clk->core, &req); + if (ret) + return 0; - return clk_core_round_rate_nolock(clk->core, rate, min_rate, max_rate); + return req.rate; } EXPORT_SYMBOL_GPL(__clk_round_rate); @@ -1249,7 +1257,6 @@ static struct clk_core 
*clk_calc_new_rates(struct clk_core *core, { struct clk_core *top = core; struct clk_core *old_parent, *parent; - struct clk_hw *parent_hw; unsigned long best_parent_rate = 0; unsigned long new_rate; unsigned long min_rate; @@ -1270,20 +1277,29 @@ static struct clk_core *clk_calc_new_rates(struct clk_core *core, /* find the closest rate and parent clk/rate */ if (core->ops->determine_rate) { - parent_hw = parent ? parent->hw : NULL; - ret = core->ops->determine_rate(core->hw, rate, - min_rate, - max_rate, - &best_parent_rate, - &parent_hw); + struct clk_rate_request req; + + req.rate = rate; + req.min_rate = min_rate; + req.max_rate = max_rate; + if (parent) { + req.best_parent_hw = parent->hw; + req.best_parent_rate = parent->rate; + } else { + req.best_parent_hw = NULL; + req.best_parent_rate = 0; + } + + ret = core->ops->determine_rate(core->hw, &req); if (ret < 0) return NULL; - new_rate = ret; - parent = parent_hw ? parent_hw->core : NULL; + best_parent_rate = req.best_parent_rate; + new_rate = req.rate; + parent = req.best_parent_hw ? 
req.best_parent_hw->core : NULL; } else if (core->ops->round_rate) { ret = core->ops->round_rate(core->hw, rate, - &best_parent_rate); + &best_parent_rate); if (ret < 0) return NULL; diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c index 715d34a5ef9b..a0674ba6659e 100644 --- a/drivers/clk/hisilicon/clk-hi3620.c +++ b/drivers/clk/hisilicon/clk-hi3620.c @@ -294,34 +294,29 @@ static unsigned long mmc_clk_recalc_rate(struct clk_hw *hw, } } -static long mmc_clk_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p) +static int mmc_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_mmc *mclk = to_mmc(hw); - unsigned long best = 0; - if ((rate <= 13000000) && (mclk->id == HI3620_MMC_CIUCLK1)) { - rate = 13000000; - best = 26000000; - } else if (rate <= 26000000) { - rate = 25000000; - best = 180000000; - } else if (rate <= 52000000) { - rate = 50000000; - best = 360000000; - } else if (rate <= 100000000) { - rate = 100000000; - best = 720000000; + if ((req->rate <= 13000000) && (mclk->id == HI3620_MMC_CIUCLK1)) { + req->rate = 13000000; + req->best_parent_rate = 26000000; + } else if (req->rate <= 26000000) { + req->rate = 25000000; + req->best_parent_rate = 180000000; + } else if (req->rate <= 52000000) { + req->rate = 50000000; + req->best_parent_rate = 360000000; + } else if (req->rate <= 100000000) { + req->rate = 100000000; + req->best_parent_rate = 720000000; } else { /* max is 180M */ - rate = 180000000; - best = 1440000000; + req->rate = 180000000; + req->best_parent_rate = 1440000000; } - *best_parent_rate = best; - return rate; + return 0; } static u32 mmc_clk_delay(u32 val, u32 para, u32 off, u32 len) diff --git a/drivers/clk/mmp/clk-mix.c b/drivers/clk/mmp/clk-mix.c index de6a873175d2..7a37432761f9 100644 --- a/drivers/clk/mmp/clk-mix.c +++ b/drivers/clk/mmp/clk-mix.c @@ 
-201,11 +201,8 @@ error: return ret; } -static long mmp_clk_mix_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +static int mmp_clk_mix_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct mmp_clk_mix *mix = to_clk_mix(hw); struct mmp_clk_mix_clk_table *item; @@ -221,7 +218,7 @@ static long mmp_clk_mix_determine_rate(struct clk_hw *hw, unsigned long rate, parent = NULL; mix_rate_best = 0; parent_rate_best = 0; - gap_best = rate; + gap_best = req->rate; parent_best = NULL; if (mix->table) { @@ -233,7 +230,7 @@ static long mmp_clk_mix_determine_rate(struct clk_hw *hw, unsigned long rate, item->parent_index); parent_rate = __clk_get_rate(parent); mix_rate = parent_rate / item->divisor; - gap = abs(mix_rate - rate); + gap = abs(mix_rate - req->rate); if (parent_best == NULL || gap < gap_best) { parent_best = parent; parent_rate_best = parent_rate; @@ -251,7 +248,7 @@ static long mmp_clk_mix_determine_rate(struct clk_hw *hw, unsigned long rate, for (j = 0; j < div_val_max; j++) { div = _get_div(mix, j); mix_rate = parent_rate / div; - gap = abs(mix_rate - rate); + gap = abs(mix_rate - req->rate); if (parent_best == NULL || gap < gap_best) { parent_best = parent; parent_rate_best = parent_rate; @@ -265,10 +262,11 @@ static long mmp_clk_mix_determine_rate(struct clk_hw *hw, unsigned long rate, } found: - *best_parent_rate = parent_rate_best; - *best_parent_clk = __clk_get_hw(parent_best); + req->best_parent_rate = parent_rate_best; + req->best_parent_hw = __clk_get_hw(parent_best); + req->rate = mix_rate_best; - return mix_rate_best; + return 0; } static int mmp_clk_mix_set_rate_and_parent(struct clk_hw *hw, diff --git a/drivers/clk/qcom/clk-pll.c b/drivers/clk/qcom/clk-pll.c index 245d5063a385..6017a76b47c8 100644 --- a/drivers/clk/qcom/clk-pll.c +++ b/drivers/clk/qcom/clk-pll.c @@ -135,19 +135,23 @@ struct pll_freq_tbl 
*find_freq(const struct pll_freq_tbl *f, unsigned long rate) return NULL; } -static long -clk_pll_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p) +static int +clk_pll_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { + struct clk *parent = __clk_get_parent(hw->clk); struct clk_pll *pll = to_clk_pll(hw); const struct pll_freq_tbl *f; - f = find_freq(pll->freq_tbl, rate); + req->best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = __clk_get_rate(parent); + + f = find_freq(pll->freq_tbl, req->rate); if (!f) - return clk_pll_recalc_rate(hw, *p_rate); + req->rate = clk_pll_recalc_rate(hw, req->best_parent_rate); + else + req->rate = f->freq; - return f->freq; + return 0; } static int diff --git a/drivers/clk/qcom/clk-rcg.c b/drivers/clk/qcom/clk-rcg.c index 7b3d62674203..2bc42bb21b3d 100644 --- a/drivers/clk/qcom/clk-rcg.c +++ b/drivers/clk/qcom/clk-rcg.c @@ -404,13 +404,11 @@ clk_dyn_rcg_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) return calc_rate(parent_rate, m, n, mode, pre_div); } -static long _freq_tbl_determine_rate(struct clk_hw *hw, - const struct freq_tbl *f, unsigned long rate, - unsigned long min_rate, unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p_hw, +static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, + struct clk_rate_request *req, const struct parent_map *parent_map) { - unsigned long clk_flags; + unsigned long clk_flags, rate = req->rate; struct clk *p; int index; @@ -435,25 +433,24 @@ static long _freq_tbl_determine_rate(struct clk_hw *hw, } else { rate = __clk_get_rate(p); } - *p_hw = __clk_get_hw(p); - *p_rate = rate; + req->best_parent_hw = __clk_get_hw(p); + req->best_parent_rate = rate; + req->rate = f->freq; - return f->freq; + return 0; } -static long clk_rcg_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, unsigned long 
max_rate, - unsigned long *p_rate, struct clk_hw **p) +static int clk_rcg_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_rcg *rcg = to_clk_rcg(hw); - return _freq_tbl_determine_rate(hw, rcg->freq_tbl, rate, min_rate, - max_rate, p_rate, p, rcg->s.parent_map); + return _freq_tbl_determine_rate(hw, rcg->freq_tbl, req, + rcg->s.parent_map); } -static long clk_dyn_rcg_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p) +static int clk_dyn_rcg_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_dyn_rcg *rcg = to_clk_dyn_rcg(hw); u32 reg; @@ -464,13 +461,11 @@ static long clk_dyn_rcg_determine_rate(struct clk_hw *hw, unsigned long rate, bank = reg_to_bank(rcg, reg); s = &rcg->s[bank]; - return _freq_tbl_determine_rate(hw, rcg->freq_tbl, rate, min_rate, - max_rate, p_rate, p, s->parent_map); + return _freq_tbl_determine_rate(hw, rcg->freq_tbl, req, s->parent_map); } -static long clk_rcg_bypass_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p_hw) +static int clk_rcg_bypass_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_rcg *rcg = to_clk_rcg(hw); const struct freq_tbl *f = rcg->freq_tbl; @@ -478,10 +473,11 @@ static long clk_rcg_bypass_determine_rate(struct clk_hw *hw, unsigned long rate, int index = qcom_find_src_index(hw, rcg->s.parent_map, f->src); p = clk_get_parent_by_index(hw->clk, index); - *p_hw = __clk_get_hw(p); - *p_rate = __clk_round_rate(p, rate); + req->best_parent_hw = __clk_get_hw(p); + req->best_parent_rate = __clk_round_rate(p, req->rate); + req->rate = req->best_parent_rate; - return *p_rate; + return 0; } static int __clk_rcg_set_rate(struct clk_rcg *rcg, const struct freq_tbl *f) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 
b95d17fbb8d7..aa6c3bdac040 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -176,11 +176,10 @@ clk_rcg2_recalc_rate(struct clk_hw *hw, unsigned long parent_rate) return calc_rate(parent_rate, m, n, mode, hid_div); } -static long _freq_tbl_determine_rate(struct clk_hw *hw, - const struct freq_tbl *f, unsigned long rate, - unsigned long *p_rate, struct clk_hw **p_hw) +static int _freq_tbl_determine_rate(struct clk_hw *hw, + const struct freq_tbl *f, struct clk_rate_request *req) { - unsigned long clk_flags; + unsigned long clk_flags, rate = req->rate; struct clk *p; struct clk_rcg2 *rcg = to_clk_rcg2(hw); int index; @@ -210,19 +209,19 @@ static long _freq_tbl_determine_rate(struct clk_hw *hw, } else { rate = __clk_get_rate(p); } - *p_hw = __clk_get_hw(p); - *p_rate = rate; + req->best_parent_hw = __clk_get_hw(p); + req->best_parent_rate = rate; + req->rate = f->freq; - return f->freq; + return 0; } -static long clk_rcg2_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p) +static int clk_rcg2_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); - return _freq_tbl_determine_rate(hw, rcg->freq_tbl, rate, p_rate, p); + return _freq_tbl_determine_rate(hw, rcg->freq_tbl, req); } static int clk_rcg2_configure(struct clk_rcg2 *rcg, const struct freq_tbl *f) @@ -374,35 +373,34 @@ static int clk_edp_pixel_set_rate_and_parent(struct clk_hw *hw, return clk_edp_pixel_set_rate(hw, rate, parent_rate); } -static long clk_edp_pixel_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p) +static int clk_edp_pixel_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); const struct freq_tbl *f = rcg->freq_tbl; const struct frac_entry *frac; int delta = 100000; - s64 
src_rate = *p_rate; s64 request; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; int index = qcom_find_src_index(hw, rcg->parent_map, f->src); + struct clk *p = clk_get_parent_by_index(hw->clk, index); /* Force the correct parent */ - *p = __clk_get_hw(clk_get_parent_by_index(hw->clk, index)); + req->best_parent_hw = __clk_get_hw(p); + req->best_parent_rate = __clk_get_rate(p); - if (src_rate == 810000000) + if (req->best_parent_rate == 810000000) frac = frac_table_810m; else frac = frac_table_675m; for (; frac->num; frac++) { - request = rate; + request = req->rate; request *= frac->den; request = div_s64(request, frac->num); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((req->best_parent_rate < (request - delta)) || + (req->best_parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, @@ -410,8 +408,10 @@ static long clk_edp_pixel_determine_rate(struct clk_hw *hw, unsigned long rate, hid_div >>= CFG_SRC_DIV_SHIFT; hid_div &= mask; - return calc_rate(src_rate, frac->num, frac->den, !!frac->den, - hid_div); + req->rate = calc_rate(req->best_parent_rate, + frac->num, frac->den, + !!frac->den, hid_div); + return 0; } return -EINVAL; @@ -428,9 +428,8 @@ const struct clk_ops clk_edp_pixel_ops = { }; EXPORT_SYMBOL_GPL(clk_edp_pixel_ops); -static long clk_byte_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p_hw) +static int clk_byte_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); const struct freq_tbl *f = rcg->freq_tbl; @@ -439,17 +438,19 @@ static long clk_byte_determine_rate(struct clk_hw *hw, unsigned long rate, u32 mask = BIT(rcg->hid_width) - 1; struct clk *p; - if (rate == 0) + if (req->rate == 0) return -EINVAL; p = clk_get_parent_by_index(hw->clk, index); - *p_hw = __clk_get_hw(p); - *p_rate = parent_rate = __clk_round_rate(p, 
rate); + req->best_parent_hw = __clk_get_hw(p); + req->best_parent_rate = parent_rate = __clk_round_rate(p, req->rate); - div = DIV_ROUND_UP((2 * parent_rate), rate) - 1; + div = DIV_ROUND_UP((2 * parent_rate), req->rate) - 1; div = min_t(u32, div, mask); - return calc_rate(parent_rate, 0, 0, 0, div); + req->rate = calc_rate(parent_rate, 0, 0, 0, div); + + return 0; } static int clk_byte_set_rate(struct clk_hw *hw, unsigned long rate, @@ -494,10 +495,8 @@ static const struct frac_entry frac_table_pixel[] = { { } }; -static long clk_pixel_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *p_rate, struct clk_hw **p) +static int clk_pixel_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); unsigned long request, src_rate; @@ -507,18 +506,19 @@ static long clk_pixel_determine_rate(struct clk_hw *hw, unsigned long rate, int index = qcom_find_src_index(hw, rcg->parent_map, f->src); struct clk *parent = clk_get_parent_by_index(hw->clk, index); - *p = __clk_get_hw(parent); + req->best_parent_hw = __clk_get_hw(parent); for (; frac->num; frac++) { - request = (rate * frac->den) / frac->num; + request = (req->rate * frac->den) / frac->num; src_rate = __clk_round_rate(parent, request); if ((src_rate < (request - delta)) || (src_rate > (request + delta))) continue; - *p_rate = src_rate; - return (src_rate * frac->num) / frac->den; + req->best_parent_rate = src_rate; + req->rate = (src_rate * frac->num) / frac->den; + return 0; } return -EINVAL; diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c index 8c20190a3e9f..7a485870991d 100644 --- a/drivers/clk/sunxi/clk-factors.c +++ b/drivers/clk/sunxi/clk-factors.c @@ -79,11 +79,8 @@ static long clk_factors_round_rate(struct clk_hw *hw, unsigned long rate, return rate; } -static long clk_factors_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - 
unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p) +static int clk_factors_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk *clk = hw->clk, *parent, *best_parent = NULL; int i, num_parents; @@ -96,13 +93,14 @@ static long clk_factors_determine_rate(struct clk_hw *hw, unsigned long rate, if (!parent) continue; if (__clk_get_flags(clk) & CLK_SET_RATE_PARENT) - parent_rate = __clk_round_rate(parent, rate); + parent_rate = __clk_round_rate(parent, req->rate); else parent_rate = __clk_get_rate(parent); - child_rate = clk_factors_round_rate(hw, rate, &parent_rate); + child_rate = clk_factors_round_rate(hw, req->rate, + &parent_rate); - if (child_rate <= rate && child_rate > best_child_rate) { + if (child_rate <= req->rate && child_rate > best_child_rate) { best_parent = parent; best = parent_rate; best_child_rate = child_rate; @@ -110,10 +108,11 @@ static long clk_factors_determine_rate(struct clk_hw *hw, unsigned long rate, } if (best_parent) - *best_parent_p = __clk_get_hw(best_parent); - *best_parent_rate = best; + req->best_parent_hw = __clk_get_hw(best_parent); + req->best_parent_rate = best; + req->rate = best_child_rate; - return best_child_rate; + return 0; } static int clk_factors_set_rate(struct clk_hw *hw, unsigned long rate, diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c index 63cf149195ae..d70c1ea345db 100644 --- a/drivers/clk/sunxi/clk-sun6i-ar100.c +++ b/drivers/clk/sunxi/clk-sun6i-ar100.c @@ -44,17 +44,14 @@ static unsigned long ar100_recalc_rate(struct clk_hw *hw, return (parent_rate >> shift) / (div + 1); } -static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +static int ar100_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { int nparents = __clk_get_num_parents(hw->clk); long best_rate 
= -EINVAL; int i; - *best_parent_clk = NULL; + req->best_parent_hw = NULL; for (i = 0; i < nparents; i++) { unsigned long parent_rate; @@ -65,7 +62,7 @@ static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate, parent = clk_get_parent_by_index(hw->clk, i); parent_rate = __clk_get_rate(parent); - div = DIV_ROUND_UP(parent_rate, rate); + div = DIV_ROUND_UP(parent_rate, req->rate); /* * The AR100 clk contains 2 divisors: @@ -101,14 +98,16 @@ static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate, continue; tmp_rate = (parent_rate >> shift) / div; - if (!*best_parent_clk || tmp_rate > best_rate) { - *best_parent_clk = __clk_get_hw(parent); - *best_parent_rate = parent_rate; + if (!req->best_parent_hw || tmp_rate > best_rate) { + req->best_parent_hw = __clk_get_hw(parent); + req->best_parent_rate = parent_rate; best_rate = tmp_rate; } } - return best_rate; + req->rate = best_rate; + + return 0; } static int ar100_set_parent(struct clk_hw *hw, u8 index) diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 9a82f17d2d73..d0f72a151bf1 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -118,11 +118,8 @@ static long sun6i_ahb1_clk_round(unsigned long rate, u8 *divp, u8 *pre_divp, return (parent_rate / calcm) >> calcp; } -static long sun6i_ahb1_clk_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk) +static int sun6i_ahb1_clk_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) { struct clk *clk = hw->clk, *parent, *best_parent = NULL; int i, num_parents; @@ -135,14 +132,14 @@ static long sun6i_ahb1_clk_determine_rate(struct clk_hw *hw, unsigned long rate, if (!parent) continue; if (__clk_get_flags(clk) & CLK_SET_RATE_PARENT) - parent_rate = __clk_round_rate(parent, rate); + parent_rate = __clk_round_rate(parent, req->rate); else parent_rate = 
__clk_get_rate(parent); - child_rate = sun6i_ahb1_clk_round(rate, NULL, NULL, i, + child_rate = sun6i_ahb1_clk_round(req->rate, NULL, NULL, i, parent_rate); - if (child_rate <= rate && child_rate > best_child_rate) { + if (child_rate <= req->rate && child_rate > best_child_rate) { best_parent = parent; best = parent_rate; best_child_rate = child_rate; @@ -150,10 +147,11 @@ static long sun6i_ahb1_clk_determine_rate(struct clk_hw *hw, unsigned long rate, } if (best_parent) - *best_parent_clk = __clk_get_hw(best_parent); - *best_parent_rate = best; + req->best_parent_hw = __clk_get_hw(best_parent); + req->best_parent_rate = best; + req->rate = best_child_rate; - return best_child_rate; + return 0; } static int sun6i_ahb1_clk_set_rate(struct clk_hw *hw, unsigned long rate, diff --git a/drivers/clk/tegra/clk-emc.c b/drivers/clk/tegra/clk-emc.c index 7649685c86bc..08ae518c9950 100644 --- a/drivers/clk/tegra/clk-emc.c +++ b/drivers/clk/tegra/clk-emc.c @@ -116,11 +116,7 @@ static unsigned long emc_recalc_rate(struct clk_hw *hw, * safer since things have EMC rate floors. Also don't touch parent_rate * since we don't want the CCF to play with our parent clocks. 
*/ -static long emc_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_hw) +static int emc_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { struct tegra_clk_emc *tegra; u8 ram_code = tegra_read_ram_code(); @@ -135,22 +131,28 @@ static long emc_determine_rate(struct clk_hw *hw, unsigned long rate, timing = tegra->timings + i; - if (timing->rate > max_rate) { + if (timing->rate > req->max_rate) { i = min(i, 1); - return tegra->timings[i - 1].rate; + req->rate = tegra->timings[i - 1].rate; + return 0; } - if (timing->rate < min_rate) + if (timing->rate < req->min_rate) continue; - if (timing->rate >= rate) - return timing->rate; + if (timing->rate >= req->rate) { + req->rate = timing->rate; + return 0; + } } - if (timing) - return timing->rate; + if (timing) { + req->rate = timing->rate; + return 0; + } - return __clk_get_rate(hw->clk); + req->rate = __clk_get_rate(hw->clk); + return 0; } static u8 emc_get_parent(struct clk_hw *hw) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 78842f46f152..14998f05acf2 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -37,6 +37,28 @@ struct clk_hw; struct clk_core; struct dentry; +/** + * struct clk_rate_request - Structure encoding the clk constraints that + * a clock user might require. + * + * @rate: Requested clock rate. This field will be adjusted by + * clock drivers according to hardware capabilities. + * @min_rate: Minimum rate imposed by clk users. + * @max_rate: Maximum rate a imposed by clk users. + * @best_parent_rate: The best parent rate a parent can provide to fulfill the + * requested constraints. + * @best_parent_hw: The most appropriate parent clock that fulfills the + * requested constraints. 
+ * + */ +struct clk_rate_request { + unsigned long rate; + unsigned long min_rate; + unsigned long max_rate; + unsigned long best_parent_rate; + struct clk_hw *best_parent_hw; +}; + /** * struct clk_ops - Callback operations for hardware clocks; these are to * be provided by the clock implementation, and will be called by drivers @@ -176,12 +198,8 @@ struct clk_ops { unsigned long parent_rate); long (*round_rate)(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate); - long (*determine_rate)(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_hw); + int (*determine_rate)(struct clk_hw *hw, + struct clk_rate_request *req); int (*set_parent)(struct clk_hw *hw, u8 index); u8 (*get_parent)(struct clk_hw *hw); int (*set_rate)(struct clk_hw *hw, unsigned long rate, @@ -578,20 +596,11 @@ unsigned long __clk_get_flags(struct clk *clk); bool __clk_is_prepared(struct clk *clk); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); -long __clk_mux_determine_rate(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p); -unsigned long __clk_determine_rate(struct clk_hw *core, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate); -long __clk_mux_determine_rate_closest(struct clk_hw *hw, unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_p); +int __clk_mux_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req); +int __clk_determine_rate(struct clk_hw *core, struct clk_rate_request *req); +int __clk_mux_determine_rate_closest(struct clk_hw *hw, + struct clk_rate_request *req); void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent); static inline void __clk_hw_set_clk(struct clk_hw *dst, struct clk_hw 
*src) diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 79b76e13d904..448b4f87b9eb 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -269,23 +269,15 @@ int omap3_noncore_dpll_set_rate_and_parent(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate, u8 index); -long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk); +int omap3_noncore_dpll_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req); unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw, unsigned long parent_rate); long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw, unsigned long target_rate, unsigned long *parent_rate); -long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, - unsigned long rate, - unsigned long min_rate, - unsigned long max_rate, - unsigned long *best_parent_rate, - struct clk_hw **best_parent_clk); +int omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req); u8 omap2_init_dpll_parent(struct clk_hw *hw); unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate); long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate, -- cgit v1.2.3-70-g09d2 From 730daa164e7c7e31c08fab940549f4acc3329432 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jul 2015 18:02:48 -0700 Subject: Yama: remove needless CONFIG_SECURITY_YAMA_STACKED Now that minor LSMs can cleanly stack with major LSMs, remove the unneeded config for Yama to be made to explicitly stack. Just selecting the main Yama CONFIG will allow it to work, regardless of the major LSM. Since distros using Yama are already forcing it to stack, this is effectively a no-op change. Additionally add MAINTAINERS entry. 
Signed-off-by: Kees Cook Signed-off-by: James Morris --- Documentation/security/Yama.txt | 10 ++++------ MAINTAINERS | 6 ++++++ arch/mips/configs/pistachio_defconfig | 1 - include/linux/lsm_hooks.h | 6 ++++-- security/Kconfig | 5 ----- security/security.c | 11 +++-------- security/yama/Kconfig | 9 +-------- security/yama/yama_lsm.c | 32 ++++++++++---------------------- 8 files changed, 28 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/Yama.txt b/Documentation/security/Yama.txt index 227a63f018a2..d9ee7d7a6c7f 100644 --- a/Documentation/security/Yama.txt +++ b/Documentation/security/Yama.txt @@ -1,9 +1,7 @@ -Yama is a Linux Security Module that collects a number of system-wide DAC -security protections that are not handled by the core kernel itself. To -select it at boot time, specify "security=yama" (though this will disable -any other LSM). - -Yama is controlled through sysctl in /proc/sys/kernel/yama: +Yama is a Linux Security Module that collects system-wide DAC security +protections that are not handled by the core kernel itself. 
This is +selectable at build-time with CONFIG_SECURITY_YAMA, and can be controlled +at run-time through sysctls in /proc/sys/kernel/yama: - ptrace_scope diff --git a/MAINTAINERS b/MAINTAINERS index a2264167791a..f8be2f797197 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9102,6 +9102,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git S: Supported F: security/apparmor/ +YAMA SECURITY MODULE +M: Kees Cook +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git yama/tip +S: Supported +F: security/yama/ + SENSABLE PHANTOM M: Jiri Slaby S: Maintained diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig index 1646cce032c3..642b50946943 100644 --- a/arch/mips/configs/pistachio_defconfig +++ b/arch/mips/configs/pistachio_defconfig @@ -320,7 +320,6 @@ CONFIG_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_YAMA=y -CONFIG_SECURITY_YAMA_STACKED=y CONFIG_DEFAULT_SECURITY_DAC=y CONFIG_CRYPTO_AUTHENC=y CONFIG_CRYPTO_HMAC=y diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 9429f054c323..ec3a6bab29de 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1881,8 +1881,10 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); -#ifdef CONFIG_SECURITY_YAMA_STACKED -void __init yama_add_hooks(void); +#ifdef CONFIG_SECURITY_YAMA +extern void __init yama_add_hooks(void); +#else +static inline void __init yama_add_hooks(void) { } #endif #endif /* ! 
__LINUX_LSM_HOOKS_H */ diff --git a/security/Kconfig b/security/Kconfig index bf4ec46474b6..e45237897b43 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -132,7 +132,6 @@ choice default DEFAULT_SECURITY_SMACK if SECURITY_SMACK default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR - default DEFAULT_SECURITY_YAMA if SECURITY_YAMA default DEFAULT_SECURITY_DAC help @@ -151,9 +150,6 @@ choice config DEFAULT_SECURITY_APPARMOR bool "AppArmor" if SECURITY_APPARMOR=y - config DEFAULT_SECURITY_YAMA - bool "Yama" if SECURITY_YAMA=y - config DEFAULT_SECURITY_DAC bool "Unix Discretionary Access Controls" @@ -165,7 +161,6 @@ config DEFAULT_SECURITY default "smack" if DEFAULT_SECURITY_SMACK default "tomoyo" if DEFAULT_SECURITY_TOMOYO default "apparmor" if DEFAULT_SECURITY_APPARMOR - default "yama" if DEFAULT_SECURITY_YAMA default "" if DEFAULT_SECURITY_DAC endmenu diff --git a/security/security.c b/security/security.c index 595fffab48b0..e693ffcf9266 100644 --- a/security/security.c +++ b/security/security.c @@ -56,18 +56,13 @@ int __init security_init(void) pr_info("Security Framework initialized\n"); /* - * Always load the capability module. + * Load minor LSMs, with the capability module always first. */ capability_add_hooks(); -#ifdef CONFIG_SECURITY_YAMA_STACKED - /* - * If Yama is configured for stacking load it next. - */ yama_add_hooks(); -#endif + /* - * Load the chosen module if there is one. - * This will also find yama if it is stacking + * Load all the remaining security modules. */ do_security_initcalls(); diff --git a/security/yama/Kconfig b/security/yama/Kconfig index 3123e1da2fed..90c605eea892 100644 --- a/security/yama/Kconfig +++ b/security/yama/Kconfig @@ -6,14 +6,7 @@ config SECURITY_YAMA This selects Yama, which extends DAC support with additional system-wide security settings beyond regular Linux discretionary access controls. Currently available is ptrace scope restriction. 
+ Like capabilities, this security module stacks with other LSMs. Further information can be found in Documentation/security/Yama.txt. If you are unsure how to answer this question, answer N. - -config SECURITY_YAMA_STACKED - bool "Yama stacked with other LSMs" - depends on SECURITY_YAMA - default n - help - When Yama is built into the kernel, force it to stack with the - selected primary LSM. diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 9ed32502470e..d3c19c970a06 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -353,11 +353,6 @@ static struct security_hook_list yama_hooks[] = { LSM_HOOK_INIT(task_free, yama_task_free), }; -void __init yama_add_hooks(void) -{ - security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks)); -} - #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -396,25 +391,18 @@ static struct ctl_table yama_sysctl_table[] = { }, { } }; -#endif /* CONFIG_SYSCTL */ - -static __init int yama_init(void) +static void __init yama_init_sysctl(void) { -#ifndef CONFIG_SECURITY_YAMA_STACKED - /* - * If yama is being stacked this is already taken care of. 
- */ - if (!security_module_enable("yama")) - return 0; -#endif - pr_info("Yama: becoming mindful.\n"); - -#ifdef CONFIG_SYSCTL if (!register_sysctl_paths(yama_sysctl_path, yama_sysctl_table)) panic("Yama: sysctl registration failed.\n"); -#endif - - return 0; } +#else +static inline void yama_init_sysctl(void) { } +#endif /* CONFIG_SYSCTL */ -security_initcall(yama_init); +void __init yama_add_hooks(void) +{ + pr_info("Yama: becoming mindful.\n"); + security_add_hooks(yama_hooks, ARRAY_SIZE(yama_hooks)); + yama_init_sysctl(); +} -- cgit v1.2.3-70-g09d2 From 13b2c4a0c3b1cd37ee6bcfbb5b6e2b94e9a75364 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 27 Jul 2015 18:03:56 +0300 Subject: PM / QoS: Make it possible to expose device latency tolerance to userspace Typically when a device is created the bus core it belongs to (for example PCI) does not know if the device supports things like latency tolerance. This is left to the driver that binds to the device in question. However, at that time the device has already been created and there is no way to set its dev->power.set_latency_tolerance anymore. So follow what has been done for other PM QoS attributes as well and allow drivers to expose and hide latency tolerance from userspace, if the device supports it. Acked-by: Rafael J. 
Wysocki Signed-off-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- drivers/base/power/power.h | 2 ++ drivers/base/power/qos.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/base/power/sysfs.c | 11 +++++++++++ include/linux/pm_qos.h | 5 +++++ 4 files changed, 55 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index f1a5d95e7b20..998fa6b23084 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -73,6 +73,8 @@ extern int pm_qos_sysfs_add_resume_latency(struct device *dev); extern void pm_qos_sysfs_remove_resume_latency(struct device *dev); extern int pm_qos_sysfs_add_flags(struct device *dev); extern void pm_qos_sysfs_remove_flags(struct device *dev); +extern int pm_qos_sysfs_add_latency_tolerance(struct device *dev); +extern void pm_qos_sysfs_remove_latency_tolerance(struct device *dev); #else /* CONFIG_PM */ diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index e56d538d039e..7f3646e459cb 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -883,3 +883,40 @@ int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val) mutex_unlock(&dev_pm_qos_mtx); return ret; } + +/** + * dev_pm_qos_expose_latency_tolerance - Expose latency tolerance to userspace + * @dev: Device whose latency tolerance to expose + */ +int dev_pm_qos_expose_latency_tolerance(struct device *dev) +{ + int ret; + + if (!dev->power.set_latency_tolerance) + return -EINVAL; + + mutex_lock(&dev_pm_qos_sysfs_mtx); + ret = pm_qos_sysfs_add_latency_tolerance(dev); + mutex_unlock(&dev_pm_qos_sysfs_mtx); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_tolerance); + +/** + * dev_pm_qos_hide_latency_tolerance - Hide latency tolerance from userspace + * @dev: Device whose latency tolerance to hide + */ +void dev_pm_qos_hide_latency_tolerance(struct device *dev) +{ + mutex_lock(&dev_pm_qos_sysfs_mtx); + 
pm_qos_sysfs_remove_latency_tolerance(dev); + mutex_unlock(&dev_pm_qos_sysfs_mtx); + + /* Remove the request from user space now */ + pm_runtime_get_sync(dev); + dev_pm_qos_update_user_latency_tolerance(dev, + PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT); + pm_runtime_put(dev); +} +EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_tolerance); diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index d2be3f9c211c..a7b46798c81d 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -738,6 +738,17 @@ void pm_qos_sysfs_remove_flags(struct device *dev) sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); } +int pm_qos_sysfs_add_latency_tolerance(struct device *dev) +{ + return sysfs_merge_group(&dev->kobj, + &pm_qos_latency_tolerance_attr_group); +} + +void pm_qos_sysfs_remove_latency_tolerance(struct device *dev) +{ + sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); +} + void rpm_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 7b3ae0cffc05..0f65d36c2a75 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -161,6 +161,8 @@ void dev_pm_qos_hide_flags(struct device *dev); int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set); s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev); int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val); +int dev_pm_qos_expose_latency_tolerance(struct device *dev); +void dev_pm_qos_hide_latency_tolerance(struct device *dev); static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { @@ -229,6 +231,9 @@ static inline s32 dev_pm_qos_get_user_latency_tolerance(struct device *dev) { return PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; } static inline int dev_pm_qos_update_user_latency_tolerance(struct device *dev, s32 val) { return 0; } +static inline int dev_pm_qos_expose_latency_tolerance(struct device *dev) + { 
return 0; } +static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } -- cgit v1.2.3-70-g09d2 From 2e0fed7f7cdc41679e209c5636ad7537dc6210a9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 27 Jul 2015 18:03:59 +0300 Subject: klist: implement klist_prev() klist_prev() gets the previous element in the list. It is useful to traverse through the list in reverse order, for example, to provide LIFO (last in first out) variant of access. Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Signed-off-by: Lee Jones --- include/linux/klist.h | 1 + lib/klist.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/klist.h b/include/linux/klist.h index 61e5b723ae73..953f283f8451 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -63,6 +63,7 @@ extern void klist_iter_init(struct klist *k, struct klist_iter *i); extern void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n); extern void klist_iter_exit(struct klist_iter *i); +extern struct klist_node *klist_prev(struct klist_iter *i); extern struct klist_node *klist_next(struct klist_iter *i); #endif diff --git a/lib/klist.c b/lib/klist.c index 89b485a2a58d..d74cf7a29afd 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -323,6 +323,47 @@ static struct klist_node *to_klist_node(struct list_head *n) return container_of(n, struct klist_node, n_node); } +/** + * klist_prev - Ante up prev node in list. + * @i: Iterator structure. + * + * First grab list lock. Decrement the reference count of the previous + * node, if there was one. Grab the prev node, increment its reference + * count, drop the lock, and return that prev node. 
+ */ +struct klist_node *klist_prev(struct klist_iter *i) +{ + void (*put)(struct klist_node *) = i->i_klist->put; + struct klist_node *last = i->i_cur; + struct klist_node *prev; + + spin_lock(&i->i_klist->k_lock); + + if (last) { + prev = to_klist_node(last->n_node.prev); + if (!klist_dec_and_del(last)) + put = NULL; + } else + prev = to_klist_node(i->i_klist->k_list.prev); + + i->i_cur = NULL; + while (prev != to_klist_node(&i->i_klist->k_list)) { + if (likely(!knode_dead(prev))) { + kref_get(&prev->n_ref); + i->i_cur = prev; + break; + } + prev = to_klist_node(prev->n_node.prev); + } + + spin_unlock(&i->i_klist->k_lock); + + if (put && last) + put(last); + return i->i_cur; +} +EXPORT_SYMBOL_GPL(klist_prev); + /** * klist_next - Ante up next node in list. * @i: Iterator structure. -- cgit v1.2.3-70-g09d2 From 3d060aeb72113cda0acf906bfe26914fc689506a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 27 Jul 2015 18:04:00 +0300 Subject: driver core: implement device_for_each_child_reverse() The new function device_for_each_child_reverse() is helpful to traverse the registered devices in a reversed order, e.g. in the case when an operation on each device should be done first on the last added device, then on one before last and so on. 
Signed-off-by: Andy Shevchenko Acked-by: Greg Kroah-Hartman Signed-off-by: Lee Jones --- drivers/base/core.c | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/device.h | 2 ++ 2 files changed, 45 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/core.c b/drivers/base/core.c index dafae6d2f7ac..7d6279554afc 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1252,6 +1252,19 @@ void device_unregister(struct device *dev) } EXPORT_SYMBOL_GPL(device_unregister); +static struct device *prev_device(struct klist_iter *i) +{ + struct klist_node *n = klist_prev(i); + struct device *dev = NULL; + struct device_private *p; + + if (n) { + p = to_device_private_parent(n); + dev = p->device; + } + return dev; +} + static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); @@ -1340,6 +1353,36 @@ int device_for_each_child(struct device *parent, void *data, } EXPORT_SYMBOL_GPL(device_for_each_child); +/** + * device_for_each_child_reverse - device child iterator in reversed order. + * @parent: parent struct device. + * @fn: function to be called for each device. + * @data: data for the callback. + * + * Iterate over @parent's child devices, and call @fn for each, + * passing it @data. + * + * We check the return of @fn each time. If it returns anything + * other than 0, we break out and return that value. + */ +int device_for_each_child_reverse(struct device *parent, void *data, + int (*fn)(struct device *dev, void *data)) +{ + struct klist_iter i; + struct device *child; + int error = 0; + + if (!parent->p) + return 0; + + klist_iter_init(&parent->p->klist_children, &i); + while ((child = prev_device(&i)) && !error) + error = fn(child, data); + klist_iter_exit(&i); + return error; +} +EXPORT_SYMBOL_GPL(device_for_each_child_reverse); + /** * device_find_child - device iterator for locating a particular device. 
* @parent: parent struct device diff --git a/include/linux/device.h b/include/linux/device.h index 5a31bf3a4024..af6fbc35d8a6 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -958,6 +958,8 @@ extern int __must_check device_add(struct device *dev); extern void device_del(struct device *dev); extern int device_for_each_child(struct device *dev, void *data, int (*fn)(struct device *dev, void *data)); +extern int device_for_each_child_reverse(struct device *dev, void *data, + int (*fn)(struct device *dev, void *data)); extern struct device *device_find_child(struct device *dev, void *data, int (*match)(struct device *dev, void *data)); extern int device_rename(struct device *dev, const char *new_name); -- cgit v1.2.3-70-g09d2 From 28355f81969962cf01aef5b13d7de5b4ab0c5f13 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 14 Jul 2015 10:29:54 +0200 Subject: gpio: defer probe if pinctrl cannot be found When an OF node has a pin range for its GPIOs, return -EPROBE_DEFER if the pin controller isn't available. Otherwise, the GPIO range wouldn't be set at all unless the pin controller probed always before the GPIO chip. With this change, the probe of the GPIO chip will be deferred and will be retried at a later point, hopefully once the pin controller has been registered and probed already. 
Signed-off-by: Tomeu Vizoso Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 27 ++++++++++++++++++--------- drivers/gpio/gpiolib.c | 5 ++++- include/linux/of_gpio.h | 4 ++-- 3 files changed, 24 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 1e36ec5e2e0c..fa6e3c8823d6 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -335,7 +335,7 @@ void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc) EXPORT_SYMBOL(of_mm_gpiochip_remove); #ifdef CONFIG_PINCTRL -static void of_gpiochip_add_pin_range(struct gpio_chip *chip) +static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { struct device_node *np = chip->of_node; struct of_phandle_args pinspec; @@ -346,7 +346,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) struct property *group_names; if (!np) - return; + return 0; group_names = of_find_property(np, group_names_propname, NULL); @@ -358,7 +358,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) pctldev = of_pinctrl_get(pinspec.np); if (!pctldev) - break; + return -EPROBE_DEFER; if (pinspec.args[2]) { if (group_names) { @@ -378,7 +378,7 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) pinspec.args[1], pinspec.args[2]); if (ret) - break; + return ret; } else { /* npins == 0: special range */ if (pinspec.args[1]) { @@ -408,32 +408,41 @@ static void of_gpiochip_add_pin_range(struct gpio_chip *chip) ret = gpiochip_add_pingroup_range(chip, pctldev, pinspec.args[0], name); if (ret) - break; + return ret; } } + + return 0; } #else -static void of_gpiochip_add_pin_range(struct gpio_chip *chip) {} +static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { return 0; } #endif -void of_gpiochip_add(struct gpio_chip *chip) +int of_gpiochip_add(struct gpio_chip *chip) { + int status; + if ((!chip->of_node) && (chip->dev)) chip->of_node = chip->dev->of_node; if (!chip->of_node) - return; + return 0; if 
(!chip->of_xlate) { chip->of_gpio_n_cells = 2; chip->of_xlate = of_gpio_simple_xlate; } - of_gpiochip_add_pin_range(chip); + status = of_gpiochip_add_pin_range(chip); + if (status) + return status; + of_node_get(chip->of_node); of_gpiochip_scan_hogs(chip); + + return 0; } void of_gpiochip_remove(struct gpio_chip *chip) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9312bbcb19b9..1b5b8da71154 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -290,7 +290,10 @@ int gpiochip_add(struct gpio_chip *chip) if (!chip->owner && chip->dev && chip->dev->driver) chip->owner = chip->dev->driver->owner; - of_gpiochip_add(chip); + status = of_gpiochip_add(chip); + if (status) + goto err_remove_chip; + acpi_gpiochip_add(chip); status = gpiochip_sysfs_register(chip); diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 69dbe312b11b..f3191828f037 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -54,7 +54,7 @@ extern int of_mm_gpiochip_add(struct device_node *np, struct of_mm_gpio_chip *mm_gc); extern void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc); -extern void of_gpiochip_add(struct gpio_chip *gc); +extern int of_gpiochip_add(struct gpio_chip *gc); extern void of_gpiochip_remove(struct gpio_chip *gc); extern int of_gpio_simple_xlate(struct gpio_chip *gc, const struct of_phandle_args *gpiospec, @@ -76,7 +76,7 @@ static inline int of_gpio_simple_xlate(struct gpio_chip *gc, return -ENOSYS; } -static inline void of_gpiochip_add(struct gpio_chip *gc) { } +static inline int of_gpiochip_add(struct gpio_chip *gc) { return 0; } static inline void of_gpiochip_remove(struct gpio_chip *gc) { } #endif /* CONFIG_OF_GPIO */ -- cgit v1.2.3-70-g09d2 From 559ed40752dc63e68f9b9ad301b20e6a3fe5cf21 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Sun, 26 Jul 2015 02:07:47 +0200 Subject: cpufreq: Avoid attempts to create duplicate symbolic links After commit 87549141d516 (cpufreq: Stop migrating sysfs files on hotplug) there is a problem with CPUs that share cpufreq policy objects with other CPUs and are initially offline. Say CPU1 shares a policy with CPU0 which is online and is registered first. As part of the registration process, cpufreq_add_dev() is called for it. It creates the policy object and a symbolic link to it from the CPU1's sysfs directory. If CPU1 is registered subsequently and it is offline at that time, cpufreq_add_dev() will attempt to create a symbolic link to the policy object for it, but that link is present already, so a warning about that will be triggered. To avoid that warning, make cpufreq use an additional CPU mask containing related CPUs that are actually present for each policy object. That mask is initialized when the policy object is populated after its creation (for the first online CPU using it) and it includes CPUs from the "policy CPUs" mask returned by the cpufreq driver's ->init() callback that are physically present at that time. Symbolic links to the policy are created only for the CPUs in that mask. If cpufreq_add_dev() is invoked for an offline CPU, it checks the new mask and only creates the symlink if the CPU was not in it (the CPU is added to the mask at the same time). In turn, cpufreq_remove_dev() drops the given CPU from the new mask, removes its symlink to the policy object and returns, unless it is the CPU owning the policy object. In that case, the policy object is moved to a new CPU's sysfs directory or deleted if the CPU being removed was the last user of the policy. 
While at it, notice that cpufreq_remove_dev() can't fail, because its return value is ignored, so make it ignore return values from __cpufreq_remove_dev_prepare() and __cpufreq_remove_dev_finish() and prevent these functions from aborting on errors returned by __cpufreq_governor(). Also drop the now unused sif argument from them. Fixes: 87549141d516 (cpufreq: Stop migrating sysfs files on hotplug) Signed-off-by: Rafael J. Wysocki Reported-and-tested-by: Russell King Acked-by: Viresh Kumar --- drivers/cpufreq/cpufreq.c | 108 +++++++++++++++++++++++----------------------- include/linux/cpufreq.h | 1 + 2 files changed, 56 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26063afb3eba..7a3c30c4336f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1002,7 +1002,7 @@ static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) int ret = 0; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1019,7 +1019,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) unsigned int j; /* Some related CPUs might not be present (physically hotplugged) */ - for_each_cpu_and(j, policy->related_cpus, cpu_present_mask) { + for_each_cpu(j, policy->real_cpus) { if (j == policy->kobj_cpu) continue; @@ -1163,11 +1163,14 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) goto err_free_cpumask; + if (!zalloc_cpumask_var(&policy->real_cpus, GFP_KERNEL)) + goto err_free_rcpumask; + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &dev->kobj, "cpufreq"); if (ret) { pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret); - goto err_free_rcpumask; + goto err_free_real_cpus; } INIT_LIST_HEAD(&policy->policy_list); 
@@ -1184,6 +1187,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) return policy; +err_free_real_cpus: + free_cpumask_var(policy->real_cpus); err_free_rcpumask: free_cpumask_var(policy->related_cpus); err_free_cpumask: @@ -1234,6 +1239,7 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) write_unlock_irqrestore(&cpufreq_driver_lock, flags); cpufreq_policy_put_kobj(policy, notify); + free_cpumask_var(policy->real_cpus); free_cpumask_var(policy->related_cpus); free_cpumask_var(policy->cpus); kfree(policy); @@ -1258,14 +1264,17 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) pr_debug("adding CPU %u\n", cpu); - /* - * Only possible if 'cpu' wasn't physically present earlier and we are - * here from subsys_interface add callback. A hotplug notifier will - * follow and we will handle it like logical CPU hotplug then. For now, - * just create the sysfs link. - */ - if (cpu_is_offline(cpu)) - return add_cpu_dev_symlink(per_cpu(cpufreq_cpu_data, cpu), cpu); + if (cpu_is_offline(cpu)) { + /* + * Only possible if we are here from the subsys_interface add + * callback. A hotplug notifier will follow and we will handle + * it as CPU online then. For now, just create the sysfs link, + * unless there is no policy or the link is already present. + */ + policy = per_cpu(cpufreq_cpu_data, cpu); + return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? add_cpu_dev_symlink(policy, cpu) : 0; + } if (!down_read_trylock(&cpufreq_rwsem)) return 0; @@ -1307,6 +1316,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* related cpus should atleast have policy->cpus */ cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember which CPUs have been present at the policy creation time. */ + if (!recover_policy) + cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + /* * affected cpus must always be the one, which are online. 
We aren't * managing offline cpus here. @@ -1420,8 +1433,7 @@ nomem_out: return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_prepare(struct device *dev) { unsigned int cpu = dev->id; int ret = 0; @@ -1437,10 +1449,8 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); - if (ret) { + if (ret) pr_err("%s: Failed to stop governor\n", __func__); - return ret; - } } down_write(&policy->rwsem); @@ -1473,8 +1483,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev, return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev, - struct subsys_interface *sif) +static int __cpufreq_remove_dev_finish(struct device *dev) { unsigned int cpu = dev->id; int ret; @@ -1492,10 +1501,8 @@ static int __cpufreq_remove_dev_finish(struct device *dev, /* If cpu is last user of policy, free policy */ if (has_target()) { ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); - if (ret) { + if (ret) pr_err("%s: Failed to exit governor\n", __func__); - return ret; - } } /* @@ -1506,10 +1513,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev, if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - /* Free the policy only if the driver is getting removed. */ - if (sif) - cpufreq_policy_free(policy, true); - return 0; } @@ -1521,42 +1524,41 @@ static int __cpufreq_remove_dev_finish(struct device *dev, static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; - int ret; - - /* - * Only possible if 'cpu' is getting physically removed now. A hotplug - * notifier should have already been called and we just need to remove - * link or free policy here. 
- */ - if (cpu_is_offline(cpu)) { - struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - struct cpumask mask; + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); - if (!policy) - return 0; + if (!policy) + return 0; - cpumask_copy(&mask, policy->related_cpus); - cpumask_clear_cpu(cpu, &mask); + if (cpu_online(cpu)) { + __cpufreq_remove_dev_prepare(dev); + __cpufreq_remove_dev_finish(dev); + } - /* - * Free policy only if all policy->related_cpus are removed - * physically. - */ - if (cpumask_intersects(&mask, cpu_present_mask)) { - remove_cpu_dev_symlink(policy, cpu); - return 0; - } + cpumask_clear_cpu(cpu, policy->real_cpus); + if (cpumask_empty(policy->real_cpus)) { cpufreq_policy_free(policy, true); return 0; } - ret = __cpufreq_remove_dev_prepare(dev, sif); + if (cpu != policy->kobj_cpu) { + remove_cpu_dev_symlink(policy, cpu); + } else { + /* + * The CPU owning the policy object is going away. Move it to + * another suitable CPU. + */ + unsigned int new_cpu = cpumask_first(policy->real_cpus); + struct device *new_dev = get_cpu_device(new_cpu); + + dev_dbg(dev, "%s: Moving policy object to CPU%u\n", __func__, new_cpu); - if (!ret) - ret = __cpufreq_remove_dev_finish(dev, sif); + sysfs_remove_link(&new_dev->kobj, "cpufreq"); + policy->kobj_cpu = new_cpu; + WARN_ON(kobject_move(&policy->kobj, &new_dev->kobj)); + } - return ret; + return 0; } static void handle_update(struct work_struct *work) @@ -2395,11 +2397,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, break; case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev, NULL); + __cpufreq_remove_dev_prepare(dev); break; case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev, NULL); + __cpufreq_remove_dev_finish(dev); break; case CPU_DOWN_FAILED: diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 29ad97c34fd5..bde1e567b3a9 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -62,6 +62,7 @@ struct cpufreq_policy { /* CPUs sharing 
clock, require sw coordination */ cpumask_var_t cpus; /* Online CPUs only */ cpumask_var_t related_cpus; /* Online + Offline CPUs */ + cpumask_var_t real_cpus; /* Related and present */ unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ -- cgit v1.2.3-70-g09d2 From 841df7df196237ea63233f0f9eaa41db53afd70f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jul 2015 14:57:14 -0400 Subject: jbd2: avoid infinite loop when destroying aborted journal Commit 6f6a6fda2945 "jbd2: fix ocfs2 corrupt when updating journal superblock fails" changed jbd2_cleanup_journal_tail() to return EIO when the journal is aborted. That makes logic in jbd2_log_do_checkpoint() bail out which is fine, except that jbd2_journal_destroy() expects jbd2_log_do_checkpoint() to always make a progress in cleaning the journal. Without it jbd2_journal_destroy() just loops in an infinite loop. Fix jbd2_journal_destroy() to cleanup journal checkpoint lists of jbd2_log_do_checkpoint() fails with error. Reported-by: Eryu Guan Tested-by: Eryu Guan Fixes: 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/jbd2/checkpoint.c | 39 +++++++++++++++++++++++++++++++++------ fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 11 ++++++++++- include/linux/jbd2.h | 3 ++- 4 files changed, 46 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 4227dc4f7437..8c44654ce274 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * journal_clean_one_cp_list * * Find all the written-back checkpoint buffers in the given list and - * release them. + * release them. If 'destroy' is set, clean all buffers unconditionally. * * Called with j_list_lock held. * Returns 1 if we freed the transaction, 0 otherwise. 
*/ -static int journal_clean_one_cp_list(struct journal_head *jh) +static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy) { struct journal_head *last_jh; struct journal_head *next_jh = jh; @@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh) do { jh = next_jh; next_jh = jh->b_cpnext; - ret = __try_to_free_cp_buf(jh); + if (!destroy) + ret = __try_to_free_cp_buf(jh); + else + ret = __jbd2_journal_remove_checkpoint(jh) + 1; if (!ret) return freed; if (ret == 2) @@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh) * journal_clean_checkpoint_list * * Find all the written-back checkpoint buffers in the journal and release them. + * If 'destroy' is set, release all buffers unconditionally. * * Called with j_list_lock held. */ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal) +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy) { transaction_t *transaction, *last_transaction, *next_transaction; int ret; @@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); + ret = journal_clean_one_cp_list(transaction->t_checkpoint_list, + destroy); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. 
Bail out if @@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) * we can possibly see not yet submitted buffers on io_list */ ret = journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list); + t_checkpoint_io_list, destroy); if (need_resched()) return; /* @@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal) } while (transaction != last_transaction); } +/* + * Remove buffers from all checkpoint lists as journal is aborted and we just + * need to free memory + */ +void jbd2_journal_destroy_checkpoint(journal_t *journal) +{ + /* + * We loop because __jbd2_journal_clean_checkpoint_list() may abort + * early due to a need of rescheduling. + */ + while (1) { + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) { + spin_unlock(&journal->j_list_lock); + break; + } + __jbd2_journal_clean_checkpoint_list(journal, true); + spin_unlock(&journal->j_list_lock); + cond_resched(); + } +} + /* * journal_remove_checkpoint: called after a buffer has been committed * to disk (either by being write-back flushed to disk, or being diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b73e0215baa7..362e5f614450 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * frees some memory */ spin_lock(&journal->j_list_lock); - __jbd2_journal_clean_checkpoint_list(journal); + __jbd2_journal_clean_checkpoint_list(journal, false); spin_unlock(&journal->j_list_lock); jbd_debug(3, "JBD2: commit phase 1\n"); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index fe1b4bdecdfa..8270fe9e3641 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1693,8 +1693,17 @@ int jbd2_journal_destroy(journal_t *journal) while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); mutex_lock(&journal->j_checkpoint_mutex); - jbd2_log_do_checkpoint(journal); + err = jbd2_log_do_checkpoint(journal); 
mutex_unlock(&journal->j_checkpoint_mutex); + /* + * If checkpointing failed, just free the buffers to avoid + * looping forever + */ + if (err) { + jbd2_journal_destroy_checkpoint(journal); + spin_lock(&journal->j_list_lock); + break; + } spin_lock(&journal->j_list_lock); } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index edb640ae9a94..eb1cebed3f36 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1042,8 +1042,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block); extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -void __jbd2_journal_clean_checkpoint_list(journal_t *journal); +void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy); int __jbd2_journal_remove_checkpoint(struct journal_head *); +void jbd2_journal_destroy_checkpoint(journal_t *journal); void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); -- cgit v1.2.3-70-g09d2 From 9783c0d98501aa146ff467916ab4b8830a655d7c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 16 Jul 2015 12:50:27 -0700 Subject: clk: Allow providers to configure min/max rates clk providers are using the consumer APIs to set min/max rates on the clock they're providing. To encourage clk providers to move away from the consumer APIs, add a provider API to set the min/max rate of a clock. The assumption is that this is done before the clock can be requested via clk_get() and that the clock rate is already within the boundaries of the min/max that's configured. 
Tested-by: Sudeep Holla Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 16 ++++++++++++++-- include/linux/clk-provider.h | 2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index bd6dfbe04cf0..1ac237fe2fdb 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -58,6 +58,8 @@ struct clk_core { unsigned long flags; unsigned int enable_count; unsigned int prepare_count; + unsigned long min_rate; + unsigned long max_rate; unsigned long accuracy; int phase; struct hlist_head children; @@ -512,8 +514,8 @@ static void clk_core_get_boundaries(struct clk_core *core, { struct clk *clk_user; - *min_rate = 0; - *max_rate = ULONG_MAX; + *min_rate = core->min_rate; + *max_rate = core->max_rate; hlist_for_each_entry(clk_user, &core->clks, clks_node) *min_rate = max(*min_rate, clk_user->min_rate); @@ -522,6 +524,14 @@ static void clk_core_get_boundaries(struct clk_core *core, *max_rate = min(*max_rate, clk_user->max_rate); } +void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, + unsigned long max_rate) +{ + hw->core->min_rate = min_rate; + hw->core->max_rate = max_rate; +} +EXPORT_SYMBOL_GPL(clk_hw_set_rate_range); + /* * Helper for finding best parent to provide a given frequency. This can be used * directly as a determine_rate callback (e.g. 
for a mux), or from a more @@ -2498,6 +2508,8 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw) core->hw = hw; core->flags = hw->init->flags; core->num_parents = hw->init->num_parents; + core->min_rate = 0; + core->max_rate = ULONG_MAX; hw->core = core; /* allocate local copy in case parent_names is __initdata */ diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 2116e2b8a5f2..d62e7eab1dbe 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -619,6 +619,8 @@ int __clk_determine_rate(struct clk_hw *core, struct clk_rate_request *req); int __clk_mux_determine_rate_closest(struct clk_hw *hw, struct clk_rate_request *req); void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent); +void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, + unsigned long max_rate); static inline void __clk_hw_set_clk(struct clk_hw *dst, struct clk_hw *src) { -- cgit v1.2.3-70-g09d2 From afe76c8fd030dd6b75fa69f7af7b7eb1e212f248 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Fri, 15 May 2015 15:45:47 -0400 Subject: clk: allow a clk divider with max divisor when zero This commit allows certain Broadcom STB clock dividers to be used with clk-divider.c. It allows for a clock whose field value is equal to the divisor, except when the field value is zero, in which case the divisor is 2^width.
For example, consider a divisor clock with a two bit field: value divisor 0 4 1 1 2 2 3 3 Signed-off-by: Jim Quinlan Signed-off-by: Michael Turquette --- drivers/clk/clk-divider.c | 16 +++++++++++----- include/linux/clk-provider.h | 4 ++++ 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index 706b5783c360..2cab88b9c1a8 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -78,12 +78,14 @@ static unsigned int _get_table_div(const struct clk_div_table *table, } static unsigned int _get_div(const struct clk_div_table *table, - unsigned int val, unsigned long flags) + unsigned int val, unsigned long flags, u8 width) { if (flags & CLK_DIVIDER_ONE_BASED) return val; if (flags & CLK_DIVIDER_POWER_OF_TWO) return 1 << val; + if (flags & CLK_DIVIDER_MAX_AT_ZERO) + return val ? val : div_mask(width) + 1; if (table) return _get_table_div(table, val); return val + 1; @@ -101,12 +103,14 @@ static unsigned int _get_table_val(const struct clk_div_table *table, } static unsigned int _get_val(const struct clk_div_table *table, - unsigned int div, unsigned long flags) + unsigned int div, unsigned long flags, u8 width) { if (flags & CLK_DIVIDER_ONE_BASED) return div; if (flags & CLK_DIVIDER_POWER_OF_TWO) return __ffs(div); + if (flags & CLK_DIVIDER_MAX_AT_ZERO) + return (div == div_mask(width) + 1) ? 
0 : div; if (table) return _get_table_val(table, div); return div - 1; @@ -117,9 +121,10 @@ unsigned long divider_recalc_rate(struct clk_hw *hw, unsigned long parent_rate, const struct clk_div_table *table, unsigned long flags) { + struct clk_divider *divider = to_clk_divider(hw); unsigned int div; - div = _get_div(table, val, flags); + div = _get_div(table, val, flags, divider->width); if (!div) { WARN(!(flags & CLK_DIVIDER_ALLOW_ZERO), "%s: Zero divisor and CLK_DIVIDER_ALLOW_ZERO not set\n", @@ -351,7 +356,8 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate, if (divider->flags & CLK_DIVIDER_READ_ONLY) { bestdiv = readl(divider->reg) >> divider->shift; bestdiv &= div_mask(divider->width); - bestdiv = _get_div(divider->table, bestdiv, divider->flags); + bestdiv = _get_div(divider->table, bestdiv, divider->flags, + divider->width); return DIV_ROUND_UP(*prate, bestdiv); } @@ -370,7 +376,7 @@ int divider_get_val(unsigned long rate, unsigned long parent_rate, if (!_is_valid_div(table, div, flags)) return -EINVAL; - value = _get_val(table, div, flags); + value = _get_val(table, div, flags, width); return min_t(unsigned int, value, div_mask(width)); } diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 402478ed9933..699a25075170 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -361,6 +361,9 @@ struct clk_div_table { * to the closest integer instead of the up one. * CLK_DIVIDER_READ_ONLY - The divider settings are preconfigured and should * not be changed by the clock framework. + * CLK_DIVIDER_MAX_AT_ZERO - For dividers which are like CLK_DIVIDER_ONE_BASED + * except when the value read from the register is zero, the divisor is + * 2^width of the field. 
*/ struct clk_divider { struct clk_hw hw; @@ -378,6 +381,7 @@ struct clk_divider { #define CLK_DIVIDER_HIWORD_MASK BIT(3) #define CLK_DIVIDER_ROUND_CLOSEST BIT(4) #define CLK_DIVIDER_READ_ONLY BIT(5) +#define CLK_DIVIDER_MAX_AT_ZERO BIT(6) extern const struct clk_ops clk_divider_ops; -- cgit v1.2.3-70-g09d2 From 37bff2c159a3629b592e54162239cb8c337c965d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 24 Jul 2015 09:31:29 -0700 Subject: clk: gpio: Mark parent_names array const Let's encourage const arrays of parent names like other basic clock types. Cc: Sergej Sawazki Signed-off-by: Stephen Boyd --- drivers/clk/clk-gpio.c | 13 +++++++------ include/linux/clk-provider.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk-gpio.c b/drivers/clk/clk-gpio.c index 41277a1526c7..10819e248414 100644 --- a/drivers/clk/clk-gpio.c +++ b/drivers/clk/clk-gpio.c @@ -95,7 +95,7 @@ const struct clk_ops clk_gpio_mux_ops = { EXPORT_SYMBOL_GPL(clk_gpio_mux_ops); static struct clk *clk_register_gpio(struct device *dev, const char *name, - const char **parent_names, u8 num_parents, unsigned gpio, + const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags, const struct clk_ops *clk_gpio_ops) { @@ -188,7 +188,7 @@ EXPORT_SYMBOL_GPL(clk_register_gpio_gate); * @flags: clock flags */ struct clk *clk_register_gpio_mux(struct device *dev, const char *name, - const char **parent_names, u8 num_parents, unsigned gpio, + const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags) { if (num_parents != 2) { @@ -213,7 +213,7 @@ struct clk_gpio_delayed_register_data { struct mutex lock; struct clk *clk; struct clk *(*clk_register_get)(const char *name, - const char **parent_names, u8 num_parents, + const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low); }; @@ -273,7 +273,7 @@ out: } static struct clk 
*of_clk_gpio_gate_delayed_register_get(const char *name, - const char **parent_names, u8 num_parents, + const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low) { return clk_register_gpio_gate(NULL, name, parent_names[0], @@ -281,7 +281,7 @@ static struct clk *of_clk_gpio_gate_delayed_register_get(const char *name, } static struct clk *of_clk_gpio_mux_delayed_register_get(const char *name, - const char **parent_names, u8 num_parents, unsigned gpio, + const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low) { return clk_register_gpio_mux(NULL, name, parent_names, num_parents, @@ -291,7 +291,8 @@ static struct clk *of_clk_gpio_mux_delayed_register_get(const char *name, static void __init of_gpio_clk_setup(struct device_node *node, const char *gpio_name, struct clk *(*clk_register_get)(const char *name, - const char **parent_names, u8 num_parents, + const char * const *parent_names, + u8 num_parents, unsigned gpio, bool active_low)) { struct clk_gpio_delayed_register_data *data; diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 699a25075170..06a56e55cfaf 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -583,7 +583,7 @@ void of_gpio_clk_gate_setup(struct device_node *node); extern const struct clk_ops clk_gpio_mux_ops; struct clk *clk_register_gpio_mux(struct device *dev, const char *name, - const char **parent_names, u8 num_parents, unsigned gpio, + const char * const *parent_names, u8 num_parents, unsigned gpio, bool active_low, unsigned long flags); void of_gpio_mux_clk_setup(struct device_node *node); -- cgit v1.2.3-70-g09d2 From 4b638df4c9d556a6d947d6dbac364bee37b68b8e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 26 Jun 2015 14:50:10 -0700 Subject: soc: qcom: Add Shared Memory Manager driver The Shared Memory Manager driver implements an interface for allocating and accessing items in the memory area shared among all of the processors in a 
Qualcomm platform. Signed-off-by: Bjorn Andersson Acked-by: Andy Gross Signed-off-by: Andy Gross --- drivers/soc/qcom/Kconfig | 8 + drivers/soc/qcom/Makefile | 1 + drivers/soc/qcom/smem.c | 775 ++++++++++++++++++++++++++++++++++++++++++ include/linux/soc/qcom/smem.h | 11 + 4 files changed, 795 insertions(+) create mode 100644 drivers/soc/qcom/smem.c create mode 100644 include/linux/soc/qcom/smem.h (limited to 'include/linux') diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 5eea374c8fa6..8544e1594c2c 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -17,3 +17,11 @@ config QCOM_PM QCOM Platform specific power driver to manage cores and L2 low power modes. It interface with various system drivers to put the cores in low power modes. + +config QCOM_SMEM + tristate "Qualcomm Shared Memory Manager (SMEM)" + depends on ARCH_QCOM + help + Say y here to enable support for the Qualcomm Shared Memory Manager. + The driver provides an interface to items in a heap shared among all + processors in a Qualcomm platform. diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 931d385386c5..3a033c43c0ef 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_QCOM_GSBI) += qcom_gsbi.o obj-$(CONFIG_QCOM_PM) += spm.o +obj-$(CONFIG_QCOM_SMEM) += smem.o diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c new file mode 100644 index 000000000000..7c2c324c4b10 --- /dev/null +++ b/drivers/soc/qcom/smem.c @@ -0,0 +1,775 @@ +/* + * Copyright (c) 2015, Sony Mobile Communications AB. + * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The Qualcomm shared memory system is an allocate-only heap structure that + * consists of one or more memory areas that can be accessed by the processors + * in the SoC. + * + * All systems contain a global heap, accessible by all processors in the SoC, + * with a table of contents data structure (@smem_header) at the beginning of + * the main shared memory block. + * + * The global header contains meta data for allocations as well as a fixed list + * of 512 entries (@smem_global_entry) that can be initialized to reference + * parts of the shared memory space. + * + * + * In addition to this global heap a set of "private" heaps can be set up at + * boot time with access restrictions so that only certain processor pairs can + * access the data. + * + * These partitions are referenced from an optional partition table + * (@smem_ptable), that is found 4kB from the end of the main smem region. The + * partition table entries (@smem_ptable_entry) list the involved processors + * (or hosts) and their location in the main shared memory region. + * + * Each partition starts with a header (@smem_partition_header) that identifies + * the partition and holds properties for the two internal memory regions. The + * two regions are cached and non-cached memory respectively. Each region + * contains a linked list of allocation headers (@smem_private_entry) followed by + * their data. + * + * Items in the non-cached region are allocated from the start of the partition + * while items in the cached region are allocated from the end. The free area + * is hence the region between the cached and non-cached offsets.
+ * + * + * To synchronize allocations in the shared memory heaps a remote spinlock must + * be held - currently lock number 3 of the sfpb or tcsr is used for this on all + * platforms. + * + */ + +/* + * Item 3 of the global heap contains an array of versions for the various + * software components in the SoC. We verify that the boot loader version is + * the expected version (SMEM_EXPECTED_VERSION) as a sanity check. + */ +#define SMEM_ITEM_VERSION 3 +#define SMEM_MASTER_SBL_VERSION_INDEX 7 +#define SMEM_EXPECTED_VERSION 11 + +/* + * The first 8 items are only to be allocated by the boot loader while + * initializing the heap. + */ +#define SMEM_ITEM_LAST_FIXED 8 + +/* Highest accepted item number, for both global and private heaps */ +#define SMEM_ITEM_COUNT 512 + +/* Processor/host identifier for the application processor */ +#define SMEM_HOST_APPS 0 + +/* Max number of processors/hosts in a system */ +#define SMEM_HOST_COUNT 9 + +/** + * struct smem_proc_comm - proc_comm communication struct (legacy) + * @command: current command to be executed + * @status: status of the currently requested command + * @params: parameters to the command + */ +struct smem_proc_comm { + u32 command; + u32 status; + u32 params[2]; +}; + +/** + * struct smem_global_entry - entry to reference smem items on the heap + * @allocated: boolean to indicate if this entry is used + * @offset: offset to the allocated space + * @size: size of the allocated space, 8 byte aligned + * @aux_base: base address for the memory region used by this unit, or 0 for + * the default region.
bits 0,1 are reserved + */ +struct smem_global_entry { + u32 allocated; + u32 offset; + u32 size; + u32 aux_base; /* bits 1:0 reserved */ +}; +#define AUX_BASE_MASK 0xfffffffc + +/** + * struct smem_header - header found in beginning of primary smem region + * @proc_comm: proc_comm communication interface (legacy) + * @version: array of versions for the various subsystems + * @initialized: boolean to indicate that smem is initialized + * @free_offset: index of the first unallocated byte in smem + * @available: number of bytes available for allocation + * @reserved: reserved field, must be 0 + * toc: array of references to items + */ +struct smem_header { + struct smem_proc_comm proc_comm[4]; + u32 version[32]; + u32 initialized; + u32 free_offset; + u32 available; + u32 reserved; + struct smem_global_entry toc[SMEM_ITEM_COUNT]; +}; + +/** + * struct smem_ptable_entry - one entry in the @smem_ptable list + * @offset: offset, within the main shared memory region, of the partition + * @size: size of the partition + * @flags: flags for the partition (currently unused) + * @host0: first processor/host with access to this partition + * @host1: second processor/host with access to this partition + * @reserved: reserved entries for later use + */ +struct smem_ptable_entry { + u32 offset; + u32 size; + u32 flags; + u16 host0; + u16 host1; + u32 reserved[8]; +}; + +/** + * struct smem_ptable - partition table for the private partitions + * @magic: magic number, must be SMEM_PTABLE_MAGIC + * @version: version of the partition table + * @num_entries: number of partitions in the table + * @reserved: for now reserved entries + * @entry: list of @smem_ptable_entry for the @num_entries partitions + */ +struct smem_ptable { + u32 magic; + u32 version; + u32 num_entries; + u32 reserved[5]; + struct smem_ptable_entry entry[]; +}; +#define SMEM_PTABLE_MAGIC 0x434f5424 /* "$TOC" */ + +/** + * struct smem_partition_header - header of the partitions + * @magic: magic number, must be 
SMEM_PART_MAGIC + * @host0: first processor/host with access to this partition + * @host1: second processor/host with access to this partition + * @size: size of the partition + * @offset_free_uncached: offset to the first free byte of uncached memory in + * this partition + * @offset_free_cached: offset to the first free byte of cached memory in this + * partition + * @reserved: for now reserved entries + */ +struct smem_partition_header { + u32 magic; + u16 host0; + u16 host1; + u32 size; + u32 offset_free_uncached; + u32 offset_free_cached; + u32 reserved[3]; +}; +#define SMEM_PART_MAGIC 0x54525024 /* "$PRT" */ + +/** + * struct smem_private_entry - header of each item in the private partition + * @canary: magic number, must be SMEM_PRIVATE_CANARY + * @item: identifying number of the smem item + * @size: size of the data, including padding bytes + * @padding_data: number of bytes of padding of data + * @padding_hdr: number of bytes of padding between the header and the data + * @reserved: for now reserved entry + */ +struct smem_private_entry { + u16 canary; + u16 item; + u32 size; /* includes padding bytes */ + u16 padding_data; + u16 padding_hdr; + u32 reserved; +}; +#define SMEM_PRIVATE_CANARY 0xa5a5 + +/** + * struct smem_region - representation of a chunk of memory used for smem + * @aux_base: identifier of aux_mem base + * @virt_base: virtual base address of memory with this aux_mem identifier + * @size: size of the memory region + */ +struct smem_region { + u32 aux_base; + void __iomem *virt_base; + size_t size; +}; + +/** + * struct qcom_smem - device data for the smem device + * @dev: device pointer + * @hwlock: reference to a hwspinlock + * @partitions: list of pointers to partitions affecting the current + * processor/host + * @num_regions: number of @regions + * @regions: list of the memory regions defining the shared memory + */ +struct qcom_smem { + struct device *dev; + + struct hwspinlock *hwlock; + + struct smem_partition_header 
*partitions[SMEM_HOST_COUNT]; + + unsigned num_regions; + struct smem_region regions[0]; +}; + +/* Pointer to the one and only smem handle */ +static struct qcom_smem *__smem; + +/* Timeout (ms) for the trylock of remote spinlocks */ +#define HWSPINLOCK_TIMEOUT 1000 + +static int qcom_smem_alloc_private(struct qcom_smem *smem, + unsigned host, + unsigned item, + size_t size) +{ + struct smem_partition_header *phdr; + struct smem_private_entry *hdr; + size_t alloc_size; + void *p; + + /* We're not going to find it if there's no matching partition */ + if (host >= SMEM_HOST_COUNT || !smem->partitions[host]) + return -ENOENT; + + phdr = smem->partitions[host]; + + p = (void *)phdr + sizeof(*phdr); + while (p < (void *)phdr + phdr->offset_free_uncached) { + hdr = p; + + if (hdr->canary != SMEM_PRIVATE_CANARY) { + dev_err(smem->dev, + "Found invalid canary in host %d partition\n", + host); + return -EINVAL; + } + + if (hdr->item == item) + return -EEXIST; + + p += sizeof(*hdr) + hdr->padding_hdr + hdr->size; + } + + /* Check that we don't grow into the cached region */ + alloc_size = sizeof(*hdr) + ALIGN(size, 8); + if (p + alloc_size >= (void *)phdr + phdr->offset_free_cached) { + dev_err(smem->dev, "Out of memory\n"); + return -ENOSPC; + } + + hdr = p; + hdr->canary = SMEM_PRIVATE_CANARY; + hdr->item = item; + hdr->size = ALIGN(size, 8); + hdr->padding_data = hdr->size - size; + hdr->padding_hdr = 0; + + /* + * Ensure the header is written before we advance the free offset, so + * that remote processors that does not take the remote spinlock still + * gets a consistent view of the linked list. 
+ */ + wmb(); + phdr->offset_free_uncached += alloc_size; + + return 0; +} + +static int qcom_smem_alloc_global(struct qcom_smem *smem, + unsigned item, + size_t size) +{ + struct smem_header *header; + struct smem_global_entry *entry; + + if (WARN_ON(item >= SMEM_ITEM_COUNT)) + return -EINVAL; + + header = smem->regions[0].virt_base; + entry = &header->toc[item]; + if (entry->allocated) + return -EEXIST; + + size = ALIGN(size, 8); + if (WARN_ON(size > header->available)) + return -ENOMEM; + + entry->offset = header->free_offset; + entry->size = size; + + /* + * Ensure the header is consistent before we mark the item allocated, + * so that remote processors will get a consistent view of the item + * even though they do not take the spinlock on read. + */ + wmb(); + entry->allocated = 1; + + header->free_offset += size; + header->available -= size; + + return 0; +} + +/** + * qcom_smem_alloc() - allocate space for a smem item + * @host: remote processor id, or -1 + * @item: smem item handle + * @size: number of bytes to be allocated + * + * Allocate space for a given smem item of size @size, given that the item is + * not yet allocated. 
+ */ +int qcom_smem_alloc(unsigned host, unsigned item, size_t size) +{ + unsigned long flags; + int ret; + + if (!__smem) + return -EPROBE_DEFER; + + if (item < SMEM_ITEM_LAST_FIXED) { + dev_err(__smem->dev, + "Rejecting allocation of static entry %d\n", item); + return -EINVAL; + } + + ret = hwspin_lock_timeout_irqsave(__smem->hwlock, + HWSPINLOCK_TIMEOUT, + &flags); + if (ret) + return ret; + + ret = qcom_smem_alloc_private(__smem, host, item, size); + if (ret == -ENOENT) + ret = qcom_smem_alloc_global(__smem, item, size); + + hwspin_unlock_irqrestore(__smem->hwlock, &flags); + + return ret; +} +EXPORT_SYMBOL(qcom_smem_alloc); + +static int qcom_smem_get_global(struct qcom_smem *smem, + unsigned item, + void **ptr, + size_t *size) +{ + struct smem_header *header; + struct smem_region *area; + struct smem_global_entry *entry; + u32 aux_base; + unsigned i; + + if (WARN_ON(item >= SMEM_ITEM_COUNT)) + return -EINVAL; + + header = smem->regions[0].virt_base; + entry = &header->toc[item]; + if (!entry->allocated) + return -ENXIO; + + if (ptr != NULL) { + aux_base = entry->aux_base & AUX_BASE_MASK; + + for (i = 0; i < smem->num_regions; i++) { + area = &smem->regions[i]; + + if (area->aux_base == aux_base || !aux_base) { + *ptr = area->virt_base + entry->offset; + break; + } + } + } + if (size != NULL) + *size = entry->size; + + return 0; +} + +static int qcom_smem_get_private(struct qcom_smem *smem, + unsigned host, + unsigned item, + void **ptr, + size_t *size) +{ + struct smem_partition_header *phdr; + struct smem_private_entry *hdr; + void *p; + + /* We're not going to find it if there's no matching partition */ + if (host >= SMEM_HOST_COUNT || !smem->partitions[host]) + return -ENOENT; + + phdr = smem->partitions[host]; + + p = (void *)phdr + sizeof(*phdr); + while (p < (void *)phdr + phdr->offset_free_uncached) { + hdr = p; + + if (hdr->canary != SMEM_PRIVATE_CANARY) { + dev_err(smem->dev, + "Found invalid canary in host %d partition\n", + host); + return 
-EINVAL; + } + + if (hdr->item == item) { + if (ptr != NULL) + *ptr = p + sizeof(*hdr) + hdr->padding_hdr; + + if (size != NULL) + *size = hdr->size - hdr->padding_data; + + return 0; + } + + p += sizeof(*hdr) + hdr->padding_hdr + hdr->size; + } + + return -ENOENT; +} + +/** + * qcom_smem_get() - resolve ptr of size of a smem item + * @host: the remote processor, or -1 + * @item: smem item handle + * @ptr: pointer to be filled out with address of the item + * @size: pointer to be filled out with size of the item + * + * Looks up pointer and size of a smem item. + */ +int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size) +{ + unsigned long flags; + int ret; + + if (!__smem) + return -EPROBE_DEFER; + + ret = hwspin_lock_timeout_irqsave(__smem->hwlock, + HWSPINLOCK_TIMEOUT, + &flags); + if (ret) + return ret; + + ret = qcom_smem_get_private(__smem, host, item, ptr, size); + if (ret == -ENOENT) + ret = qcom_smem_get_global(__smem, item, ptr, size); + + hwspin_unlock_irqrestore(__smem->hwlock, &flags); + return ret; + +} +EXPORT_SYMBOL(qcom_smem_get); + +/** + * qcom_smem_get_free_space() - retrieve amount of free space in a partition + * @host: the remote processor identifying a partition, or -1 + * + * To be used by smem clients as a quick way to determine if any new + * allocations has been made. 
+ */ +int qcom_smem_get_free_space(unsigned host) +{ + struct smem_partition_header *phdr; + struct smem_header *header; + unsigned ret; + + if (!__smem) + return -EPROBE_DEFER; + + if (host < SMEM_HOST_COUNT && __smem->partitions[host]) { + phdr = __smem->partitions[host]; + ret = phdr->offset_free_cached - phdr->offset_free_uncached; + } else { + header = __smem->regions[0].virt_base; + ret = header->available; + } + + return ret; +} +EXPORT_SYMBOL(qcom_smem_get_free_space); + +static int qcom_smem_get_sbl_version(struct qcom_smem *smem) +{ + unsigned *versions; + size_t size; + int ret; + + ret = qcom_smem_get_global(smem, SMEM_ITEM_VERSION, + (void **)&versions, &size); + if (ret < 0) { + dev_err(smem->dev, "Unable to read the version item\n"); + return -ENOENT; + } + + if (size < sizeof(unsigned) * SMEM_MASTER_SBL_VERSION_INDEX) { + dev_err(smem->dev, "Version item is too small\n"); + return -EINVAL; + } + + return versions[SMEM_MASTER_SBL_VERSION_INDEX]; +} + +static int qcom_smem_enumerate_partitions(struct qcom_smem *smem, + unsigned local_host) +{ + struct smem_partition_header *header; + struct smem_ptable_entry *entry; + struct smem_ptable *ptable; + unsigned remote_host; + int i; + + ptable = smem->regions[0].virt_base + smem->regions[0].size - SZ_4K; + if (ptable->magic != SMEM_PTABLE_MAGIC) + return 0; + + if (ptable->version != 1) { + dev_err(smem->dev, + "Unsupported partition header version %d\n", + ptable->version); + return -EINVAL; + } + + for (i = 0; i < ptable->num_entries; i++) { + entry = &ptable->entry[i]; + + if (entry->host0 != local_host && entry->host1 != local_host) + continue; + + if (!entry->offset) + continue; + + if (!entry->size) + continue; + + if (entry->host0 == local_host) + remote_host = entry->host1; + else + remote_host = entry->host0; + + if (remote_host >= SMEM_HOST_COUNT) { + dev_err(smem->dev, + "Invalid remote host %d\n", + remote_host); + return -EINVAL; + } + + if (smem->partitions[remote_host]) { + 
dev_err(smem->dev, + "Already found a partition for host %d\n", + remote_host); + return -EINVAL; + } + + header = smem->regions[0].virt_base + entry->offset; + + if (header->magic != SMEM_PART_MAGIC) { + dev_err(smem->dev, + "Partition %d has invalid magic\n", i); + return -EINVAL; + } + + if (header->host0 != local_host && header->host1 != local_host) { + dev_err(smem->dev, + "Partition %d hosts are invalid\n", i); + return -EINVAL; + } + + if (header->host0 != remote_host && header->host1 != remote_host) { + dev_err(smem->dev, + "Partition %d hosts are invalid\n", i); + return -EINVAL; + } + + if (header->size != entry->size) { + dev_err(smem->dev, + "Partition %d has invalid size\n", i); + return -EINVAL; + } + + if (header->offset_free_uncached > header->size) { + dev_err(smem->dev, + "Partition %d has invalid free pointer\n", i); + return -EINVAL; + } + + smem->partitions[remote_host] = header; + } + + return 0; +} + +static int qcom_smem_count_mem_regions(struct platform_device *pdev) +{ + struct resource *res; + int num_regions = 0; + int i; + + for (i = 0; i < pdev->num_resources; i++) { + res = &pdev->resource[i]; + + if (resource_type(res) == IORESOURCE_MEM) + num_regions++; + } + + return num_regions; +} + +static int qcom_smem_probe(struct platform_device *pdev) +{ + struct smem_header *header; + struct device_node *np; + struct qcom_smem *smem; + struct resource *res; + struct resource r; + size_t array_size; + int num_regions = 0; + int hwlock_id; + u32 version; + int ret; + int i; + + num_regions = qcom_smem_count_mem_regions(pdev) + 1; + + array_size = num_regions * sizeof(struct smem_region); + smem = devm_kzalloc(&pdev->dev, sizeof(*smem) + array_size, GFP_KERNEL); + if (!smem) + return -ENOMEM; + + smem->dev = &pdev->dev; + smem->num_regions = num_regions; + + np = of_parse_phandle(pdev->dev.of_node, "memory-region", 0); + if (!np) { + dev_err(&pdev->dev, "No memory-region specified\n"); + return -EINVAL; + } + + ret = of_address_to_resource(np, 
0, &r); + of_node_put(np); + if (ret) + return ret; + + smem->regions[0].aux_base = (u32)r.start; + smem->regions[0].size = resource_size(&r); + smem->regions[0].virt_base = devm_ioremap_nocache(&pdev->dev, + r.start, + resource_size(&r)); + if (!smem->regions[0].virt_base) + return -ENOMEM; + + for (i = 1; i < num_regions; i++) { + res = platform_get_resource(pdev, IORESOURCE_MEM, i - 1); + + smem->regions[i].aux_base = (u32)res->start; + smem->regions[i].size = resource_size(res); + smem->regions[i].virt_base = devm_ioremap_nocache(&pdev->dev, + res->start, + resource_size(res)); + if (!smem->regions[i].virt_base) + return -ENOMEM; + } + + header = smem->regions[0].virt_base; + if (header->initialized != 1 || header->reserved) { + dev_err(&pdev->dev, "SMEM is not initialized by SBL\n"); + return -EINVAL; + } + + version = qcom_smem_get_sbl_version(smem); + if (version >> 16 != SMEM_EXPECTED_VERSION) { + dev_err(&pdev->dev, "Unsupported SMEM version 0x%x\n", version); + return -EINVAL; + } + + ret = qcom_smem_enumerate_partitions(smem, SMEM_HOST_APPS); + if (ret < 0) + return ret; + + hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0); + if (hwlock_id < 0) { + dev_err(&pdev->dev, "failed to retrieve hwlock\n"); + return hwlock_id; + } + + smem->hwlock = hwspin_lock_request_specific(hwlock_id); + if (!smem->hwlock) + return -ENXIO; + + __smem = smem; + + return 0; +} + +static int qcom_smem_remove(struct platform_device *pdev) +{ + hwspin_lock_free(__smem->hwlock); /* release the lock while __smem is still valid */ + __smem = NULL; + + return 0; +} + +static const struct of_device_id qcom_smem_of_match[] = { + { .compatible = "qcom,smem" }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_smem_of_match); + +static struct platform_driver qcom_smem_driver = { + .probe = qcom_smem_probe, + .remove = qcom_smem_remove, + .driver = { + .name = "qcom-smem", + .of_match_table = qcom_smem_of_match, + .suppress_bind_attrs = true, + }, +}; + +static int __init qcom_smem_init(void) +{ + return
platform_driver_register(&qcom_smem_driver); +} +arch_initcall(qcom_smem_init); + +static void __exit qcom_smem_exit(void) +{ + platform_driver_unregister(&qcom_smem_driver); +} +module_exit(qcom_smem_exit) + +MODULE_AUTHOR("Bjorn Andersson "); +MODULE_DESCRIPTION("Qualcomm Shared Memory Manager"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h new file mode 100644 index 000000000000..bc9630d3aced --- /dev/null +++ b/include/linux/soc/qcom/smem.h @@ -0,0 +1,11 @@ +#ifndef __QCOM_SMEM_H__ +#define __QCOM_SMEM_H__ + +#define QCOM_SMEM_HOST_ANY -1 + +int qcom_smem_alloc(unsigned host, unsigned item, size_t size); +int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size); + +int qcom_smem_get_free_space(unsigned host); + +#endif -- cgit v1.2.3-70-g09d2 From 0933328a1b8adb6c8b2b8c8b823dad0295659c40 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Wed, 29 Jul 2015 00:09:02 +0200 Subject: stmmac: remove unused stmmac_of_data struct As dwmac-* drivers that need OF match have been converted to use their own internal OF match data structure this can now be removed. Signed-off-by: Joachim Eastwood Signed-off-by: David S. Miller --- Documentation/networking/stmmac.txt | 2 -- include/linux/stmmac.h | 18 ------------------ 2 files changed, 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index 5fddefa69baf..de5c42342ec3 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -274,8 +274,6 @@ capability register can replace what has been passed from the platform. Please see the following document: Documentation/devicetree/bindings/net/stmmac.txt -and the stmmac_of_data structure inside the include/linux/stmmac.h header file. 
- 4.11) This is a summary of the content of some relevant files: o stmmac_main.c: to implement the main network device driver; o stmmac_mdio.c: to provide mdio functions; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index c86a20047cb1..b43cd56b78e9 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -125,22 +125,4 @@ struct plat_stmmacenet_data { void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; }; - -/* of_data for SoC glue layer device tree bindings */ - -struct stmmac_of_data { - int has_gmac; - int enh_desc; - int tx_coe; - int rx_coe; - int bugged_jumbo; - int pmt; - int riwt_off; - void (*fix_mac_speed)(void *priv, unsigned int speed); - void (*bus_setup)(void __iomem *ioaddr); - void *(*setup)(struct platform_device *pdev); - void (*free)(struct platform_device *pdev, void *priv); - int (*init)(struct platform_device *pdev, void *priv); - void (*exit)(struct platform_device *pdev, void *priv); -}; #endif -- cgit v1.2.3-70-g09d2 From 75fee59550a9899fd9438ebc0a64c972829a8dd2 Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Wed, 29 Jul 2015 00:09:03 +0200 Subject: stmmac: remove setup/free glue callbacks As all dwmac-* drivers have been converted to have a proper probe function the setup callback can now be removed. Also remove the free callback that wasn't used by any driver. New dwmac-* drivers should implement standard probe and remove functions to perform any needed setup and teardown. Signed-off-by: Joachim Eastwood Signed-off-by: David S.
Miller --- Documentation/networking/stmmac.txt | 8 ++------ drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c | 7 ------- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 --- include/linux/stmmac.h | 2 -- 4 files changed, 2 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt index de5c42342ec3..2903b1cf4d70 100644 --- a/Documentation/networking/stmmac.txt +++ b/Documentation/networking/stmmac.txt @@ -135,8 +135,6 @@ struct plat_stmmacenet_data { int maxmtu; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); - void *(*setup)(struct platform_device *pdev); - void (*free)(struct platform_device *pdev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; @@ -177,12 +175,10 @@ Where: o bus_setup: perform HW setup of the bus. For example, on some ST platforms this field is used to configure the AMBA bridge to generate more efficient STBus traffic. - o setup/init/exit: callbacks used for calling a custom initialization; + o init/exit: callbacks used for calling a custom initialization; this is sometime necessary on some platforms (e.g. ST boxes) where the HW needs to have set some PIO lines or system cfg - registers. setup should return a pointer to private data, - which will be stored in bsp_priv, and then passed to init and - exit callbacks. init/exit callbacks should not use or modify + registers. init/exit callbacks should not use or modify platform data. o bsp_priv: another private pointer. 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index f4fe9f1a33b4..b1e5f24708c9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -46,13 +46,6 @@ static int dwmac_generic_probe(struct platform_device *pdev) plat_dat->unicast_filter_entries = 1; } - /* Custom setup (if needed) */ - if (plat_dat->setup) { - plat_dat->bsp_priv = plat_dat->setup(pdev); - if (IS_ERR(plat_dat->bsp_priv)) - return PTR_ERR(plat_dat->bsp_priv); - } - /* Custom initialisation (if needed) */ if (plat_dat->init) { ret = plat_dat->init(pdev, plat_dat->bsp_priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 55e569b330b2..1cb660405f35 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -300,9 +300,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev) if (priv->plat->exit) priv->plat->exit(pdev, priv->plat->bsp_priv); - if (priv->plat->free) - priv->plat->free(pdev, priv->plat->bsp_priv); - return ret; } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index b43cd56b78e9..eead8ab93c0a 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -119,8 +119,6 @@ struct plat_stmmacenet_data { int rx_fifo_size; void (*fix_mac_speed)(void *priv, unsigned int speed); void (*bus_setup)(void __iomem *ioaddr); - void *(*setup)(struct platform_device *pdev); - void (*free)(struct platform_device *pdev, void *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); void *bsp_priv; -- cgit v1.2.3-70-g09d2 From d71ba788345c2b5646101766e0c52714a9b5ed7f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 Jul 2015 11:56:48 +0200 Subject: KVM: move code related to 
KVM_SET_BOOT_CPU_ID to x86 This is another remnant of ia64 support. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/x86.c | 21 +++++++++++++++++++++ include/linux/kvm_host.h | 16 ---------------- virt/kvm/kvm_main.c | 14 -------------- 4 files changed, 24 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fa32b5314dcd..2f9e504f9f0c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -667,6 +667,7 @@ struct kvm_arch { #endif bool boot_vcpu_runs_old_kvmclock; + u32 bsp_vcpu_id; u64 disabled_quirks; }; @@ -1215,5 +1216,7 @@ int __x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); int x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); +bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 28076c266a9a..c675ea3351cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2461,6 +2461,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_TSC_DEADLINE_TIMER: case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_DISABLE_QUIRKS: + case KVM_CAP_SET_BOOT_CPU_ID: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -3777,6 +3778,15 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_reinject(kvm, &control); break; } + case KVM_SET_BOOT_CPU_ID: + r = 0; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) != 0) + r = -EBUSY; + else + kvm->arch.bsp_vcpu_id = arg; + mutex_unlock(&kvm->lock); + break; case KVM_XEN_HVM_CONFIG: { r = -EFAULT; if (copy_from_user(&kvm->arch.xen_hvm_config, argp, @@ -7291,6 +7301,17 @@ void kvm_arch_check_processor_compat(void *rtn) kvm_x86_ops->check_processor_compatibility(rtn); } +bool 
kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; +} +EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); + +bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; +} + bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 51103f0feb7e..bd1097a95704 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -364,9 +364,6 @@ struct kvm { struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; struct srcu_struct srcu; struct srcu_struct irq_srcu; -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - u32 bsp_vcpu_id; -#endif struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; atomic_t online_vcpus; int last_boosted_vcpu; @@ -1059,22 +1056,9 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ #ifdef CONFIG_KVM_APIC_ARCHITECTURE -static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) -{ - return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; -} - -static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; -} - bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); - #else - static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } - #endif static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8b8a44453670..8dc4828f623f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2618,9 +2618,6 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif case KVM_CAP_INTERNAL_ERROR_DATA: #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: @@ 
-2716,17 +2713,6 @@ static long kvm_vm_ioctl(struct file *filp, r = kvm_ioeventfd(kvm, &data); break; } -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - case KVM_SET_BOOT_CPU_ID: - r = 0; - mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) != 0) - r = -EBUSY; - else - kvm->bsp_vcpu_id = arg; - mutex_unlock(&kvm->lock); - break; -#endif #ifdef CONFIG_HAVE_KVM_MSI case KVM_SIGNAL_MSI: { struct kvm_msi msi; -- cgit v1.2.3-70-g09d2 From e075867681ca9b8c0b8823e24d0fb4ce3b4f2655 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 10 Oct 2014 02:44:01 +0200 Subject: jiffies: Remove HZ > USEC_PER_SEC special case HZ never goes much further 1000 and a bit. And if we ever reach one tick per microsecond, we might be having a problem. Lets stop maintaining this special case, just leave a paranoid check. Reviewed-by: Rik van Riel Cc: Christoph Lameter Cc: Ingo Molnar Cc; John Stultz Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/jiffies.h | 9 +-------- kernel/time/time.c | 10 +++++++--- 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 535fd3bb1ba8..7c6febede6ba 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -363,18 +363,11 @@ static inline unsigned long msecs_to_jiffies(const unsigned int m) } extern unsigned long __usecs_to_jiffies(const unsigned int u); -#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) +#if !(USEC_PER_SEC % HZ) static inline unsigned long _usecs_to_jiffies(const unsigned int u) { return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); } -#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) -static inline unsigned long _usecs_to_jiffies(const unsigned int u) -{ - return u * (HZ / USEC_PER_SEC); -} -static inline unsigned long _usecs_to_jiffies(const unsigned int u) -{ #else static inline unsigned long _usecs_to_jiffies(const unsigned 
int u) { diff --git a/kernel/time/time.c b/kernel/time/time.c index 85d5bb1d67eb..ad1bf23e6eb7 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -268,10 +268,14 @@ EXPORT_SYMBOL(jiffies_to_msecs); unsigned int jiffies_to_usecs(const unsigned long j) { -#if HZ <= USEC_PER_SEC && !(USEC_PER_SEC % HZ) + /* + * Hz usually doesn't go much further MSEC_PER_SEC. + * jiffies_to_usecs() and usecs_to_jiffies() depend on that. + */ + BUILD_BUG_ON(HZ > USEC_PER_SEC); + +#if !(USEC_PER_SEC % HZ) return (USEC_PER_SEC / HZ) * j; -#elif HZ > USEC_PER_SEC && !(HZ % USEC_PER_SEC) - return (j + (HZ / USEC_PER_SEC) - 1)/(HZ / USEC_PER_SEC); #else # if BITS_PER_LONG == 32 return (HZ_TO_USEC_MUL32 * j) >> HZ_TO_USEC_SHR32; -- cgit v1.2.3-70-g09d2 From 03f6199a359e460714b6bd08c10b566760f150a6 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Fri, 10 Jul 2015 15:37:25 -0400 Subject: nohz: Prevent tilegx network driver interrupts Normally the tilegx networking shim sends irqs to all the cores to distribute the load of processing incoming-packet interrupts, so that you can get to multiple Gb's of traffic inbound. However, in nohz_full mode we don't want to interrupt the nohz_full cores by default, so we limit the set of cores we use to only the online housekeeping cores. To make client code easier to read, we introduce a new nohz_full accessor, housekeeping_cpumask(), which returns a pointer to the housekeeping_mask if nohz_full is enabled, and otherwise returns the cpu_possible_mask. 
Signed-off-by: Chris Metcalf Cc: Christoph Lameter Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- drivers/net/ethernet/tile/tilegx.c | 4 +++- include/linux/tick.h | 9 +++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index a3f7610002aa..0a15acc075b3 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -2273,7 +2274,8 @@ static int __init tile_net_init_module(void) tile_net_dev_init(name, mac); if (!network_cpus_init()) - network_cpus_map = *cpu_online_mask; + cpumask_and(&network_cpus_map, housekeeping_cpumask(), + cpu_online_mask); return 0; } diff --git a/include/linux/tick.h b/include/linux/tick.h index edbfc9a5293e..1ca93f2de6f5 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -163,6 +163,15 @@ static inline void tick_nohz_full_kick_all(void) { } static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } #endif +static inline const struct cpumask *housekeeping_cpumask(void) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return housekeeping_mask; +#endif + return cpu_possible_mask; +} + static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3-70-g09d2 From 73738a95d00467812664b7f86ba3052f5faf96d7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 27 May 2015 19:22:08 +0200 Subject: nohz: Restart nohz full tick from irq exit Restart the tick when necessary from the irq exit path. It makes nohz full more flexible, simplify the related IPIs and doesn't bring significant overhead on irq exit. 
In a longer term view, it will allow us to piggyback the nohz kick on the scheduler IPI in the future instead of sending a dedicated IPI that often doubles the scheduler IPI on task wakeup. This will require more changes though including careful review of resched_curr() callers to include nohz full needs. Reviewed-by: Rik van Riel Cc: Christoph Lameter Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 8 -------- kernel/time/tick-sched.c | 34 ++++++++++------------------------ 2 files changed, 10 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 1ca93f2de6f5..7d35b0fec399 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -147,7 +147,6 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } -extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); @@ -156,7 +155,6 @@ extern void __tick_nohz_task_switch(struct task_struct *tsk); static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } -static inline void __tick_nohz_full_check(void) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } @@ -190,12 +188,6 @@ static inline void housekeeping_affine(struct task_struct *t) #endif } -static inline void tick_nohz_full_check(void) -{ - if (tick_nohz_full_enabled()) - __tick_nohz_full_check(); -} - static inline void tick_nohz_task_switch(struct task_struct *tsk) { if (tick_nohz_full_enabled()) diff --git a/kernel/time/tick-sched.c 
b/kernel/time/tick-sched.c index d6c8eff6e7b4..a06cd4af0ff1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -197,25 +197,9 @@ static bool can_stop_full_tick(void) return true; } -static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); - -/* - * Re-evaluate the need for the tick on the current CPU - * and restart it if necessary. - */ -void __tick_nohz_full_check(void) -{ - struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); - - if (tick_nohz_full_cpu(smp_processor_id())) { - if (ts->tick_stopped && !can_stop_full_tick()) - tick_nohz_restart_sched_tick(ts, ktime_get()); - } -} - static void nohz_full_kick_work_func(struct irq_work *work) { - __tick_nohz_full_check(); + /* Empty, the tick restart happens on tick_nohz_irq_exit() */ } static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { @@ -250,7 +234,7 @@ void tick_nohz_full_kick_cpu(int cpu) static void nohz_full_kick_ipi(void *info) { - __tick_nohz_full_check(); + /* Empty, the tick restart happens on tick_nohz_irq_exit() */ } /* @@ -703,7 +687,9 @@ out: return tick; } -static void tick_nohz_full_stop_tick(struct tick_sched *ts) +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now); + +static void tick_nohz_full_update_tick(struct tick_sched *ts) { #ifdef CONFIG_NO_HZ_FULL int cpu = smp_processor_id(); @@ -714,10 +700,10 @@ static void tick_nohz_full_stop_tick(struct tick_sched *ts) if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) return; - if (!can_stop_full_tick()) - return; - - tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); + if (can_stop_full_tick()) + tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); + else if (ts->tick_stopped) + tick_nohz_restart_sched_tick(ts, ktime_get()); #endif } @@ -847,7 +833,7 @@ void tick_nohz_irq_exit(void) if (ts->inidle) __tick_nohz_idle_enter(ts); else - tick_nohz_full_stop_tick(ts); + tick_nohz_full_update_tick(ts); } /** -- cgit v1.2.3-70-g09d2 From 
de734f89b67c2df30e35a09e7e56a3659e5b6ac6 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 11 Jun 2015 18:07:12 +0200 Subject: nohz: Remove useless argument on tick_nohz_task_switch() Leftover from early code. Cc: Christoph Lameter Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Rik van Riel Cc: Thomas Gleixner Cc: Viresh Kumar Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 8 ++++---- kernel/sched/core.c | 2 +- kernel/time/tick-sched.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 7d35b0fec399..48d901f83f92 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -150,7 +150,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); -extern void __tick_nohz_task_switch(struct task_struct *tsk); +extern void __tick_nohz_task_switch(void); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } @@ -158,7 +158,7 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick_all(void) { } -static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } +static inline void __tick_nohz_task_switch(void) { } #endif static inline const struct cpumask *housekeeping_cpumask(void) @@ -188,10 +188,10 @@ static inline void housekeeping_affine(struct task_struct *t) #endif } -static inline void tick_nohz_task_switch(struct task_struct *tsk) +static inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) - __tick_nohz_task_switch(tsk); + __tick_nohz_task_switch(); } #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 
78b4bad10081..4d34035bb3ee 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2489,7 +2489,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) put_task_struct(prev); } - tick_nohz_task_switch(current); + tick_nohz_task_switch(); return rq; } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6b0d14d4c350..3319e16f31e5 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -258,7 +258,7 @@ void tick_nohz_full_kick_all(void) * It might need the tick due to per task/process properties: * perf events, posix cpu timers, ... */ -void __tick_nohz_task_switch(struct task_struct *tsk) +void __tick_nohz_task_switch(void) { unsigned long flags; -- cgit v1.2.3-70-g09d2 From 4246a0b63bd8f56a1469b12eafeb875b1041a451 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2015 15:29:37 +0200 Subject: block: add a bi_error field to struct bio Currently we have two different ways to signal an I/O error on a BIO: (1) by clearing the BIO_UPTODATE flag (2) by returning a Linux errno value to the bi_end_io callback The first one has the drawback of only communicating a single possible error (-EIO), and the second one has the drawback of not being persistent when bios are queued up, and are not passed along from child to parent bio in the ever more popular chaining scenario. Having both mechanisms available has the additional drawback of utterly confusing driver authors and introducing bugs where various I/O submitters only deal with one of them, and the others have to add boilerplate code to deal with both kinds of error returns. So add a new bi_error field to store an errno value directly in struct bio and remove the existing mechanisms to clean all this up. 
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: NeilBrown Signed-off-by: Jens Axboe --- Documentation/block/biodoc.txt | 2 +- arch/m68k/emu/nfblock.c | 2 +- arch/powerpc/sysdev/axonram.c | 2 +- arch/xtensa/platforms/iss/simdisk.c | 12 ++----- block/bio-integrity.c | 11 +++---- block/bio.c | 43 +++++++++++-------------- block/blk-core.c | 15 ++++----- block/blk-lib.c | 30 ++++++++---------- block/blk-map.c | 2 +- block/blk-mq.c | 6 ++-- block/bounce.c | 27 ++++++++-------- drivers/block/aoe/aoecmd.c | 10 +++--- drivers/block/aoe/aoedev.c | 2 +- drivers/block/brd.c | 13 +++++--- drivers/block/drbd/drbd_actlog.c | 4 +-- drivers/block/drbd/drbd_bitmap.c | 19 +++--------- drivers/block/drbd/drbd_int.h | 11 ++++--- drivers/block/drbd/drbd_req.c | 10 +++--- drivers/block/drbd/drbd_worker.c | 44 +++++++------------------- drivers/block/floppy.c | 7 +++-- drivers/block/null_blk.c | 2 +- drivers/block/pktcdvd.c | 32 +++++++++---------- drivers/block/ps3vram.c | 3 +- drivers/block/rsxx/dev.c | 9 ++++-- drivers/block/umem.c | 4 +-- drivers/block/xen-blkback/blkback.c | 4 +-- drivers/block/xen-blkfront.c | 9 ++---- drivers/block/zram/zram_drv.c | 5 ++- drivers/md/bcache/btree.c | 10 +++--- drivers/md/bcache/closure.h | 2 +- drivers/md/bcache/io.c | 8 ++--- drivers/md/bcache/journal.c | 8 ++--- drivers/md/bcache/movinggc.c | 8 ++--- drivers/md/bcache/request.c | 27 ++++++++-------- drivers/md/bcache/super.c | 14 ++++----- drivers/md/bcache/writeback.c | 10 +++--- drivers/md/dm-bio-prison.c | 6 ++-- drivers/md/dm-bufio.c | 26 ++++++++++------ drivers/md/dm-cache-target.c | 24 +++++++------- drivers/md/dm-crypt.c | 14 ++++----- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-io.c | 6 ++-- drivers/md/dm-log-writes.c | 11 +++---- drivers/md/dm-raid1.c | 24 +++++++------- drivers/md/dm-snap.c | 6 ++-- drivers/md/dm-stripe.c | 2 +- drivers/md/dm-thin.c | 41 +++++++++++++----------- drivers/md/dm-verity.c | 9 +++--- drivers/md/dm-zero.c | 2 +- drivers/md/dm.c | 
15 +++++---- drivers/md/faulty.c | 4 +-- drivers/md/linear.c | 2 +- drivers/md/md.c | 18 +++++------ drivers/md/multipath.c | 12 +++---- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 53 ++++++++++++++++--------------- drivers/md/raid10.c | 55 +++++++++++++++----------------- drivers/md/raid5.c | 52 +++++++++++++++---------------- drivers/nvdimm/blk.c | 5 +-- drivers/nvdimm/btt.c | 5 +-- drivers/nvdimm/pmem.c | 2 +- drivers/s390/block/dcssblk.c | 2 +- drivers/s390/block/xpram.c | 3 +- drivers/target/target_core_iblock.c | 21 +++++-------- drivers/target/target_core_pscsi.c | 6 ++-- fs/btrfs/check-integrity.c | 10 +++--- fs/btrfs/compression.c | 24 ++++++++------ fs/btrfs/disk-io.c | 35 +++++++++++---------- fs/btrfs/extent_io.c | 30 +++++++----------- fs/btrfs/inode.c | 50 ++++++++++++++++-------------- fs/btrfs/raid56.c | 62 +++++++++++++++++-------------------- fs/btrfs/scrub.c | 22 ++++++------- fs/btrfs/volumes.c | 23 +++++++------- fs/buffer.c | 4 +-- fs/direct-io.c | 13 ++++---- fs/ext4/page-io.c | 15 ++++----- fs/ext4/readpage.c | 6 ++-- fs/f2fs/data.c | 10 +++--- fs/gfs2/lops.c | 10 +++--- fs/gfs2/ops_fstype.c | 6 ++-- fs/jfs/jfs_logmgr.c | 8 ++--- fs/jfs/jfs_metapage.c | 8 ++--- fs/logfs/dev_bdev.c | 12 +++---- fs/mpage.c | 4 +-- fs/nfs/blocklayout/blocklayout.c | 14 ++++----- fs/nilfs2/segbuf.c | 5 ++- fs/ocfs2/cluster/heartbeat.c | 9 +++--- fs/xfs/xfs_aops.c | 5 ++- fs/xfs/xfs_buf.c | 7 ++--- include/linux/bio.h | 13 +++++--- include/linux/blk_types.h | 4 +-- include/linux/swap.h | 4 +-- kernel/power/swap.c | 12 +++---- kernel/trace/blktrace.c | 10 ++---- mm/page_io.c | 12 +++---- 95 files changed, 622 insertions(+), 682 deletions(-) (limited to 'include/linux') diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index fd12c0d835fd..5be8a7f4cc7f 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -1109,7 +1109,7 @@ it will loop and handle as many sectors (on a bio-segment granularity) as specified. 
Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the -right thing to use is bio_endio(bio, uptodate) instead. +right thing to use is bio_endio(bio) instead. If the driver is dropping the io_request_lock from its request_fn strategy, then it just needs to replace that with q->queue_lock instead. diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 2d75ae246167..f2a00c591bf7 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -76,7 +76,7 @@ static void nfhd_make_request(struct request_queue *queue, struct bio *bio) bvec_to_phys(&bvec)); sec += len; } - bio_endio(bio, 0); + bio_endio(bio); } static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo) diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ee90db17b097..f86250c48b53 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -132,7 +132,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) phys_mem += vec.bv_len; transfered += vec.bv_len; } - bio_endio(bio, 0); + bio_endio(bio); } /** diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 48eebacdf5fe..fa84ca990caa 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -101,8 +101,9 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector, spin_unlock(&dev->lock); } -static int simdisk_xfer_bio(struct simdisk *dev, struct bio *bio) +static void simdisk_make_request(struct request_queue *q, struct bio *bio) { + struct simdisk *dev = q->queuedata; struct bio_vec bvec; struct bvec_iter iter; sector_t sector = bio->bi_iter.bi_sector; @@ -116,17 +117,10 @@ static int simdisk_xfer_bio(struct simdisk *dev, struct bio *bio) sector += len; __bio_kunmap_atomic(buffer); } - return 0; -} -static void simdisk_make_request(struct request_queue *q, struct bio *bio) -{ - struct simdisk *dev = q->queuedata; - int status = simdisk_xfer_bio(dev, 
bio); - bio_endio(bio, status); + bio_endio(bio); } - static int simdisk_open(struct block_device *bdev, fmode_t mode) { struct simdisk *dev = bdev->bd_disk->private_data; diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 719b7152aed1..4aecca79374a 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -355,13 +355,12 @@ static void bio_integrity_verify_fn(struct work_struct *work) container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - int error; - error = bio_integrity_process(bio, bi->verify_fn); + bio->bi_error = bio_integrity_process(bio, bi->verify_fn); /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - bio_endio(bio, error); + bio_endio(bio); } /** @@ -376,7 +375,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) * in process context. This function postpones completion * accordingly. */ -void bio_integrity_endio(struct bio *bio, int error) +void bio_integrity_endio(struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); @@ -386,9 +385,9 @@ void bio_integrity_endio(struct bio *bio, int error) * integrity metadata. Restore original bio end_io handler * and run it. 
*/ - if (error) { + if (bio->bi_error) { bio->bi_end_io = bip->bip_end_io; - bio_endio(bio, error); + bio_endio(bio); return; } diff --git a/block/bio.c b/block/bio.c index 2a00d349cd68..a23f489f398f 100644 --- a/block/bio.c +++ b/block/bio.c @@ -269,7 +269,6 @@ static void bio_free(struct bio *bio) void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); - bio->bi_flags = 1 << BIO_UPTODATE; atomic_set(&bio->__bi_remaining, 1); atomic_set(&bio->__bi_cnt, 1); } @@ -292,14 +291,17 @@ void bio_reset(struct bio *bio) __bio_free(bio); memset(bio, 0, BIO_RESET_BYTES); - bio->bi_flags = flags | (1 << BIO_UPTODATE); + bio->bi_flags = flags; atomic_set(&bio->__bi_remaining, 1); } EXPORT_SYMBOL(bio_reset); -static void bio_chain_endio(struct bio *bio, int error) +static void bio_chain_endio(struct bio *bio) { - bio_endio(bio->bi_private, error); + struct bio *parent = bio->bi_private; + + parent->bi_error = bio->bi_error; + bio_endio(parent); bio_put(bio); } @@ -896,11 +898,11 @@ struct submit_bio_ret { int error; }; -static void submit_bio_wait_endio(struct bio *bio, int error) +static void submit_bio_wait_endio(struct bio *bio) { struct submit_bio_ret *ret = bio->bi_private; - ret->error = error; + ret->error = bio->bi_error; complete(&ret->event); } @@ -1445,7 +1447,7 @@ void bio_unmap_user(struct bio *bio) } EXPORT_SYMBOL(bio_unmap_user); -static void bio_map_kern_endio(struct bio *bio, int err) +static void bio_map_kern_endio(struct bio *bio) { bio_put(bio); } @@ -1501,13 +1503,13 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len, } EXPORT_SYMBOL(bio_map_kern); -static void bio_copy_kern_endio(struct bio *bio, int err) +static void bio_copy_kern_endio(struct bio *bio) { bio_free_pages(bio); bio_put(bio); } -static void bio_copy_kern_endio_read(struct bio *bio, int err) +static void bio_copy_kern_endio_read(struct bio *bio) { char *p = bio->bi_private; struct bio_vec *bvec; @@ -1518,7 +1520,7 @@ static void 
bio_copy_kern_endio_read(struct bio *bio, int err) p += bvec->bv_len; } - bio_copy_kern_endio(bio, err); + bio_copy_kern_endio(bio); } /** @@ -1778,25 +1780,15 @@ static inline bool bio_remaining_done(struct bio *bio) /** * bio_endio - end I/O on a bio * @bio: bio - * @error: error, if any * * Description: - * bio_endio() will end I/O on the whole bio. bio_endio() is the - * preferred way to end I/O on a bio, it takes care of clearing - * BIO_UPTODATE on error. @error is 0 on success, and and one of the - * established -Exxxx (-EIO, for instance) error values in case - * something went wrong. No one should call bi_end_io() directly on a - * bio unless they own it and thus know that it has an end_io - * function. + * bio_endio() will end I/O on the whole bio. bio_endio() is the preferred + * way to end I/O on a bio. No one should call bi_end_io() directly on a + * bio unless they own it and thus know that it has an end_io function. **/ -void bio_endio(struct bio *bio, int error) +void bio_endio(struct bio *bio) { while (bio) { - if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; - if (unlikely(!bio_remaining_done(bio))) break; @@ -1810,11 +1802,12 @@ void bio_endio(struct bio *bio, int error) */ if (bio->bi_end_io == bio_chain_endio) { struct bio *parent = bio->bi_private; + parent->bi_error = bio->bi_error; bio_put(bio); bio = parent; } else { if (bio->bi_end_io) - bio->bi_end_io(bio, error); + bio->bi_end_io(bio); bio = NULL; } } diff --git a/block/blk-core.c b/block/blk-core.c index 627ed0c593fb..7ef15b947b91 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -143,9 +143,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, unsigned int nbytes, int error) { if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; + bio->bi_error = error; if (unlikely(rq->cmd_flags & REQ_QUIET)) set_bit(BIO_QUIET, 
&bio->bi_flags); @@ -154,7 +152,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, /* don't actually finish bio if it's part of flush sequence */ if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) - bio_endio(bio, error); + bio_endio(bio); } void blk_dump_rq_flags(struct request *rq, char *msg) @@ -1620,7 +1618,8 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio_endio(bio, -EIO); + bio->bi_error = -EIO; + bio_endio(bio); return; } @@ -1673,7 +1672,8 @@ get_rq: */ req = get_request(q, rw_flags, bio, GFP_NOIO); if (IS_ERR(req)) { - bio_endio(bio, PTR_ERR(req)); /* @q is dead */ + bio->bi_error = PTR_ERR(req); + bio_endio(bio); goto out_unlock; } @@ -1896,7 +1896,8 @@ generic_make_request_checks(struct bio *bio) return true; end_io: - bio_endio(bio, err); + bio->bi_error = err; + bio_endio(bio); return false; } diff --git a/block/blk-lib.c b/block/blk-lib.c index 7688ee3f5d72..6dee17443f14 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -11,16 +11,16 @@ struct bio_batch { atomic_t done; - unsigned long flags; + int error; struct completion *wait; }; -static void bio_batch_end_io(struct bio *bio, int err) +static void bio_batch_end_io(struct bio *bio) { struct bio_batch *bb = bio->bi_private; - if (err && (err != -EOPNOTSUPP)) - clear_bit(BIO_UPTODATE, &bb->flags); + if (bio->bi_error && bio->bi_error != -EOPNOTSUPP) + bb->error = bio->bi_error; if (atomic_dec_and_test(&bb->done)) complete(bb->wait); bio_put(bio); @@ -78,7 +78,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, } atomic_set(&bb.done, 1); - bb.flags = 1 << BIO_UPTODATE; + bb.error = 0; bb.wait = &wait; blk_start_plug(&plug); @@ -134,9 +134,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!atomic_dec_and_test(&bb.done)) wait_for_completion_io(&wait); - if (!test_bit(BIO_UPTODATE, &bb.flags)) - ret = 
-EIO; - + if (bb.error) + return bb.error; return ret; } EXPORT_SYMBOL(blkdev_issue_discard); @@ -172,7 +171,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, return -EOPNOTSUPP; atomic_set(&bb.done, 1); - bb.flags = 1 << BIO_UPTODATE; + bb.error = 0; bb.wait = &wait; while (nr_sects) { @@ -208,9 +207,8 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, if (!atomic_dec_and_test(&bb.done)) wait_for_completion_io(&wait); - if (!test_bit(BIO_UPTODATE, &bb.flags)) - ret = -ENOTSUPP; - + if (bb.error) + return bb.error; return ret; } EXPORT_SYMBOL(blkdev_issue_write_same); @@ -236,7 +234,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, DECLARE_COMPLETION_ONSTACK(wait); atomic_set(&bb.done, 1); - bb.flags = 1 << BIO_UPTODATE; + bb.error = 0; bb.wait = &wait; ret = 0; @@ -270,10 +268,8 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, if (!atomic_dec_and_test(&bb.done)) wait_for_completion_io(&wait); - if (!test_bit(BIO_UPTODATE, &bb.flags)) - /* One of bios in the batch was completed with error.*/ - ret = -EIO; - + if (bb.error) + return bb.error; return ret; } diff --git a/block/blk-map.c b/block/blk-map.c index da310a105429..5fe1c30bfba7 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -103,7 +103,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, * normal IO completion path */ bio_get(bio); - bio_endio(bio, 0); + bio_endio(bio); __blk_rq_unmap_user(bio); return -EINVAL; } diff --git a/block/blk-mq.c b/block/blk-mq.c index 7d842db59699..94559025c5e6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1199,7 +1199,7 @@ static struct request *blk_mq_map_request(struct request_queue *q, struct blk_mq_alloc_data alloc_data; if (unlikely(blk_mq_queue_enter(q, GFP_KERNEL))) { - bio_endio(bio, -EIO); + bio_io_error(bio); return NULL; } @@ -1283,7 +1283,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) 
blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio_endio(bio, -EIO); + bio_io_error(bio); return; } @@ -1368,7 +1368,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - bio_endio(bio, -EIO); + bio_io_error(bio); return; } diff --git a/block/bounce.c b/block/bounce.c index b17311227c12..f4db245b9f3a 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -123,7 +123,7 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from) } } -static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) +static void bounce_end_io(struct bio *bio, mempool_t *pool) { struct bio *bio_orig = bio->bi_private; struct bio_vec *bvec, *org_vec; @@ -141,39 +141,40 @@ static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) mempool_free(bvec->bv_page, pool); } - bio_endio(bio_orig, err); + bio_orig->bi_error = bio->bi_error; + bio_endio(bio_orig); bio_put(bio); } -static void bounce_end_io_write(struct bio *bio, int err) +static void bounce_end_io_write(struct bio *bio) { - bounce_end_io(bio, page_pool, err); + bounce_end_io(bio, page_pool); } -static void bounce_end_io_write_isa(struct bio *bio, int err) +static void bounce_end_io_write_isa(struct bio *bio) { - bounce_end_io(bio, isa_page_pool, err); + bounce_end_io(bio, isa_page_pool); } -static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) +static void __bounce_end_io_read(struct bio *bio, mempool_t *pool) { struct bio *bio_orig = bio->bi_private; - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (!bio->bi_error) copy_to_high_bio_irq(bio_orig, bio); - bounce_end_io(bio, pool, err); + bounce_end_io(bio, pool); } -static void bounce_end_io_read(struct bio *bio, int err) +static void bounce_end_io_read(struct bio *bio) { - __bounce_end_io_read(bio, page_pool, err); + __bounce_end_io_read(bio, page_pool); } -static void 
bounce_end_io_read_isa(struct bio *bio, int err) +static void bounce_end_io_read_isa(struct bio *bio) { - __bounce_end_io_read(bio, isa_page_pool, err); + __bounce_end_io_read(bio, isa_page_pool); } #ifdef CONFIG_NEED_BOUNCE_POOL diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 422b7d84f686..ad80c85e0857 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -1110,7 +1110,7 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail) d->ip.rq = NULL; do { bio = rq->bio; - bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags); + bok = !fastfail && !bio->bi_error; } while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size)); /* cf. http://lkml.org/lkml/2006/10/31/28 */ @@ -1172,7 +1172,7 @@ ktiocomplete(struct frame *f) ahout->cmdstat, ahin->cmdstat, d->aoemajor, d->aoeminor); noskb: if (buf) - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; goto out; } @@ -1185,7 +1185,7 @@ noskb: if (buf) "aoe: runt data size in read from", (long) d->aoemajor, d->aoeminor, skb->len, n); - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; break; } if (n > f->iter.bi_size) { @@ -1193,7 +1193,7 @@ noskb: if (buf) "aoe: too-large data size in read from", (long) d->aoemajor, d->aoeminor, n, f->iter.bi_size); - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; break; } bvcpy(skb, f->buf->bio, f->iter, n); @@ -1695,7 +1695,7 @@ aoe_failbuf(struct aoedev *d, struct buf *buf) if (buf == NULL) return; buf->iter.bi_size = 0; - clear_bit(BIO_UPTODATE, &buf->bio->bi_flags); + buf->bio->bi_error = -EIO; if (buf->nframesout == 0) aoe_end_buf(d, buf); } diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index e774c50b6842..ffd1947500c6 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -170,7 +170,7 @@ aoe_failip(struct aoedev *d) if (rq == NULL) return; while ((bio = d->ip.nxbio)) { - clear_bit(BIO_UPTODATE, 
&bio->bi_flags); + bio->bi_error = -EIO; d->ip.nxbio = bio->bi_next; n = (unsigned long) rq->special; rq->special = (void *) --n; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index e573e470bd8a..f9ab74505e69 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -331,14 +331,12 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) struct bio_vec bvec; sector_t sector; struct bvec_iter iter; - int err = -EIO; sector = bio->bi_iter.bi_sector; if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) - goto out; + goto io_error; if (unlikely(bio->bi_rw & REQ_DISCARD)) { - err = 0; discard_from_brd(brd, sector, bio->bi_iter.bi_size); goto out; } @@ -349,15 +347,20 @@ static void brd_make_request(struct request_queue *q, struct bio *bio) bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; + int err; + err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset, rw, sector); if (err) - break; + goto io_error; sector += len >> SECTOR_SHIFT; } out: - bio_endio(bio, err); + bio_endio(bio); + return; +io_error: + bio_io_error(bio); } static int brd_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1318e3217cb0..b3868e7a1ffd 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,11 +175,11 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */ device->md_io.submit_jif = jiffies; if (drbd_insert_fault(device, (rw & WRITE) ? 
DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) - bio_endio(bio, -EIO); + bio_io_error(bio); else submit_bio(rw, bio); wait_until_done_or_force_detached(device, bdev, &device->md_io.done); - if (bio_flagged(bio, BIO_UPTODATE)) + if (!bio->bi_error) err = device->md_io.error; out: diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 434c77dcc99e..e5e0f19ceda0 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -941,36 +941,27 @@ static void drbd_bm_aio_ctx_destroy(struct kref *kref) } /* bv_page may be a copy, or may be the original */ -static void drbd_bm_endio(struct bio *bio, int error) +static void drbd_bm_endio(struct bio *bio) { struct drbd_bm_aio_ctx *ctx = bio->bi_private; struct drbd_device *device = ctx->device; struct drbd_bitmap *b = device->bitmap; unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); - int uptodate = bio_flagged(bio, BIO_UPTODATE); - - - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! - * do we want to WARN() on this? */ - if (!error && !uptodate) - error = -EIO; if ((ctx->flags & BM_AIO_COPY_PAGES) == 0 && !bm_test_page_unchanged(b->bm_pages[idx])) drbd_warn(device, "bitmap page idx %u changed during IO!\n", idx); - if (error) { + if (bio->bi_error) { /* ctx error will hold the completed-last non-zero error code, * in case error codes differ. */ - ctx->error = error; + ctx->error = bio->bi_error; bm_set_page_io_err(b->bm_pages[idx]); /* Not identical to on disk version of it. * Is BM_PAGE_IO_ERROR enough? 
*/ if (__ratelimit(&drbd_ratelimit_state)) drbd_err(device, "IO ERROR %d on bitmap page idx %u\n", - error, idx); + bio->bi_error, idx); } else { bm_clear_page_io_err(b->bm_pages[idx]); dynamic_drbd_dbg(device, "bitmap page idx %u completed\n", idx); @@ -1031,7 +1022,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) { bio->bi_rw |= rw; - bio_endio(bio, -EIO); + bio_io_error(bio); } else { submit_bio(rw, bio); /* this should not count as user activity and cause the diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index efd19c2da9c2..a08c4a9179f1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1481,9 +1481,9 @@ extern int drbd_khelper(struct drbd_device *device, char *cmd); /* drbd_worker.c */ /* bi_end_io handlers */ -extern void drbd_md_endio(struct bio *bio, int error); -extern void drbd_peer_request_endio(struct bio *bio, int error); -extern void drbd_request_endio(struct bio *bio, int error); +extern void drbd_md_endio(struct bio *bio); +extern void drbd_peer_request_endio(struct bio *bio); +extern void drbd_request_endio(struct bio *bio); extern int drbd_worker(struct drbd_thread *thi); enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor); void drbd_resync_after_changed(struct drbd_device *device); @@ -1604,12 +1604,13 @@ static inline void drbd_generic_make_request(struct drbd_device *device, __release(local); if (!bio->bi_bdev) { drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n"); - bio_endio(bio, -ENODEV); + bio->bi_error = -ENODEV; + bio_endio(bio); return; } if (drbd_insert_fault(device, fault_type)) - bio_endio(bio, -EIO); + bio_io_error(bio); else generic_make_request(bio); } diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3907202fb9d9..9cb41166366e 100644 --- a/drivers/block/drbd/drbd_req.c +++ 
b/drivers/block/drbd/drbd_req.c @@ -201,7 +201,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - bio_endio(m->bio, m->error); + m->bio->bi_error = m->error; + bio_endio(m->bio); dec_ap_bio(device); } @@ -1153,12 +1154,12 @@ drbd_submit_req_private_bio(struct drbd_request *req) rw == WRITE ? DRBD_FAULT_DT_WR : rw == READ ? DRBD_FAULT_DT_RD : DRBD_FAULT_DT_RA)) - bio_endio(bio, -EIO); + bio_io_error(bio); else generic_make_request(bio); put_ldev(device); } else - bio_endio(bio, -EIO); + bio_io_error(bio); } static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) @@ -1191,7 +1192,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long /* only pass the error to the upper layers. * if user cannot handle io errors, that's not our business. */ drbd_err(device, "could not kmalloc() req\n"); - bio_endio(bio, -ENOMEM); + bio->bi_error = -ENOMEM; + bio_endio(bio); return ERR_PTR(-ENOMEM); } req->start_jif = start_jif; diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d0fae55d871d..5578c1477ba6 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -65,12 +65,12 @@ rwlock_t global_state_lock; /* used for synchronous meta data and bitmap IO * submitted by drbd_md_sync_page_io() */ -void drbd_md_endio(struct bio *bio, int error) +void drbd_md_endio(struct bio *bio) { struct drbd_device *device; device = bio->bi_private; - device->md_io.error = error; + device->md_io.error = bio->bi_error; /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able * to timeout on the lower level device, and eventually detach from it. @@ -170,31 +170,20 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l /* writes on behalf of the partner, or resync writes, * "submitted" by the receiver. 
*/ -void drbd_peer_request_endio(struct bio *bio, int error) +void drbd_peer_request_endio(struct bio *bio) { struct drbd_peer_request *peer_req = bio->bi_private; struct drbd_device *device = peer_req->peer_device->device; - int uptodate = bio_flagged(bio, BIO_UPTODATE); int is_write = bio_data_dir(bio) == WRITE; int is_discard = !!(bio->bi_rw & REQ_DISCARD); - if (error && __ratelimit(&drbd_ratelimit_state)) + if (bio->bi_error && __ratelimit(&drbd_ratelimit_state)) drbd_warn(device, "%s: error=%d s=%llus\n", is_write ? (is_discard ? "discard" : "write") - : "read", error, + : "read", bio->bi_error, (unsigned long long)peer_req->i.sector); - if (!error && !uptodate) { - if (__ratelimit(&drbd_ratelimit_state)) - drbd_warn(device, "%s: setting error to -EIO s=%llus\n", - is_write ? "write" : "read", - (unsigned long long)peer_req->i.sector); - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! */ - error = -EIO; - } - if (error) + if (bio->bi_error) set_bit(__EE_WAS_ERROR, &peer_req->flags); bio_put(bio); /* no need for the bio anymore */ @@ -208,24 +197,13 @@ void drbd_peer_request_endio(struct bio *bio, int error) /* read, readA or write requests on R_PRIMARY coming from drbd_make_request */ -void drbd_request_endio(struct bio *bio, int error) +void drbd_request_endio(struct bio *bio) { unsigned long flags; struct drbd_request *req = bio->bi_private; struct drbd_device *device = req->device; struct bio_and_error m; enum drbd_req_event what; - int uptodate = bio_flagged(bio, BIO_UPTODATE); - - if (!error && !uptodate) { - drbd_warn(device, "p %s: setting error to -EIO\n", - bio_data_dir(bio) == WRITE ? "write" : "read"); - /* strange behavior of some lower level drivers... - * fail the request by clearing the uptodate flag, - * but do not return any error?! 
*/ - error = -EIO; - } - /* If this request was aborted locally before, * but now was completed "successfully", @@ -259,14 +237,14 @@ void drbd_request_endio(struct bio *bio, int error) if (__ratelimit(&drbd_ratelimit_state)) drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n"); - if (!error) + if (!bio->bi_error) panic("possible random memory corruption caused by delayed completion of aborted local request\n"); } /* to avoid recursion in __req_mod */ - if (unlikely(error)) { + if (unlikely(bio->bi_error)) { if (bio->bi_rw & REQ_DISCARD) - what = (error == -EOPNOTSUPP) + what = (bio->bi_error == -EOPNOTSUPP) ? DISCARD_COMPLETED_NOTSUPP : DISCARD_COMPLETED_WITH_ERROR; else @@ -279,7 +257,7 @@ void drbd_request_endio(struct bio *bio, int error) what = COMPLETED_OK; bio_put(req->private_bio); - req->private_bio = ERR_PTR(error); + req->private_bio = ERR_PTR(bio->bi_error); /* not req_mod(), we need irqsave here! */ spin_lock_irqsave(&device->resource->req_lock, flags); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index a08cda955285..331363e7de0f 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3771,13 +3771,14 @@ struct rb0_cbdata { struct completion complete; }; -static void floppy_rb0_cb(struct bio *bio, int err) +static void floppy_rb0_cb(struct bio *bio) { struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private; int drive = cbdata->drive; - if (err) { - pr_info("floppy: error %d while reading block 0\n", err); + if (bio->bi_error) { + pr_info("floppy: error %d while reading block 0\n", + bio->bi_error); set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); } complete(&cbdata->complete); diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 69de41a87b74..016a59afcf24 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -222,7 +222,7 @@ static void end_cmd(struct nullb_cmd *cmd) blk_end_request_all(cmd->rq, 0); break; case NULL_Q_BIO: - 
bio_endio(cmd->bio, 0); + bio_endio(cmd->bio); break; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 4c20c228184c..a7a259e031da 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -977,7 +977,7 @@ static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) } } -static void pkt_end_io_read(struct bio *bio, int err) +static void pkt_end_io_read(struct bio *bio) { struct packet_data *pkt = bio->bi_private; struct pktcdvd_device *pd = pkt->pd; @@ -985,9 +985,9 @@ static void pkt_end_io_read(struct bio *bio, int err) pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", bio, (unsigned long long)pkt->sector, - (unsigned long long)bio->bi_iter.bi_sector, err); + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_error); - if (err) + if (bio->bi_error) atomic_inc(&pkt->io_errors); if (atomic_dec_and_test(&pkt->io_wait)) { atomic_inc(&pkt->run_sm); @@ -996,13 +996,13 @@ static void pkt_end_io_read(struct bio *bio, int err) pkt_bio_finished(pd); } -static void pkt_end_io_packet_write(struct bio *bio, int err) +static void pkt_end_io_packet_write(struct bio *bio) { struct packet_data *pkt = bio->bi_private; struct pktcdvd_device *pd = pkt->pd; BUG_ON(!pd); - pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, err); + pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_error); pd->stats.pkt_ended++; @@ -1340,22 +1340,22 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) pkt_queue_bio(pd, pkt->w_bio); } -static void pkt_finish_packet(struct packet_data *pkt, int uptodate) +static void pkt_finish_packet(struct packet_data *pkt, int error) { struct bio *bio; - if (!uptodate) + if (error) pkt->cache_valid = 0; /* Finish all bios corresponding to this packet */ - while ((bio = bio_list_pop(&pkt->orig_bios))) - bio_endio(bio, uptodate ? 
0 : -EIO); + while ((bio = bio_list_pop(&pkt->orig_bios))) { + bio->bi_error = error; + bio_endio(bio); + } } static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt) { - int uptodate; - pkt_dbg(2, pd, "pkt %d\n", pkt->id); for (;;) { @@ -1384,7 +1384,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data if (atomic_read(&pkt->io_wait) > 0) return; - if (test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags)) { + if (!pkt->w_bio->bi_error) { pkt_set_state(pkt, PACKET_FINISHED_STATE); } else { pkt_set_state(pkt, PACKET_RECOVERY_STATE); @@ -1401,8 +1401,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data break; case PACKET_FINISHED_STATE: - uptodate = test_bit(BIO_UPTODATE, &pkt->w_bio->bi_flags); - pkt_finish_packet(pkt, uptodate); + pkt_finish_packet(pkt, pkt->w_bio->bi_error); return; default: @@ -2332,13 +2331,14 @@ static void pkt_close(struct gendisk *disk, fmode_t mode) } -static void pkt_end_io_read_cloned(struct bio *bio, int err) +static void pkt_end_io_read_cloned(struct bio *bio) { struct packet_stacked_data *psd = bio->bi_private; struct pktcdvd_device *pd = psd->pd; + psd->bio->bi_error = bio->bi_error; bio_put(bio); - bio_endio(psd->bio, err); + bio_endio(psd->bio); mempool_free(psd, psd_pool); pkt_bio_finished(pd); } diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index b1612eb16172..49b4706b162c 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -593,7 +593,8 @@ out: next = bio_list_peek(&priv->list); spin_unlock_irq(&priv->lock); - bio_endio(bio, error); + bio->bi_error = error; + bio_endio(bio); return next; } diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c index ac8c62cb4875..63b9d2ffa8ee 100644 --- a/drivers/block/rsxx/dev.c +++ b/drivers/block/rsxx/dev.c @@ -137,7 +137,10 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card, if (!card->eeh_state && card->gendisk) disk_stats_complete(card, meta->bio, 
meta->start_time); - bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); + if (atomic_read(&meta->error)) + bio_io_error(meta->bio); + else + bio_endio(meta->bio); kmem_cache_free(bio_meta_pool, meta); } } @@ -199,7 +202,9 @@ static void rsxx_make_request(struct request_queue *q, struct bio *bio) queue_err: kmem_cache_free(bio_meta_pool, bio_meta); req_err: - bio_endio(bio, st); + if (st) + bio->bi_error = st; + bio_endio(bio); } /*----------------- Device Setup -------------------*/ diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 4cf81b5bf0f7..3b3afd2ec5d6 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -456,7 +456,7 @@ static void process_page(unsigned long data) PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE); if (control & DMASCR_HARD_ERROR) { /* error */ - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; dev_printk(KERN_WARNING, &card->dev->dev, "I/O error on sector %d/%d\n", le32_to_cpu(desc->local_addr)>>9, @@ -505,7 +505,7 @@ static void process_page(unsigned long data) return_bio = bio->bi_next; bio->bi_next = NULL; - bio_endio(bio, 0); + bio_endio(bio); } } diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index ced96777b677..662648e08596 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1078,9 +1078,9 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) /* * bio callback. 
*/ -static void end_block_io_op(struct bio *bio, int error) +static void end_block_io_op(struct bio *bio) { - __end_block_io_op(bio->bi_private, error); + __end_block_io_op(bio->bi_private, bio->bi_error); bio_put(bio); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6d89ed35d80c..d542db7a6c73 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -82,7 +82,6 @@ struct blk_shadow { struct split_bio { struct bio *bio; atomic_t pending; - int err; }; static DEFINE_MUTEX(blkfront_mutex); @@ -1478,16 +1477,14 @@ static int blkfront_probe(struct xenbus_device *dev, return 0; } -static void split_bio_end(struct bio *bio, int error) +static void split_bio_end(struct bio *bio) { struct split_bio *split_bio = bio->bi_private; - if (error) - split_bio->err = error; - if (atomic_dec_and_test(&split_bio->pending)) { split_bio->bio->bi_phys_segments = 0; - bio_endio(split_bio->bio, split_bio->err); + split_bio->bio->bi_error = bio->bi_error; + bio_endio(split_bio->bio); kfree(split_bio); } bio_put(bio); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f439ad2800da..68c3d4800464 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -850,7 +850,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) if (unlikely(bio->bi_rw & REQ_DISCARD)) { zram_bio_discard(zram, index, offset, bio); - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -883,8 +883,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) update_position(&index, &offset, &bvec); } - set_bit(BIO_UPTODATE, &bio->bi_flags); - bio_endio(bio, 0); + bio_endio(bio); return; out: diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 00cde40db572..83392f856dfd 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -278,7 +278,7 @@ err: goto out; } -static void btree_node_read_endio(struct bio *bio, int error) +static void 
btree_node_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; closure_put(cl); @@ -305,7 +305,7 @@ static void bch_btree_node_read(struct btree *b) bch_submit_bbio(bio, b->c, &b->key, 0); closure_sync(&cl); - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (bio->bi_error) set_btree_node_io_error(b); bch_bbio_free(bio, b->c); @@ -371,15 +371,15 @@ static void btree_node_write_done(struct closure *cl) __btree_node_write_done(cl); } -static void btree_node_write_endio(struct bio *bio, int error) +static void btree_node_write_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct btree *b = container_of(cl, struct btree, io); - if (error) + if (bio->bi_error) set_btree_node_io_error(b); - bch_bbio_count_io_errors(b->c, bio, error, "writing btree"); + bch_bbio_count_io_errors(b->c, bio, bio->bi_error, "writing btree"); closure_put(cl); } diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 79a6d63e8ed3..782cc2c8a185 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -38,7 +38,7 @@ * they are running owned by the thread that is running them. 
Otherwise, suppose * you submit some bios and wish to have a function run when they all complete: * - * foo_endio(struct bio *bio, int error) + * foo_endio(struct bio *bio) * { * closure_put(cl); * } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index bf6a9ca18403..9440df94bc83 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -55,19 +55,19 @@ static void bch_bio_submit_split_done(struct closure *cl) s->bio->bi_end_io = s->bi_end_io; s->bio->bi_private = s->bi_private; - bio_endio(s->bio, 0); + bio_endio(s->bio); closure_debug_destroy(&s->cl); mempool_free(s, s->p->bio_split_hook); } -static void bch_bio_submit_split_endio(struct bio *bio, int error) +static void bch_bio_submit_split_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); - if (error) - clear_bit(BIO_UPTODATE, &s->bio->bi_flags); + if (bio->bi_error) + s->bio->bi_error = bio->bi_error; bio_put(bio); closure_put(cl); diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 418607a6ba33..d6a4e16030a6 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -24,7 +24,7 @@ * bit. 
*/ -static void journal_read_endio(struct bio *bio, int error) +static void journal_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; closure_put(cl); @@ -401,7 +401,7 @@ retry: #define last_seq(j) ((j)->seq - fifo_used(&(j)->pin) + 1) -static void journal_discard_endio(struct bio *bio, int error) +static void journal_discard_endio(struct bio *bio) { struct journal_device *ja = container_of(bio, struct journal_device, discard_bio); @@ -547,11 +547,11 @@ void bch_journal_next(struct journal *j) pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); } -static void journal_write_endio(struct bio *bio, int error) +static void journal_write_endio(struct bio *bio) { struct journal_write *w = bio->bi_private; - cache_set_err_on(error, w->c, "journal io error"); + cache_set_err_on(bio->bi_error, w->c, "journal io error"); closure_put(&w->c->journal.io); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index cd7490311e51..b929fc944e9c 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -60,20 +60,20 @@ static void write_moving_finish(struct closure *cl) closure_return_with_destructor(cl, moving_io_destructor); } -static void read_moving_endio(struct bio *bio, int error) +static void read_moving_endio(struct bio *bio) { struct bbio *b = container_of(bio, struct bbio, bio); struct moving_io *io = container_of(bio->bi_private, struct moving_io, cl); - if (error) - io->op.error = error; + if (bio->bi_error) + io->op.error = bio->bi_error; else if (!KEY_DIRTY(&b->key) && ptr_stale(io->op.c, &b->key, 0)) { io->op.error = -EINTR; } - bch_bbio_endio(io->op.c, bio, error, "reading data to move"); + bch_bbio_endio(io->op.c, bio, bio->bi_error, "reading data to move"); } static void moving_init(struct moving_io *io) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index f292790997d7..a09b9462ff49 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -173,22 +173,22 @@ 
static void bch_data_insert_error(struct closure *cl) bch_data_insert_keys(cl); } -static void bch_data_insert_endio(struct bio *bio, int error) +static void bch_data_insert_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); - if (error) { + if (bio->bi_error) { /* TODO: We could try to recover from this. */ if (op->writeback) - op->error = error; + op->error = bio->bi_error; else if (!op->replace) set_closure_fn(cl, bch_data_insert_error, op->wq); else set_closure_fn(cl, NULL, NULL); } - bch_bbio_endio(op->c, bio, error, "writing data to cache"); + bch_bbio_endio(op->c, bio, bio->bi_error, "writing data to cache"); } static void bch_data_insert_start(struct closure *cl) @@ -477,7 +477,7 @@ struct search { struct data_insert_op iop; }; -static void bch_cache_read_endio(struct bio *bio, int error) +static void bch_cache_read_endio(struct bio *bio) { struct bbio *b = container_of(bio, struct bbio, bio); struct closure *cl = bio->bi_private; @@ -490,15 +490,15 @@ static void bch_cache_read_endio(struct bio *bio, int error) * from the backing device. 
*/ - if (error) - s->iop.error = error; + if (bio->bi_error) + s->iop.error = bio->bi_error; else if (!KEY_DIRTY(&b->key) && ptr_stale(s->iop.c, &b->key, 0)) { atomic_long_inc(&s->iop.c->cache_read_races); s->iop.error = -EINTR; } - bch_bbio_endio(s->iop.c, bio, error, "reading from cache"); + bch_bbio_endio(s->iop.c, bio, bio->bi_error, "reading from cache"); } /* @@ -591,13 +591,13 @@ static void cache_lookup(struct closure *cl) /* Common code for the make_request functions */ -static void request_endio(struct bio *bio, int error) +static void request_endio(struct bio *bio) { struct closure *cl = bio->bi_private; - if (error) { + if (bio->bi_error) { struct search *s = container_of(cl, struct search, cl); - s->iop.error = error; + s->iop.error = bio->bi_error; /* Only cache read errors are recoverable */ s->recoverable = false; } @@ -613,7 +613,8 @@ static void bio_complete(struct search *s) &s->d->disk->part0, s->start_time); trace_bcache_request_end(s->d, s->orig_bio); - bio_endio(s->orig_bio, s->iop.error); + s->orig_bio->bi_error = s->iop.error; + bio_endio(s->orig_bio); s->orig_bio = NULL; } } @@ -992,7 +993,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) } else { if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(dc->bdev))) - bio_endio(bio, 0); + bio_endio(bio); else bch_generic_make_request(bio, &d->bio_split_hook); } diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index fc8e545ced18..be01fd3c87f1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -221,7 +221,7 @@ err: return err; } -static void write_bdev_super_endio(struct bio *bio, int error) +static void write_bdev_super_endio(struct bio *bio) { struct cached_dev *dc = bio->bi_private; /* XXX: error checking */ @@ -290,11 +290,11 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent) closure_return_with_destructor(cl, bch_write_bdev_super_unlock); } -static void write_super_endio(struct 
bio *bio, int error) +static void write_super_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - bch_count_io_errors(ca, error, "writing superblock"); + bch_count_io_errors(ca, bio->bi_error, "writing superblock"); closure_put(&ca->set->sb_write); } @@ -339,12 +339,12 @@ void bcache_write_super(struct cache_set *c) /* UUID io */ -static void uuid_endio(struct bio *bio, int error) +static void uuid_endio(struct bio *bio) { struct closure *cl = bio->bi_private; struct cache_set *c = container_of(cl, struct cache_set, uuid_write); - cache_set_err_on(error, c, "accessing uuids"); + cache_set_err_on(bio->bi_error, c, "accessing uuids"); bch_bbio_free(bio, c); closure_put(cl); } @@ -512,11 +512,11 @@ static struct uuid_entry *uuid_find_empty(struct cache_set *c) * disk. */ -static void prio_endio(struct bio *bio, int error) +static void prio_endio(struct bio *bio) { struct cache *ca = bio->bi_private; - cache_set_err_on(error, ca->set, "accessing priorities"); + cache_set_err_on(bio->bi_error, ca->set, "accessing priorities"); bch_bbio_free(bio, ca->set); closure_put(&ca->prio); } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index f1986bcd1bf0..b4fc874c30fd 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -166,12 +166,12 @@ static void write_dirty_finish(struct closure *cl) closure_return_with_destructor(cl, dirty_io_destructor); } -static void dirty_endio(struct bio *bio, int error) +static void dirty_endio(struct bio *bio) { struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; - if (error) + if (bio->bi_error) SET_KEY_DIRTY(&w->key, false); closure_put(&io->cl); @@ -193,15 +193,15 @@ static void write_dirty(struct closure *cl) continue_at(cl, write_dirty_finish, system_wq); } -static void read_dirty_endio(struct bio *bio, int error) +static void read_dirty_endio(struct bio *bio) { struct keybuf_key *w = bio->bi_private; struct dirty_io *io = w->private; 
bch_count_io_errors(PTR_CACHE(io->dc->disk.c, &w->key, 0), - error, "reading dirty data from cache"); + bio->bi_error, "reading dirty data from cache"); - dirty_endio(bio, error); + dirty_endio(bio); } static void read_dirty_submit(struct closure *cl) diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index cd6d1d21e057..03af174485d3 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -236,8 +236,10 @@ void dm_cell_error(struct dm_bio_prison *prison, bio_list_init(&bios); dm_cell_release(prison, cell, &bios); - while ((bio = bio_list_pop(&bios))) - bio_endio(bio, error); + while ((bio = bio_list_pop(&bios))) { + bio->bi_error = error; + bio_endio(bio); + } } EXPORT_SYMBOL_GPL(dm_cell_error); diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 86dbbc737402..83cc52eaf56d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -545,7 +545,8 @@ static void dmio_complete(unsigned long error, void *context) { struct dm_buffer *b = context; - b->bio.bi_end_io(&b->bio, error ? -EIO : 0); + b->bio.bi_error = error ? 
-EIO : 0; + b->bio.bi_end_io(&b->bio); } static void use_dmio(struct dm_buffer *b, int rw, sector_t block, @@ -575,13 +576,16 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block, b->bio.bi_end_io = end_io; r = dm_io(&io_req, 1, &region, NULL); - if (r) - end_io(&b->bio, r); + if (r) { + b->bio.bi_error = r; + end_io(&b->bio); + } } -static void inline_endio(struct bio *bio, int error) +static void inline_endio(struct bio *bio) { bio_end_io_t *end_fn = bio->bi_private; + int error = bio->bi_error; /* * Reset the bio to free any attached resources @@ -589,7 +593,8 @@ static void inline_endio(struct bio *bio, int error) */ bio_reset(bio); - end_fn(bio, error); + bio->bi_error = error; + end_fn(bio); } static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block, @@ -661,13 +666,14 @@ static void submit_io(struct dm_buffer *b, int rw, sector_t block, * Set the error, clear B_WRITING bit and wake anyone who was waiting on * it. */ -static void write_endio(struct bio *bio, int error) +static void write_endio(struct bio *bio) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - b->write_error = error; - if (unlikely(error)) { + b->write_error = bio->bi_error; + if (unlikely(bio->bi_error)) { struct dm_bufio_client *c = b->c; + int error = bio->bi_error; (void)cmpxchg(&c->async_write_error, 0, error); } @@ -1026,11 +1032,11 @@ found_buffer: * The endio routine for reading: set the error, clear the bit and wake up * anyone waiting on the buffer. 
*/ -static void read_endio(struct bio *bio, int error) +static void read_endio(struct bio *bio) { struct dm_buffer *b = container_of(bio, struct dm_buffer, bio); - b->read_error = error; + b->read_error = bio->bi_error; BUG_ON(!test_bit(B_READING, &b->state)); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1b4e1756b169..04d0dadc48b1 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -919,14 +919,14 @@ static void defer_writethrough_bio(struct cache *cache, struct bio *bio) wake_worker(cache); } -static void writethrough_endio(struct bio *bio, int err) +static void writethrough_endio(struct bio *bio) { struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT); dm_unhook_bio(&pb->hook_info, bio); - if (err) { - bio_endio(bio, err); + if (bio->bi_error) { + bio_endio(bio); return; } @@ -1231,7 +1231,7 @@ static void migration_success_post_commit(struct dm_cache_migration *mg) * The block was promoted via an overwrite, so it's dirty. 
*/ set_dirty(cache, mg->new_oblock, mg->cblock); - bio_endio(mg->new_ocell->holder, 0); + bio_endio(mg->new_ocell->holder); cell_defer(cache, mg->new_ocell, false); } free_io_migration(mg); @@ -1284,7 +1284,7 @@ static void issue_copy(struct dm_cache_migration *mg) } } -static void overwrite_endio(struct bio *bio, int err) +static void overwrite_endio(struct bio *bio) { struct dm_cache_migration *mg = bio->bi_private; struct cache *cache = mg->cache; @@ -1294,7 +1294,7 @@ static void overwrite_endio(struct bio *bio, int err) dm_unhook_bio(&pb->hook_info, bio); - if (err) + if (bio->bi_error) mg->err = true; mg->requeue_holder = false; @@ -1358,7 +1358,7 @@ static void issue_discard(struct dm_cache_migration *mg) b = to_dblock(from_dblock(b) + 1); } - bio_endio(bio, 0); + bio_endio(bio); cell_defer(mg->cache, mg->new_ocell, false); free_migration(mg); } @@ -1631,7 +1631,7 @@ static void process_discard_bio(struct cache *cache, struct prealloc *structs, calc_discard_block_range(cache, bio, &b, &e); if (b == e) { - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -2213,8 +2213,10 @@ static void requeue_deferred_bios(struct cache *cache) bio_list_merge(&bios, &cache->deferred_bios); bio_list_init(&cache->deferred_bios); - while ((bio = bio_list_pop(&bios))) - bio_endio(bio, DM_ENDIO_REQUEUE); + while ((bio = bio_list_pop(&bios))) { + bio->bi_error = DM_ENDIO_REQUEUE; + bio_endio(bio); + } } static int more_work(struct cache *cache) @@ -3119,7 +3121,7 @@ static int cache_map(struct dm_target *ti, struct bio *bio) * This is a duplicate writethrough io that is no * longer needed because the block has been demoted. 
*/ - bio_endio(bio, 0); + bio_endio(bio); // FIXME: remap everything as a miss cell_defer(cache, cell, false); r = DM_MAPIO_SUBMITTED; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0f48fed44a17..744b80c608e5 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1076,7 +1076,8 @@ static void crypt_dec_pending(struct dm_crypt_io *io) if (io->ctx.req) crypt_free_req(cc, io->ctx.req, base_bio); - bio_endio(base_bio, error); + base_bio->bi_error = error; + bio_endio(base_bio); } /* @@ -1096,15 +1097,12 @@ static void crypt_dec_pending(struct dm_crypt_io *io) * The work is done per CPU global for all dm-crypt instances. * They should not depend on each other and do not block. */ -static void crypt_endio(struct bio *clone, int error) +static void crypt_endio(struct bio *clone) { struct dm_crypt_io *io = clone->bi_private; struct crypt_config *cc = io->cc; unsigned rw = bio_data_dir(clone); - if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) - error = -EIO; - /* * free the processed pages */ @@ -1113,13 +1111,13 @@ static void crypt_endio(struct bio *clone, int error) bio_put(clone); - if (rw == READ && !error) { + if (rw == READ && !clone->bi_error) { kcryptd_queue_crypt(io); return; } - if (unlikely(error)) - io->error = error; + if (unlikely(clone->bi_error)) + io->error = clone->bi_error; crypt_dec_pending(io); } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index b257e46876d3..04481247aab8 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -296,7 +296,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) * Drop writes? 
*/ if (test_bit(DROP_WRITES, &fc->flags)) { - bio_endio(bio, 0); + bio_endio(bio); return DM_MAPIO_SUBMITTED; } diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 74adcd2c967e..efc6659f9d6a 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -134,12 +134,12 @@ static void dec_count(struct io *io, unsigned int region, int error) complete_io(io); } -static void endio(struct bio *bio, int error) +static void endio(struct bio *bio) { struct io *io; unsigned region; - if (error && bio_data_dir(bio) == READ) + if (bio->bi_error && bio_data_dir(bio) == READ) zero_fill_bio(bio); /* @@ -149,7 +149,7 @@ static void endio(struct bio *bio, int error) bio_put(bio); - dec_count(io, region, error); + dec_count(io, region, bio->bi_error); } /*----------------------------------------------------------------- diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index ad1b049ae2ab..e9d17488d5e3 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -146,16 +146,16 @@ static void put_io_block(struct log_writes_c *lc) } } -static void log_end_io(struct bio *bio, int err) +static void log_end_io(struct bio *bio) { struct log_writes_c *lc = bio->bi_private; struct bio_vec *bvec; int i; - if (err) { + if (bio->bi_error) { unsigned long flags; - DMERR("Error writing log block, error=%d", err); + DMERR("Error writing log block, error=%d", bio->bi_error); spin_lock_irqsave(&lc->blocks_lock, flags); lc->logging_enabled = false; spin_unlock_irqrestore(&lc->blocks_lock, flags); @@ -205,7 +205,6 @@ static int write_metadata(struct log_writes_c *lc, void *entry, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - set_bit(BIO_UPTODATE, &bio->bi_flags); page = alloc_page(GFP_KERNEL); if (!page) { @@ -270,7 +269,6 @@ static int log_one_block(struct log_writes_c *lc, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - set_bit(BIO_UPTODATE, &bio->bi_flags); for (i = 0; i < 
block->vec_cnt; i++) { /* @@ -292,7 +290,6 @@ static int log_one_block(struct log_writes_c *lc, bio->bi_bdev = lc->logdev->bdev; bio->bi_end_io = log_end_io; bio->bi_private = lc; - set_bit(BIO_UPTODATE, &bio->bi_flags); ret = bio_add_page(bio, block->vecs[i].bv_page, block->vecs[i].bv_len, 0); @@ -606,7 +603,7 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio) WARN_ON(flush_bio || fua_bio); if (lc->device_supports_discard) goto map_bio; - bio_endio(bio, 0); + bio_endio(bio); return DM_MAPIO_SUBMITTED; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d83696bf403b..e1eabfb2f52d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -490,9 +490,11 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio) * If device is suspended, complete the bio. */ if (dm_noflush_suspending(ms->ti)) - bio_endio(bio, DM_ENDIO_REQUEUE); + bio->bi_error = DM_ENDIO_REQUEUE; else - bio_endio(bio, -EIO); + bio->bi_error = -EIO; + + bio_endio(bio); return; } @@ -515,7 +517,7 @@ static void read_callback(unsigned long error, void *context) bio_set_m(bio, NULL); if (likely(!error)) { - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -531,7 +533,7 @@ static void read_callback(unsigned long error, void *context) DMERR_LIMIT("Read failure on mirror device %s. Failing I/O.", m->dev->name); - bio_endio(bio, -EIO); + bio_io_error(bio); } /* Asynchronous read. */ @@ -580,7 +582,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) if (likely(m)) read_async_bio(m, bio); else - bio_endio(bio, -EIO); + bio_io_error(bio); } } @@ -598,7 +600,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads) static void write_callback(unsigned long error, void *context) { - unsigned i, ret = 0; + unsigned i; struct bio *bio = (struct bio *) context; struct mirror_set *ms; int should_wake = 0; @@ -614,7 +616,7 @@ static void write_callback(unsigned long error, void *context) * regions with the same code. 
*/ if (likely(!error)) { - bio_endio(bio, ret); + bio_endio(bio); return; } @@ -623,7 +625,8 @@ static void write_callback(unsigned long error, void *context) * degrade the array. */ if (bio->bi_rw & REQ_DISCARD) { - bio_endio(bio, -EOPNOTSUPP); + bio->bi_error = -EOPNOTSUPP; + bio_endio(bio); return; } @@ -828,13 +831,12 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) * be wrong if the failed leg returned after reboot and * got replicated back to the good legs.) */ - if (unlikely(!get_valid_mirror(ms) || (keep_log(ms) && ms->log_failure))) - bio_endio(bio, -EIO); + bio_io_error(bio); else if (errors_handled(ms) && !keep_log(ms)) hold_bio(ms, bio); else - bio_endio(bio, 0); + bio_endio(bio); } } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7c82d3ccce87..dd8ca0bb0980 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1490,7 +1490,7 @@ out: error_bios(snapshot_bios); } else { if (full_bio) - bio_endio(full_bio, 0); + bio_endio(full_bio); flush_bios(snapshot_bios); } @@ -1580,11 +1580,11 @@ static void start_copy(struct dm_snap_pending_exception *pe) dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } -static void full_bio_end_io(struct bio *bio, int error) +static void full_bio_end_io(struct bio *bio) { void *callback_data = bio->bi_private; - dm_kcopyd_do_callback(callback_data, 0, error ? 1 : 0); + dm_kcopyd_do_callback(callback_data, 0, bio->bi_error ? 
1 : 0); } static void start_full_bio(struct dm_snap_pending_exception *pe, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index a672a1502c14..4f94c7da82f6 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -273,7 +273,7 @@ static int stripe_map_range(struct stripe_c *sc, struct bio *bio, return DM_MAPIO_REMAPPED; } else { /* The range doesn't map to the target stripe */ - bio_endio(bio, 0); + bio_endio(bio); return DM_MAPIO_SUBMITTED; } } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c33f61a4cc28..2ade2c46dca9 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -614,8 +614,10 @@ static void error_bio_list(struct bio_list *bios, int error) { struct bio *bio; - while ((bio = bio_list_pop(bios))) - bio_endio(bio, error); + while ((bio = bio_list_pop(bios))) { + bio->bi_error = error; + bio_endio(bio); + } } static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master, int error) @@ -864,14 +866,14 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) complete_mapping_preparation(m); } -static void overwrite_endio(struct bio *bio, int err) +static void overwrite_endio(struct bio *bio) { struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; bio->bi_end_io = m->saved_bi_end_io; - m->err = err; + m->err = bio->bi_error; complete_mapping_preparation(m); } @@ -996,7 +998,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ if (bio) { inc_remap_and_issue_cell(tc, m->cell, m->data_block); - bio_endio(bio, 0); + bio_endio(bio); } else { inc_all_io_entry(tc->pool, m->cell->holder); remap_and_issue(tc, m->cell->holder, m->data_block); @@ -1026,7 +1028,7 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) static void process_prepared_discard_success(struct dm_thin_new_mapping *m) { - bio_endio(m->bio, 0); + bio_endio(m->bio); 
free_discard_mapping(m); } @@ -1040,7 +1042,7 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m) metadata_operation_failed(tc->pool, "dm_thin_remove_range", r); bio_io_error(m->bio); } else - bio_endio(m->bio, 0); + bio_endio(m->bio); cell_defer_no_holder(tc, m->cell); mempool_free(m, tc->pool->mapping_pool); @@ -1111,7 +1113,8 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) * Even if r is set, there could be sub discards in flight that we * need to wait for. */ - bio_endio(m->bio, r); + m->bio->bi_error = r; + bio_endio(m->bio); cell_defer_no_holder(tc, m->cell); mempool_free(m, pool->mapping_pool); } @@ -1487,9 +1490,10 @@ static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) { int error = should_error_unserviceable_bio(pool); - if (error) - bio_endio(bio, error); - else + if (error) { + bio->bi_error = error; + bio_endio(bio); + } else retry_on_resume(bio); } @@ -1625,7 +1629,7 @@ static void process_discard_cell_passdown(struct thin_c *tc, struct dm_bio_priso * will prevent completion until the sub range discards have * completed. */ - bio_endio(bio, 0); + bio_endio(bio); } static void process_discard_bio(struct thin_c *tc, struct bio *bio) @@ -1639,7 +1643,7 @@ static void process_discard_bio(struct thin_c *tc, struct bio *bio) /* * The discard covers less than a block. 
*/ - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -1784,7 +1788,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); cell_defer_no_holder(tc, cell); - bio_endio(bio, 0); + bio_endio(bio); return; } @@ -1849,7 +1853,7 @@ static void process_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell) } else { zero_fill_bio(bio); - bio_endio(bio, 0); + bio_endio(bio); } } else provision_block(tc, bio, block, cell); @@ -1920,7 +1924,7 @@ static void __process_bio_read_only(struct thin_c *tc, struct bio *bio, } zero_fill_bio(bio); - bio_endio(bio, 0); + bio_endio(bio); break; default: @@ -1945,7 +1949,7 @@ static void process_cell_read_only(struct thin_c *tc, struct dm_bio_prison_cell static void process_bio_success(struct thin_c *tc, struct bio *bio) { - bio_endio(bio, 0); + bio_endio(bio); } static void process_bio_fail(struct thin_c *tc, struct bio *bio) @@ -2581,7 +2585,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio) thin_hook_bio(tc, bio); if (tc->requeue_mode) { - bio_endio(bio, DM_ENDIO_REQUEUE); + bio->bi_error = DM_ENDIO_REQUEUE; + bio_endio(bio); return DM_MAPIO_SUBMITTED; } diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index bb9c6a00e4b0..4b34df8fdb58 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -458,8 +458,9 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; + bio->bi_error = error; - bio_endio(bio, error); + bio_endio(bio); } static void verity_work(struct work_struct *w) @@ -469,12 +470,12 @@ static void verity_work(struct work_struct *w) verity_finish_io(io, verity_verify_io(io)); } -static void verity_end_io(struct bio *bio, int error) +static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; - if (error) { - verity_finish_io(io, error); + if (bio->bi_error) { + verity_finish_io(io, 
bio->bi_error); return; } diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index b9a64bbce304..766bc93006e6 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -47,7 +47,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio) break; } - bio_endio(bio, 0); + bio_endio(bio); /* accepted bio, don't make new request */ return DM_MAPIO_SUBMITTED; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f331d888e7f5..7f367fcace03 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -944,7 +944,8 @@ static void dec_pending(struct dm_io *io, int error) } else { /* done with normal IO or empty flush */ trace_block_bio_complete(md->queue, bio, io_error); - bio_endio(bio, io_error); + bio->bi_error = io_error; + bio_endio(bio); } } } @@ -957,17 +958,15 @@ static void disable_write_same(struct mapped_device *md) limits->max_write_same_sectors = 0; } -static void clone_endio(struct bio *bio, int error) +static void clone_endio(struct bio *bio) { + int error = bio->bi_error; int r = error; struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); struct dm_io *io = tio->io; struct mapped_device *md = tio->io->md; dm_endio_fn endio = tio->ti->type->end_io; - if (!bio_flagged(bio, BIO_UPTODATE) && !error) - error = -EIO; - if (endio) { r = endio(tio->ti, bio, error); if (r < 0 || r == DM_ENDIO_REQUEUE) @@ -996,7 +995,7 @@ static void clone_endio(struct bio *bio, int error) /* * Partial completion handling for request-based dm */ -static void end_clone_bio(struct bio *clone, int error) +static void end_clone_bio(struct bio *clone) { struct dm_rq_clone_bio_info *info = container_of(clone, struct dm_rq_clone_bio_info, clone); @@ -1013,13 +1012,13 @@ static void end_clone_bio(struct bio *clone, int error) * the remainder. */ return; - else if (error) { + else if (bio->bi_error) { /* * Don't notice the error to the upper layer yet. * The error handling decision is made by the target driver, * when the request is completed. 
*/ - tio->error = error; + tio->error = bio->bi_error; return; } diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index 1277eb26b58a..4a8e15058e8b 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -70,7 +70,7 @@ #include -static void faulty_fail(struct bio *bio, int error) +static void faulty_fail(struct bio *bio) { struct bio *b = bio->bi_private; @@ -181,7 +181,7 @@ static void make_request(struct mddev *mddev, struct bio *bio) /* special case - don't decrement, don't generic_make_request, * just fail immediately */ - bio_endio(bio, -EIO); + bio_io_error(bio); return; } diff --git a/drivers/md/linear.c b/drivers/md/linear.c index fa7d577f3d12..aefd66142eef 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -297,7 +297,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) if (unlikely((split->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ - bio_endio(split, 0); + bio_endio(split); } else generic_make_request(split); } while (split != bio); diff --git a/drivers/md/md.c b/drivers/md/md.c index d429c30cd514..ac4381a6625c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -263,7 +263,9 @@ static void md_make_request(struct request_queue *q, struct bio *bio) return; } if (mddev->ro == 1 && unlikely(rw == WRITE)) { - bio_endio(bio, bio_sectors(bio) == 0 ? 
0 : -EROFS); + if (bio_sectors(bio) != 0) + bio->bi_error = -EROFS; + bio_endio(bio); return; } smp_rmb(); /* Ensure implications of 'active' are visible */ @@ -377,7 +379,7 @@ static int md_mergeable_bvec(struct request_queue *q, * Generic flush handling for md */ -static void md_end_flush(struct bio *bio, int err) +static void md_end_flush(struct bio *bio) { struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; @@ -433,7 +435,7 @@ static void md_submit_flush_data(struct work_struct *ws) if (bio->bi_iter.bi_size == 0) /* an empty barrier - all done */ - bio_endio(bio, 0); + bio_endio(bio); else { bio->bi_rw &= ~REQ_FLUSH; mddev->pers->make_request(mddev, bio); @@ -728,15 +730,13 @@ void md_rdev_clear(struct md_rdev *rdev) } EXPORT_SYMBOL_GPL(md_rdev_clear); -static void super_written(struct bio *bio, int error) +static void super_written(struct bio *bio) { struct md_rdev *rdev = bio->bi_private; struct mddev *mddev = rdev->mddev; - if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) { - printk("md: super_written gets error=%d, uptodate=%d\n", - error, test_bit(BIO_UPTODATE, &bio->bi_flags)); - WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags)); + if (bio->bi_error) { + printk("md: super_written gets error=%d\n", bio->bi_error); md_error(mddev, rdev); } @@ -791,7 +791,7 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, bio_add_page(bio, page, size, 0); submit_bio_wait(rw, bio); - ret = test_bit(BIO_UPTODATE, &bio->bi_flags); + ret = !bio->bi_error; bio_put(bio); return ret; } diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index ac3ede2bd00e..082a489af9d3 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -77,18 +77,18 @@ static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err) struct bio *bio = mp_bh->master_bio; struct mpconf *conf = mp_bh->mddev->private; - bio_endio(bio, err); + bio->bi_error = err; + bio_endio(bio); mempool_free(mp_bh, conf->pool); } -static void 
multipath_end_request(struct bio *bio, int error) +static void multipath_end_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct multipath_bh *mp_bh = bio->bi_private; struct mpconf *conf = mp_bh->mddev->private; struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; - if (uptodate) + if (!bio->bi_error) multipath_end_bh_io(mp_bh, 0); else if (!(bio->bi_rw & REQ_RAHEAD)) { /* @@ -101,7 +101,7 @@ static void multipath_end_request(struct bio *bio, int error) (unsigned long long)bio->bi_iter.bi_sector); multipath_reschedule_retry(mp_bh); } else - multipath_end_bh_io(mp_bh, error); + multipath_end_bh_io(mp_bh, bio->bi_error); rdev_dec_pending(rdev, conf->mddev); } @@ -123,7 +123,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio) mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { - bio_endio(bio, -EIO); + bio_io_error(bio); mempool_free(mp_bh, conf->pool); return; } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index efb654eb5399..e6e0ae56f66b 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -543,7 +543,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio) if (unlikely((split->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ - bio_endio(split, 0); + bio_endio(split); } else generic_make_request(split); } while (split != bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f80f1af61ce7..9aa7d1fb2bc1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -255,9 +255,10 @@ static void call_bio_endio(struct r1bio *r1_bio) done = 1; if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; + if (done) { - bio_endio(bio, 0); + bio_endio(bio); /* * Wake up any possible resync thread that waits for the device * to go idle. 
@@ -312,9 +313,9 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio) return mirror; } -static void raid1_end_read_request(struct bio *bio, int error) +static void raid1_end_read_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct r1bio *r1_bio = bio->bi_private; int mirror; struct r1conf *conf = r1_bio->mddev->private; @@ -397,9 +398,8 @@ static void r1_bio_write_done(struct r1bio *r1_bio) } } -static void raid1_end_write_request(struct bio *bio, int error) +static void raid1_end_write_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r1bio *r1_bio = bio->bi_private; int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state); struct r1conf *conf = r1_bio->mddev->private; @@ -410,7 +410,7 @@ static void raid1_end_write_request(struct bio *bio, int error) /* * 'one mirror IO has finished' event handler: */ - if (!uptodate) { + if (bio->bi_error) { set_bit(WriteErrorSeen, &conf->mirrors[mirror].rdev->flags); if (!test_and_set_bit(WantReplacement, @@ -793,7 +793,7 @@ static void flush_pending_writes(struct r1conf *conf) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1068,7 +1068,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1734,7 +1734,7 @@ abort: return err; } -static void end_sync_read(struct bio *bio, int error) +static void end_sync_read(struct bio *bio) { struct r1bio *r1_bio = bio->bi_private; @@ -1745,16 +1745,16 @@ static void end_sync_read(struct bio *bio, int error) * or re-read if the read failed. 
* We don't do much here, just schedule handling by raid1d */ - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (!bio->bi_error) set_bit(R1BIO_Uptodate, &r1_bio->state); if (atomic_dec_and_test(&r1_bio->remaining)) reschedule_retry(r1_bio); } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_write(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct r1bio *r1_bio = bio->bi_private; struct mddev *mddev = r1_bio->mddev; struct r1conf *conf = mddev->private; @@ -1941,7 +1941,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio) idx ++; } set_bit(R1BIO_Uptodate, &r1_bio->state); - set_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = 0; return 1; } @@ -1965,15 +1965,14 @@ static void process_checks(struct r1bio *r1_bio) for (i = 0; i < conf->raid_disks * 2; i++) { int j; int size; - int uptodate; + int error; struct bio *b = r1_bio->bios[i]; if (b->bi_end_io != end_sync_read) continue; - /* fixup the bio for reuse, but preserve BIO_UPTODATE */ - uptodate = test_bit(BIO_UPTODATE, &b->bi_flags); + /* fixup the bio for reuse, but preserve errno */ + error = b->bi_error; bio_reset(b); - if (!uptodate) - clear_bit(BIO_UPTODATE, &b->bi_flags); + b->bi_error = error; b->bi_vcnt = vcnt; b->bi_iter.bi_size = r1_bio->sectors << 9; b->bi_iter.bi_sector = r1_bio->sector + @@ -1996,7 +1995,7 @@ static void process_checks(struct r1bio *r1_bio) } for (primary = 0; primary < conf->raid_disks * 2; primary++) if (r1_bio->bios[primary]->bi_end_io == end_sync_read && - test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) { + !r1_bio->bios[primary]->bi_error) { r1_bio->bios[primary]->bi_end_io = NULL; rdev_dec_pending(conf->mirrors[primary].rdev, mddev); break; @@ -2006,14 +2005,14 @@ static void process_checks(struct r1bio *r1_bio) int j; struct bio *pbio = r1_bio->bios[primary]; struct bio *sbio = r1_bio->bios[i]; - int uptodate = test_bit(BIO_UPTODATE, &sbio->bi_flags); + int error = 
sbio->bi_error; if (sbio->bi_end_io != end_sync_read) continue; - /* Now we can 'fixup' the BIO_UPTODATE flag */ - set_bit(BIO_UPTODATE, &sbio->bi_flags); + /* Now we can 'fixup' the error value */ + sbio->bi_error = 0; - if (uptodate) { + if (!error) { for (j = vcnt; j-- ; ) { struct page *p, *s; p = pbio->bi_io_vec[j].bv_page; @@ -2028,7 +2027,7 @@ static void process_checks(struct r1bio *r1_bio) if (j >= 0) atomic64_add(r1_bio->sectors, &mddev->resync_mismatches); if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery) - && uptodate)) { + && !error)) { /* No need to write to this device. */ sbio->bi_end_io = NULL; rdev_dec_pending(conf->mirrors[i].rdev, mddev); @@ -2269,11 +2268,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio struct bio *bio = r1_bio->bios[m]; if (bio->bi_end_io == NULL) continue; - if (test_bit(BIO_UPTODATE, &bio->bi_flags) && + if (!bio->bi_error && test_bit(R1BIO_MadeGood, &r1_bio->state)) { rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); } - if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && + if (bio->bi_error && test_bit(R1BIO_WriteError, &r1_bio->state)) { if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) md_error(conf->mddev, rdev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 940f2f365461..929e9a26d81b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -101,7 +101,7 @@ static int _enough(struct r10conf *conf, int previous, int ignore); static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *skipped); static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio); -static void end_reshape_write(struct bio *bio, int error); +static void end_reshape_write(struct bio *bio); static void end_reshape(struct r10conf *conf); static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) @@ -307,9 +307,9 @@ static void raid_end_bio_io(struct r10bio *r10_bio) } else done = 1; if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) - 
clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; if (done) { - bio_endio(bio, 0); + bio_endio(bio); /* * Wake up any possible resync thread that waits for the device * to go idle. @@ -358,9 +358,9 @@ static int find_bio_disk(struct r10conf *conf, struct r10bio *r10_bio, return r10_bio->devs[slot].devnum; } -static void raid10_end_read_request(struct bio *bio, int error) +static void raid10_end_read_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct r10bio *r10_bio = bio->bi_private; int slot, dev; struct md_rdev *rdev; @@ -438,9 +438,8 @@ static void one_write_done(struct r10bio *r10_bio) } } -static void raid10_end_write_request(struct bio *bio, int error) +static void raid10_end_write_request(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; int dev; int dec_rdev = 1; @@ -460,7 +459,7 @@ static void raid10_end_write_request(struct bio *bio, int error) /* * this branch is our 'one mirror IO has finished' event handler: */ - if (!uptodate) { + if (bio->bi_error) { if (repl) /* Never record new bad blocks to replacement, * just fail it. 
@@ -957,7 +956,7 @@ static void flush_pending_writes(struct r10conf *conf) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1133,7 +1132,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule) if (unlikely((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) /* Just ignore it */ - bio_endio(bio, 0); + bio_endio(bio); else generic_make_request(bio); bio = next; @@ -1916,7 +1915,7 @@ abort: return err; } -static void end_sync_read(struct bio *bio, int error) +static void end_sync_read(struct bio *bio) { struct r10bio *r10_bio = bio->bi_private; struct r10conf *conf = r10_bio->mddev->private; @@ -1928,7 +1927,7 @@ static void end_sync_read(struct bio *bio, int error) } else d = find_bio_disk(conf, r10_bio, bio, NULL, NULL); - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (!bio->bi_error) set_bit(R10BIO_Uptodate, &r10_bio->state); else /* The write handler will notice the lack of @@ -1977,9 +1976,8 @@ static void end_sync_request(struct r10bio *r10_bio) } } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_write(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; @@ -1996,7 +1994,7 @@ static void end_sync_write(struct bio *bio, int error) else rdev = conf->mirrors[d].rdev; - if (!uptodate) { + if (bio->bi_error) { if (repl) md_error(mddev, rdev); else { @@ -2044,7 +2042,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio) /* find the first device with a block */ for (i=0; icopies; i++) - if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) + if (!r10_bio->devs[i].bio->bi_error) break; if (i == conf->copies) @@ -2064,7 +2062,7 @@ static void sync_request_write(struct 
mddev *mddev, struct r10bio *r10_bio) continue; if (i == first) continue; - if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) { + if (!r10_bio->devs[i].bio->bi_error) { /* We know that the bi_io_vec layout is the same for * both 'first' and 'i', so we just compare them. * All vec entries are PAGE_SIZE; @@ -2706,8 +2704,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev = conf->mirrors[dev].rdev; if (r10_bio->devs[m].bio == NULL) continue; - if (test_bit(BIO_UPTODATE, - &r10_bio->devs[m].bio->bi_flags)) { + if (!r10_bio->devs[m].bio->bi_error) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, @@ -2722,8 +2719,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev = conf->mirrors[dev].replacement; if (r10_bio->devs[m].repl_bio == NULL) continue; - if (test_bit(BIO_UPTODATE, - &r10_bio->devs[m].repl_bio->bi_flags)) { + + if (!r10_bio->devs[m].repl_bio->bi_error) { rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, @@ -2748,8 +2745,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) r10_bio->devs[m].addr, r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); - } else if (bio != NULL && - !test_bit(BIO_UPTODATE, &bio->bi_flags)) { + } else if (bio != NULL && bio->bi_error) { if (!narrow_write_error(r10_bio, m)) { md_error(conf->mddev, rdev); set_bit(R10BIO_Degraded, @@ -3263,7 +3259,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, bio = r10_bio->devs[i].bio; bio_reset(bio); - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; if (conf->mirrors[d].rdev == NULL || test_bit(Faulty, &conf->mirrors[d].rdev->flags)) continue; @@ -3300,7 +3296,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* Need to set up for writing to the replacement */ bio = r10_bio->devs[i].repl_bio; bio_reset(bio); - clear_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = -EIO; sector = 
r10_bio->devs[i].addr; atomic_inc(&conf->mirrors[d].rdev->nr_pending); @@ -3377,7 +3373,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (bio->bi_end_io == end_sync_read) { md_sync_acct(bio->bi_bdev, nr_sectors); - set_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_error = 0; generic_make_request(bio); } } @@ -4380,7 +4376,7 @@ read_more: read_bio->bi_end_io = end_sync_read; read_bio->bi_rw = READ; read_bio->bi_flags &= (~0UL << BIO_RESET_BITS); - __set_bit(BIO_UPTODATE, &read_bio->bi_flags); + read_bio->bi_error = 0; read_bio->bi_vcnt = 0; read_bio->bi_iter.bi_size = 0; r10_bio->master_bio = read_bio; @@ -4601,9 +4597,8 @@ static int handle_reshape_read_error(struct mddev *mddev, return 0; } -static void end_reshape_write(struct bio *bio, int error) +static void end_reshape_write(struct bio *bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct r10bio *r10_bio = bio->bi_private; struct mddev *mddev = r10_bio->mddev; struct r10conf *conf = mddev->private; @@ -4620,7 +4615,7 @@ static void end_reshape_write(struct bio *bio, int error) rdev = conf->mirrors[d].rdev; } - if (!uptodate) { + if (bio->bi_error) { /* FIXME should record badblock */ md_error(mddev, rdev); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 59e44e99eef3..84d6eec1033e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -233,7 +233,7 @@ static void return_io(struct bio *return_bi) bi->bi_iter.bi_size = 0; trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), bi, 0); - bio_endio(bi, 0); + bio_endio(bi); bi = return_bi; } } @@ -887,9 +887,9 @@ static int use_new_offset(struct r5conf *conf, struct stripe_head *sh) } static void -raid5_end_read_request(struct bio *bi, int error); +raid5_end_read_request(struct bio *bi); static void -raid5_end_write_request(struct bio *bi, int error); +raid5_end_write_request(struct bio *bi); static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) { @@ -2277,12 +2277,11 @@ static void 
shrink_stripes(struct r5conf *conf) conf->slab_cache = NULL; } -static void raid5_end_read_request(struct bio * bi, int error) +static void raid5_end_read_request(struct bio * bi) { struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; int disks = sh->disks, i; - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); char b[BDEVNAME_SIZE]; struct md_rdev *rdev = NULL; sector_t s; @@ -2291,9 +2290,9 @@ static void raid5_end_read_request(struct bio * bi, int error) if (bi == &sh->dev[i].req) break; - pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n", + pr_debug("end_read_request %llu/%d, count: %d, error %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), - uptodate); + bi->bi_error); if (i == disks) { BUG(); return; @@ -2312,7 +2311,7 @@ static void raid5_end_read_request(struct bio * bi, int error) s = sh->sector + rdev->new_data_offset; else s = sh->sector + rdev->data_offset; - if (uptodate) { + if (!bi->bi_error) { set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { /* Note that this cannot happen on a @@ -2400,13 +2399,12 @@ static void raid5_end_read_request(struct bio * bi, int error) release_stripe(sh); } -static void raid5_end_write_request(struct bio *bi, int error) +static void raid5_end_write_request(struct bio *bi) { struct stripe_head *sh = bi->bi_private; struct r5conf *conf = sh->raid_conf; int disks = sh->disks, i; struct md_rdev *uninitialized_var(rdev); - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); sector_t first_bad; int bad_sectors; int replacement = 0; @@ -2429,23 +2427,23 @@ static void raid5_end_write_request(struct bio *bi, int error) break; } } - pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n", + pr_debug("end_write_request %llu/%d, count %d, error: %d.\n", (unsigned long long)sh->sector, i, atomic_read(&sh->count), - uptodate); + bi->bi_error); if (i == disks) { BUG(); return; } if (replacement) { - if (!uptodate) + if 
(bi->bi_error) md_error(conf->mddev, rdev); else if (is_badblock(rdev, sh->sector, STRIPE_SECTORS, &first_bad, &bad_sectors)) set_bit(R5_MadeGoodRepl, &sh->dev[i].flags); } else { - if (!uptodate) { + if (bi->bi_error) { set_bit(STRIPE_DEGRADED, &sh->state); set_bit(WriteErrorSeen, &rdev->flags); set_bit(R5_WriteError, &sh->dev[i].flags); @@ -2466,7 +2464,7 @@ static void raid5_end_write_request(struct bio *bi, int error) } rdev_dec_pending(rdev, conf->mddev); - if (sh->batch_head && !uptodate && !replacement) + if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) @@ -3107,7 +3105,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, while (bi && bi->bi_iter.bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); + + bi->bi_error = -EIO; if (!raid5_dec_bi_active_stripes(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; @@ -3131,7 +3130,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, while (bi && bi->bi_iter.bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); + + bi->bi_error = -EIO; if (!raid5_dec_bi_active_stripes(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; @@ -3156,7 +3156,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); - clear_bit(BIO_UPTODATE, &bi->bi_flags); + + bi->bi_error = -EIO; if (!raid5_dec_bi_active_stripes(bi)) { bi->bi_next = *return_bi; *return_bi = bi; @@ -4749,12 +4750,11 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf) * first). * If the read failed.. 
*/ -static void raid5_align_endio(struct bio *bi, int error) +static void raid5_align_endio(struct bio *bi) { struct bio* raid_bi = bi->bi_private; struct mddev *mddev; struct r5conf *conf; - int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); struct md_rdev *rdev; bio_put(bi); @@ -4766,10 +4766,10 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); - if (!error && uptodate) { + if (!bi->bi_error) { trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), raid_bi, 0); - bio_endio(raid_bi, 0); + bio_endio(raid_bi); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); return; @@ -5133,7 +5133,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) remaining = raid5_dec_bi_active_stripes(bi); if (remaining == 0) { md_write_end(mddev); - bio_endio(bi, 0); + bio_endio(bi); } } @@ -5297,7 +5297,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) release_stripe_plug(mddev, sh); } else { /* cannot get stripe for read-ahead, just give-up */ - clear_bit(BIO_UPTODATE, &bi->bi_flags); + bi->bi_error = -EIO; break; } } @@ -5311,7 +5311,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), bi, 0); - bio_endio(bi, 0); + bio_endio(bi); } } @@ -5707,7 +5707,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) if (remaining == 0) { trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), raid_bio, 0); - bio_endio(raid_bio, 0); + bio_endio(raid_bio); } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_quiescent); diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 4f97b248c236..0df77cb07df6 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -180,7 +180,7 @@ static void nd_blk_make_request(struct request_queue *q, struct bio *bio) * another kernel subsystem, and we just pass it through. 
*/ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - err = -EIO; + bio->bi_error = -EIO; goto out; } @@ -199,6 +199,7 @@ static void nd_blk_make_request(struct request_queue *q, struct bio *bio) "io error in %s sector %lld, len %d,\n", (rw == READ) ? "READ" : "WRITE", (unsigned long long) iter.bi_sector, len); + bio->bi_error = err; break; } } @@ -206,7 +207,7 @@ static void nd_blk_make_request(struct request_queue *q, struct bio *bio) nd_iostat_end(bio, start); out: - bio_endio(bio, err); + bio_endio(bio); } static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 411c7b2bb37a..341202ed32b4 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1189,7 +1189,7 @@ static void btt_make_request(struct request_queue *q, struct bio *bio) * another kernel subsystem, and we just pass it through. */ if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { - err = -EIO; + bio->bi_error = -EIO; goto out; } @@ -1211,6 +1211,7 @@ static void btt_make_request(struct request_queue *q, struct bio *bio) "io error in %s sector %lld, len %d,\n", (rw == READ) ? 
"READ" : "WRITE", (unsigned long long) iter.bi_sector, len); + bio->bi_error = err; break; } } @@ -1218,7 +1219,7 @@ static void btt_make_request(struct request_queue *q, struct bio *bio) nd_iostat_end(bio, start); out: - bio_endio(bio, err); + bio_endio(bio); } static int btt_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ade9eb917a4d..4c079d5cb539 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -77,7 +77,7 @@ static void pmem_make_request(struct request_queue *q, struct bio *bio) if (bio_data_dir(bio)) wmb_pmem(); - bio_endio(bio, 0); + bio_endio(bio); } static int pmem_rw_page(struct block_device *bdev, sector_t sector, diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index da212813f2d5..8bcb822b0bac 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -871,7 +871,7 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) } bytes_done += bvec.bv_len; } - bio_endio(bio, 0); + bio_endio(bio); return; fail: bio_io_error(bio); diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 7d4e9397ac31..93856b9b6214 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -220,8 +220,7 @@ static void xpram_make_request(struct request_queue *q, struct bio *bio) index++; } } - set_bit(BIO_UPTODATE, &bio->bi_flags); - bio_endio(bio, 0); + bio_endio(bio); return; fail: bio_io_error(bio); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 6d88d24e6cce..5a9982f5d5d6 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -306,20 +306,13 @@ static void iblock_complete_cmd(struct se_cmd *cmd) kfree(ibr); } -static void iblock_bio_done(struct bio *bio, int err) +static void iblock_bio_done(struct bio *bio) { struct se_cmd *cmd = bio->bi_private; struct iblock_req *ibr = cmd->priv; - /* - * Set -EIO if !BIO_UPTODATE and 
the passed is still err=0 - */ - if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && !err) - err = -EIO; - - if (err != 0) { - pr_err("test_bit(BIO_UPTODATE) failed for bio: %p," - " err: %d\n", bio, err); + if (bio->bi_error) { + pr_err("bio error: %p, err: %d\n", bio, bio->bi_error); /* * Bump the ib_bio_err_cnt and release bio. */ @@ -370,15 +363,15 @@ static void iblock_submit_bios(struct bio_list *list, int rw) blk_finish_plug(&plug); } -static void iblock_end_io_flush(struct bio *bio, int err) +static void iblock_end_io_flush(struct bio *bio) { struct se_cmd *cmd = bio->bi_private; - if (err) - pr_err("IBLOCK: cache flush failed: %d\n", err); + if (bio->bi_error) + pr_err("IBLOCK: cache flush failed: %d\n", bio->bi_error); if (cmd) { - if (err) + if (bio->bi_error) target_complete_cmd(cmd, SAM_STAT_CHECK_CONDITION); else target_complete_cmd(cmd, SAM_STAT_GOOD); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 08e9084ee615..de18790eb21c 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -852,7 +852,7 @@ static ssize_t pscsi_show_configfs_dev_params(struct se_device *dev, char *b) return bl; } -static void pscsi_bi_endio(struct bio *bio, int error) +static void pscsi_bi_endio(struct bio *bio) { bio_put(bio); } @@ -973,7 +973,7 @@ fail: while (*hbio) { bio = *hbio; *hbio = (*hbio)->bi_next; - bio_endio(bio, 0); /* XXX: should be error */ + bio_endio(bio); } return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } @@ -1061,7 +1061,7 @@ fail_free_bio: while (hbio) { struct bio *bio = hbio; hbio = hbio->bi_next; - bio_endio(bio, 0); /* XXX: should be error */ + bio_endio(bio); } ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; fail: diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index ce7dec88f4b8..541fbfaed276 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -343,7 +343,7 @@ static int btrfsic_process_written_superblock( struct btrfsic_state *state, 
struct btrfsic_block *const block, struct btrfs_super_block *const super_hdr); -static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); +static void btrfsic_bio_end_io(struct bio *bp); static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, const struct btrfsic_block *block, @@ -2207,7 +2207,7 @@ continue_loop: goto again; } -static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) +static void btrfsic_bio_end_io(struct bio *bp) { struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; int iodone_w_error; @@ -2215,7 +2215,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) /* mutex is not held! This is not save if IO is not yet completed * on umount */ iodone_w_error = 0; - if (bio_error_status) + if (bp->bi_error) iodone_w_error = 1; BUG_ON(NULL == block); @@ -2230,7 +2230,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) printk(KERN_INFO "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", - bio_error_status, + bp->bi_error, btrfsic_get_block_type(dev_state->state, block), block->logical_bytenr, dev_state->name, block->dev_bytenr, block->mirror_num); @@ -2252,7 +2252,7 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) block = next_block; } while (NULL != block); - bp->bi_end_io(bp, bio_error_status); + bp->bi_end_io(bp); } static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ce62324c78e7..302266ec2cdb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -152,7 +152,7 @@ fail: * The compressed pages are freed here, and it must be run * in process context */ -static void end_compressed_bio_read(struct bio *bio, int err) +static void end_compressed_bio_read(struct bio *bio) { struct compressed_bio *cb = bio->bi_private; struct inode *inode; 
@@ -160,7 +160,7 @@ static void end_compressed_bio_read(struct bio *bio, int err) unsigned long index; int ret; - if (err) + if (bio->bi_error) cb->errors = 1; /* if there are more bios still pending for this compressed @@ -210,7 +210,7 @@ csum_failed: bio_for_each_segment_all(bvec, cb->orig_bio, i) SetPageChecked(bvec->bv_page); - bio_endio(cb->orig_bio, 0); + bio_endio(cb->orig_bio); } /* finally free the cb struct */ @@ -266,7 +266,7 @@ static noinline void end_compressed_writeback(struct inode *inode, * This also calls the writeback end hooks for the file pages so that * metadata and checksums can be updated in the file. */ -static void end_compressed_bio_write(struct bio *bio, int err) +static void end_compressed_bio_write(struct bio *bio) { struct extent_io_tree *tree; struct compressed_bio *cb = bio->bi_private; @@ -274,7 +274,7 @@ static void end_compressed_bio_write(struct bio *bio, int err) struct page *page; unsigned long index; - if (err) + if (bio->bi_error) cb->errors = 1; /* if there are more bios still pending for this compressed @@ -293,7 +293,7 @@ static void end_compressed_bio_write(struct bio *bio, int err) cb->start, cb->start + cb->len - 1, NULL, - err ? 0 : 1); + bio->bi_error ? 
0 : 1); cb->compressed_pages[0]->mapping = NULL; end_compressed_writeback(inode, cb); @@ -697,8 +697,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); - if (ret) - bio_endio(comp_bio, ret); + if (ret) { + bio->bi_error = ret; + bio_endio(comp_bio); + } bio_put(comp_bio); @@ -724,8 +726,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, } ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); - if (ret) - bio_endio(comp_bio, ret); + if (ret) { + bio->bi_error = ret; + bio_endio(comp_bio); + } bio_put(comp_bio); return 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a9aadb2ad525..a8c0de888a9d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -703,7 +703,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) return -EIO; /* we fixed nothing */ } -static void end_workqueue_bio(struct bio *bio, int err) +static void end_workqueue_bio(struct bio *bio) { struct btrfs_end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; @@ -711,7 +711,7 @@ static void end_workqueue_bio(struct bio *bio, int err) btrfs_work_func_t func; fs_info = end_io_wq->info; - end_io_wq->error = err; + end_io_wq->error = bio->bi_error; if (bio->bi_rw & REQ_WRITE) { if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) { @@ -808,7 +808,8 @@ static void run_one_async_done(struct btrfs_work *work) /* If an error occured we just want to clean up the bio and move on */ if (async->error) { - bio_endio(async->bio, async->error); + async->bio->bi_error = async->error; + bio_endio(async->bio); return; } @@ -908,8 +909,10 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, * submission context. 
Just jump into btrfs_map_bio */ ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); - if (ret) - bio_endio(bio, ret); + if (ret) { + bio->bi_error = ret; + bio_endio(bio); + } return ret; } @@ -960,10 +963,13 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, __btree_submit_bio_done); } - if (ret) { + if (ret) + goto out_w_error; + return 0; + out_w_error: - bio_endio(bio, ret); - } + bio->bi_error = ret; + bio_endio(bio); return ret; } @@ -1735,16 +1741,15 @@ static void end_workqueue_fn(struct btrfs_work *work) { struct bio *bio; struct btrfs_end_io_wq *end_io_wq; - int error; end_io_wq = container_of(work, struct btrfs_end_io_wq, work); bio = end_io_wq->bio; - error = end_io_wq->error; + bio->bi_error = end_io_wq->error; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); - bio_endio(bio, error); + bio_endio(bio); } static int cleaner_kthread(void *arg) @@ -3323,10 +3328,8 @@ static int write_dev_supers(struct btrfs_device *device, * endio for the write_dev_flush, this will wake anyone waiting * for the barrier when it is done */ -static void btrfs_end_empty_barrier(struct bio *bio, int err) +static void btrfs_end_empty_barrier(struct bio *bio) { - if (err) - clear_bit(BIO_UPTODATE, &bio->bi_flags); if (bio->bi_private) complete(bio->bi_private); bio_put(bio); @@ -3354,8 +3357,8 @@ static int write_dev_flush(struct btrfs_device *device, int wait) wait_for_completion(&device->flush_wait); - if (!bio_flagged(bio, BIO_UPTODATE)) { - ret = -EIO; + if (bio->bi_error) { + ret = bio->bi_error; btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_FLUSH_ERRS); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 02d05817cbdf..c22f175ed024 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2486,7 +2486,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) * Scheduling is not allowed, so the extent state 
tree is expected * to have one and only one object corresponding to this IO. */ -static void end_bio_extent_writepage(struct bio *bio, int err) +static void end_bio_extent_writepage(struct bio *bio) { struct bio_vec *bvec; u64 start; @@ -2516,7 +2516,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; - if (end_extent_writepage(page, err, start, end)) + if (end_extent_writepage(page, bio->bi_error, start, end)) continue; end_page_writeback(page); @@ -2548,10 +2548,10 @@ endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len, * Scheduling is not allowed, so the extent state tree is expected * to have one and only one object corresponding to this IO. */ -static void end_bio_extent_readpage(struct bio *bio, int err) +static void end_bio_extent_readpage(struct bio *bio) { struct bio_vec *bvec; - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + int uptodate = !bio->bi_error; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); struct extent_io_tree *tree; u64 offset = 0; @@ -2564,16 +2564,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) int ret; int i; - if (err) - uptodate = 0; - bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, " - "mirror=%u\n", (u64)bio->bi_iter.bi_sector, err, - io_bio->mirror_num); + "mirror=%u\n", (u64)bio->bi_iter.bi_sector, + bio->bi_error, io_bio->mirror_num); tree = &BTRFS_I(inode)->io_tree; /* We always issue full-page reads, but if some block @@ -2614,8 +2611,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (tree->ops && tree->ops->readpage_io_failed_hook) { ret = tree->ops->readpage_io_failed_hook(page, mirror); - if (!ret && !err && - test_bit(BIO_UPTODATE, &bio->bi_flags)) + if (!ret && !bio->bi_error) uptodate = 1; } else { /* @@ -2631,10 +2627,7 @@ 
static void end_bio_extent_readpage(struct bio *bio, int err) ret = bio_readpage_error(bio, offset, page, start, end, mirror); if (ret == 0) { - uptodate = - test_bit(BIO_UPTODATE, &bio->bi_flags); - if (err) - uptodate = 0; + uptodate = !bio->bi_error; offset += len; continue; } @@ -2684,7 +2677,7 @@ readpage_ok: endio_readpage_release_extent(tree, extent_start, extent_len, uptodate); if (io_bio->end_io) - io_bio->end_io(io_bio, err); + io_bio->end_io(io_bio, bio->bi_error); bio_put(bio); } @@ -3696,7 +3689,7 @@ static void set_btree_ioerr(struct page *page) } } -static void end_bio_extent_buffer_writepage(struct bio *bio, int err) +static void end_bio_extent_buffer_writepage(struct bio *bio) { struct bio_vec *bvec; struct extent_buffer *eb; @@ -3709,7 +3702,8 @@ static void end_bio_extent_buffer_writepage(struct bio *bio, int err) BUG_ON(!eb); done = atomic_dec_and_test(&eb->io_pages); - if (err || test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { + if (bio->bi_error || + test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) { ClearPageUptodate(page); set_btree_ioerr(page); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b33c0cf02668..6b8becfe2057 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1845,8 +1845,10 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, int ret; ret = btrfs_map_bio(root, rw, bio, mirror_num, 1); - if (ret) - bio_endio(bio, ret); + if (ret) { + bio->bi_error = ret; + bio_endio(bio); + } return ret; } @@ -1906,8 +1908,10 @@ mapit: ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); out: - if (ret < 0) - bio_endio(bio, ret); + if (ret < 0) { + bio->bi_error = ret; + bio_endio(bio); + } return ret; } @@ -7689,13 +7693,13 @@ struct btrfs_retry_complete { int uptodate; }; -static void btrfs_retry_endio_nocsum(struct bio *bio, int err) +static void btrfs_retry_endio_nocsum(struct bio *bio) { struct btrfs_retry_complete *done = bio->bi_private; struct bio_vec *bvec; int i; - if (err) + if (bio->bi_error) 
goto end; done->uptodate = 1; @@ -7744,7 +7748,7 @@ try_again: return 0; } -static void btrfs_retry_endio(struct bio *bio, int err) +static void btrfs_retry_endio(struct bio *bio) { struct btrfs_retry_complete *done = bio->bi_private; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); @@ -7753,7 +7757,7 @@ static void btrfs_retry_endio(struct bio *bio, int err) int ret; int i; - if (err) + if (bio->bi_error) goto end; uptodate = 1; @@ -7836,12 +7840,13 @@ static int btrfs_subio_endio_read(struct inode *inode, } } -static void btrfs_endio_direct_read(struct bio *bio, int err) +static void btrfs_endio_direct_read(struct bio *bio) { struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; struct bio *dio_bio; struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); + int err = bio->bi_error; if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) err = btrfs_subio_endio_read(inode, io_bio, err); @@ -7852,17 +7857,14 @@ static void btrfs_endio_direct_read(struct bio *bio, int err) kfree(dip); - /* If we had a csum failure make sure to clear the uptodate flag */ - if (err) - clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); - dio_end_io(dio_bio, err); + dio_end_io(dio_bio, bio->bi_error); if (io_bio->end_io) io_bio->end_io(io_bio, err); bio_put(bio); } -static void btrfs_endio_direct_write(struct bio *bio, int err) +static void btrfs_endio_direct_write(struct bio *bio) { struct btrfs_dio_private *dip = bio->bi_private; struct inode *inode = dip->inode; @@ -7876,7 +7878,8 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, - ordered_bytes, !err); + ordered_bytes, + !bio->bi_error); if (!ret) goto out_test; @@ -7899,10 +7902,7 @@ out_test: kfree(dip); - /* If we had an error make sure to clear the uptodate flag */ - if (err) - clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); - dio_end_io(dio_bio, err); + dio_end_io(dio_bio, bio->bi_error); bio_put(bio); } @@ -7917,9 +7917,10 @@ 
static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, return 0; } -static void btrfs_end_dio_bio(struct bio *bio, int err) +static void btrfs_end_dio_bio(struct bio *bio) { struct btrfs_dio_private *dip = bio->bi_private; + int err = bio->bi_error; if (err) btrfs_warn(BTRFS_I(dip->inode)->root->fs_info, @@ -7948,8 +7949,8 @@ static void btrfs_end_dio_bio(struct bio *bio, int err) if (dip->errors) { bio_io_error(dip->orig_bio); } else { - set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags); - bio_endio(dip->orig_bio, 0); + dip->dio_bio->bi_error = 0; + bio_endio(dip->orig_bio); } out: bio_put(bio); @@ -8220,7 +8221,8 @@ free_ordered: * callbacks - they require an allocated dip and a clone of dio_bio. */ if (io_bio && dip) { - bio_endio(io_bio, ret); + io_bio->bi_error = -EIO; + bio_endio(io_bio); /* * The end io callbacks free our dip, do the final put on io_bio * and all the cleanup and final put for dio_bio (through @@ -8247,7 +8249,7 @@ free_ordered: unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, file_offset + dio_bio->bi_iter.bi_size - 1); } - clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + dio_bio->bi_error = -EIO; /* * Releases and cleans up our dio_bio, no need to bio_put() * nor bio_endio()/bio_io_error() against dio_bio. 
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index fa72068bd256..0a02e24900aa 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -851,7 +851,7 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio) * this frees the rbio and runs through all the bios in the * bio_list and calls end_io on them */ -static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) +static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err) { struct bio *cur = bio_list_get(&rbio->bio_list); struct bio *next; @@ -864,9 +864,8 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) while (cur) { next = cur->bi_next; cur->bi_next = NULL; - if (uptodate) - set_bit(BIO_UPTODATE, &cur->bi_flags); - bio_endio(cur, err); + cur->bi_error = err; + bio_endio(cur); cur = next; } } @@ -875,9 +874,10 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate) * end io function used by finish_rmw. When we finally * get here, we've written a full stripe */ -static void raid_write_end_io(struct bio *bio, int err) +static void raid_write_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; + int err = bio->bi_error; if (err) fail_bio_stripe(rbio, bio); @@ -893,7 +893,7 @@ static void raid_write_end_io(struct bio *bio, int err) if (atomic_read(&rbio->error) > rbio->bbio->max_errors) err = -EIO; - rbio_orig_end_io(rbio, err, 0); + rbio_orig_end_io(rbio, err); return; } @@ -1071,7 +1071,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, * devices or if they are not contiguous */ if (last_end == disk_start && stripe->dev->bdev && - test_bit(BIO_UPTODATE, &last->bi_flags) && + !last->bi_error && last->bi_bdev == stripe->dev->bdev) { ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0); if (ret == PAGE_CACHE_SIZE) @@ -1087,7 +1087,6 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio, bio->bi_iter.bi_size = 0; bio->bi_bdev = stripe->dev->bdev; bio->bi_iter.bi_sector = disk_start >> 9; 
- set_bit(BIO_UPTODATE, &bio->bi_flags); bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); bio_list_add(bio_list, bio); @@ -1312,13 +1311,12 @@ write_data: bio->bi_private = rbio; bio->bi_end_io = raid_write_end_io; - BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); submit_bio(WRITE, bio); } return; cleanup: - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); } /* @@ -1441,11 +1439,11 @@ static void set_bio_pages_uptodate(struct bio *bio) * This will usually kick off finish_rmw once all the bios are read in, but it * may trigger parity reconstruction if we had any errors along the way */ -static void raid_rmw_end_io(struct bio *bio, int err) +static void raid_rmw_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - if (err) + if (bio->bi_error) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -1455,7 +1453,6 @@ static void raid_rmw_end_io(struct bio *bio, int err) if (!atomic_dec_and_test(&rbio->stripes_pending)) return; - err = 0; if (atomic_read(&rbio->error) > rbio->bbio->max_errors) goto cleanup; @@ -1469,7 +1466,7 @@ static void raid_rmw_end_io(struct bio *bio, int err) cleanup: - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); } static void async_rmw_stripe(struct btrfs_raid_bio *rbio) @@ -1572,14 +1569,13 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); submit_bio(READ, bio); } /* the actual write will happen once the reads are done */ return 0; cleanup: - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); return -EIO; finish: @@ -1964,7 +1960,7 @@ cleanup_io: else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); - rbio_orig_end_io(rbio, err, err == 0); + rbio_orig_end_io(rbio, err); } else if (err == 0) { rbio->faila = -1; rbio->failb = -1; @@ -1976,7 +1972,7 @@ cleanup_io: else BUG(); } else { - rbio_orig_end_io(rbio, err, 0); + rbio_orig_end_io(rbio, 
err); } } @@ -1984,7 +1980,7 @@ cleanup_io: * This is called only for stripes we've read from disk to * reconstruct the parity. */ -static void raid_recover_end_io(struct bio *bio, int err) +static void raid_recover_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; @@ -1992,7 +1988,7 @@ static void raid_recover_end_io(struct bio *bio, int err) * we only read stripe pages off the disk, set them * up to date if there were no errors */ - if (err) + if (bio->bi_error) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -2002,7 +1998,7 @@ static void raid_recover_end_io(struct bio *bio, int err) return; if (atomic_read(&rbio->error) > rbio->bbio->max_errors) - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); else __raid_recover_end_io(rbio); } @@ -2094,7 +2090,6 @@ static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); submit_bio(READ, bio); } out: @@ -2102,7 +2097,7 @@ out: cleanup: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); return -EIO; } @@ -2277,11 +2272,12 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) * end io function used by finish_rmw. 
When we finally * get here, we've written a full stripe */ -static void raid_write_parity_end_io(struct bio *bio, int err) +static void raid_write_parity_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; + int err = bio->bi_error; - if (err) + if (bio->bi_error) fail_bio_stripe(rbio, bio); bio_put(bio); @@ -2294,7 +2290,7 @@ static void raid_write_parity_end_io(struct bio *bio, int err) if (atomic_read(&rbio->error)) err = -EIO; - rbio_orig_end_io(rbio, err, 0); + rbio_orig_end_io(rbio, err); } static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, @@ -2437,7 +2433,7 @@ submit_write: nr_data = bio_list_size(&bio_list); if (!nr_data) { /* Every parity is right */ - rbio_orig_end_io(rbio, 0, 0); + rbio_orig_end_io(rbio, 0); return; } @@ -2450,13 +2446,12 @@ submit_write: bio->bi_private = rbio; bio->bi_end_io = raid_write_parity_end_io; - BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); submit_bio(WRITE, bio); } return; cleanup: - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); } static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) @@ -2524,7 +2519,7 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) return; cleanup: - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); } /* @@ -2535,11 +2530,11 @@ cleanup: * This will usually kick off finish_rmw once all the bios are read in, but it * may trigger parity reconstruction if we had any errors along the way */ -static void raid56_parity_scrub_end_io(struct bio *bio, int err) +static void raid56_parity_scrub_end_io(struct bio *bio) { struct btrfs_raid_bio *rbio = bio->bi_private; - if (err) + if (bio->bi_error) fail_bio_stripe(rbio, bio); else set_bio_pages_uptodate(bio); @@ -2632,14 +2627,13 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); - BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags)); submit_bio(READ, bio); } /* the 
actual write will happen once the reads are done */ return; cleanup: - rbio_orig_end_io(rbio, -EIO, 0); + rbio_orig_end_io(rbio, -EIO); return; finish: diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 94db0fa5225a..ebb8260186fe 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -278,7 +278,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, u64 physical, struct btrfs_device *dev, u64 flags, u64 gen, int mirror_num, u8 *csum, int force, u64 physical_for_dev_replace); -static void scrub_bio_end_io(struct bio *bio, int err); +static void scrub_bio_end_io(struct bio *bio); static void scrub_bio_end_io_worker(struct btrfs_work *work); static void scrub_block_complete(struct scrub_block *sblock); static void scrub_remap_extent(struct btrfs_fs_info *fs_info, @@ -295,7 +295,7 @@ static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage); static void scrub_wr_submit(struct scrub_ctx *sctx); -static void scrub_wr_bio_end_io(struct bio *bio, int err); +static void scrub_wr_bio_end_io(struct bio *bio); static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); static int write_page_nocow(struct scrub_ctx *sctx, u64 physical_for_dev_replace, struct page *page); @@ -1429,11 +1429,11 @@ struct scrub_bio_ret { int error; }; -static void scrub_bio_wait_endio(struct bio *bio, int error) +static void scrub_bio_wait_endio(struct bio *bio) { struct scrub_bio_ret *ret = bio->bi_private; - ret->error = error; + ret->error = bio->bi_error; complete(&ret->event); } @@ -1790,12 +1790,12 @@ static void scrub_wr_submit(struct scrub_ctx *sctx) btrfsic_submit_bio(WRITE, sbio->bio); } -static void scrub_wr_bio_end_io(struct bio *bio, int err) +static void scrub_wr_bio_end_io(struct bio *bio) { struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; - sbio->err = err; + sbio->err = bio->bi_error; sbio->bio = bio; 
btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper, @@ -2098,7 +2098,7 @@ static void scrub_submit(struct scrub_ctx *sctx) */ printk_ratelimited(KERN_WARNING "BTRFS: scrub_submit(bio bdev == NULL) is unexpected!\n"); - bio_endio(sbio->bio, -EIO); + bio_io_error(sbio->bio); } else { btrfsic_submit_bio(READ, sbio->bio); } @@ -2260,12 +2260,12 @@ leave_nomem: return 0; } -static void scrub_bio_end_io(struct bio *bio, int err) +static void scrub_bio_end_io(struct bio *bio) { struct scrub_bio *sbio = bio->bi_private; struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; - sbio->err = err; + sbio->err = bio->bi_error; sbio->bio = bio; btrfs_queue_work(fs_info->scrub_workers, &sbio->work); @@ -2672,11 +2672,11 @@ static void scrub_parity_bio_endio_worker(struct btrfs_work *work) scrub_pending_bio_dec(sctx); } -static void scrub_parity_bio_endio(struct bio *bio, int error) +static void scrub_parity_bio_endio(struct bio *bio) { struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private; - if (error) + if (bio->bi_error) bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index fbe7c104531c..8f2ca18c71f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5741,23 +5741,23 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } -static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) +static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio) { bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; - bio_endio(bio, err); + bio_endio(bio); btrfs_put_bbio(bbio); } -static void btrfs_end_bio(struct bio *bio, int err) +static void btrfs_end_bio(struct bio *bio) { struct btrfs_bio *bbio = bio->bi_private; int is_orig_bio = 0; - if (err) { + if (bio->bi_error) { atomic_inc(&bbio->error); - if (err == -EIO || err == -EREMOTEIO) { + if (bio->bi_error == -EIO || bio->bi_error == -EREMOTEIO) { unsigned int 
stripe_index = btrfs_io_bio(bio)->stripe_index; struct btrfs_device *dev; @@ -5795,17 +5795,16 @@ static void btrfs_end_bio(struct bio *bio, int err) * beyond the tolerance of the btrfs bio */ if (atomic_read(&bbio->error) > bbio->max_errors) { - err = -EIO; + bio->bi_error = -EIO; } else { /* * this bio is actually up to date, we didn't * go over the max number of errors */ - set_bit(BIO_UPTODATE, &bio->bi_flags); - err = 0; + bio->bi_error = 0; } - btrfs_end_bbio(bbio, bio, err); + btrfs_end_bbio(bbio, bio); } else if (!is_orig_bio) { bio_put(bio); } @@ -5826,7 +5825,7 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, struct btrfs_pending_bios *pending_bios; if (device->missing || !device->bdev) { - bio_endio(bio, -EIO); + bio_io_error(bio); return; } @@ -5973,8 +5972,8 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - - btrfs_end_bbio(bbio, bio, -EIO); + bio->bi_error = -EIO; + btrfs_end_bbio(bbio, bio); } } diff --git a/fs/buffer.c b/fs/buffer.c index 1cf7a53a0277..7a49bb84ecb5 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2957,14 +2957,14 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, } EXPORT_SYMBOL(generic_block_bmap); -static void end_bio_bh_io_sync(struct bio *bio, int err) +static void end_bio_bh_io_sync(struct bio *bio) { struct buffer_head *bh = bio->bi_private; if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) set_bit(BH_Quiet, &bh->b_state); - bh->b_end_io(bh, test_bit(BIO_UPTODATE, &bio->bi_flags)); + bh->b_end_io(bh, !bio->bi_error); bio_put(bio); } diff --git a/fs/direct-io.c b/fs/direct-io.c index 745d2342651a..e1639c8c14d5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -285,7 +285,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio); /* * Asynchronous IO callback. 
*/ -static void dio_bio_end_aio(struct bio *bio, int error) +static void dio_bio_end_aio(struct bio *bio) { struct dio *dio = bio->bi_private; unsigned long remaining; @@ -318,7 +318,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) * During I/O bi_private points at the dio. After I/O, bi_private is used to * implement a singly-linked list of completed BIOs, at dio->bio_list. */ -static void dio_bio_end_io(struct bio *bio, int error) +static void dio_bio_end_io(struct bio *bio) { struct dio *dio = bio->bi_private; unsigned long flags; @@ -345,9 +345,9 @@ void dio_end_io(struct bio *bio, int error) struct dio *dio = bio->bi_private; if (dio->is_async) - dio_bio_end_aio(bio, error); + dio_bio_end_aio(bio); else - dio_bio_end_io(bio, error); + dio_bio_end_io(bio); } EXPORT_SYMBOL_GPL(dio_end_io); @@ -457,11 +457,10 @@ static struct bio *dio_await_one(struct dio *dio) */ static int dio_bio_complete(struct dio *dio, struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec; unsigned i; - if (!uptodate) + if (bio->bi_error) dio->io_error = -EIO; if (dio->is_async && dio->rw == READ) { @@ -476,7 +475,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) } bio_put(bio); } - return uptodate ? 
0 : -EIO; + return bio->bi_error; } /* diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 5602450f03f6..aa95566f14be 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -61,7 +61,6 @@ static void buffer_io_error(struct buffer_head *bh) static void ext4_finish_bio(struct bio *bio) { int i; - int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec; bio_for_each_segment_all(bvec, bio, i) { @@ -88,7 +87,7 @@ static void ext4_finish_bio(struct bio *bio) } #endif - if (error) { + if (bio->bi_error) { SetPageError(page); set_bit(AS_EIO, &page->mapping->flags); } @@ -107,7 +106,7 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (error) + if (bio->bi_error) buffer_io_error(bh); } while ((bh = bh->b_this_page) != head); bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); @@ -310,27 +309,25 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) } /* BIO completion function for page writeback */ -static void ext4_end_bio(struct bio *bio, int error) +static void ext4_end_bio(struct bio *bio) { ext4_io_end_t *io_end = bio->bi_private; sector_t bi_sector = bio->bi_iter.bi_sector; BUG_ON(!io_end); bio->bi_end_io = NULL; - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = 0; - if (error) { + if (bio->bi_error) { struct inode *inode = io_end->inode; ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " "(offset %llu size %ld starting block %llu)", - error, inode->i_ino, + bio->bi_error, inode->i_ino, (unsigned long long) io_end->offset, (long) io_end->size, (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); - mapping_set_error(inode->i_mapping, error); + mapping_set_error(inode->i_mapping, bio->bi_error); } if (io_end->flag & EXT4_IO_END_UNWRITTEN) { diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index ec3ef93a52db..5de5b871c178 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -98,7 +98,7 @@ static inline bool ext4_bio_encrypted(struct bio *bio) * status of that page is 
hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. */ -static void mpage_end_io(struct bio *bio, int err) +static void mpage_end_io(struct bio *bio) { struct bio_vec *bv; int i; @@ -106,7 +106,7 @@ static void mpage_end_io(struct bio *bio, int err) if (ext4_bio_encrypted(bio)) { struct ext4_crypto_ctx *ctx = bio->bi_private; - if (err) { + if (bio->bi_error) { ext4_release_crypto_ctx(ctx); } else { INIT_WORK(&ctx->r.work, completion_pages); @@ -118,7 +118,7 @@ static void mpage_end_io(struct bio *bio, int err) bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - if (!err) { + if (!bio->bi_error) { SetPageUptodate(page); } else { ClearPageUptodate(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9bedfa8dd3a5..8f0baa7ffb50 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -29,13 +29,13 @@ static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab; -static void f2fs_read_end_io(struct bio *bio, int err) +static void f2fs_read_end_io(struct bio *bio) { struct bio_vec *bvec; int i; if (f2fs_bio_encrypted(bio)) { - if (err) { + if (bio->bi_error) { f2fs_release_crypto_ctx(bio->bi_private); } else { f2fs_end_io_crypto_work(bio->bi_private, bio); @@ -46,7 +46,7 @@ static void f2fs_read_end_io(struct bio *bio, int err) bio_for_each_segment_all(bvec, bio, i) { struct page *page = bvec->bv_page; - if (!err) { + if (!bio->bi_error) { SetPageUptodate(page); } else { ClearPageUptodate(page); @@ -57,7 +57,7 @@ static void f2fs_read_end_io(struct bio *bio, int err) bio_put(bio); } -static void f2fs_write_end_io(struct bio *bio, int err) +static void f2fs_write_end_io(struct bio *bio) { struct f2fs_sb_info *sbi = bio->bi_private; struct bio_vec *bvec; @@ -68,7 +68,7 @@ static void f2fs_write_end_io(struct bio *bio, int err) f2fs_restore_and_release_control_page(&page); - if (unlikely(err)) { + if (unlikely(bio->bi_error)) { set_page_dirty(page); set_bit(AS_EIO, 
&page->mapping->flags); f2fs_stop_checkpoint(sbi); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 2c1ae861dc94..c0a1b967deba 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -202,22 +202,22 @@ static void gfs2_end_log_write_bh(struct gfs2_sbd *sdp, struct bio_vec *bvec, * */ -static void gfs2_end_log_write(struct bio *bio, int error) +static void gfs2_end_log_write(struct bio *bio) { struct gfs2_sbd *sdp = bio->bi_private; struct bio_vec *bvec; struct page *page; int i; - if (error) { - sdp->sd_log_error = error; - fs_err(sdp, "Error %d writing to log\n", error); + if (bio->bi_error) { + sdp->sd_log_error = bio->bi_error; + fs_err(sdp, "Error %d writing to log\n", bio->bi_error); } bio_for_each_segment_all(bvec, bio, i) { page = bvec->bv_page; if (page_has_buffers(page)) - gfs2_end_log_write_bh(sdp, bvec, error); + gfs2_end_log_write_bh(sdp, bvec, bio->bi_error); else mempool_free(page, gfs2_page_pool); } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1e3a93f2f71d..02586e7eb964 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -171,14 +171,14 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) return -EINVAL; } -static void end_bio_io_page(struct bio *bio, int error) +static void end_bio_io_page(struct bio *bio) { struct page *page = bio->bi_private; - if (!error) + if (!bio->bi_error) SetPageUptodate(page); else - pr_warn("error %d reading superblock\n", error); + pr_warn("error %d reading superblock\n", bio->bi_error); unlock_page(page); } diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index bc462dcd7a40..d301acfdb80d 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2011,7 +2011,7 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) /*check if journaling to disk has been disabled*/ if (log->no_integrity) { bio->bi_iter.bi_size = 0; - lbmIODone(bio, 0); + lbmIODone(bio); } else { submit_bio(READ_SYNC, bio); } @@ -2158,7 +2158,7 @@ static void lbmStartIO(struct lbuf * bp) /* 
check if journaling to disk has been disabled */ if (log->no_integrity) { bio->bi_iter.bi_size = 0; - lbmIODone(bio, 0); + lbmIODone(bio); } else { submit_bio(WRITE_SYNC, bio); INCREMENT(lmStat.submitted); @@ -2196,7 +2196,7 @@ static int lbmIOWait(struct lbuf * bp, int flag) * * executed at INTIODONE level */ -static void lbmIODone(struct bio *bio, int error) +static void lbmIODone(struct bio *bio) { struct lbuf *bp = bio->bi_private; struct lbuf *nextbp, *tail; @@ -2212,7 +2212,7 @@ static void lbmIODone(struct bio *bio, int error) bp->l_flag |= lbmDONE; - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (bio->bi_error) { bp->l_flag |= lbmERROR; jfs_err("lbmIODone: I/O error in JFS log"); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 16a0922beb59..a3eb316b1ac3 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -276,11 +276,11 @@ static void last_read_complete(struct page *page) unlock_page(page); } -static void metapage_read_end_io(struct bio *bio, int err) +static void metapage_read_end_io(struct bio *bio) { struct page *page = bio->bi_private; - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (bio->bi_error) { printk(KERN_ERR "metapage_read_end_io: I/O error\n"); SetPageError(page); } @@ -331,13 +331,13 @@ static void last_write_complete(struct page *page) end_page_writeback(page); } -static void metapage_write_end_io(struct bio *bio, int err) +static void metapage_write_end_io(struct bio *bio) { struct page *page = bio->bi_private; BUG_ON(!PagePrivate(page)); - if (! 
test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (bio->bi_error) { printk(KERN_ERR "metapage_write_end_io: I/O error\n"); SetPageError(page); } diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c index 76279e11982d..cea0cc9878b7 100644 --- a/fs/logfs/dev_bdev.c +++ b/fs/logfs/dev_bdev.c @@ -53,16 +53,14 @@ static int bdev_readpage(void *_sb, struct page *page) static DECLARE_WAIT_QUEUE_HEAD(wq); -static void writeseg_end_io(struct bio *bio, int err) +static void writeseg_end_io(struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct bio_vec *bvec; int i; struct super_block *sb = bio->bi_private; struct logfs_super *super = logfs_super(sb); - BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */ - BUG_ON(err); + BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */ bio_for_each_segment_all(bvec, bio, i) { end_page_writeback(bvec->bv_page); @@ -153,14 +151,12 @@ static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len) } -static void erase_end_io(struct bio *bio, int err) +static void erase_end_io(struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct super_block *sb = bio->bi_private; struct logfs_super *super = logfs_super(sb); - BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */ - BUG_ON(err); + BUG_ON(bio->bi_error); /* FIXME: Retry io or write elsewhere */ BUG_ON(bio->bi_vcnt == 0); bio_put(bio); if (atomic_dec_and_test(&super->s_pending_writes)) diff --git a/fs/mpage.c b/fs/mpage.c index ca0244b69de8..abac9361b3f1 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -42,14 +42,14 @@ * status of that page is hard. See end_buffer_async_read() for the details. * There is no point in duplicating all that complexity. 
*/ -static void mpage_end_io(struct bio *bio, int err) +static void mpage_end_io(struct bio *bio) { struct bio_vec *bv; int i; bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; - page_endio(page, bio_data_dir(bio), err); + page_endio(page, bio_data_dir(bio), bio->bi_error); } bio_put(bio); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index d2554fe140a3..9cd4eb3a1e22 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -116,7 +116,7 @@ bl_submit_bio(int rw, struct bio *bio) static struct bio * bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, - void (*end_io)(struct bio *, int err), struct parallel_io *par) + bio_end_io_t end_io, struct parallel_io *par) { struct bio *bio; @@ -139,8 +139,7 @@ bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, static struct bio * do_add_page_to_bio(struct bio *bio, int npg, int rw, sector_t isect, struct page *page, struct pnfs_block_dev_map *map, - struct pnfs_block_extent *be, - void (*end_io)(struct bio *, int err), + struct pnfs_block_extent *be, bio_end_io_t end_io, struct parallel_io *par, unsigned int offset, int *len) { struct pnfs_block_dev *dev = @@ -183,11 +182,11 @@ retry: return bio; } -static void bl_end_io_read(struct bio *bio, int err) +static void bl_end_io_read(struct bio *bio) { struct parallel_io *par = bio->bi_private; - if (err) { + if (bio->bi_error) { struct nfs_pgio_header *header = par->data; if (!header->pnfs_error) @@ -316,13 +315,12 @@ out: return PNFS_ATTEMPTED; } -static void bl_end_io_write(struct bio *bio, int err) +static void bl_end_io_write(struct bio *bio) { struct parallel_io *par = bio->bi_private; - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct nfs_pgio_header *header = par->data; - if (!uptodate) { + if (bio->bi_error) { if (!header->pnfs_error) header->pnfs_error = -EIO; pnfs_set_lo_fail(header->lseg); diff --git 
a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 42468e5ab3e7..550b10efb14e 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -338,12 +338,11 @@ void nilfs_add_checksums_on_logs(struct list_head *logs, u32 seed) /* * BIO operations */ -static void nilfs_end_bio_write(struct bio *bio, int err) +static void nilfs_end_bio_write(struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct nilfs_segment_buffer *segbuf = bio->bi_private; - if (!uptodate) + if (bio->bi_error) atomic_inc(&segbuf->sb_err); bio_put(bio); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16eff45727ee..140de3c93d2e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -372,14 +372,13 @@ static void o2hb_wait_on_io(struct o2hb_region *reg, wait_for_completion(&wc->wc_io_complete); } -static void o2hb_bio_end_io(struct bio *bio, - int error) +static void o2hb_bio_end_io(struct bio *bio) { struct o2hb_bio_wait_ctxt *wc = bio->bi_private; - if (error) { - mlog(ML_ERROR, "IO Error %d\n", error); - wc->wc_error = error; + if (bio->bi_error) { + mlog(ML_ERROR, "IO Error %d\n", bio->bi_error); + wc->wc_error = bio->bi_error; } o2hb_bio_wait_dec(wc, 1); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3859f5e27a4d..3714844a81d8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -351,12 +351,11 @@ xfs_imap_valid( */ STATIC void xfs_end_bio( - struct bio *bio, - int error) + struct bio *bio) { xfs_ioend_t *ioend = bio->bi_private; - ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 
0 : error; + ioend->io_error = bio->bi_error; /* Toss bio and pass work off to an xfsdatad thread */ bio->bi_private = NULL; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index a4b7d92e946c..01bd6781974e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1096,8 +1096,7 @@ xfs_bwrite( STATIC void xfs_buf_bio_end_io( - struct bio *bio, - int error) + struct bio *bio) { xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; @@ -1105,10 +1104,10 @@ xfs_buf_bio_end_io( * don't overwrite existing errors - otherwise we can lose errors on * buffers that require multiple bios to complete. */ - if (error) { + if (bio->bi_error) { spin_lock(&bp->b_lock); if (!bp->b_io_error) - bp->b_io_error = error; + bp->b_io_error = bio->bi_error; spin_unlock(&bp->b_lock); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 5e963a6d7c14..6b918177002d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -195,8 +195,6 @@ static inline bool bvec_gap_to_prev(struct bio_vec *bprv, unsigned int offset) return offset || ((bprv->bv_offset + bprv->bv_len) & (PAGE_SIZE - 1)); } -#define bio_io_error(bio) bio_endio((bio), -EIO) - /* * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it @@ -426,7 +424,14 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } -extern void bio_endio(struct bio *, int); +extern void bio_endio(struct bio *); + +static inline void bio_io_error(struct bio *bio) +{ + bio->bi_error = -EIO; + bio_endio(bio); +} + struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -717,7 +722,7 @@ extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_enabled(struct bio *bio); extern int bio_integrity_prep(struct bio *); -extern void bio_integrity_endio(struct bio *, int); +extern void bio_integrity_endio(struct bio 
*); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7303b3405520..6164fb8a817b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -14,7 +14,7 @@ struct page; struct block_device; struct io_context; struct cgroup_subsys_state; -typedef void (bio_end_io_t) (struct bio *, int); +typedef void (bio_end_io_t) (struct bio *); typedef void (bio_destructor_t) (struct bio *); /* @@ -53,6 +53,7 @@ struct bio { struct bvec_iter bi_iter; + int bi_error; /* Number of segments in this BIO after * physical address coalescing is performed. */ @@ -111,7 +112,6 @@ struct bio { /* * bio flags */ -#define BIO_UPTODATE 0 /* ok after I/O completion */ #define BIO_SEG_VALID 1 /* bi_phys_segments valid */ #define BIO_CLONED 2 /* doesn't own data */ #define BIO_BOUNCED 3 /* bio is a bounce bio */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 38874729dc5f..31496d201fdc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -373,9 +373,9 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry) /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern void end_swap_bio_write(struct bio *bio, int err); +extern void end_swap_bio_write(struct bio *bio); extern int __swap_writepage(struct page *page, struct writeback_control *wbc, - void (*end_write_func)(struct bio *, int)); + bio_end_io_t end_write_func); extern int swap_set_page_dirty(struct page *page); int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 2f30ca91e4fa..b2066fb5b10f 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -227,27 +227,23 @@ static void 
hib_init_batch(struct hib_bio_batch *hb) hb->error = 0; } -static void hib_end_io(struct bio *bio, int error) +static void hib_end_io(struct bio *bio) { struct hib_bio_batch *hb = bio->bi_private; - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; - if (!uptodate || error) { + if (bio->bi_error) { printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", imajor(bio->bi_bdev->bd_inode), iminor(bio->bi_bdev->bd_inode), (unsigned long long)bio->bi_iter.bi_sector); - - if (!error) - error = -EIO; } if (bio_data_dir(bio) == WRITE) put_page(page); - if (error && !hb->error) - hb->error = error; + if (bio->bi_error && !hb->error) + hb->error = bio->bi_error; if (atomic_dec_and_test(&hb->count)) wake_up(&hb->wait); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b3e6b39b6cf9..90e72a0c3047 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -778,9 +778,6 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, if (likely(!bt)) return; - if (!error && !bio_flagged(bio, BIO_UPTODATE)) - error = EIO; - __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_rw, what, error, 0, NULL); } @@ -887,8 +884,7 @@ static void blk_add_trace_split(void *ignore, __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT, - !bio_flagged(bio, BIO_UPTODATE), - sizeof(rpdu), &rpdu); + bio->bi_error, sizeof(rpdu), &rpdu); } } @@ -920,8 +916,8 @@ static void blk_add_trace_bio_remap(void *ignore, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, - bio->bi_rw, BLK_TA_REMAP, - !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); + bio->bi_rw, BLK_TA_REMAP, bio->bi_error, + sizeof(r), &r); } /** diff --git a/mm/page_io.c b/mm/page_io.c index 520baa4b04d7..338ce68942a0 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -43,12 +43,11 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, return 
bio; } -void end_swap_bio_write(struct bio *bio, int err) +void end_swap_bio_write(struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; - if (!uptodate) { + if (bio->bi_error) { SetPageError(page); /* * We failed to write the page out to swap-space. @@ -69,12 +68,11 @@ void end_swap_bio_write(struct bio *bio, int err) bio_put(bio); } -static void end_swap_bio_read(struct bio *bio, int err) +static void end_swap_bio_read(struct bio *bio) { - const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; - if (!uptodate) { + if (bio->bi_error) { SetPageError(page); ClearPageUptodate(page); printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n", @@ -254,7 +252,7 @@ static sector_t swap_page_sector(struct page *page) } int __swap_writepage(struct page *page, struct writeback_control *wbc, - void (*end_write_func)(struct bio *, int)) + bio_end_io_t end_write_func) { struct bio *bio; int ret, rw = WRITE; -- cgit v1.2.3-70-g09d2 From b7c44ed9d2fc6b461378c65eaf144ccc80a47772 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 24 Jul 2015 12:37:59 -0600 Subject: block: manipulate bio->bi_flags through helpers Some places use helpers now, others don't. We only have the 'is set' helper, add helpers for setting and clearing flags too. It was a bit of a mess of atomic vs non-atomic access. With BIO_UPTODATE gone, we don't have any risk of concurrent access to the flags. So relax the restriction and don't make any of them atomic. The flags that do have serialization issues (reffed and chained), we already handle those separately. 
Signed-off-by: Jens Axboe --- block/bio.c | 14 +++++++------- block/blk-core.c | 2 +- block/blk-map.c | 2 +- block/blk-merge.c | 2 +- block/bounce.c | 2 +- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 6 +++--- drivers/md/raid5.c | 2 +- fs/buffer.c | 2 +- include/linux/bio.h | 15 +++++++++++++++ include/linux/blk_types.h | 2 -- 11 files changed, 33 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index a23f489f398f..911ae8f82752 100644 --- a/block/bio.c +++ b/block/bio.c @@ -311,7 +311,7 @@ static void bio_chain_endio(struct bio *bio) */ static inline void bio_inc_remaining(struct bio *bio) { - bio->bi_flags |= (1 << BIO_CHAIN); + bio_set_flag(bio, BIO_CHAIN); smp_mb__before_atomic(); atomic_inc(&bio->__bi_remaining); } @@ -495,7 +495,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) if (unlikely(!bvl)) goto err_free; - bio->bi_flags |= 1 << BIO_OWNS_VEC; + bio_set_flag(bio, BIO_OWNS_VEC); } else if (nr_iovecs) { bvl = bio->bi_inline_vecs; } @@ -580,7 +580,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) * so we don't set nor calculate new physical/hw segment counts here */ bio->bi_bdev = bio_src->bi_bdev; - bio->bi_flags |= 1 << BIO_CLONED; + bio_set_flag(bio, BIO_CLONED); bio->bi_rw = bio_src->bi_rw; bio->bi_iter = bio_src->bi_iter; bio->bi_io_vec = bio_src->bi_io_vec; @@ -829,7 +829,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page /* If we may be able to merge these biovecs, force a recount */ if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) - bio->bi_flags &= ~(1 << BIO_SEG_VALID); + bio_clear_flag(bio, BIO_SEG_VALID); done: return len; @@ -1390,7 +1390,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, if (iter->type & WRITE) bio->bi_rw |= REQ_WRITE; - bio->bi_flags |= (1 << BIO_USER_MAPPED); + bio_set_flag(bio, BIO_USER_MAPPED); /* * subtle -- if __bio_map_user() ended up bouncing a bio, @@ -1770,7 
+1770,7 @@ static inline bool bio_remaining_done(struct bio *bio) BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); if (atomic_dec_and_test(&bio->__bi_remaining)) { - clear_bit(BIO_CHAIN, &bio->bi_flags); + bio_clear_flag(bio, BIO_CHAIN); return true; } @@ -1866,7 +1866,7 @@ void bio_trim(struct bio *bio, int offset, int size) if (offset == 0 && size == bio->bi_iter.bi_size) return; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); bio_advance(bio, offset << 9); diff --git a/block/blk-core.c b/block/blk-core.c index 7ef15b947b91..d1796b54e97a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -146,7 +146,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio->bi_error = error; if (unlikely(rq->cmd_flags & REQ_QUIET)) - set_bit(BIO_QUIET, &bio->bi_flags); + bio_set_flag(bio, BIO_QUIET); bio_advance(bio, nbytes); diff --git a/block/blk-map.c b/block/blk-map.c index 5fe1c30bfba7..233841644c9d 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -94,7 +94,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, return PTR_ERR(bio); if (map_data && map_data->null_mapped) - bio->bi_flags |= (1 << BIO_NULL_MAPPED); + bio_set_flag(bio, BIO_NULL_MAPPED); if (bio->bi_iter.bi_size != iter->count) { /* diff --git a/block/blk-merge.c b/block/blk-merge.c index 30a0d9f89017..a455b9860143 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -116,7 +116,7 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio) bio->bi_next = nxt; } - bio->bi_flags |= (1 << BIO_SEG_VALID); + bio_set_flag(bio, BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); diff --git a/block/bounce.c b/block/bounce.c index f4db245b9f3a..2c310ea007ee 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -186,7 +186,7 @@ static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) if (!bdi_cap_stable_pages_required(&q->backing_dev_info)) return 0; - return test_bit(BIO_SNAP_STABLE, &bio->bi_flags); + 
return bio_flagged(bio, BIO_SNAP_STABLE); } #else static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9aa7d1fb2bc1..60d0a8626e63 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1157,7 +1157,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) * non-zero, then it is the number of not-completed requests. */ bio->bi_phys_segments = 0; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); if (rw == READ) { /* @@ -2711,7 +2711,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp /* remove last page from this bio */ bio->bi_vcnt--; bio->bi_iter.bi_size -= len; - __clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); } goto bio_full; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 929e9a26d81b..316ff6f611e9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1216,7 +1216,7 @@ static void __make_request(struct mddev *mddev, struct bio *bio) * non-zero, then it is the number of not-completed requests. 
*/ bio->bi_phys_segments = 0; - clear_bit(BIO_SEG_VALID, &bio->bi_flags); + bio_clear_flag(bio, BIO_SEG_VALID); if (rw == READ) { /* @@ -3353,7 +3353,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* remove last page from this bio */ bio2->bi_vcnt--; bio2->bi_iter.bi_size -= len; - __clear_bit(BIO_SEG_VALID, &bio2->bi_flags); + bio_clear_flag(bio2, BIO_SEG_VALID); } goto bio_full; } @@ -4433,7 +4433,7 @@ read_more: /* Remove last page from this bio */ bio2->bi_vcnt--; bio2->bi_iter.bi_size -= len; - __clear_bit(BIO_SEG_VALID, &bio2->bi_flags); + bio_clear_flag(bio2, BIO_SEG_VALID); } goto bio_full; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 84d6eec1033e..e3d48775c9df 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -4850,7 +4850,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio) rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; align_bi->bi_bdev = rdev->bdev; - __clear_bit(BIO_SEG_VALID, &align_bi->bi_flags); + bio_clear_flag(align_bi, BIO_SEG_VALID); if (!bio_fits_rdev(align_bi) || is_badblock(rdev, align_bi->bi_iter.bi_sector, diff --git a/fs/buffer.c b/fs/buffer.c index 7a49bb84ecb5..7887bb466368 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2961,7 +2961,7 @@ static void end_bio_bh_io_sync(struct bio *bio) { struct buffer_head *bh = bio->bi_private; - if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) + if (unlikely(bio_flagged(bio, BIO_QUIET))) set_bit(BH_Quiet, &bh->b_state); bh->b_end_io(bh, !bio->bi_error); diff --git a/include/linux/bio.h b/include/linux/bio.h index 6b918177002d..986e6e19feb5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -304,6 +304,21 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) atomic_set(&bio->__bi_cnt, count); } +static inline bool bio_flagged(struct bio *bio, unsigned int bit) +{ + return (bio->bi_flags & (1UL << bit)) != 0; +} + +static inline void bio_set_flag(struct bio *bio, unsigned int bit) +{ + 
bio->bi_flags |= (1UL << bit); +} + +static inline void bio_clear_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags &= ~(1UL << bit); +} + enum bip_flags { BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6164fb8a817b..a765a50e780f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -129,8 +129,6 @@ struct bio { #define BIO_RESET_BITS 13 #define BIO_OWNS_VEC 13 /* bio_free() should free bvec */ -#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) - /* * top 4 bits of bio flags indicate the pool this bio came from */ -- cgit v1.2.3-70-g09d2 From 2c68f6dc6e621153a708bef6c569805762da2020 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 28 Jul 2015 13:14:32 -0600 Subject: block: shrink struct bio down to 2 cache lines again Commit bcf2843b3f8f added ->bi_error to cleanup the error passing for struct bio, but that ended up adding 4 bytes and a 4 byte hole to the size of struct bio. For a clean config, that bumped it from 128 bytes, to 136 bytes, on x86-64. The ->bi_flags member is currently an unsigned long, but it fits easily within an int. Change it to an unsigned int, adjust the pool offset code, and move ->bi_error into the new hole. Then we end up with a 128 byte bio again. Change the bio flag set/clear to use cmpxchg to ensure we don't lose any flags when manipulating them.
Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++--- include/linux/blk_types.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 986e6e19feb5..b7892a1906bd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -306,17 +306,17 @@ static inline void bio_cnt_set(struct bio *bio, unsigned int count) static inline bool bio_flagged(struct bio *bio, unsigned int bit) { - return (bio->bi_flags & (1UL << bit)) != 0; + return (bio->bi_flags & (1U << bit)) != 0; } static inline void bio_set_flag(struct bio *bio, unsigned int bit) { - bio->bi_flags |= (1UL << bit); + bio->bi_flags |= (1U << bit); } static inline void bio_clear_flag(struct bio *bio, unsigned int bit) { - bio->bi_flags &= ~(1UL << bit); + bio->bi_flags &= ~(1U << bit); } enum bip_flags { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a765a50e780f..4b7b4ebaa633 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -46,14 +46,14 @@ struct bvec_iter { struct bio { struct bio *bi_next; /* request queue link */ struct block_device *bi_bdev; - unsigned long bi_flags; /* status, command, etc */ + unsigned int bi_flags; /* status, command, etc */ + int bi_error; unsigned long bi_rw; /* bottom bits READ/WRITE, * top bits priority */ struct bvec_iter bi_iter; - int bi_error; /* Number of segments in this BIO after * physical address coalescing is performed. 
*/ @@ -134,7 +134,7 @@ struct bio { */ #define BIO_POOL_BITS (4) #define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) -#define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) +#define BIO_POOL_OFFSET (32 - BIO_POOL_BITS) #define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) -- cgit v1.2.3-70-g09d2 From 83b7b67c780500a1d5d87c44ee8963166154adfa Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 1 Jul 2015 13:11:34 +0900 Subject: usb: phy: msm-usb: Replace deprecated API of extcon This patch removes the deprecated notifier API of the extcon framework and uses the new extcon API with a unique id to indicate each external connector (USB, USB-HOST). Alter the deprecated API as follows: - extcon_register_interest() -> extcon_register_notifier() - extcon_get_cable_state(*edev, char *) -> extcon_get_cable_state_(*edev, id) Cc: Felipe Balbi Cc: Greg Kroah-Hartman Signed-off-by: Chanwoo Choi Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-msm-usb.c | 20 ++++++++++---------- include/linux/usb/msm_hsusb.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c index 00c49bb1bd29..61d86d8bf5b7 100644 --- a/drivers/usb/phy/phy-msm-usb.c +++ b/drivers/usb/phy/phy-msm-usb.c @@ -1561,15 +1561,16 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg) } if (!IS_ERR(ext_vbus)) { + motg->vbus.extcon = ext_vbus; motg->vbus.nb.notifier_call = msm_otg_vbus_notifier; - ret = extcon_register_interest(&motg->vbus.conn, ext_vbus->name, - "USB", &motg->vbus.nb); + ret = extcon_register_notifier(ext_vbus, EXTCON_USB, + &motg->vbus.nb); if (ret < 0) { dev_err(&pdev->dev, "register VBUS notifier failed\n"); return ret; } - ret = extcon_get_cable_state(ext_vbus, "USB"); + ret = extcon_get_cable_state_(ext_vbus, EXTCON_USB); if (ret) set_bit(B_SESS_VLD, &motg->inputs); else @@ -1577,15 +1578,16 @@ static
int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg) } if (!IS_ERR(ext_id)) { + motg->id.extcon = ext_id; motg->id.nb.notifier_call = msm_otg_id_notifier; - ret = extcon_register_interest(&motg->id.conn, ext_id->name, - "USB-HOST", &motg->id.nb); + ret = extcon_register_notifier(ext_id, EXTCON_USB_HOST, + &motg->id.nb); if (ret < 0) { dev_err(&pdev->dev, "register ID notifier failed\n"); return ret; } - ret = extcon_get_cable_state(ext_id, "USB-HOST"); + ret = extcon_get_cable_state_(ext_id, EXTCON_USB_HOST); if (ret) clear_bit(ID, &motg->inputs); else @@ -1805,10 +1807,8 @@ static int msm_otg_remove(struct platform_device *pdev) if (phy->otg->host || phy->otg->gadget) return -EBUSY; - if (motg->id.conn.edev) - extcon_unregister_interest(&motg->id.conn); - if (motg->vbus.conn.edev) - extcon_unregister_interest(&motg->vbus.conn); + extcon_unregister_notifier(motg->id.extcon, EXTCON_USB_HOST, &motg->id.nb); + extcon_unregister_notifier(motg->vbus.extcon, EXTCON_USB, &motg->vbus.nb); msm_otg_debugfs_cleanup(); cancel_delayed_work_sync(&motg->chg_work); diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index e55a1504266e..5df2c8f59aa0 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -128,7 +128,7 @@ struct msm_otg_platform_data { */ struct msm_usb_cable { struct notifier_block nb; - struct extcon_specific_cable_nb conn; + struct extcon_dev *extcon; }; /** -- cgit v1.2.3-70-g09d2 From ccdf138fe3e243c70301fcb6a101e366b7daef07 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Mon, 4 May 2015 14:55:11 +0200 Subject: usb: gadget: add usb_gadget_activate/deactivate functions These functions allows to deactivate gadget to make it not visible to host and make it active again when gadget driver is finally ready. 
They are needed to fix usb_function_activate() and usb_function_deactivate() functions which currently are not working as usb_gadget_connect() is called immediately after function bind regardless to previous calls of usb_gadget_disconnect() function. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 100 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 4f3dfb7d0654..15604bb3e524 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -526,6 +526,9 @@ struct usb_gadget_ops { * @quirk_ep_out_aligned_size: epout requires buffer size to be aligned to * MaxPacketSize. * @is_selfpowered: if the gadget is self-powered. + * @deactivated: True if gadget is deactivated - in deactivated state it cannot + * be connected. + * @connected: True if gadget is connected. * * Gadgets have a mostly-portable "gadget driver" implementing device * functions, handling all usb configurations and interfaces. Gadget @@ -568,6 +571,8 @@ struct usb_gadget { unsigned a_alt_hnp_support:1; unsigned quirk_ep_out_aligned_size:1; unsigned is_selfpowered:1; + unsigned deactivated:1; + unsigned connected:1; }; #define work_to_gadget(w) (container_of((w), struct usb_gadget, work)) @@ -771,9 +776,24 @@ static inline int usb_gadget_vbus_disconnect(struct usb_gadget *gadget) */ static inline int usb_gadget_connect(struct usb_gadget *gadget) { + int ret; + if (!gadget->ops->pullup) return -EOPNOTSUPP; - return gadget->ops->pullup(gadget, 1); + + if (gadget->deactivated) { + /* + * If gadget is deactivated we only save new state. + * Gadget will be connected automatically after activation. 
+ */ + gadget->connected = true; + return 0; + } + + ret = gadget->ops->pullup(gadget, 1); + if (!ret) + gadget->connected = 1; + return ret; } /** @@ -784,20 +804,88 @@ static inline int usb_gadget_connect(struct usb_gadget *gadget) * as a disconnect (when a VBUS session is active). Not all systems * support software pullup controls. * + * Returns zero on success, else negative errno. + */ +static inline int usb_gadget_disconnect(struct usb_gadget *gadget) +{ + int ret; + + if (!gadget->ops->pullup) + return -EOPNOTSUPP; + + if (gadget->deactivated) { + /* + * If gadget is deactivated we only save new state. + * Gadget will stay disconnected after activation. + */ + gadget->connected = false; + return 0; + } + + ret = gadget->ops->pullup(gadget, 0); + if (!ret) + gadget->connected = 0; + return ret; +} + +/** + * usb_gadget_deactivate - deactivate function which is not ready to work + * @gadget: the peripheral being deactivated + * * This routine may be used during the gadget driver bind() call to prevent * the peripheral from ever being visible to the USB host, unless later - * usb_gadget_connect() is called. For example, user mode components may + * usb_gadget_activate() is called. For example, user mode components may * need to be activated before the system can talk to hosts. * * Returns zero on success, else negative errno. */ -static inline int usb_gadget_disconnect(struct usb_gadget *gadget) +static inline int usb_gadget_deactivate(struct usb_gadget *gadget) { - if (!gadget->ops->pullup) - return -EOPNOTSUPP; - return gadget->ops->pullup(gadget, 0); + int ret; + + if (gadget->deactivated) + return 0; + + if (gadget->connected) { + ret = usb_gadget_disconnect(gadget); + if (ret) + return ret; + /* + * If gadget was being connected before deactivation, we want + * to reconnect it in usb_gadget_activate(). 
+ */ + gadget->connected = true; + } + gadget->deactivated = true; + + return 0; } +/** + * usb_gadget_activate - activate function which is not ready to work + * @gadget: the peripheral being activated + * + * This routine activates gadget which was previously deactivated with + * usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed. + * + * Returns zero on success, else negative errno. + */ +static inline int usb_gadget_activate(struct usb_gadget *gadget) +{ + if (!gadget->deactivated) + return 0; + + gadget->deactivated = false; + + /* + * If gadget has been connected before deactivation, or became connected + * while it was being deactivated, we call usb_gadget_connect(). + */ + if (gadget->connected) + return usb_gadget_connect(gadget); + + return 0; +} /*-------------------------------------------------------------------------*/ -- cgit v1.2.3-70-g09d2 From d5bb9b81dbfa35d117ecb58022ee6e7e41e4772d Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Mon, 4 May 2015 14:55:13 +0200 Subject: usb: composite: add bind_deactivated flag to usb_function This patch introduces 'bind_deactivated' flag in struct usb_function. Functions which don't want to be activated automatically after bind should set this flag, and when they start to be ready to work they should call usb_function_activate(). When USB function sets 'bind_deactivated' flag, initial deactivation counter is incremented automatically, so there is no need to call usb_function_deactivate() in function bind. 
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 6 ++++++ include/linux/usb/composite.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 86d4e8fdf8d3..36c6f47642f8 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -209,6 +209,12 @@ int usb_add_function(struct usb_configuration *config, function->config = config; list_add_tail(&function->list, &config->functions); + if (function->bind_deactivated) { + value = usb_function_deactivate(function); + if (value) + goto done; + } + /* REVISIT *require* function->bind? */ if (function->bind) { value = function->bind(config, function); diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 2511469a9904..1074b8921a5d 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -228,6 +228,8 @@ struct usb_function { struct list_head list; DECLARE_BITMAP(endpoints, 32); const struct usb_function_instance *fi; + + unsigned int bind_deactivated:1; }; int usb_add_function(struct usb_configuration *, struct usb_function *); -- cgit v1.2.3-70-g09d2 From 6a88bbe8e30d4beb2320b5a7452242a1fe7889c5 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 9 Jul 2015 15:18:40 +0800 Subject: usb: otg: add usb_otg_caps structure for otg capabilities This patch adds a structure usb_otg_caps to cover all otg related capabilities of the device, including otg revision, and if hnp/srp/adp is supported. 
Signed-off-by: Li Jun Reviewed-by: Roger Quadros Signed-off-by: Felipe Balbi --- include/linux/usb/otg.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index 52661c5da690..bd1dcf816100 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -41,6 +41,21 @@ struct usb_otg { }; +/** + * struct usb_otg_caps - describes the otg capabilities of the device + * @otg_rev: The OTG revision number the device is compliant with, it's + * in binary-coded decimal (i.e. 2.0 is 0200H). + * @hnp_support: Indicates if the device supports HNP. + * @srp_support: Indicates if the device supports SRP. + * @adp_support: Indicates if the device supports ADP. + */ +struct usb_otg_caps { + u16 otg_rev; + bool hnp_support; + bool srp_support; + bool adp_support; +}; + extern const char *usb_otg_state_string(enum usb_otg_state state); /* Context: can sleep */ -- cgit v1.2.3-70-g09d2 From 84704bb3d183e55d042bf57043552f2649443a64 Mon Sep 17 00:00:00 2001 From: Macpaul Lin Date: Thu, 9 Jul 2015 15:18:41 +0800 Subject: usb: add usb_otg_caps to usb_gadget structure. Add usb_otg_caps pointer to usb_gadget structure to indicate its otg capabilities. Signed-off-by: Macpaul Lin Signed-off-by: Li Jun Reviewed-by: Roger Quadros Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 15604bb3e524..fffceafb6b8c 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -511,6 +511,7 @@ struct usb_gadget_ops { * @dev: Driver model state for this abstract device. * @out_epnum: last used out ep number * @in_epnum: last used in ep number + * @otg_caps: OTG capabilities of this gadget. 
* @sg_supported: true if we can handle scatter-gather * @is_otg: True if the USB device port uses a Mini-AB jack, so that the * gadget driver must provide a USB OTG descriptor. @@ -562,6 +563,7 @@ struct usb_gadget { struct device dev; unsigned out_epnum; unsigned in_epnum; + struct usb_otg_caps *otg_caps; unsigned sg_supported:1; unsigned is_otg:1; -- cgit v1.2.3-70-g09d2 From 929412d94f2b75fe2a662afa2977bfb6a233c1c3 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 9 Jul 2015 15:18:44 +0800 Subject: usb: common: add API to update usb otg capabilities by device tree Check property of usb hardware to update otg version and disable SRP, HNP and ADP if its disable flag is present. Reviewed-by: Roger Quadros Signed-off-by: Li Jun Signed-off-by: Felipe Balbi --- drivers/usb/common/common.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/of.h | 7 ++++++ 2 files changed, 63 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index b530fd403ffb..9e39286a4e5a 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -154,6 +154,62 @@ bool of_usb_host_tpl_support(struct device_node *np) return false; } EXPORT_SYMBOL_GPL(of_usb_host_tpl_support); + +/** + * of_usb_update_otg_caps - to update usb otg capabilities according to + * the passed properties in DT. 
+ * @np: Pointer to the given device_node + * @otg_caps: Pointer to the target usb_otg_caps to be set + * + * The function updates the otg capabilities + */ +int of_usb_update_otg_caps(struct device_node *np, + struct usb_otg_caps *otg_caps) +{ + u32 otg_rev; + + if (!otg_caps) + return -EINVAL; + + if (!of_property_read_u32(np, "otg-rev", &otg_rev)) { + switch (otg_rev) { + case 0x0100: + case 0x0120: + case 0x0130: + case 0x0200: + /* Choose the lesser one if it's already been set */ + if (otg_caps->otg_rev) + otg_caps->otg_rev = min_t(u16, otg_rev, + otg_caps->otg_rev); + else + otg_caps->otg_rev = otg_rev; + break; + default: + pr_err("%s: unsupported otg-rev: 0x%x\n", + np->full_name, otg_rev); + return -EINVAL; + } + } else { + /* + * otg-rev is mandatory for otg properties, if not passed + * we set it to be 0 and assume it's a legacy otg device. + * Non-dt platform can set it afterwards. + */ + otg_caps->otg_rev = 0; + } + + if (of_find_property(np, "hnp-disable", NULL)) + otg_caps->hnp_support = false; + if (of_find_property(np, "srp-disable", NULL)) + otg_caps->srp_support = false; + if (of_find_property(np, "adp-disable", NULL) || + (otg_caps->otg_rev < 0x0200)) + otg_caps->adp_support = false; + + return 0; +} +EXPORT_SYMBOL_GPL(of_usb_update_otg_caps); + #endif MODULE_LICENSE("GPL"); diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index cfe0528cdbb1..8c5a818ec244 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -15,6 +15,8 @@ enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np); enum usb_device_speed of_usb_get_maximum_speed(struct device_node *np); bool of_usb_host_tpl_support(struct device_node *np); +int of_usb_update_otg_caps(struct device_node *np, + struct usb_otg_caps *otg_caps); #else static inline enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) { @@ -30,6 +32,11 @@ static inline bool of_usb_host_tpl_support(struct device_node *np) { return false; } +static inline int 
of_usb_update_otg_caps(struct device_node *np, + struct usb_otg_caps *otg_caps) +{ + return 0; +} #endif #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_USB_SUPPORT) -- cgit v1.2.3-70-g09d2 From 79742351c89b76ebcf82b73103aed50f98ac2ee4 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 9 Jul 2015 15:18:45 +0800 Subject: usb: chipidea: set usb otg capabilities Init and update otg capabilities by DT, set gadget's otg capabilities accordingly. Acked-by: Peter Chen Reviewed-by: Roger Quadros Signed-off-by: Li Jun Signed-off-by: Felipe Balbi --- drivers/usb/chipidea/core.c | 15 +++++++++++++++ drivers/usb/chipidea/udc.c | 7 ++++++- include/linux/usb/chipidea.h | 1 + 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index 74fea4fa41b1..1e6d5f0c18f2 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -560,6 +560,8 @@ static irqreturn_t ci_irq(int irq, void *data) static int ci_get_platdata(struct device *dev, struct ci_hdrc_platform_data *platdata) { + int ret; + if (!platdata->phy_mode) platdata->phy_mode = of_usb_get_phy_mode(dev->of_node); @@ -588,6 +590,19 @@ static int ci_get_platdata(struct device *dev, of_usb_host_tpl_support(dev->of_node); } + if (platdata->dr_mode == USB_DR_MODE_OTG) { + /* We can support HNP and SRP of OTG 2.0 */ + platdata->ci_otg_caps.otg_rev = 0x0200; + platdata->ci_otg_caps.hnp_support = true; + platdata->ci_otg_caps.srp_support = true; + + /* Update otg capabilities by DT properties */ + ret = of_usb_update_otg_caps(dev->of_node, + &platdata->ci_otg_caps); + if (ret) + return ret; + } + if (of_usb_get_maximum_speed(dev->of_node) == USB_SPEED_FULL) platdata->flags |= CI_HDRC_FORCE_FULLSPEED; diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 764f668d45a9..b7cca3e597bf 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1827,6 +1827,7 @@ static irqreturn_t udc_irq(struct 
ci_hdrc *ci) static int udc_start(struct ci_hdrc *ci) { struct device *dev = ci->dev; + struct usb_otg_caps *otg_caps = &ci->platdata->ci_otg_caps; int retval = 0; spin_lock_init(&ci->lock); @@ -1834,8 +1835,12 @@ static int udc_start(struct ci_hdrc *ci) ci->gadget.ops = &usb_gadget_ops; ci->gadget.speed = USB_SPEED_UNKNOWN; ci->gadget.max_speed = USB_SPEED_HIGH; - ci->gadget.is_otg = ci->is_otg ? 1 : 0; ci->gadget.name = ci->platdata->name; + ci->gadget.otg_caps = otg_caps; + + if (otg_caps->hnp_support || otg_caps->srp_support || + otg_caps->adp_support) + ci->gadget.is_otg = 1; INIT_LIST_HEAD(&ci->gadget.ep_list); diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index ab94f78c4dd1..e10cefc721ad 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -34,6 +34,7 @@ struct ci_hdrc_platform_data { #define CI_HDRC_CONTROLLER_STOPPED_EVENT 1 void (*notify_event) (struct ci_hdrc *ci, unsigned event); struct regulator *reg_vbus; + struct usb_otg_caps ci_otg_caps; bool tpl_support; }; -- cgit v1.2.3-70-g09d2 From d1606dfb98e59221332704c05f5908d9116456ab Mon Sep 17 00:00:00 2001 From: Li Jun Date: Thu, 9 Jul 2015 15:18:47 +0800 Subject: usb: gadget: add usb otg descriptor allocate and init interface Allocate usb otg descriptor and initialize it according to gadget's otg capabilities, if usb_otg_caps is not set, keep settings as current gadget drivers. With this 2 new interfaces, gadget can use usb_otg_descriptor for OTG 1.x, and usb_otg20_descriptor for OTG 2.0 or above, and otg features can be decided by the combination of usb hardware property and driver config. 
Signed-off-by: Li Jun Reviewed-by: Roger Quadros Signed-off-by: Felipe Balbi --- drivers/usb/gadget/config.c | 56 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/gadget.h | 4 ++++ 2 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/config.c b/drivers/usb/gadget/config.c index 34e12fc52c23..0fafa7a1b6f6 100644 --- a/drivers/usb/gadget/config.c +++ b/drivers/usb/gadget/config.c @@ -20,6 +20,7 @@ #include #include #include +#include /** * usb_descriptor_fillbuf - fill buffer with descriptors @@ -195,3 +196,58 @@ void usb_free_all_descriptors(struct usb_function *f) usb_free_descriptors(f->ss_descriptors); } EXPORT_SYMBOL_GPL(usb_free_all_descriptors); + +struct usb_descriptor_header *usb_otg_descriptor_alloc( + struct usb_gadget *gadget) +{ + struct usb_descriptor_header *otg_desc; + unsigned length = 0; + + if (gadget->otg_caps && (gadget->otg_caps->otg_rev >= 0x0200)) + length = sizeof(struct usb_otg20_descriptor); + else + length = sizeof(struct usb_otg_descriptor); + + otg_desc = kzalloc(length, GFP_KERNEL); + return otg_desc; +} +EXPORT_SYMBOL_GPL(usb_otg_descriptor_alloc); + +int usb_otg_descriptor_init(struct usb_gadget *gadget, + struct usb_descriptor_header *otg_desc) +{ + struct usb_otg_descriptor *otg1x_desc; + struct usb_otg20_descriptor *otg20_desc; + struct usb_otg_caps *otg_caps = gadget->otg_caps; + u8 otg_attributes = 0; + + if (!otg_desc) + return -EINVAL; + + if (otg_caps && otg_caps->otg_rev) { + if (otg_caps->hnp_support) + otg_attributes |= USB_OTG_HNP; + if (otg_caps->srp_support) + otg_attributes |= USB_OTG_SRP; + if (otg_caps->adp_support && (otg_caps->otg_rev >= 0x0200)) + otg_attributes |= USB_OTG_ADP; + } else { + otg_attributes = USB_OTG_SRP | USB_OTG_HNP; + } + + if (otg_caps && (otg_caps->otg_rev >= 0x0200)) { + otg20_desc = (struct usb_otg20_descriptor *)otg_desc; + otg20_desc->bLength = sizeof(struct usb_otg20_descriptor); + otg20_desc->bDescriptorType = USB_DT_OTG; + 
otg20_desc->bmAttributes = otg_attributes; + otg20_desc->bcdOTG = cpu_to_le16(otg_caps->otg_rev); + } else { + otg1x_desc = (struct usb_otg_descriptor *)otg_desc; + otg1x_desc->bLength = sizeof(struct usb_otg_descriptor); + otg1x_desc->bDescriptorType = USB_DT_OTG; + otg1x_desc->bmAttributes = otg_attributes; + } + + return 0; +} +EXPORT_SYMBOL_GPL(usb_otg_descriptor_init); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index fffceafb6b8c..cea0511a1bc9 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1092,6 +1092,10 @@ int usb_assign_descriptors(struct usb_function *f, struct usb_descriptor_header **ss); void usb_free_all_descriptors(struct usb_function *f); +struct usb_descriptor_header *usb_otg_descriptor_alloc( + struct usb_gadget *gadget); +int usb_otg_descriptor_init(struct usb_gadget *gadget, + struct usb_descriptor_header *otg_desc); /*-------------------------------------------------------------------------*/ /* utility to simplify map/unmap of usb_requests to/from DMA */ -- cgit v1.2.3-70-g09d2 From f2ab3298fb4932358d27fc4c7ea1a1891ad7e042 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 27 Jul 2015 20:20:30 -0700 Subject: soc: qcom: Add Shared Memory Driver This adds the Qualcomm Shared Memory Driver (SMD) providing communication channels to remote processors, ontop of SMEM. Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/Kconfig | 8 + drivers/soc/qcom/Makefile | 1 + drivers/soc/qcom/smd.c | 1319 ++++++++++++++++++++++++++++++++++++++++++ include/linux/soc/qcom/smd.h | 46 ++ 4 files changed, 1374 insertions(+) create mode 100644 drivers/soc/qcom/smd.c create mode 100644 include/linux/soc/qcom/smd.h (limited to 'include/linux') diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 0d4faaf32662..188295e2c9ba 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -19,6 +19,14 @@ config QCOM_PM modes. 
It interface with various system drivers to put the cores in low power modes. +config QCOM_SMD + tristate "Qualcomm Shared Memory Driver (SMD)" + depends on QCOM_SMEM + help + Say y here to enable support for the Qualcomm Shared Memory Driver + providing communication channels to remote processors in Qualcomm + platforms. + config QCOM_SMEM tristate "Qualcomm Shared Memory Manager (SMEM)" depends on ARCH_QCOM diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index 3a033c43c0ef..f961a8796ed2 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_QCOM_GSBI) += qcom_gsbi.o obj-$(CONFIG_QCOM_PM) += spm.o +obj-$(CONFIG_QCOM_SMD) += smd.o obj-$(CONFIG_QCOM_SMEM) += smem.o diff --git a/drivers/soc/qcom/smd.c b/drivers/soc/qcom/smd.c new file mode 100644 index 000000000000..327adcf117c1 --- /dev/null +++ b/drivers/soc/qcom/smd.c @@ -0,0 +1,1319 @@ +/* + * Copyright (c) 2015, Sony Mobile Communications AB. + * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * The Qualcomm Shared Memory communication solution provides point-to-point + * channels for clients to send and receive streaming or packet based data. + * + * Each channel consists of a control item (channel info) and a ring buffer + * pair. 
The channel info carries information related to channel state, flow + * control and the offsets within the ring buffer. + * + * All allocated channels are listed in an allocation table, identifying the + * pair of items by name, type and remote processor. + * + * Upon creating a new channel the remote processor allocates channel info and + * ring buffer items from the smem heap and populates the allocation table. An + * interrupt is sent to the other end of the channel and a scan for new + * channels should be done. A channel never goes away, it will only change + * state. + * + * The remote processor signals its intent to bring up the communication + * channel by setting the state of its end of the channel to "opening" and + * sends out an interrupt. We detect this change and register a smd device to + * consume the channel. Upon finding a consumer we finish the handshake and the + * channel is up. + * + * Upon closing a channel, the remote processor will update the state of its + * end of the channel and signal us, we will then unregister any attached + * device and close our end of the channel. + * + * Devices attached to a channel can use the qcom_smd_send function to push + * data to the channel, this is done by copying the data into the tx ring + * buffer, updating the pointers in the channel info and signaling the remote + * processor. + * + * The remote processor does the equivalent when it transfers data and upon + * receiving the interrupt we check the channel info for new data and deliver + * this to the attached device. If the device is not ready to receive the data + * we leave it in the ring buffer for now. + */ + +struct smd_channel_info; +struct smd_channel_info_word; + +#define SMD_ALLOC_TBL_COUNT 2 +#define SMD_ALLOC_TBL_SIZE 64 + +/* + * This lists the various smem heap items relevant for the allocation table and + * smd channel entries.
+ */ +static const struct { + unsigned alloc_tbl_id; + unsigned info_base_id; + unsigned fifo_base_id; +} smem_items[SMD_ALLOC_TBL_COUNT] = { + { + .alloc_tbl_id = 13, + .info_base_id = 14, + .fifo_base_id = 338 + }, + { + .alloc_tbl_id = 14, + .info_base_id = 266, + .fifo_base_id = 202, + }, +}; + +/** + * struct qcom_smd_edge - representing a remote processor + * @smd: handle to qcom_smd + * @of_node: of_node handle for information related to this edge + * @edge_id: identifier of this edge + * @irq: interrupt for signals on this edge + * @ipc_regmap: regmap handle holding the outgoing ipc register + * @ipc_offset: offset within @ipc_regmap of the register for ipc + * @ipc_bit: bit in the register at @ipc_offset of @ipc_regmap + * @channels: list of all channels detected on this edge + * @channels_lock: guard for modifications of @channels + * @allocated: array of bitmaps representing already allocated channels + * @need_rescan: flag that the @work needs to scan smem for new channels + * @smem_available: last available amount of smem triggering a channel scan + * @work: work item for edge house keeping + */ +struct qcom_smd_edge { + struct qcom_smd *smd; + struct device_node *of_node; + unsigned edge_id; + + int irq; + + struct regmap *ipc_regmap; + int ipc_offset; + int ipc_bit; + + struct list_head channels; + spinlock_t channels_lock; + + DECLARE_BITMAP(allocated[SMD_ALLOC_TBL_COUNT], SMD_ALLOC_TBL_SIZE); + + bool need_rescan; + unsigned smem_available; + + struct work_struct work; +}; + +/* + * SMD channel states. 
+ */ +enum smd_channel_state { + SMD_CHANNEL_CLOSED, + SMD_CHANNEL_OPENING, + SMD_CHANNEL_OPENED, + SMD_CHANNEL_FLUSHING, + SMD_CHANNEL_CLOSING, + SMD_CHANNEL_RESET, + SMD_CHANNEL_RESET_OPENING +}; + +/** + * struct qcom_smd_channel - smd channel struct + * @edge: qcom_smd_edge this channel is living on + * @qsdev: reference to an associated smd client device + * @name: name of the channel + * @state: local state of the channel + * @remote_state: remote state of the channel + * @tx_info: byte aligned outgoing channel info + * @rx_info: byte aligned incoming channel info + * @tx_info_word: word aligned outgoing channel info + * @rx_info_word: word aligned incoming channel info + * @tx_lock: lock to make writes to the channel mutually exclusive + * @fblockread_event: wakeup event tied to tx fBLOCKREADINTR + * @tx_fifo: pointer to the outgoing ring buffer + * @rx_fifo: pointer to the incoming ring buffer + * @fifo_size: size of each ring buffer + * @bounce_buffer: bounce buffer for reading wrapped packets + * @cb: callback function registered for this channel + * @recv_lock: guard for rx info modifications and cb pointer + * @pkt_size: size of the currently handled packet + * @list: list entry for @channels in qcom_smd_edge + */ +struct qcom_smd_channel { + struct qcom_smd_edge *edge; + + struct qcom_smd_device *qsdev; + + char *name; + enum smd_channel_state state; + enum smd_channel_state remote_state; + + struct smd_channel_info *tx_info; + struct smd_channel_info *rx_info; + + struct smd_channel_info_word *tx_info_word; + struct smd_channel_info_word *rx_info_word; + + struct mutex tx_lock; + wait_queue_head_t fblockread_event; + + void *tx_fifo; + void *rx_fifo; + int fifo_size; + + void *bounce_buffer; + int (*cb)(struct qcom_smd_device *, const void *, size_t); + + spinlock_t recv_lock; + + int pkt_size; + + struct list_head list; +}; + +/** + * struct qcom_smd - smd struct + * @dev: device struct + * @num_edges: number of entries in @edges + * @edges: array of
edges to be handled + */ +struct qcom_smd { + struct device *dev; + + unsigned num_edges; + struct qcom_smd_edge edges[0]; +}; + +/* + * Format of the smd_info smem items, for byte aligned channels. + */ +struct smd_channel_info { + u32 state; + u8 fDSR; + u8 fCTS; + u8 fCD; + u8 fRI; + u8 fHEAD; + u8 fTAIL; + u8 fSTATE; + u8 fBLOCKREADINTR; + u32 tail; + u32 head; +}; + +/* + * Format of the smd_info smem items, for word aligned channels. + */ +struct smd_channel_info_word { + u32 state; + u32 fDSR; + u32 fCTS; + u32 fCD; + u32 fRI; + u32 fHEAD; + u32 fTAIL; + u32 fSTATE; + u32 fBLOCKREADINTR; + u32 tail; + u32 head; +}; + +#define GET_RX_CHANNEL_INFO(channel, param) \ + (channel->rx_info_word ? \ + channel->rx_info_word->param : \ + channel->rx_info->param) + +#define SET_RX_CHANNEL_INFO(channel, param, value) \ + (channel->rx_info_word ? \ + (channel->rx_info_word->param = value) : \ + (channel->rx_info->param = value)) + +#define GET_TX_CHANNEL_INFO(channel, param) \ + (channel->tx_info_word ? \ + channel->tx_info_word->param : \ + channel->tx_info->param) + +#define SET_TX_CHANNEL_INFO(channel, param, value) \ + (channel->tx_info_word ? \ + (channel->tx_info_word->param = value) : \ + (channel->tx_info->param = value)) + +/** + * struct qcom_smd_alloc_entry - channel allocation entry + * @name: channel name + * @cid: channel index + * @flags: channel flags and edge id + * @ref_count: reference count of the channel + */ +struct qcom_smd_alloc_entry { + u8 name[20]; + u32 cid; + u32 flags; + u32 ref_count; +} __packed; + +#define SMD_CHANNEL_FLAGS_EDGE_MASK 0xff +#define SMD_CHANNEL_FLAGS_STREAM BIT(8) +#define SMD_CHANNEL_FLAGS_PACKET BIT(9) + +/* + * Each smd packet contains a 20 byte header, with the first 4 being the length + * of the packet. + */ +#define SMD_PACKET_HEADER_LEN 20 + +/* + * Signal the remote processor associated with 'channel'. 
+ */ +static void qcom_smd_signal_channel(struct qcom_smd_channel *channel) +{ + struct qcom_smd_edge *edge = channel->edge; + + regmap_write(edge->ipc_regmap, edge->ipc_offset, BIT(edge->ipc_bit)); +} + +/* + * Initialize the tx channel info + */ +static void qcom_smd_channel_reset(struct qcom_smd_channel *channel) +{ + SET_TX_CHANNEL_INFO(channel, state, SMD_CHANNEL_CLOSED); + SET_TX_CHANNEL_INFO(channel, fDSR, 0); + SET_TX_CHANNEL_INFO(channel, fCTS, 0); + SET_TX_CHANNEL_INFO(channel, fCD, 0); + SET_TX_CHANNEL_INFO(channel, fRI, 0); + SET_TX_CHANNEL_INFO(channel, fHEAD, 0); + SET_TX_CHANNEL_INFO(channel, fTAIL, 0); + SET_TX_CHANNEL_INFO(channel, fSTATE, 1); + SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0); + SET_TX_CHANNEL_INFO(channel, head, 0); + SET_TX_CHANNEL_INFO(channel, tail, 0); + + qcom_smd_signal_channel(channel); + + channel->state = SMD_CHANNEL_CLOSED; + channel->pkt_size = 0; +} + +/* + * Calculate the amount of data available in the rx fifo + */ +static size_t qcom_smd_channel_get_rx_avail(struct qcom_smd_channel *channel) +{ + unsigned head; + unsigned tail; + + head = GET_RX_CHANNEL_INFO(channel, head); + tail = GET_RX_CHANNEL_INFO(channel, tail); + + return (head - tail) & (channel->fifo_size - 1); +} + +/* + * Set tx channel state and inform the remote processor + */ +static void qcom_smd_channel_set_state(struct qcom_smd_channel *channel, + int state) +{ + struct qcom_smd_edge *edge = channel->edge; + bool is_open = state == SMD_CHANNEL_OPENED; + + if (channel->state == state) + return; + + dev_dbg(edge->smd->dev, "set_state(%s, %d)\n", channel->name, state); + + SET_TX_CHANNEL_INFO(channel, fDSR, is_open); + SET_TX_CHANNEL_INFO(channel, fCTS, is_open); + SET_TX_CHANNEL_INFO(channel, fCD, is_open); + + SET_TX_CHANNEL_INFO(channel, state, state); + SET_TX_CHANNEL_INFO(channel, fSTATE, 1); + + channel->state = state; + qcom_smd_signal_channel(channel); +} + +/* + * Copy count bytes of data using 32bit accesses, if that's required. 
+ */ +static void smd_copy_to_fifo(void __iomem *_dst, + const void *_src, + size_t count, + bool word_aligned) +{ + u32 *dst = (u32 *)_dst; + u32 *src = (u32 *)_src; + + if (word_aligned) { + count /= sizeof(u32); + while (count--) + writel_relaxed(*src++, dst++); + } else { + memcpy_toio(_dst, _src, count); + } +} + +/* + * Copy count bytes of data using 32bit accesses, if that is required. + */ +static void smd_copy_from_fifo(void *_dst, + const void __iomem *_src, + size_t count, + bool word_aligned) +{ + u32 *dst = (u32 *)_dst; + u32 *src = (u32 *)_src; + + if (word_aligned) { + count /= sizeof(u32); + while (count--) + *dst++ = readl_relaxed(src++); + } else { + memcpy_fromio(_dst, _src, count); + } +} + +/* + * Read count bytes of data from the rx fifo into buf, but don't advance the + * tail. + */ +static size_t qcom_smd_channel_peek(struct qcom_smd_channel *channel, + void *buf, size_t count) +{ + bool word_aligned; + unsigned tail; + size_t len; + + word_aligned = channel->rx_info_word != NULL; + tail = GET_RX_CHANNEL_INFO(channel, tail); + + len = min_t(size_t, count, channel->fifo_size - tail); + if (len) { + smd_copy_from_fifo(buf, + channel->rx_fifo + tail, + len, + word_aligned); + } + + if (len != count) { + smd_copy_from_fifo(buf + len, + channel->rx_fifo, + count - len, + word_aligned); + } + + return count; +} + +/* + * Advance the rx tail by count bytes. 
+ */ +static void qcom_smd_channel_advance(struct qcom_smd_channel *channel, + size_t count) +{ + unsigned tail; + + tail = GET_RX_CHANNEL_INFO(channel, tail); + tail += count; + tail &= (channel->fifo_size - 1); + SET_RX_CHANNEL_INFO(channel, tail, tail); +} + +/* + * Read out a single packet from the rx fifo and deliver it to the device + */ +static int qcom_smd_channel_recv_single(struct qcom_smd_channel *channel) +{ + struct qcom_smd_device *qsdev = channel->qsdev; + unsigned tail; + size_t len; + void *ptr; + int ret; + + if (!channel->cb) + return 0; + + tail = GET_RX_CHANNEL_INFO(channel, tail); + + /* Use bounce buffer if the data wraps */ + if (tail + channel->pkt_size >= channel->fifo_size) { + ptr = channel->bounce_buffer; + len = qcom_smd_channel_peek(channel, ptr, channel->pkt_size); + } else { + ptr = channel->rx_fifo + tail; + len = channel->pkt_size; + } + + ret = channel->cb(qsdev, ptr, len); + if (ret < 0) + return ret; + + /* Only forward the tail if the client consumed the data */ + qcom_smd_channel_advance(channel, len); + + channel->pkt_size = 0; + + return 0; +} + +/* + * Per channel interrupt handling + */ +static bool qcom_smd_channel_intr(struct qcom_smd_channel *channel) +{ + bool need_state_scan = false; + int remote_state; + u32 pktlen; + int avail; + int ret; + + /* Handle state changes */ + remote_state = GET_RX_CHANNEL_INFO(channel, state); + if (remote_state != channel->remote_state) { + channel->remote_state = remote_state; + need_state_scan = true; + } + /* Indicate that we have seen any state change */ + SET_RX_CHANNEL_INFO(channel, fSTATE, 0); + + /* Signal waiting qcom_smd_send() about the interrupt */ + if (!GET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR)) + wake_up_interruptible(&channel->fblockread_event); + + /* Don't consume any data until we've opened the channel */ + if (channel->state != SMD_CHANNEL_OPENED) + goto out; + + /* Indicate that we've seen the new data */ + SET_RX_CHANNEL_INFO(channel, fHEAD, 0); + + /* Consume 
data */ + for (;;) { + avail = qcom_smd_channel_get_rx_avail(channel); + + if (!channel->pkt_size && avail >= SMD_PACKET_HEADER_LEN) { + qcom_smd_channel_peek(channel, &pktlen, sizeof(pktlen)); + qcom_smd_channel_advance(channel, SMD_PACKET_HEADER_LEN); + channel->pkt_size = pktlen; + } else if (channel->pkt_size && avail >= channel->pkt_size) { + ret = qcom_smd_channel_recv_single(channel); + if (ret) + break; + } else { + break; + } + } + + /* Indicate that we have seen and updated tail */ + SET_RX_CHANNEL_INFO(channel, fTAIL, 1); + + /* Signal the remote that we've consumed the data (if requested) */ + if (!GET_RX_CHANNEL_INFO(channel, fBLOCKREADINTR)) { + /* Ensure ordering of channel info updates */ + wmb(); + + qcom_smd_signal_channel(channel); + } + +out: + return need_state_scan; +} + +/* + * The edge interrupts are triggered by the remote processor on state changes, + * channel info updates or when new channels are created. + */ +static irqreturn_t qcom_smd_edge_intr(int irq, void *data) +{ + struct qcom_smd_edge *edge = data; + struct qcom_smd_channel *channel; + unsigned available; + bool kick_worker = false; + + /* + * Handle state changes or data on each of the channels on this edge + */ + spin_lock(&edge->channels_lock); + list_for_each_entry(channel, &edge->channels, list) { + spin_lock(&channel->recv_lock); + kick_worker |= qcom_smd_channel_intr(channel); + spin_unlock(&channel->recv_lock); + } + spin_unlock(&edge->channels_lock); + + /* + * Creating a new channel requires allocating an smem entry, so we only + * have to scan if the amount of available space in smem has changed + * since the last scan.
+ */ + available = qcom_smem_get_free_space(edge->edge_id); + if (available != edge->smem_available) { + edge->smem_available = available; + edge->need_rescan = true; + kick_worker = true; + } + + if (kick_worker) + schedule_work(&edge->work); + + return IRQ_HANDLED; +} + +/* + * Delivers any outstanding packets in the rx fifo, can be used after probe of + * the clients to deliver any packets that weren't delivered before the client + * was set up. + */ +static void qcom_smd_channel_resume(struct qcom_smd_channel *channel) +{ + unsigned long flags; + + spin_lock_irqsave(&channel->recv_lock, flags); + qcom_smd_channel_intr(channel); + spin_unlock_irqrestore(&channel->recv_lock, flags); +} + +/* + * Calculate how much space is available in the tx fifo. + */ +static size_t qcom_smd_get_tx_avail(struct qcom_smd_channel *channel) +{ + unsigned head; + unsigned tail; + unsigned mask = channel->fifo_size - 1; + + head = GET_TX_CHANNEL_INFO(channel, head); + tail = GET_TX_CHANNEL_INFO(channel, tail); + + return mask - ((head - tail) & mask); +} + +/* + * Write count bytes of data into channel, possibly wrapping in the ring buffer + */ +static int qcom_smd_write_fifo(struct qcom_smd_channel *channel, + const void *data, + size_t count) +{ + bool word_aligned; + unsigned head; + size_t len; + + word_aligned = channel->tx_info_word != NULL; + head = GET_TX_CHANNEL_INFO(channel, head); + + len = min_t(size_t, count, channel->fifo_size - head); + if (len) { + smd_copy_to_fifo(channel->tx_fifo + head, + data, + len, + word_aligned); + } + + if (len != count) { + smd_copy_to_fifo(channel->tx_fifo, + data + len, + count - len, + word_aligned); + } + + head += count; + head &= (channel->fifo_size - 1); + SET_TX_CHANNEL_INFO(channel, head, head); + + return count; +} + +/** + * qcom_smd_send - write data to smd channel + * @channel: channel handle + * @data: buffer of data to write + * @len: number of bytes to write + * + * This is a blocking write of len bytes into the channel's tx
ring buffer and + * signal the remote end. It will sleep until there is enough space available + * in the tx buffer, utilizing the fBLOCKREADINTR signaling mechanism to avoid + * polling. + */ +int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len) +{ + u32 hdr[5] = {len,}; + int tlen = sizeof(hdr) + len; + int ret; + + /* Word aligned channels only accept word size aligned data */ + if (channel->rx_info_word != NULL && len % 4) + return -EINVAL; + + ret = mutex_lock_interruptible(&channel->tx_lock); + if (ret) + return ret; + + while (qcom_smd_get_tx_avail(channel) < tlen) { + if (channel->state != SMD_CHANNEL_OPENED) { + ret = -EPIPE; + goto out; + } + + SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 1); + + ret = wait_event_interruptible(channel->fblockread_event, + qcom_smd_get_tx_avail(channel) >= tlen || + channel->state != SMD_CHANNEL_OPENED); + if (ret) + goto out; + + SET_TX_CHANNEL_INFO(channel, fBLOCKREADINTR, 0); + } + + SET_TX_CHANNEL_INFO(channel, fTAIL, 0); + + qcom_smd_write_fifo(channel, hdr, sizeof(hdr)); + qcom_smd_write_fifo(channel, data, len); + + SET_TX_CHANNEL_INFO(channel, fHEAD, 1); + + /* Ensure ordering of channel info updates */ + wmb(); + + qcom_smd_signal_channel(channel); + +out: + mutex_unlock(&channel->tx_lock); + + return ret; +} +EXPORT_SYMBOL(qcom_smd_send); + +static struct qcom_smd_device *to_smd_device(struct device *dev) +{ + return container_of(dev, struct qcom_smd_device, dev); +} + +static struct qcom_smd_driver *to_smd_driver(struct device *dev) +{ + struct qcom_smd_device *qsdev = to_smd_device(dev); + + return container_of(qsdev->dev.driver, struct qcom_smd_driver, driver); +} + +static int qcom_smd_dev_match(struct device *dev, struct device_driver *drv) +{ + return of_driver_match_device(dev, drv); +} + +/* + * Probe the smd client. + * + * The remote side has indicated that it wants the channel to be opened, so + * complete the state handshake and probe our client driver.
+ */ +static int qcom_smd_dev_probe(struct device *dev) +{ + struct qcom_smd_device *qsdev = to_smd_device(dev); + struct qcom_smd_driver *qsdrv = to_smd_driver(dev); + struct qcom_smd_channel *channel = qsdev->channel; + size_t bb_size; + int ret; + + /* + * Packets are maximum 4k, but reduce if the fifo is smaller + */ + bb_size = min(channel->fifo_size, SZ_4K); + channel->bounce_buffer = kmalloc(bb_size, GFP_KERNEL); + if (!channel->bounce_buffer) + return -ENOMEM; + + channel->cb = qsdrv->callback; + + qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENING); + + qcom_smd_channel_set_state(channel, SMD_CHANNEL_OPENED); + + ret = qsdrv->probe(qsdev); + if (ret) + goto err; + + qcom_smd_channel_resume(channel); + + return 0; + +err: + dev_err(&qsdev->dev, "probe failed\n"); + + channel->cb = NULL; + kfree(channel->bounce_buffer); + channel->bounce_buffer = NULL; + + qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSED); + return ret; +} + +/* + * Remove the smd client. + * + * The channel is going away, for some reason, so remove the smd client and + * reset the channel state. + */ +static int qcom_smd_dev_remove(struct device *dev) +{ + struct qcom_smd_device *qsdev = to_smd_device(dev); + struct qcom_smd_driver *qsdrv = to_smd_driver(dev); + struct qcom_smd_channel *channel = qsdev->channel; + unsigned long flags; + + qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSING); + + /* + * Make sure we don't race with the code receiving data. + */ + spin_lock_irqsave(&channel->recv_lock, flags); + channel->cb = NULL; + spin_unlock_irqrestore(&channel->recv_lock, flags); + + /* Wake up any sleepers in qcom_smd_send() */ + wake_up_interruptible(&channel->fblockread_event); + + /* + * We expect that the client might block in remove() waiting for any + * outstanding calls to qcom_smd_send() to wake up and finish. + */ + if (qsdrv->remove) + qsdrv->remove(qsdev); + + /* + * The client is now gone, cleanup and reset the channel state. 
+ */ + channel->qsdev = NULL; + kfree(channel->bounce_buffer); + channel->bounce_buffer = NULL; + + qcom_smd_channel_set_state(channel, SMD_CHANNEL_CLOSED); + + qcom_smd_channel_reset(channel); + + return 0; +} + +static struct bus_type qcom_smd_bus = { + .name = "qcom_smd", + .match = qcom_smd_dev_match, + .probe = qcom_smd_dev_probe, + .remove = qcom_smd_dev_remove, +}; + +/* + * Release function for the qcom_smd_device object. + */ +static void qcom_smd_release_device(struct device *dev) +{ + struct qcom_smd_device *qsdev = to_smd_device(dev); + + kfree(qsdev); +} + +/* + * Finds the device_node for the smd child interested in this channel. + */ +static struct device_node *qcom_smd_match_channel(struct device_node *edge_node, + const char *channel) +{ + struct device_node *child; + const char *name; + const char *key; + int ret; + + for_each_available_child_of_node(edge_node, child) { + key = "qcom,smd-channels"; + ret = of_property_read_string(child, key, &name); + if (ret) { + of_node_put(child); + continue; + } + + if (strcmp(name, channel) == 0) + return child; + } + + return NULL; +} + +/* + * Create a smd client device for channel that is being opened. 
+ */ +static int qcom_smd_create_device(struct qcom_smd_channel *channel) +{ + struct qcom_smd_device *qsdev; + struct qcom_smd_edge *edge = channel->edge; + struct device_node *node; + struct qcom_smd *smd = edge->smd; + int ret; + + if (channel->qsdev) + return -EEXIST; + + node = qcom_smd_match_channel(edge->of_node, channel->name); + if (!node) { + dev_dbg(smd->dev, "no match for '%s'\n", channel->name); + return -ENXIO; + } + + dev_dbg(smd->dev, "registering '%s'\n", channel->name); + + qsdev = kzalloc(sizeof(*qsdev), GFP_KERNEL); + if (!qsdev) + return -ENOMEM; + + dev_set_name(&qsdev->dev, "%s.%s", edge->of_node->name, node->name); + qsdev->dev.parent = smd->dev; + qsdev->dev.bus = &qcom_smd_bus; + qsdev->dev.release = qcom_smd_release_device; + qsdev->dev.of_node = node; + + qsdev->channel = channel; + + channel->qsdev = qsdev; + + ret = device_register(&qsdev->dev); + if (ret) { + dev_err(smd->dev, "device_register failed: %d\n", ret); + put_device(&qsdev->dev); + } + + return ret; +} + +/* + * Destroy a smd client device for a channel that's going away. + */ +static void qcom_smd_destroy_device(struct qcom_smd_channel *channel) +{ + struct device *dev; + + BUG_ON(!channel->qsdev); + + dev = &channel->qsdev->dev; + + device_unregister(dev); + of_node_put(dev->of_node); + put_device(dev); +} + +/** + * qcom_smd_driver_register - register a smd driver + * @qsdrv: qcom_smd_driver struct + */ +int qcom_smd_driver_register(struct qcom_smd_driver *qsdrv) +{ + qsdrv->driver.bus = &qcom_smd_bus; + return driver_register(&qsdrv->driver); +} +EXPORT_SYMBOL(qcom_smd_driver_register); + +/** + * qcom_smd_driver_unregister - unregister a smd driver + * @qsdrv: qcom_smd_driver struct + */ +void qcom_smd_driver_unregister(struct qcom_smd_driver *qsdrv) +{ + driver_unregister(&qsdrv->driver); +} +EXPORT_SYMBOL(qcom_smd_driver_unregister); + +/* + * Allocate the qcom_smd_channel object for a newly found smd channel, + * retrieving and validating the smem items involved. 
+ */ +static struct qcom_smd_channel *qcom_smd_create_channel(struct qcom_smd_edge *edge, + unsigned smem_info_item, + unsigned smem_fifo_item, + char *name) +{ + struct qcom_smd_channel *channel; + struct qcom_smd *smd = edge->smd; + size_t fifo_size; + size_t info_size; + void *fifo_base; + void *info; + int ret; + + channel = devm_kzalloc(smd->dev, sizeof(*channel), GFP_KERNEL); + if (!channel) + return ERR_PTR(-ENOMEM); + + channel->edge = edge; + channel->name = devm_kstrdup(smd->dev, name, GFP_KERNEL); + if (!channel->name) + return ERR_PTR(-ENOMEM); + + mutex_init(&channel->tx_lock); + spin_lock_init(&channel->recv_lock); + init_waitqueue_head(&channel->fblockread_event); + + ret = qcom_smem_get(edge->edge_id, smem_info_item, (void **)&info, &info_size); + if (ret) + goto free_name_and_channel; + + /* + * Use the size of the item to figure out which channel info struct to + * use. + */ + if (info_size == 2 * sizeof(struct smd_channel_info_word)) { + channel->tx_info_word = info; + channel->rx_info_word = info + sizeof(struct smd_channel_info_word); + } else if (info_size == 2 * sizeof(struct smd_channel_info)) { + channel->tx_info = info; + channel->rx_info = info + sizeof(struct smd_channel_info); + } else { + dev_err(smd->dev, + "channel info of size %zu not supported\n", info_size); + ret = -EINVAL; + goto free_name_and_channel; + } + + ret = qcom_smem_get(edge->edge_id, smem_fifo_item, &fifo_base, &fifo_size); + if (ret) + goto free_name_and_channel; + + /* The channel consist of a rx and tx fifo of equal size */ + fifo_size /= 2; + + dev_dbg(smd->dev, "new channel '%s' info-size: %zu fifo-size: %zu\n", + name, info_size, fifo_size); + + channel->tx_fifo = fifo_base; + channel->rx_fifo = fifo_base + fifo_size; + channel->fifo_size = fifo_size; + + qcom_smd_channel_reset(channel); + + return channel; + +free_name_and_channel: + devm_kfree(smd->dev, channel->name); + devm_kfree(smd->dev, channel); + + return ERR_PTR(ret); +} + +/* + * Scans the allocation 
table for any newly allocated channels, calls + * qcom_smd_create_channel() to create representations of these and add + * them to the edge's list of channels. + */ +static void qcom_discover_channels(struct qcom_smd_edge *edge) +{ + struct qcom_smd_alloc_entry *alloc_tbl; + struct qcom_smd_alloc_entry *entry; + struct qcom_smd_channel *channel; + struct qcom_smd *smd = edge->smd; + unsigned long flags; + unsigned fifo_id; + unsigned info_id; + int ret; + int tbl; + int i; + + for (tbl = 0; tbl < SMD_ALLOC_TBL_COUNT; tbl++) { + ret = qcom_smem_get(edge->edge_id, + smem_items[tbl].alloc_tbl_id, + (void **)&alloc_tbl, + NULL); + if (ret < 0) + continue; + + for (i = 0; i < SMD_ALLOC_TBL_SIZE; i++) { + entry = &alloc_tbl[i]; + if (test_bit(i, edge->allocated[tbl])) + continue; + + if (entry->ref_count == 0) + continue; + + if (!entry->name[0]) + continue; + + if (!(entry->flags & SMD_CHANNEL_FLAGS_PACKET)) + continue; + + if ((entry->flags & SMD_CHANNEL_FLAGS_EDGE_MASK) != edge->edge_id) + continue; + + info_id = smem_items[tbl].info_base_id + entry->cid; + fifo_id = smem_items[tbl].fifo_base_id + entry->cid; + + channel = qcom_smd_create_channel(edge, info_id, fifo_id, entry->name); + if (IS_ERR(channel)) + continue; + + spin_lock_irqsave(&edge->channels_lock, flags); + list_add(&channel->list, &edge->channels); + spin_unlock_irqrestore(&edge->channels_lock, flags); + + dev_dbg(smd->dev, "new channel found: '%s'\n", channel->name); + set_bit(i, edge->allocated[tbl]); + } + } + + schedule_work(&edge->work); +} + +/* + * This per edge worker scans smem for any new channels and registers these. It + * then scans all registered channels for state changes that should be handled + * by creating or destroying smd client devices for the registered channels. + * + * LOCKING: edge->channels_lock does not need to be held during the traversal + * of the channels list as it's done synchronously with the only writer.
+ */ +static void qcom_channel_state_worker(struct work_struct *work) +{ + struct qcom_smd_channel *channel; + struct qcom_smd_edge *edge = container_of(work, + struct qcom_smd_edge, + work); + unsigned remote_state; + + /* + * Rescan smem if we have reason to believe that there are new channels. + */ + if (edge->need_rescan) { + edge->need_rescan = false; + qcom_discover_channels(edge); + } + + /* + * Register a device for any closed channel where the remote processor + * is showing interest in opening the channel. + */ + list_for_each_entry(channel, &edge->channels, list) { + if (channel->state != SMD_CHANNEL_CLOSED) + continue; + + remote_state = GET_RX_CHANNEL_INFO(channel, state); + if (remote_state != SMD_CHANNEL_OPENING && + remote_state != SMD_CHANNEL_OPENED) + continue; + + qcom_smd_create_device(channel); + } + + /* + * Unregister the device for any channel that is opened where the + * remote processor is closing the channel. + */ + list_for_each_entry(channel, &edge->channels, list) { + if (channel->state != SMD_CHANNEL_OPENING && + channel->state != SMD_CHANNEL_OPENED) + continue; + + remote_state = GET_RX_CHANNEL_INFO(channel, state); + if (remote_state == SMD_CHANNEL_OPENING || + remote_state == SMD_CHANNEL_OPENED) + continue; + + qcom_smd_destroy_device(channel); + } +} + +/* + * Parses an of_node describing an edge.
+ */ +static int qcom_smd_parse_edge(struct device *dev, + struct device_node *node, + struct qcom_smd_edge *edge) +{ + struct device_node *syscon_np; + const char *key; + int irq; + int ret; + + INIT_LIST_HEAD(&edge->channels); + spin_lock_init(&edge->channels_lock); + + INIT_WORK(&edge->work, qcom_channel_state_worker); + + edge->of_node = of_node_get(node); + + irq = irq_of_parse_and_map(node, 0); + if (irq < 0) { + dev_err(dev, "required smd interrupt missing\n"); + return -EINVAL; + } + + ret = devm_request_irq(dev, irq, + qcom_smd_edge_intr, IRQF_TRIGGER_RISING, + node->name, edge); + if (ret) { + dev_err(dev, "failed to request smd irq\n"); + return ret; + } + + edge->irq = irq; + + key = "qcom,smd-edge"; + ret = of_property_read_u32(node, key, &edge->edge_id); + if (ret) { + dev_err(dev, "edge missing %s property\n", key); + return -EINVAL; + } + + syscon_np = of_parse_phandle(node, "qcom,ipc", 0); + if (!syscon_np) { + dev_err(dev, "no qcom,ipc node\n"); + return -ENODEV; + } + + edge->ipc_regmap = syscon_node_to_regmap(syscon_np); + if (IS_ERR(edge->ipc_regmap)) + return PTR_ERR(edge->ipc_regmap); + + key = "qcom,ipc"; + ret = of_property_read_u32_index(node, key, 1, &edge->ipc_offset); + if (ret < 0) { + dev_err(dev, "no offset in %s\n", key); + return -EINVAL; + } + + ret = of_property_read_u32_index(node, key, 2, &edge->ipc_bit); + if (ret < 0) { + dev_err(dev, "no bit in %s\n", key); + return -EINVAL; + } + + return 0; +} + +static int qcom_smd_probe(struct platform_device *pdev) +{ + struct qcom_smd_edge *edge; + struct device_node *node; + struct qcom_smd *smd; + size_t array_size; + int num_edges; + int ret; + int i = 0; + + /* Wait for smem */ + ret = qcom_smem_get(QCOM_SMEM_HOST_ANY, smem_items[0].alloc_tbl_id, NULL, NULL); + if (ret == -EPROBE_DEFER) + return ret; + + num_edges = of_get_available_child_count(pdev->dev.of_node); + array_size = sizeof(*smd) + num_edges * sizeof(struct qcom_smd_edge); + smd = devm_kzalloc(&pdev->dev, array_size, 
GFP_KERNEL); + if (!smd) + return -ENOMEM; + smd->dev = &pdev->dev; + + smd->num_edges = num_edges; + for_each_available_child_of_node(pdev->dev.of_node, node) { + edge = &smd->edges[i++]; + edge->smd = smd; + + ret = qcom_smd_parse_edge(&pdev->dev, node, edge); + if (ret) + continue; + + edge->need_rescan = true; + schedule_work(&edge->work); + } + + platform_set_drvdata(pdev, smd); + + return 0; +} + +/* + * Shut down all smd clients by making sure that each edge stops processing + * events and scanning for new channels, then call destroy on the devices. + */ +static int qcom_smd_remove(struct platform_device *pdev) +{ + struct qcom_smd_channel *channel; + struct qcom_smd_edge *edge; + struct qcom_smd *smd = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < smd->num_edges; i++) { + edge = &smd->edges[i]; + + disable_irq(edge->irq); + cancel_work_sync(&edge->work); + + list_for_each_entry(channel, &edge->channels, list) { + if (!channel->qsdev) + continue; + + qcom_smd_destroy_device(channel); + } + } + + return 0; +} + +static const struct of_device_id qcom_smd_of_match[] = { + { .compatible = "qcom,smd" }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_smd_of_match); + +static struct platform_driver qcom_smd_driver = { + .probe = qcom_smd_probe, + .remove = qcom_smd_remove, + .driver = { + .name = "qcom-smd", + .of_match_table = qcom_smd_of_match, + }, +}; + +static int __init qcom_smd_init(void) +{ + int ret; + + ret = bus_register(&qcom_smd_bus); + if (ret) { + pr_err("failed to register smd bus: %d\n", ret); + return ret; + } + + return platform_driver_register(&qcom_smd_driver); +} +postcore_initcall(qcom_smd_init); + +static void __exit qcom_smd_exit(void) +{ + platform_driver_unregister(&qcom_smd_driver); + bus_unregister(&qcom_smd_bus); +} +module_exit(qcom_smd_exit); + +MODULE_AUTHOR("Bjorn Andersson "); +MODULE_DESCRIPTION("Qualcomm Shared Memory Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h 
new file mode 100644 index 000000000000..d7e50aa6a4ac --- /dev/null +++ b/include/linux/soc/qcom/smd.h @@ -0,0 +1,46 @@ +#ifndef __QCOM_SMD_H__ +#define __QCOM_SMD_H__ + +#include +#include + +struct qcom_smd; +struct qcom_smd_channel; +struct qcom_smd_lookup; + +/** + * struct qcom_smd_device - smd device struct + * @dev: the device struct + * @channel: handle to the smd channel for this device + */ +struct qcom_smd_device { + struct device dev; + struct qcom_smd_channel *channel; +}; + +/** + * struct qcom_smd_driver - smd driver struct + * @driver: underlying device driver + * @probe: invoked when the smd channel is found + * @remove: invoked when the smd channel is closed + * @callback: invoked when an inbound message is received on the channel, + * should return 0 on success or -EBUSY if the data cannot be + * consumed at this time + */ +struct qcom_smd_driver { + struct device_driver driver; + int (*probe)(struct qcom_smd_device *dev); + void (*remove)(struct qcom_smd_device *dev); + int (*callback)(struct qcom_smd_device *, const void *, size_t); +}; + +int qcom_smd_driver_register(struct qcom_smd_driver *drv); +void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); + +#define module_qcom_smd_driver(__smd_driver) \ + module_driver(__smd_driver, qcom_smd_driver_register, \ + qcom_smd_driver_unregister) + +int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); + +#endif -- cgit v1.2.3-70-g09d2 From 936f14cf4e67168fcd37f10cebf5a475f490fb6e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 27 Jul 2015 20:20:32 -0700 Subject: soc: qcom: Driver for the Qualcomm RPM over SMD Driver for the Resource Power Manager (RPM) found in Qualcomm 8974 based devices. The driver exposes resources that child drivers can operate on, to implement regulator, clock and bus frequency drivers.
Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/Kconfig | 14 +++ drivers/soc/qcom/Makefile | 1 + drivers/soc/qcom/smd-rpm.c | 244 +++++++++++++++++++++++++++++++++++++++ include/linux/soc/qcom/smd-rpm.h | 35 ++++++ 4 files changed, 294 insertions(+) create mode 100644 drivers/soc/qcom/smd-rpm.c create mode 100644 include/linux/soc/qcom/smd-rpm.h (limited to 'include/linux') diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 188295e2c9ba..ba47b70f4d85 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -27,6 +27,20 @@ config QCOM_SMD providing communication channels to remote processors in Qualcomm platforms. +config QCOM_SMD_RPM + tristate "Qualcomm Resource Power Manager (RPM) over SMD" + depends on QCOM_SMD && OF + help + If you say yes to this option, support will be included for the + Resource Power Manager system found in the Qualcomm 8974 based + devices. + + This is required to access many regulators, clocks and bus + frequencies controlled by the RPM on these devices. + + Say M here if you want to include support for the Qualcomm RPM as a + module. This will build a module called "qcom-smd-rpm". + config QCOM_SMEM tristate "Qualcomm Shared Memory Manager (SMEM)" depends on ARCH_QCOM diff --git a/drivers/soc/qcom/Makefile b/drivers/soc/qcom/Makefile index f961a8796ed2..10a93d168e0e 100644 --- a/drivers/soc/qcom/Makefile +++ b/drivers/soc/qcom/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_QCOM_GSBI) += qcom_gsbi.o obj-$(CONFIG_QCOM_PM) += spm.o obj-$(CONFIG_QCOM_SMD) += smd.o +obj-$(CONFIG_QCOM_SMD_RPM) += smd-rpm.o obj-$(CONFIG_QCOM_SMEM) += smem.o diff --git a/drivers/soc/qcom/smd-rpm.c b/drivers/soc/qcom/smd-rpm.c new file mode 100644 index 000000000000..1392ccf14a20 --- /dev/null +++ b/drivers/soc/qcom/smd-rpm.c @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2015, Sony Mobile Communications AB. + * Copyright (c) 2012-2013, The Linux Foundation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define RPM_REQUEST_TIMEOUT (5 * HZ) + +/** + * struct qcom_smd_rpm - state of the rpm device driver + * @rpm_channel: reference to the smd channel + * @ack: completion for acks + * @lock: mutual exclusion around the send/complete pair + * @ack_status: result of the rpm request + */ +struct qcom_smd_rpm { + struct qcom_smd_channel *rpm_channel; + + struct completion ack; + struct mutex lock; + int ack_status; +}; + +/** + * struct qcom_rpm_header - header for all rpm requests and responses + * @service_type: identifier of the service + * @length: length of the payload + */ +struct qcom_rpm_header { + u32 service_type; + u32 length; +}; + +/** + * struct qcom_rpm_request - request message to the rpm + * @msg_id: identifier of the outgoing message + * @flags: active/sleep state flags + * @type: resource type + * @id: resource id + * @data_len: length of the payload following this header + */ +struct qcom_rpm_request { + u32 msg_id; + u32 flags; + u32 type; + u32 id; + u32 data_len; +}; + +/** + * struct qcom_rpm_message - response message from the rpm + * @msg_type: indicator of the type of message + * @length: the size of this message, including the message header + * @msg_id: message id + * @message: textual message from the rpm + * + * Multiple of these messages can be stacked in an rpm message. 
+ */ +struct qcom_rpm_message { + u32 msg_type; + u32 length; + union { + u32 msg_id; + u8 message[0]; + }; +}; + +#define RPM_SERVICE_TYPE_REQUEST 0x00716572 /* "req\0" */ + +#define RPM_MSG_TYPE_ERR 0x00727265 /* "err\0" */ +#define RPM_MSG_TYPE_MSG_ID 0x2367736d /* "msg#" */ + +/** + * qcom_rpm_smd_write - write @buf to @type:@id + * @rpm: rpm handle + * @type: resource type + * @id: resource identifier + * @buf: the data to be written + * @count: number of bytes in @buf + */ +int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, + int state, + u32 type, u32 id, + void *buf, + size_t count) +{ + static unsigned msg_id = 1; + int left; + int ret; + + struct { + struct qcom_rpm_header hdr; + struct qcom_rpm_request req; + u8 payload[count]; + } pkt; + + /* SMD packets to the RPM may not exceed 256 bytes */ + if (WARN_ON(sizeof(pkt) >= 256)) + return -EINVAL; + + mutex_lock(&rpm->lock); + + pkt.hdr.service_type = RPM_SERVICE_TYPE_REQUEST; + pkt.hdr.length = sizeof(struct qcom_rpm_request) + count; + + pkt.req.msg_id = msg_id++; + pkt.req.flags = BIT(state); + pkt.req.type = type; + pkt.req.id = id; + pkt.req.data_len = count; + memcpy(pkt.payload, buf, count); + + ret = qcom_smd_send(rpm->rpm_channel, &pkt, sizeof(pkt)); + if (ret) + goto out; + + left = wait_for_completion_timeout(&rpm->ack, RPM_REQUEST_TIMEOUT); + if (!left) + ret = -ETIMEDOUT; + else + ret = rpm->ack_status; + +out: + mutex_unlock(&rpm->lock); + return ret; +} +EXPORT_SYMBOL(qcom_rpm_smd_write); + +static int qcom_smd_rpm_callback(struct qcom_smd_device *qsdev, + const void *data, + size_t count) +{ + const struct qcom_rpm_header *hdr = data; + const struct qcom_rpm_message *msg; + struct qcom_smd_rpm *rpm = dev_get_drvdata(&qsdev->dev); + const u8 *buf = data + sizeof(struct qcom_rpm_header); + const u8 *end = buf + hdr->length; + char msgbuf[32]; + int status = 0; + u32 len; + + if (hdr->service_type != RPM_SERVICE_TYPE_REQUEST || + hdr->length < sizeof(struct qcom_rpm_message)) { + 
dev_err(&qsdev->dev, "invalid request\n"); + return 0; + } + + while (buf < end) { + msg = (struct qcom_rpm_message *)buf; + switch (msg->msg_type) { + case RPM_MSG_TYPE_MSG_ID: + break; + case RPM_MSG_TYPE_ERR: + len = min_t(u32, ALIGN(msg->length, 4), sizeof(msgbuf)); + memcpy_fromio(msgbuf, msg->message, len); + msgbuf[len - 1] = 0; + + if (!strcmp(msgbuf, "resource does not exist")) + status = -ENXIO; + else + status = -EINVAL; + break; + } + + buf = PTR_ALIGN(buf + 2 * sizeof(u32) + msg->length, 4); + } + + rpm->ack_status = status; + complete(&rpm->ack); + return 0; +} + +static int qcom_smd_rpm_probe(struct qcom_smd_device *sdev) +{ + struct qcom_smd_rpm *rpm; + + rpm = devm_kzalloc(&sdev->dev, sizeof(*rpm), GFP_KERNEL); + if (!rpm) + return -ENOMEM; + + mutex_init(&rpm->lock); + init_completion(&rpm->ack); + + rpm->rpm_channel = sdev->channel; + + dev_set_drvdata(&sdev->dev, rpm); + + return of_platform_populate(sdev->dev.of_node, NULL, NULL, &sdev->dev); +} + +static void qcom_smd_rpm_remove(struct qcom_smd_device *sdev) +{ + of_platform_depopulate(&sdev->dev); +} + +static const struct of_device_id qcom_smd_rpm_of_match[] = { + { .compatible = "qcom,rpm-msm8974" }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_smd_rpm_of_match); + +static struct qcom_smd_driver qcom_smd_rpm_driver = { + .probe = qcom_smd_rpm_probe, + .remove = qcom_smd_rpm_remove, + .callback = qcom_smd_rpm_callback, + .driver = { + .name = "qcom_smd_rpm", + .owner = THIS_MODULE, + .of_match_table = qcom_smd_rpm_of_match, + }, +}; + +static int __init qcom_smd_rpm_init(void) +{ + return qcom_smd_driver_register(&qcom_smd_rpm_driver); +} +arch_initcall(qcom_smd_rpm_init); + +static void __exit qcom_smd_rpm_exit(void) +{ + qcom_smd_driver_unregister(&qcom_smd_rpm_driver); +} +module_exit(qcom_smd_rpm_exit); + +MODULE_AUTHOR("Bjorn Andersson "); +MODULE_DESCRIPTION("Qualcomm SMD backed RPM driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/soc/qcom/smd-rpm.h 
b/include/linux/soc/qcom/smd-rpm.h new file mode 100644 index 000000000000..2a53dcaeeeed --- /dev/null +++ b/include/linux/soc/qcom/smd-rpm.h @@ -0,0 +1,35 @@ +#ifndef __QCOM_SMD_RPM_H__ +#define __QCOM_SMD_RPM_H__ + +struct qcom_smd_rpm; + +#define QCOM_SMD_RPM_ACTIVE_STATE 0 +#define QCOM_SMD_RPM_SLEEP_STATE 1 + +/* + * Constants used for addressing resources in the RPM. + */ +#define QCOM_SMD_RPM_BOOST 0x61747362 +#define QCOM_SMD_RPM_BUS_CLK 0x316b6c63 +#define QCOM_SMD_RPM_BUS_MASTER 0x73616d62 +#define QCOM_SMD_RPM_BUS_SLAVE 0x766c7362 +#define QCOM_SMD_RPM_CLK_BUF_A 0x616B6C63 +#define QCOM_SMD_RPM_LDOA 0x616f646c +#define QCOM_SMD_RPM_LDOB 0x626F646C +#define QCOM_SMD_RPM_MEM_CLK 0x326b6c63 +#define QCOM_SMD_RPM_MISC_CLK 0x306b6c63 +#define QCOM_SMD_RPM_NCPA 0x6170636E +#define QCOM_SMD_RPM_NCPB 0x6270636E +#define QCOM_SMD_RPM_OCMEM_PWR 0x706d636f +#define QCOM_SMD_RPM_QPIC_CLK 0x63697071 +#define QCOM_SMD_RPM_SMPA 0x61706d73 +#define QCOM_SMD_RPM_SMPB 0x62706d73 +#define QCOM_SMD_RPM_SPDM 0x63707362 +#define QCOM_SMD_RPM_VSA 0x00617376 + +int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, + int state, + u32 resource_type, u32 resource_id, + void *buf, size_t count); + +#endif -- cgit v1.2.3-70-g09d2 From ad3aedfbb04b3a2af54473cfe31f13953cfe9d84 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:08 +0100 Subject: genirq/irqdomain: Allow irq domain aliasing It is not uncommon (at least with the ARM stuff) to have a piece of hardware that implements different flavours of "interrupts". A typical example of this is the GICv3 ITS, which implements standard PCI/MSI support, but also some form of "generic MSI". So far, the PCI/MSI domain is registered using the ITS device_node, so that irq_find_host can return it. 
On the contrary, the raw MSI domain is not registered with a device_node, making it impossible to be looked up by another subsystem (obviously, using the same device_node twice would only result in confusion, as it is not defined which one irq_find_host would return). A solution to this is to "type" domains that may be aliasing, and to be able to look up a device_node that matches a given type. For this, we introduce irq_find_matching_host() as a superset of irq_find_host: struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token); where bus_token is the "type" we want to match the domain against (so far, only DOMAIN_BUS_ANY is defined). This results in some moderately invasive changes on the PPC side (which is the only user of the .match method). This has otherwise no functional change. Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- arch/powerpc/platforms/512x/mpc5121_ads_cpld.c | 3 ++- arch/powerpc/platforms/cell/interrupt.c | 3 ++- arch/powerpc/platforms/embedded6xx/flipper-pic.c | 3 ++- arch/powerpc/platforms/powermac/pic.c | 3 ++- arch/powerpc/platforms/powernv/opal-irqchip.c | 3 ++- arch/powerpc/platforms/ps3/interrupt.c | 3 ++- arch/powerpc/sysdev/ehv_pic.c | 3 ++- arch/powerpc/sysdev/i8259.c | 3 ++- arch/powerpc/sysdev/ipic.c | 3 ++- arch/powerpc/sysdev/mpic.c | 3 ++- arch/powerpc/sysdev/qe_lib/qe_ic.c | 3 ++- arch/powerpc/sysdev/xics/xics-common.c | 3 ++- include/linux/irqdomain.h | 23 +++++++++++++++++++++-- kernel/irq/irqdomain.c | 18 +++++++++++++----- 14 files changed, 58 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c index ca3a062ed1b9..11090ab4bf59
100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c @@ -123,7 +123,8 @@ cpld_pic_cascade(unsigned int irq, struct irq_desc *desc) } static int -cpld_pic_host_match(struct irq_domain *h, struct device_node *node) +cpld_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return cpld_pic_node == node; } diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index 3af8324c122e..a15f1efc295f 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -222,7 +222,8 @@ void iic_request_IPIs(void) #endif /* CONFIG_SMP */ -static int iic_host_match(struct irq_domain *h, struct device_node *node) +static int iic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return of_device_is_compatible(node, "IBM,CBEA-Internal-Interrupt-Controller"); diff --git a/arch/powerpc/platforms/embedded6xx/flipper-pic.c b/arch/powerpc/platforms/embedded6xx/flipper-pic.c index 4cde8e7da4b8..b7866e01483d 100644 --- a/arch/powerpc/platforms/embedded6xx/flipper-pic.c +++ b/arch/powerpc/platforms/embedded6xx/flipper-pic.c @@ -108,7 +108,8 @@ static int flipper_pic_map(struct irq_domain *h, unsigned int virq, return 0; } -static int flipper_pic_match(struct irq_domain *h, struct device_node *np) +static int flipper_pic_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) { return 1; } diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 59cfc9d63c2d..6f4f8b060def 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -268,7 +268,8 @@ static struct irqaction gatwick_cascade_action = { .name = "cascade", }; -static int pmac_pic_host_match(struct irq_domain *h, struct device_node *node) +static int pmac_pic_host_match(struct irq_domain *h, 
struct device_node *node, + enum irq_domain_bus_token bus_token) { /* We match all, we don't always have a node anyway */ return 1; diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c index e2e7d75f52f3..2c91ee7800b9 100644 --- a/arch/powerpc/platforms/powernv/opal-irqchip.c +++ b/arch/powerpc/platforms/powernv/opal-irqchip.c @@ -134,7 +134,8 @@ static void opal_handle_irq_work(struct irq_work *work) opal_handle_events(be64_to_cpu(last_outstanding_events)); } -static int opal_event_match(struct irq_domain *h, struct device_node *node) +static int opal_event_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return h->of_node == node; } diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index a6c42f34303a..638c4060938e 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -678,7 +678,8 @@ static int ps3_host_map(struct irq_domain *h, unsigned int virq, return 0; } -static int ps3_host_match(struct irq_domain *h, struct device_node *np) +static int ps3_host_match(struct irq_domain *h, struct device_node *np, + enum irq_domain_bus_token bus_token) { /* Match all */ return 1; diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index 2d20f10a4203..eca0b00794fa 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -177,7 +177,8 @@ unsigned int ehv_pic_get_irq(void) return irq_linear_revmap(global_ehv_pic->irqhost, irq); } -static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node) +static int ehv_pic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless ehv_pic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 31c33475c7b7..e1a9c2c2d5d3 100644 --- 
a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -162,7 +162,8 @@ static struct resource pic_edgectrl_iores = { .flags = IORESOURCE_BUSY, }; -static int i8259_host_match(struct irq_domain *h, struct device_node *node) +static int i8259_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { return h->of_node == NULL || h->of_node == node; } diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index d78f1364b639..6b2b68914810 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -671,7 +671,8 @@ static struct irq_chip ipic_edge_irq_chip = { .irq_set_type = ipic_set_irq_type, }; -static int ipic_host_match(struct irq_domain *h, struct device_node *node) +static int ipic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless ipic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index c8e73332eaad..97a8ae8f94dd 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1007,7 +1007,8 @@ static struct irq_chip mpic_irq_ht_chip = { #endif /* CONFIG_MPIC_U3_HT_IRQS */ -static int mpic_host_match(struct irq_domain *h, struct device_node *node) +static int mpic_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { /* Exact match, unless mpic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c index 6512cd8caa51..47b352e4bc74 100644 --- a/arch/powerpc/sysdev/qe_lib/qe_ic.c +++ b/arch/powerpc/sysdev/qe_lib/qe_ic.c @@ -244,7 +244,8 @@ static struct irq_chip qe_ic_irq_chip = { .irq_mask_ack = qe_ic_mask_irq, }; -static int qe_ic_host_match(struct irq_domain *h, struct device_node *node) +static int qe_ic_host_match(struct irq_domain *h, struct device_node *node, + enum 
irq_domain_bus_token bus_token) { /* Exact match, unless qe_ic node is NULL */ return h->of_node == NULL || h->of_node == node; diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 08c248eb491b..47e43b7b076b 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -298,7 +298,8 @@ int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask, } #endif /* CONFIG_SMP */ -static int xics_host_match(struct irq_domain *h, struct device_node *node) +static int xics_host_match(struct irq_domain *h, struct device_node *node, + enum irq_domain_bus_token bus_token) { struct ics *ics; diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 744ac0ec98eb..91a83adf5e45 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -45,6 +45,17 @@ struct irq_data; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 +/* + * Should several domains have the same device node, but serve + * different purposes (for example one domain is for PCI/MSI, and the + * other for wired IRQs), they can be distinguished using a + * bus-specific token. Most domains are expected to only carry + * DOMAIN_BUS_ANY. + */ +enum irq_domain_bus_token { + DOMAIN_BUS_ANY = 0, +}; + /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns @@ -61,7 +72,8 @@ struct irq_data; * to setup the irq_desc when returning from map(). 
*/ struct irq_domain_ops { - int (*match)(struct irq_domain *d, struct device_node *node); + int (*match)(struct irq_domain *d, struct device_node *node, + enum irq_domain_bus_token bus_token); int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); void (*unmap)(struct irq_domain *d, unsigned int virq); int (*xlate)(struct irq_domain *d, struct device_node *node, @@ -116,6 +128,7 @@ struct irq_domain { /* Optional data */ struct device_node *of_node; + enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; @@ -161,9 +174,15 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -extern struct irq_domain *irq_find_host(struct device_node *node); +extern struct irq_domain *irq_find_matching_host(struct device_node *node, + enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +static inline struct irq_domain *irq_find_host(struct device_node *node) +{ + return irq_find_matching_host(node, DOMAIN_BUS_ANY); +} + /** * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. 
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 8c3577fef78c..79baaf8a7813 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -187,10 +187,12 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, EXPORT_SYMBOL_GPL(irq_domain_add_legacy); /** - * irq_find_host() - Locates a domain for a given device node + * irq_find_matching_host() - Locates a domain for a given device node * @node: device-tree node of the interrupt controller + * @bus_token: domain-specific data */ -struct irq_domain *irq_find_host(struct device_node *node) +struct irq_domain *irq_find_matching_host(struct device_node *node, + enum irq_domain_bus_token bus_token) { struct irq_domain *h, *found = NULL; int rc; @@ -199,13 +201,19 @@ struct irq_domain *irq_find_host(struct device_node *node) * it might potentially be set to match all interrupts in * the absence of a device node. This isn't a problem so far * yet though... + * + * bus_token == DOMAIN_BUS_ANY matches any domain, any other + * values must generate an exact match for the domain to be + * selected. 
*/ mutex_lock(&irq_domain_mutex); list_for_each_entry(h, &irq_domain_list, link) { if (h->ops->match) - rc = h->ops->match(h, node); + rc = h->ops->match(h, node, bus_token); else - rc = (h->of_node != NULL) && (h->of_node == node); + rc = ((h->of_node != NULL) && (h->of_node == node) && + ((bus_token == DOMAIN_BUS_ANY) || + (h->bus_token == bus_token))); if (rc) { found = h; @@ -215,7 +223,7 @@ struct irq_domain *irq_find_host(struct device_node *node) mutex_unlock(&irq_domain_mutex); return found; } -EXPORT_SYMBOL_GPL(irq_find_host); +EXPORT_SYMBOL_GPL(irq_find_matching_host); /** * irq_set_default_host() - Set a "default" irq domain -- cgit v1.2.3-70-g09d2 From 0380839dc90c53e24ddfa0f17ad909c2ddc345c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:09 +0100 Subject: PCI/MSI: Register irq domain with specific token When creating a PCI/MSI domain, tag it with DOMAIN_BUS_PCI_MSI so that it can be looked-up using irq_find_matching_host(). Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 9 ++++++++- include/linux/irqdomain.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index cd4c78c193de..3aae7c9ad31c 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1273,12 +1273,19 @@ struct irq_domain *pci_msi_create_irq_domain(struct device_node *node, struct msi_domain_info *info, struct irq_domain *parent) { + struct irq_domain *domain; + if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS) pci_msi_domain_update_dom_ops(info); if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); - return msi_create_irq_domain(node, info, parent); + domain = msi_create_irq_domain(node, info, 
parent); + if (!domain) + return NULL; + + domain->bus_token = DOMAIN_BUS_PCI_MSI; + return domain; } /** diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 91a83adf5e45..25e9e6696a65 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -54,6 +54,7 @@ struct irq_data; */ enum irq_domain_bus_token { DOMAIN_BUS_ANY = 0, + DOMAIN_BUS_PCI_MSI, }; /** -- cgit v1.2.3-70-g09d2 From f1421db8ca4c110144be97a5997ed83d34685db5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:10 +0100 Subject: device core: Introduce per-device MSI domain pointer As MSI-type features are creeping into non-PCI devices, it is starting to make sense to give our struct device some form of support for this, by allowing a pointer to an MSI irq domain to be set/retrieved. Reviewed-by: Hanjun Guo Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/device.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 3d3139ad5705..50e000576d3c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -715,6 +715,7 @@ struct device_dma_parameters { * @pins: For device pin management. * See Documentation/pinctrl.txt for details. * @msi_list: Hosts MSI descriptors + * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. * @dma_mask: Dma mask (if dma'ble device). 
* @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all @@ -775,6 +776,9 @@ struct device { struct dev_pm_info power; struct dev_pm_domain *pm_domain; +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + struct irq_domain *msi_domain; +#endif #ifdef CONFIG_PINCTRL struct dev_pin_info *pins; #endif @@ -865,6 +869,22 @@ static inline void set_dev_node(struct device *dev, int node) } #endif +static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) +{ +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + return dev->msi_domain; +#else + return NULL; +#endif +} + +static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) +{ +#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN + dev->msi_domain = d; +#endif +} + static inline void *dev_get_drvdata(const struct device *dev) { return dev->driver_data; -- cgit v1.2.3-70-g09d2 From b165e2b60b39888a7ff8efbc1de40137471dda41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:12 +0100 Subject: PCI/MSI: Add support for OF-provided msi_domain In order to populate the PCI host bridge msi_domain, use the "msi-parent" attribute to lookup a corresponding irq domain. If found, this is our MSI domain. This gets plugged into the core PCI code. Acked-by: Bjorn Helgaas Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-6-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/pci/of.c | 25 +++++++++++++++++++++++++ drivers/pci/probe.c | 5 ++++- include/linux/pci.h | 4 ++++ 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/pci/of.c b/drivers/pci/of.c index f0929934bb7a..85844d8c3efd 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. 
*/ +#include #include #include #include @@ -59,3 +60,27 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) return of_node_get(bus->bridge->parent->of_node); return NULL; } + +struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus) +{ +#ifdef CONFIG_IRQ_DOMAIN + struct device_node *np; + struct irq_domain *d; + + if (!bus->dev.of_node) + return NULL; + + /* Start looking for a phandle to an MSI controller. */ + np = of_parse_phandle(bus->dev.of_node, "msi-parent", 0); + if (!np) + return NULL; + + d = irq_find_matching_host(np, DOMAIN_BUS_PCI_MSI); + if (d) + return d; + + return irq_find_host(np); +#else + return NULL; +#endif +} diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c03ecbffc50b..a7afeacce7f1 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -663,12 +663,15 @@ static void pci_set_bus_speed(struct pci_bus *bus) static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus) { + struct irq_domain *d; + /* * Any firmware interface that can resolve the msi_domain * should be called from here. 
*/ + d = pci_host_bridge_of_msi_domain(bus); - return NULL; + return d; } static void pci_set_bus_msi_domain(struct pci_bus *bus) diff --git a/include/linux/pci.h b/include/linux/pci.h index fbf245f5eba7..772616d3b184 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1841,10 +1841,12 @@ int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off, /* PCI <-> OF binding helpers */ #ifdef CONFIG_OF struct device_node; +struct irq_domain; void pci_set_of_node(struct pci_dev *dev); void pci_release_of_node(struct pci_dev *dev); void pci_set_bus_of_node(struct pci_bus *bus); void pci_release_bus_of_node(struct pci_bus *bus); +struct irq_domain *pci_host_bridge_of_msi_domain(struct pci_bus *bus); /* Arch may override this (weak) */ struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus); @@ -1867,6 +1869,8 @@ static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } static inline struct device_node * pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; } +static inline struct irq_domain * +pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; } #endif /* CONFIG_OF */ #ifdef CONFIG_EEH -- cgit v1.2.3-70-g09d2 From c706c239af5bc297b5fbf1adc715632e1c222f7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:15 +0100 Subject: of/platform: Assign MSI domain to platform device As for PCI, we're able to populate the msi_domain field at probe time, provided that the device tree has an "msi-parent" property. 
Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-9-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/of/irq.c | 21 +++++++++++++++++++++ drivers/of/platform.c | 1 + include/linux/irqdomain.h | 1 + include/linux/of_irq.h | 1 + 4 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 3cf7a01f557f..2956d725649f 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -18,6 +18,7 @@ * driver. */ +#include #include #include #include @@ -576,3 +577,23 @@ err: kfree(desc); } } + +/** + * of_msi_configure - Set the msi_domain field of a device + * @dev: device structure to associate with an MSI irq domain + * @np: device node for that device + */ +void of_msi_configure(struct device *dev, struct device_node *np) +{ + struct device_node *msi_np; + struct irq_domain *d; + + msi_np = of_parse_phandle(np, "msi-parent", 0); + if (!msi_np) + return; + + d = irq_find_matching_host(msi_np, DOMAIN_BUS_PLATFORM_MSI); + if (!d) + d = irq_find_host(msi_np); + dev_set_msi_domain(dev, d); +} diff --git a/drivers/of/platform.c b/drivers/of/platform.c index ddf8e42c9367..8a002d6151f2 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -184,6 +184,7 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; of_dma_configure(&dev->dev, dev->dev.of_node); + of_msi_configure(&dev->dev, dev->dev.of_node); if (of_device_add(dev) != 0) { of_dma_deconfigure(&dev->dev); diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 25e9e6696a65..b4a74f73a0c3 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -55,6 +55,7 @@ struct irq_data; enum irq_domain_bus_token { DOMAIN_BUS_ANY = 0, DOMAIN_BUS_PCI_MSI, + 
DOMAIN_BUS_PLATFORM_MSI, }; /** diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index d884929a7747..4bcbd586a672 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -74,6 +74,7 @@ static inline int of_irq_to_resource_table(struct device_node *dev, */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); extern struct device_node *of_irq_find_parent(struct device_node *child); +extern void of_msi_configure(struct device *dev, struct device_node *np); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, -- cgit v1.2.3-70-g09d2 From c09fcc4b2b48d58d769a8cff5041533535ece449 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:16 +0100 Subject: drivers/base: Add MSI domain support for non-PCI devices With the msi_list and the msi_domain properties now being at the generic device level, it is starting to be relatively easy to offer a generic way of providing non-PCI MSIs. The two major hurdles with this idea are: - Lack of global ID that identifies a device: this is worked around by having a global ID allocator for each device that gets enrolled in the platform MSI subsystem - Lack of standard way to write the message in the generating device. 
This is solved by mandating driver code to provide a write_msg callback, so that everyone can have their own square wheel Apart from that, the API is fairly straightforward: - platform_msi_create_irq_domain creates an MSI domain that gets tagged with DOMAIN_BUS_PLATFORM_MSI - platform_msi_domain_alloc_irqs allocate MSIs for a given device, populating the msi_list - platform_msi_domain_free_irqs does what is written on the tin [ tglx: Created a seperate struct platform_msi_desc and added kerneldoc entries ] Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-10-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/base/Makefile | 1 + drivers/base/platform-msi.c | 282 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/msi.h | 22 ++++ 3 files changed, 305 insertions(+) create mode 100644 drivers/base/platform-msi.c (limited to 'include/linux') diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 527d291706e8..6b2a84e7f2be 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_REGMAP) += regmap/ obj-$(CONFIG_SOC_BUS) += soc.o obj-$(CONFIG_PINCTRL) += pinctrl.o obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o +obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c new file mode 100644 index 000000000000..1857a5dd0816 --- /dev/null +++ b/drivers/base/platform-msi.c @@ -0,0 +1,282 @@ +/* + * MSI framework for platform devices + * + * Copyright (C) 2015 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include + +#define DEV_ID_SHIFT 24 + +/* + * Internal data structure containing a (made up, but unique) devid + * and the callback to write the MSI message. + */ +struct platform_msi_priv_data { + irq_write_msi_msg_t write_msg; + int devid; +}; + +/* The devid allocator */ +static DEFINE_IDA(platform_msi_devid_ida); + +#ifdef GENERIC_MSI_DOMAIN_OPS +/* + * Convert an msi_desc to a globaly unique identifier (per-device + * devid + msi_desc position in the msi_list). + */ +static irq_hw_number_t platform_msi_calc_hwirq(struct msi_desc *desc) +{ + u32 devid; + + devid = desc->platform.msi_priv_data->devid; + + return (devid << (32 - DEV_ID_SHIFT)) | desc->platform.msi_index; +} + +static void platform_msi_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; + arg->hwirq = platform_msi_calc_hwirq(desc); +} + +static int platform_msi_init(struct irq_domain *domain, + struct msi_domain_info *info, + unsigned int virq, irq_hw_number_t hwirq, + msi_alloc_info_t *arg) +{ + struct irq_data *data; + + irq_domain_set_hwirq_and_chip(domain, virq, hwirq, + info->chip, info->chip_data); + + /* + * Save the MSI descriptor in handler_data so that the + * irq_write_msi_msg callback can retrieve it (and the + * associated device). 
+ */ + data = irq_domain_get_irq_data(domain, virq); + data->handler_data = arg->desc; + + return 0; +} +#else +#define platform_msi_set_desc NULL +#define platform_msi_init NULL +#endif + +static void platform_msi_update_dom_ops(struct msi_domain_info *info) +{ + struct msi_domain_ops *ops = info->ops; + + BUG_ON(!ops); + + if (ops->msi_init == NULL) + ops->msi_init = platform_msi_init; + if (ops->set_desc == NULL) + ops->set_desc = platform_msi_set_desc; +} + +static void platform_msi_write_msg(struct irq_data *data, struct msi_msg *msg) +{ + struct msi_desc *desc = irq_data_get_irq_handler_data(data); + struct platform_msi_priv_data *priv_data; + + priv_data = desc->platform.msi_priv_data; + + priv_data->write_msg(desc, msg); +} + +static void platform_msi_update_chip_ops(struct msi_domain_info *info) +{ + struct irq_chip *chip = info->chip; + + BUG_ON(!chip); + if (!chip->irq_mask) + chip->irq_mask = irq_chip_mask_parent; + if (!chip->irq_unmask) + chip->irq_unmask = irq_chip_unmask_parent; + if (!chip->irq_eoi) + chip->irq_eoi = irq_chip_eoi_parent; + if (!chip->irq_set_affinity) + chip->irq_set_affinity = msi_domain_set_affinity; + if (!chip->irq_write_msi_msg) + chip->irq_write_msi_msg = platform_msi_write_msg; +} + +static void platform_msi_free_descs(struct device *dev) +{ + struct msi_desc *desc, *tmp; + + list_for_each_entry_safe(desc, tmp, dev_to_msi_list(dev), list) { + list_del(&desc->list); + free_msi_entry(desc); + } +} + +static int platform_msi_alloc_descs(struct device *dev, int nvec, + struct platform_msi_priv_data *data) + +{ + int i; + + for (i = 0; i < nvec; i++) { + struct msi_desc *desc; + + desc = alloc_msi_entry(dev); + if (!desc) + break; + + desc->platform.msi_priv_data = data; + desc->platform.msi_index = i; + desc->nvec_used = 1; + + list_add_tail(&desc->list, dev_to_msi_list(dev)); + } + + if (i != nvec) { + /* Clean up the mess */ + platform_msi_free_descs(dev); + + return -ENOMEM; + } + + return 0; +} + +/** + * 
platform_msi_create_irq_domain - Create a platform MSI interrupt domain + * @np: Optional device-tree node of the interrupt controller + * @info: MSI domain info + * @parent: Parent irq domain + * + * Updates the domain and chip ops and creates a platform MSI + * interrupt domain. + * + * Returns: + * A domain pointer or NULL in case of failure. + */ +struct irq_domain *platform_msi_create_irq_domain(struct device_node *np, + struct msi_domain_info *info, + struct irq_domain *parent) +{ + struct irq_domain *domain; + + if (info->flags & MSI_FLAG_USE_DEF_DOM_OPS) + platform_msi_update_dom_ops(info); + if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) + platform_msi_update_chip_ops(info); + + domain = msi_create_irq_domain(np, info, parent); + if (domain) + domain->bus_token = DOMAIN_BUS_PLATFORM_MSI; + + return domain; +} + +/** + * platform_msi_domain_alloc_irqs - Allocate MSI interrupts for @dev + * @dev: The device for which to allocate interrupts + * @nvec: The number of interrupts to allocate + * @write_msi_msg: Callback to write an interrupt message for @dev + * + * Returns: + * Zero for success, or an error code in case of failure + */ +int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg) +{ + struct platform_msi_priv_data *priv_data; + int err; + + /* + * Limit the number of interrupts to 256 per device. Should we + * need to bump this up, DEV_ID_SHIFT should be adjusted + * accordingly (which would impact the max number of MSI + * capable devices). + */ + if (!dev->msi_domain || !write_msi_msg || !nvec || + nvec > (1 << (32 - DEV_ID_SHIFT))) + return -EINVAL; + + if (dev->msi_domain->bus_token != DOMAIN_BUS_PLATFORM_MSI) { + dev_err(dev, "Incompatible msi_domain, giving up\n"); + return -EINVAL; + } + + /* Already had a helping of MSI? Greed... 
*/ + if (!list_empty(dev_to_msi_list(dev))) + return -EBUSY; + + priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL); + if (!priv_data) + return -ENOMEM; + + priv_data->devid = ida_simple_get(&platform_msi_devid_ida, + 0, 1 << DEV_ID_SHIFT, GFP_KERNEL); + if (priv_data->devid < 0) { + err = priv_data->devid; + goto out_free_data; + } + + priv_data->write_msg = write_msi_msg; + + err = platform_msi_alloc_descs(dev, nvec, priv_data); + if (err) + goto out_free_id; + + err = msi_domain_alloc_irqs(dev->msi_domain, dev, nvec); + if (err) + goto out_free_desc; + + return 0; + +out_free_desc: + platform_msi_free_descs(dev); +out_free_id: + ida_simple_remove(&platform_msi_devid_ida, priv_data->devid); +out_free_data: + kfree(priv_data); + + return err; +} + +/** + * platform_msi_domain_free_irqs - Free MSI interrupts for @dev + * @dev: The device for which to free interrupts + */ +void platform_msi_domain_free_irqs(struct device *dev) +{ + struct msi_desc *desc; + + desc = first_msi_entry(dev); + if (desc) { + struct platform_msi_priv_data *data; + + data = desc->platform.msi_priv_data; + + ida_simple_remove(&platform_msi_devid_ida, data->devid); + kfree(data); + } + + msi_domain_free_irqs(dev->msi_domain, dev); + platform_msi_free_descs(dev); +} diff --git a/include/linux/msi.h b/include/linux/msi.h index f83c87e447bc..809b749f9300 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -15,9 +15,23 @@ extern int pci_msi_ignore_mask; struct irq_data; struct msi_desc; struct pci_dev; +struct platform_msi_priv_data; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); +typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, + struct msi_msg *msg); + +/** + * platform_msi_desc - Platform device specific msi descriptor data + * @msi_priv_data: Pointer to platform private data + * @msi_index: The index of the MSI descriptor for multi MSI + */ +struct platform_msi_desc { + struct 
platform_msi_priv_data *msi_priv_data; + u16 msi_index; +}; + /** * struct msi_desc - Descriptor structure for MSI based interrupts * @list: List head for management @@ -36,6 +50,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); * @default_irq:[PCI MSI/X] The default pre-assigned non-MSI irq * @mask_pos: [PCI MSI] Mask register position * @mask_base: [PCI MSI-X] Mask register base address + * @platform: [platform] Platform device specific msi descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ @@ -71,6 +86,7 @@ struct msi_desc { * anonymous for now as it would require an immediate * tree wide cleanup. */ + struct platform_msi_desc platform; }; }; @@ -257,6 +273,12 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); +struct irq_domain *platform_msi_create_irq_domain(struct device_node *np, + struct msi_domain_info *info, + struct irq_domain *parent); +int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, + irq_write_msi_msg_t write_msi_msg); +void platform_msi_domain_free_irqs(struct device *dev); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN -- cgit v1.2.3-70-g09d2 From a5716070d88cba1a0a8a18fea809ea6e3374e276 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:17 +0100 Subject: genirq: Add DOMAIN_BUS_NEXUS irqdomain property Some IRQ domains are not designed to directly provide interrupts to devices, but strictly to be used by other domains. An example of this is the GICv3 ITS, which is completely bus agnostic, and on which it is possible to implement a PCI/MSI domain. Just introduce the irq_domain_bus_token property for now. 
Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-11-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b4a74f73a0c3..d3ca79236fb0 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -56,6 +56,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_ANY = 0, DOMAIN_BUS_PCI_MSI, DOMAIN_BUS_PLATFORM_MSI, + DOMAIN_BUS_NEXUS, }; /** -- cgit v1.2.3-70-g09d2 From f130420e51df30891b55efcef24f5358b2fc2b97 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:18 +0100 Subject: irqchip/gicv3-its: Split PCI/MSI code from the core ITS driver It is becoming obvious that having the PCI/MSI code in the same file as the core ITS code is giving people implementing non-PCI MSI support the wrong kind of idea. In order to make things a bit clearer, let's move the PCI/MSI code out to its own file. Hopefully it will make it clear that whoever thinks of hooking into the core ITS better have a very strong point. We use a temporary entry point that will get removed in a subsequent patch, once the proper infrastructure is added. 
Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-12-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/Makefile | 2 +- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 105 +++++++++++++++++++++++++++++++ drivers/irqchip/irq-gic-v3-its.c | 94 ++++----------------------- include/linux/irqchip/arm-gic-v3.h | 6 ++ 4 files changed, 123 insertions(+), 84 deletions(-) create mode 100644 drivers/irqchip/irq-gic-v3-its-pci-msi.c (limited to 'include/linux') diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index b8d4e9691890..0d5f2a98a6ef 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o -obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o +obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o obj-$(CONFIG_ARM_VIC) += irq-vic.o obj-$(CONFIG_ATMEL_AIC_IRQ) += irq-atmel-aic-common.o irq-atmel-aic.o diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c new file mode 100644 index 000000000000..76147219da7f --- /dev/null +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2013-2015 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include + +#include + +static void its_mask_msi_irq(struct irq_data *d) +{ + pci_msi_mask_irq(d); + irq_chip_mask_parent(d); +} + +static void its_unmask_msi_irq(struct irq_data *d) +{ + pci_msi_unmask_irq(d); + irq_chip_unmask_parent(d); +} + +static struct irq_chip its_msi_irq_chip = { + .name = "ITS-MSI", + .irq_unmask = its_unmask_msi_irq, + .irq_mask = its_mask_msi_irq, + .irq_eoi = irq_chip_eoi_parent, + .irq_write_msi_msg = pci_msi_domain_write_msg, +}; + +struct its_pci_alias { + struct pci_dev *pdev; + u32 dev_id; + u32 count; +}; + +static int its_pci_msi_vec_count(struct pci_dev *pdev) +{ + int msi, msix; + + msi = max(pci_msi_vec_count(pdev), 0); + msix = max(pci_msix_vec_count(pdev), 0); + + return max(msi, msix); +} + +static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data) +{ + struct its_pci_alias *dev_alias = data; + + dev_alias->dev_id = alias; + if (pdev != dev_alias->pdev) + dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev); + + return 0; +} + +static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + struct pci_dev *pdev; + struct its_pci_alias dev_alias; + + if (!dev_is_pci(dev)) + return -EINVAL; + + pdev = to_pci_dev(dev); + dev_alias.pdev = pdev; + dev_alias.count = nvec; + + pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias); + + return its_msi_prepare(domain, dev_alias.dev_id, dev_alias.count, info); +} + +static struct msi_domain_ops its_pci_msi_ops = { + .msi_prepare = its_pci_msi_prepare, +}; + +static struct msi_domain_info 
its_pci_msi_domain_info = { + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), + .ops = &its_pci_msi_ops, + .chip = &its_msi_irq_chip, +}; + +struct irq_domain *its_pci_msi_alloc_domain(struct device_node *np, + struct irq_domain *parent) +{ + return pci_msi_create_irq_domain(np, &its_pci_msi_domain_info, parent); +} diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 1df956afb937..7f995d876029 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -642,26 +642,6 @@ static struct irq_chip its_irq_chip = { .irq_compose_msi_msg = its_irq_compose_msi_msg, }; -static void its_mask_msi_irq(struct irq_data *d) -{ - pci_msi_mask_irq(d); - irq_chip_mask_parent(d); -} - -static void its_unmask_msi_irq(struct irq_data *d) -{ - pci_msi_unmask_irq(d); - irq_chip_unmask_parent(d); -} - -static struct irq_chip its_msi_irq_chip = { - .name = "ITS-MSI", - .irq_unmask = its_unmask_msi_irq, - .irq_mask = its_mask_msi_irq, - .irq_eoi = irq_chip_eoi_parent, - .irq_write_msi_msg = pci_msi_domain_write_msg, -}; - /* * How we allocate LPIs: * @@ -1208,85 +1188,34 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) return 0; } -struct its_pci_alias { - struct pci_dev *pdev; - u32 dev_id; - u32 count; -}; - -static int its_pci_msi_vec_count(struct pci_dev *pdev) -{ - int msi, msix; - - msi = max(pci_msi_vec_count(pdev), 0); - msix = max(pci_msix_vec_count(pdev), 0); - - return max(msi, msix); -} - -static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data) +int its_msi_prepare(struct irq_domain *domain, u32 dev_id, + int nvec, msi_alloc_info_t *info) { - struct its_pci_alias *dev_alias = data; - - dev_alias->dev_id = alias; - if (pdev != dev_alias->pdev) - dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev); - - return 0; -} - -static int its_msi_prepare(struct irq_domain *domain, struct device *dev, - int 
nvec, msi_alloc_info_t *info) -{ - struct pci_dev *pdev; struct its_node *its; struct its_device *its_dev; - struct its_pci_alias dev_alias; - - if (!dev_is_pci(dev)) - return -EINVAL; - - pdev = to_pci_dev(dev); - dev_alias.pdev = pdev; - dev_alias.count = nvec; - pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias); its = domain->parent->host_data; - - its_dev = its_find_device(its, dev_alias.dev_id); + its_dev = its_find_device(its, dev_id); if (its_dev) { /* * We already have seen this ID, probably through * another alias (PCI bridge of some sort). No need to * create the device. */ - dev_dbg(dev, "Reusing ITT for devID %x\n", dev_alias.dev_id); + pr_debug("Reusing ITT for devID %x\n", dev_id); goto out; } - its_dev = its_create_device(its, dev_alias.dev_id, dev_alias.count); + its_dev = its_create_device(its, dev_id, nvec); if (!its_dev) return -ENOMEM; - dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n", - dev_alias.count, ilog2(dev_alias.count)); + pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec)); out: info->scratchpad[0].ptr = its_dev; - info->scratchpad[1].ptr = dev; return 0; } -static struct msi_domain_ops its_pci_msi_ops = { - .msi_prepare = its_msi_prepare, -}; - -static struct msi_domain_info its_pci_msi_domain_info = { - .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX), - .ops = &its_pci_msi_ops, - .chip = &its_msi_irq_chip, -}; - static int its_irq_gic_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) @@ -1322,9 +1251,9 @@ static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_domain_set_hwirq_and_chip(domain, virq + i, hwirq, &its_irq_chip, its_dev); - dev_dbg(info->scratchpad[1].ptr, "ID:%d pID:%d vID:%d\n", - (int)(hwirq - its_dev->event_map.lpi_base), - (int)hwirq, virq + i); + pr_debug("ID:%d pID:%d vID:%d\n", + (int)(hwirq - its_dev->event_map.lpi_base), + (int) hwirq, virq + i); } return 0; @@ -1523,9 +1452,8 
@@ static int its_probe(struct device_node *node, struct irq_domain *parent) its->domain->parent = parent; - its->msi_chip.domain = pci_msi_create_irq_domain(node, - &its_pci_msi_domain_info, - its->domain); + its->msi_chip.domain = its_pci_msi_alloc_domain(node, + its->domain); if (!its->msi_chip.domain) { err = -ENOMEM; goto out_free_domains; diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index ffbc034c8810..d6149baaf643 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -360,6 +360,7 @@ #ifndef __ASSEMBLY__ #include +#include /* * We need a value to serve as a irq-type for LPIs. Choose one that will @@ -388,6 +389,11 @@ struct irq_domain; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, struct irq_domain *domain); +int its_msi_prepare(struct irq_domain *domain, u32 dev_id, + int nvec, msi_alloc_info_t *info); + +struct irq_domain *its_pci_msi_alloc_domain(struct device_node *node, + struct irq_domain *parent); #endif -- cgit v1.2.3-70-g09d2 From 54456db9a23753b87ce4d49adabe7da853bf13a2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:21 +0100 Subject: irqchip/gicv3-its: Make the PCI/MSI code standalone We can now look up the base ITS domain, making it possible to initialize the PCI/MSI code independently from the main ITS subsystem. This allows us to remove all the previously added hooks. 
Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Bjorn Helgaas Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-15-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-gic-v3-its-pci-msi.c | 47 +++++++++++++++++++++++++++---- drivers/irqchip/irq-gic-v3-its.c | 48 +++++++++++++++++++++----------- include/linux/irqchip/arm-gic-v3.h | 5 ---- 3 files changed, 73 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c index 76147219da7f..cf351c637464 100644 --- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c +++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c @@ -20,8 +20,6 @@ #include #include -#include - static void its_mask_msi_irq(struct irq_data *d) { pci_msi_mask_irq(d); @@ -74,17 +72,24 @@ static int its_pci_msi_prepare(struct irq_domain *domain, struct device *dev, { struct pci_dev *pdev; struct its_pci_alias dev_alias; + struct msi_domain_info *msi_info; if (!dev_is_pci(dev)) return -EINVAL; + msi_info = msi_get_domain_info(domain->parent); + pdev = to_pci_dev(dev); dev_alias.pdev = pdev; dev_alias.count = nvec; pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias); - return its_msi_prepare(domain, dev_alias.dev_id, dev_alias.count, info); + /* ITS specific DeviceID, as the core ITS ignores dev. 
*/ + info->scratchpad[0].ul = dev_alias.dev_id; + + return msi_info->ops->msi_prepare(domain->parent, + dev, dev_alias.count, info); } static struct msi_domain_ops its_pci_msi_ops = { @@ -98,8 +103,38 @@ static struct msi_domain_info its_pci_msi_domain_info = { .chip = &its_msi_irq_chip, }; -struct irq_domain *its_pci_msi_alloc_domain(struct device_node *np, - struct irq_domain *parent) +static struct of_device_id its_device_id[] = { + { .compatible = "arm,gic-v3-its", }, + {}, +}; + +static int __init its_pci_msi_init(void) { - return pci_msi_create_irq_domain(np, &its_pci_msi_domain_info, parent); + struct device_node *np; + struct irq_domain *parent; + + for (np = of_find_matching_node(NULL, its_device_id); np; + np = of_find_matching_node(np, its_device_id)) { + if (!of_property_read_bool(np, "msi-controller")) + continue; + + parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS); + if (!parent || !msi_get_domain_info(parent)) { + pr_err("%s: unable to locate ITS domain\n", + np->full_name); + continue; + } + + if (!pci_msi_create_irq_domain(np, &its_pci_msi_domain_info, + parent)) { + pr_err("%s: unable to create PCI domain\n", + np->full_name); + continue; + } + + pr_info("PCI/MSI: %s domain created\n", np->full_name); + } + + return 0; } +early_initcall(its_pci_msi_init); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index dc4fbbfa0212..26b55c53755f 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -59,7 +59,6 @@ struct its_collection { struct its_node { raw_spinlock_t lock; struct list_head entry; - struct irq_domain *domain; void __iomem *base; unsigned long phys_base; struct its_cmd_block *cmd_base; @@ -1187,13 +1186,25 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) return 0; } -int its_msi_prepare(struct irq_domain *domain, u32 dev_id, - int nvec, msi_alloc_info_t *info) +static int its_msi_prepare(struct irq_domain *domain, struct device *dev, + 
int nvec, msi_alloc_info_t *info) { struct its_node *its; struct its_device *its_dev; + struct msi_domain_info *msi_info; + u32 dev_id; + + /* + * We ignore "dev" entierely, and rely on the dev_id that has + * been passed via the scratchpad. This limits this domain's + * usefulness to upper layers that definitely know that they + * are built on top of the ITS. + */ + dev_id = info->scratchpad[0].ul; + + msi_info = msi_get_domain_info(domain); + its = msi_info->data; - its = domain->parent->host_data; its_dev = its_find_device(its, dev_id); if (its_dev) { /* @@ -1215,6 +1226,10 @@ out: return 0; } +static struct msi_domain_ops its_msi_domain_ops = { + .msi_prepare = its_msi_prepare, +}; + static int its_irq_gic_domain_alloc(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) @@ -1353,7 +1368,7 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) struct resource res; struct its_node *its; void __iomem *its_base; - struct irq_domain *inner_domain = NULL; + struct irq_domain *inner_domain; u32 val; u64 baser, tmp; int err; @@ -1443,20 +1458,26 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR); if (of_property_read_bool(node, "msi-controller")) { + struct msi_domain_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + err = -ENOMEM; + goto out_free_tables; + } + inner_domain = irq_domain_add_tree(node, &its_domain_ops, its); if (!inner_domain) { err = -ENOMEM; + kfree(info); goto out_free_tables; } inner_domain->parent = parent; inner_domain->bus_token = DOMAIN_BUS_NEXUS; - - its->domain = its_pci_msi_alloc_domain(node, inner_domain); - if (!its->domain) { - err = -ENOMEM; - goto out_free_domains; - } + info->ops = &its_msi_domain_ops; + info->data = its; + inner_domain->host_data = info; } spin_lock(&its_lock); @@ -1465,11 +1486,6 @@ static int its_probe(struct device_node *node, struct irq_domain *parent) return 0; 
-out_free_domains: - if (its->domain) - irq_domain_remove(its->domain); - if (inner_domain) - irq_domain_remove(inner_domain); out_free_tables: its_free_tables(its); out_free_cmd: diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d6149baaf643..bf982e021fbd 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -389,11 +389,6 @@ struct irq_domain; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, struct irq_domain *domain); -int its_msi_prepare(struct irq_domain *domain, u32 dev_id, - int nvec, msi_alloc_info_t *info); - -struct irq_domain *its_pci_msi_alloc_domain(struct device_node *node, - struct irq_domain *parent); #endif -- cgit v1.2.3-70-g09d2 From f075915ac0b11847fcfc8c4d55526a317e71c4d1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 28 Jul 2015 14:46:26 +0100 Subject: PCI/MSI: Drop domain field from msi_controller The only three users of that field are not using the msi_controller structure anymore, so drop it altogether. 
Acked-by: Bjorn Helgaas Signed-off-by: Marc Zyngier Cc: Cc: Yijing Wang Cc: Ma Jun Cc: Lorenzo Pieralisi Cc: Duc Dang Cc: Hanjun Guo Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438091186-10244-20-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 3 --- include/linux/msi.h | 3 --- 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 460334409794..f70aa0f5cbaf 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -45,9 +45,6 @@ static struct irq_domain *pci_msi_get_domain(struct pci_dev *dev) if (domain) return domain; - if (dev->bus->msi && (domain = dev->bus->msi->domain)) - return domain; - return arch_get_pci_msi_domain(dev); } diff --git a/include/linux/msi.h b/include/linux/msi.h index 809b749f9300..ad939d0ba816 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -160,9 +160,6 @@ struct msi_controller { struct device *dev; struct device_node *of_node; struct list_head list; -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - struct irq_domain *domain; -#endif int (*setup_irq)(struct msi_controller *chip, struct pci_dev *dev, struct msi_desc *desc); -- cgit v1.2.3-70-g09d2 From 72b1e5e4cac72efa6b739b47e41f53e4520b4194 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Jul 2015 16:21:30 +0200 Subject: netfilter: bridge: reduce nf_bridge_info to 32 bytes again We can use union for most of the temporary cruft (original ipv4/ipv6 address, source mac, physoutdev) since they're used during different stages of br netfilter traversal. Also get rid of the last two ->mask users. Shrinks struct from 48 to 32 on 64bit arch. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge.h | 12 +++++++++--- include/linux/skbuff.h | 19 +++++++++++++------ net/bridge/br_netfilter_hooks.c | 14 ++++++-------- net/bridge/br_netfilter_ipv6.c | 2 +- net/ipv4/netfilter/nf_defrag_ipv4.c | 7 ++----- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 7 ++----- 6 files changed, 33 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 6d80fc686323..2437b8a5d7a9 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,9 +17,6 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -#define BRNF_BRIDGED_DNAT 0x02 -#define BRNF_NF_BRIDGE_PREROUTING 0x08 - int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) @@ -63,8 +60,17 @@ nf_bridge_get_physoutdev(const struct sk_buff *skb) { return skb->nf_bridge ? 
skb->nf_bridge->physoutdev : NULL; } + +static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb) +{ + return skb->nf_bridge && skb->nf_bridge->in_prerouting; +} #else #define br_drop_fake_rtable(skb) do { } while (0) +static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb) +{ + return false; +} #endif /* CONFIG_BRIDGE_NETFILTER */ #endif diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e87d53..ac732e67a6c8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -173,17 +173,24 @@ struct nf_bridge_info { BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto:8; - bool pkt_otherhost; + u8 pkt_otherhost:1; + u8 in_prerouting:1; + u8 bridged_dnat:1; __u16 frag_max_size; - unsigned int mask; struct net_device *physindev; union { - struct net_device *physoutdev; - char neigh_header[8]; - }; - union { + /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; struct in6_addr ipv6_daddr; + + /* after prerouting + nat detected: store original source + * mac since neigh resolution overwrites it, only used while + * skb is out in neigh layer. 
+ */ + char neigh_header[8]; + + /* always valid & non-NULL from FORWARD on, for physdev match */ + struct net_device *physoutdev; }; }; #endif diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index c8b9bcfe997e..ec51c2ba30e9 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ - nf_bridge->mask |= BRNF_BRIDGED_DNAT; + nf_bridge->bridged_dnat = 1; /* FIXME Need to refragment */ ret = neigh->output(neigh, skb); } @@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->in_prerouting = 0; if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) nf_bridge->pkt_otherhost = true; } - nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->in_prerouting = 1; nf_bridge->physindev = skb->dev; skb->dev = brnf_get_logical_dev(skb, skb->dev); @@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - if (skb->nf_bridge && - !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + if (skb->nf_bridge && !skb->nf_bridge->in_prerouting) return NF_STOP; - } return NF_ACCEPT; } @@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); skb_pull(skb, ETH_HLEN); - nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; + nf_bridge->bridged_dnat = 0; BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - 
ETH_ALEN)); @@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) static int br_nf_dev_xmit(struct sk_buff *skb) { - if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { + if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) { br_nf_pre_routing_finish_bridge_slow(skb); return 1; } diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 13b7d1e3d185..77383bfe7ea3 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) skb->pkt_type = PACKET_OTHERHOST; nf_bridge->pkt_otherhost = false; } - nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->in_prerouting = 0; if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { skb_dst_drop(skb); v6ops->route_input(skb); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index c88b7d434718..b69e82bda215 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif - -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && - skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + if (nf_bridge_in_prerouting(skb)) return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; -#endif + if (hooknum == NF_INET_PRE_ROUTING) return IP_DEFRAG_CONNTRACK_IN + zone; else diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index a45db0b4785c..267fb8d5876e 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -39,12 +39,9 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, if (skb->nfct) zone = nf_ct_zone((struct nf_conn *)skb->nfct); #endif - -#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (skb->nf_bridge && - 
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + if (nf_bridge_in_prerouting(skb)) return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; -#endif + if (hooknum == NF_INET_PRE_ROUTING) return IP6_DEFRAG_CONNTRACK_IN + zone; else -- cgit v1.2.3-70-g09d2 From dd489240a21afc3ff3962aba5d987229536cae63 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 Jul 2015 11:32:20 +0200 Subject: KVM: document memory barriers for kvm->vcpus/kvm->online_vcpus Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ virt/kvm/kvm_main.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bd1097a95704..81089cf1f0c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -427,6 +427,10 @@ struct kvm { static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { + /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case + * the caller has read kvm->online_vcpus before (as is the case + * for kvm_for_each_vcpu, for example). + */ smp_rmb(); return kvm->vcpus[i]; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8dc4828f623f..d8db2f8fce9c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2206,6 +2206,11 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) } kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; + + /* + * Pairs with smp_rmb() in kvm_get_vcpu. Write kvm->vcpus + * before kvm->online_vcpu's incremented value. + */ smp_wmb(); atomic_inc(&kvm->online_vcpus); -- cgit v1.2.3-70-g09d2 From ffd9a0fcbbed300b55f84e8397e96c2edd06cbdf Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Tue, 28 Jul 2015 07:19:58 +0200 Subject: usb: gadget: add 'quirk_altset_not_supp' to usb_gadget Because some UDC controllers may not support altsettings, the usb gadget layer needs to provide a generic way to inform gadget functions about non-standard hardware limitations. 
This patch adds 'quirk_altset_not_supp' field to struct usb_gadget and helper function gadget_is_altset_supported(). It also sets 'quirk_altset_not_supp' to 1 in pxa25x_udc and pxa27x_udc drivers, which have such limitation. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/pxa25x_udc.c | 1 + drivers/usb/gadget/udc/pxa27x_udc.c | 1 + include/linux/usb/gadget.h | 11 +++++++++++ 3 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/pxa25x_udc.c b/drivers/usb/gadget/udc/pxa25x_udc.c index f6cbe667ce39..27f944231477 100644 --- a/drivers/usb/gadget/udc/pxa25x_udc.c +++ b/drivers/usb/gadget/udc/pxa25x_udc.c @@ -1176,6 +1176,7 @@ static void udc_reinit(struct pxa25x_udc *dev) INIT_LIST_HEAD (&dev->gadget.ep_list); INIT_LIST_HEAD (&dev->gadget.ep0->ep_list); dev->ep0state = EP0_IDLE; + dev->gadget.quirk_altset_not_supp = 1; /* basic endpoint records init */ for (i = 0; i < PXA_UDC_NUM_ENDPOINTS; i++) { diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c index 042f06b52677..670ac0b12f00 100644 --- a/drivers/usb/gadget/udc/pxa27x_udc.c +++ b/drivers/usb/gadget/udc/pxa27x_udc.c @@ -1710,6 +1710,7 @@ static void udc_init_data(struct pxa_udc *dev) INIT_LIST_HEAD(&dev->gadget.ep_list); INIT_LIST_HEAD(&dev->gadget.ep0->ep_list); dev->udc_usb_ep[0].pxa_ep = &dev->pxa_ep[0]; + dev->gadget.quirk_altset_not_supp = 1; ep0_idle(dev); /* PXA endpoints init */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index cea0511a1bc9..31be84b7e645 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -572,6 +572,7 @@ struct usb_gadget { unsigned a_hnp_support:1; unsigned a_alt_hnp_support:1; unsigned quirk_ep_out_aligned_size:1; + unsigned quirk_altset_not_supp:1; unsigned is_selfpowered:1; unsigned deactivated:1; unsigned connected:1; @@ -609,6 +610,16 @@ usb_ep_align_maybe(struct usb_gadget *g, struct usb_ep *ep, size_t len) round_up(len, 
(size_t)ep->desc->wMaxPacketSize); } +/** + * gadget_is_altset_supported - return true iff the hardware supports + * altsettings + * @g: controller to check for quirk + */ +static inline int gadget_is_altset_supported(struct usb_gadget *g) +{ + return !g->quirk_altset_not_supp; +} + /** * gadget_is_dualspeed - return true iff the hardware handles high speed * @g: controller that might support both high and full speeds -- cgit v1.2.3-70-g09d2 From 02ded1b0d8e73dad7d2626c960ef20fb7dc30753 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Tue, 28 Jul 2015 07:19:59 +0200 Subject: usb: gadget: add 'quirk_stall_not_supp' to usb_gadget Because some UDC controllers may not support stalling, the usb gadget layer needs to provide a generic way to inform gadget functions about non-standard hardware limitations. This patch adds 'quirk_stall_not_supp' field to struct usb_gadget and helper function gadget_is_stall_supported(). It also sets 'quirk_stall_not_supp' to 1 in at91_udc driver, which has such limitation.
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/at91_udc.c | 1 + include/linux/usb/gadget.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index fc4226462f8f..32f50a7944dd 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -825,6 +825,7 @@ static void udc_reinit(struct at91_udc *udc) INIT_LIST_HEAD(&udc->gadget.ep_list); INIT_LIST_HEAD(&udc->gadget.ep0->ep_list); + udc->gadget.quirk_stall_not_supp = 1; for (i = 0; i < NUM_ENDPOINTS; i++) { struct at91_ep *ep = &udc->ep[i]; diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 31be84b7e645..f195a76548f6 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -573,6 +573,7 @@ struct usb_gadget { unsigned a_alt_hnp_support:1; unsigned quirk_ep_out_aligned_size:1; unsigned quirk_altset_not_supp:1; + unsigned quirk_stall_not_supp:1; unsigned is_selfpowered:1; unsigned deactivated:1; unsigned connected:1; @@ -620,6 +621,15 @@ static inline int gadget_is_altset_supported(struct usb_gadget *g) return !g->quirk_altset_not_supp; } +/** + * gadget_is_stall_supported - return true iff the hardware supports stalling + * @g: controller to check for quirk + */ +static inline int gadget_is_stall_supported(struct usb_gadget *g) +{ + return !g->quirk_stall_not_supp; +} + /** * gadget_is_dualspeed - return true iff the hardware handles high speed * @g: controller that might support both high and full speeds -- cgit v1.2.3-70-g09d2 From ca1023c81dd10f76a5d0a8be2fdbe724fe7a126a Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Tue, 28 Jul 2015 07:20:00 +0200 Subject: usb: gadget: add 'quirk_zlp_not_supp' to usb_gadget Because some UDC controllers may not support zlp, the usb gadget layer needs to provide a generic way to inform gadget functions about non-standard hardware limitations.
This patch adds 'quirk_zlp_not_supp' field to struct usb_gadget and helper function gadget_is_zlp_supported(). It also sets 'quirk_zlp_not_supp' to 1 in musb UDC driver, which has such limitation. [ balbi@ti.com : make it build ] Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 1 + include/linux/usb/gadget.h | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 625d482f1a97..9e18178f1d45 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -2075,6 +2075,7 @@ __acquires(musb->lock) musb->g.b_hnp_enable = 0; musb->g.a_alt_hnp_support = 0; musb->g.a_hnp_support = 0; + musb->g.quirk_zlp_not_supp = 1; /* Normal reset, as B-Device; * or else after HNP, as A-Device diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index f195a76548f6..353a72096dda 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -574,6 +574,7 @@ struct usb_gadget { unsigned quirk_ep_out_aligned_size:1; unsigned quirk_altset_not_supp:1; unsigned quirk_stall_not_supp:1; + unsigned quirk_zlp_not_supp:1; unsigned is_selfpowered:1; unsigned deactivated:1; unsigned connected:1; @@ -630,6 +631,15 @@ static inline int gadget_is_stall_supported(struct usb_gadget *g) return !g->quirk_stall_not_supp; } +/** + * gadget_is_zlp_supported - return true iff the hardware supports zlp + * @g: controller to check for quirk + */ +static inline int gadget_is_zlp_supported(struct usb_gadget *g) +{ + return !g->quirk_zlp_not_supp; +} + /** * gadget_is_dualspeed - return true iff the hardware handles high speed * @g: controller that might support both high and full speeds -- cgit v1.2.3-70-g09d2 From 6f98f545b0b4effdf67e83e214a4eb13cd41fba2 Mon Sep 17 00:00:00 2001 From: "Ivan T. 
Ivanov" Date: Tue, 28 Jul 2015 11:10:22 +0300 Subject: usb: phy: msm: Add D+/D- lines route control apq8016-sbc board is using Dual SPDT USB Switch (TC7USB40MU), which is controlled by GPIO to de/multiplex D+/D- USB lines to USB2513B Hub and uB connector. Add support for this. Signed-off-by: Ivan T. Ivanov Signed-off-by: Felipe Balbi --- .../devicetree/bindings/usb/msm-hsusb.txt | 4 ++ drivers/usb/phy/phy-msm-usb.c | 47 ++++++++++++++++++++++ include/linux/usb/msm_hsusb.h | 7 ++++ 3 files changed, 58 insertions(+) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/usb/msm-hsusb.txt b/Documentation/devicetree/bindings/usb/msm-hsusb.txt index bd8d9e753029..8654a3ec23e4 100644 --- a/Documentation/devicetree/bindings/usb/msm-hsusb.txt +++ b/Documentation/devicetree/bindings/usb/msm-hsusb.txt @@ -52,6 +52,10 @@ Required properties: Optional properties: - dr_mode: One of "host", "peripheral" or "otg". Defaults to "otg" +- switch-gpio: A phandle + gpio-specifier pair. Some boards are using Dual + SPDT USB Switch, which is controlled by GPIO to de/multiplex + D+/D- USB lines between connectors. + - qcom,phy-init-sequence: PHY configuration sequence values. This is related to Device Mode Eye Diagram test. Start address at which these values will be written is ULPI_EXT_VENDOR_SPECIFIC.
Value of -1 is reserved as diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c index 61d86d8bf5b7..c58c3c0dbe35 100644 --- a/drivers/usb/phy/phy-msm-usb.c +++ b/drivers/usb/phy/phy-msm-usb.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include @@ -1471,6 +1473,14 @@ static int msm_otg_vbus_notifier(struct notifier_block *nb, unsigned long event, else clear_bit(B_SESS_VLD, &motg->inputs); + if (test_bit(B_SESS_VLD, &motg->inputs)) { + /* Switch D+/D- lines to Device connector */ + gpiod_set_value_cansleep(motg->switch_gpio, 0); + } else { + /* Switch D+/D- lines to Hub */ + gpiod_set_value_cansleep(motg->switch_gpio, 1); + } + schedule_work(&motg->sm_work); return NOTIFY_DONE; @@ -1546,6 +1556,11 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg) motg->manual_pullup = of_property_read_bool(node, "qcom,manual-pullup"); + motg->switch_gpio = devm_gpiod_get_optional(&pdev->dev, "switch", + GPIOD_OUT_LOW); + if (IS_ERR(motg->switch_gpio)) + return PTR_ERR(motg->switch_gpio); + ext_id = ERR_PTR(-ENODEV); ext_vbus = ERR_PTR(-ENODEV); if (of_property_read_bool(node, "extcon")) { @@ -1617,6 +1632,19 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg) return 0; } +static int msm_otg_reboot_notify(struct notifier_block *this, + unsigned long code, void *unused) +{ + struct msm_otg *motg = container_of(this, struct msm_otg, reboot); + + /* + * Ensure that D+/D- lines are routed to uB connector, so + * we could load bootloader/kernel at next reboot + */ + gpiod_set_value_cansleep(motg->switch_gpio, 0); + return NOTIFY_DONE; +} + static int msm_otg_probe(struct platform_device *pdev) { struct regulator_bulk_data regs[3]; @@ -1781,6 +1809,17 @@ static int msm_otg_probe(struct platform_device *pdev) dev_dbg(&pdev->dev, "Can not create mode change file\n"); } + if (test_bit(B_SESS_VLD, &motg->inputs)) { + /* 
Switch D+/D- lines to Device connector */ + gpiod_set_value_cansleep(motg->switch_gpio, 0); + } else { + /* Switch D+/D- lines to Hub */ + gpiod_set_value_cansleep(motg->switch_gpio, 1); + } + + motg->reboot.notifier_call = msm_otg_reboot_notify; + register_reboot_notifier(&motg->reboot); + pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -1807,6 +1846,14 @@ static int msm_otg_remove(struct platform_device *pdev) if (phy->otg->host || phy->otg->gadget) return -EBUSY; + unregister_reboot_notifier(&motg->reboot); + + /* + * Ensure that D+/D- lines are routed to uB connector, so + * we could load bootloader/kernel at next reboot + */ + gpiod_set_value_cansleep(motg->switch_gpio, 0); + extcon_unregister_notifier(motg->id.extcon, EXTCON_USB_HOST, &motg->id.nb); extcon_unregister_notifier(motg->vbus.extcon, EXTCON_USB, &motg->vbus.nb); diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h index 5df2c8f59aa0..8c8f6854c993 100644 --- a/include/linux/usb/msm_hsusb.h +++ b/include/linux/usb/msm_hsusb.h @@ -155,6 +155,10 @@ struct msm_usb_cable { * starting controller using usbcmd run/stop bit. * @vbus: VBUS signal state trakining, using extcon framework * @id: ID signal state trakining, using extcon framework + * @switch_gpio: Descriptor for GPIO used to control external Dual + * SPDT USB Switch. 
+ * @reboot: Used to inform the driver to route USB D+/D- line to Device + * connector */ struct msm_otg { struct usb_phy phy; @@ -188,6 +192,9 @@ struct msm_otg { struct msm_usb_cable vbus; struct msm_usb_cable id; + + struct gpio_desc *switch_gpio; + struct notifier_block reboot; }; #endif -- cgit v1.2.3-70-g09d2 From 7b36f92934e40d1ee24e5617ddedb852e10086ca Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jul 2015 12:42:47 +0200 Subject: bpf: provide helper that indicates eBPF was migrated During recent discussions we had with Michael, we found that it would be useful to have an indicator that tells the JIT that an eBPF program had been migrated from classic instructions into eBPF instructions, as only in that case A and X need to be cleared in the prologue. Such eBPF programs do not set a particular type, but all have BPF_PROG_TYPE_UNSPEC. Thus, introduce a small helper for cde66c2d88da ("s390/bpf: Only clear A and X for converted BPF programs") and possibly others in future. Signed-off-by: Daniel Borkmann Cc: Michael Holzheu Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/s390/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index bbbac6da37af..9f4bbc09bf07 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1245,7 +1245,7 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) jit->lit = jit->lit_start; jit->prg = 0; - bpf_jit_prologue(jit, fp->type == BPF_PROG_TYPE_UNSPEC); + bpf_jit_prologue(jit, bpf_prog_was_classic(fp)); for (i = 0; i < fp->len; i += insn_count) { insn_count = bpf_jit_insn(jit, fp, i); if (insn_count < 0) diff --git a/include/linux/filter.h b/include/linux/filter.h index 69d00555ce35..6b025491120d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -354,6 +354,16 @@ static inline unsigned int bpf_prog_size(unsigned int proglen) offsetof(struct bpf_prog, insns[proglen])); } +static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) +{ + /* When classic BPF programs have been loaded and the arch + * does not have a classic BPF JIT (anymore), they have been + * converted via bpf_migrate_filter() to eBPF and thus always + * have an unspec program type. + */ + return prog->type == BPF_PROG_TYPE_UNSPEC; +} + #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) #ifdef CONFIG_DEBUG_SET_MODULE_RONX -- cgit v1.2.3-70-g09d2 From b13138ef72178a13f34e33883f9f093f9e3b1bda Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 30 Jul 2015 12:42:49 +0200 Subject: bpf: also show process name/pid in bpf_jit_dump It can be useful for testing to see the actual process/pid who is loading a given filter. I was running some BPF test program and noticed unusual filter loads from time to time, triggered by some other application in the background. bpf_jit_disasm is still working after this change. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 6b025491120d..fa2cab985e57 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -438,8 +439,9 @@ void bpf_jit_free(struct bpf_prog *fp); static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { - pr_err("flen=%u proglen=%u pass=%u image=%pK\n", - flen, proglen, pass, image); + pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen, + proglen, pass, image, current->comm, task_pid_nr(current)); + if (image) print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); -- cgit v1.2.3-70-g09d2 From 890e4847587fcff5eb0438e90992ad7d2a261f33 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 10 Jun 2015 16:54:58 +0800 Subject: PCI: Add pcibios_alloc_irq() and pcibios_free_irq() Add pcibios_alloc_irq() and pcibios_free_irq(), which are called when binding/unbinding PCI device drivers. PCI arch code may implement these to manage IRQ resources for hotplugged devices. 
[bhelgaas: changelog] Signed-off-by: Jiang Liu Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner --- drivers/pci/pci-driver.c | 26 ++++++++++++++++++++------ include/linux/pci.h | 2 ++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3cb2210de553..52a880ca1768 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -388,18 +388,31 @@ static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev) return error; } +int __weak pcibios_alloc_irq(struct pci_dev *dev) +{ + return 0; +} + +void __weak pcibios_free_irq(struct pci_dev *dev) +{ +} + static int pci_device_probe(struct device *dev) { - int error = 0; - struct pci_driver *drv; - struct pci_dev *pci_dev; + int error; + struct pci_dev *pci_dev = to_pci_dev(dev); + struct pci_driver *drv = to_pci_driver(dev->driver); + + error = pcibios_alloc_irq(pci_dev); + if (error < 0) + return error; - drv = to_pci_driver(dev->driver); - pci_dev = to_pci_dev(dev); pci_dev_get(pci_dev); error = __pci_device_probe(drv, pci_dev); - if (error) + if (error) { + pcibios_free_irq(pci_dev); pci_dev_put(pci_dev); + } return error; } @@ -415,6 +428,7 @@ static int pci_device_remove(struct device *dev) drv->remove(pci_dev); pm_runtime_put_noidle(dev); } + pcibios_free_irq(pci_dev); pci_dev->driver = NULL; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 8a0321a8fb59..b4832c92f23e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1645,6 +1645,8 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, int pcibios_add_device(struct pci_dev *dev); void pcibios_release_device(struct pci_dev *dev); void pcibios_penalize_isa_irq(int irq, int active); +int pcibios_alloc_irq(struct pci_dev *dev); +void pcibios_free_irq(struct pci_dev *dev); #ifdef CONFIG_HIBERNATE_CALLBACKS extern struct dev_pm_ops pcibios_pm_ops; -- cgit v1.2.3-70-g09d2 From 811a4e6fce09bc9239c664c6a1a53645a678c303 
Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 10 Jun 2015 16:55:00 +0800 Subject: PCI: Add helpers to manage pci_dev->irq and pci_dev->irq_managed Add pci_has_managed_irq(), pci_set_managed_irq(), and pci_reset_managed_irq() to simplify code. No functional change. [bhelgaas: changelog] Signed-off-by: Jiang Liu Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner --- arch/x86/pci/intel_mid_pci.c | 4 ++-- arch/x86/pci/irq.c | 10 ++++------ drivers/acpi/pci_irq.c | 10 ++++------ include/linux/pci.h | 17 +++++++++++++++++ 4 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index fb7a1f96d80c..22aaefb4f1ca 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c @@ -211,7 +211,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) struct irq_alloc_info info; int polarity; - if (dev->irq_managed && dev->irq > 0) + if (pci_has_managed_irq(dev)) return 0; if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) @@ -234,7 +234,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) static void intel_mid_pci_irq_disable(struct pci_dev *dev) { - if (dev->irq_managed && dev->irq > 0) { + if (pci_has_managed_irq(dev)) { mp_unmap_irq(dev->irq); dev->irq_managed = 0; /* diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 72108f0b66b1..32e70343e6fd 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev) struct pci_dev *temp_dev; int irq; - if (dev->irq_managed && dev->irq > 0) + if (pci_has_managed_irq(dev)) return 0; irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, @@ -1230,8 +1230,7 @@ static int pirq_enable_irq(struct pci_dev *dev) } dev = temp_dev; if (irq >= 0) { - dev->irq_managed = 1; - dev->irq = irq; + pci_set_managed_irq(dev, irq); dev_info(&dev->dev, "PCI->APIC IRQ transform: " "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); return 0; @@ -1259,9 +1258,8 
@@ static int pirq_enable_irq(struct pci_dev *dev) static void pirq_disable_irq(struct pci_dev *dev) { - if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) { + if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) { mp_unmap_irq(dev->irq); - dev->irq = 0; - dev->irq_managed = 0; + pci_reset_managed_irq(dev); } } diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index d1aad6900b4c..afa16c557c17 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -412,7 +412,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) return 0; } - if (dev->irq_managed && dev->irq > 0) + if (pci_has_managed_irq(dev)) return 0; entry = acpi_pci_irq_lookup(dev, pin); @@ -457,8 +457,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev) kfree(entry); return rc; } - dev->irq = rc; - dev->irq_managed = 1; + pci_set_managed_irq(dev, rc); if (link) snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link); @@ -481,7 +480,7 @@ void acpi_pci_irq_disable(struct pci_dev *dev) u8 pin; pin = dev->pin; - if (!pin || !dev->irq_managed || dev->irq <= 0) + if (!pin || !pci_has_managed_irq(dev)) return; entry = acpi_pci_irq_lookup(dev, pin); @@ -503,7 +502,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev) dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin)); if (gsi >= 0) { acpi_unregister_gsi(gsi); - dev->irq_managed = 0; - dev->irq = 0; + pci_reset_managed_irq(dev); } } diff --git a/include/linux/pci.h b/include/linux/pci.h index b4832c92f23e..b7ab0c424ed6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -963,6 +963,23 @@ static inline int pci_is_managed(struct pci_dev *pdev) return pdev->is_managed; } +static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq) +{ + pdev->irq = irq; + pdev->irq_managed = 1; +} + +static inline void pci_reset_managed_irq(struct pci_dev *pdev) +{ + pdev->irq = 0; + pdev->irq_managed = 0; +} + +static inline bool pci_has_managed_irq(struct pci_dev *pdev) +{ + return pdev->irq_managed && pdev->irq > 0; 
+} + void pci_disable_device(struct pci_dev *dev); extern unsigned int pcibios_max_latency; -- cgit v1.2.3-70-g09d2 From 67546762978f523749eac157903e0b01c18e083a Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Fri, 17 Jul 2015 17:16:31 +0800 Subject: PCI: Protect pci_bus->slots with pci_slot_mutex, not pci_bus_sem Rajat Jain reported a deadlock when PCIe hot-add and AER recovery happen at the same time: thread 1: pciehp_enable_slot pciehp_configure_device pci_bus_add_devices pci_bus_add_device device_attach device_lock(dev) # acquire device lock ... pciehp_probe init_slot pci_hp_register pci_create_slot down_write(pci_bus_sem) # deadlock here thread 2: aer_isr_one_error aer_process_err_device do_recovery broadcast_error_message(..., report_error_detected) pci_walk_bus(..., cb=report_error_detected, ...) down_read(&pci_bus_sem) # acquire pci_bus_sem report_error_detected(dev) # cb() device_lock(dev) # deadlock here Previously, the bus->devices and bus->slots list were protected by pci_bus_sem. In pci_create_slot(), we held it for writing so we could add to the bus->slots list. Add a new local pci_slot_mutex to protect bus->slots. Hold pci_bus_sem for reading while searching the bus->devices list. 
[bhelgaas: changelog] Link: http://lkml.kernel.org/r/CAA93t1qpPqbih+UB0McA_d_+2rVaNkXsinAUxYzK9+JXSS+L-g@mail.gmail.com Reported-by: Rajat Jain Tested-by: Guenter Roeck Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/slot.c | 18 +++++++++++------- include/linux/pci.h | 3 ++- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 396c200b9ddb..4bd3fce93fa4 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -14,6 +14,7 @@ struct kset *pci_slots_kset; EXPORT_SYMBOL_GPL(pci_slots_kset); +static DEFINE_MUTEX(pci_slot_mutex); static ssize_t pci_slot_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -106,9 +107,11 @@ static void pci_slot_release(struct kobject *kobj) dev_dbg(&slot->bus->dev, "dev %02x, released physical slot %s\n", slot->number, pci_slot_name(slot)); + down_read(&pci_bus_sem); list_for_each_entry(dev, &slot->bus->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot->number) dev->slot = NULL; + up_read(&pci_bus_sem); list_del(&slot->list); @@ -194,9 +197,8 @@ static int rename_slot(struct pci_slot *slot, const char *name) static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr) { struct pci_slot *slot; - /* - * We already hold pci_bus_sem so don't worry - */ + + /* We already hold pci_slot_mutex */ list_for_each_entry(slot, &parent->slots, list) if (slot->number == slot_nr) { kobject_get(&slot->kobj); @@ -253,7 +255,7 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, int err = 0; char *slot_name = NULL; - down_write(&pci_bus_sem); + mutex_lock(&pci_slot_mutex); if (slot_nr == -1) goto placeholder; @@ -301,16 +303,18 @@ placeholder: INIT_LIST_HEAD(&slot->list); list_add(&slot->list, &parent->slots); + down_read(&pci_bus_sem); list_for_each_entry(dev, &parent->devices, bus_list) if (PCI_SLOT(dev->devfn) == slot_nr) dev->slot = slot; + up_read(&pci_bus_sem); dev_dbg(&parent->dev, "dev %02x, 
created physical slot %s\n", slot_nr, pci_slot_name(slot)); out: kfree(slot_name); - up_write(&pci_bus_sem); + mutex_unlock(&pci_slot_mutex); return slot; err: kfree(slot); @@ -332,9 +336,9 @@ void pci_destroy_slot(struct pci_slot *slot) dev_dbg(&slot->bus->dev, "dev %02x, dec refcount to %d\n", slot->number, atomic_read(&slot->kobj.kref.refcount) - 1); - down_write(&pci_bus_sem); + mutex_lock(&pci_slot_mutex); kobject_put(&slot->kobj); - up_write(&pci_bus_sem); + mutex_unlock(&pci_slot_mutex); } EXPORT_SYMBOL_GPL(pci_destroy_slot); diff --git a/include/linux/pci.h b/include/linux/pci.h index 8a0321a8fb59..aaee493174e2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -446,7 +446,8 @@ struct pci_bus { struct list_head children; /* list of child buses */ struct list_head devices; /* list of devices on this bus */ struct pci_dev *self; /* bridge device as seen by parent */ - struct list_head slots; /* list of slots on this bus */ + struct list_head slots; /* list of slots on this bus; + protected by pci_slot_mutex */ struct resource *resource[PCI_BRIDGE_RESOURCE_NUM]; struct list_head resources; /* address space routed to this bus */ struct resource busn_res; /* bus numbers routed to this bus */ -- cgit v1.2.3-70-g09d2 From 017ffe64e8b8c8db0f50433a71da41c6a4e12710 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Fri, 17 Jul 2015 17:16:32 +0800 Subject: PCI: Hold pci_slot_mutex while searching bus->slots list Previously, pci_setup_device() and similar functions searched the pci_bus->slots list without any locking. It was possible for another thread to update the list while we searched it. Add pci_dev_assign_slot() to search the list while holding pci_slot_mutex. 
[bhelgaas: changelog, fold in CONFIG_SYSFS fix] Tested-by: Guenter Roeck Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- arch/powerpc/kernel/pci_of_scan.c | 6 +----- arch/sparc/kernel/pci.c | 6 +----- drivers/pci/probe.c | 6 +----- drivers/pci/slot.c | 11 +++++++++++ include/linux/pci.h | 5 +++++ 5 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c index 42e02a2d570b..5e2debfc6ce5 100644 --- a/arch/powerpc/kernel/pci_of_scan.c +++ b/arch/powerpc/kernel/pci_of_scan.c @@ -126,7 +126,6 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, { struct pci_dev *dev; const char *type; - struct pci_slot *slot; dev = pci_alloc_dev(bus); if (!dev) @@ -145,10 +144,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node, dev->needs_freset = 0; /* pcie fundamental reset required */ set_pcie_port_type(dev); - list_for_each_entry(slot, &dev->bus->slots, list) - if (PCI_SLOT(dev->devfn) == slot->number) - dev->slot = slot; - + pci_dev_assign_slot(dev); dev->vendor = get_int_prop(node, "vendor-id", 0xffff); dev->device = get_int_prop(node, "device-id", 0xffff); dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0); diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index c928bc64b4ba..3a0e1a986bfe 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -249,7 +249,6 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, struct pci_bus *bus, int devfn) { struct dev_archdata *sd; - struct pci_slot *slot; struct platform_device *op; struct pci_dev *dev; const char *type; @@ -290,10 +289,7 @@ static struct pci_dev *of_create_pci_dev(struct pci_pbm_info *pbm, dev->multifunction = 0; /* maybe a lie? 
*/ set_pcie_port_type(dev); - list_for_each_entry(slot, &dev->bus->slots, list) - if (PCI_SLOT(dev->devfn) == slot->number) - dev->slot = slot; - + pci_dev_assign_slot(dev); dev->vendor = of_getintprop_default(node, "vendor-id", 0xffff); dev->device = of_getintprop_default(node, "device-id", 0xffff); dev->subsystem_vendor = diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index cefd636681b6..2a9ce16cb374 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1133,7 +1133,6 @@ int pci_setup_device(struct pci_dev *dev) { u32 class; u8 hdr_type; - struct pci_slot *slot; int pos = 0; struct pci_bus_region region; struct resource *res; @@ -1149,10 +1148,7 @@ int pci_setup_device(struct pci_dev *dev) dev->error_state = pci_channel_io_normal; set_pcie_port_type(dev); - list_for_each_entry(slot, &dev->bus->slots, list) - if (PCI_SLOT(dev->devfn) == slot->number) - dev->slot = slot; - + pci_dev_assign_slot(dev); /* Assume 32-bit PCI; let 64-bit PCI cards (which are far rarer) set this higher, assuming the system even supports it. 
*/ dev->dma_mask = 0xffffffff; diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index 4bd3fce93fa4..429d34c348b9 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -194,6 +194,17 @@ static int rename_slot(struct pci_slot *slot, const char *name) return result; } +void pci_dev_assign_slot(struct pci_dev *dev) +{ + struct pci_slot *slot; + + mutex_lock(&pci_slot_mutex); + list_for_each_entry(slot, &dev->bus->slots, list) + if (PCI_SLOT(dev->devfn) == slot->number) + dev->slot = slot; + mutex_unlock(&pci_slot_mutex); +} + static struct pci_slot *get_slot(struct pci_bus *parent, int slot_nr) { struct pci_slot *slot; diff --git a/include/linux/pci.h b/include/linux/pci.h index aaee493174e2..b3ba7fef2916 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -798,6 +798,11 @@ struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, const char *name, struct hotplug_slot *hotplug); void pci_destroy_slot(struct pci_slot *slot); +#ifdef CONFIG_SYSFS +void pci_dev_assign_slot(struct pci_dev *dev); +#else +static inline void pci_dev_assign_slot(struct pci_dev *dev) { } +#endif int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn); void pci_device_add(struct pci_dev *dev, struct pci_bus *bus); -- cgit v1.2.3-70-g09d2 From 8013d1d7eafb0589ca766db6b74026f76b7f5cb4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Jul 2015 14:28:42 +0800 Subject: net/ipv6: add sysctl option accept_ra_min_hop_limit Commit 6fd99094de2b ("ipv6: Don't reduce hop limit for an interface") disabled accepting the hop limit from an RA if it is smaller than the current hop limit, for security reasons. But this behavior breaks the RFC definition. RFC 4861, 6.3.4. Processing Received Router Advertisements A Router Advertisement field (e.g., Cur Hop Limit, Reachable Time, and Retrans Timer) may contain a value denoting that it is unspecified.
In such cases, the parameter should be ignored and the host should continue using whatever value it is already using. If the received Cur Hop Limit value is non-zero, the host SHOULD set its CurHopLimit variable to the received value. So add sysctl option accept_ra_min_hop_limit to let user choose the minimum hop limit value they can accept from RA. And set default to 1 to meet RFC standards. Signed-off-by: Hangbin Liu Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 8 ++++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 16 +++++++--------- 5 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 1a5ab21bcca5..00d26d919459 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1340,6 +1340,14 @@ accept_ra_from_local - BOOLEAN disabled if accept_ra_from_local is disabled on a specific interface. +accept_ra_min_hop_limit - INTEGER + Minimum hop limit Information in Router Advertisement. + + Hop limit Information in Router Advertisement less than this + variable shall be ignored. + + Default: 1 + accept_ra_pinfo - BOOLEAN Learn Prefix Information in Router Advertisement. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 06ed637225b8..cb9dcad72372 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -29,6 +29,7 @@ struct ipv6_devconf { __s32 max_desync_factor; __s32 max_addresses; __s32 accept_ra_defrtr; + __s32 accept_ra_min_hop_limit; __s32 accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF __s32 accept_ra_rtr_pref; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 641a146ead7d..80f3b74446a1 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -172,6 +172,7 @@ enum { DEVCONF_ACCEPT_RA_MTU, DEVCONF_STABLE_SECRET, DEVCONF_USE_OIF_ADDRS_ONLY, + DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index eb0c6a3a8a00..53e3a9d756b0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_from_local = 0, + .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -237,6 +238,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .max_addresses = IPV6_MAX_ADDRESSES, .accept_ra_defrtr = 1, .accept_ra_from_local = 0, + .accept_ra_min_hop_limit= 1, .accept_ra_pinfo = 1, #ifdef CONFIG_IPV6_ROUTER_PREF .accept_ra_rtr_pref = 1, @@ -4588,6 +4590,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses; array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr; + array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit; array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; #ifdef CONFIG_IPV6_ROUTER_PREF array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref; @@ -5484,6 +5487,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = 
"accept_ra_min_hop_limit", + .data = &ipv6_devconf.accept_ra_min_hop_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "accept_ra_pinfo", .data = &ipv6_devconf.accept_ra_pinfo, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0a05b35a90fc..6e184e02fd3c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1225,18 +1225,16 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); - if (ra_msg->icmph.icmp6_hop_limit) { - /* Only set hop_limit on the interface if it is higher than - * the current hop_limit. - */ - if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + if (in6_dev->cnf.accept_ra_min_hop_limit < 256 && + ra_msg->icmph.icmp6_hop_limit) { + if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + if (rt) + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } else { - ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } - if (rt) - dst_metric_set(&rt->dst, RTAX_HOPLIMIT, - ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: -- cgit v1.2.3-70-g09d2 From 97da89767d398c1dfa1f34e5f312eb8ebb382f7f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jul 2015 15:40:16 +0200 Subject: uprobes: Export 'struct return_instance', introduce arch_uretprobe_is_alive() Add the new "weak" helper, arch_uretprobe_is_alive(), used by the next patches. It should return true if this return_instance is still valid. The arch agnostic version just always returns true. The patch exports "struct return_instance" for the architectures which want to override this hook. We can also cleanup prepare_uretprobe() if we pass the new return_instance to arch_uretprobe_hijack_return_addr(). 
Tested-by: Pratyush Anand Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Anton Arapov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150721134016.GA4762@redhat.com Signed-off-by: Ingo Molnar --- include/linux/uprobes.h | 10 ++++++++++ kernel/events/uprobes.c | 14 +++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 60beb5dc7977..50d2764d66a8 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -92,6 +92,15 @@ struct uprobe_task { unsigned int depth; }; +struct return_instance { + struct uprobe *uprobe; + unsigned long func; + unsigned long orig_ret_vaddr; /* original return address */ + bool chained; /* true, if instance is nested */ + + struct return_instance *next; /* keep as stack */ +}; + struct xol_area; struct uprobes_state { @@ -128,6 +137,7 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); +extern bool arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs); extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 98e4d97b8c31..1c71b6242a7e 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -86,15 +86,6 @@ struct uprobe { struct arch_uprobe arch; }; -struct return_instance { - struct uprobe *uprobe; - unsigned long func; - unsigned long orig_ret_vaddr; /* original return address */ - bool chained; /* true, if 
instance is nested */ - - struct return_instance *next; /* keep as stack */ -}; - /* * Execute out of line area: anonymous executable mapping installed * by the probed task to execute the copy of the original instruction @@ -1818,6 +1809,11 @@ bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) return false; } +bool __weak arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs) +{ + return true; +} + /* * Run handler and ask thread to singlestep. * Ensure all non-fatal signals cannot interrupt thread while it singlesteps. -- cgit v1.2.3-70-g09d2 From 7b868e4802a86d867aad1be0471b5767d9c20e10 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jul 2015 15:40:18 +0200 Subject: uprobes/x86: Reimplement arch_uretprobe_is_alive() Add the x86 specific version of arch_uretprobe_is_alive() helper. It returns true if the stack frame mangled by prepare_uretprobe() is still on stack. So if it returns false, we know that the probed function has already returned. We add the new return_instance->stack member and change the generic code to initialize it in prepare_uretprobe, but it should be equally useful for other architectures. TODO: this assumes that the probed application can't use multiple stacks (say sigaltstack). We will try to improve this logic later. 
Tested-by: Pratyush Anand Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Anton Arapov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150721134018.GA4766@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/uprobes.c | 5 +++++ include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 1 + 3 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 66476244731e..58e9b842633f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -985,3 +985,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs return -1; } + +bool arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs) +{ + return regs->sp <= ret->stack; +} diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 50d2764d66a8..7ab6d2c8be49 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -95,6 +95,7 @@ struct uprobe_task { struct return_instance { struct uprobe *uprobe; unsigned long func; + unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1c71b6242a7e..c5f316e06dc0 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1562,6 +1562,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) ri->uprobe = get_uprobe(uprobe); ri->func = instruction_pointer(regs); + ri->stack = user_stack_pointer(regs); ri->orig_ret_vaddr = orig_ret_vaddr; ri->chained = chained; -- cgit v1.2.3-70-g09d2 From 86dcb702e74b8ab7d3b2d36984ef00671cea73b9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jul 2015 15:40:26 +0200 Subject: uprobes: Add the "enum rp_check ctx" arg to arch_uretprobe_is_alive() arch/x86 doesn't care (so far), but as Pratyush Anand pointed out other 
architectures might want to know why arch_uretprobe_is_alive() was called and use different checks depending on the context. Add the new argument to distinguish 2 callers. Tested-by: Pratyush Anand Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Anton Arapov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150721134026.GA4779@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/uprobes.c | 3 ++- include/linux/uprobes.h | 7 ++++++- kernel/events/uprobes.c | 9 ++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 58e9b842633f..acf8b9010bbf 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -986,7 +986,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs return -1; } -bool arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs) +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) { return regs->sp <= ret->stack; } diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 7ab6d2c8be49..c0a540239ab6 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -102,6 +102,11 @@ struct return_instance { struct return_instance *next; /* keep as stack */ }; +enum rp_check { + RP_CHECK_CALL, + RP_CHECK_RET, +}; + struct xol_area; struct uprobes_state { @@ -138,7 +143,7 @@ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk); extern int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data); extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs); extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs); -extern bool arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs); +extern bool
arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs); extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 7e61c8ca27e0..df5661a44e35 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1514,7 +1514,9 @@ static unsigned long get_trampoline_vaddr(void) static void cleanup_return_instances(struct uprobe_task *utask, struct pt_regs *regs) { struct return_instance *ri = utask->return_instances; - while (ri && !arch_uretprobe_is_alive(ri, regs)) { + enum rp_check ctx = RP_CHECK_CALL; + + while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { ri = free_ret_instance(ri); utask->depth--; } @@ -1805,7 +1807,7 @@ static void handle_trampoline(struct pt_regs *regs) * could hit this trampoline on return. TODO: sigaltstack(). */ next = find_next_ret_chain(ri); - valid = !next || arch_uretprobe_is_alive(next, regs); + valid = !next || arch_uretprobe_is_alive(next, RP_CHECK_RET, regs); instruction_pointer_set(regs, ri->orig_ret_vaddr); do { @@ -1830,7 +1832,8 @@ bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs) return false; } -bool __weak arch_uretprobe_is_alive(struct return_instance *ret, struct pt_regs *regs) +bool __weak arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) { return true; } -- cgit v1.2.3-70-g09d2 From db087ef69a2b155ae001665bf0b3806abde7ee34 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 21 Jul 2015 15:40:28 +0200 Subject: uprobes/x86: Make arch_uretprobe_is_alive(RP_CHECK_CALL) more clever The previous change documents that cleanup_return_instances() can't always detect the dead frames, the stack can grow. 
But there is one special case which imho is worth fixing: arch_uretprobe_is_alive() can return true when the stack didn't actually grow, but the next "call" insn uses the already invalidated frame. Test-case: #include <stdio.h> #include <setjmp.h> jmp_buf jmp; int nr = 1024; void func_2(void) { if (--nr == 0) return; longjmp(jmp, 1); } void func_1(void) { setjmp(jmp); func_2(); } int main(void) { func_1(); return 0; } If you ret-probe func_1() and func_2(), prepare_uretprobe() hits the MAX_URETPROBE_DEPTH limit and "return" from func_2() is not reported. When we know that the new call is not chained, we can do the more strict check. In this case "sp" points to the new ret-addr, so every frame which uses the same "sp" must be dead. The only complication is that arch_uretprobe_is_alive() needs to know whether it was chained or not, so we add the new RP_CHECK_CHAIN_CALL enum and change prepare_uretprobe() to pass RP_CHECK_CALL only if !chained. Note: arch_uretprobe_is_alive() could also re-read *sp and check if this word is still trampoline_vaddr. This could obviously improve the logic, but I would like to avoid another copy_from_user() especially in the case when we can't avoid the false "alive == T" positives.
Tested-by: Pratyush Anand Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju Acked-by: Anton Arapov Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150721134028.GA4786@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/uprobes.c | 5 ++++- include/linux/uprobes.h | 1 + kernel/events/uprobes.c | 14 +++++++------- 3 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index acf8b9010bbf..bf4db6eaec8f 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -989,5 +989,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, struct pt_regs *regs) { - return regs->sp <= ret->stack; + if (ctx == RP_CHECK_CALL) /* sp was just decremented by "call" insn */ + return regs->sp < ret->stack; + else + return regs->sp <= ret->stack; } diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index c0a540239ab6..0bdc72f36905 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -104,6 +104,7 @@ struct return_instance { enum rp_check { RP_CHECK_CALL, + RP_CHECK_CHAIN_CALL, RP_CHECK_RET, }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index df5661a44e35..0f370ef57a02 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1511,10 +1511,11 @@ static unsigned long get_trampoline_vaddr(void) return trampoline_vaddr; } -static void cleanup_return_instances(struct uprobe_task *utask, struct pt_regs *regs) +static void cleanup_return_instances(struct uprobe_task *utask, bool chained, + struct pt_regs *regs) { struct return_instance *ri = utask->return_instances; - enum rp_check ctx = RP_CHECK_CALL; + enum rp_check ctx = chained ? 
RP_CHECK_CHAIN_CALL : RP_CHECK_CALL; while (ri && !arch_uretprobe_is_alive(ri, ctx, regs)) { ri = free_ret_instance(ri); @@ -1528,7 +1529,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) struct return_instance *ri; struct uprobe_task *utask; unsigned long orig_ret_vaddr, trampoline_vaddr; - bool chained = false; + bool chained; if (!get_xol_area()) return; @@ -1554,14 +1555,15 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) goto fail; /* drop the entries invalidated by longjmp() */ - cleanup_return_instances(utask, regs); + chained = (orig_ret_vaddr == trampoline_vaddr); + cleanup_return_instances(utask, chained, regs); /* * We don't want to keep trampoline address in stack, rather keep the * original return address of first caller thru all the consequent * instances. This also makes breakpoint unwrapping easier. */ - if (orig_ret_vaddr == trampoline_vaddr) { + if (chained) { if (!utask->return_instances) { /* * This situation is not possible. Likely we have an @@ -1570,8 +1572,6 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) uprobe_warn(current, "handle tail call"); goto fail; } - - chained = true; orig_ret_vaddr = utask->return_instances->orig_ret_vaddr; } -- cgit v1.2.3-70-g09d2 From fa8ad7889d83bcf0a6cdbf6d3622f3ec019cde14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Jul 2015 12:23:53 +0100 Subject: arm: perf: factor arm_pmu core out to drivers To enable sharing of the arm_pmu code with arm64, this patch factors it out to drivers/perf/. A new drivers/perf directory is added for performance monitor drivers to live under. MAINTAINERS is updated accordingly. Files added previously without a corresponding MAINTAINERS update (perf_regs.c, perf_callchain.c, and perf_event.h) are also added.
Cc: Arnaldo Carvalho de Melo Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Linus Walleij Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Russell King Cc: Will Deacon Signed-off-by: Mark Rutland [will: augmented Kconfig help slightly] Signed-off-by: Will Deacon --- MAINTAINERS | 6 +- arch/arm/Kconfig | 8 +- arch/arm/include/asm/pmu.h | 154 ------ arch/arm/kernel/Makefile | 3 +- arch/arm/kernel/perf_event.c | 921 ------------------------------------ arch/arm/kernel/perf_event_v6.c | 2 +- arch/arm/kernel/perf_event_v7.c | 2 +- arch/arm/kernel/perf_event_xscale.c | 2 +- arch/arm/mach-ux500/cpu-db8500.c | 2 +- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/perf/Kconfig | 15 + drivers/perf/Makefile | 1 + drivers/perf/arm_pmu.c | 921 ++++++++++++++++++++++++++++++++++++ include/linux/perf/arm_pmu.h | 154 ++++++ 15 files changed, 1105 insertions(+), 1089 deletions(-) delete mode 100644 arch/arm/include/asm/pmu.h delete mode 100644 arch/arm/kernel/perf_event.c create mode 100644 drivers/perf/Kconfig create mode 100644 drivers/perf/Makefile create mode 100644 drivers/perf/arm_pmu.c create mode 100644 include/linux/perf/arm_pmu.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index fd6078443083..485c92ced47d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -799,11 +799,13 @@ F: arch/arm/include/asm/floppy.h ARM PMU PROFILING AND DEBUGGING M: Will Deacon S: Maintained -F: arch/arm/kernel/perf_event* +F: arch/arm/kernel/perf_* F: arch/arm/oprofile/common.c -F: arch/arm/include/asm/pmu.h F: arch/arm/kernel/hw_breakpoint.c F: arch/arm/include/asm/hw_breakpoint.h +F: arch/arm/include/asm/perf_event.h +F: drivers/perf/arm_pmu.c +F: include/linux/perf/arm_pmu.h ARM PORT M: Russell King diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1c5021002fe4..4f7bc3d4b186 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1701,12 +1701,8 @@ config HIGHPTE user-space 2nd level page tables to reside in high memory. 
config HW_PERF_EVENTS - bool "Enable hardware performance counter support for perf events" - depends on PERF_EVENTS - default y - help - Enable hardware performance counter support for perf events. If - disabled, perf events will use software events only. + def_bool y + depends on ARM_PMU config SYS_SUPPORTS_HUGETLBFS def_bool y diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h deleted file mode 100644 index 3fc87dfd77e6..000000000000 --- a/arch/arm/include/asm/pmu.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * linux/arch/arm/include/asm/pmu.h - * - * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#ifndef __ARM_PMU_H__ -#define __ARM_PMU_H__ - -#include -#include - -#include - -/* - * struct arm_pmu_platdata - ARM PMU platform data - * - * @handle_irq: an optional handler which will be called from the - * interrupt and passed the address of the low level handler, - * and can be used to implement any platform specific handling - * before or after calling it. - */ -struct arm_pmu_platdata { - irqreturn_t (*handle_irq)(int irq, void *dev, - irq_handler_t pmu_handler); -}; - -#ifdef CONFIG_HW_PERF_EVENTS - -/* - * The ARMv7 CPU PMU supports up to 32 event counters. - */ -#define ARMPMU_MAX_HWEVENTS 32 - -#define HW_OP_UNSUPPORTED 0xFFFF -#define C(_x) PERF_COUNT_HW_CACHE_##_x -#define CACHE_OP_UNSUPPORTED 0xFFFF - -#define PERF_MAP_ALL_UNSUPPORTED \ - [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED - -#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ -[0 ... C(MAX) - 1] = { \ - [0 ... C(OP_MAX) - 1] = { \ - [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ - }, \ -} - -/* The events for a given PMU register set. */ -struct pmu_hw_events { - /* - * The events that are active on the PMU for the given index. 
- */ - struct perf_event *events[ARMPMU_MAX_HWEVENTS]; - - /* - * A 1 bit for an index indicates that the counter is being used for - * an event. A 0 means that the counter can be used. - */ - DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); - - /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; - - /* - * When using percpu IRQs, we need a percpu dev_id. Place it here as we - * already have to allocate this struct per cpu. - */ - struct arm_pmu *percpu_pmu; -}; - -struct arm_pmu { - struct pmu pmu; - cpumask_t active_irqs; - cpumask_t supported_cpus; - int *irq_affinity; - char *name; - irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct perf_event *event); - void (*disable)(struct perf_event *event); - int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct perf_event *event); - void (*clear_event_idx)(struct pmu_hw_events *hw_events, - struct perf_event *event); - int (*set_event_filter)(struct hw_perf_event *evt, - struct perf_event_attr *attr); - u32 (*read_counter)(struct perf_event *event); - void (*write_counter)(struct perf_event *event, u32 val); - void (*start)(struct arm_pmu *); - void (*stop)(struct arm_pmu *); - void (*reset)(void *); - int (*request_irq)(struct arm_pmu *, irq_handler_t handler); - void (*free_irq)(struct arm_pmu *); - int (*map_event)(struct perf_event *event); - int num_events; - atomic_t active_events; - struct mutex reserve_mutex; - u64 max_period; - struct platform_device *plat_device; - struct pmu_hw_events __percpu *hw_events; - struct notifier_block hotplug_nb; -}; - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -int armpmu_register(struct arm_pmu *armpmu, int type); - -u64 armpmu_event_update(struct perf_event *event); - -int armpmu_event_set_period(struct perf_event *event); - -int armpmu_map_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned 
(*cache_map)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask); - -struct pmu_probe_info { - unsigned int cpuid; - unsigned int mask; - int (*init)(struct arm_pmu *); -}; - -#define PMU_PROBE(_cpuid, _mask, _fn) \ -{ \ - .cpuid = (_cpuid), \ - .mask = (_mask), \ - .init = (_fn), \ -} - -#define ARM_PMU_PROBE(_cpuid, _fn) \ - PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) - -#define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) - -#define XSCALE_PMU_PROBE(_version, _fn) \ - PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) - -int arm_pmu_device_probe(struct platform_device *pdev, - const struct of_device_id *of_table, - const struct pmu_probe_info *probe_table); - -#endif /* CONFIG_HW_PERF_EVENTS */ - -#endif /* __ARM_PMU_H__ */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index e69f7a19735d..fcb25c1c5c21 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -71,8 +71,7 @@ obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o \ - perf_event_xscale.o perf_event_v6.o \ +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_xscale.o perf_event_v6.o \ perf_event_v7.o CFLAGS_pj4-cp0.o := -marm AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c deleted file mode 100644 index 1cb40651d783..000000000000 --- a/arch/arm/kernel/perf_event.c +++ /dev/null @@ -1,921 +0,0 @@ -#undef DEBUG - -/* - * ARM performance counter support. - * - * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * Copyright (C) 2010 ARM Ltd., Will Deacon - * - * This code is based on the sparc64 perf event code, which is in turn based - * on the x86 code. 
- */ -#define pr_fmt(fmt) "hw perfevents: " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int -armpmu_map_cache_event(const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u64 config) -{ - unsigned int cache_type, cache_op, cache_result, ret; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; - - if (ret == CACHE_OP_UNSUPPORTED) - return -ENOENT; - - return ret; -} - -static int -armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) -{ - int mapping; - - if (config >= PERF_COUNT_HW_MAX) - return -EINVAL; - - mapping = (*event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; -} - -static int -armpmu_map_raw_event(u32 raw_event_mask, u64 config) -{ - return (int)(config & raw_event_mask); -} - -int -armpmu_map_event(struct perf_event *event, - const unsigned (*event_map)[PERF_COUNT_HW_MAX], - const unsigned (*cache_map) - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX], - u32 raw_event_mask) -{ - u64 config = event->attr.config; - int type = event->attr.type; - - if (type == event->pmu->type) - return armpmu_map_raw_event(raw_event_mask, config); - - switch (type) { - case PERF_TYPE_HARDWARE: - return armpmu_map_hw_event(event_map, config); - case PERF_TYPE_HW_CACHE: - return armpmu_map_cache_event(cache_map, config); - case PERF_TYPE_RAW: - return armpmu_map_raw_event(raw_event_mask, config); - } - - return -ENOENT; -} - -int armpmu_event_set_period(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - s64 left = local64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int ret = 0; - - if (unlikely(left <= -period)) { - left = period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - local64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - /* - * Limit the maximum period to prevent the counter value - * from overtaking the one we are about to program. In - * effect we are reducing max_period to account for - * interrupt latency (and we are being very conservative). 
- */ - if (left > (armpmu->max_period >> 1)) - left = armpmu->max_period >> 1; - - local64_set(&hwc->prev_count, (u64)-left); - - armpmu->write_counter(event, (u64)(-left) & 0xffffffff); - - perf_event_update_userpage(event); - - return ret; -} - -u64 armpmu_event_update(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - u64 delta, prev_raw_count, new_raw_count; - -again: - prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(event); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - delta = (new_raw_count - prev_raw_count) & armpmu->max_period; - - local64_add(delta, &event->count); - local64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static void -armpmu_read(struct perf_event *event) -{ - armpmu_event_update(event); -} - -static void -armpmu_stop(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to update the counter, so ignore - * PERF_EF_UPDATE, see comments in armpmu_start(). - */ - if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(event); - armpmu_event_update(event); - hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; - } -} - -static void armpmu_start(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - - /* - * ARM pmu always has to reprogram the period, so ignore - * PERF_EF_RELOAD, see the comment below. - */ - if (flags & PERF_EF_RELOAD) - WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); - - hwc->state = 0; - /* - * Set the period again. Some counters can't be stopped, so when we - * were stopped we simply disabled the IRQ source and the counter - * may have been left counting. 
If we don't do this step then we may - * get an interrupt too soon or *way* too late if the overflow has - * happened since disabling. - */ - armpmu_event_set_period(event); - armpmu->enable(event); -} - -static void -armpmu_del(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - armpmu_stop(event, PERF_EF_UPDATE); - hw_events->events[idx] = NULL; - clear_bit(idx, hw_events->used_mask); - if (armpmu->clear_event_idx) - armpmu->clear_event_idx(hw_events, event); - - perf_event_update_userpage(event); -} - -static int -armpmu_add(struct perf_event *event, int flags) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; - int err = 0; - - /* An event following a process won't be stopped earlier */ - if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) - return -ENOENT; - - perf_pmu_disable(event->pmu); - - /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(hw_events, event); - if (idx < 0) { - err = idx; - goto out; - } - - /* - * If there is an event in the counter we are going to use then make - * sure it is disabled. - */ - event->hw.idx = idx; - armpmu->disable(event); - hw_events->events[idx] = event; - - hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; - if (flags & PERF_EF_START) - armpmu_start(event, PERF_EF_RELOAD); - - /* Propagate our changes to the userspace mapping. */ - perf_event_update_userpage(event); - -out: - perf_pmu_enable(event->pmu); - return err; -} - -static int -validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, - struct perf_event *event) -{ - struct arm_pmu *armpmu; - - if (is_software_event(event)) - return 1; - - /* - * Reject groups spanning multiple HW PMUs (e.g. 
CPU + CCI). The - * core perf code won't check that the pmu->ctx == leader->ctx - * until after pmu->event_init(event). - */ - if (event->pmu != pmu) - return 0; - - if (event->state < PERF_EVENT_STATE_OFF) - return 1; - - if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) - return 1; - - armpmu = to_arm_pmu(event->pmu); - return armpmu->get_event_idx(hw_events, event) >= 0; -} - -static int -validate_group(struct perf_event *event) -{ - struct perf_event *sibling, *leader = event->group_leader; - struct pmu_hw_events fake_pmu; - - /* - * Initialise the fake PMU. We only need to populate the - * used_mask for the purposes of validation. - */ - memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); - - if (!validate_event(event->pmu, &fake_pmu, leader)) - return -EINVAL; - - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(event->pmu, &fake_pmu, sibling)) - return -EINVAL; - } - - if (!validate_event(event->pmu, &fake_pmu, event)) - return -EINVAL; - - return 0; -} - -static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) -{ - struct arm_pmu *armpmu; - struct platform_device *plat_device; - struct arm_pmu_platdata *plat; - int ret; - u64 start_clock, finish_clock; - - /* - * we request the IRQ with a (possibly percpu) struct arm_pmu**, but - * the handlers expect a struct arm_pmu*. The percpu_irq framework will - * do any necessary shifting, we just need to perform the first - * dereference. 
- */ - armpmu = *(void **)dev; - plat_device = armpmu->plat_device; - plat = dev_get_platdata(&plat_device->dev); - - start_clock = sched_clock(); - if (plat && plat->handle_irq) - ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); - else - ret = armpmu->handle_irq(irq, armpmu); - finish_clock = sched_clock(); - - perf_sample_event_took(finish_clock - start_clock); - return ret; -} - -static void -armpmu_release_hardware(struct arm_pmu *armpmu) -{ - armpmu->free_irq(armpmu); -} - -static int -armpmu_reserve_hardware(struct arm_pmu *armpmu) -{ - int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); - if (err) { - armpmu_release_hardware(armpmu); - return err; - } - - return 0; -} - -static void -hw_perf_event_destroy(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - atomic_t *active_events = &armpmu->active_events; - struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; - - if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { - armpmu_release_hardware(armpmu); - mutex_unlock(pmu_reserve_mutex); - } -} - -static int -event_requires_mode_exclusion(struct perf_event_attr *attr) -{ - return attr->exclude_idle || attr->exclude_user || - attr->exclude_kernel || attr->exclude_hv; -} - -static int -__hw_perf_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - struct hw_perf_event *hwc = &event->hw; - int mapping; - - mapping = armpmu->map_event(event); - - if (mapping < 0) { - pr_debug("event %x:%llx not supported\n", event->attr.type, - event->attr.config); - return mapping; - } - - /* - * We don't assign an index until we actually place the event onto - * hardware. Use -1 to signify that we haven't decided where to put it - * yet. For SMP systems, each core has it's own PMU so we can't do any - * clever allocation or constraints checking at this point. 
- */ - hwc->idx = -1; - hwc->config_base = 0; - hwc->config = 0; - hwc->event_base = 0; - - /* - * Check whether we need to exclude the counter from certain modes. - */ - if ((!armpmu->set_event_filter || - armpmu->set_event_filter(hwc, &event->attr)) && - event_requires_mode_exclusion(&event->attr)) { - pr_debug("ARM performance counters do not support " - "mode exclusion\n"); - return -EOPNOTSUPP; - } - - /* - * Store the event encoding into the config_base field. - */ - hwc->config_base |= (unsigned long)mapping; - - if (!is_sampling_event(event)) { - /* - * For non-sampling runs, limit the sample_period to half - * of the counter width. That way, the new counter value - * is far less likely to overtake the previous one unless - * you have some serious IRQ latency issues. - */ - hwc->sample_period = armpmu->max_period >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - } - - if (event->group_leader != event) { - if (validate_group(event) != 0) - return -EINVAL; - } - - return 0; -} - -static int armpmu_event_init(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - int err = 0; - atomic_t *active_events = &armpmu->active_events; - - /* - * Reject CPU-affine events for CPUs that are of a different class to - * that which this PMU handles. Process-following events (where - * event->cpu == -1) can be migrated between CPUs, and thus we have to - * reject them later (in armpmu_add) if they're scheduled on a - * different class of CPU. 
- */ - if (event->cpu != -1 && - !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) - return -ENOENT; - - /* does not support taken branch sampling */ - if (has_branch_stack(event)) - return -EOPNOTSUPP; - - if (armpmu->map_event(event) == -ENOENT) - return -ENOENT; - - event->destroy = hw_perf_event_destroy; - - if (!atomic_inc_not_zero(active_events)) { - mutex_lock(&armpmu->reserve_mutex); - if (atomic_read(active_events) == 0) - err = armpmu_reserve_hardware(armpmu); - - if (!err) - atomic_inc(active_events); - mutex_unlock(&armpmu->reserve_mutex); - } - - if (err) - return err; - - err = __hw_perf_event_init(event); - if (err) - hw_perf_event_destroy(event); - - return err; -} - -static void armpmu_enable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); - int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); - - /* For task-bound events we may be called on other CPUs */ - if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) - return; - - if (enabled) - armpmu->start(armpmu); -} - -static void armpmu_disable(struct pmu *pmu) -{ - struct arm_pmu *armpmu = to_arm_pmu(pmu); - - /* For task-bound events we may be called on other CPUs */ - if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) - return; - - armpmu->stop(armpmu); -} - -/* - * In heterogeneous systems, events are specific to a particular - * microarchitecture, and aren't suitable for another. Thus, only match CPUs of - * the same microarchitecture. 
- */ -static int armpmu_filter_match(struct perf_event *event) -{ - struct arm_pmu *armpmu = to_arm_pmu(event->pmu); - unsigned int cpu = smp_processor_id(); - return cpumask_test_cpu(cpu, &armpmu->supported_cpus); -} - -static void armpmu_init(struct arm_pmu *armpmu) -{ - atomic_set(&armpmu->active_events, 0); - mutex_init(&armpmu->reserve_mutex); - - armpmu->pmu = (struct pmu) { - .pmu_enable = armpmu_enable, - .pmu_disable = armpmu_disable, - .event_init = armpmu_event_init, - .add = armpmu_add, - .del = armpmu_del, - .start = armpmu_start, - .stop = armpmu_stop, - .read = armpmu_read, - .filter_match = armpmu_filter_match, - }; -} - -int armpmu_register(struct arm_pmu *armpmu, int type) -{ - armpmu_init(armpmu); - pr_info("enabled with %s PMU driver, %d counters available\n", - armpmu->name, armpmu->num_events); - return perf_pmu_register(&armpmu->pmu, armpmu->name, type); -} - -/* Set at runtime when we know what CPU type we are. */ -static struct arm_pmu *__oprofile_cpu_pmu; - -/* - * Despite the names, these two functions are CPU-specific and are used - * by the OProfile/perf code. 
- */ -const char *perf_pmu_name(void) -{ - if (!__oprofile_cpu_pmu) - return NULL; - - return __oprofile_cpu_pmu->name; -} -EXPORT_SYMBOL_GPL(perf_pmu_name); - -int perf_num_counters(void) -{ - int max_events = 0; - - if (__oprofile_cpu_pmu != NULL) - max_events = __oprofile_cpu_pmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -static void cpu_pmu_enable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - enable_percpu_irq(irq, IRQ_TYPE_NONE); -} - -static void cpu_pmu_disable_percpu_irq(void *data) -{ - int irq = *(int *)data; - - disable_percpu_irq(irq); -} - -static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) -{ - int i, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); - free_percpu_irq(irq, &hw_events->percpu_pmu); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - } - } -} - -static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) -{ - int i, err, irq, irqs; - struct platform_device *pmu_device = cpu_pmu->plat_device; - struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; - - if (!pmu_device) - return -ENODEV; - - irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { - pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); - return 0; - } - - irq = platform_get_irq(pmu_device, 0); - if (irq >= 0 && irq_is_percpu(irq)) { - err = request_percpu_irq(irq, handler, "arm-pmu", - 
&hw_events->percpu_pmu); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); - } else { - for (i = 0; i < irqs; ++i) { - int cpu = i; - - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - if (cpu_pmu->irq_affinity) - cpu = cpu_pmu->irq_affinity[i]; - - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { - pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - continue; - } - - err = request_irq(irq, handler, - IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", - per_cpu_ptr(&hw_events->percpu_pmu, cpu)); - if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); - return err; - } - - cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); - } - } - - return 0; -} - -/* - * PMU hardware loses all context when a CPU goes offline. - * When a CPU is hotplugged back in, since some hardware registers are - * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading - * junk values out of them. 
- */ -static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, - void *hcpu) -{ - int cpu = (unsigned long)hcpu; - struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); - - if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) - return NOTIFY_DONE; - - if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) - return NOTIFY_DONE; - - if (pmu->reset) - pmu->reset(pmu); - else - return NOTIFY_DONE; - - return NOTIFY_OK; -} - -static int cpu_pmu_init(struct arm_pmu *cpu_pmu) -{ - int err; - int cpu; - struct pmu_hw_events __percpu *cpu_hw_events; - - cpu_hw_events = alloc_percpu(struct pmu_hw_events); - if (!cpu_hw_events) - return -ENOMEM; - - cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; - err = register_cpu_notifier(&cpu_pmu->hotplug_nb); - if (err) - goto out_hw_events; - - for_each_possible_cpu(cpu) { - struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); - raw_spin_lock_init(&events->pmu_lock); - events->percpu_pmu = cpu_pmu; - } - - cpu_pmu->hw_events = cpu_hw_events; - cpu_pmu->request_irq = cpu_pmu_request_irq; - cpu_pmu->free_irq = cpu_pmu_free_irq; - - /* Ensure the PMU has sane values out of reset. */ - if (cpu_pmu->reset) - on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, - cpu_pmu, 1); - - /* If no interrupts available, set the corresponding capability flag */ - if (!platform_get_irq(cpu_pmu->plat_device, 0)) - cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; - - return 0; - -out_hw_events: - free_percpu(cpu_hw_events); - return err; -} - -static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) -{ - unregister_cpu_notifier(&cpu_pmu->hotplug_nb); - free_percpu(cpu_pmu->hw_events); -} - -/* - * CPU PMU identification and probing. 
- */ -static int probe_current_pmu(struct arm_pmu *pmu, - const struct pmu_probe_info *info) -{ - int cpu = get_cpu(); - unsigned int cpuid = read_cpuid_id(); - int ret = -ENODEV; - - pr_info("probing PMU on CPU %d\n", cpu); - - for (; info->init != NULL; info++) { - if ((cpuid & info->mask) != info->cpuid) - continue; - ret = info->init(pmu); - break; - } - - put_cpu(); - return ret; -} - -static int of_pmu_irq_cfg(struct arm_pmu *pmu) -{ - int *irqs, i = 0; - bool using_spi = false; - struct platform_device *pdev = pmu->plat_device; - - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - - do { - struct device_node *dn; - int cpu, irq; - - /* See if we have an affinity entry */ - dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); - if (!dn) - break; - - /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ - irq = platform_get_irq(pdev, i); - if (irq >= 0) { - bool spi = !irq_is_percpu(irq); - - if (i > 0 && spi != using_spi) { - pr_err("PPI/SPI IRQ type mismatch for %s!\n", - dn->name); - kfree(irqs); - return -EINVAL; - } - - using_spi = spi; - } - - /* Now look up the logical CPU number */ - for_each_possible_cpu(cpu) - if (dn == of_cpu_device_node_get(cpu)) - break; - - if (cpu >= nr_cpu_ids) { - pr_warn("Failed to find logical CPU for %s\n", - dn->name); - of_node_put(dn); - cpumask_setall(&pmu->supported_cpus); - break; - } - of_node_put(dn); - - /* For SPIs, we need to track the affinity per IRQ */ - if (using_spi) { - if (i >= pdev->num_resources) { - of_node_put(dn); - break; - } - - irqs[i] = cpu; - } - - /* Keep track of the CPUs containing this PMU type */ - cpumask_set_cpu(cpu, &pmu->supported_cpus); - of_node_put(dn); - i++; - } while (1); - - /* If we didn't manage to parse anything, claim to support all CPUs */ - if (cpumask_weight(&pmu->supported_cpus) == 0) - cpumask_setall(&pmu->supported_cpus); - - /* If we matched up the IRQ affinities, use them to route the SPIs */ - if 
(using_spi && i == pdev->num_resources) - pmu->irq_affinity = irqs; - else - kfree(irqs); - - return 0; -} - -int arm_pmu_device_probe(struct platform_device *pdev, - const struct of_device_id *of_table, - const struct pmu_probe_info *probe_table) -{ - const struct of_device_id *of_id; - const int (*init_fn)(struct arm_pmu *); - struct device_node *node = pdev->dev.of_node; - struct arm_pmu *pmu; - int ret = -ENODEV; - - pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); - if (!pmu) { - pr_info("failed to allocate PMU device!\n"); - return -ENOMEM; - } - - if (!__oprofile_cpu_pmu) - __oprofile_cpu_pmu = pmu; - - pmu->plat_device = pdev; - - if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { - init_fn = of_id->data; - - ret = of_pmu_irq_cfg(pmu); - if (!ret) - ret = init_fn(pmu); - } else { - ret = probe_current_pmu(pmu, probe_table); - cpumask_setall(&pmu->supported_cpus); - } - - if (ret) { - pr_info("failed to probe PMU!\n"); - goto out_free; - } - - ret = cpu_pmu_init(pmu); - if (ret) - goto out_free; - - ret = armpmu_register(pmu, -1); - if (ret) - goto out_destroy; - - return 0; - -out_destroy: - cpu_pmu_destroy(pmu); -out_free: - pr_info("failed to register PMU devices!\n"); - kfree(pmu); - return ret; -} diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 09f83e414a72..09413e7b49aa 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -34,9 +34,9 @@ #include #include -#include #include +#include #include enum armv6_perf_types { diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index f9b37f876e20..126dc679b230 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -21,11 +21,11 @@ #include #include #include -#include #include #include "../vfp/vfpinstr.h" #include +#include #include /* diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 304d056d5b25..aa0499e2eef7 100644 --- 
a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -16,9 +16,9 @@ #include #include -#include #include +#include #include enum xscale_perf_types { diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c index 16913800bbf9..5578dc1ab52b 100644 --- a/arch/arm/mach-ux500/cpu-db8500.c +++ b/arch/arm/mach-ux500/cpu-db8500.c @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include "setup.h" diff --git a/drivers/Kconfig b/drivers/Kconfig index 6e973b8e3a3b..3497485f5eab 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -176,6 +176,8 @@ source "drivers/powercap/Kconfig" source "drivers/mcb/Kconfig" +source "drivers/perf/Kconfig" + source "drivers/ras/Kconfig" source "drivers/thunderbolt/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index b64b49f6e01b..f245f2291b8a 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -161,6 +161,7 @@ obj-$(CONFIG_NTB) += ntb/ obj-$(CONFIG_FMC) += fmc/ obj-$(CONFIG_POWERCAP) += powercap/ obj-$(CONFIG_MCB) += mcb/ +obj-$(CONFIG_PERF_EVENTS) += perf/ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig new file mode 100644 index 000000000000..d9de36ee165d --- /dev/null +++ b/drivers/perf/Kconfig @@ -0,0 +1,15 @@ +# +# Performance Monitor Drivers +# + +menu "Performance monitor support" + +config ARM_PMU + depends on PERF_EVENTS && ARM + bool "ARM PMU framework" + default y + help + Say y if you want to use CPU performance monitors on ARM-based + systems. 
+ +endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile new file mode 100644 index 000000000000..acd2397ded94 --- /dev/null +++ b/drivers/perf/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_ARM_PMU) += arm_pmu.o diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c new file mode 100644 index 000000000000..2365a32a595e --- /dev/null +++ b/drivers/perf/arm_pmu.c @@ -0,0 +1,921 @@ +#undef DEBUG + +/* + * ARM performance counter support. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * Copyright (C) 2010 ARM Ltd., Will Deacon + * + * This code is based on the sparc64 perf event code, which is in turn based + * on the x86 code. + */ +#define pr_fmt(fmt) "hw perfevents: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static int +armpmu_map_cache_event(const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + return ret; +} + +static int +armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +{ + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? 
-ENOENT : mapping; +} + +static int +armpmu_map_raw_event(u32 raw_event_mask, u64 config) +{ + return (int)(config & raw_event_mask); +} + +int +armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask) +{ + u64 config = event->attr.config; + int type = event->attr.type; + + if (type == event->pmu->type) + return armpmu_map_raw_event(raw_event_mask, config); + + switch (type) { + case PERF_TYPE_HARDWARE: + return armpmu_map_hw_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return armpmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return armpmu_map_raw_event(raw_event_mask, config); + } + + return -ENOENT; +} + +int armpmu_event_set_period(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + /* + * Limit the maximum period to prevent the counter value + * from overtaking the one we are about to program. In + * effect we are reducing max_period to account for + * interrupt latency (and we are being very conservative). 
+ */ + if (left > (armpmu->max_period >> 1)) + left = armpmu->max_period >> 1; + + local64_set(&hwc->prev_count, (u64)-left); + + armpmu->write_counter(event, (u64)(-left) & 0xffffffff); + + perf_event_update_userpage(event); + + return ret; +} + +u64 armpmu_event_update(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = armpmu->read_counter(event); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + goto again; + + delta = (new_raw_count - prev_raw_count) & armpmu->max_period; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void +armpmu_read(struct perf_event *event) +{ + armpmu_event_update(event); +} + +static void +armpmu_stop(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(event); + armpmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static void armpmu_start(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + /* + * Set the period again. Some counters can't be stopped, so when we + * were stopped we simply disabled the IRQ source and the counter + * may have been left counting. 
If we don't do this step then we may + * get an interrupt too soon or *way* too late if the overflow has + * happened since disabling. + */ + armpmu_event_set_period(event); + armpmu->enable(event); +} + +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + armpmu_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + if (armpmu->clear_event_idx) + armpmu->clear_event_idx(hw_events, event); + + perf_event_update_userpage(event); +} + +static int +armpmu_add(struct perf_event *event, int flags) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + /* An event following a process won't be stopped earlier */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return -ENOENT; + + perf_pmu_disable(event->pmu); + + /* If we don't have a space for the counter then finish early. */ + idx = armpmu->get_event_idx(hw_events, event); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then make + * sure it is disabled. + */ + event->hw.idx = idx; + armpmu->disable(event); + hw_events->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +static int +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) +{ + struct arm_pmu *armpmu; + + if (is_software_event(event)) + return 1; + + /* + * Reject groups spanning multiple HW PMUs (e.g. 
CPU + CCI). The + * core perf code won't check that the pmu->ctx == leader->ctx + * until after pmu->event_init(event). + */ + if (event->pmu != pmu) + return 0; + + if (event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) + return 1; + + armpmu = to_arm_pmu(event->pmu); + return armpmu->get_event_idx(hw_events, event) >= 0; +} + +static int +validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct pmu_hw_events fake_pmu; + + /* + * Initialise the fake PMU. We only need to populate the + * used_mask for the purposes of validation. + */ + memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask)); + + if (!validate_event(event->pmu, &fake_pmu, leader)) + return -EINVAL; + + list_for_each_entry(sibling, &leader->sibling_list, group_entry) { + if (!validate_event(event->pmu, &fake_pmu, sibling)) + return -EINVAL; + } + + if (!validate_event(event->pmu, &fake_pmu, event)) + return -EINVAL; + + return 0; +} + +static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) +{ + struct arm_pmu *armpmu; + struct platform_device *plat_device; + struct arm_pmu_platdata *plat; + int ret; + u64 start_clock, finish_clock; + + /* + * we request the IRQ with a (possibly percpu) struct arm_pmu**, but + * the handlers expect a struct arm_pmu*. The percpu_irq framework will + * do any necessary shifting, we just need to perform the first + * dereference. 
+ */ + armpmu = *(void **)dev; + plat_device = armpmu->plat_device; + plat = dev_get_platdata(&plat_device->dev); + + start_clock = sched_clock(); + if (plat && plat->handle_irq) + ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq); + else + ret = armpmu->handle_irq(irq, armpmu); + finish_clock = sched_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return ret; +} + +static void +armpmu_release_hardware(struct arm_pmu *armpmu) +{ + armpmu->free_irq(armpmu); +} + +static int +armpmu_reserve_hardware(struct arm_pmu *armpmu) +{ + int err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); + if (err) { + armpmu_release_hardware(armpmu); + return err; + } + + return 0; +} + +static void +hw_perf_event_destroy(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + atomic_t *active_events = &armpmu->active_events; + struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; + + if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { + armpmu_release_hardware(armpmu); + mutex_unlock(pmu_reserve_mutex); + } +} + +static int +event_requires_mode_exclusion(struct perf_event_attr *attr) +{ + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv; +} + +static int +__hw_perf_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int mapping; + + mapping = armpmu->map_event(event); + + if (mapping < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapping; + } + + /* + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. For SMP systems, each core has it's own PMU so we can't do any + * clever allocation or constraints checking at this point. 
+ */ + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!armpmu->set_event_filter || + armpmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_debug("ARM performance counters do not support " + "mode exclusion\n"); + return -EOPNOTSUPP; + } + + /* + * Store the event encoding into the config_base field. + */ + hwc->config_base |= (unsigned long)mapping; + + if (!is_sampling_event(event)) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + hwc->sample_period = armpmu->max_period >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + if (event->group_leader != event) { + if (validate_group(event) != 0) + return -EINVAL; + } + + return 0; +} + +static int armpmu_event_init(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + int err = 0; + atomic_t *active_events = &armpmu->active_events; + + /* + * Reject CPU-affine events for CPUs that are of a different class to + * that which this PMU handles. Process-following events (where + * event->cpu == -1) can be migrated between CPUs, and thus we have to + * reject them later (in armpmu_add) if they're scheduled on a + * different class of CPU. 
+ */ + if (event->cpu != -1 && + !cpumask_test_cpu(event->cpu, &armpmu->supported_cpus)) + return -ENOENT; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (armpmu->map_event(event) == -ENOENT) + return -ENOENT; + + event->destroy = hw_perf_event_destroy; + + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&armpmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = armpmu_reserve_hardware(armpmu); + + if (!err) + atomic_inc(active_events); + mutex_unlock(&armpmu->reserve_mutex); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + if (err) + hw_perf_event_destroy(event); + + return err; +} + +static void armpmu_enable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); + + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + + if (enabled) + armpmu->start(armpmu); +} + +static void armpmu_disable(struct pmu *pmu) +{ + struct arm_pmu *armpmu = to_arm_pmu(pmu); + + /* For task-bound events we may be called on other CPUs */ + if (!cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus)) + return; + + armpmu->stop(armpmu); +} + +/* + * In heterogeneous systems, events are specific to a particular + * microarchitecture, and aren't suitable for another. Thus, only match CPUs of + * the same microarchitecture. 
+ */ +static int armpmu_filter_match(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + unsigned int cpu = smp_processor_id(); + return cpumask_test_cpu(cpu, &armpmu->supported_cpus); +} + +static void armpmu_init(struct arm_pmu *armpmu) +{ + atomic_set(&armpmu->active_events, 0); + mutex_init(&armpmu->reserve_mutex); + + armpmu->pmu = (struct pmu) { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, + .filter_match = armpmu_filter_match, + }; +} + +int armpmu_register(struct arm_pmu *armpmu, int type) +{ + armpmu_init(armpmu); + pr_info("enabled with %s PMU driver, %d counters available\n", + armpmu->name, armpmu->num_events); + return perf_pmu_register(&armpmu->pmu, armpmu->name, type); +} + +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *__oprofile_cpu_pmu; + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. 
+ */ +const char *perf_pmu_name(void) +{ + if (!__oprofile_cpu_pmu) + return NULL; + + return __oprofile_cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (__oprofile_cpu_pmu != NULL) + max_events = __oprofile_cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + int irq = *(int *)data; + + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &hw_events->percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", + 
&hw_events->percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + int cpu = i; + + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + if (cpu_pmu->irq_affinity) + cpu = cpu_pmu->irq_affinity[i]; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(cpu)) && irqs > 1) { + pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, cpu); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + per_cpu_ptr(&hw_events->percpu_pmu, cpu)); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(cpu, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. 
+ */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + int cpu = (unsigned long)hcpu; + struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb); + + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (!cpumask_test_cpu(cpu, &pmu->supported_cpus)) + return NOTIFY_DONE; + + if (pmu->reset) + pmu->reset(pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static int cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int err; + int cpu; + struct pmu_hw_events __percpu *cpu_hw_events; + + cpu_hw_events = alloc_percpu(struct pmu_hw_events); + if (!cpu_hw_events) + return -ENOMEM; + + cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify; + err = register_cpu_notifier(&cpu_pmu->hotplug_nb); + if (err) + goto out_hw_events; + + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu); + raw_spin_lock_init(&events->pmu_lock); + events->percpu_pmu = cpu_pmu; + } + + cpu_pmu->hw_events = cpu_hw_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu_mask(&cpu_pmu->supported_cpus, cpu_pmu->reset, + cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + + return 0; + +out_hw_events: + free_percpu(cpu_hw_events); + return err; +} + +static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu) +{ + unregister_cpu_notifier(&cpu_pmu->hotplug_nb); + free_percpu(cpu_pmu->hw_events); +} + +/* + * CPU PMU identification and probing. 
+ */ +static int probe_current_pmu(struct arm_pmu *pmu, + const struct pmu_probe_info *info) +{ + int cpu = get_cpu(); + unsigned int cpuid = read_cpuid_id(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + for (; info->init != NULL; info++) { + if ((cpuid & info->mask) != info->cpuid) + continue; + ret = info->init(pmu); + break; + } + + put_cpu(); + return ret; +} + +static int of_pmu_irq_cfg(struct arm_pmu *pmu) +{ + int *irqs, i = 0; + bool using_spi = false; + struct platform_device *pdev = pmu->plat_device; + + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + + do { + struct device_node *dn; + int cpu, irq; + + /* See if we have an affinity entry */ + dn = of_parse_phandle(pdev->dev.of_node, "interrupt-affinity", i); + if (!dn) + break; + + /* Check the IRQ type and prohibit a mix of PPIs and SPIs */ + irq = platform_get_irq(pdev, i); + if (irq >= 0) { + bool spi = !irq_is_percpu(irq); + + if (i > 0 && spi != using_spi) { + pr_err("PPI/SPI IRQ type mismatch for %s!\n", + dn->name); + kfree(irqs); + return -EINVAL; + } + + using_spi = spi; + } + + /* Now look up the logical CPU number */ + for_each_possible_cpu(cpu) + if (dn == of_cpu_device_node_get(cpu)) + break; + + if (cpu >= nr_cpu_ids) { + pr_warn("Failed to find logical CPU for %s\n", + dn->name); + of_node_put(dn); + cpumask_setall(&pmu->supported_cpus); + break; + } + of_node_put(dn); + + /* For SPIs, we need to track the affinity per IRQ */ + if (using_spi) { + if (i >= pdev->num_resources) { + of_node_put(dn); + break; + } + + irqs[i] = cpu; + } + + /* Keep track of the CPUs containing this PMU type */ + cpumask_set_cpu(cpu, &pmu->supported_cpus); + of_node_put(dn); + i++; + } while (1); + + /* If we didn't manage to parse anything, claim to support all CPUs */ + if (cpumask_weight(&pmu->supported_cpus) == 0) + cpumask_setall(&pmu->supported_cpus); + + /* If we matched up the IRQ affinities, use them to route the SPIs */ + if 
(using_spi && i == pdev->num_resources) + pmu->irq_affinity = irqs; + else + kfree(irqs); + + return 0; +} + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!\n"); + return -ENOMEM; + } + + if (!__oprofile_cpu_pmu) + __oprofile_cpu_pmu = pmu; + + pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(of_table, pdev->dev.of_node))) { + init_fn = of_id->data; + + ret = of_pmu_irq_cfg(pmu); + if (!ret) + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu, probe_table); + cpumask_setall(&pmu->supported_cpus); + } + + if (ret) { + pr_info("failed to probe PMU!\n"); + goto out_free; + } + + ret = cpu_pmu_init(pmu); + if (ret) + goto out_free; + + ret = armpmu_register(pmu, -1); + if (ret) + goto out_destroy; + + return 0; + +out_destroy: + cpu_pmu_destroy(pmu); +out_free: + pr_info("failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h new file mode 100644 index 000000000000..bfa673bb822d --- /dev/null +++ b/include/linux/perf/arm_pmu.h @@ -0,0 +1,154 @@ +/* + * linux/arch/arm/include/asm/pmu.h + * + * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#ifndef __ARM_PMU_H__ +#define __ARM_PMU_H__ + +#include +#include + +#include + +/* + * struct arm_pmu_platdata - ARM PMU platform data + * + * @handle_irq: an optional handler which will be called from the + * interrupt and passed the address of the low level handler, + * and can be used to implement any platform specific handling + * before or after calling it. + */ +struct arm_pmu_platdata { + irqreturn_t (*handle_irq)(int irq, void *dev, + irq_handler_t pmu_handler); +}; + +#ifdef CONFIG_ARM_PMU + +/* + * The ARMv7 CPU PMU supports up to 32 event counters. + */ +#define ARMPMU_MAX_HWEVENTS 32 + +#define HW_OP_UNSUPPORTED 0xFFFF +#define C(_x) PERF_COUNT_HW_CACHE_##_x +#define CACHE_OP_UNSUPPORTED 0xFFFF + +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + +/* The events for a given PMU register set. */ +struct pmu_hw_events { + /* + * The events that are active on the PMU for the given index. + */ + struct perf_event *events[ARMPMU_MAX_HWEVENTS]; + + /* + * A 1 bit for an index indicates that the counter is being used for + * an event. A 0 means that the counter can be used. + */ + DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS); + + /* + * Hardware lock to serialize accesses to PMU registers. Needed for the + * read/modify/write sequences. + */ + raw_spinlock_t pmu_lock; + + /* + * When using percpu IRQs, we need a percpu dev_id. Place it here as we + * already have to allocate this struct per cpu. 
+ */ + struct arm_pmu *percpu_pmu; +}; + +struct arm_pmu { + struct pmu pmu; + cpumask_t active_irqs; + cpumask_t supported_cpus; + int *irq_affinity; + char *name; + irqreturn_t (*handle_irq)(int irq_num, void *dev); + void (*enable)(struct perf_event *event); + void (*disable)(struct perf_event *event); + int (*get_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + void (*clear_event_idx)(struct pmu_hw_events *hw_events, + struct perf_event *event); + int (*set_event_filter)(struct hw_perf_event *evt, + struct perf_event_attr *attr); + u32 (*read_counter)(struct perf_event *event); + void (*write_counter)(struct perf_event *event, u32 val); + void (*start)(struct arm_pmu *); + void (*stop)(struct arm_pmu *); + void (*reset)(void *); + int (*request_irq)(struct arm_pmu *, irq_handler_t handler); + void (*free_irq)(struct arm_pmu *); + int (*map_event)(struct perf_event *event); + int num_events; + atomic_t active_events; + struct mutex reserve_mutex; + u64 max_period; + struct platform_device *plat_device; + struct pmu_hw_events __percpu *hw_events; + struct notifier_block hotplug_nb; +}; + +#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) + +int armpmu_register(struct arm_pmu *armpmu, int type); + +u64 armpmu_event_update(struct perf_event *event); + +int armpmu_event_set_period(struct perf_event *event); + +int armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask); + +struct pmu_probe_info { + unsigned int cpuid; + unsigned int mask; + int (*init)(struct arm_pmu *); +}; + +#define PMU_PROBE(_cpuid, _mask, _fn) \ +{ \ + .cpuid = (_cpuid), \ + .mask = (_mask), \ + .init = (_fn), \ +} + +#define ARM_PMU_PROBE(_cpuid, _fn) \ + PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn) + +#define ARM_PMU_XSCALE_MASK ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK) + +#define 
XSCALE_PMU_PROBE(_version, _fn) \ + PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn) + +int arm_pmu_device_probe(struct platform_device *pdev, + const struct of_device_id *of_table, + const struct pmu_probe_info *probe_table); + +#endif /* CONFIG_ARM_PMU */ + +#endif /* __ARM_PMU_H__ */ -- cgit v1.2.3-70-g09d2 From 34cadd9c1bcbd5ad5a1f379b013526a8046d4aed Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 30 Jul 2015 16:30:07 +0300 Subject: spi: pxa2xx: Add support for Intel Sunrisepoint Major difference in LPSS SPI between Intel Sunrisepoint PCH and earlier platforms is an integrated DMA (iDMA) engine. iDMA is an IP that is private for each LPSS host controller (UART/SPI/I2C). Other differences are private register space offset, a few private registers that are in different location and FIFO thresholds. Intel Sunrisepoint LPSS SPI and iDMA devices are probed and registered in MFD layer as platform devices. Here these compound devices are detected by matching against known PCI IDs. This allows us to share pxa2xx_spi_acpi_get_pdata() for setting up the platform data instead of duplicating it in MFD part. This patch adds configuration for Intel Sunrisepoint LPSS SPI, above detection and DMA filter function that picks the DMA channel only from an associated iDMA block. 
Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 59 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/pxa2xx_ssp.h | 1 + 2 files changed, 56 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 7293d6d875c5..2c9fa409d2bf 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -97,6 +98,15 @@ static const struct lpss_config lpss_platforms[] = { .tx_threshold_lo = 160, .tx_threshold_hi = 224, }, + { /* LPSS_SPT_SSP */ + .offset = 0x200, + .reg_general = -1, + .reg_ssp = 0x20, + .reg_cs_ctrl = 0x24, + .rx_threshold = 1, + .tx_threshold_lo = 32, + .tx_threshold_hi = 56, + }, }; static inline const struct lpss_config @@ -110,6 +120,7 @@ static bool is_lpss_ssp(const struct driver_data *drv_data) switch (drv_data->ssp_type) { case LPSS_LPT_SSP: case LPSS_BYT_SSP: + case LPSS_SPT_SSP: return true; default: return false; @@ -1107,6 +1118,7 @@ static int setup(struct spi_device *spi) break; case LPSS_LPT_SSP: case LPSS_BYT_SSP: + case LPSS_SPT_SSP: config = lpss_get_config(drv_data); tx_thres = config->tx_threshold_lo; tx_hi_thres = config->tx_threshold_hi; @@ -1276,6 +1288,30 @@ static const struct acpi_device_id pxa2xx_spi_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, pxa2xx_spi_acpi_match); +/* + * PCI IDs of compound devices that integrate both host controller and private + * integrated DMA engine. Please note these are not used in module + * autoloading and probing in this module but matching the LPSS SSP type. 
+ */ +static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = { + /* SPT-LP */ + { PCI_VDEVICE(INTEL, 0x9d29), LPSS_SPT_SSP }, + { PCI_VDEVICE(INTEL, 0x9d2a), LPSS_SPT_SSP }, + /* SPT-H */ + { PCI_VDEVICE(INTEL, 0xa129), LPSS_SPT_SSP }, + { PCI_VDEVICE(INTEL, 0xa12a), LPSS_SPT_SSP }, +}; + +static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param) +{ + struct device *dev = param; + + if (dev != chan->device->dev->parent) + return false; + + return true; +} + static struct pxa2xx_spi_master * pxa2xx_spi_acpi_get_pdata(struct platform_device *pdev) { @@ -1283,16 +1319,25 @@ pxa2xx_spi_acpi_get_pdata(struct platform_device *pdev) struct acpi_device *adev; struct ssp_device *ssp; struct resource *res; - const struct acpi_device_id *id; + const struct acpi_device_id *adev_id = NULL; + const struct pci_device_id *pcidev_id = NULL; int devid, type; if (!ACPI_HANDLE(&pdev->dev) || acpi_bus_get_device(ACPI_HANDLE(&pdev->dev), &adev)) return NULL; - id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev); - if (id) - type = (int)id->driver_data; + if (dev_is_pci(pdev->dev.parent)) + pcidev_id = pci_match_id(pxa2xx_spi_pci_compound_match, + to_pci_dev(pdev->dev.parent)); + else + adev_id = acpi_match_device(pdev->dev.driver->acpi_match_table, + &pdev->dev); + + if (adev_id) + type = (int)adev_id->driver_data; + else if (pcidev_id) + type = (int)pcidev_id->driver_data; else return NULL; @@ -1311,6 +1356,12 @@ pxa2xx_spi_acpi_get_pdata(struct platform_device *pdev) if (IS_ERR(ssp->mmio_base)) return NULL; + if (pcidev_id) { + pdata->tx_param = pdev->dev.parent; + pdata->rx_param = pdev->dev.parent; + pdata->dma_filter = pxa2xx_spi_idma_filter; + } + ssp->clk = devm_clk_get(&pdev->dev, NULL); ssp->irq = platform_get_irq(pdev, 0); ssp->type = type; diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 0485bab061fd..92273776bce6 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -197,6 +197,7 @@ 
enum pxa_ssp_type { QUARK_X1000_SSP, LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ LPSS_BYT_SSP, + LPSS_SPT_SSP, }; struct ssp_device { -- cgit v1.2.3-70-g09d2 From ba2bbfbf63075850bb523e2adb815d45e3509995 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 18 Jun 2015 15:17:53 +0200 Subject: PM / Domains: Remove intermediate states from the power off sequence Genpd's ->runtime_suspend() (assigned to pm_genpd_runtime_suspend()) doesn't immediately walk the hierarchy of ->runtime_suspend() callbacks. Instead, pm_genpd_runtime_suspend() calls pm_genpd_poweroff() which postpones that until *all* the devices in the genpd are runtime suspended. When pm_genpd_poweroff() discovers that the last device in the genpd is about to be runtime suspended, it calls __pm_genpd_save_device() for *all* the devices in the genpd sequentially. Furthermore, __pm_genpd_save_device() invokes the ->start() callback, walks the hierarchy of the ->runtime_suspend() callbacks and invokes the ->stop() callback. This causes a "thundering herd" problem. Let's address this issue by having pm_genpd_runtime_suspend() immediately walk the hierarchy of the ->runtime_suspend() callbacks, instead of postponing that to the power off sequence via pm_genpd_poweroff(). If the selected ->runtime_suspend() callback doesn't return an error code, call pm_genpd_poweroff() to see if it's feasible to also power off the PM domain. Adopting this change enables us to simplify parts of the code in genpd, for example the locking mechanism. Additionally, it gives some positive side effects, as described below. i) One device's ->runtime_resume() latency is no longer affected by other devices' latencies in a genpd. The complexity genpd has to support the option to abort the power off sequence suffers from latency issues. More precisely, a device that is requested to be runtime resumed, may end up waiting for __pm_genpd_save_device() to complete its operations for *another* device. 
That's because pm_genpd_poweroff() can't confirm an abort request while it waits for __pm_genpd_save_device() to return. As this patch removes the intermediate states in pm_genpd_poweroff() while powering off the PM domain, we no longer need the ability to abort that sequence. ii) Make pm_runtime[_status]_suspended() reliable when used with genpd. Until the last device in a genpd becomes idle, pm_genpd_runtime_suspend() will return 0 without actually walking the hierarchy of the ->runtime_suspend() callbacks. However, by returning 0 the runtime PM core considers the device as runtime_suspended, so pm_runtime[_status]_suspended() will return true, even though the device isn't (yet) runtime suspended. After this patch, since pm_genpd_runtime_suspend() immediately walks the hierarchy of the ->runtime_suspend() callbacks, pm_runtime[_status]_suspended() will accurately reflect the status of the device. iii) Enable fine-grained PM through runtime PM callbacks in drivers/subsystems. There are currently cases where drivers/subsystems implement runtime PM callbacks to deploy fine-grained PM (e.g. gate clocks, move pinctrl to power-save state, etc.). While using the genpd, pm_genpd_runtime_suspend() postpones invoking these callbacks until *all* the devices in the genpd are runtime suspended. In essence, one runtime resumed device prevents fine-grained PM for other devices within the same genpd. After this patch, since pm_genpd_runtime_suspend() immediately walks the hierarchy of the ->runtime_suspend() callbacks, fine-grained PM is enabled throughout all the levels of runtime PM callbacks. iv) Enable fine-grained PM for IRQ safe devices Per the definition for an IRQ safe device, its runtime PM callbacks must be able to execute in atomic context. In the path where genpd walks the hierarchy of the ->runtime_suspend() callbacks for the device, it uses a mutex. Therefore, genpd prevents that path from being executed for IRQ safe devices.
As this patch changes pm_genpd_runtime_suspend() to immediately walk the hierarchy of the ->runtime_suspend() callbacks and without needing to use a mutex, fine-grained PM is enabled throughout all the levels of runtime PM callbacks for IRQ safe devices. Unfortunately this patch also comes with a drawback, as described in the summary below. Driver's/subsystem's runtime PM callbacks may be invoked even when the genpd hasn't actually powered off the PM domain, potentially introducing unnecessary latency. However, in most cases, saving/restoring register contexts for devices are typically fast operations or can be optimized in device specific ways (e.g. shadow copies of register contents in memory, device-specific checks to see if context has been lost before restoring context, etc.). Still, in some cases the driver/subsystem may suffer from latency if runtime PM is used in a very fine-grained manner (e.g. for each IO request or xfer). To prevent that extra overhead, the driver/subsystem may deploy the runtime PM autosuspend feature. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Tested-by: Geert Uytterhoeven Tested-by: Lina Iyer Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 363 ++++++++------------------------------------ include/linux/pm_domain.h | 7 - 2 files changed, 62 insertions(+), 308 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 0ee43c1056e0..a1abe16dfe16 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -114,8 +114,12 @@ static int genpd_stop_dev(struct generic_pm_domain *genpd, struct device *dev) stop_latency_ns, "stop"); } -static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_start_dev(struct generic_pm_domain *genpd, struct device *dev, + bool timed) { + if (!timed) + return GENPD_DEV_CALLBACK(genpd, int, start, dev); + return GENPD_DEV_TIMED_CALLBACK(genpd, int, start, dev, start_latency_ns, "start"); } @@ -136,41 +140,6 @@ static void genpd_sd_counter_inc(struct generic_pm_domain *genpd) smp_mb__after_atomic(); } -static void genpd_acquire_lock(struct generic_pm_domain *genpd) -{ - DEFINE_WAIT(wait); - - mutex_lock(&genpd->lock); - /* - * Wait for the domain to transition into either the active, - * or the power off state. - */ - for (;;) { - prepare_to_wait(&genpd->status_wait_queue, &wait, - TASK_UNINTERRUPTIBLE); - if (genpd->status == GPD_STATE_ACTIVE - || genpd->status == GPD_STATE_POWER_OFF) - break; - mutex_unlock(&genpd->lock); - - schedule(); - - mutex_lock(&genpd->lock); - } - finish_wait(&genpd->status_wait_queue, &wait); -} - -static void genpd_release_lock(struct generic_pm_domain *genpd) -{ - mutex_unlock(&genpd->lock); -} - -static void genpd_set_active(struct generic_pm_domain *genpd) -{ - if (genpd->resume_count == 0) - genpd->status = GPD_STATE_ACTIVE; -} - static void genpd_recalc_cpu_exit_latency(struct generic_pm_domain *genpd) { s64 usecs64; @@ -251,35 +220,14 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool timed) * resume a device belonging to it. 
*/ static int __pm_genpd_poweron(struct generic_pm_domain *genpd) - __releases(&genpd->lock) __acquires(&genpd->lock) { struct gpd_link *link; - DEFINE_WAIT(wait); int ret = 0; - /* If the domain's master is being waited for, we have to wait too. */ - for (;;) { - prepare_to_wait(&genpd->status_wait_queue, &wait, - TASK_UNINTERRUPTIBLE); - if (genpd->status != GPD_STATE_WAIT_MASTER) - break; - mutex_unlock(&genpd->lock); - - schedule(); - - mutex_lock(&genpd->lock); - } - finish_wait(&genpd->status_wait_queue, &wait); - if (genpd->status == GPD_STATE_ACTIVE || (genpd->prepared_count > 0 && genpd->suspend_power_off)) return 0; - if (genpd->status != GPD_STATE_POWER_OFF) { - genpd_set_active(genpd); - return 0; - } - if (genpd->cpuidle_data) { cpuidle_pause_and_lock(); genpd->cpuidle_data->idle_state->disabled = true; @@ -294,20 +242,8 @@ static int __pm_genpd_poweron(struct generic_pm_domain *genpd) */ list_for_each_entry(link, &genpd->slave_links, slave_node) { genpd_sd_counter_inc(link->master); - genpd->status = GPD_STATE_WAIT_MASTER; - - mutex_unlock(&genpd->lock); ret = pm_genpd_poweron(link->master); - - mutex_lock(&genpd->lock); - - /* - * The "wait for parent" status is guaranteed not to change - * while the master is powering on. - */ - genpd->status = GPD_STATE_POWER_OFF; - wake_up_all(&genpd->status_wait_queue); if (ret) { genpd_sd_counter_dec(link->master); goto err; @@ -319,8 +255,7 @@ static int __pm_genpd_poweron(struct generic_pm_domain *genpd) goto err; out: - genpd_set_active(genpd); - + genpd->status = GPD_STATE_ACTIVE; return 0; err: @@ -356,20 +291,18 @@ int pm_genpd_name_poweron(const char *domain_name) return genpd ? 
pm_genpd_poweron(genpd) : -EINVAL; } -static int genpd_start_dev_no_timing(struct generic_pm_domain *genpd, - struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, start, dev); -} - static int genpd_save_dev(struct generic_pm_domain *genpd, struct device *dev) { return GENPD_DEV_TIMED_CALLBACK(genpd, int, save_state, dev, save_state_latency_ns, "state save"); } -static int genpd_restore_dev(struct generic_pm_domain *genpd, struct device *dev) +static int genpd_restore_dev(struct generic_pm_domain *genpd, + struct device *dev, bool timed) { + if (!timed) + return GENPD_DEV_CALLBACK(genpd, int, restore_state, dev); + return GENPD_DEV_TIMED_CALLBACK(genpd, int, restore_state, dev, restore_state_latency_ns, "state restore"); @@ -415,89 +348,6 @@ static int genpd_dev_pm_qos_notifier(struct notifier_block *nb, return NOTIFY_DONE; } -/** - * __pm_genpd_save_device - Save the pre-suspend state of a device. - * @pdd: Domain data of the device to save the state of. - * @genpd: PM domain the device belongs to. - */ -static int __pm_genpd_save_device(struct pm_domain_data *pdd, - struct generic_pm_domain *genpd) - __releases(&genpd->lock) __acquires(&genpd->lock) -{ - struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); - struct device *dev = pdd->dev; - int ret = 0; - - if (gpd_data->need_restore > 0) - return 0; - - /* - * If the value of the need_restore flag is still unknown at this point, - * we trust that pm_genpd_poweroff() has verified that the device is - * already runtime PM suspended. - */ - if (gpd_data->need_restore < 0) { - gpd_data->need_restore = 1; - return 0; - } - - mutex_unlock(&genpd->lock); - - genpd_start_dev(genpd, dev); - ret = genpd_save_dev(genpd, dev); - genpd_stop_dev(genpd, dev); - - mutex_lock(&genpd->lock); - - if (!ret) - gpd_data->need_restore = 1; - - return ret; -} - -/** - * __pm_genpd_restore_device - Restore the pre-suspend state of a device. - * @pdd: Domain data of the device to restore the state of. 
- * @genpd: PM domain the device belongs to. - */ -static void __pm_genpd_restore_device(struct pm_domain_data *pdd, - struct generic_pm_domain *genpd) - __releases(&genpd->lock) __acquires(&genpd->lock) -{ - struct generic_pm_domain_data *gpd_data = to_gpd_data(pdd); - struct device *dev = pdd->dev; - int need_restore = gpd_data->need_restore; - - gpd_data->need_restore = 0; - mutex_unlock(&genpd->lock); - - genpd_start_dev(genpd, dev); - - /* - * Call genpd_restore_dev() for recently added devices too (need_restore - * is negative then). - */ - if (need_restore) - genpd_restore_dev(genpd, dev); - - mutex_lock(&genpd->lock); -} - -/** - * genpd_abort_poweroff - Check if a PM domain power off should be aborted. - * @genpd: PM domain to check. - * - * Return true if a PM domain's status changed to GPD_STATE_ACTIVE during - * a "power off" operation, which means that a "power on" has occured in the - * meantime, or if its resume_count field is different from zero, which means - * that one of its devices has been resumed in the meantime. - */ -static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) -{ - return genpd->status == GPD_STATE_WAIT_MASTER - || genpd->status == GPD_STATE_ACTIVE || genpd->resume_count > 0; -} - /** * genpd_queue_power_off_work - Queue up the execution of pm_genpd_poweroff(). * @genpd: PM domait to power off. @@ -515,34 +365,26 @@ static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) * @genpd: PM domain to power down. * * If all of the @genpd's devices have been suspended and all of its subdomains - * have been powered down, run the runtime suspend callbacks provided by all of - * the @genpd's devices' drivers and remove power from @genpd. + * have been powered down, remove power from @genpd. 
*/ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) - __releases(&genpd->lock) __acquires(&genpd->lock) { struct pm_domain_data *pdd; struct gpd_link *link; - unsigned int not_suspended; - int ret = 0; + unsigned int not_suspended = 0; - start: /* * Do not try to power off the domain in the following situations: * (1) The domain is already in the "power off" state. - * (2) The domain is waiting for its master to power up. - * (3) One of the domain's devices is being resumed right now. - * (4) System suspend is in progress. + * (2) System suspend is in progress. */ if (genpd->status == GPD_STATE_POWER_OFF - || genpd->status == GPD_STATE_WAIT_MASTER - || genpd->resume_count > 0 || genpd->prepared_count > 0) + || genpd->prepared_count > 0) return 0; if (atomic_read(&genpd->sd_count) > 0) return -EBUSY; - not_suspended = 0; list_for_each_entry(pdd, &genpd->dev_list, list_node) { enum pm_qos_flags_status stat; @@ -560,41 +402,11 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) if (not_suspended > genpd->in_progress) return -EBUSY; - if (genpd->poweroff_task) { - /* - * Another instance of pm_genpd_poweroff() is executing - * callbacks, so tell it to start over and return. - */ - genpd->status = GPD_STATE_REPEAT; - return 0; - } - if (genpd->gov && genpd->gov->power_down_ok) { if (!genpd->gov->power_down_ok(&genpd->domain)) return -EAGAIN; } - genpd->status = GPD_STATE_BUSY; - genpd->poweroff_task = current; - - list_for_each_entry_reverse(pdd, &genpd->dev_list, list_node) { - ret = atomic_read(&genpd->sd_count) == 0 ? 
- __pm_genpd_save_device(pdd, genpd) : -EBUSY; - - if (genpd_abort_poweroff(genpd)) - goto out; - - if (ret) { - genpd_set_active(genpd); - goto out; - } - - if (genpd->status == GPD_STATE_REPEAT) { - genpd->poweroff_task = NULL; - goto start; - } - } - if (genpd->cpuidle_data) { /* * If cpuidle_data is set, cpuidle should turn the domain off @@ -607,14 +419,14 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) cpuidle_pause_and_lock(); genpd->cpuidle_data->idle_state->disabled = false; cpuidle_resume_and_unlock(); - goto out; + return 0; } if (genpd->power_off) { - if (atomic_read(&genpd->sd_count) > 0) { - ret = -EBUSY; - goto out; - } + int ret; + + if (atomic_read(&genpd->sd_count) > 0) + return -EBUSY; /* * If sd_count > 0 at this point, one of the subdomains hasn't @@ -625,10 +437,8 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) * happen very often). */ ret = genpd_power_off(genpd, true); - if (ret == -EBUSY) { - genpd_set_active(genpd); - goto out; - } + if (ret) + return ret; } genpd->status = GPD_STATE_POWER_OFF; @@ -638,10 +448,7 @@ static int pm_genpd_poweroff(struct generic_pm_domain *genpd) genpd_queue_power_off_work(link->master); } - out: - genpd->poweroff_task = NULL; - wake_up_all(&genpd->status_wait_queue); - return ret; + return 0; } /** @@ -654,9 +461,9 @@ static void genpd_power_off_work_fn(struct work_struct *work) genpd = container_of(work, struct generic_pm_domain, power_off_work); - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); pm_genpd_poweroff(genpd); - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); } /** @@ -670,7 +477,6 @@ static void genpd_power_off_work_fn(struct work_struct *work) static int pm_genpd_runtime_suspend(struct device *dev) { struct generic_pm_domain *genpd; - struct generic_pm_domain_data *gpd_data; bool (*stop_ok)(struct device *__dev); int ret; @@ -684,10 +490,16 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (stop_ok && !stop_ok(dev)) return -EBUSY; - 
ret = genpd_stop_dev(genpd, dev); + ret = genpd_save_dev(genpd, dev); if (ret) return ret; + ret = genpd_stop_dev(genpd, dev); + if (ret) { + genpd_restore_dev(genpd, dev, true); + return ret; + } + /* * If power.irq_safe is set, this routine will be run with interrupts * off, so it can't use mutexes. @@ -696,16 +508,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) return 0; mutex_lock(&genpd->lock); - - /* - * If we have an unknown state of the need_restore flag, it means none - * of the runtime PM callbacks has been invoked yet. Let's update the - * flag to reflect that the current state is active. - */ - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - if (gpd_data->need_restore < 0) - gpd_data->need_restore = 0; - genpd->in_progress++; pm_genpd_poweroff(genpd); genpd->in_progress--; @@ -725,8 +527,8 @@ static int pm_genpd_runtime_suspend(struct device *dev) static int pm_genpd_runtime_resume(struct device *dev) { struct generic_pm_domain *genpd; - DEFINE_WAIT(wait); int ret; + bool timed = true; dev_dbg(dev, "%s()\n", __func__); @@ -735,39 +537,21 @@ static int pm_genpd_runtime_resume(struct device *dev) return -EINVAL; /* If power.irq_safe, the PM domain is never powered off. */ - if (dev->power.irq_safe) - return genpd_start_dev_no_timing(genpd, dev); + if (dev->power.irq_safe) { + timed = false; + goto out; + } mutex_lock(&genpd->lock); ret = __pm_genpd_poweron(genpd); - if (ret) { - mutex_unlock(&genpd->lock); - return ret; - } - genpd->status = GPD_STATE_BUSY; - genpd->resume_count++; - for (;;) { - prepare_to_wait(&genpd->status_wait_queue, &wait, - TASK_UNINTERRUPTIBLE); - /* - * If current is the powering off task, we have been called - * reentrantly from one of the device callbacks, so we should - * not wait. 
- */ - if (!genpd->poweroff_task || genpd->poweroff_task == current) - break; - mutex_unlock(&genpd->lock); + mutex_unlock(&genpd->lock); - schedule(); + if (ret) + return ret; - mutex_lock(&genpd->lock); - } - finish_wait(&genpd->status_wait_queue, &wait); - __pm_genpd_restore_device(dev->power.subsys_data->domain_data, genpd); - genpd->resume_count--; - genpd_set_active(genpd); - wake_up_all(&genpd->status_wait_queue); - mutex_unlock(&genpd->lock); + out: + genpd_start_dev(genpd, dev, timed); + genpd_restore_dev(genpd, dev, timed); return 0; } @@ -883,7 +667,7 @@ static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd, { struct gpd_link *link; - if (genpd->status != GPD_STATE_POWER_OFF) + if (genpd->status == GPD_STATE_ACTIVE) return; list_for_each_entry(link, &genpd->slave_links, slave_node) { @@ -960,14 +744,14 @@ static int pm_genpd_prepare(struct device *dev) if (resume_needed(dev, genpd)) pm_runtime_resume(dev); - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); if (genpd->prepared_count++ == 0) { genpd->suspended_count = 0; genpd->suspend_power_off = genpd->status == GPD_STATE_POWER_OFF; } - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); if (genpd->suspend_power_off) { pm_runtime_put_noidle(dev); @@ -1102,7 +886,7 @@ static int pm_genpd_resume_noirq(struct device *dev) pm_genpd_sync_poweron(genpd, true); genpd->suspended_count--; - return genpd_start_dev(genpd, dev); + return genpd_start_dev(genpd, dev, true); } /** @@ -1230,7 +1014,7 @@ static int pm_genpd_thaw_noirq(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_start_dev(genpd, dev); + return genpd->suspend_power_off ? 
0 : genpd_start_dev(genpd, dev, true); } /** @@ -1324,7 +1108,7 @@ static int pm_genpd_restore_noirq(struct device *dev) pm_genpd_sync_poweron(genpd, true); - return genpd_start_dev(genpd, dev); + return genpd_start_dev(genpd, dev, true); } /** @@ -1440,7 +1224,6 @@ static struct generic_pm_domain_data *genpd_alloc_dev_data(struct device *dev, gpd_data->td = *td; gpd_data->base.dev = dev; - gpd_data->need_restore = -1; gpd_data->td.constraint_changed = true; gpd_data->td.effective_constraint_ns = -1; gpd_data->nb.notifier_call = genpd_dev_pm_qos_notifier; @@ -1502,7 +1285,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1519,7 +1302,7 @@ int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); out: - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); if (ret) genpd_free_dev_data(dev, gpd_data); @@ -1563,7 +1346,7 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, gpd_data = to_gpd_data(pdd); dev_pm_qos_remove_notifier(dev, &gpd_data->nb); - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); if (genpd->prepared_count > 0) { ret = -EAGAIN; @@ -1578,14 +1361,14 @@ int pm_genpd_remove_device(struct generic_pm_domain *genpd, list_del_init(&pdd->list_node); - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); genpd_free_dev_data(dev, gpd_data); return 0; out: - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); dev_pm_qos_add_notifier(dev, &gpd_data->nb); return ret; @@ -1606,17 +1389,9 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, || genpd == subdomain) return -EINVAL; - start: - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); - if (subdomain->status != GPD_STATE_POWER_OFF - && 
subdomain->status != GPD_STATE_ACTIVE) { - mutex_unlock(&subdomain->lock); - genpd_release_lock(genpd); - goto start; - } - if (genpd->status == GPD_STATE_POWER_OFF && subdomain->status != GPD_STATE_POWER_OFF) { ret = -EINVAL; @@ -1644,7 +1419,7 @@ int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, out: mutex_unlock(&subdomain->lock); - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); return ret; } @@ -1692,8 +1467,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(subdomain)) return -EINVAL; - start: - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); list_for_each_entry(link, &genpd->master_links, master_node) { if (link->slave != subdomain) @@ -1701,13 +1475,6 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, mutex_lock_nested(&subdomain->lock, SINGLE_DEPTH_NESTING); - if (subdomain->status != GPD_STATE_POWER_OFF - && subdomain->status != GPD_STATE_ACTIVE) { - mutex_unlock(&subdomain->lock); - genpd_release_lock(genpd); - goto start; - } - list_del(&link->master_node); list_del(&link->slave_node); kfree(link); @@ -1720,7 +1487,7 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, break; } - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); return ret; } @@ -1744,7 +1511,7 @@ int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state) if (IS_ERR_OR_NULL(genpd) || state < 0) return -EINVAL; - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); if (genpd->cpuidle_data) { ret = -EEXIST; @@ -1775,7 +1542,7 @@ int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state) genpd_recalc_cpu_exit_latency(genpd); out: - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); return ret; err: @@ -1812,7 +1579,7 @@ int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) if (IS_ERR_OR_NULL(genpd)) return -EINVAL; - genpd_acquire_lock(genpd); + mutex_lock(&genpd->lock); cpuidle_data = genpd->cpuidle_data; if (!cpuidle_data) { @@ 
-1830,7 +1597,7 @@ int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) kfree(cpuidle_data); out: - genpd_release_lock(genpd); + mutex_unlock(&genpd->lock); return ret; } @@ -1912,9 +1679,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->in_progress = 0; atomic_set(&genpd->sd_count, 0); genpd->status = is_off ? GPD_STATE_POWER_OFF : GPD_STATE_ACTIVE; - init_waitqueue_head(&genpd->status_wait_queue); - genpd->poweroff_task = NULL; - genpd->resume_count = 0; genpd->device_count = 0; genpd->max_off_time_ns = -1; genpd->max_off_time_changed = true; @@ -2293,9 +2057,6 @@ static int pm_genpd_summary_one(struct seq_file *s, { static const char * const status_lookup[] = { [GPD_STATE_ACTIVE] = "on", - [GPD_STATE_WAIT_MASTER] = "wait-master", - [GPD_STATE_BUSY] = "busy", - [GPD_STATE_REPEAT] = "off-in-progress", [GPD_STATE_POWER_OFF] = "off" }; struct pm_domain_data *pm_data; diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 681ccb053f72..b2725e6e8e7b 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -22,9 +22,6 @@ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ - GPD_STATE_WAIT_MASTER, /* PM domain's master is being waited for */ - GPD_STATE_BUSY, /* Something is happening to the PM domain */ - GPD_STATE_REPEAT, /* Power off in progress, to be repeated */ GPD_STATE_POWER_OFF, /* PM domain is off */ }; @@ -59,9 +56,6 @@ struct generic_pm_domain { unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ - wait_queue_head_t status_wait_queue; - struct task_struct *poweroff_task; /* Powering off task */ - unsigned int resume_count; /* Number of devices being resumed */ unsigned int device_count; /* Number of devices */ unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ @@ 
-113,7 +107,6 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; - int need_restore; }; #ifdef CONFIG_PM_GENERIC_DOMAINS -- cgit v1.2.3-70-g09d2 From f70ea018da0631e10c26a02f5a82d626ffef5bd7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 31 Jul 2015 16:52:10 -0700 Subject: net: Add functions to get skb->hash based on flow structures Add skb_get_hash_flowi6 and skb_get_hash_flowi4 which derive an sk_buff hash from flowi6 and flowi4 structures respectively. These functions can be called when creating a packet in the output path where the new sk_buff does not yet contain a fully formed packet that is parsable by flow dissector. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +++++++++++++++++ net/core/flow_dissector.c | 58 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 75 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 648a2c241993..b7c1286e247d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -37,6 +37,7 @@ #include #include #include +#include /* A. Checksumming of received packets by device. 
* @@ -945,6 +946,26 @@ static inline __u32 skb_get_hash(struct sk_buff *skb) return skb->hash; } +__u32 __skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6); + +static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_flowi6(skb, fl6); + + return skb->hash; +} + +__u32 __skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl); + +static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl4) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_flowi4(skb, fl4); + + return skb->hash; +} + __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2a834c6179b9..11e6540fa386 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -590,6 +590,15 @@ void make_flow_keys_digest(struct flow_keys_digest *digest, } EXPORT_SYMBOL(make_flow_keys_digest); +static inline void __skb_set_sw_hash(struct sk_buff *skb, u32 hash, + struct flow_keys *keys) +{ + if (keys->ports.ports) + skb->l4_hash = 1; + skb->sw_hash = 1; + skb->hash = hash; +} + /** * __skb_get_hash: calculate a flow hash * @skb: sk_buff to calculate flow hash from @@ -609,10 +618,8 @@ void __skb_get_hash(struct sk_buff *skb) hash = ___skb_get_hash(skb, &keys, hashrnd); if (!hash) return; - if (keys.ports.ports) - skb->l4_hash = 1; - skb->sw_hash = 1; - skb->hash = hash; + + __skb_set_sw_hash(skb, hash, &keys); } EXPORT_SYMBOL(__skb_get_hash); @@ -624,6 +631,49 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); +__u32 __skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6) +{ + struct flow_keys keys; + + memset(&keys, 0, sizeof(keys)); + + memcpy(&keys.addrs.v6addrs.src, &fl6->saddr, + sizeof(keys.addrs.v6addrs.src)); + memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr, + 
sizeof(keys.addrs.v6addrs.dst)); + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + keys.ports.src = fl6->fl6_sport; + keys.ports.dst = fl6->fl6_dport; + keys.keyid.keyid = fl6->fl6_gre_key; + keys.tags.flow_label = (__force u32)fl6->flowlabel; + keys.basic.ip_proto = fl6->flowi6_proto; + + __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), &keys); + + return skb->hash; +} +EXPORT_SYMBOL(__skb_get_hash_flowi6); + +__u32 __skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl4) +{ + struct flow_keys keys; + + memset(&keys, 0, sizeof(keys)); + + keys.addrs.v4addrs.src = fl4->saddr; + keys.addrs.v4addrs.dst = fl4->daddr; + keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + keys.ports.src = fl4->fl4_sport; + keys.ports.dst = fl4->fl4_dport; + keys.keyid.keyid = fl4->fl4_gre_key; + keys.basic.ip_proto = fl4->flowi4_proto; + + __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), &keys); + + return skb->hash; +} +EXPORT_SYMBOL(__skb_get_hash_flowi4); + u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { -- cgit v1.2.3-70-g09d2 From e52e95199d0c1aa8a06dbbc07b30562fded8b298 Mon Sep 17 00:00:00 2001 From: Cristina Opriceana Date: Fri, 24 Jul 2015 16:23:43 +0300 Subject: include: linux: iio: Fix function parameter name in kernel doc Fix buffer name from kernel doc according to the function parameter. Signed-off-by: Cristina Opriceana Signed-off-by: Jonathan Cameron --- include/linux/iio/consumer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 26fb8f6342bb..fad58671c49e 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -100,7 +100,7 @@ void iio_channel_stop_all_cb(struct iio_cb_buffer *cb_buff); /** * iio_channel_cb_get_channels() - get access to the underlying channels. 
- * @cb_buff: The callback buffer from whom we want the channel + * @cb_buffer: The callback buffer from whom we want the channel * information. * * This function allows one to obtain information about the channels. -- cgit v1.2.3-70-g09d2 From 2854c098e222a706b91cc83b4a91dbfd97212765 Mon Sep 17 00:00:00 2001 From: Cristina Opriceana Date: Fri, 24 Jul 2015 16:26:09 +0300 Subject: include: linux: iio: Add missing kernel doc field Fix kernel doc for the iio_dev_attr structure by adding its missing field. Signed-off-by: Cristina Opriceana Signed-off-by: Jonathan Cameron --- include/linux/iio/sysfs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h index 8a1d18640ab9..9cd8f747212f 100644 --- a/include/linux/iio/sysfs.h +++ b/include/linux/iio/sysfs.h @@ -18,7 +18,8 @@ struct iio_chan_spec; * struct iio_dev_attr - iio specific device attribute * @dev_attr: underlying device attribute * @address: associated register address - * @l: list head for maintaining list of dynamically created attrs. + * @l: list head for maintaining list of dynamically created attrs + * @c: specification for the underlying channel */ struct iio_dev_attr { struct device_attribute dev_attr; -- cgit v1.2.3-70-g09d2 From 76b235c6bcb16062d663e2ee96db0b69f2e6bc14 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jul 2015 14:45:44 +0200 Subject: jump_label: Rename JUMP_LABEL_{EN,DIS}ABLE to JUMP_LABEL_{JMP,NOP} Since we've already stepped away from ENABLE is a JMP and DISABLE is a NOP with the branch_default bits, and are going to make it even worse, rename it to make it all clearer. This way we don't mix multiple levels of logic attributes, but have a plain 'physical' name for what the current instruction patching status of a jump label is. 
This is a first step in removing the naming confusion that has led to a stream of avoidable bugs such as: a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()") Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org [ Beefed up the changelog. ] Signed-off-by: Ingo Molnar --- arch/arm/kernel/jump_label.c | 2 +- arch/arm64/kernel/jump_label.c | 2 +- arch/mips/kernel/jump_label.c | 2 +- arch/powerpc/kernel/jump_label.c | 2 +- arch/s390/kernel/jump_label.c | 2 +- arch/sparc/kernel/jump_label.c | 2 +- arch/x86/kernel/jump_label.c | 2 +- include/linux/jump_label.h | 4 ++-- kernel/jump_label.c | 18 +++++++++--------- 9 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index e39cbf488cfe..845a5dd9c42b 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -12,7 +12,7 @@ static void __arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; unsigned int insn; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) insn = arm_gen_branch(entry->code, entry->target); else insn = arm_gen_nop(); diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index 4f1fec7a46db..c2dd1ad3e648 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -28,7 +28,7 @@ void arch_jump_label_transform(struct jump_entry *entry, void *addr = (void *)entry->code; u32 insn; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn = aarch64_insn_gen_branch_imm(entry->code, entry->target, AARCH64_INSN_BRANCH_NOLINK); diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index dda800e9e731..3e586daa3a32 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -51,7 +51,7 @@ void arch_jump_label_transform(struct 
jump_entry *e, /* Target must have the right alignment and ISA must be preserved. */ BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT); - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op; insn.j_format.target = e->target >> J_RANGE_SHIFT; } else { diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index a1ed8a8c7cb4..6472472093d0 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -17,7 +17,7 @@ void arch_jump_label_transform(struct jump_entry *entry, { u32 *addr = (u32 *)(unsigned long)entry->code; - if (type == JUMP_LABEL_ENABLE) + if (type == JUMP_LABEL_JMP) patch_branch(addr, entry->target, 0); else patch_instruction(addr, PPC_INST_NOP); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index a90299600483..a83d2248fea9 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -64,7 +64,7 @@ static void __jump_label_transform(struct jump_entry *entry, { struct insn old, new; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { jump_label_make_nop(entry, &old); jump_label_make_branch(entry, &new); } else { diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 48565c11e82a..59bbeff55024 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -16,7 +16,7 @@ void arch_jump_label_transform(struct jump_entry *entry, u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; #ifdef CONFIG_SPARC64 diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 26d5a55a2736..e565e0e4d216 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -45,7 +45,7 @@ static void __jump_label_transform(struct jump_entry *entry, const unsigned char default_nop[] = { 
STATIC_KEY_INIT_NOP }; const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5]; - if (type == JUMP_LABEL_ENABLE) { + if (type == JUMP_LABEL_JMP) { if (init) { /* * Jump label is enabled for the first time. diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index f4de473f226b..6a8b4fe10ad8 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -86,8 +86,8 @@ struct static_key { #ifndef __ASSEMBLY__ enum jump_label_type { - JUMP_LABEL_DISABLE = 0, - JUMP_LABEL_ENABLE, + JUMP_LABEL_NOP = 0, + JUMP_LABEL_JMP, }; struct module; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 52ebaca1b9fc..96d8945c8bf3 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -65,9 +65,9 @@ void static_key_slow_inc(struct static_key *key) jump_label_lock(); if (atomic_read(&key->enabled) == 0) { if (!jump_label_get_branch_default(key)) - jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_update(key, JUMP_LABEL_JMP); else - jump_label_update(key, JUMP_LABEL_DISABLE); + jump_label_update(key, JUMP_LABEL_NOP); } atomic_inc(&key->enabled); jump_label_unlock(); @@ -88,9 +88,9 @@ static void __static_key_slow_dec(struct static_key *key, schedule_delayed_work(work, rate_limit); } else { if (!jump_label_get_branch_default(key)) - jump_label_update(key, JUMP_LABEL_DISABLE); + jump_label_update(key, JUMP_LABEL_NOP); else - jump_label_update(key, JUMP_LABEL_ENABLE); + jump_label_update(key, JUMP_LABEL_JMP); } jump_label_unlock(); } @@ -184,9 +184,9 @@ static enum jump_label_type jump_label_type(struct static_key *key) bool state = static_key_enabled(key); if ((!true_branch && state) || (true_branch && !state)) - return JUMP_LABEL_ENABLE; + return JUMP_LABEL_JMP; - return JUMP_LABEL_DISABLE; + return JUMP_LABEL_NOP; } void __init jump_label_init(void) @@ -276,7 +276,7 @@ void jump_label_apply_nops(struct module *mod) return; for (iter = iter_start; iter < iter_stop; iter++) { - arch_jump_label_transform_static(iter, JUMP_LABEL_DISABLE); + 
arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); } } @@ -318,8 +318,8 @@ static int jump_label_add_module(struct module *mod) jlm->next = key->next; key->next = jlm; - if (jump_label_type(key) == JUMP_LABEL_ENABLE) - __jump_label_update(key, iter, iter_stop, JUMP_LABEL_ENABLE); + if (jump_label_type(key) == JUMP_LABEL_JMP) + __jump_label_update(key, iter, iter_stop, JUMP_LABEL_JMP); } return 0; -- cgit v1.2.3-70-g09d2 From a1efb01feca597b2abbc89873b40ef8ec6690168 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jul 2015 14:55:40 +0200 Subject: jump_label, locking/static_keys: Rename JUMP_LABEL_TYPE_* and related helpers to the static_key* pattern Rename the JUMP_LABEL_TYPE_* macros to be JUMP_TYPE_* and move the inline helpers into kernel/jump_label.c, since that's the only place they're ever used. Also rename the helpers where it's all about static keys. This is the second step in removing the naming confusion that has led to a stream of avoidable bugs such as: a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()") Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 25 +++++-------------------- kernel/jump_label.c | 25 ++++++++++++++++--------- 2 files changed, 21 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 6a8b4fe10ad8..0ddb208b8449 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -101,24 +101,9 @@ static inline int static_key_count(struct static_key *key) #ifdef HAVE_JUMP_LABEL -#define JUMP_LABEL_TYPE_FALSE_BRANCH 0UL -#define JUMP_LABEL_TYPE_TRUE_BRANCH 1UL -#define JUMP_LABEL_TYPE_MASK 1UL - -static -inline struct jump_entry *jump_label_get_entries(struct static_key *key) -{ - return (struct jump_entry *)((unsigned long)key->entries - & ~JUMP_LABEL_TYPE_MASK); -} - -static inline bool jump_label_get_branch_default(struct static_key *key) -{ - if (((unsigned long)key->entries & JUMP_LABEL_TYPE_MASK) == - JUMP_LABEL_TYPE_TRUE_BRANCH) - return true; - return false; -} +#define JUMP_TYPE_FALSE 0UL +#define JUMP_TYPE_TRUE 1UL +#define JUMP_TYPE_MASK 1UL static __always_inline bool static_key_false(struct static_key *key) { @@ -147,10 +132,10 @@ extern void jump_label_apply_nops(struct module *mod); #define STATIC_KEY_INIT_TRUE ((struct static_key) \ { .enabled = ATOMIC_INIT(1), \ - .entries = (void *)JUMP_LABEL_TYPE_TRUE_BRANCH }) + .entries = (void *)JUMP_TYPE_TRUE }) #define STATIC_KEY_INIT_FALSE ((struct static_key) \ { .enabled = ATOMIC_INIT(0), \ - .entries = (void *)JUMP_LABEL_TYPE_FALSE_BRANCH }) + .entries = (void *)JUMP_TYPE_FALSE }) #else /* !HAVE_JUMP_LABEL */ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 96d8945c8bf3..85a2a0086c67 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -56,6 +56,11 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) static void jump_label_update(struct static_key *key, int enable); 
+static inline bool static_key_type(struct static_key *key) +{ + return (unsigned long)key->entries & JUMP_TYPE_MASK; +} + void static_key_slow_inc(struct static_key *key) { STATIC_KEY_CHECK_USE(); @@ -64,7 +69,7 @@ void static_key_slow_inc(struct static_key *key) jump_label_lock(); if (atomic_read(&key->enabled) == 0) { - if (!jump_label_get_branch_default(key)) + if (!static_key_type(key)) jump_label_update(key, JUMP_LABEL_JMP); else jump_label_update(key, JUMP_LABEL_NOP); @@ -87,7 +92,7 @@ static void __static_key_slow_dec(struct static_key *key, atomic_inc(&key->enabled); schedule_delayed_work(work, rate_limit); } else { - if (!jump_label_get_branch_default(key)) + if (!static_key_type(key)) jump_label_update(key, JUMP_LABEL_NOP); else jump_label_update(key, JUMP_LABEL_JMP); @@ -178,15 +183,17 @@ static void __jump_label_update(struct static_key *key, } } -static enum jump_label_type jump_label_type(struct static_key *key) +static inline struct jump_entry *static_key_entries(struct static_key *key) { - bool true_branch = jump_label_get_branch_default(key); - bool state = static_key_enabled(key); + return (struct jump_entry *)((unsigned long)key->entries & ~JUMP_TYPE_MASK); +} - if ((!true_branch && state) || (true_branch && !state)) - return JUMP_LABEL_JMP; +static enum jump_label_type jump_label_type(struct static_key *key) +{ + bool enabled = static_key_enabled(key); + bool type = static_key_type(key); - return JUMP_LABEL_NOP; + return enabled ^ type; } void __init jump_label_init(void) @@ -442,7 +449,7 @@ int jump_label_text_reserved(void *start, void *end) static void jump_label_update(struct static_key *key, int enable) { struct jump_entry *stop = __stop___jump_table; - struct jump_entry *entry = jump_label_get_entries(key); + struct jump_entry *entry = static_key_entries(key); #ifdef CONFIG_MODULES struct module *mod; -- cgit v1.2.3-70-g09d2 From e33886b38cc82a9fc3b2d655dfc7f50467594138 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Jul 2015 
15:03:40 +0200 Subject: locking/static_keys: Add static_key_{en,dis}able() helpers Add two helpers to make it easier to treat the refcount as boolean. Suggested-by: Jason Baron Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 20 ++++++++++++++++++++ kernel/sched/core.c | 6 ++---- 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 0ddb208b8449..65f0ebac63cf 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -198,6 +198,26 @@ static inline bool static_key_enabled(struct static_key *key) return static_key_count(key) > 0; } +static inline void static_key_enable(struct static_key *key) +{ + int count = static_key_count(key); + + WARN_ON_ONCE(count < 0 || count > 1); + + if (!count) + static_key_slow_inc(key); +} + +static inline void static_key_disable(struct static_key *key) +{ + int count = static_key_count(key); + + WARN_ON_ONCE(count < 0 || count > 1); + + if (count) + static_key_slow_dec(key); +} + #endif /* _LINUX_JUMP_LABEL_H */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78b4bad10081..66ae8baf42fe 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -164,14 +164,12 @@ struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { static void sched_feat_disable(int i) { - if (static_key_enabled(&sched_feat_keys[i])) - static_key_slow_dec(&sched_feat_keys[i]); + static_key_disable(&sched_feat_keys[i]); } static void sched_feat_enable(int i) { - if (!static_key_enabled(&sched_feat_keys[i])) - static_key_slow_inc(&sched_feat_keys[i]); + static_key_enable(&sched_feat_keys[i]); } #else static void sched_feat_disable(int i) { }; -- cgit v1.2.3-70-g09d2 From 11276d5306b8e5b438a36bbff855fe792d7eaa61 Mon Sep 17 00:00:00 2001 
From: Peter Zijlstra Date: Fri, 24 Jul 2015 15:09:55 +0200 Subject: locking/static_keys: Add a new static_key interface There are various problems and short-comings with the current static_key interface: - static_key_{true,false}() read like a branch depending on the key value, instead of the actual likely/unlikely branch depending on init value. - static_key_{true,false}() are, as stated above, tied to the static_key init values STATIC_KEY_INIT_{TRUE,FALSE}. - we're limited to the 2 (out of 4) possible options that compile to a default NOP because that's what our arch_static_branch() assembly emits. So provide a new static_key interface: DEFINE_STATIC_KEY_TRUE(name); DEFINE_STATIC_KEY_FALSE(name); Which define a key of different types with an initial true/false value. Then allow: static_branch_likely() static_branch_unlikely() to take a key of either type and emit the right instruction for the case. This means adding a second arch_static_branch_jump() assembly helper which emits a JMP per default. In order to determine the right instruction for the right state, encode the branch type in the LSB of jump_entry::key. This is the final step in removing the naming confusion that has led to a stream of avoidable bugs such as: a833581e372a ("x86, perf: Fix static_key bug in load_mm_cr4()") ... but it also allows new static key combinations that will give us performance enhancements in the subsequent patches. Tested-by: Rabin Vincent # arm Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman # ppc Acked-by: Heiko Carstens # s390 Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/include/asm/jump_label.h | 25 ++++-- arch/arm64/include/asm/jump_label.h | 18 +++- arch/mips/include/asm/jump_label.h | 19 ++++- arch/powerpc/include/asm/jump_label.h | 19 ++++- arch/s390/include/asm/jump_label.h | 19 ++++- arch/sparc/include/asm/jump_label.h | 35 ++++++-- arch/x86/include/asm/jump_label.h | 21 ++++- include/linux/jump_label.h | 149 +++++++++++++++++++++++++++++++--- kernel/jump_label.c | 37 +++++++-- 9 files changed, 298 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index 5f337dc5c108..34f7b6980d21 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -4,23 +4,32 @@ #ifndef __ASSEMBLY__ #include +#include #define JUMP_LABEL_NOP_SIZE 4 -#ifdef CONFIG_THUMB2_KERNEL -#define JUMP_LABEL_NOP "nop.w" -#else -#define JUMP_LABEL_NOP "nop" -#endif +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + WASM(nop) "\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" - JUMP_LABEL_NOP "\n\t" + WASM(b) " %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".word 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index c0e5165c2f76..1b5e0e843c3a 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -26,14 +26,28 
@@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i"(key) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm goto("1: b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index 608aa57799c8..e77672539e8e 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -26,14 +26,29 @@ #define NOP_INSN "nop" #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\t" NOP_INSN "\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\tj %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + WORD_INSN " 1b, %l[l_yes], %0\n\t" + ".popsection\n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index efbf9a322a23..47e155f15433 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -18,14 
+18,29 @@ #define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG) #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 69972b7957ee..7f9fd5e3f1bf 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -12,14 +12,29 @@ * We use a brcl 0,2 instruction for jump labels at compile time so it * can be easily distinguished from a hotpatch generated instruction. 
*/ -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table, \"aw\"\n" ".balign 8\n" ".quad 0b, %l[label], %0\n" ".popsection\n" - : : "X" (key) : : label); + : : "X" (&((char *)key)[branch]) : : label); + + return false; +label: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("0: brcl 15, %l[label]\n" + ".pushsection __jump_table, \"aw\"\n" + ".balign 8\n" + ".quad 0b, %l[label], %0\n" + ".popsection\n" + : : "X" (&((char *)key)[branch]) : : label); + return false; label: return true; diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h index cc9b04a2b11b..62d0354d1727 100644 --- a/arch/sparc/include/asm/jump_label.h +++ b/arch/sparc/include/asm/jump_label.h @@ -7,16 +7,33 @@ #define JUMP_LABEL_NOP_SIZE 4 -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\n\t" - "nop\n\t" - "nop\n\t" - ".pushsection __jump_table, \"aw\"\n\t" - ".align 4\n\t" - ".word 1b, %l[l_yes], %c0\n\t" - ".popsection \n\t" - : : "i" (key) : : l_yes); + asm_volatile_goto("1:\n\t" + "nop\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:\n\t" + "b %l[l_yes]\n\t" + "nop\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 4\n\t" + ".word 1b, %l[l_yes], %c0\n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff 
--git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index a4c1cf7e93f8..28d7a857f9d1 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -16,7 +16,7 @@ # define STATIC_KEY_INIT_NOP GENERIC_NOP5_ATOMIC #endif -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" @@ -24,7 +24,24 @@ static __always_inline bool arch_static_branch(struct static_key *key) _ASM_ALIGN "\n\t" _ASM_PTR "1b, %l[l_yes], %c0 \n\t" ".popsection \n\t" - : : "i" (key) : : l_yes); + : : "i" (&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" + "2:\n\t" + ".pushsection __jump_table, \"aw\" \n\t" + _ASM_ALIGN "\n\t" + _ASM_PTR "1b, %l[l_yes], %c0 \n\t" + ".popsection \n\t" + : : "i" (&((char *)key)[branch]) : : l_yes); + return false; l_yes: return true; diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 65f0ebac63cf..e337a1961933 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -107,12 +107,12 @@ static inline int static_key_count(struct static_key *key) static __always_inline bool static_key_false(struct static_key *key) { - return arch_static_branch(key); + return arch_static_branch(key, false); } static __always_inline bool static_key_true(struct static_key *key) { - return !static_key_false(key); + return !arch_static_branch(key, true); } extern struct jump_entry __start___jump_table[]; @@ -130,12 +130,12 @@ extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); -#define STATIC_KEY_INIT_TRUE ((struct 
static_key) \ +#define STATIC_KEY_INIT_TRUE \ { .enabled = ATOMIC_INIT(1), \ - .entries = (void *)JUMP_TYPE_TRUE }) -#define STATIC_KEY_INIT_FALSE ((struct static_key) \ + .entries = (void *)JUMP_TYPE_TRUE } +#define STATIC_KEY_INIT_FALSE \ { .enabled = ATOMIC_INIT(0), \ - .entries = (void *)JUMP_TYPE_FALSE }) + .entries = (void *)JUMP_TYPE_FALSE } #else /* !HAVE_JUMP_LABEL */ @@ -183,10 +183,8 @@ static inline int jump_label_apply_nops(struct module *mod) return 0; } -#define STATIC_KEY_INIT_TRUE ((struct static_key) \ - { .enabled = ATOMIC_INIT(1) }) -#define STATIC_KEY_INIT_FALSE ((struct static_key) \ - { .enabled = ATOMIC_INIT(0) }) +#define STATIC_KEY_INIT_TRUE { .enabled = ATOMIC_INIT(1) } +#define STATIC_KEY_INIT_FALSE { .enabled = ATOMIC_INIT(0) } #endif /* HAVE_JUMP_LABEL */ @@ -218,6 +216,137 @@ static inline void static_key_disable(struct static_key *key) static_key_slow_dec(key); } +/* -------------------------------------------------------------------------- */ + +/* + * Two type wrappers around static_key, such that we can use compile time + * type differentiation to emit the right code. + * + * All the below code is macros in order to play type games. + */ + +struct static_key_true { + struct static_key key; +}; + +struct static_key_false { + struct static_key key; +}; + +#define STATIC_KEY_TRUE_INIT (struct static_key_true) { .key = STATIC_KEY_INIT_TRUE, } +#define STATIC_KEY_FALSE_INIT (struct static_key_false){ .key = STATIC_KEY_INIT_FALSE, } + +#define DEFINE_STATIC_KEY_TRUE(name) \ + struct static_key_true name = STATIC_KEY_TRUE_INIT + +#define DEFINE_STATIC_KEY_FALSE(name) \ + struct static_key_false name = STATIC_KEY_FALSE_INIT + +#ifdef HAVE_JUMP_LABEL + +/* + * Combine the right initial value (type) with the right branch order + * to generate the desired result. + * + * + * type\branch| likely (1) | unlikely (0) + * -----------+-----------------------+------------------ + * | | + * true (1) | ... | ... + * | NOP | JMP L + * | | 1: ... 
+ * | L: ... | + * | | + * | | L: + * | | jmp 1b + * | | + * -----------+-----------------------+------------------ + * | | + * false (0) | ... | ... + * | JMP L | NOP + * | | 1: ... + * | L: ... | + * | | + * | | L: + * | | jmp 1b + * | | + * -----------+-----------------------+------------------ + * + * The initial value is encoded in the LSB of static_key::entries, + * type: 0 = false, 1 = true. + * + * The branch type is encoded in the LSB of jump_entry::key, + * branch: 0 = unlikely, 1 = likely. + * + * This gives the following logic table: + * + * enabled type branch instuction + * -----------------------------+----------- + * 0 0 0 | NOP + * 0 0 1 | JMP + * 0 1 0 | NOP + * 0 1 1 | JMP + * + * 1 0 0 | JMP + * 1 0 1 | NOP + * 1 1 0 | JMP + * 1 1 1 | NOP + * + * Which gives the following functions: + * + * dynamic: instruction = enabled ^ branch + * static: instruction = type ^ branch + * + * See jump_label_type() / jump_label_init_type(). + */ + +extern bool ____wrong_branch_error(void); + +#define static_branch_likely(x) \ +({ \ + bool branch; \ + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ + branch = !arch_static_branch(&(x)->key, true); \ + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + branch = !arch_static_branch_jump(&(x)->key, true); \ + else \ + branch = ____wrong_branch_error(); \ + branch; \ +}) + +#define static_branch_unlikely(x) \ +({ \ + bool branch; \ + if (__builtin_types_compatible_p(typeof(*x), struct static_key_true)) \ + branch = arch_static_branch_jump(&(x)->key, false); \ + else if (__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + branch = arch_static_branch(&(x)->key, false); \ + else \ + branch = ____wrong_branch_error(); \ + branch; \ +}) + +#else /* !HAVE_JUMP_LABEL */ + +#define static_branch_likely(x) likely(static_key_enabled(&(x)->key)) +#define static_branch_unlikely(x) unlikely(static_key_enabled(&(x)->key)) + +#endif /* HAVE_JUMP_LABEL */ + +/* + 
* Advanced usage; refcount, branch is enabled when: count != 0 + */ + +#define static_branch_inc(x) static_key_slow_inc(&(x)->key) +#define static_branch_dec(x) static_key_slow_dec(&(x)->key) + +/* + * Normal usage; boolean enable/disable. + */ + +#define static_branch_enable(x) static_key_enable(&(x)->key) +#define static_branch_disable(x) static_key_disable(&(x)->key) + #endif /* _LINUX_JUMP_LABEL_H */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 2e7cc1e4b4b5..8fd00d892286 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -165,16 +165,22 @@ static inline bool static_key_type(struct static_key *key) static inline struct static_key *jump_entry_key(struct jump_entry *entry) { - return (struct static_key *)((unsigned long)entry->key); + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static bool jump_entry_branch(struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; } static enum jump_label_type jump_label_type(struct jump_entry *entry) { struct static_key *key = jump_entry_key(entry); bool enabled = static_key_enabled(key); - bool type = static_key_type(key); + bool branch = jump_entry_branch(entry); - return enabled ^ type; + /* See the comment in linux/jump_label.h */ + return enabled ^ branch; } static void __jump_label_update(struct static_key *key, @@ -205,7 +211,10 @@ void __init jump_label_init(void) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; - arch_jump_label_transform_static(iter, jump_label_type(iter)); + /* rewrite NOPs */ + if (jump_label_type(iter) == JUMP_LABEL_NOP) + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + iterk = jump_entry_key(iter); if (iterk == key) continue; @@ -225,6 +234,16 @@ void __init jump_label_init(void) #ifdef CONFIG_MODULES +static enum jump_label_type jump_label_init_type(struct jump_entry *entry) +{ + struct static_key *key = jump_entry_key(entry); + bool type = static_key_type(key); + bool 
branch = jump_entry_branch(entry); + + /* See the comment in linux/jump_label.h */ + return type ^ branch; +} + struct static_key_mod { struct static_key_mod *next; struct jump_entry *entries; @@ -276,8 +295,11 @@ void jump_label_apply_nops(struct module *mod) if (iter_start == iter_stop) return; - for (iter = iter_start; iter < iter_stop; iter++) - arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + for (iter = iter_start; iter < iter_stop; iter++) { + /* Only write NOPs for arch_branch_static(). */ + if (jump_label_init_type(iter) == JUMP_LABEL_NOP) + arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); + } } static int jump_label_add_module(struct module *mod) @@ -318,7 +340,8 @@ static int jump_label_add_module(struct module *mod) jlm->next = key->next; key->next = jlm; - if (jump_label_type(iter) == JUMP_LABEL_JMP) + /* Only update if we've changed from our initial state */ + if (jump_label_type(iter) != jump_label_init_type(iter)) __jump_label_update(key, iter, iter_stop); } -- cgit v1.2.3-70-g09d2 From 412758cb26704e5087ca2976ec3b28fb2bdbfad4 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 30 Jul 2015 03:59:48 +0000 Subject: jump label, locking/static_keys: Update docs Signed-off-by: Jason Baron Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: benh@kernel.crashing.org Cc: bp@alien8.de Cc: davem@davemloft.net Cc: ddaney@caviumnetworks.com Cc: heiko.carstens@de.ibm.com Cc: linux-kernel@vger.kernel.org Cc: liuj97@gmail.com Cc: luto@amacapital.net Cc: michael@ellerman.id.au Cc: rabin@rab.in Cc: ralf@linux-mips.org Cc: rostedt@goodmis.org Cc: vbabka@suse.cz Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/6b50f2f6423a2244f37f4b1d2d6c211b9dcdf4f8.1438227999.git.jbaron@akamai.com Signed-off-by: Ingo Molnar --- Documentation/static-keys.txt | 99 +++++++++++++++++++++++-------------------- include/linux/jump_label.h | 67 ++++++++++++++++++++--------- 2 files changed, 98 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt index c4407a41b0fc..f4cb0b2d5cd7 100644 --- a/Documentation/static-keys.txt +++ b/Documentation/static-keys.txt @@ -1,7 +1,22 @@ Static Keys ----------- -By: Jason Baron +DEPRECATED API: + +The use of 'struct static_key' directly, is now DEPRECATED. In addition +static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: + +struct static_key false = STATIC_KEY_INIT_FALSE; +struct static_key true = STATIC_KEY_INIT_TRUE; +static_key_true() +static_key_false() + +The updated API replacements are: + +DEFINE_STATIC_KEY_TRUE(key); +DEFINE_STATIC_KEY_FALSE(key); +static_branch_likely() +static_branch_unlikely() 0) Abstract @@ -9,22 +24,22 @@ Static keys allows the inclusion of seldom used features in performance-sensitive fast-path kernel code, via a GCC feature and a code patching technique. A quick example: - struct static_key key = STATIC_KEY_INIT_FALSE; + DEFINE_STATIC_KEY_FALSE(key); ... - if (static_key_false(&key)) + if (static_branch_unlikely(&key)) do unlikely code else do likely code ... - static_key_slow_inc(); + static_branch_enable(&key); ... - static_key_slow_inc(); + static_branch_disable(&key); ...
-The static_key_false() branch will be generated into the code with as little +The static_branch_unlikely() branch will be generated into the code with as little impact to the likely code path as possible. @@ -56,7 +71,7 @@ the branch site to change the branch direction. For example, if we have a simple branch that is disabled by default: - if (static_key_false(&key)) + if (static_branch_unlikely(&key)) printk("I am the true branch\n"); Thus, by default the 'printk' will not be emitted. And the code generated will @@ -75,68 +90,55 @@ the basis for the static keys facility. In order to make use of this optimization you must first define a key: - struct static_key key; - -Which is initialized as: - - struct static_key key = STATIC_KEY_INIT_TRUE; + DEFINE_STATIC_KEY_TRUE(key); or: - struct static_key key = STATIC_KEY_INIT_FALSE; + DEFINE_STATIC_KEY_FALSE(key); + -If the key is not initialized, it is default false. The 'struct static_key', -must be a 'global'. That is, it can't be allocated on the stack or dynamically +The key must be global, that is, it can't be allocated on the stack or dynamically allocated at run-time. The key is then used in code as: - if (static_key_false(&key)) + if (static_branch_unlikely(&key)) do unlikely code else do likely code Or: - if (static_key_true(&key)) + if (static_branch_likely(&key)) do likely code else do unlikely code -A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a -'static_key_false()' construct. Likewise, a key initialized via -'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A -single key can be used in many branches, but all the branches must match the -way that the key has been initialized. +Keys defined via DEFINE_STATIC_KEY_TRUE(), or DEFINE_STATIC_KEY_FALSE, may +be used in either static_branch_likely() or static_branch_unlikely() +statements. -The branch(es) can then be switched via: +Branch(es) can be set true via: - static_key_slow_inc(&key); - ...
- static_key_slow_dec(&key); +static_branch_enable(&key); -Thus, 'static_key_slow_inc()' means 'make the branch true', and -'static_key_slow_dec()' means 'make the branch false' with appropriate -reference counting. For example, if the key is initialized true, a -static_key_slow_dec(), will switch the branch to false. And a subsequent -static_key_slow_inc(), will change the branch back to true. Likewise, if the -key is initialized false, a 'static_key_slow_inc()', will change the branch to -true. And then a 'static_key_slow_dec()', will again make the branch false. +or false via: + +static_branch_disable(&key); -An example usage in the kernel is the implementation of tracepoints: +The branch(es) can then be switched via reference counts: - static inline void trace_##name(proto) \ - { \ - if (static_key_false(&__tracepoint_##name.key)) \ - __DO_TRACE(&__tracepoint_##name, \ - TP_PROTO(data_proto), \ - TP_ARGS(data_args), \ - TP_CONDITION(cond)); \ - } + static_branch_inc(&key); + ... + static_branch_dec(&key); -Tracepoints are disabled by default, and can be placed in performance critical -pieces of the kernel. Thus, by using a static key, the tracepoints can have -absolutely minimal impact when not in use. +Thus, 'static_branch_inc()' means 'make the branch true', and +'static_branch_dec()' means 'make the branch false' with appropriate +reference counting. For example, if the key is initialized true, a +static_branch_dec(), will switch the branch to false. And a subsequent +static_branch_inc(), will change the branch back to true. Likewise, if the +key is initialized false, a 'static_branch_inc()', will change the branch to +true. And then a 'static_branch_dec()', will again make the branch false. 4) Architecture level code patching interface, 'jump labels' @@ -150,9 +152,12 @@ simply fall back to a traditional, load, test, and jump sequence. 
* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h -* __always_inline bool arch_static_branch(struct static_key *key), see: +* __always_inline bool arch_static_branch(struct static_key *key, bool branch), see: arch/x86/include/asm/jump_label.h +* __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch), + see: arch/x86/include/asm/jump_label.h + * void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type), see: arch/x86/kernel/jump_label.c @@ -173,7 +178,7 @@ SYSCALL_DEFINE0(getppid) { int pid; -+ if (static_key_false(&key)) ++ if (static_branch_unlikely(&key)) + printk("I am the true branch\n"); rcu_read_lock(); diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index e337a1961933..7f653e8f6690 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -7,17 +7,52 @@ * Copyright (C) 2009-2012 Jason Baron * Copyright (C) 2011-2012 Peter Zijlstra * + * DEPRECATED API: + * + * The use of 'struct static_key' directly, is now DEPRECATED. In addition + * static_key_{true,false}() is also DEPRECATED. IE DO NOT use the following: + * + * struct static_key false = STATIC_KEY_INIT_FALSE; + * struct static_key true = STATIC_KEY_INIT_TRUE; + * static_key_true() + * static_key_false() + * + * The updated API replacements are: + * + * DEFINE_STATIC_KEY_TRUE(key); + * DEFINE_STATIC_KEY_FALSE(key); + * static_branch_likely() + * static_branch_unlikely() + * * Jump labels provide an interface to generate dynamic branches using - * self-modifying code. Assuming toolchain and architecture support, the result - * of a "if (static_key_false(&key))" statement is an unconditional branch (which - * defaults to false - and the true block is placed out of line). + * self-modifying code.
Assuming toolchain and architecture support, if we + * define a "key" that is initially false via "DEFINE_STATIC_KEY_FALSE(key)", + * an "if (static_branch_unlikely(&key))" statement is an unconditional branch + * (which defaults to false - and the true block is placed out of line). + * Similarly, we can define an initially true key via + * "DEFINE_STATIC_KEY_TRUE(key)", and use it in the same + * "if (static_branch_unlikely(&key))", in which case we will generate an + * unconditional branch to the out-of-line true branch. Keys that are + * initially true or false can be used in both static_branch_unlikely() + * and static_branch_likely() statements. * - * However at runtime we can change the branch target using - * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key - * object, and for as long as there are references all branches referring to - * that particular key will point to the (out of line) true block. + * At runtime we can change the branch target by setting the key + * to true via a call to static_branch_enable(), or false using + * static_branch_disable(). If the direction of the branch is switched by + * these calls then we run-time modify the branch target via a + * no-op -> jump or jump -> no-op conversion. For example, for an + * initially false key that is used in an "if (static_branch_unlikely(&key))" + * statement, setting the key to true requires us to patch in a jump + * to the out-of-line true branch. * - * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions + * In addition to static_branch_{enable,disable}, we can also reference count + * the key or branch direction via static_branch_{inc,dec}. Thus, + * static_branch_inc() can be thought of as a 'make more true' and + * static_branch_dec() as a 'make more false'. The inc()/dec() + * interface is meant to be used exclusively from the inc()/dec() for a given + * key.
+ * + * Since this relies on modifying code, the branch modifying functions * must be considered absolute slow paths (machine wide synchronization etc.). * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total @@ -29,20 +64,10 @@ * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. * - * Lacking toolchain and or architecture support, jump labels fall back to a simple - * conditional branch. - * - * struct static_key my_key = STATIC_KEY_INIT_TRUE; - * - * if (static_key_true(&my_key)) { - * } - * - * will result in the true case being in-line and starts the key with a single - * reference. Mixing static_key_true() and static_key_false() on the same key is not - * allowed. + * Lacking toolchain and or architecture support, static keys fall back to a + * simple conditional branch. * - * Not initializing the key (static data is initialized to 0s anyway) is the - * same as using STATIC_KEY_INIT_FALSE. + * Additional babbling in: Documentation/static-keys.txt */ #if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL) -- cgit v1.2.3-70-g09d2 From 9d7fb04276481c59610983362d8e023d262b58ca Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Jun 2015 11:30:54 +0200 Subject: sched/cputime: Guarantee stime + utime == rtime While the current code guarantees monotonicity for stime and utime independently of one another, it does not guarantee that the sum of both is equal to the total time we started out with. This confuses things (and peoples) who look at this sum, like top, and will report >100% usage followed by a matching period of 0%. Rework the code to provide both individual monotonicity and a coherent sum. 
Suggested-by: Fredrik Markstrom Reported-by: Fredrik Markstrom Tested-by: Fredrik Markstrom Signed-off-by: Peter Zijlstra (Intel) Cc: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: jason.low2@hp.com Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 10 +++++ include/linux/sched.h | 40 ++++++++++-------- kernel/fork.c | 7 ++-- kernel/sched/cputime.c | 101 +++++++++++++++++++++++++++------------------- 4 files changed, 97 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e8493fee8160..d0b380ee7d67 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -32,6 +32,14 @@ extern struct fs_struct init_fs; #define INIT_CPUSET_SEQ(tsk) #endif +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#define INIT_PREV_CPUTIME(x) .prev_cputime = { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(x.prev_cputime.lock), \ +}, +#else +#define INIT_PREV_CPUTIME(x) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ @@ -46,6 +54,7 @@ extern struct fs_struct init_fs; .cputime_atomic = INIT_CPUTIME_ATOMIC, \ .running = 0, \ }, \ + INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ } @@ -246,6 +255,7 @@ extern struct task_group root_task_group; INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ + INIT_PREV_CPUTIME(tsk) \ INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ INIT_KASAN(tsk) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index ae21f1591615..7412070a25cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -530,39 +530,49 @@ struct cpu_itimer { }; /** - * struct cputime - snaphsot of system and user cputime + * struct prev_cputime - snaphsot of system and user cputime * @utime: time spent in user mode * @stime: time spent in 
system mode + * @lock: protects the above two fields * - * Gathers a generic snapshot of user and system time. + * Stores previous user/system time values such that we can guarantee + * monotonicity. */ -struct cputime { +struct prev_cputime { +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE cputime_t utime; cputime_t stime; + raw_spinlock_t lock; +#endif }; +static inline void prev_cputime_init(struct prev_cputime *prev) +{ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + prev->utime = prev->stime = 0; + raw_spin_lock_init(&prev->lock); +#endif +} + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * - * This is an extension of struct cputime that includes the total runtime - * spent by the task from the scheduler point of view. - * - * As a result, this structure groups together three kinds of CPU time - * that are tracked for threads and thread groups. Most things considering - * CPU time want to group these counts together and treat all three - * of them in parallel. + * This structure groups together three kinds of CPU time that are tracked for + * threads and thread groups. Most things considering CPU time want to group + * these counts together and treat all three of them in parallel. */ struct task_cputime { cputime_t utime; cputime_t stime; unsigned long long sum_exec_runtime; }; + /* Alternate field names when used to cache expirations. 
*/ -#define prof_exp stime #define virt_exp utime +#define prof_exp stime #define sched_exp sum_exec_runtime #define INIT_CPUTIME \ @@ -715,9 +725,7 @@ struct signal_struct { cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - struct cputime prev_cputime; -#endif + struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; unsigned long inblock, oublock, cinblock, coublock; @@ -1481,9 +1489,7 @@ struct task_struct { cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - struct cputime prev_cputime; -#endif + struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_t vtime_seqlock; unsigned long long vtime_snap; diff --git a/kernel/fork.c b/kernel/fork.c index 1bfefc6f96a4..6e8f807c5716 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1067,6 +1067,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk) rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; + atomic_set(&sig->count, 1); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); return 0; @@ -1128,6 +1129,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); seqlock_init(&sig->stats_lock); + prev_cputime_init(&sig->prev_cputime); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; @@ -1335,9 +1337,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->utime = p->stime = p->gtime = 0; p->utimescaled = p->stimescaled = 0; -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - p->prev_cputime.utime = p->prev_cputime.stime = 0; -#endif + prev_cputime_init(&p->prev_cputime); + #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_init(&p->vtime_seqlock); p->vtime_snap = 0; diff --git 
a/kernel/sched/cputime.c b/kernel/sched/cputime.c index f5a64ffad176..8cbc3db671df 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -555,48 +555,43 @@ drop_precision: } /* - * Atomically advance counter to the new value. Interrupts, vcpu - * scheduling, and scaling inaccuracies can cause cputime_advance - * to be occasionally called with a new value smaller than counter. - * Let's enforce atomicity. + * Adjust tick based cputime random precision against scheduler runtime + * accounting. * - * Normally a caller will only go through this loop once, or not - * at all in case a previous caller updated counter the same jiffy. - */ -static void cputime_advance(cputime_t *counter, cputime_t new) -{ - cputime_t old; - - while (new > (old = READ_ONCE(*counter))) - cmpxchg_cputime(counter, old, new); -} - -/* - * Adjust tick based cputime random precision against scheduler - * runtime accounting. + * Tick based cputime accounting depend on random scheduling timeslices of a + * task to be interrupted or not by the timer. Depending on these + * circumstances, the number of these interrupts may be over or + * under-optimistic, matching the real user and system cputime with a variable + * precision. + * + * Fix this by scaling these tick based values against the total runtime + * accounted by the CFS scheduler. + * + * This code provides the following guarantees: + * + * stime + utime == rtime + * stime_i+1 >= stime_i, utime_i+1 >= utime_i + * + * Assuming that rtime_i+1 >= rtime_i. */ static void cputime_adjust(struct task_cputime *curr, - struct cputime *prev, + struct prev_cputime *prev, cputime_t *ut, cputime_t *st) { cputime_t rtime, stime, utime; + unsigned long flags; - /* - * Tick based cputime accounting depend on random scheduling - * timeslices of a task to be interrupted or not by the timer. 
- * Depending on these circumstances, the number of these interrupts - * may be over or under-optimistic, matching the real user and system - * cputime with a variable precision. - * - * Fix this by scaling these tick based values against the total - * runtime accounted by the CFS scheduler. - */ + /* Serialize concurrent callers such that we can honour our guarantees */ + raw_spin_lock_irqsave(&prev->lock, flags); rtime = nsecs_to_cputime(curr->sum_exec_runtime); /* - * Update userspace visible utime/stime values only if actual execution - * time is bigger than already exported. Note that can happen, that we - * provided bigger values due to scaling inaccuracy on big numbers. + * This is possible under two circumstances: + * - rtime isn't monotonic after all (a bug); + * - we got reordered by the lock. + * + * In both cases this acts as a filter such that the rest of the code + * can assume it is monotonic regardless of anything else. */ if (prev->stime + prev->utime >= rtime) goto out; @@ -606,22 +601,46 @@ static void cputime_adjust(struct task_cputime *curr, if (utime == 0) { stime = rtime; - } else if (stime == 0) { - utime = rtime; - } else { - cputime_t total = stime + utime; + goto update; + } - stime = scale_stime((__force u64)stime, - (__force u64)rtime, (__force u64)total); - utime = rtime - stime; + if (stime == 0) { + utime = rtime; + goto update; } - cputime_advance(&prev->stime, stime); - cputime_advance(&prev->utime, utime); + stime = scale_stime((__force u64)stime, (__force u64)rtime, + (__force u64)(stime + utime)); + + /* + * Make sure stime doesn't go backwards; this preserves monotonicity + * for utime because rtime is monotonic. 
+ * + * utime_i+1 = rtime_i+1 - stime_i + * = rtime_i+1 - (rtime_i - utime_i) + * = (rtime_i+1 - rtime_i) + utime_i + * >= utime_i + */ + if (stime < prev->stime) + stime = prev->stime; + utime = rtime - stime; + + /* + * Make sure utime doesn't go backwards; this still preserves + * monotonicity for stime, analogous argument to above. + */ + if (utime < prev->utime) { + utime = prev->utime; + stime = rtime - utime; + } +update: + prev->stime = stime; + prev->utime = utime; out: *ut = prev->utime; *st = prev->stime; + raw_spin_unlock_irqrestore(&prev->lock, flags); } void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) -- cgit v1.2.3-70-g09d2 From 63b0e9edceec10fa41ec33393a1515a5ff444277 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Tue, 14 Jul 2015 17:39:50 +0200 Subject: sched/fair: Beef up wake_wide() Josef Bacik reported that Facebook sees better performance with their 1:N load (1 dispatch/node, N workers/node) when carrying an old patch to try very hard to wake to an idle CPU. While looking at wake_wide(), I noticed that it doesn't pay attention to the wakeup of a many partner waker, returning 1 only when waking one of its many partners. Correct that, letting explicit domain flags override the heuristic. While at it, adjust task_struct bits, we don't need a 64-bit counter. Tested-by: Josef Bacik Signed-off-by: Mike Galbraith [ Tidy things up. 
] Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team Cc: morten.rasmussen@arm.com Cc: riel@redhat.com Link: http://lkml.kernel.org/r/1436888390.7983.49.camel@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +-- kernel/sched/fair.c | 67 +++++++++++++++++++++++++-------------------------- 2 files changed, 35 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7412070a25cc..65a8a8651596 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1359,9 +1359,9 @@ struct task_struct { #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; - struct task_struct *last_wakee; - unsigned long wakee_flips; + unsigned int wakee_flips; unsigned long wakee_flip_decay_ts; + struct task_struct *last_wakee; int wake_cpu; #endif diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8b384b8d2f1d..ea23f9f1b51b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4726,26 +4726,29 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) #endif +/* + * Detect M:N waker/wakee relationships via a switching-frequency heuristic. + * A waker of many should wake a different task than the one last awakened + * at a frequency roughly N times higher than one of its wakees. In order + * to determine whether we should let the load spread vs consolodating to + * shared cache, we look for a minimum 'flip' frequency of llc_size in one + * partner, and a factor of lls_size higher frequency in the other. With + * both conditions met, we can be relatively sure that the relationship is + * non-monogamous, with partner count exceeding socket size. Waker/wakee + * being client/server, worker/dispatcher, interrupt source or whatever is + * irrelevant, spread criteria is apparent partner count exceeds socket size. 
+ */ static int wake_wide(struct task_struct *p) { + unsigned int master = current->wakee_flips; + unsigned int slave = p->wakee_flips; int factor = this_cpu_read(sd_llc_size); - /* - * Yeah, it's the switching-frequency, could means many wakee or - * rapidly switch, use factor here will just help to automatically - * adjust the loose-degree, so bigger node will lead to more pull. - */ - if (p->wakee_flips > factor) { - /* - * wakee is somewhat hot, it needs certain amount of cpu - * resource, so if waker is far more hot, prefer to leave - * it alone. - */ - if (current->wakee_flips > (factor * p->wakee_flips)) - return 1; - } - - return 0; + if (master < slave) + swap(master, slave); + if (slave < factor || master < slave * factor) + return 0; + return 1; } static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) @@ -4757,13 +4760,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) unsigned long weight; int balanced; - /* - * If we wake multiple tasks be careful to not bounce - * ourselves around too much. 
- */ - if (wake_wide(p)) - return 0; - idx = sd->wake_idx; this_cpu = smp_processor_id(); prev_cpu = task_cpu(p); @@ -5017,17 +5013,17 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f { struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; int cpu = smp_processor_id(); - int new_cpu = cpu; + int new_cpu = prev_cpu; int want_affine = 0; int sync = wake_flags & WF_SYNC; if (sd_flag & SD_BALANCE_WAKE) - want_affine = cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); + want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p)); rcu_read_lock(); for_each_domain(cpu, tmp) { if (!(tmp->flags & SD_LOAD_BALANCE)) - continue; + break; /* * If both cpu and prev_cpu are part of this domain, @@ -5041,17 +5037,21 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (tmp->flags & sd_flag) sd = tmp; + else if (!want_affine) + break; } - if (affine_sd && cpu != prev_cpu && wake_affine(affine_sd, p, sync)) - prev_cpu = cpu; - - if (sd_flag & SD_BALANCE_WAKE) { - new_cpu = select_idle_sibling(p, prev_cpu); - goto unlock; + if (affine_sd) { + sd = NULL; /* Prefer wake_affine over balance flags */ + if (cpu != prev_cpu && wake_affine(affine_sd, p, sync)) + new_cpu = cpu; } - while (sd) { + if (!sd) { + if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? 
*/ + new_cpu = select_idle_sibling(p, new_cpu); + + } else while (sd) { struct sched_group *group; int weight; @@ -5085,7 +5085,6 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f } /* while loop will break here if sd == NULL */ } -unlock: rcu_read_unlock(); return new_cpu; -- cgit v1.2.3-70-g09d2 From fe32d3cd5e8eb0f82e459763374aa80797023403 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 15 Jul 2015 12:52:04 +0300 Subject: sched/preempt: Fix cond_resched_lock() and cond_resched_softirq() These functions check should_resched() before unlocking spinlock/bh-enable: preempt_count always non-zero => should_resched() always returns false. cond_resched_lock() worked iff spin_needbreak is set. This patch adds argument "preempt_offset" to should_resched(). preempt_count offset constants for that: PREEMPT_DISABLE_OFFSET - offset after preempt_disable() PREEMPT_LOCK_OFFSET - offset after spin_lock() SOFTIRQ_DISABLE_OFFSET - offset after local_bh_disable() SOFTIRQ_LOCK_OFFSET - offset after spin_lock_bh() Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Graf Cc: Boris Ostrovsky Cc: David Vrabel Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: bdb438065890 ("sched: Extract the basic add/sub preempt_count modifiers") Link: http://lkml.kernel.org/r/20150715095204.12246.98268.stgit@buzz Signed-off-by: Ingo Molnar --- arch/x86/include/asm/preempt.h | 4 ++-- include/asm-generic/preempt.h | 5 +++-- include/linux/preempt.h | 19 ++++++++++++++----- include/linux/sched.h | 6 ------ kernel/sched/core.c | 6 +++--- 5 files changed, 22 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index dca71714f860..b12f81022a6b 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -90,9 +90,9 @@ static __always_inline bool
__preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). */ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!raw_cpu_read_4(__preempt_count)); + return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPT diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d0a7a4753db2..0bec580a4885 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -71,9 +71,10 @@ static __always_inline bool __preempt_count_dec_and_test(void) /* * Returns true when we need to resched and can (barring IRQ state). */ -static __always_inline bool should_resched(void) +static __always_inline bool should_resched(int preempt_offset) { - return unlikely(!preempt_count() && tif_need_resched()); + return unlikely(preempt_count() == preempt_offset && + tif_need_resched()); } #ifdef CONFIG_PREEMPT diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 84991f185173..bea8dd8ff5e0 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -84,12 +84,20 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) +/* + * The preempt_count offset after preempt_disable(); + */ #if defined(CONFIG_PREEMPT_COUNT) -# define PREEMPT_DISABLE_OFFSET 1 +# define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET #else -# define PREEMPT_DISABLE_OFFSET 0 +# define PREEMPT_DISABLE_OFFSET 0 #endif +/* + * The preempt_count offset after spin_lock() + */ +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET + /* * The preempt_count offset needed for things like: * @@ -103,7 +111,7 @@ * * Work as expected. */ -#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_DISABLE_OFFSET) +#define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET) /* * Are we running in atomic context? 
WARNING: this macro cannot @@ -124,7 +132,8 @@ #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); -#define preempt_count_dec_and_test() ({ preempt_count_sub(1); should_resched(); }) +#define preempt_count_dec_and_test() \ + ({ preempt_count_sub(1); should_resched(0); }) #else #define preempt_count_add(val) __preempt_count_add(val) #define preempt_count_sub(val) __preempt_count_sub(val) @@ -184,7 +193,7 @@ do { \ #define preempt_check_resched() \ do { \ - if (should_resched()) \ + if (should_resched(0)) \ __preempt_schedule(); \ } while (0) diff --git a/include/linux/sched.h b/include/linux/sched.h index 65a8a8651596..9c144657aace 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2891,12 +2891,6 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET -#else -#define PREEMPT_LOCK_OFFSET 0 -#endif - #define cond_resched_lock(lock) ({ \ ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ __cond_resched_lock(lock); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fa5826cc612f..f5fad2b12baf 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4496,7 +4496,7 @@ SYSCALL_DEFINE0(sched_yield) int __sched _cond_resched(void) { - if (should_resched()) { + if (should_resched(0)) { preempt_schedule_common(); return 1; } @@ -4514,7 +4514,7 @@ EXPORT_SYMBOL(_cond_resched); */ int __cond_resched_lock(spinlock_t *lock) { - int resched = should_resched(); + int resched = should_resched(PREEMPT_LOCK_OFFSET); int ret = 0; lockdep_assert_held(lock); @@ -4536,7 +4536,7 @@ int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (should_resched()) { + if (should_resched(SOFTIRQ_DISABLE_OFFSET)) { local_bh_enable(); preempt_schedule_common(); local_bh_disable(); -- cgit v1.2.3-70-g09d2 From 7eeb088e72048bf4660f64fc3824c8066cf17591 Mon 
Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Jun 2015 03:29:51 +0200 Subject: stop_machine: Unexport __stop_machine() The only caller outside of stop_machine.c is _cpu_down(), it can use stop_machine(). get_online_cpus() is fine under cpu_hotplug_begin(). Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: der.herr@hofr.at Cc: paulmck@linux.vnet.ibm.com Cc: riel@redhat.com Cc: viro@ZenIV.linux.org.uk Link: http://lkml.kernel.org/r/20150630012951.GA23934@redhat.com Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 22 ++-------------------- kernel/cpu.c | 2 +- kernel/stop_machine.c | 2 +- 3 files changed, 4 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index d2abbdb8c6aa..0fca276a0537 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -114,23 +114,11 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * grabbing every spinlock in the kernel. */ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); -/** - * __stop_machine: freeze the machine on all CPUs and run this function - * @fn: the function to run - * @data: the data ptr for the @fn - * @cpus: the cpus to run the @fn() on (NULL = any online cpu) - * - * Description: This is a special version of the above, which assumes cpus - * won't come or go while it's being called. Used by hotplug cpu. 
- */ -int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); - int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, const struct cpumask *cpus); - #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int __stop_machine(int (*fn)(void *), void *data, +static inline int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { unsigned long flags; @@ -141,16 +129,10 @@ static inline int __stop_machine(int (*fn)(void *), void *data, return ret; } -static inline int stop_machine(int (*fn)(void *), void *data, - const struct cpumask *cpus) -{ - return __stop_machine(fn, data, cpus); -} - static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, const struct cpumask *cpus) { - return __stop_machine(fn, data, cpus); + return stop_machine(fn, data, cpus); } #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c9c9fab16cc..664ce5299334 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -395,7 +395,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) * So now all preempt/rcu users must observe !cpu_active(). */ - err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); + err = stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. 
*/ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 621220852df0..b50910dbf030 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -513,7 +513,7 @@ early_initcall(cpu_stop_init); #ifdef CONFIG_STOP_MACHINE -int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) +static int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, -- cgit v1.2.3-70-g09d2 From 9a301f22faac7fc2207ee49c1855a6b4ba9c5a52 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 30 Jun 2015 03:29:55 +0200 Subject: stop_machine: Use 'cpu_stop_fn_t' where possible Cosmetic, but 'cpu_stop_fn_t' actually makes the code more readable and it doesn't break cscope. And most of the declarations already use it. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: der.herr@hofr.at Cc: paulmck@linux.vnet.ibm.com Cc: riel@redhat.com Cc: viro@ZenIV.linux.org.uk Link: http://lkml.kernel.org/r/20150630012955.GA23937@redhat.com Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 8 ++++---- kernel/stop_machine.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 0fca276a0537..414d924318ce 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -112,13 +112,13 @@ static inline int try_stop_cpus(const struct cpumask *cpumask, * * This can be thought of as a very heavy write lock, equivalent to * grabbing every spinlock in the kernel. 
*/ -int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); +int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); -int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int stop_machine(int (*fn)(void *), void *data, +static inline int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -129,7 +129,7 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } -static inline int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { return stop_machine(fn, data, cpus); diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index b50910dbf030..9a70defe9f1f 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -141,7 +141,7 @@ enum multi_stop_state { }; struct multi_stop_data { - int (*fn)(void *); + cpu_stop_fn_t fn; void *data; /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. 
*/ unsigned int num_threads; @@ -513,7 +513,7 @@ early_initcall(cpu_stop_init); #ifdef CONFIG_STOP_MACHINE -static int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) +static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, @@ -546,7 +546,7 @@ static int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *c return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata); } -int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) +int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { int ret; @@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(stop_machine); * 0 if all executions of @fn returned 0, any non zero return value if any * returned non zero. */ -int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data, +int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { struct multi_stop_data msdata = { .fn = fn, .data = data, -- cgit v1.2.3-70-g09d2 From 9d89c257dfb9c51a532d69397f6eed75e5168c35 Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Wed, 15 Jul 2015 08:04:37 +0800 Subject: sched/fair: Rewrite runnable load and utilization average tracking The idea of runnable load average (let runnable time contribute to weight) was proposed by Paul Turner and Ben Segall, and it is still followed by this rewrite. This rewrite aims to solve the following issues: 1. cfs_rq's load average (namely runnable_load_avg and blocked_load_avg) is updated at the granularity of an entity at a time, which results in the cfs_rq's load average is stale or partially updated: at any time, only one entity is up to date, all other entities are effectively lagging behind. This is undesirable. 
To illustrate, if we have n runnable entities in the cfs_rq, as time elapses, they certainly become outdated: t0: cfs_rq { e1_old, e2_old, ..., en_old } and when we update: t1: update e1, then we have cfs_rq { e1_new, e2_old, ..., en_old } t2: update e2, then we have cfs_rq { e1_old, e2_new, ..., en_old } ... We solve this by combining all runnable entities' load averages together in cfs_rq's avg, and update the cfs_rq's avg as a whole. This is based on the fact that if we regard the update as a function, then: w * update(e) = update(w * e) and update(e1) + update(e2) = update(e1 + e2), then w1 * update(e1) + w2 * update(e2) = update(w1 * e1 + w2 * e2) therefore, by this rewrite, we have an entirely updated cfs_rq at the time we update it: t1: update cfs_rq { e1_new, e2_new, ..., en_new } t2: update cfs_rq { e1_new, e2_new, ..., en_new } ... 2. cfs_rq's load average is different between top rq->cfs_rq and other task_group's per CPU cfs_rqs in whether or not blocked_load_average contributes to the load. The basic idea behind runnable load average (the same for utilization) is that the blocked state is taken into account as opposed to only accounting for the currently runnable state. Therefore, the average should include both the runnable/running and blocked load averages. This rewrite does that. In addition, we also combine runnable/running and blocked averages of all entities into the cfs_rq's average, and update it together at once. This is based on the fact that: update(runnable) + update(blocked) = update(runnable + blocked) This significantly reduces the code as we don't need to separately maintain/update runnable/running load and blocked load. 3. How task_group entities' share is calculated is complex and imprecise. We reduce the complexity in this rewrite to allow a very simple rule: the task_group's load_avg is aggregated from its per CPU cfs_rqs's load_avgs. 
Then group entity's weight is simply proportional to its own cfs_rq's load_avg / task_group's load_avg. To illustrate, if a task_group has { cfs_rq1, cfs_rq2, ..., cfs_rqn }, then, task_group_avg = cfs_rq1_avg + cfs_rq2_avg + ... + cfs_rqn_avg, then cfs_rqx's entity's share = cfs_rqx_avg / task_group_avg * task_group's share To sum up, this rewrite in principle is equivalent to the current one, but fixes the issues described above. Turns out, it significantly reduces the code complexity and hence increases clarity and efficiency. In addition, the new averages are more smooth/continuous (no spurious spikes and valleys) and updated more consistently and quickly to reflect the load dynamics. As a result, we have less load tracking overhead, better performance, and especially better power efficiency due to more balanced load. Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan@linux.intel.com Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: fengguang.wu@intel.com Cc: len.brown@intel.com Cc: morten.rasmussen@arm.com Cc: pjt@google.com Cc: rafael.j.wysocki@intel.com Cc: umgwanakikbuti@gmail.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1436918682-4971-3-git-send-email-yuyang.du@intel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 41 ++-- kernel/sched/core.c | 3 - kernel/sched/debug.c | 41 ++-- kernel/sched/fair.c | 630 ++++++++++++++++---------------------------------- kernel/sched/sched.h | 28 +-- 5 files changed, 249 insertions(+), 494 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9c144657aace..44dca5b35de6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1175,29 +1175,24 @@ struct load_weight { u32 inv_weight; }; +/* + * The load_avg/util_avg accumulates an infinite geometric series. 
+ * 1) load_avg factors the amount of time that a sched_entity is + * runnable on a rq into its weight. For cfs_rq, it is the aggregated + * such weights of all runnable and blocked sched_entities. + * 2) util_avg factors frequency scaling into the amount of time + * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. + * For cfs_rq, it is the aggregated such times of all runnable and + * blocked sched_entities. + * The 64 bit load_sum can: + * 1) for cfs_rq, afford 4353082796 (=2^64/47742/88761) entities with + * the highest weight (=88761) always runnable, we should not overflow + * 2) for entity, support any load.weight always runnable + */ struct sched_avg { - u64 last_runnable_update; - s64 decay_count; - /* - * utilization_avg_contrib describes the amount of time that a - * sched_entity is running on a CPU. It is based on running_avg_sum - * and is scaled in the range [0..SCHED_LOAD_SCALE]. - * load_avg_contrib described the amount of time that a sched_entity - * is runnable on a rq. It is based on both runnable_avg_sum and the - * weight of the task. - */ - unsigned long load_avg_contrib, utilization_avg_contrib; - /* - * These sums represent an infinite geometric series and so are bound - * above by 1024/(1-y). Thus we only need a u32 to store them for all - * choices of y < 1-2^(-32)*1024. - * running_avg_sum reflects the time that the sched_entity is - * effectively running on the CPU. - * runnable_avg_sum represents the amount of time a sched_entity is on - * a runqueue which includes the running time that is monitored by - * running_avg_sum. 
- */ - u32 runnable_avg_sum, avg_period, running_avg_sum; + u64 last_update_time, load_sum; + u32 util_sum, period_contrib; + unsigned long load_avg, util_avg; }; #ifdef CONFIG_SCHEDSTATS @@ -1263,7 +1258,7 @@ struct sched_entity { #endif #ifdef CONFIG_SMP - /* Per-entity load-tracking */ + /* Per entity load average tracking */ struct sched_avg avg; #endif }; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f5fad2b12baf..3981526539c5 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2020,9 +2020,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) p->se.prev_sum_exec_runtime = 0; p->se.nr_migrations = 0; p->se.vruntime = 0; -#ifdef CONFIG_SMP - p->se.avg.decay_count = 0; -#endif INIT_LIST_HEAD(&p->se.group_node); #ifdef CONFIG_SCHEDSTATS diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 363b7e82554b..74f276f5568c 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -88,12 +88,8 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group #endif P(se->load.weight); #ifdef CONFIG_SMP - P(se->avg.runnable_avg_sum); - P(se->avg.running_avg_sum); - P(se->avg.avg_period); - P(se->avg.load_avg_contrib); - P(se->avg.utilization_avg_contrib); - P(se->avg.decay_count); + P(se->avg.load_avg); + P(se->avg.util_avg); #endif #undef PN #undef P @@ -209,21 +205,19 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_SMP - SEQ_printf(m, " .%-30s: %ld\n", "runnable_load_avg", - cfs_rq->runnable_load_avg); - SEQ_printf(m, " .%-30s: %ld\n", "blocked_load_avg", - cfs_rq->blocked_load_avg); - SEQ_printf(m, " .%-30s: %ld\n", "utilization_load_avg", - cfs_rq->utilization_load_avg); + SEQ_printf(m, " .%-30s: %lu\n", "load_avg", + cfs_rq->avg.load_avg); + SEQ_printf(m, " .%-30s: %lu\n", "util_avg", + cfs_rq->avg.util_avg); + 
SEQ_printf(m, " .%-30s: %ld\n", "removed_load_avg", + atomic_long_read(&cfs_rq->removed_load_avg)); + SEQ_printf(m, " .%-30s: %ld\n", "removed_util_avg", + atomic_long_read(&cfs_rq->removed_util_avg)); #ifdef CONFIG_FAIR_GROUP_SCHED - SEQ_printf(m, " .%-30s: %ld\n", "tg_load_contrib", - cfs_rq->tg_load_contrib); - SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib", - cfs_rq->tg_runnable_contrib); + SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib", + cfs_rq->tg_load_avg_contrib); SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", atomic_long_read(&cfs_rq->tg->load_avg)); - SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg", - atomic_read(&cfs_rq->tg->runnable_avg)); #endif #endif #ifdef CONFIG_CFS_BANDWIDTH @@ -631,12 +625,11 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.load.weight); #ifdef CONFIG_SMP - P(se.avg.runnable_avg_sum); - P(se.avg.running_avg_sum); - P(se.avg.avg_period); - P(se.avg.load_avg_contrib); - P(se.avg.utilization_avg_contrib); - P(se.avg.decay_count); + P(se.avg.load_sum); + P(se.avg.util_sum); + P(se.avg.load_avg); + P(se.avg.util_avg); + P(se.avg.last_update_time); #endif P(policy); P(prio); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 90292c672a3b..01ffa9509c23 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -283,9 +283,6 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) return grp->my_q; } -static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, - int force_update); - static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) { if (!cfs_rq->on_list) { @@ -305,8 +302,6 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) } cfs_rq->on_list = 1; - /* We should have no load, but we need to update last_decay. 
*/ - update_cfs_rq_blocked_load(cfs_rq, 0); } } @@ -664,19 +659,31 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) static int select_idle_sibling(struct task_struct *p, int cpu); static unsigned long task_h_load(struct task_struct *p); -static inline void __update_task_entity_contrib(struct sched_entity *se); -static inline void __update_task_entity_utilization(struct sched_entity *se); +/* + * We choose a half-life close to 1 scheduling period. + * Note: The tables below are dependent on this value. + */ +#define LOAD_AVG_PERIOD 32 +#define LOAD_AVG_MAX 47742 /* maximum possible load avg */ +#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_MAX_AVG */ /* Give new task start runnable values to heavy its load in infant time */ void init_task_runnable_average(struct task_struct *p) { - u32 slice; + struct sched_avg *sa = &p->se.avg; - slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; - p->se.avg.runnable_avg_sum = p->se.avg.running_avg_sum = slice; - p->se.avg.avg_period = slice; - __update_task_entity_contrib(&p->se); - __update_task_entity_utilization(&p->se); + sa->last_update_time = 0; + /* + * sched_avg's period_contrib should be strictly less then 1024, so + * we give it 1023 to make sure it is almost a period (1024us), and + * will definitely be update (after enqueue). 
+ */ + sa->period_contrib = 1023; + sa->load_avg = scale_load_down(p->se.load.weight); + sa->load_sum = sa->load_avg * LOAD_AVG_MAX; + sa->util_avg = scale_load_down(SCHED_LOAD_SCALE); + sa->util_sum = LOAD_AVG_MAX; + /* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */ } #else void init_task_runnable_average(struct task_struct *p) @@ -1698,8 +1705,8 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period) delta = runtime - p->last_sum_exec_runtime; *period = now - p->last_task_numa_placement; } else { - delta = p->se.avg.runnable_avg_sum; - *period = p->se.avg.avg_period; + delta = p->se.avg.load_sum / p->se.load.weight; + *period = LOAD_AVG_MAX; } p->last_sum_exec_runtime = runtime; @@ -2347,13 +2354,13 @@ static inline long calc_tg_weight(struct task_group *tg, struct cfs_rq *cfs_rq) long tg_weight; /* - * Use this CPU's actual weight instead of the last load_contribution - * to gain a more accurate current total weight. See - * __update_cfs_rq_tg_load_contrib(). + * Use this CPU's real-time load instead of the last load contribution + * as the updating of the contribution is delayed, and we will use the + * the real-time load to calc the share. See update_tg_load_avg(). */ tg_weight = atomic_long_read(&tg->load_avg); - tg_weight -= cfs_rq->tg_load_contrib; - tg_weight += cfs_rq->load.weight; + tg_weight -= cfs_rq->tg_load_avg_contrib; + tg_weight += cfs_rq->avg.load_avg; return tg_weight; } @@ -2363,7 +2370,7 @@ static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) long tg_weight, load, shares; tg_weight = calc_tg_weight(tg, cfs_rq); - load = cfs_rq->load.weight; + load = cfs_rq->avg.load_avg; shares = (tg->shares * load); if (tg_weight) @@ -2425,14 +2432,6 @@ static inline void update_cfs_shares(struct cfs_rq *cfs_rq) #endif /* CONFIG_FAIR_GROUP_SCHED */ #ifdef CONFIG_SMP -/* - * We choose a half-life close to 1 scheduling period. - * Note: The tables below are dependent on this value. 
- */ -#define LOAD_AVG_PERIOD 32 -#define LOAD_AVG_MAX 47742 /* maximum possible load avg */ -#define LOAD_AVG_MAX_N 345 /* number of full periods to produce LOAD_MAX_AVG */ - /* Precomputed fixed inverse multiplies for multiplication by y^n */ static const u32 runnable_avg_yN_inv[] = { 0xffffffff, 0xfa83b2da, 0xf5257d14, 0xefe4b99a, 0xeac0c6e6, 0xe5b906e6, @@ -2481,9 +2480,8 @@ static __always_inline u64 decay_load(u64 val, u64 n) local_n %= LOAD_AVG_PERIOD; } - val *= runnable_avg_yN_inv[local_n]; - /* We don't use SRR here since we always want to round down. */ - return val >> 32; + val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32); + return val; } /* @@ -2542,23 +2540,22 @@ static u32 __compute_runnable_contrib(u64 n) * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... ) * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}] */ -static __always_inline int __update_entity_runnable_avg(u64 now, int cpu, - struct sched_avg *sa, - int runnable, - int running) +static __always_inline int +__update_load_avg(u64 now, int cpu, struct sched_avg *sa, + unsigned long weight, int running) { u64 delta, periods; - u32 runnable_contrib; + u32 contrib; int delta_w, decayed = 0; unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu); - delta = now - sa->last_runnable_update; + delta = now - sa->last_update_time; /* * This should only happen when time goes backwards, which it * unfortunately does during sched clock init when we swap over to TSC. 
*/ if ((s64)delta < 0) { - sa->last_runnable_update = now; + sa->last_update_time = now; return 0; } @@ -2569,26 +2566,26 @@ static __always_inline int __update_entity_runnable_avg(u64 now, int cpu, delta >>= 10; if (!delta) return 0; - sa->last_runnable_update = now; + sa->last_update_time = now; /* delta_w is the amount already accumulated against our next period */ - delta_w = sa->avg_period % 1024; + delta_w = sa->period_contrib; if (delta + delta_w >= 1024) { - /* period roll-over */ decayed = 1; + /* how much left for next period will start over, we don't know yet */ + sa->period_contrib = 0; + /* * Now that we know we're crossing a period boundary, figure * out how much from delta we need to complete the current * period and accrue it. */ delta_w = 1024 - delta_w; - if (runnable) - sa->runnable_avg_sum += delta_w; + if (weight) + sa->load_sum += weight * delta_w; if (running) - sa->running_avg_sum += delta_w * scale_freq - >> SCHED_CAPACITY_SHIFT; - sa->avg_period += delta_w; + sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT; delta -= delta_w; @@ -2596,334 +2593,156 @@ static __always_inline int __update_entity_runnable_avg(u64 now, int cpu, periods = delta / 1024; delta %= 1024; - sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum, - periods + 1); - sa->running_avg_sum = decay_load(sa->running_avg_sum, - periods + 1); - sa->avg_period = decay_load(sa->avg_period, - periods + 1); + sa->load_sum = decay_load(sa->load_sum, periods + 1); + sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1); /* Efficiently calculate \sum (1..n_period) 1024*y^i */ - runnable_contrib = __compute_runnable_contrib(periods); - if (runnable) - sa->runnable_avg_sum += runnable_contrib; + contrib = __compute_runnable_contrib(periods); + if (weight) + sa->load_sum += weight * contrib; if (running) - sa->running_avg_sum += runnable_contrib * scale_freq - >> SCHED_CAPACITY_SHIFT; - sa->avg_period += runnable_contrib; + sa->util_sum += contrib * scale_freq >> 
SCHED_CAPACITY_SHIFT; } /* Remainder of delta accrued against u_0` */ - if (runnable) - sa->runnable_avg_sum += delta; + if (weight) + sa->load_sum += weight * delta; if (running) - sa->running_avg_sum += delta * scale_freq - >> SCHED_CAPACITY_SHIFT; - sa->avg_period += delta; - - return decayed; -} - -/* Synchronize an entity's decay with its parenting cfs_rq.*/ -static inline u64 __synchronize_entity_decay(struct sched_entity *se) -{ - struct cfs_rq *cfs_rq = cfs_rq_of(se); - u64 decays = atomic64_read(&cfs_rq->decay_counter); + sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT; - decays -= se->avg.decay_count; - se->avg.decay_count = 0; - if (!decays) - return 0; + sa->period_contrib += delta; - se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays); - se->avg.utilization_avg_contrib = - decay_load(se->avg.utilization_avg_contrib, decays); + if (decayed) { + sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX); + sa->util_avg = (sa->util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX; + } - return decays; + return decayed; } #ifdef CONFIG_FAIR_GROUP_SCHED -static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, - int force_update) -{ - struct task_group *tg = cfs_rq->tg; - long tg_contrib; - - tg_contrib = cfs_rq->runnable_load_avg + cfs_rq->blocked_load_avg; - tg_contrib -= cfs_rq->tg_load_contrib; - - if (!tg_contrib) - return; - - if (force_update || abs(tg_contrib) > cfs_rq->tg_load_contrib / 8) { - atomic_long_add(tg_contrib, &tg->load_avg); - cfs_rq->tg_load_contrib += tg_contrib; - } -} - /* - * Aggregate cfs_rq runnable averages into an equivalent task_group - * representation for computing load contributions. + * Updating tg's load_avg is necessary before update_cfs_share (which is done) + * and effective_load (which is not done because it is too costly). 
*/ -static inline void __update_tg_runnable_avg(struct sched_avg *sa, - struct cfs_rq *cfs_rq) +static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) { - struct task_group *tg = cfs_rq->tg; - long contrib; - - /* The fraction of a cpu used by this cfs_rq */ - contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT, - sa->avg_period + 1); - contrib -= cfs_rq->tg_runnable_contrib; + long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib; - if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) { - atomic_add(contrib, &tg->runnable_avg); - cfs_rq->tg_runnable_contrib += contrib; - } -} - -static inline void __update_group_entity_contrib(struct sched_entity *se) -{ - struct cfs_rq *cfs_rq = group_cfs_rq(se); - struct task_group *tg = cfs_rq->tg; - int runnable_avg; - - u64 contrib; - - contrib = cfs_rq->tg_load_contrib * tg->shares; - se->avg.load_avg_contrib = div_u64(contrib, - atomic_long_read(&tg->load_avg) + 1); - - /* - * For group entities we need to compute a correction term in the case - * that they are consuming <1 cpu so that we would contribute the same - * load as a task of equal weight. - * - * Explicitly co-ordinating this measurement would be expensive, but - * fortunately the sum of each cpus contribution forms a usable - * lower-bound on the true value. - * - * Consider the aggregate of 2 contributions. Either they are disjoint - * (and the sum represents true value) or they are disjoint and we are - * understating by the aggregate of their overlap. - * - * Extending this to N cpus, for a given overlap, the maximum amount we - * understand is then n_i(n_i+1)/2 * w_i where n_i is the number of - * cpus that overlap for this interval and w_i is the interval width. - * - * On a small machine; the first term is well-bounded which bounds the - * total error since w_i is a subset of the period. 
Whereas on a - * larger machine, while this first term can be larger, if w_i is the - * of consequential size guaranteed to see n_i*w_i quickly converge to - * our upper bound of 1-cpu. - */ - runnable_avg = atomic_read(&tg->runnable_avg); - if (runnable_avg < NICE_0_LOAD) { - se->avg.load_avg_contrib *= runnable_avg; - se->avg.load_avg_contrib >>= NICE_0_SHIFT; + if (force || abs(delta) > cfs_rq->tg_load_avg_contrib / 64) { + atomic_long_add(delta, &cfs_rq->tg->load_avg); + cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg; } } #else /* CONFIG_FAIR_GROUP_SCHED */ -static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq, - int force_update) {} -static inline void __update_tg_runnable_avg(struct sched_avg *sa, - struct cfs_rq *cfs_rq) {} -static inline void __update_group_entity_contrib(struct sched_entity *se) {} +static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {} #endif /* CONFIG_FAIR_GROUP_SCHED */ -static inline void __update_task_entity_contrib(struct sched_entity *se) -{ - u32 contrib; - - /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */ - contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight); - contrib /= (se->avg.avg_period + 1); - se->avg.load_avg_contrib = scale_load(contrib); -} +static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); -/* Compute the current contribution to load_avg by se, return any delta */ -static long __update_entity_load_avg_contrib(struct sched_entity *se) +/* Group cfs_rq's load_avg is used for task_h_load and update_cfs_share */ +static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) { - long old_contrib = se->avg.load_avg_contrib; + int decayed; + struct sched_avg *sa = &cfs_rq->avg; - if (entity_is_task(se)) { - __update_task_entity_contrib(se); - } else { - __update_tg_runnable_avg(&se->avg, group_cfs_rq(se)); - __update_group_entity_contrib(se); + if (atomic_long_read(&cfs_rq->removed_load_avg)) { + long r = 
atomic_long_xchg(&cfs_rq->removed_load_avg, 0); + sa->load_avg = max_t(long, sa->load_avg - r, 0); + sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0); } - return se->avg.load_avg_contrib - old_contrib; -} - - -static inline void __update_task_entity_utilization(struct sched_entity *se) -{ - u32 contrib; - - /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */ - contrib = se->avg.running_avg_sum * scale_load_down(SCHED_LOAD_SCALE); - contrib /= (se->avg.avg_period + 1); - se->avg.utilization_avg_contrib = scale_load(contrib); -} + if (atomic_long_read(&cfs_rq->removed_util_avg)) { + long r = atomic_long_xchg(&cfs_rq->removed_util_avg, 0); + sa->util_avg = max_t(long, sa->util_avg - r, 0); + sa->util_sum = max_t(s32, sa->util_sum - + ((r * LOAD_AVG_MAX) >> SCHED_LOAD_SHIFT), 0); + } -static long __update_entity_utilization_avg_contrib(struct sched_entity *se) -{ - long old_contrib = se->avg.utilization_avg_contrib; + decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, + scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL); - if (entity_is_task(se)) - __update_task_entity_utilization(se); - else - se->avg.utilization_avg_contrib = - group_cfs_rq(se)->utilization_load_avg; - - return se->avg.utilization_avg_contrib - old_contrib; -} +#ifndef CONFIG_64BIT + smp_wmb(); + cfs_rq->load_last_update_time_copy = sa->last_update_time; +#endif -static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq, - long load_contrib) -{ - if (likely(load_contrib < cfs_rq->blocked_load_avg)) - cfs_rq->blocked_load_avg -= load_contrib; - else - cfs_rq->blocked_load_avg = 0; + return decayed; } -static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq); - -/* Update a sched_entity's runnable average */ -static inline void update_entity_load_avg(struct sched_entity *se, - int update_cfs_rq) +/* Update task and its cfs_rq load average */ +static inline void update_load_avg(struct sched_entity *se, int update_tg) { struct cfs_rq *cfs_rq = 
cfs_rq_of(se); - long contrib_delta, utilization_delta; int cpu = cpu_of(rq_of(cfs_rq)); - u64 now; + u64 now = cfs_rq_clock_task(cfs_rq); /* - * For a group entity we need to use their owned cfs_rq_clock_task() in - * case they are the parent of a throttled hierarchy. + * Track task load average for carrying it to new CPU after migrated, and + * track group sched_entity load average for task_h_load calc in migration */ - if (entity_is_task(se)) - now = cfs_rq_clock_task(cfs_rq); - else - now = cfs_rq_clock_task(group_cfs_rq(se)); + __update_load_avg(now, cpu, &se->avg, + se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se); - if (!__update_entity_runnable_avg(now, cpu, &se->avg, se->on_rq, - cfs_rq->curr == se)) - return; - - contrib_delta = __update_entity_load_avg_contrib(se); - utilization_delta = __update_entity_utilization_avg_contrib(se); - - if (!update_cfs_rq) - return; - - if (se->on_rq) { - cfs_rq->runnable_load_avg += contrib_delta; - cfs_rq->utilization_load_avg += utilization_delta; - } else { - subtract_blocked_load_contrib(cfs_rq, -contrib_delta); - } + if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) + update_tg_load_avg(cfs_rq, 0); } -/* - * Decay the load contributed by all blocked children and account this so that - * their contribution may appropriately discounted when they wake up. 
- */ -static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update) +/* Add the load generated by se into cfs_rq's load average */ +static inline void +enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u64 now = cfs_rq_clock_task(cfs_rq) >> 20; - u64 decays; - - decays = now - cfs_rq->last_decay; - if (!decays && !force_update) - return; + struct sched_avg *sa = &se->avg; + u64 now = cfs_rq_clock_task(cfs_rq); + int migrated = 0, decayed; - if (atomic_long_read(&cfs_rq->removed_load)) { - unsigned long removed_load; - removed_load = atomic_long_xchg(&cfs_rq->removed_load, 0); - subtract_blocked_load_contrib(cfs_rq, removed_load); + if (sa->last_update_time == 0) { + sa->last_update_time = now; + migrated = 1; } - - if (decays) { - cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg, - decays); - atomic64_add(decays, &cfs_rq->decay_counter); - cfs_rq->last_decay = now; + else { + __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, + se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se); } - __update_cfs_rq_tg_load_contrib(cfs_rq, force_update); -} + decayed = update_cfs_rq_load_avg(now, cfs_rq); -/* Add the load generated by se into cfs_rq's child load-average */ -static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, - struct sched_entity *se, - int wakeup) -{ - /* - * We track migrations using entity decay_count <= 0, on a wake-up - * migration we use a negative decay count to track the remote decays - * accumulated while sleeping. - * - * Newly forked tasks are enqueued with se->avg.decay_count == 0, they - * are seen by enqueue_entity_load_avg() as a migration with an already - * constructed load_avg_contrib. - */ - if (unlikely(se->avg.decay_count <= 0)) { - se->avg.last_runnable_update = rq_clock_task(rq_of(cfs_rq)); - if (se->avg.decay_count) { - /* - * In a wake-up migration we have to approximate the - * time sleeping. 
This is because we can't synchronize - * clock_task between the two cpus, and it is not - * guaranteed to be read-safe. Instead, we can - * approximate this using our carried decays, which are - * explicitly atomically readable. - */ - se->avg.last_runnable_update -= (-se->avg.decay_count) - << 20; - update_entity_load_avg(se, 0); - /* Indicate that we're now synchronized and on-rq */ - se->avg.decay_count = 0; - } - wakeup = 0; - } else { - __synchronize_entity_decay(se); + if (migrated) { + cfs_rq->avg.load_avg += sa->load_avg; + cfs_rq->avg.load_sum += sa->load_sum; + cfs_rq->avg.util_avg += sa->util_avg; + cfs_rq->avg.util_sum += sa->util_sum; } - /* migrated tasks did not contribute to our blocked load */ - if (wakeup) { - subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib); - update_entity_load_avg(se, 0); - } - - cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; - cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib; - /* we force update consideration on load-balancer moves */ - update_cfs_rq_blocked_load(cfs_rq, !wakeup); + if (decayed || migrated) + update_tg_load_avg(cfs_rq, 0); } /* - * Remove se's load from this cfs_rq child load-average, if the entity is - * transitioning to a blocked state we track its projected decay using - * blocked_load_avg. + * Task first catches up with cfs_rq, and then subtract + * itself from the cfs_rq (task must be off the queue now). 
*/ -static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, - struct sched_entity *se, - int sleep) +void remove_entity_load_avg(struct sched_entity *se) { - update_entity_load_avg(se, 1); - /* we force update consideration on load-balancer moves */ - update_cfs_rq_blocked_load(cfs_rq, !sleep); + struct cfs_rq *cfs_rq = cfs_rq_of(se); + u64 last_update_time; + +#ifndef CONFIG_64BIT + u64 last_update_time_copy; - cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; - cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib; - if (sleep) { - cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; - se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); - } /* migrations, e.g. sleep=0 leave decay_count == 0 */ + do { + last_update_time_copy = cfs_rq->load_last_update_time_copy; + smp_rmb(); + last_update_time = cfs_rq->avg.last_update_time; + } while (last_update_time != last_update_time_copy); +#else + last_update_time = cfs_rq->avg.last_update_time; +#endif + + __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0); + atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); + atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg); } /* @@ -2948,16 +2767,10 @@ static int idle_balance(struct rq *this_rq); #else /* CONFIG_SMP */ -static inline void update_entity_load_avg(struct sched_entity *se, - int update_cfs_rq) {} -static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, - struct sched_entity *se, - int wakeup) {} -static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, - struct sched_entity *se, - int sleep) {} -static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, - int force_update) {} +static inline void update_load_avg(struct sched_entity *se, int update_tg) {} +static inline void +enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} +static inline void remove_entity_load_avg(struct sched_entity *se) {} static inline int idle_balance(struct rq *rq) { 
@@ -3089,7 +2902,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); - enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP); + enqueue_entity_load_avg(cfs_rq, se); account_entity_enqueue(cfs_rq, se); update_cfs_shares(cfs_rq); @@ -3164,7 +2977,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. */ update_curr(cfs_rq); - dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP); + update_load_avg(se, 1); update_stats_dequeue(cfs_rq, se); if (flags & DEQUEUE_SLEEP) { @@ -3254,7 +3067,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) */ update_stats_wait_end(cfs_rq, se); __dequeue_entity(cfs_rq, se); - update_entity_load_avg(se, 1); + update_load_avg(se, 1); } update_stats_curr_start(cfs_rq, se); @@ -3354,7 +3167,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) /* Put 'current' back into the tree. */ __enqueue_entity(cfs_rq, prev); /* in !on_rq case, update occurred at dequeue */ - update_entity_load_avg(prev, 1); + update_load_avg(prev, 0); } cfs_rq->curr = NULL; } @@ -3370,8 +3183,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued) /* * Ensure that runnable average is periodically updated. 
*/ - update_entity_load_avg(curr, 1); - update_cfs_rq_blocked_load(cfs_rq, 1); + update_load_avg(curr, 1); update_cfs_shares(cfs_rq); #ifdef CONFIG_SCHED_HRTICK @@ -4244,8 +4056,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; + update_load_avg(se, 1); update_cfs_shares(cfs_rq); - update_entity_load_avg(se, 1); } if (!se) @@ -4304,8 +4116,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (cfs_rq_throttled(cfs_rq)) break; + update_load_avg(se, 1); update_cfs_shares(cfs_rq); - update_entity_load_avg(se, 1); } if (!se) @@ -4444,7 +4256,7 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, static void update_idle_cpu_load(struct rq *this_rq) { unsigned long curr_jiffies = READ_ONCE(jiffies); - unsigned long load = this_rq->cfs.runnable_load_avg; + unsigned long load = this_rq->cfs.avg.load_avg; unsigned long pending_updates; /* @@ -4490,7 +4302,7 @@ void update_cpu_load_nohz(void) */ void update_cpu_load_active(struct rq *this_rq) { - unsigned long load = this_rq->cfs.runnable_load_avg; + unsigned long load = this_rq->cfs.avg.load_avg; /* * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). 
*/ @@ -4501,7 +4313,7 @@ void update_cpu_load_active(struct rq *this_rq) /* Used instead of source_load when we know the type == 0 */ static unsigned long weighted_cpuload(const int cpu) { - return cpu_rq(cpu)->cfs.runnable_load_avg; + return cpu_rq(cpu)->cfs.avg.load_avg; } /* @@ -4551,7 +4363,7 @@ static unsigned long cpu_avg_load_per_task(int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long nr_running = READ_ONCE(rq->cfs.h_nr_running); - unsigned long load_avg = rq->cfs.runnable_load_avg; + unsigned long load_avg = rq->cfs.avg.load_avg; if (nr_running) return load_avg / nr_running; @@ -4670,7 +4482,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) /* * w = rw_i + @wl */ - w = se->my_q->load.weight + wl; + w = se->my_q->avg.load_avg + wl; /* * wl = S * s'_i; see (2) @@ -4691,7 +4503,7 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) /* * wl = dw_i = S * (s'_i - s_i); see (3) */ - wl -= se->load.weight; + wl -= se->avg.load_avg; /* * Recursively apply this logic to all parent groups to compute @@ -4761,14 +4573,14 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) */ if (sync) { tg = task_group(current); - weight = current->se.load.weight; + weight = current->se.avg.load_avg; this_load += effective_load(tg, this_cpu, -weight, -weight); load += effective_load(tg, prev_cpu, 0, -weight); } tg = task_group(p); - weight = p->se.load.weight; + weight = p->se.avg.load_avg; /* * In low-load situations, where prev_cpu is idle and this_cpu is idle @@ -4961,12 +4773,12 @@ done: * tasks. The unit of the return value must be the one of capacity so we can * compare the usage with the capacity of the CPU that is available for CFS * task (ie cpu_capacity). - * cfs.utilization_load_avg is the sum of running time of runnable tasks on a + * cfs.avg.util_avg is the sum of running time of runnable tasks on a * CPU. 
It represents the amount of utilization of a CPU in the range * [0..SCHED_LOAD_SCALE]. The usage of a CPU can't be higher than the full * capacity of the CPU because it's about the running time on this CPU. - * Nevertheless, cfs.utilization_load_avg can be higher than SCHED_LOAD_SCALE - * because of unfortunate rounding in avg_period and running_load_avg or just + * Nevertheless, cfs.avg.util_avg can be higher than SCHED_LOAD_SCALE + * because of unfortunate rounding in util_avg or just * after migrating tasks until the average stabilizes with the new running * time. So we need to check that the usage stays into the range * [0..cpu_capacity_orig] and cap if necessary. @@ -4975,7 +4787,7 @@ done: */ static int get_cpu_usage(int cpu) { - unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg; + unsigned long usage = cpu_rq(cpu)->cfs.avg.util_avg; unsigned long capacity = capacity_orig_of(cpu); if (usage >= SCHED_LOAD_SCALE) @@ -5084,26 +4896,22 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f * previous cpu. However, the caller only guarantees p->pi_lock is held; no * other assumptions, including the state of rq->lock, should be made. */ -static void -migrate_task_rq_fair(struct task_struct *p, int next_cpu) +static void migrate_task_rq_fair(struct task_struct *p, int next_cpu) { - struct sched_entity *se = &p->se; - struct cfs_rq *cfs_rq = cfs_rq_of(se); - /* - * Load tracking: accumulate removed load so that it can be processed - * when we next update owning cfs_rq under rq->lock. Tasks contribute - * to blocked load iff they have a positive decay-count. It can never - * be negative here since on-rq tasks have decay-count == 0. + * We are supposed to update the task to "current" time, then its up to date + * and ready to go to new CPU/cfs_rq. But we have difficulty in getting + * what current time is, so simply throw away the out-of-date time. 
This + * will result in the wakee task is less decayed, but giving the wakee more + * load sounds not bad. */ - if (se->avg.decay_count) { - se->avg.decay_count = -__synchronize_entity_decay(se); - atomic_long_add(se->avg.load_avg_contrib, - &cfs_rq->removed_load); - } + remove_entity_load_avg(&p->se); + + /* Tell new CPU we are migrated */ + p->se.avg.last_update_time = 0; /* We have migrated, no longer consider this task hot */ - se->exec_start = 0; + p->se.exec_start = 0; } #endif /* CONFIG_SMP */ @@ -5966,36 +5774,6 @@ static void attach_tasks(struct lb_env *env) } #ifdef CONFIG_FAIR_GROUP_SCHED -/* - * update tg->load_weight by folding this cpu's load_avg - */ -static void __update_blocked_averages_cpu(struct task_group *tg, int cpu) -{ - struct sched_entity *se = tg->se[cpu]; - struct cfs_rq *cfs_rq = tg->cfs_rq[cpu]; - - /* throttled entities do not contribute to load */ - if (throttled_hierarchy(cfs_rq)) - return; - - update_cfs_rq_blocked_load(cfs_rq, 1); - - if (se) { - update_entity_load_avg(se, 1); - /* - * We pivot on our runnable average having decayed to zero for - * list removal. This generally implies that all our children - * have also been removed (modulo rounding error or bandwidth - * control); however, such cases are rare and we can fix these - * at enqueue. - * - * TODO: fix up out-of-order children on enqueue. - */ - if (!se->avg.runnable_avg_sum && !cfs_rq->nr_running) - list_del_leaf_cfs_rq(cfs_rq); - } -} - static void update_blocked_averages(int cpu) { struct rq *rq = cpu_rq(cpu); @@ -6004,19 +5782,19 @@ static void update_blocked_averages(int cpu) raw_spin_lock_irqsave(&rq->lock, flags); update_rq_clock(rq); + /* * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. */ for_each_leaf_cfs_rq(rq, cfs_rq) { - /* - * Note: We may want to consider periodically releasing - * rq->lock about these updates so that creating many task - * groups does not result in continually extending hold time. 
- */ - __update_blocked_averages_cpu(cfs_rq->tg, rq->cpu); - } + /* throttled entities do not contribute to load */ + if (throttled_hierarchy(cfs_rq)) + continue; + if (update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq)) + update_tg_load_avg(cfs_rq, 0); + } raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -6044,14 +5822,13 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) } if (!se) { - cfs_rq->h_load = cfs_rq->runnable_load_avg; + cfs_rq->h_load = cfs_rq->avg.load_avg; cfs_rq->last_h_load_update = now; } while ((se = cfs_rq->h_load_next) != NULL) { load = cfs_rq->h_load; - load = div64_ul(load * se->avg.load_avg_contrib, - cfs_rq->runnable_load_avg + 1); + load = div64_ul(load * se->avg.load_avg, cfs_rq->avg.load_avg + 1); cfs_rq = group_cfs_rq(se); cfs_rq->h_load = load; cfs_rq->last_h_load_update = now; @@ -6063,8 +5840,8 @@ static unsigned long task_h_load(struct task_struct *p) struct cfs_rq *cfs_rq = task_cfs_rq(p); update_cfs_rq_h_load(cfs_rq); - return div64_ul(p->se.avg.load_avg_contrib * cfs_rq->h_load, - cfs_rq->runnable_load_avg + 1); + return div64_ul(p->se.avg.load_avg * cfs_rq->h_load, + cfs_rq->avg.load_avg + 1); } #else static inline void update_blocked_averages(int cpu) @@ -6073,7 +5850,7 @@ static inline void update_blocked_averages(int cpu) static unsigned long task_h_load(struct task_struct *p) { - return p->se.avg.load_avg_contrib; + return p->se.avg.load_avg; } #endif @@ -8071,15 +7848,18 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) } #ifdef CONFIG_SMP - /* - * Remove our load from contribution when we leave sched_fair - * and ensure we don't carry in an old decay_count if we - * switch back. 
- */ - if (se->avg.decay_count) { - __synchronize_entity_decay(se); - subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib); - } + /* Catch up with the cfs_rq and remove our load when we leave */ + __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq), &se->avg, + se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se); + + cfs_rq->avg.load_avg = + max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0); + cfs_rq->avg.load_sum = + max_t(s64, cfs_rq->avg.load_sum - se->avg.load_sum, 0); + cfs_rq->avg.util_avg = + max_t(long, cfs_rq->avg.util_avg - se->avg.util_avg, 0); + cfs_rq->avg.util_sum = + max_t(s32, cfs_rq->avg.util_sum - se->avg.util_sum, 0); #endif } @@ -8136,8 +7916,8 @@ void init_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; #endif #ifdef CONFIG_SMP - atomic64_set(&cfs_rq->decay_counter, 1); - atomic_long_set(&cfs_rq->removed_load, 0); + atomic_long_set(&cfs_rq->removed_load_avg, 0); + atomic_long_set(&cfs_rq->removed_util_avg, 0); #endif } @@ -8182,14 +7962,14 @@ static void task_move_group_fair(struct task_struct *p, int queued) if (!queued) { cfs_rq = cfs_rq_of(se); se->vruntime += cfs_rq->min_vruntime; + #ifdef CONFIG_SMP - /* - * migrate_task_rq_fair() will have removed our previous - * contribution, but we must synchronize for ongoing future - * decay. 
- */ - se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter); - cfs_rq->blocked_load_avg += se->avg.load_avg_contrib; + /* Virtually synchronize task with its new cfs_rq */ + p->se.avg.last_update_time = cfs_rq->avg.last_update_time; + cfs_rq->avg.load_avg += p->se.avg.load_avg; + cfs_rq->avg.load_sum += p->se.avg.load_sum; + cfs_rq->avg.util_avg += p->se.avg.util_avg; + cfs_rq->avg.util_sum += p->se.avg.util_sum; #endif } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e13210cce7e8..dcde941a585b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -245,7 +245,6 @@ struct task_group { #ifdef CONFIG_SMP atomic_long_t load_avg; - atomic_t runnable_avg; #endif #endif @@ -366,27 +365,18 @@ struct cfs_rq { #ifdef CONFIG_SMP /* - * CFS Load tracking - * Under CFS, load is tracked on a per-entity basis and aggregated up. - * This allows for the description of both thread and group usage (in - * the FAIR_GROUP_SCHED case). - * runnable_load_avg is the sum of the load_avg_contrib of the - * sched_entities on the rq. - * blocked_load_avg is similar to runnable_load_avg except that its - * the blocked sched_entities on the rq. - * utilization_load_avg is the sum of the average running time of the - * sched_entities on the rq. 
+ * CFS load tracking */ - unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg; - atomic64_t decay_counter; - u64 last_decay; - atomic_long_t removed_load; - + struct sched_avg avg; #ifdef CONFIG_FAIR_GROUP_SCHED - /* Required to track per-cpu representation of a task_group */ - u32 tg_runnable_contrib; - unsigned long tg_load_contrib; + unsigned long tg_load_avg_contrib; +#endif + atomic_long_t removed_load_avg, removed_util_avg; +#ifndef CONFIG_64BIT + u64 load_last_update_time_copy; +#endif +#ifdef CONFIG_FAIR_GROUP_SCHED /* * h_load = weight * f(tg) * -- cgit v1.2.3-70-g09d2 From bff60792f994a87324ab57e89e945b4572b1ef77 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 31 Jul 2015 15:46:16 +0100 Subject: arm64: psci: factor invocation code to drivers To enable sharing with arm, move the core PSCI framework code to drivers/firmware. This results in a minor gain in lines of code, but this will quickly be amortised by the removal of code currently duplicated in arch/arm. 
Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Cc: Lorenzo Pieralisi Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/acpi.h | 4 +- arch/arm64/include/asm/psci.h | 28 ---- arch/arm64/kernel/psci.c | 361 +---------------------------------------- arch/arm64/kernel/setup.c | 2 +- drivers/firmware/Kconfig | 3 + drivers/firmware/Makefile | 1 + drivers/firmware/psci.c | 369 ++++++++++++++++++++++++++++++++++++++++++ include/linux/psci.h | 52 ++++++ 9 files changed, 431 insertions(+), 390 deletions(-) delete mode 100644 arch/arm64/include/asm/psci.h create mode 100644 drivers/firmware/psci.c create mode 100644 include/linux/psci.h (limited to 'include/linux') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 318175f62c24..7c55a632c08b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,6 +20,7 @@ config ARM64 select ARM_GIC_V2M if PCI_MSI select ARM_GIC_V3 select ARM_GIC_V3_ITS if PCI_MSI + select ARM_PSCI_FW select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 406485ed110a..208cec08a74f 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -12,11 +12,11 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H -#include #include +#include +#include #include -#include #include /* Macros for consistency checks of the GICC subtable of MADT */ diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h deleted file mode 100644 index 49d7e1aaebdc..000000000000 --- a/arch/arm64/include/asm/psci.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2013 ARM Limited - */ - -#ifndef __ASM_PSCI_H -#define __ASM_PSCI_H - -int __init psci_dt_init(void); - -#ifdef CONFIG_ACPI -int __init psci_acpi_init(void); -bool __init acpi_psci_present(void); -bool __init acpi_psci_use_hvc(void); -#else -static inline int psci_acpi_init(void) { return 0; } -static inline bool acpi_psci_present(void) { return false; } -#endif - -#endif /* __ASM_PSCI_H */ diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 869f202748e8..51fd15a16461 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -18,23 +18,17 @@ #include #include #include -#include -#include #include +#include #include + #include #include -#include #include #include -#include #include #include -#include - -#define PSCI_POWER_STATE_TYPE_STANDBY 0 -#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 static bool psci_power_state_loses_context(u32 state) { @@ -50,122 +44,8 @@ static bool psci_power_state_is_valid(u32 state) return !(state & ~valid_mask); } -/* - * The CPU any Trusted OS is resident on. The trusted OS may reject CPU_OFF - * calls to its resident CPU, so we must avoid issuing those. We never migrate - * a Trusted OS even if it claims to be capable of migration -- doing so will - * require cooperation with a Trusted OS driver. 
- */ -static int resident_cpu = -1; - -struct psci_operations { - int (*cpu_suspend)(u32 state, unsigned long entry_point); - int (*cpu_off)(u32 state); - int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); - int (*migrate)(unsigned long cpuid); - int (*affinity_info)(unsigned long target_affinity, - unsigned long lowest_affinity_level); - int (*migrate_info_type)(void); -}; - -static struct psci_operations psci_ops; - -typedef unsigned long (psci_fn)(unsigned long, unsigned long, - unsigned long, unsigned long); -asmlinkage psci_fn __invoke_psci_fn_hvc; -asmlinkage psci_fn __invoke_psci_fn_smc; -static psci_fn *invoke_psci_fn; - -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_MAX, -}; - static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state); -static u32 psci_function_id[PSCI_FN_MAX]; - -static int psci_to_linux_errno(int errno) -{ - switch (errno) { - case PSCI_RET_SUCCESS: - return 0; - case PSCI_RET_NOT_SUPPORTED: - return -EOPNOTSUPP; - case PSCI_RET_INVALID_PARAMS: - return -EINVAL; - case PSCI_RET_DENIED: - return -EPERM; - }; - - return -EINVAL; -} - -static u32 psci_get_version(void) -{ - return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); -} - -static int psci_cpu_suspend(u32 state, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; - err = invoke_psci_fn(fn, state, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_off(u32 state) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_OFF]; - err = invoke_psci_fn(fn, state, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) -{ - int err; - u32 fn; - - fn = psci_function_id[PSCI_FN_CPU_ON]; - err = invoke_psci_fn(fn, cpuid, entry_point, 0); - return psci_to_linux_errno(err); -} - -static int psci_migrate(unsigned long cpuid) -{ - int err; - u32 fn; - - fn = 
psci_function_id[PSCI_FN_MIGRATE]; - err = invoke_psci_fn(fn, cpuid, 0, 0); - return psci_to_linux_errno(err); -} - -static int psci_affinity_info(unsigned long target_affinity, - unsigned long lowest_affinity_level) -{ - return invoke_psci_fn(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, - lowest_affinity_level, 0); -} - -static int psci_migrate_info_type(void) -{ - return invoke_psci_fn(PSCI_0_2_FN_MIGRATE_INFO_TYPE, 0, 0, 0); -} - -static unsigned long psci_migrate_info_up_cpu(void) -{ - return invoke_psci_fn(PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU, 0, 0, 0); -} - static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu) { int i, ret, count = 0; @@ -230,238 +110,6 @@ free_mem: return ret; } -static int get_set_conduit_method(struct device_node *np) -{ - const char *method; - - pr_info("probing for conduit method from DT.\n"); - - if (of_property_read_string(np, "method", &method)) { - pr_warn("missing \"method\" property\n"); - return -ENXIO; - } - - if (!strcmp("hvc", method)) { - invoke_psci_fn = __invoke_psci_fn_hvc; - } else if (!strcmp("smc", method)) { - invoke_psci_fn = __invoke_psci_fn_smc; - } else { - pr_warn("invalid \"method\" property: %s\n", method); - return -EINVAL; - } - return 0; -} - -static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); -} - -static void psci_sys_poweroff(void) -{ - invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); -} - -/* - * Detect the presence of a resident Trusted OS which may cause CPU_OFF to - * return DENIED (which would be fatal). 
- */ -static void __init psci_init_migrate(void) -{ - unsigned long cpuid; - int type, cpu; - - type = psci_ops.migrate_info_type(); - - if (type == PSCI_0_2_TOS_MP) { - pr_info("Trusted OS migration not required\n"); - return; - } - - if (type == PSCI_RET_NOT_SUPPORTED) { - pr_info("MIGRATE_INFO_TYPE not supported.\n"); - return; - } - - if (type != PSCI_0_2_TOS_UP_MIGRATE && - type != PSCI_0_2_TOS_UP_NO_MIGRATE) { - pr_err("MIGRATE_INFO_TYPE returned unknown type (%d)\n", type); - return; - } - - cpuid = psci_migrate_info_up_cpu(); - if (cpuid & ~MPIDR_HWID_BITMASK) { - pr_warn("MIGRATE_INFO_UP_CPU reported invalid physical ID (0x%lx)\n", - cpuid); - return; - } - - cpu = get_logical_index(cpuid); - resident_cpu = cpu >= 0 ? cpu : -1; - - pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid); -} - -static void __init psci_0_2_set_functions(void) -{ - pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; - psci_ops.cpu_on = psci_cpu_on; - - psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; - psci_ops.migrate = psci_migrate; - - psci_ops.affinity_info = psci_affinity_info; - - psci_ops.migrate_info_type = psci_migrate_info_type; - - arm_pm_restart = psci_sys_reset; - - pm_power_off = psci_sys_poweroff; -} - -/* - * Probe function for PSCI firmware versions >= 0.2 - */ -static int __init psci_probe(void) -{ - u32 ver = psci_get_version(); - - pr_info("PSCIv%d.%d detected in firmware.\n", - PSCI_VERSION_MAJOR(ver), - PSCI_VERSION_MINOR(ver)); - - if (PSCI_VERSION_MAJOR(ver) == 0 && PSCI_VERSION_MINOR(ver) < 2) { - pr_err("Conflicting PSCI version detected.\n"); - return -EINVAL; - } - - psci_0_2_set_functions(); - - psci_init_migrate(); - - return 0; -} - -typedef int 
(*psci_initcall_t)(const struct device_node *); - -/* - * PSCI init function for PSCI versions >=0.2 - * - * Probe based on PSCI PSCI_VERSION function - */ -static int __init psci_0_2_init(struct device_node *np) -{ - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - /* - * Starting with v0.2, the PSCI specification introduced a call - * (PSCI_VERSION) that allows probing the firmware version, so - * that PSCI function IDs and version specific initialization - * can be carried out according to the specific version reported - * by firmware - */ - err = psci_probe(); - -out_put_node: - of_node_put(np); - return err; -} - -/* - * PSCI < v0.2 get PSCI Function IDs via DT. - */ -static int __init psci_0_1_init(struct device_node *np) -{ - u32 id; - int err; - - err = get_set_conduit_method(np); - - if (err) - goto out_put_node; - - pr_info("Using PSCI v0.1 Function IDs from DT\n"); - - if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; - } - - if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; - } - - if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; - } - - if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; - } - -out_put_node: - of_node_put(np); - return err; -} - -static const struct of_device_id psci_of_match[] __initconst = { - { .compatible = "arm,psci", .data = psci_0_1_init}, - { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, - {}, -}; - -int __init psci_dt_init(void) -{ - struct device_node *np; - const struct of_device_id *matched_np; - psci_initcall_t init_fn; - - np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); - - if (!np) - return -ENODEV; - - init_fn = (psci_initcall_t)matched_np->data; 
- return init_fn(np); -} - -#ifdef CONFIG_ACPI -/* - * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's - * explicitly clarified in SBBR - */ -int __init psci_acpi_init(void) -{ - if (!acpi_psci_present()) { - pr_info("is not implemented in ACPI.\n"); - return -EOPNOTSUPP; - } - - pr_info("probing for conduit method from ACPI.\n"); - - if (acpi_psci_use_hvc()) - invoke_psci_fn = __invoke_psci_fn_hvc; - else - invoke_psci_fn = __invoke_psci_fn_smc; - - return psci_probe(); -} -#endif - #ifdef CONFIG_SMP static int __init cpu_psci_cpu_init(unsigned int cpu) @@ -489,11 +137,6 @@ static int cpu_psci_cpu_boot(unsigned int cpu) } #ifdef CONFIG_HOTPLUG_CPU -static bool psci_tos_resident_on(int cpu) -{ - return cpu == resident_cpu; -} - static int cpu_psci_cpu_disable(unsigned int cpu) { /* Fail early if we don't have CPU_OFF support */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f3067d4d4e35..96ce26428f82 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -61,7 +62,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 99c69a3205c4..d8de6a8dd4de 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -5,6 +5,9 @@ menu "Firmware Drivers" +config ARM_PSCI_FW + bool + config EDD tristate "BIOS Enhanced Disk Drive calls determine boot disk" depends on X86 diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 4a4b897f9314..000830fc6707 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -1,6 +1,7 @@ # # Makefile for the linux kernel. 
# +obj-$(CONFIG_ARM_PSCI_FW) += psci.o obj-$(CONFIG_DMI) += dmi_scan.o obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o obj-$(CONFIG_EDD) += edd.o diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c new file mode 100644 index 000000000000..36e2cea3809b --- /dev/null +++ b/drivers/firmware/psci.c @@ -0,0 +1,369 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + */ + +#define pr_fmt(fmt) "psci: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +/* + * The CPU any Trusted OS is resident on. The trusted OS may reject CPU_OFF + * calls to its resident CPU, so we must avoid issuing those. We never migrate + * a Trusted OS even if it claims to be capable of migration -- doing so will + * require cooperation with a Trusted OS driver. 
+ */ +static int resident_cpu = -1; + +bool psci_tos_resident_on(int cpu) +{ + return cpu == resident_cpu; +} + +struct psci_operations psci_ops; + +typedef unsigned long (psci_fn)(unsigned long, unsigned long, + unsigned long, unsigned long); +asmlinkage psci_fn __invoke_psci_fn_hvc; +asmlinkage psci_fn __invoke_psci_fn_smc; +static psci_fn *invoke_psci_fn; + +enum psci_function { + PSCI_FN_CPU_SUSPEND, + PSCI_FN_CPU_ON, + PSCI_FN_CPU_OFF, + PSCI_FN_MIGRATE, + PSCI_FN_MAX, +}; + +static u32 psci_function_id[PSCI_FN_MAX]; + +static int psci_to_linux_errno(int errno) +{ + switch (errno) { + case PSCI_RET_SUCCESS: + return 0; + case PSCI_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + case PSCI_RET_INVALID_PARAMS: + return -EINVAL; + case PSCI_RET_DENIED: + return -EPERM; + }; + + return -EINVAL; +} + +static u32 psci_get_version(void) +{ + return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); +} + +static int psci_cpu_suspend(u32 state, unsigned long entry_point) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; + err = invoke_psci_fn(fn, state, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_off(u32 state) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_OFF]; + err = invoke_psci_fn(fn, state, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_ON]; + err = invoke_psci_fn(fn, cpuid, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_migrate(unsigned long cpuid) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE]; + err = invoke_psci_fn(fn, cpuid, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + return invoke_psci_fn(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, + lowest_affinity_level, 0); +} + +static int psci_migrate_info_type(void) 
+{ + return invoke_psci_fn(PSCI_0_2_FN_MIGRATE_INFO_TYPE, 0, 0, 0); +} + +static unsigned long psci_migrate_info_up_cpu(void) +{ + return invoke_psci_fn(PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU, 0, 0, 0); +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warn("missing \"method\" property\n"); + return -ENXIO; + } + + if (!strcmp("hvc", method)) { + invoke_psci_fn = __invoke_psci_fn_hvc; + } else if (!strcmp("smc", method)) { + invoke_psci_fn = __invoke_psci_fn_smc; + } else { + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * Detect the presence of a resident Trusted OS which may cause CPU_OFF to + * return DENIED (which would be fatal). + */ +static void __init psci_init_migrate(void) +{ + unsigned long cpuid; + int type, cpu = -1; + + type = psci_ops.migrate_info_type(); + + if (type == PSCI_0_2_TOS_MP) { + pr_info("Trusted OS migration not required\n"); + return; + } + + if (type == PSCI_RET_NOT_SUPPORTED) { + pr_info("MIGRATE_INFO_TYPE not supported.\n"); + return; + } + + if (type != PSCI_0_2_TOS_UP_MIGRATE && + type != PSCI_0_2_TOS_UP_NO_MIGRATE) { + pr_err("MIGRATE_INFO_TYPE returned unknown type (%d)\n", type); + return; + } + + cpuid = psci_migrate_info_up_cpu(); + if (cpuid & ~MPIDR_HWID_BITMASK) { + pr_warn("MIGRATE_INFO_UP_CPU reported invalid physical ID (0x%lx)\n", + cpuid); + return; + } + + cpu = get_logical_index(cpuid); + resident_cpu = cpu >= 0 ? 
cpu : -1; + + pr_info("Trusted OS resident on physical CPU 0x%lx\n", cpuid); +} + +static void __init psci_0_2_set_functions(void) +{ + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_ops.affinity_info = psci_affinity_info; + + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; +} + +/* + * Probe function for PSCI firmware versions >= 0.2 + */ +static int __init psci_probe(void) +{ + u32 ver = psci_get_version(); + + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && PSCI_VERSION_MINOR(ver) < 2) { + pr_err("Conflicting PSCI version detected.\n"); + return -EINVAL; + } + + psci_0_2_set_functions(); + + psci_init_migrate(); + + return 0; +} + +typedef int (*psci_initcall_t)(const struct device_node *); + +/* + * PSCI init function for PSCI versions >=0.2 + * + * Probe based on PSCI PSCI_VERSION function + */ +static int __init psci_0_2_init(struct device_node *np) +{ + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + /* + * Starting with v0.2, the PSCI specification introduced a call + * (PSCI_VERSION) that allows probing the firmware version, so + * that PSCI function IDs and version specific initialization + * can be carried out according to the specific version reported + * by firmware + */ + err = psci_probe(); + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. 
+ */ +static int __init psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + + if (!of_property_read_u32(np, "cpu_suspend", &id)) { + psci_function_id[PSCI_FN_CPU_SUSPEND] = id; + psci_ops.cpu_suspend = psci_cpu_suspend; + } + + if (!of_property_read_u32(np, "cpu_off", &id)) { + psci_function_id[PSCI_FN_CPU_OFF] = id; + psci_ops.cpu_off = psci_cpu_off; + } + + if (!of_property_read_u32(np, "cpu_on", &id)) { + psci_function_id[PSCI_FN_CPU_ON] = id; + psci_ops.cpu_on = psci_cpu_on; + } + + if (!of_property_read_u32(np, "migrate", &id)) { + psci_function_id[PSCI_FN_MIGRATE] = id; + psci_ops.migrate = psci_migrate; + } + +out_put_node: + of_node_put(np); + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_dt_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); +} + +#ifdef CONFIG_ACPI +/* + * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's + * explicitly clarified in SBBR + */ +int __init psci_acpi_init(void) +{ + if (!acpi_psci_present()) { + pr_info("is not implemented in ACPI.\n"); + return -EOPNOTSUPP; + } + + pr_info("probing for conduit method from ACPI.\n"); + + if (acpi_psci_use_hvc()) + invoke_psci_fn = __invoke_psci_fn_hvc; + else + invoke_psci_fn = __invoke_psci_fn_smc; + + return psci_probe(); +} +#endif diff --git a/include/linux/psci.h b/include/linux/psci.h new file mode 100644 index 000000000000..a682fcc91c33 --- /dev/null +++ b/include/linux/psci.h @@ -0,0 +1,52 @@ +/* + * This program is free software; you 
can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2015 ARM Limited + */ + +#ifndef __LINUX_PSCI_H +#define __LINUX_PSCI_H + +#include +#include + +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +bool psci_tos_resident_on(int cpu); + +struct psci_operations { + int (*cpu_suspend)(u32 state, unsigned long entry_point); + int (*cpu_off)(u32 state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); +}; + +extern struct psci_operations psci_ops; + +#if defined(CONFIG_ARM_PSCI_FW) +int __init psci_dt_init(void); +#else +static inline int psci_dt_init(void) { return 0; } +#endif + +#if defined(CONFIG_ARM_PSCI_FW) && defined(CONFIG_ACPI) +int __init psci_acpi_init(void); +bool __init acpi_psci_present(void); +bool __init acpi_psci_use_hvc(void); +#else +static inline int psci_acpi_init(void) { return 0; } +static inline bool acpi_psci_present(void) { return false; } +#endif + +#endif /* __LINUX_PSCI_H */ -- cgit v1.2.3-70-g09d2 From 17e8351a77397e8a83727eb17e3a3e9b8ab5257a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Fri, 24 Jul 2015 08:12:54 +0200 Subject: thermal: consistently use int for temperatures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The thermal code uses int, long and unsigned long for temperatures in different places. Using an unsigned type limits the thermal framework to positive temperatures without need. 
Also several drivers currently will report temperatures near UINT_MAX for temperatures below 0°C. This will probably immediately shut the machine down due to overtemperature if started below 0°C. 'long' is 64bit on several architectures. This is not needed since INT_MAX °mC is above the melting point of all known materials. Consistently use a plain 'int' for temperatures throughout the thermal code and the drivers. This only changes the places in the drivers where the temperature is passed around as pointer, when drivers internally use another type this is not changed. Signed-off-by: Sascha Hauer Acked-by: Geert Uytterhoeven Reviewed-by: Jean Delvare Reviewed-by: Lukasz Majewski Reviewed-by: Darren Hart Reviewed-by: Heiko Stuebner Reviewed-by: Peter Feuerer Cc: Punit Agrawal Cc: Zhang Rui Cc: Eduardo Valentin Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Jean Delvare Cc: Peter Feuerer Cc: Heiko Stuebner Cc: Lukasz Majewski Cc: Stephen Warren Cc: Thierry Reding Cc: linux-acpi@vger.kernel.org Cc: platform-driver-x86@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-omap@vger.kernel.org Cc: linux-samsung-soc@vger.kernel.org Cc: Guenter Roeck Cc: Rafael J. 
Wysocki Cc: Maxime Ripard Cc: Darren Hart Cc: lm-sensors@lm-sensors.org Signed-off-by: Zhang Rui --- drivers/acpi/thermal.c | 12 +++++----- drivers/hwmon/lm75.c | 2 +- drivers/hwmon/ntc_thermistor.c | 2 +- drivers/hwmon/tmp102.c | 2 +- drivers/input/touchscreen/sun4i-ts.c | 8 +++---- drivers/platform/x86/acerhdf.c | 9 ++++---- drivers/platform/x86/intel_mid_thermal.c | 9 ++++---- drivers/power/charger-manager.c | 2 +- drivers/power/power_supply_core.c | 2 +- drivers/thermal/armada_thermal.c | 2 +- drivers/thermal/db8500_thermal.c | 7 +++--- drivers/thermal/dove_thermal.c | 2 +- drivers/thermal/fair_share.c | 2 +- drivers/thermal/gov_bang_bang.c | 5 ++-- drivers/thermal/hisi_thermal.c | 4 ++-- drivers/thermal/imx_thermal.c | 27 +++++++++++----------- drivers/thermal/int340x_thermal/int3400_thermal.c | 2 +- .../thermal/int340x_thermal/int340x_thermal_zone.c | 10 ++++---- .../thermal/int340x_thermal/int340x_thermal_zone.h | 8 +++---- .../int340x_thermal/processor_thermal_device.c | 4 ++-- drivers/thermal/intel_quark_dts_thermal.c | 13 +++++------ drivers/thermal/intel_soc_dts_iosf.c | 8 +++---- drivers/thermal/kirkwood_thermal.c | 2 +- drivers/thermal/of-thermal.c | 14 +++++------ drivers/thermal/power_allocator.c | 16 ++++++------- drivers/thermal/qcom-spmi-temp-alarm.c | 2 +- drivers/thermal/rcar_thermal.c | 7 +++--- drivers/thermal/rockchip_thermal.c | 10 ++++---- drivers/thermal/samsung/exynos_tmu.c | 23 +++++++++--------- drivers/thermal/spear_thermal.c | 2 +- drivers/thermal/st/st_thermal.c | 5 ++-- drivers/thermal/step_wise.c | 4 ++-- drivers/thermal/tegra_soctherm.c | 4 ++-- drivers/thermal/thermal_core.c | 26 ++++++++++----------- drivers/thermal/thermal_hwmon.c | 10 ++++---- drivers/thermal/ti-soc-thermal/ti-thermal-common.c | 10 ++++---- drivers/thermal/x86_pkg_temp_thermal.c | 10 ++++---- include/linux/thermal.h | 26 +++++++++------------ include/trace/events/thermal_power_allocator.h | 6 ++--- 39 files changed, 152 insertions(+), 167 deletions(-) (limited 
to 'include/linux') diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 6d4e44ea74ac..e66ad25d112f 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -529,8 +529,7 @@ static void acpi_thermal_check(void *data) /* sys I/F for generic thermal sysfs support */ -static int thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) +static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp) { struct acpi_thermal *tz = thermal->devdata; int result; @@ -637,7 +636,7 @@ static int thermal_get_trip_type(struct thermal_zone_device *thermal, } static int thermal_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) + int trip, int *temp) { struct acpi_thermal *tz = thermal->devdata; int i; @@ -690,7 +689,8 @@ static int thermal_get_trip_temp(struct thermal_zone_device *thermal, } static int thermal_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temperature) { + int *temperature) +{ struct acpi_thermal *tz = thermal->devdata; if (tz->trips.critical.flags.valid) { @@ -713,8 +713,8 @@ static int thermal_get_trend(struct thermal_zone_device *thermal, return -EINVAL; if (type == THERMAL_TRIP_ACTIVE) { - unsigned long trip_temp; - unsigned long temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET( + int trip_temp; + int temp = DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET( tz->temperature, tz->kelvin_offset); if (thermal_get_trip_temp(thermal, trip, &trip_temp)) return -EINVAL; diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index fe41d5ae7cb2..e4e57bbafb10 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -104,7 +104,7 @@ static inline long lm75_reg_to_mc(s16 temp, u8 resolution) /* sysfs attributes for hwmon */ -static int lm75_read_temp(void *dev, long *temp) +static int lm75_read_temp(void *dev, int *temp) { struct lm75_data *data = lm75_update_device(dev); diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index 
dc0b76c5e302..feed30646d91 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -477,7 +477,7 @@ static int ntc_thermistor_get_ohm(struct ntc_data *data) return -EINVAL; } -static int ntc_read_temp(void *dev, long *temp) +static int ntc_read_temp(void *dev, int *temp) { struct ntc_data *data = dev_get_drvdata(dev); int ohm; diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c index 9da2735f1424..65482624ea2c 100644 --- a/drivers/hwmon/tmp102.c +++ b/drivers/hwmon/tmp102.c @@ -98,7 +98,7 @@ static struct tmp102 *tmp102_update_device(struct device *dev) return tmp102; } -static int tmp102_read_temp(void *dev, long *temp) +static int tmp102_read_temp(void *dev, int *temp) { struct tmp102 *tmp102 = tmp102_update_device(dev); diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c index c0116994067d..485794376ee5 100644 --- a/drivers/input/touchscreen/sun4i-ts.c +++ b/drivers/input/touchscreen/sun4i-ts.c @@ -191,7 +191,7 @@ static void sun4i_ts_close(struct input_dev *dev) writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC); } -static int sun4i_get_temp(const struct sun4i_ts_data *ts, long *temp) +static int sun4i_get_temp(const struct sun4i_ts_data *ts, int *temp) { /* No temp_data until the first irq */ if (ts->temp_data == -1) @@ -202,7 +202,7 @@ static int sun4i_get_temp(const struct sun4i_ts_data *ts, long *temp) return 0; } -static int sun4i_get_tz_temp(void *data, long *temp) +static int sun4i_get_tz_temp(void *data, int *temp) { return sun4i_get_temp(data, temp); } @@ -215,14 +215,14 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *devattr, char *buf) { struct sun4i_ts_data *ts = dev_get_drvdata(dev); - long temp; + int temp; int error; error = sun4i_get_temp(ts, &temp); if (error) return error; - return sprintf(buf, "%ld\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t show_temp_label(struct device *dev, diff --git a/drivers/platform/x86/acerhdf.c 
b/drivers/platform/x86/acerhdf.c index 1ef02daddb60..460fa6708bfc 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -346,8 +346,7 @@ static void acerhdf_check_param(struct thermal_zone_device *thermal) * as late as the polling interval is since we can't do that in the respective * accessors of the module parameters. */ -static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal, - unsigned long *t) +static int acerhdf_get_ec_temp(struct thermal_zone_device *thermal, int *t) { int temp, err = 0; @@ -453,7 +452,7 @@ static int acerhdf_get_trip_type(struct thermal_zone_device *thermal, int trip, } static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip, - unsigned long *temp) + int *temp) { if (trip != 0) return -EINVAL; @@ -464,7 +463,7 @@ static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip, } static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip, - unsigned long *temp) + int *temp) { if (trip == 0) *temp = fanon; @@ -477,7 +476,7 @@ static int acerhdf_get_trip_temp(struct thermal_zone_device *thermal, int trip, } static int acerhdf_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temperature) + int *temperature) { *temperature = ACERHDF_TEMP_CRIT; return 0; diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index 0944e834af8d..9f713b832ba3 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -132,7 +132,7 @@ static int is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max) * to achieve very close approximate temp value with less than * 0.5C error */ -static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) +static int adc_to_temp(int direct, uint16_t adc_val, int *tp) { int temp; @@ -174,14 +174,13 @@ static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) * * Can sleep */ -static int 
mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp) +static int mid_read_temp(struct thermal_zone_device *tzd, int *temp) { struct thermal_device_info *td_info = tzd->devdata; uint16_t adc_val, addr; uint8_t data = 0; int ret; - unsigned long curr_temp; - + int curr_temp; addr = td_info->chnl_addr; @@ -453,7 +452,7 @@ static SIMPLE_DEV_PM_OPS(mid_thermal_pm, * * Can sleep */ -static int read_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) +static int read_curr_temp(struct thermal_zone_device *tzd, int *temp) { WARN_ON(tzd == NULL); return mid_read_temp(tzd, temp); diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c index 1c202ccbd2a6..907293e6f2a4 100644 --- a/drivers/power/charger-manager.c +++ b/drivers/power/charger-manager.c @@ -619,7 +619,7 @@ static int cm_get_battery_temperature(struct charger_manager *cm, #ifdef CONFIG_THERMAL if (cm->tzd_batt) { - ret = thermal_zone_get_temp(cm->tzd_batt, (unsigned long *)temp); + ret = thermal_zone_get_temp(cm->tzd_batt, temp); if (!ret) /* Calibrate temperature unit */ *temp /= 100; diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 869284c2e1e8..456987c88baa 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -557,7 +557,7 @@ EXPORT_SYMBOL_GPL(power_supply_unreg_notifier); #ifdef CONFIG_THERMAL static int power_supply_read_temp(struct thermal_zone_device *tzd, - unsigned long *temp) + int *temp) { struct power_supply *psy; union power_supply_propval val; diff --git a/drivers/thermal/armada_thermal.c b/drivers/thermal/armada_thermal.c index 01255fd65135..26b8d326546a 100644 --- a/drivers/thermal/armada_thermal.c +++ b/drivers/thermal/armada_thermal.c @@ -155,7 +155,7 @@ static bool armada_is_valid(struct armada_thermal_priv *priv) } static int armada_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct armada_thermal_priv *priv = thermal->devdata; 
unsigned long reg; diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 2fb273c4baa9..652acd8fbe48 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -107,8 +107,7 @@ static int db8500_cdev_unbind(struct thermal_zone_device *thermal, } /* Callback to get current temperature */ -static int db8500_sys_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) +static int db8500_sys_get_temp(struct thermal_zone_device *thermal, int *temp) { struct db8500_thermal_zone *pzone = thermal->devdata; @@ -180,7 +179,7 @@ static int db8500_sys_get_trip_type(struct thermal_zone_device *thermal, /* Callback to get trip point temperature */ static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) + int trip, int *temp) { struct db8500_thermal_zone *pzone = thermal->devdata; struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; @@ -195,7 +194,7 @@ static int db8500_sys_get_trip_temp(struct thermal_zone_device *thermal, /* Callback to get critical trip point temperature */ static int db8500_sys_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct db8500_thermal_zone *pzone = thermal->devdata; struct db8500_thsens_platform_data *ptrips = pzone->trip_tab; diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c index 09f6e304c274..a0bc9de42553 100644 --- a/drivers/thermal/dove_thermal.c +++ b/drivers/thermal/dove_thermal.c @@ -93,7 +93,7 @@ static int dove_init_sensor(const struct dove_thermal_priv *priv) } static int dove_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { unsigned long reg; struct dove_thermal_priv *priv = thermal->devdata; diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c index c2c10bbe24d6..34fe36504a55 100644 --- a/drivers/thermal/fair_share.c +++ b/drivers/thermal/fair_share.c @@ -34,7 +34,7 @@ static int 
get_trip_level(struct thermal_zone_device *tz) { int count = 0; - unsigned long trip_temp; + int trip_temp; enum thermal_trip_type trip_type; if (tz->trips == 0 || !tz->ops->get_trip_temp) diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index c5dd76b2ee74..70836c5b89bc 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -25,14 +25,13 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { - long trip_temp; - unsigned long trip_hyst; + int trip_temp, trip_hyst; struct thermal_instance *instance; tz->ops->get_trip_temp(tz, trip, &trip_temp); tz->ops->get_trip_hyst(tz, trip, &trip_hyst); - dev_dbg(&tz->device, "Trip%d[temp=%ld]:temp=%d:hyst=%ld\n", + dev_dbg(&tz->device, "Trip%d[temp=%d]:temp=%d:hyst=%d\n", trip, trip_temp, tz->temperature, trip_hyst); diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c index d5dd357ba57c..49aa068d1603 100644 --- a/drivers/thermal/hisi_thermal.c +++ b/drivers/thermal/hisi_thermal.c @@ -155,7 +155,7 @@ static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) mutex_unlock(&data->thermal_lock); } -static int hisi_thermal_get_temp(void *_sensor, long *temp) +static int hisi_thermal_get_temp(void *_sensor, int *temp) { struct hisi_thermal_sensor *sensor = _sensor; struct hisi_thermal_data *data = sensor->thermal; @@ -178,7 +178,7 @@ static int hisi_thermal_get_temp(void *_sensor, long *temp) data->irq_bind_sensor = sensor_id; mutex_unlock(&data->thermal_lock); - dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%ld, thres=%d\n", + dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%d, thres=%d\n", sensor->id, data->irq_enabled, *temp, sensor->thres_temp); /* * Bind irq to sensor for two cases: diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index fde4c2876d14..4bec1d3c3d27 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -98,10 +98,10 @@ struct 
imx_thermal_data { enum thermal_device_mode mode; struct regmap *tempmon; u32 c1, c2; /* See formula in imx_get_sensor_data() */ - unsigned long temp_passive; - unsigned long temp_critical; - unsigned long alarm_temp; - unsigned long last_temp; + int temp_passive; + int temp_critical; + int alarm_temp; + int last_temp; bool irq_enabled; int irq; struct clk *thermal_clk; @@ -109,7 +109,7 @@ struct imx_thermal_data { }; static void imx_set_panic_temp(struct imx_thermal_data *data, - signed long panic_temp) + int panic_temp) { struct regmap *map = data->tempmon; int critical_value; @@ -121,7 +121,7 @@ static void imx_set_panic_temp(struct imx_thermal_data *data, } static void imx_set_alarm_temp(struct imx_thermal_data *data, - signed long alarm_temp) + int alarm_temp) { struct regmap *map = data->tempmon; int alarm_value; @@ -133,7 +133,7 @@ static void imx_set_alarm_temp(struct imx_thermal_data *data, TEMPSENSE0_ALARM_VALUE_SHIFT); } -static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp) +static int imx_get_temp(struct thermal_zone_device *tz, int *temp) { struct imx_thermal_data *data = tz->devdata; struct regmap *map = data->tempmon; @@ -189,13 +189,13 @@ static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp) if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) { imx_set_alarm_temp(data, data->temp_passive); - dev_dbg(&tz->device, "thermal alarm off: T < %lu\n", + dev_dbg(&tz->device, "thermal alarm off: T < %d\n", data->alarm_temp / 1000); } } if (*temp != data->last_temp) { - dev_dbg(&tz->device, "millicelsius: %ld\n", *temp); + dev_dbg(&tz->device, "millicelsius: %d\n", *temp); data->last_temp = *temp; } @@ -262,8 +262,7 @@ static int imx_get_trip_type(struct thermal_zone_device *tz, int trip, return 0; } -static int imx_get_crit_temp(struct thermal_zone_device *tz, - unsigned long *temp) +static int imx_get_crit_temp(struct thermal_zone_device *tz, int *temp) { struct imx_thermal_data *data = 
tz->devdata; @@ -272,7 +271,7 @@ static int imx_get_crit_temp(struct thermal_zone_device *tz, } static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long *temp) + int *temp) { struct imx_thermal_data *data = tz->devdata; @@ -282,7 +281,7 @@ static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip, } static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long temp) + int temp) { struct imx_thermal_data *data = tz->devdata; @@ -434,7 +433,7 @@ static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev) { struct imx_thermal_data *data = dev; - dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n", + dev_dbg(&data->tz->device, "THERMAL ALARM: T > %d\n", data->alarm_temp / 1000); thermal_zone_device_update(data->tz); diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index 031018e7a65b..5836e5554433 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -186,7 +186,7 @@ static int int3400_thermal_run_osc(acpi_handle handle, } static int int3400_thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { *temp = 20 * 1000; /* faked temp sensor with 20C */ return 0; diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c index 1e25133d35e2..b9b2666aa94c 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.c +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.c @@ -20,7 +20,7 @@ #include "int340x_thermal_zone.h" static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, - unsigned long *temp) + int *temp) { struct int34x_thermal_zone *d = zone->devdata; unsigned long long tmp; @@ -49,7 +49,7 @@ static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone, } static int int340x_thermal_get_trip_temp(struct 
thermal_zone_device *zone, - int trip, unsigned long *temp) + int trip, int *temp) { struct int34x_thermal_zone *d = zone->devdata; int i; @@ -114,7 +114,7 @@ static int int340x_thermal_get_trip_type(struct thermal_zone_device *zone, } static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, - int trip, unsigned long temp) + int trip, int temp) { struct int34x_thermal_zone *d = zone->devdata; acpi_status status; @@ -136,7 +136,7 @@ static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone, static int int340x_thermal_get_trip_hyst(struct thermal_zone_device *zone, - int trip, unsigned long *temp) + int trip, int *temp) { struct int34x_thermal_zone *d = zone->devdata; acpi_status status; @@ -163,7 +163,7 @@ static struct thermal_zone_device_ops int340x_thermal_zone_ops = { }; static int int340x_thermal_get_trip_config(acpi_handle handle, char *name, - unsigned long *temp) + int *temp) { unsigned long long r; acpi_status status; diff --git a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h index 9f38ab72c4bf..aaadf724ff2e 100644 --- a/drivers/thermal/int340x_thermal/int340x_thermal_zone.h +++ b/drivers/thermal/int340x_thermal/int340x_thermal_zone.h @@ -21,7 +21,7 @@ #define INT340X_THERMAL_MAX_ACT_TRIP_COUNT 10 struct active_trip { - unsigned long temp; + int temp; int id; bool valid; }; @@ -31,11 +31,11 @@ struct int34x_thermal_zone { struct active_trip act_trips[INT340X_THERMAL_MAX_ACT_TRIP_COUNT]; unsigned long *aux_trips; int aux_trip_nr; - unsigned long psv_temp; + int psv_temp; int psv_trip_id; - unsigned long crt_temp; + int crt_temp; int crt_trip_id; - unsigned long hot_temp; + int hot_temp; int hot_trip_id; struct thermal_zone_device *zone; struct thermal_zone_device_ops *override_ops; diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c index 3df3dc34b124..ccc0ad02d066 100644 --- 
a/drivers/thermal/int340x_thermal/processor_thermal_device.c +++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c @@ -145,7 +145,7 @@ static int get_tjmax(void) return -EINVAL; } -static int read_temp_msr(unsigned long *temp) +static int read_temp_msr(int *temp) { int cpu; u32 eax, edx; @@ -177,7 +177,7 @@ err_ret: } static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone, - unsigned long *temp) + int *temp) { int ret; diff --git a/drivers/thermal/intel_quark_dts_thermal.c b/drivers/thermal/intel_quark_dts_thermal.c index 4434ec812cb7..5ed90e6c8a64 100644 --- a/drivers/thermal/intel_quark_dts_thermal.c +++ b/drivers/thermal/intel_quark_dts_thermal.c @@ -186,7 +186,7 @@ static int soc_dts_disable(struct thermal_zone_device *tzd) return ret; } -static int _get_trip_temp(int trip, unsigned long *temp) +static int _get_trip_temp(int trip, int *temp) { int status; u32 out; @@ -212,19 +212,18 @@ static int _get_trip_temp(int trip, unsigned long *temp) } static inline int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, unsigned long *temp) + int trip, int *temp) { return _get_trip_temp(trip, temp); } -static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, - unsigned long *temp) +static inline int sys_get_crit_temp(struct thermal_zone_device *tzd, int *temp) { return _get_trip_temp(QRK_DTS_ID_TP_CRITICAL, temp); } static int update_trip_temp(struct soc_sensor_entry *aux_entry, - int trip, unsigned long temp) + int trip, int temp) { u32 out; u32 temp_out; @@ -272,7 +271,7 @@ failed: } static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long temp) + int temp) { return update_trip_temp(tzd->devdata, trip, temp); } @@ -289,7 +288,7 @@ static int sys_get_trip_type(struct thermal_zone_device *thermal, } static int sys_get_curr_temp(struct thermal_zone_device *tzd, - unsigned long *temp) + int *temp) { u32 out; int ret; diff --git a/drivers/thermal/intel_soc_dts_iosf.c 
b/drivers/thermal/intel_soc_dts_iosf.c index 42e4b6ac3875..5841d1d72996 100644 --- a/drivers/thermal/intel_soc_dts_iosf.c +++ b/drivers/thermal/intel_soc_dts_iosf.c @@ -80,7 +80,7 @@ err_ret: } static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long *temp) + int *temp) { int status; u32 out; @@ -106,7 +106,7 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, int trip, } static int update_trip_temp(struct intel_soc_dts_sensor_entry *dts, - int thres_index, unsigned long temp, + int thres_index, int temp, enum thermal_trip_type trip_type) { int status; @@ -196,7 +196,7 @@ err_restore_ptps: } static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long temp) + int temp) { struct intel_soc_dts_sensor_entry *dts = tzd->devdata; struct intel_soc_dts_sensors *sensors = dts->sensors; @@ -226,7 +226,7 @@ static int sys_get_trip_type(struct thermal_zone_device *tzd, } static int sys_get_curr_temp(struct thermal_zone_device *tzd, - unsigned long *temp) + int *temp) { int status; u32 out; diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c index 11041fe63dc2..892236621767 100644 --- a/drivers/thermal/kirkwood_thermal.c +++ b/drivers/thermal/kirkwood_thermal.c @@ -33,7 +33,7 @@ struct kirkwood_thermal_priv { }; static int kirkwood_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { unsigned long reg; struct kirkwood_thermal_priv *priv = thermal->devdata; diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index b295b2b6c191..42b7d4253b94 100644 --- a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -91,7 +91,7 @@ struct __thermal_zone { /*** DT thermal zone device callbacks ***/ static int of_thermal_get_temp(struct thermal_zone_device *tz, - unsigned long *temp) + int *temp) { struct __thermal_zone *data = tz->devdata; @@ -177,7 +177,7 @@ EXPORT_SYMBOL_GPL(of_thermal_get_trip_points); * Return: zero on 
success, error code otherwise */ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, - unsigned long temp) + int temp) { struct __thermal_zone *data = tz->devdata; @@ -311,7 +311,7 @@ static int of_thermal_get_trip_type(struct thermal_zone_device *tz, int trip, } static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long *temp) + int *temp) { struct __thermal_zone *data = tz->devdata; @@ -324,7 +324,7 @@ static int of_thermal_get_trip_temp(struct thermal_zone_device *tz, int trip, } static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, - unsigned long temp) + int temp) { struct __thermal_zone *data = tz->devdata; @@ -338,7 +338,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, } static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip, - unsigned long *hyst) + int *hyst) { struct __thermal_zone *data = tz->devdata; @@ -351,7 +351,7 @@ static int of_thermal_get_trip_hyst(struct thermal_zone_device *tz, int trip, } static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip, - unsigned long hyst) + int hyst) { struct __thermal_zone *data = tz->devdata; @@ -365,7 +365,7 @@ static int of_thermal_set_trip_hyst(struct thermal_zone_device *tz, int trip, } static int of_thermal_get_crit_temp(struct thermal_zone_device *tz, - unsigned long *temp) + int *temp) { struct __thermal_zone *data = tz->devdata; int i; diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c index 4672250b329f..045aea59ce9d 100644 --- a/drivers/thermal/power_allocator.c +++ b/drivers/thermal/power_allocator.c @@ -92,8 +92,8 @@ struct power_allocator_params { * Return: The power budget for the next period. 
*/ static u32 pid_controller(struct thermal_zone_device *tz, - unsigned long current_temp, - unsigned long control_temp, + int current_temp, + int control_temp, u32 max_allocatable_power) { s64 p, i, d, power_range; @@ -102,7 +102,7 @@ static u32 pid_controller(struct thermal_zone_device *tz, max_power_frac = int_to_frac(max_allocatable_power); - err = ((s32)control_temp - (s32)current_temp); + err = control_temp - current_temp; err = int_to_frac(err); /* Calculate the proportional term */ @@ -223,8 +223,8 @@ static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, } static int allocate_power(struct thermal_zone_device *tz, - unsigned long current_temp, - unsigned long control_temp) + int current_temp, + int control_temp) { struct thermal_instance *instance; struct power_allocator_params *params = tz->governor_data; @@ -326,7 +326,7 @@ static int allocate_power(struct thermal_zone_device *tz, granted_power, total_granted_power, num_actors, power_range, max_allocatable_power, current_temp, - (s32)control_temp - (s32)current_temp); + control_temp - current_temp); devm_kfree(&tz->device, req_power); unlock: @@ -411,7 +411,7 @@ static int power_allocator_bind(struct thermal_zone_device *tz) { int ret; struct power_allocator_params *params; - unsigned long switch_on_temp, control_temp; + int switch_on_temp, control_temp; u32 temperature_threshold; if (!tz->tzp || !tz->tzp->sustainable_power) { @@ -476,7 +476,7 @@ static void power_allocator_unbind(struct thermal_zone_device *tz) static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) { int ret; - unsigned long switch_on_temp, control_temp, current_temp; + int switch_on_temp, control_temp, current_temp; struct power_allocator_params *params = tz->governor_data; /* diff --git a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c index c8d27b8fb9ec..b677aada5b52 100644 --- a/drivers/thermal/qcom-spmi-temp-alarm.c +++ 
b/drivers/thermal/qcom-spmi-temp-alarm.c @@ -117,7 +117,7 @@ static int qpnp_tm_update_temp_no_adc(struct qpnp_tm_chip *chip) return 0; } -static int qpnp_tm_get_temp(void *data, long *temp) +static int qpnp_tm_get_temp(void *data, int *temp) { struct qpnp_tm_chip *chip = data; int ret, mili_celsius; diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index fe4e767018c4..5d4ae7d705e0 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -200,8 +200,7 @@ err_out_unlock: return ret; } -static int rcar_thermal_get_temp(struct thermal_zone_device *zone, - unsigned long *temp) +static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp) { struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); @@ -235,7 +234,7 @@ static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone, } static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, - int trip, unsigned long *temp) + int trip, int *temp) { struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); struct device *dev = rcar_priv_to_dev(priv); @@ -299,7 +298,7 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable) static void rcar_thermal_work(struct work_struct *work) { struct rcar_thermal_priv *priv; - unsigned long cctemp, nctemp; + int cctemp, nctemp; priv = container_of(work, struct rcar_thermal_priv, work.work); diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c index cd8f5f93b42c..c89ffb26a354 100644 --- a/drivers/thermal/rockchip_thermal.c +++ b/drivers/thermal/rockchip_thermal.c @@ -64,7 +64,7 @@ struct rockchip_tsadc_chip { void (*control)(void __iomem *reg, bool on); /* Per-sensor methods */ - int (*get_temp)(int chn, void __iomem *reg, long *temp); + int (*get_temp)(int chn, void __iomem *reg, int *temp); void (*set_tshut_temp)(int chn, void __iomem *reg, long temp); void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m); }; @@ -191,7 
+191,7 @@ static u32 rk_tsadcv2_temp_to_code(long temp) return 0; } -static long rk_tsadcv2_code_to_temp(u32 code) +static int rk_tsadcv2_code_to_temp(u32 code) { unsigned int low = 0; unsigned int high = ARRAY_SIZE(v2_code_table) - 1; @@ -277,7 +277,7 @@ static void rk_tsadcv2_control(void __iomem *regs, bool enable) writel_relaxed(val, regs + TSADCV2_AUTO_CON); } -static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, long *temp) +static int rk_tsadcv2_get_temp(int chn, void __iomem *regs, int *temp) { u32 val; @@ -366,7 +366,7 @@ static irqreturn_t rockchip_thermal_alarm_irq_thread(int irq, void *dev) return IRQ_HANDLED; } -static int rockchip_thermal_get_temp(void *_sensor, long *out_temp) +static int rockchip_thermal_get_temp(void *_sensor, int *out_temp) { struct rockchip_thermal_sensor *sensor = _sensor; struct rockchip_thermal_data *thermal = sensor->thermal; @@ -374,7 +374,7 @@ static int rockchip_thermal_get_temp(void *_sensor, long *out_temp) int retval; retval = tsadc->get_temp(sensor->id, thermal->regs, out_temp); - dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %ld, retval: %d\n", + dev_dbg(&thermal->pdev->dev, "sensor %d - temp: %d, retval: %d\n", sensor->id, *out_temp, retval); return retval; diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 531f4b179871..9ec29a33aeea 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -207,8 +207,7 @@ struct exynos_tmu_data { int (*tmu_initialize)(struct platform_device *pdev); void (*tmu_control)(struct platform_device *pdev, bool on); int (*tmu_read)(struct exynos_tmu_data *data); - void (*tmu_set_emulation)(struct exynos_tmu_data *data, - unsigned long temp); + void (*tmu_set_emulation)(struct exynos_tmu_data *data, int temp); void (*tmu_clear_irqs)(struct exynos_tmu_data *data); }; @@ -216,7 +215,7 @@ static void exynos_report_trigger(struct exynos_tmu_data *p) { char data[10], *envp[] = { data, NULL }; struct 
thermal_zone_device *tz = p->tzd; - unsigned long temp; + int temp; unsigned int i; if (!tz) { @@ -517,7 +516,7 @@ static int exynos5433_tmu_initialize(struct platform_device *pdev) struct thermal_zone_device *tz = data->tzd; unsigned int status, trim_info; unsigned int rising_threshold = 0, falling_threshold = 0; - unsigned long temp, temp_hist; + int temp, temp_hist; int ret = 0, threshold_code, i, sensor_id, cal_type; status = readb(data->base + EXYNOS_TMU_REG_STATUS); @@ -610,7 +609,7 @@ static int exynos5440_tmu_initialize(struct platform_device *pdev) struct exynos_tmu_data *data = platform_get_drvdata(pdev); unsigned int trim_info = 0, con, rising_threshold; int ret = 0, threshold_code; - unsigned long crit_temp = 0; + int crit_temp = 0; /* * For exynos5440 soc triminfo value is swapped between TMU0 and @@ -663,7 +662,7 @@ static int exynos7_tmu_initialize(struct platform_device *pdev) unsigned int status, trim_info; unsigned int rising_threshold = 0, falling_threshold = 0; int ret = 0, threshold_code, i; - unsigned long temp, temp_hist; + int temp, temp_hist; unsigned int reg_off, bit_off; status = readb(data->base + EXYNOS_TMU_REG_STATUS); @@ -876,7 +875,7 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) writel(con, data->base + EXYNOS_TMU_REG_CONTROL); } -static int exynos_get_temp(void *p, long *temp) +static int exynos_get_temp(void *p, int *temp) { struct exynos_tmu_data *data = p; @@ -896,7 +895,7 @@ static int exynos_get_temp(void *p, long *temp) #ifdef CONFIG_THERMAL_EMULATION static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val, - unsigned long temp) + int temp) { if (temp) { temp /= MCELSIUS; @@ -926,7 +925,7 @@ static u32 get_emul_con_reg(struct exynos_tmu_data *data, unsigned int val, } static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, - unsigned long temp) + int temp) { unsigned int val; u32 emul_con; @@ -946,7 +945,7 @@ static void exynos4412_tmu_set_emulation(struct 
exynos_tmu_data *data, } static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data, - unsigned long temp) + int temp) { unsigned int val; @@ -955,7 +954,7 @@ static void exynos5440_tmu_set_emulation(struct exynos_tmu_data *data, writel(val, data->base + EXYNOS5440_TMU_S0_7_DEBUG); } -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +static int exynos_tmu_set_emulation(void *drv_data, int temp) { struct exynos_tmu_data *data = drv_data; int ret = -EINVAL; @@ -978,7 +977,7 @@ out: #else #define exynos4412_tmu_set_emulation NULL #define exynos5440_tmu_set_emulation NULL -static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp) +static int exynos_tmu_set_emulation(void *drv_data, int temp) { return -EINVAL; } #endif /* CONFIG_THERMAL_EMULATION */ diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index bddb71744a6c..534dd9136662 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -38,7 +38,7 @@ struct spear_thermal_dev { }; static inline int thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct spear_thermal_dev *stdev = thermal->devdata; diff --git a/drivers/thermal/st/st_thermal.c b/drivers/thermal/st/st_thermal.c index 76c515dd802b..44cbba99716a 100644 --- a/drivers/thermal/st/st_thermal.c +++ b/drivers/thermal/st/st_thermal.c @@ -111,8 +111,7 @@ static int st_thermal_calibration(struct st_thermal_sensor *sensor) } /* Callback to get temperature from HW*/ -static int st_thermal_get_temp(struct thermal_zone_device *th, - unsigned long *temperature) +static int st_thermal_get_temp(struct thermal_zone_device *th, int *temperature) { struct st_thermal_sensor *sensor = th->devdata; struct device *dev = sensor->dev; @@ -159,7 +158,7 @@ static int st_thermal_get_trip_type(struct thermal_zone_device *th, } static int st_thermal_get_trip_temp(struct thermal_zone_device *th, - int trip, unsigned long *temp) + int trip, 
int *temp) { struct st_thermal_sensor *sensor = th->devdata; struct device *dev = sensor->dev; diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 5a0f12d08e8b..2f9f7086ac3d 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -113,7 +113,7 @@ static void update_passive_instance(struct thermal_zone_device *tz, static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { - long trip_temp; + int trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; struct thermal_instance *instance; @@ -135,7 +135,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trace_thermal_zone_trip(tz, trip, trip_type); } - dev_dbg(&tz->device, "Trip%d[type=%d,temp=%ld]:trend=%d,throttle=%d\n", + dev_dbg(&tz->device, "Trip%d[type=%d,temp=%d]:trend=%d,throttle=%d\n", trip, trip_type, trip_temp, trend, throttle); mutex_lock(&tz->lock); diff --git a/drivers/thermal/tegra_soctherm.c b/drivers/thermal/tegra_soctherm.c index 9197fc05c5cc..74ea5765938b 100644 --- a/drivers/thermal/tegra_soctherm.c +++ b/drivers/thermal/tegra_soctherm.c @@ -293,7 +293,7 @@ static int enable_tsensor(struct tegra_soctherm *tegra, * H denotes an addition of 0.5 Celsius and N denotes negation * of the final value. 
*/ -static long translate_temp(u16 val) +static int translate_temp(u16 val) { long t; @@ -306,7 +306,7 @@ static long translate_temp(u16 val) return t; } -static int tegra_thermctl_get_temp(void *data, long *out_temp) +static int tegra_thermctl_get_temp(void *data, int *out_temp) { struct tegra_thermctl_zone *zone = data; u32 val; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index c4700950e42e..387c4287fc74 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -426,7 +426,7 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz, static void handle_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { - long trip_temp; + int trip_temp; tz->ops->get_trip_temp(tz, trip, &trip_temp); @@ -474,12 +474,12 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) * * Return: On success returns 0, an error code otherwise */ -int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp) +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp) { int ret = -EINVAL; #ifdef CONFIG_THERMAL_EMULATION int count; - unsigned long crit_temp = -1UL; + int crit_temp = INT_MAX; enum thermal_trip_type type; #endif @@ -516,8 +516,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_get_temp); static void update_temperature(struct thermal_zone_device *tz) { - long temp; - int ret; + int temp, ret; ret = thermal_zone_get_temp(tz, &temp); if (ret) { @@ -577,15 +576,14 @@ static ssize_t temp_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_zone_device *tz = to_thermal_zone(dev); - long temperature; - int ret; + int temperature, ret; ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } static ssize_t @@ -689,7 +687,7 @@ trip_point_temp_show(struct device *dev, struct device_attribute *attr, { struct 
thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - long temperature; + int temperature; if (!tz->ops->get_trip_temp) return -EPERM; @@ -702,7 +700,7 @@ trip_point_temp_show(struct device *dev, struct device_attribute *attr, if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } static ssize_t @@ -711,7 +709,7 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - unsigned long temperature; + int temperature; if (!tz->ops->set_trip_hyst) return -EPERM; @@ -719,7 +717,7 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr, if (!sscanf(attr->attr.name, "trip_point_%d_hyst", &trip)) return -EINVAL; - if (kstrtoul(buf, 10, &temperature)) + if (kstrtoint(buf, 10, &temperature)) return -EINVAL; /* @@ -738,7 +736,7 @@ trip_point_hyst_show(struct device *dev, struct device_attribute *attr, { struct thermal_zone_device *tz = to_thermal_zone(dev); int trip, ret; - unsigned long temperature; + int temperature; if (!tz->ops->get_trip_hyst) return -EPERM; @@ -748,7 +746,7 @@ trip_point_hyst_show(struct device *dev, struct device_attribute *attr, ret = tz->ops->get_trip_hyst(tz, trip, &temperature); - return ret ? ret : sprintf(buf, "%ld\n", temperature); + return ret ? 
ret : sprintf(buf, "%d\n", temperature); } static ssize_t diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c index 1967bee4f076..06fd2ed9ef9d 100644 --- a/drivers/thermal/thermal_hwmon.c +++ b/drivers/thermal/thermal_hwmon.c @@ -69,7 +69,7 @@ static DEVICE_ATTR(name, 0444, name_show, NULL); static ssize_t temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) { - long temperature; + int temperature; int ret; struct thermal_hwmon_attr *hwmon_attr = container_of(attr, struct thermal_hwmon_attr, attr); @@ -83,7 +83,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } static ssize_t @@ -95,14 +95,14 @@ temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf) = container_of(hwmon_attr, struct thermal_hwmon_temp, temp_crit); struct thermal_zone_device *tz = temp->tz; - long temperature; + int temperature; int ret; ret = tz->ops->get_trip_temp(tz, 0, &temperature); if (ret) return ret; - return sprintf(buf, "%ld\n", temperature); + return sprintf(buf, "%d\n", temperature); } @@ -142,7 +142,7 @@ thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon, static bool thermal_zone_crit_temp_valid(struct thermal_zone_device *tz) { - unsigned long temp; + int temp; return tz->ops->get_crit_temp && !tz->ops->get_crit_temp(tz, &temp); } diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index c7c5b3779dac..b213a1222295 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -76,14 +76,14 @@ static inline int ti_thermal_hotspot_temperature(int t, int s, int c) /* thermal zone ops */ /* Get temperature callback function for thermal zone */ -static inline int __ti_thermal_get_temp(void *devdata, long *temp) +static inline int 
__ti_thermal_get_temp(void *devdata, int *temp) { struct thermal_zone_device *pcb_tz = NULL; struct ti_thermal_data *data = devdata; struct ti_bandgap *bgp; const struct ti_temp_sensor *s; int ret, tmp, slope, constant; - unsigned long pcb_temp; + int pcb_temp; if (!data) return 0; @@ -119,7 +119,7 @@ static inline int __ti_thermal_get_temp(void *devdata, long *temp) } static inline int ti_thermal_get_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { struct ti_thermal_data *data = thermal->devdata; @@ -229,7 +229,7 @@ static int ti_thermal_get_trip_type(struct thermal_zone_device *thermal, /* Get trip temperature callback functions for thermal zone */ static int ti_thermal_get_trip_temp(struct thermal_zone_device *thermal, - int trip, unsigned long *temp) + int trip, int *temp) { if (!ti_thermal_is_valid_trip(trip)) return -EINVAL; @@ -280,7 +280,7 @@ static int ti_thermal_get_trend(struct thermal_zone_device *thermal, /* Get critical temperature callback functions for thermal zone */ static int ti_thermal_get_crit_temp(struct thermal_zone_device *thermal, - unsigned long *temp) + int *temp) { /* shutdown zone */ return ti_thermal_get_trip_temp(thermal, OMAP_TRIP_NUMBER - 1, temp); diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 50d1d2cb091a..7fc919f7da4d 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -164,7 +164,7 @@ err_ret: return err; } -static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) +static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp) { u32 eax, edx; struct phy_dev_entry *phy_dev_entry; @@ -175,7 +175,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *tem if (eax & 0x80000000) { *temp = phy_dev_entry->tj_max - ((eax >> 16) & 0x7f) * 1000; - pr_debug("sys_get_curr_temp %ld\n", *temp); + pr_debug("sys_get_curr_temp %d\n", *temp); return 0; } @@ 
-183,7 +183,7 @@ static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *tem } static int sys_get_trip_temp(struct thermal_zone_device *tzd, - int trip, unsigned long *temp) + int trip, int *temp) { u32 eax, edx; struct phy_dev_entry *phy_dev_entry; @@ -214,13 +214,13 @@ static int sys_get_trip_temp(struct thermal_zone_device *tzd, *temp = phy_dev_entry->tj_max - thres_reg_value * 1000; else *temp = 0; - pr_debug("sys_get_trip_temp %ld\n", *temp); + pr_debug("sys_get_trip_temp %d\n", *temp); return 0; } static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, - unsigned long temp) + int temp) { u32 l, h; struct phy_dev_entry *phy_dev_entry; diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 037e9df2f610..17292fee8686 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -92,23 +92,19 @@ struct thermal_zone_device_ops { struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); - int (*get_temp) (struct thermal_zone_device *, unsigned long *); + int (*get_temp) (struct thermal_zone_device *, int *); int (*get_mode) (struct thermal_zone_device *, enum thermal_device_mode *); int (*set_mode) (struct thermal_zone_device *, enum thermal_device_mode); int (*get_trip_type) (struct thermal_zone_device *, int, enum thermal_trip_type *); - int (*get_trip_temp) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_temp) (struct thermal_zone_device *, int, - unsigned long); - int (*get_trip_hyst) (struct thermal_zone_device *, int, - unsigned long *); - int (*set_trip_hyst) (struct thermal_zone_device *, int, - unsigned long); - int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); - int (*set_emul_temp) (struct thermal_zone_device *, unsigned long); + int (*get_trip_temp) (struct thermal_zone_device *, int, int *); + int (*set_trip_temp) (struct thermal_zone_device *, int, int); + int (*get_trip_hyst) (struct 
thermal_zone_device *, int, int *); + int (*set_trip_hyst) (struct thermal_zone_device *, int, int); + int (*get_crit_temp) (struct thermal_zone_device *, int *); + int (*set_emul_temp) (struct thermal_zone_device *, int); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, @@ -332,9 +328,9 @@ struct thermal_genl_event { * temperature. */ struct thermal_zone_of_device_ops { - int (*get_temp)(void *, long *); + int (*get_temp)(void *, int *); int (*get_trend)(void *, long *); - int (*set_emul_temp)(void *, unsigned long); + int (*set_emul_temp)(void *, int); }; /** @@ -406,7 +402,7 @@ thermal_of_cooling_device_register(struct device_node *np, char *, void *, const struct thermal_cooling_device_ops *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); -int thermal_zone_get_temp(struct thermal_zone_device *tz, unsigned long *temp); +int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, @@ -457,7 +453,7 @@ static inline struct thermal_zone_device *thermal_zone_get_zone_by_name( const char *name) { return ERR_PTR(-ENODEV); } static inline int thermal_zone_get_temp( - struct thermal_zone_device *tz, unsigned long *temp) + struct thermal_zone_device *tz, int *temp) { return -ENODEV; } static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) { return -ENODEV; } diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h index 12e1321c4e0c..5afae8fe3795 100644 --- a/include/trace/events/thermal_power_allocator.h +++ b/include/trace/events/thermal_power_allocator.h @@ -11,7 +11,7 @@ TRACE_EVENT(thermal_power_allocator, u32 total_req_power, u32 *granted_power, u32 total_granted_power, size_t 
num_actors, u32 power_range, u32 max_allocatable_power, - unsigned long current_temp, s32 delta_temp), + int current_temp, s32 delta_temp), TP_ARGS(tz, req_power, total_req_power, granted_power, total_granted_power, num_actors, power_range, max_allocatable_power, current_temp, delta_temp), @@ -24,7 +24,7 @@ TRACE_EVENT(thermal_power_allocator, __field(size_t, num_actors ) __field(u32, power_range ) __field(u32, max_allocatable_power ) - __field(unsigned long, current_temp ) + __field(int, current_temp ) __field(s32, delta_temp ) ), TP_fast_assign( @@ -42,7 +42,7 @@ TRACE_EVENT(thermal_power_allocator, __entry->delta_temp = delta_temp; ), - TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%lu delta_temperature=%d", + TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u current_temperature=%d delta_temperature=%d", __entry->tz_id, __print_array(__get_dynamic_array(req_power), __entry->num_actors, 4), -- cgit v1.2.3-70-g09d2 From 84ded2f8e7dda336fc2fb3570726ceb3b3b3590f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2015 11:45:34 -0400 Subject: Revert "libata: Implement support for sense data reporting" This reverts commit fe7173c206de63fc28475ee6ae42ff95c05692de. As implemented, ACS-4 sense reporting for ATA devices bypasses error diagnosis and handling in libata degrading EH behavior significantly. Revert the related changes for now. ATA_ID_COMMAND_SET_3/4 constants are not reverted as they're used by later changes. 
Signed-off-by: Tejun Heo Cc: Hannes Reinecke Cc: stable@vger.kernel.org #v4.1+ --- drivers/ata/libata-core.c | 20 +---------- drivers/ata/libata-eh.c | 86 ++--------------------------------------------- include/linux/ata.h | 16 --------- 3 files changed, 4 insertions(+), 118 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 426bc12459de..19bcb80b2031 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2147,24 +2147,6 @@ static int ata_dev_config_ncq(struct ata_device *dev, return 0; } -static void ata_dev_config_sense_reporting(struct ata_device *dev) -{ - unsigned int err_mask; - - if (!ata_id_has_sense_reporting(dev->id)) - return; - - if (ata_id_sense_reporting_enabled(dev->id)) - return; - - err_mask = ata_dev_set_feature(dev, SETFEATURE_SENSE_DATA, 0x1); - if (err_mask) { - ata_dev_dbg(dev, - "failed to enable Sense Data Reporting, Emask 0x%x\n", - err_mask); - } -} - /** * ata_dev_configure - Configure the specified ATA/ATAPI device * @dev: Target device to configure @@ -2387,7 +2369,7 @@ int ata_dev_configure(struct ata_device *dev) dev->devslp_timing[i] = sata_setting[j]; } } - ata_dev_config_sense_reporting(dev); + dev->cdb_len = 16; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index af08d32af4e0..16125be34893 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1629,70 +1629,6 @@ unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) return err_mask; } -/** - * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT - * @dev: device to perform REQUEST_SENSE_SENSE_DATA_EXT to - * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) - * @dfl_sense_key: default sense key to use - * - * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK - * SENSE. This function is EH helper. - * - * LOCKING: - * Kernel thread context (may sleep). 
- * - * RETURNS: - * encoded sense data on success, 0 on failure or if sense data - * is not available. - */ -static u32 ata_eh_request_sense(struct ata_queued_cmd *qc, - struct scsi_cmnd *cmd) -{ - struct ata_device *dev = qc->dev; - struct ata_taskfile tf; - unsigned int err_mask; - - if (!cmd) - return 0; - - DPRINTK("ATA request sense\n"); - ata_dev_warn(dev, "request sense\n"); - if (!ata_id_sense_reporting_enabled(dev->id)) { - ata_dev_warn(qc->dev, "sense data reporting disabled\n"); - return 0; - } - ata_tf_init(dev, &tf); - - tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; - tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; - tf.command = ATA_CMD_REQ_SENSE_DATA; - tf.protocol = ATA_PROT_NODATA; - - err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); - /* - * ACS-4 states: - * The device may set the SENSE DATA AVAILABLE bit to one in the - * STATUS field and clear the ERROR bit to zero in the STATUS field - * to indicate that the command returned completion without an error - * and the sense data described in table 306 is available. - * - * IOW the 'ATA_SENSE' bit might not be set even though valid - * sense data is available. - * So check for both. - */ - if ((tf.command & ATA_SENSE) || - tf.lbah != 0 || tf.lbam != 0 || tf.lbal != 0) { - ata_scsi_set_sense(cmd, tf.lbah, tf.lbam, tf.lbal); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - ata_dev_warn(dev, "sense data %02x/%02x/%02x\n", - tf.lbah, tf.lbam, tf.lbal); - } else { - ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", - tf.command, err_mask); - } - return err_mask; -} - /** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to @@ -1896,22 +1832,7 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - /* - * Sense data reporting does not work if the - * device fault bit is set. 
- */ - if ((stat & ATA_SENSE) && !(stat & ATA_DF) && - !(qc->flags & ATA_QCFLAG_SENSE_VALID)) { - if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { - tmp = ata_eh_request_sense(qc, qc->scsicmd); - if (tmp) - qc->err_mask |= tmp; - } else { - ata_dev_warn(qc->dev, "sense data available but port frozen\n"); - } - } - - /* Set by NCQ autosense or request sense above */ + /* Set by NCQ autosense */ if (qc->flags & ATA_QCFLAG_SENSE_VALID) return 0; @@ -2658,15 +2579,14 @@ static void ata_eh_link_report(struct ata_link *link) #ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | - ATA_SENSE | ATA_ERR)) { + ATA_ERR)) { if (res->command & ATA_BUSY) ata_dev_err(qc->dev, "status: { Busy }\n"); else - ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", + ata_dev_err(qc->dev, "status: { %s%s%s%s}\n", res->command & ATA_DRDY ? "DRDY " : "", res->command & ATA_DF ? "DF " : "", res->command & ATA_DRQ ? "DRQ " : "", - res->command & ATA_SENSE ? "SENSE " : "", res->command & ATA_ERR ? 
"ERR " : ""); } diff --git a/include/linux/ata.h b/include/linux/ata.h index 6c78956aa470..0e6a782575b5 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -385,8 +385,6 @@ enum { SATA_SSP = 0x06, /* Software Settings Preservation */ SATA_DEVSLP = 0x09, /* Device Sleep */ - SETFEATURE_SENSE_DATA = 0xC3, /* Sense Data Reporting feature */ - /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, ATA_SET_MAX_PASSWD = 0x01, @@ -720,20 +718,6 @@ static inline bool ata_id_has_read_log_dma_ext(const u16 *id) return false; } -static inline bool ata_id_has_sense_reporting(const u16 *id) -{ - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) - return false; - return id[ATA_ID_COMMAND_SET_3] & (1 << 6); -} - -static inline bool ata_id_sense_reporting_enabled(const u16 *id) -{ - if (!(id[ATA_ID_CFS_ENABLE_2] & (1 << 15))) - return false; - return id[ATA_ID_COMMAND_SET_4] & (1 << 6); -} - /** * ata_id_major_version - get ATA level of drive * @id: Identify data -- cgit v1.2.3-70-g09d2 From 74a80d67b8316eb3fbeb73dafc060a5a0a708587 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2015 11:46:39 -0400 Subject: Revert "libata: Implement NCQ autosense" This reverts commit 42b966fbf35da9c87f08d98f9b8978edf9e717cf. As implemented, ACS-4 sense reporting for ATA devices bypasses error diagnosis and handling in libata degrading EH behavior significantly. Revert the related changes for now. 
Signed-off-by: Tejun Heo Cc: Hannes Reinecke Cc: stable@vger.kernel.org #v4.1+ --- drivers/ata/libata-eh.c | 18 ------------------ drivers/ata/libata-scsi.c | 9 ++------- drivers/ata/libata.h | 1 - include/linux/ata.h | 2 -- 4 files changed, 2 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 16125be34893..cb0508af1459 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -1592,8 +1592,6 @@ static int ata_eh_read_log_10h(struct ata_device *dev, tf->hob_lbah = buf[10]; tf->nsect = buf[12]; tf->hob_nsect = buf[13]; - if (ata_id_has_ncq_autosense(dev->id)) - tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; return 0; } @@ -1791,18 +1789,6 @@ void ata_eh_analyze_ncq_error(struct ata_link *link) memcpy(&qc->result_tf, &tf, sizeof(tf)); qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; - if (qc->result_tf.auxiliary) { - char sense_key, asc, ascq; - - sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; - asc = (qc->result_tf.auxiliary >> 8) & 0xff; - ascq = qc->result_tf.auxiliary & 0xff; - ata_dev_dbg(dev, "NCQ Autosense %02x/%02x/%02x\n", - sense_key, asc, ascq); - ata_scsi_set_sense(qc->scsicmd, sense_key, asc, ascq); - qc->flags |= ATA_QCFLAG_SENSE_VALID; - } - ehc->i.err_mask &= ~AC_ERR_DEV; } @@ -1832,10 +1818,6 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, return ATA_EH_RESET; } - /* Set by NCQ autosense */ - if (qc->flags & ATA_QCFLAG_SENSE_VALID) - return 0; - if (stat & (ATA_ERR | ATA_DF)) qc->err_mask |= AC_ERR_DEV; else diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index e1ecd2ab3724..0d7f0da3a269 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -270,11 +270,8 @@ DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, ata_scsi_park_show, ata_scsi_park_store); EXPORT_SYMBOL_GPL(dev_attr_unload_heads); -void ata_scsi_set_sense(struct scsi_cmnd 
*cmd, u8 sk, u8 asc, u8 ascq) +static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { - if (!cmd) - return; - cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; scsi_build_sense_buffer(0, cmd->sense_buffer, sk, asc, ascq); @@ -1780,9 +1777,7 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) ((cdb[2] & 0x20) || need_sense)) { ata_gen_passthru_sense(qc); } else { - if (qc->flags & ATA_QCFLAG_SENSE_VALID) { - cmd->result = SAM_STAT_CHECK_CONDITION; - } else if (!need_sense) { + if (!need_sense) { cmd->result = SAM_STAT_GOOD; } else { /* TODO: decide which descriptor format to use diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 8cfdd9616d16..f840ca18a7c0 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -137,7 +137,6 @@ extern int ata_scsi_add_hosts(struct ata_host *host, struct scsi_host_template *sht); extern void ata_scsi_scan_host(struct ata_port *ap, int sync); extern int ata_scsi_offline_dev(struct ata_device *dev); -extern void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq); extern void ata_scsi_media_change_notify(struct ata_device *dev); extern void ata_scsi_hotplug(struct work_struct *work); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); diff --git a/include/linux/ata.h b/include/linux/ata.h index 0e6a782575b5..d2992bfa1706 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -528,8 +528,6 @@ struct ata_bmdma_prd { #define ata_id_cdb_intr(id) (((id)[ATA_ID_CONFIG] & 0x60) == 0x20) #define ata_id_has_da(id) ((id)[ATA_ID_SATA_CAPABILITY_2] & (1 << 4)) #define ata_id_has_devslp(id) ((id)[ATA_ID_FEATURE_SUPP] & (1 << 8)) -#define ata_id_has_ncq_autosense(id) \ - ((id)[ATA_ID_FEATURE_SUPP] & (1 << 7)) static inline bool ata_id_has_hipm(const u16 *id) { -- cgit v1.2.3-70-g09d2 From a3a10ce3429e5dee623ad5c8407ea58e204fcb0a Mon Sep 17 00:00:00 2001 From: Richard Watts Date: Tue, 19 May 2015 16:06:53 +0100 Subject: Avoid usb reset crashes by making 
tty_io cdevs truly dynamic Avoid usb reset crashes by making tty_io cdevs truly dynamic Signed-off-by: Richard Watts Reported-by: Duncan Mackintosh Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 24 ++++++++++++++++-------- include/linux/tty_driver.h | 2 +- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index c37a215177c0..02785d844354 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3152,9 +3152,12 @@ static int tty_cdev_add(struct tty_driver *driver, dev_t dev, unsigned int index, unsigned int count) { /* init here, since reused cdevs cause crashes */ - cdev_init(&driver->cdevs[index], &tty_fops); - driver->cdevs[index].owner = driver->owner; - return cdev_add(&driver->cdevs[index], dev, count); + driver->cdevs[index] = cdev_alloc(); + if (!driver->cdevs[index]) + return -ENOMEM; + cdev_init(driver->cdevs[index], &tty_fops); + driver->cdevs[index]->owner = driver->owner; + return cdev_add(driver->cdevs[index], dev, count); } /** @@ -3260,8 +3263,10 @@ struct device *tty_register_device_attr(struct tty_driver *driver, error: put_device(dev); - if (cdev) - cdev_del(&driver->cdevs[index]); + if (cdev) { + cdev_del(driver->cdevs[index]); + driver->cdevs[index] = NULL; + } return ERR_PTR(retval); } EXPORT_SYMBOL_GPL(tty_register_device_attr); @@ -3281,8 +3286,10 @@ void tty_unregister_device(struct tty_driver *driver, unsigned index) { device_destroy(tty_class, MKDEV(driver->major, driver->minor_start) + index); - if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) - cdev_del(&driver->cdevs[index]); + if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) { + cdev_del(driver->cdevs[index]); + driver->cdevs[index] = NULL; + } } EXPORT_SYMBOL(tty_unregister_device); @@ -3347,6 +3354,7 @@ err_free_all: kfree(driver->ports); kfree(driver->ttys); kfree(driver->termios); + kfree(driver->cdevs); kfree(driver); return ERR_PTR(err); } @@ -3375,7 +3383,7 @@ static void 
destruct_tty_driver(struct kref *kref) } proc_tty_unregister_driver(driver); if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC) - cdev_del(&driver->cdevs[0]); + cdev_del(driver->cdevs[0]); } kfree(driver->cdevs); kfree(driver->ports); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 92e337c18839..161052477f77 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -296,7 +296,7 @@ struct tty_operations { struct tty_driver { int magic; /* magic number for this structure */ struct kref kref; /* Reference management */ - struct cdev *cdevs; + struct cdev **cdevs; struct module *owner; const char *driver_name; const char *name; -- cgit v1.2.3-70-g09d2 From 512f64d9f7467597388ffbd5a21589ee3f375d8b Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 23 Jul 2015 15:08:41 +0300 Subject: mei: bus: add reference to bus device in struct mei_cl_client Add reference to the bus device (mei_device) for easier access. To ensure that referencing cldev->bus is valid during cldev lifetime we increase the bus ref counter on a client device creation and drop it on the device release. 
Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 17 +++++++++++++++++ include/linux/mei_cl_bus.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 1d9ce9c491cf..963731eb4383 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -590,6 +590,20 @@ static struct bus_type mei_cl_bus_type = { .uevent = mei_cl_device_uevent, }; +static struct mei_device *mei_dev_bus_get(struct mei_device *bus) +{ + if (bus) + get_device(bus->dev); + + return bus; +} + +static void mei_dev_bus_put(struct mei_device *bus) +{ + if (bus) + put_device(bus->dev); +} + static void mei_cl_dev_release(struct device *dev) { struct mei_cl_device *cldev = to_mei_cl_device(dev); @@ -598,6 +612,7 @@ static void mei_cl_dev_release(struct device *dev) return; mei_me_cl_put(cldev->me_cl); + mei_dev_bus_put(cldev->bus); kfree(cldev); } @@ -641,6 +656,7 @@ struct mei_cl_device *mei_cl_add_device(struct mei_device *bus, cldev->dev.parent = bus->dev; cldev->dev.bus = &mei_cl_bus_type; cldev->dev.type = &mei_cl_device_type; + cldev->bus = mei_dev_bus_get(bus); strlcpy(cldev->name, name, sizeof(cldev->name)); @@ -650,6 +666,7 @@ struct mei_cl_device *mei_cl_add_device(struct mei_device *bus, if (status) { dev_err(bus->dev, "Failed to register MEI device\n"); mei_me_cl_put(cldev->me_cl); + mei_dev_bus_put(bus); kfree(cldev); return NULL; } diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index a16b1f9c1aca..4c5c25b3222c 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -6,6 +6,7 @@ #include struct mei_cl_device; +struct mei_device; typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, u32 events, void *context); @@ -17,6 +18,7 @@ typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, * Drivers for MEI devices will get an mei_cl_device pointer * when being probed and shall use it for doing ME bus I/O. 
* + * @bus: parent mei device * @dev: linux driver model device pointer * @me_cl: me client * @cl: mei client @@ -29,6 +31,7 @@ typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, * @priv_data: client private data */ struct mei_cl_device { + struct mei_device *bus; struct device dev; struct mei_me_client *me_cl; -- cgit v1.2.3-70-g09d2 From 0ff0a8d853039aa60bba3ca3e04e4fb74584a736 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 23 Jul 2015 15:08:42 +0300 Subject: mei: bus: add me client device list infrastructure Instead of holding the list of host clients (me_cl) we want to keep the list me client devices (mei_cl_device) This way we can create host to me client connection only when needed. Add list head to mei_cl_device and cl_bus_lock Add bus_added flag to the me client (mei_me_client) to track if the appropriate mei_cl_device was already created and is_added flag to mei_cl_device to track if it was already added to the device list across the bus rescans Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 1 + drivers/misc/mei/init.c | 1 + drivers/misc/mei/mei_dev.h | 6 ++++-- include/linux/mei_cl_bus.h | 4 ++++ 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 963731eb4383..34b14dda050c 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -657,6 +657,7 @@ struct mei_cl_device *mei_cl_add_device(struct mei_device *bus, cldev->dev.bus = &mei_cl_bus_type; cldev->dev.type = &mei_cl_device_type; cldev->bus = mei_dev_bus_get(bus); + INIT_LIST_HEAD(&cldev->bus_list); strlcpy(cldev->name, name, sizeof(cldev->name)); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 00c3865ca3b1..15000e9231b1 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -390,6 +390,7 @@ void mei_device_init(struct mei_device *dev, INIT_LIST_HEAD(&dev->me_clients); mutex_init(&dev->device_lock); 
init_rwsem(&dev->me_clients_rwsem); + mutex_init(&dev->cl_bus_lock); init_waitqueue_head(&dev->wait_hw_ready); init_waitqueue_head(&dev->wait_pg); init_waitqueue_head(&dev->wait_hbm_start); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index bc65fb42aea9..882e6f77084a 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -178,7 +178,7 @@ struct mei_fw_status { * @client_id: me client id * @mei_flow_ctrl_creds: flow control credits * @connect_count: number connections to this client - * @reserved: reserved + * @bus_added: added to bus */ struct mei_me_client { struct list_head list; @@ -187,7 +187,7 @@ struct mei_me_client { u8 client_id; u8 mei_flow_ctrl_creds; u8 connect_count; - u8 reserved; + u8 bus_added; }; @@ -447,6 +447,7 @@ const char *mei_pg_state_str(enum mei_pg_state state); * @reset_work : work item for the device reset * * @device_list : mei client bus list + * @cl_bus_lock : client bus list lock * * @dbgfs_dir : debugfs mei root directory * @@ -543,6 +544,7 @@ struct mei_device { /* List of bus devices */ struct list_head device_list; + struct mutex cl_bus_lock; #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbgfs_dir; diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 4c5c25b3222c..85239138251c 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -18,6 +18,7 @@ typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, * Drivers for MEI devices will get an mei_cl_device pointer * when being probed and shall use it for doing ME bus I/O. * + * @bus_list: device on the bus list * @bus: parent mei device * @dev: linux driver model device pointer * @me_cl: me client @@ -28,9 +29,11 @@ typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, * events (e.g. Rx buffer pending) notifications. * @event_context: event callback run context * @events: Events bitmask sent to the driver. 
+ * @is_added: device is already scanned * @priv_data: client private data */ struct mei_cl_device { + struct list_head bus_list; struct mei_device *bus; struct device dev; @@ -42,6 +45,7 @@ struct mei_cl_device { mei_cl_event_cb_t event_cb; void *event_context; unsigned long events; + unsigned int is_added:1; void *priv_data; }; -- cgit v1.2.3-70-g09d2 From 71ce789115f878a07e4a6c43d6006cea6aee1078 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 23 Jul 2015 15:08:43 +0300 Subject: mei: bus: enable running fixup routines before device registration Split the device registration into allocation and device struct initialization, device setup, and the final device registration. This way it is possible to run fixups and quirks during the setup stage on an initialized device. Each fixup routine affects the do_match flag. If the flag is set to false at the end, the device won't be registered on the bus. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus-fixup.c | 30 +++++++++++++++ drivers/misc/mei/bus.c | 91 ++++++++++++++++++++++++++++++++++++-------- drivers/misc/mei/mei_dev.h | 2 +- include/linux/mei_cl_bus.h | 4 ++ 4 files changed, 111 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c index 47aa1523d9e1..865e33bcd226 100644 --- a/drivers/misc/mei/bus-fixup.c +++ b/drivers/misc/mei/bus-fixup.c @@ -20,12 +20,15 @@ #include #include #include +#include #include #include "mei_dev.h" #include "client.h" +#define MEI_UUID_ANY NULL_UUID_LE + struct mei_nfc_cmd { u8 command; u8 status; @@ -412,4 +415,31 @@ void mei_nfc_host_exit(struct mei_device *bus) mutex_unlock(&bus->device_lock); } +#define MEI_FIXUP(_uuid, _hook) { _uuid, _hook } + +static struct mei_fixup { + + const uuid_le uuid; + void (*hook)(struct mei_cl_device *cldev); +} mei_fixups[] = {}; + +/** + * mei_cl_dev_fixup - run fixup handlers + * + * @cldev: me client device + */ +void 
mei_cl_dev_fixup(struct mei_cl_device *cldev) +{ + struct mei_fixup *f; + const uuid_le *uuid = mei_me_cl_uuid(cldev->me_cl); + int i; + + for (i = 0; i < ARRAY_SIZE(mei_fixups); i++) { + + f = &mei_fixups[i]; + if (uuid_le_cmp(f->uuid, MEI_UUID_ANY) == 0 || + uuid_le_cmp(f->uuid, *uuid) == 0) + f->hook(cldev); + } +} diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 34b14dda050c..68b7756bf384 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -436,6 +436,9 @@ static int mei_cl_device_match(struct device *dev, struct device_driver *drv) if (!cldev) return 0; + if (!cldev->do_match) + return 0; + if (!cldrv || !cldrv->id_table) return 0; @@ -634,6 +637,76 @@ struct mei_cl *mei_cl_bus_find_cl_by_uuid(struct mei_device *bus, return NULL; } +/** + * mei_cl_dev_alloc - initialize and allocate mei client device + * + * @bus: mei device + * @me_cl: me client + * + * Return: allocated device structur or NULL on allocation failure + */ +static struct mei_cl_device *mei_cl_dev_alloc(struct mei_device *bus, + struct mei_me_client *me_cl) +{ + struct mei_cl_device *cldev; + + cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); + if (!cldev) + return NULL; + + device_initialize(&cldev->dev); + cldev->dev.parent = bus->dev; + cldev->dev.bus = &mei_cl_bus_type; + cldev->dev.type = &mei_cl_device_type; + cldev->bus = mei_dev_bus_get(bus); + cldev->me_cl = mei_me_cl_get(me_cl); + cldev->is_added = 0; + INIT_LIST_HEAD(&cldev->bus_list); + + return cldev; +} + +/** + * mei_cl_dev_setup - setup me client device + * run fix up routines and set the device name + * + * @bus: mei device + * @cldev: me client device + * + * Return: true if the device is eligible for enumeration + */ +static bool mei_cl_dev_setup(struct mei_device *bus, + struct mei_cl_device *cldev) +{ + cldev->do_match = 1; + mei_cl_dev_fixup(cldev); + + if (cldev->do_match) + dev_set_name(&cldev->dev, "mei:%s:%pUl", + cldev->name, mei_me_cl_uuid(cldev->me_cl)); + + return 
cldev->do_match == 1; +} + +/** + * mei_cl_bus_dev_add - add me client devices + * + * @cldev: me client device + * + * Return: 0 on success; < 0 on failre + */ +static int mei_cl_bus_dev_add(struct mei_cl_device *cldev) +{ + int ret; + + dev_dbg(cldev->bus->dev, "adding %pUL\n", mei_me_cl_uuid(cldev->me_cl)); + ret = device_add(&cldev->dev); + if (!ret) + cldev->is_added = 1; + + return ret; +} + struct mei_cl_device *mei_cl_add_device(struct mei_device *bus, struct mei_me_client *me_cl, struct mei_cl *cl, @@ -642,28 +715,16 @@ struct mei_cl_device *mei_cl_add_device(struct mei_device *bus, struct mei_cl_device *cldev; int status; - cldev = kzalloc(sizeof(struct mei_cl_device), GFP_KERNEL); + cldev = mei_cl_dev_alloc(bus, me_cl); if (!cldev) return NULL; - cldev->me_cl = mei_me_cl_get(me_cl); - if (!cldev->me_cl) { - kfree(cldev); - return NULL; - } - cldev->cl = cl; - cldev->dev.parent = bus->dev; - cldev->dev.bus = &mei_cl_bus_type; - cldev->dev.type = &mei_cl_device_type; - cldev->bus = mei_dev_bus_get(bus); - INIT_LIST_HEAD(&cldev->bus_list); - strlcpy(cldev->name, name, sizeof(cldev->name)); - dev_set_name(&cldev->dev, "mei:%s:%pUl", name, mei_me_cl_uuid(me_cl)); + mei_cl_dev_setup(bus, cldev); - status = device_register(&cldev->dev); + status = mei_cl_bus_dev_add(cldev); if (status) { dev_err(bus->dev, "Failed to register MEI device\n"); mei_me_cl_put(cldev->me_cl); diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index 882e6f77084a..ad59ab776f2d 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -335,7 +335,7 @@ struct mei_cl_device *mei_cl_add_device(struct mei_device *bus, struct mei_cl *cl, char *name); void mei_cl_remove_device(struct mei_cl_device *cldev); - +void mei_cl_dev_fixup(struct mei_cl_device *dev); ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, bool blocking); ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length); diff --git a/include/linux/mei_cl_bus.h 
b/include/linux/mei_cl_bus.h index 85239138251c..81ab56dd0ae0 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -29,6 +29,8 @@ typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, * events (e.g. Rx buffer pending) notifications. * @event_context: event callback run context * @events: Events bitmask sent to the driver. + * + * @do_match: wheather device can be matched with a driver * @is_added: device is already scanned * @priv_data: client private data */ @@ -45,6 +47,8 @@ struct mei_cl_device { mei_cl_event_cb_t event_cb; void *event_context; unsigned long events; + + unsigned int do_match:1; unsigned int is_added:1; void *priv_data; -- cgit v1.2.3-70-g09d2 From bb2ef9c39db2e3c2562b4e439b2b00dc42e2c026 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 26 Jul 2015 09:54:23 +0300 Subject: mei: bus: add and call callback on notify event Enable drivers on mei client bus to subscribe to asynchronous event notifications. Introduce events_mask to the existing callback infrastructure so it is possible to handle both RX and event notification. 
Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 50 ++++++++++++++++++++++++++++++++++++++++++---- drivers/misc/mei/client.c | 2 ++ drivers/misc/mei/mei_dev.h | 1 + drivers/nfc/mei_phy.c | 3 ++- include/linux/mei_cl_bus.h | 4 ++++ 5 files changed, 55 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 3ab08e522fb8..eef1c6b46ad8 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -222,7 +222,33 @@ static void mei_bus_event_work(struct work_struct *work) cldev->events = 0; /* Prepare for the next read */ - mei_cl_read_start(cldev->cl, 0, NULL); + if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) + mei_cl_read_start(cldev->cl, 0, NULL); +} + +/** + * mei_cl_bus_notify_event - schedule notify cb on bus client + * + * @cl: host client + */ +void mei_cl_bus_notify_event(struct mei_cl *cl) +{ + struct mei_cl_device *cldev = cl->cldev; + + if (!cldev || !cldev->event_cb) + return; + + if (!(cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF))) + return; + + if (!cl->notify_ev) + return; + + set_bit(MEI_CL_EVENT_NOTIF, &cldev->events); + + schedule_work(&cldev->event_work); + + cl->notify_ev = false; } /** @@ -237,6 +263,9 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) if (!cldev || !cldev->event_cb) return; + if (!(cldev->events_mask & BIT(MEI_CL_EVENT_RX))) + return; + set_bit(MEI_CL_EVENT_RX, &cldev->events); schedule_work(&cldev->event_work); @@ -247,6 +276,7 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) * * @cldev: me client devices * @event_cb: callback function + * @events_mask: requested events bitmask * @context: driver context data * * Return: 0 on success @@ -254,6 +284,7 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) * <0 on other errors */ int mei_cl_register_event_cb(struct mei_cl_device *cldev, + unsigned long events_mask, mei_cl_event_cb_t event_cb, void *context) { int ret; @@ -262,13 +293,24 @@ int 
mei_cl_register_event_cb(struct mei_cl_device *cldev, return -EALREADY; cldev->events = 0; + cldev->events_mask = events_mask; cldev->event_cb = event_cb; cldev->event_context = context; INIT_WORK(&cldev->event_work, mei_bus_event_work); - ret = mei_cl_read_start(cldev->cl, 0, NULL); - if (ret && ret != -EBUSY) - return ret; + if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) { + ret = mei_cl_read_start(cldev->cl, 0, NULL); + if (ret && ret != -EBUSY) + return ret; + } + + if (cldev->events_mask & BIT(MEI_CL_EVENT_NOTIF)) { + mutex_lock(&cldev->cl->dev->device_lock); + ret = mei_cl_notify_request(cldev->cl, NULL, event_cb ? 1 : 0); + mutex_unlock(&cldev->cl->dev->device_lock); + if (ret) + return ret; + } return 0; } diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index db2436aee2dc..5fcd70bcdf96 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -1375,6 +1375,8 @@ void mei_cl_notify(struct mei_cl *cl) if (cl->ev_async) kill_fasync(&cl->ev_async, SIGIO, POLL_PRI); + + mei_cl_bus_notify_event(cl); } /** diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h index c960aaa538c0..e25ee16c658e 100644 --- a/drivers/misc/mei/mei_dev.h +++ b/drivers/misc/mei/mei_dev.h @@ -345,6 +345,7 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, bool blocking); ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length); void mei_cl_bus_rx_event(struct mei_cl *cl); +void mei_cl_bus_notify_event(struct mei_cl *cl); void mei_cl_bus_remove_devices(struct mei_device *bus); int mei_cl_bus_init(void); void mei_cl_bus_exit(void); diff --git a/drivers/nfc/mei_phy.c b/drivers/nfc/mei_phy.c index 2b77ccf77f81..754a9bb0f58d 100644 --- a/drivers/nfc/mei_phy.c +++ b/drivers/nfc/mei_phy.c @@ -355,7 +355,8 @@ static int nfc_mei_phy_enable(void *phy_id) goto err; } - r = mei_cl_register_event_cb(phy->device, nfc_mei_event_cb, phy); + r = mei_cl_register_event_cb(phy->device, BIT(MEI_CL_EVENT_RX), + nfc_mei_event_cb, phy); if 
(r) { pr_err("Event cb registration failed %d\n", r); goto err; diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 81ab56dd0ae0..0962b2ca628a 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -28,6 +28,7 @@ typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, * @event_cb: Drivers register this callback to get asynchronous ME * events (e.g. Rx buffer pending) notifications. * @event_context: event callback run context + * @events_mask: Events bit mask requested by driver. * @events: Events bitmask sent to the driver. * * @do_match: wheather device can be matched with a driver @@ -46,6 +47,7 @@ struct mei_cl_device { struct work_struct event_work; mei_cl_event_cb_t event_cb; void *event_context; + unsigned long events_mask; unsigned long events; unsigned int do_match:1; @@ -76,10 +78,12 @@ ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length); ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length); int mei_cl_register_event_cb(struct mei_cl_device *device, + unsigned long event_mask, mei_cl_event_cb_t read_cb, void *context); #define MEI_CL_EVENT_RX 0 #define MEI_CL_EVENT_TX 1 +#define MEI_CL_EVENT_NOTIF 2 void *mei_cl_get_drvdata(const struct mei_cl_device *device); void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); -- cgit v1.2.3-70-g09d2 From e5779e8e12299f77c2421a707855d8d124171d85 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 30 Jul 2015 20:32:40 -0700 Subject: perf/x86/hw_breakpoints: Disallow kernel breakpoints unless kprobe-safe Code on the kprobe blacklist doesn't want unexpected int3 exceptions. It probably doesn't want unexpected debug exceptions either. Be safe: disallow breakpoints in nokprobes code. On non-CONFIG_KPROBES kernels, there is no kprobe blacklist. In that case, disallow kernel breakpoints entirely. 
It will be particularly important to keep hw breakpoints out of the entry and NMI code once we move debug exceptions off the IST stack. Signed-off-by: Andy Lutomirski Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Brian Gerst Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e14b152af99640448d895e3c2a8c2d5ee19a1325.1438312874.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/hw_breakpoint.c | 15 +++++++++++++++ include/linux/kprobes.h | 2 ++ kernel/kprobes.c | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 7114ba220fd4..78f3e90c5659 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -243,6 +244,20 @@ static int arch_build_bp_info(struct perf_event *bp) info->type = X86_BREAKPOINT_RW; break; case HW_BREAKPOINT_X: + /* + * We don't allow kernel breakpoints in places that are not + * acceptable for kprobes. On non-kprobes kernels, we don't + * allow kernel breakpoints at all. + */ + if (bp->attr.bp_addr >= TASK_SIZE_MAX) { +#ifdef CONFIG_KPROBES + if (within_kprobe_blacklist(bp->attr.bp_addr)) + return -EINVAL; +#else + return -EINVAL; +#endif + } + info->type = X86_BREAKPOINT_EXECUTE; /* * x86 inst breakpoints need to have a specific undefined len. 
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1ab54754a86d..8f6849084248 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -267,6 +267,8 @@ extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); +extern bool within_kprobe_blacklist(unsigned long addr); + struct kprobe_insn_cache { struct mutex mutex; void *(*alloc)(void); /* allocate insn page */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c90e417bb963..d10ab6b9b5e0 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1332,7 +1332,7 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr) addr < (unsigned long)__kprobes_text_end; } -static bool within_kprobe_blacklist(unsigned long addr) +bool within_kprobe_blacklist(unsigned long addr) { struct kprobe_blacklist_entry *ent; -- cgit v1.2.3-70-g09d2 From 4c2880b31c700b03f3f115b5ca64be615783aa9c Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 31 Jul 2015 09:44:12 +0100 Subject: irqchip/gic: Ensure gic_cpu_if_up/down() programs correct GIC instance Commit 3228950621d9 ("irqchip: gic: Preserve gic V2 bypass bits in cpu ctrl register") added a new function, gic_cpu_if_up(), to program the GIC CPU_CTRL register. This function assumes that there is only one GIC instance present and hence always uses the chip data for the primary GIC controller. Although it is not common for there to be a secondary, some devices do support a secondary. Therefore, fix this by passing gic_cpu_if_up() a pointer to the appropriate chip data structure. Similarly, the function gic_cpu_if_down() only assumes that there is a single GIC instance present. Update this function so that an instance number is passed for the appropriate GIC and return an error code on failure. The vexpress TC2 (which has a single GIC) is currently the only user of this function and so update it accordingly. 
Note that because the TC2 only has a single GIC, the call to gic_cpu_if_down() should always be successful. Signed-off-by: Jon Hunter Reviewed-by: Marc Zyngier Cc: Cc: Russell King Cc: Nicolas Pitre Cc: Jason Cooper Link: http://lkml.kernel.org/r/1438332252-25248-2-git-send-email-jonathanh@nvidia.com Signed-off-by: Thomas Gleixner --- arch/arm/mach-vexpress/tc2_pm.c | 2 +- drivers/irqchip/irq-gic.c | 18 ++++++++++++------ include/linux/irqchip/arm-gic.h | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index b3328cd46c33..1aa4ccece69f 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -80,7 +80,7 @@ static void tc2_pm_cpu_powerdown_prepare(unsigned int cpu, unsigned int cluster) * to the CPU by disabling the GIC CPU IF to prevent wfi * from completing execution behind power controller back */ - gic_cpu_if_down(); + gic_cpu_if_down(0); } static void tc2_pm_cluster_powerdown_prepare(unsigned int cluster) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 84fc622d0309..aa3e7b8a69c4 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -356,9 +356,9 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic) return mask; } -static void gic_cpu_if_up(void) +static void gic_cpu_if_up(struct gic_chip_data *gic) { - void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + void __iomem *cpu_base = gic_data_cpu_base(gic); u32 bypass = 0; /* @@ -426,17 +426,23 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); - gic_cpu_if_up(); + gic_cpu_if_up(gic); } -void gic_cpu_if_down(void) +int gic_cpu_if_down(unsigned int gic_nr) { - void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + void __iomem *cpu_base; u32 val = 0; + if (gic_nr >= MAX_GIC_NR) + return -EINVAL; + + cpu_base = 
gic_data_cpu_base(&gic_data[gic_nr]); val = readl(cpu_base + GIC_CPU_CTRL); val &= ~GICC_ENABLE; writel_relaxed(val, cpu_base + GIC_CPU_CTRL); + + return 0; } #ifdef CONFIG_CPU_PM @@ -572,7 +578,7 @@ static void gic_cpu_restore(unsigned int gic_nr) dist_base + GIC_DIST_PRI + i * 4); writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); - gic_cpu_if_up(); + gic_cpu_if_up(&gic_data[gic_nr]); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 61a2007eb49a..65da435d01c1 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -98,7 +98,7 @@ struct device_node; void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); -void gic_cpu_if_down(void); +int gic_cpu_if_down(unsigned int gic_nr); static inline void gic_init(unsigned int nr, int start, void __iomem *dist , void __iomem *cpu) -- cgit v1.2.3-70-g09d2 From 12d560f4ea87030667438a169912380be00cea4b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Jul 2015 18:35:23 -0700 Subject: rcu,locking: Privatize smp_mb__after_unlock_lock() RCU is the only thing that uses smp_mb__after_unlock_lock(), and is likely the only thing that ever will use it, so this commit makes this macro private to RCU. Signed-off-by: Paul E. 
McKenney Cc: Will Deacon Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: "linux-arch@vger.kernel.org" --- Documentation/memory-barriers.txt | 71 +++---------------------------------- arch/powerpc/include/asm/spinlock.h | 2 -- include/linux/spinlock.h | 10 ------ kernel/rcu/tree.h | 12 +++++++ 4 files changed, 16 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 318523872db5..eafa6a53f72c 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1854,16 +1854,10 @@ RELEASE are to the same lock variable, but only from the perspective of another CPU not holding that lock. In short, a ACQUIRE followed by an RELEASE may -not- be assumed to be a full memory barrier. -Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not -imply a full memory barrier. If it is necessary for a RELEASE-ACQUIRE -pair to produce a full barrier, the ACQUIRE can be followed by an -smp_mb__after_unlock_lock() invocation. This will produce a full barrier -(including transitivity) if either (a) the RELEASE and the ACQUIRE are -executed by the same CPU or task, or (b) the RELEASE and ACQUIRE act on -the same variable. The smp_mb__after_unlock_lock() primitive is free -on many architectures. Without smp_mb__after_unlock_lock(), the CPU's -execution of the critical sections corresponding to the RELEASE and the -ACQUIRE can cross, so that: +Similarly, the reverse case of a RELEASE followed by an ACQUIRE does +not imply a full memory barrier. Therefore, the CPU's execution of the +critical sections corresponding to the RELEASE and the ACQUIRE can cross, +so that: *A = a; RELEASE M @@ -1901,29 +1895,6 @@ the RELEASE would simply complete, thereby avoiding the deadlock. a sleep-unlock race, but the locking primitive needs to resolve such races properly in any case. -With smp_mb__after_unlock_lock(), the two critical sections cannot overlap. 
-For example, with the following code, the store to *A will always be -seen by other CPUs before the store to *B: - - *A = a; - RELEASE M - ACQUIRE N - smp_mb__after_unlock_lock(); - *B = b; - -The operations will always occur in one of the following orders: - - STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B - STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B - ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B - -If the RELEASE and ACQUIRE were instead both operating on the same lock -variable, only the first of these alternatives can occur. In addition, -the more strongly ordered systems may rule out some of the above orders. -But in any case, as noted earlier, the smp_mb__after_unlock_lock() -ensures that the store to *A will always be seen as happening before -the store to *B. - Locks and semaphores may not provide any guarantee of ordering on UP compiled systems, and so cannot be counted on in such a situation to actually achieve anything at all - especially with respect to I/O accesses - unless combined @@ -2154,40 +2125,6 @@ But it won't see any of: *E, *F or *G following RELEASE Q -However, if the following occurs: - - CPU 1 CPU 2 - =============================== =============================== - WRITE_ONCE(*A, a); - ACQUIRE M [1] - WRITE_ONCE(*B, b); - WRITE_ONCE(*C, c); - RELEASE M [1] - WRITE_ONCE(*D, d); WRITE_ONCE(*E, e); - ACQUIRE M [2] - smp_mb__after_unlock_lock(); - WRITE_ONCE(*F, f); - WRITE_ONCE(*G, g); - RELEASE M [2] - WRITE_ONCE(*H, h); - -CPU 3 might see: - - *E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1], - ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D - -But assuming CPU 1 gets the lock first, CPU 3 won't see any of: - - *B, *C, *D, *F, *G or *H preceding ACQUIRE M [1] - *A, *B or *C following RELEASE M [1] - *F, *G or *H preceding ACQUIRE M [2] - *A, *B, *C, *E, *F or *G following RELEASE M [2] - -Note that the smp_mb__after_unlock_lock() is critically important -here: Without it CPU 3 might 
see some of the above orderings. -Without smp_mb__after_unlock_lock(), the accesses are not guaranteed -to be seen in order unless CPU 3 holds lock M. - ACQUIRES VS I/O ACCESSES ------------------------ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 4dbe072eecbe..523673d7583c 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -28,8 +28,6 @@ #include #include -#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ - #ifdef CONFIG_PPC64 /* use 0x800000yy when locked, where yy == CPU number */ #ifdef __BIG_ENDIAN__ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 0063b24b4f36..16c5ed5a627c 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -130,16 +130,6 @@ do { \ #define smp_mb__before_spinlock() smp_wmb() #endif -/* - * Place this after a lock-acquisition primitive to guarantee that - * an UNLOCK+LOCK pair act as a full barrier. This guarantee applies - * if the UNLOCK and LOCK are executed by the same CPU or if the - * UNLOCK and LOCK operate on the same lock variable. - */ -#ifndef smp_mb__after_unlock_lock -#define smp_mb__after_unlock_lock() do { } while (0) -#endif - /** * raw_spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 0412030ca882..2e991f8361e4 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -653,3 +653,15 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll) #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ } #endif /* #ifdef CONFIG_RCU_TRACE */ + +/* + * Place this after a lock-acquisition primitive to guarantee that + * an UNLOCK+LOCK pair act as a full barrier. This guarantee applies + * if the UNLOCK and LOCK are executed by the same CPU or if the + * UNLOCK and LOCK operate on the same lock variable. 
+ */ +#ifdef CONFIG_PPC +#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ +#else /* #ifdef CONFIG_PPC */ +#define smp_mb__after_unlock_lock() do { } while (0) +#endif /* #else #ifdef CONFIG_PPC */ -- cgit v1.2.3-70-g09d2 From cc476b42a39d5a66d94f46cade972dcb8ee278df Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Fri, 31 Jul 2015 16:00:13 +0200 Subject: usb: gadget: encapsulate endpoint claiming mechanism So far it was necessary for usb functions to set ep->driver_data in endpoint obtained from autoconfig to non-null value, to indicate that endpoint is claimed by function (in autoconfig it was checked if endpoint has set this field to non-null value, and if it has, it was assumed that it is claimed). It could cause bugs because if some function doesn't set this field autoconfig could return the same endpoint more than one time. To help to avoid such bugs this patch adds claimed flag to struct usb_ep, and encapsulates endpoint claiming mechanism inside usb_ep_autoconfig_ss() and usb_ep_autoconfig_reset(), so now usb functions don't need to perform any additional actions to mark endpoint obtained from autoconfig as claimed. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/epautoconf.c | 11 ++++++----- include/linux/usb/gadget.h | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index 919cdfdda78b..8e00ca765549 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -53,7 +53,7 @@ ep_matches ( int num_req_streams = 0; /* endpoint already claimed? */ - if (NULL != ep->driver_data) + if (ep->claimed) return 0; /* only support ep0 for portable CONTROL traffic */ @@ -240,7 +240,7 @@ find_ep (struct usb_gadget *gadget, const char *name) * updated with the assigned number of streams if it is * different from the original value. 
To prevent the endpoint * from being returned by a later autoconfig call, claim it by - * assigning ep->driver_data to some non-null value. + * assigning ep->claimed to true. * * On failure, this returns a null endpoint descriptor. */ @@ -323,6 +323,7 @@ struct usb_ep *usb_ep_autoconfig_ss( found_ep: ep->desc = NULL; ep->comp_desc = NULL; + ep->claimed = true; return ep; } EXPORT_SYMBOL_GPL(usb_ep_autoconfig_ss); @@ -354,7 +355,7 @@ EXPORT_SYMBOL_GPL(usb_ep_autoconfig_ss); * descriptor bEndpointAddress. For bulk endpoints, the wMaxPacket value * is initialized as if the endpoint were used at full speed. To prevent * the endpoint from being returned by a later autoconfig call, claim it - * by assigning ep->driver_data to some non-null value. + * by assigning ep->claimed to true. * * On failure, this returns a null endpoint descriptor. */ @@ -373,7 +374,7 @@ EXPORT_SYMBOL_GPL(usb_ep_autoconfig); * * Use this for devices where one configuration may need to assign * endpoint resources very differently from the next one. It clears - * state such as ep->driver_data and the record of assigned endpoints + * state such as ep->claimed and the record of assigned endpoints * used by usb_ep_autoconfig(). 
*/ void usb_ep_autoconfig_reset (struct usb_gadget *gadget) @@ -381,7 +382,7 @@ void usb_ep_autoconfig_reset (struct usb_gadget *gadget) struct usb_ep *ep; list_for_each_entry (ep, &gadget->ep_list, ep_list) { - ep->driver_data = NULL; + ep->claimed = false; } gadget->in_epnum = 0; gadget->out_epnum = 0; diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 353a72096dda..68fb5e8b18c3 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -173,6 +173,7 @@ struct usb_ep { const char *name; const struct usb_ep_ops *ops; struct list_head ep_list; + bool claimed; unsigned maxpacket:16; unsigned maxpacket_limit:16; unsigned max_streams:16; -- cgit v1.2.3-70-g09d2 From 734b5a2addd333829a6d647ee14a3609c7a87c44 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Fri, 31 Jul 2015 16:00:14 +0200 Subject: usb: gadget: add endpoint capabilities flags Introduce struct usb_ep_caps which contains information about capabilities of usb endpoints - supported transfer types and directions. This structure should be filled by UDC driver for each of its endpoints, and will be used in epautoconf in new ep matching mechanism which will replace ugly guessing of endpoint capabilities basing on its name. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 68fb5e8b18c3..a9a49593c239 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -140,11 +140,30 @@ struct usb_ep_ops { void (*fifo_flush) (struct usb_ep *ep); }; +/** + * struct usb_ep_caps - endpoint capabilities description + * @type_control:Endpoint supports control type (reserved for ep0). + * @type_iso:Endpoint supports isochronous transfers. + * @type_bulk:Endpoint supports bulk transfers. + * @type_int:Endpoint supports interrupt transfers. 
+ * @dir_in:Endpoint supports IN direction. + * @dir_out:Endpoint supports OUT direction. + */ +struct usb_ep_caps { + unsigned type_control:1; + unsigned type_iso:1; + unsigned type_bulk:1; + unsigned type_int:1; + unsigned dir_in:1; + unsigned dir_out:1; +}; + /** * struct usb_ep - device side representation of USB endpoint * @name:identifier for the endpoint, such as "ep-a" or "ep9in-bulk" * @ops: Function pointers used to access hardware-specific operations. * @ep_list:the gadget's ep_list holds all of its endpoints + * @caps:The structure describing types and directions supported by endpoint. * @maxpacket:The maximum packet size used on this endpoint. The initial * value can sometimes be reduced (hardware allowing), according to * the endpoint descriptor used to configure the endpoint. @@ -167,12 +186,14 @@ struct usb_ep_ops { * gadget->ep_list. the control endpoint (gadget->ep0) is not in that list, * and is accessed only in response to a driver setup() callback. */ + struct usb_ep { void *driver_data; const char *name; const struct usb_ep_ops *ops; struct list_head ep_list; + struct usb_ep_caps caps; bool claimed; unsigned maxpacket:16; unsigned maxpacket_limit:16; -- cgit v1.2.3-70-g09d2 From 80e6e3847f851fc05e63265050115e29e2a50d7e Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Fri, 31 Jul 2015 16:00:15 +0200 Subject: usb: gadget: add endpoint capabilities helper macros Add macros useful while initializing array of endpoint capabilities structures. These macros make structure initialization more compact to decrease number of code lines and increase readability of code.
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index a9a49593c239..82b5bcbd2c98 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -158,6 +158,26 @@ struct usb_ep_caps { unsigned dir_out:1; }; +#define USB_EP_CAPS_TYPE_CONTROL 0x01 +#define USB_EP_CAPS_TYPE_ISO 0x02 +#define USB_EP_CAPS_TYPE_BULK 0x04 +#define USB_EP_CAPS_TYPE_INT 0x08 +#define USB_EP_CAPS_TYPE_ALL \ + (USB_EP_CAPS_TYPE_ISO | USB_EP_CAPS_TYPE_BULK | USB_EP_CAPS_TYPE_INT) +#define USB_EP_CAPS_DIR_IN 0x01 +#define USB_EP_CAPS_DIR_OUT 0x02 +#define USB_EP_CAPS_DIR_ALL (USB_EP_CAPS_DIR_IN | USB_EP_CAPS_DIR_OUT) + +#define USB_EP_CAPS(_type, _dir) \ + { \ + .type_control = !!(_type & USB_EP_CAPS_TYPE_CONTROL), \ + .type_iso = !!(_type & USB_EP_CAPS_TYPE_ISO), \ + .type_bulk = !!(_type & USB_EP_CAPS_TYPE_BULK), \ + .type_int = !!(_type & USB_EP_CAPS_TYPE_INT), \ + .dir_in = !!(_dir & USB_EP_CAPS_DIR_IN), \ + .dir_out = !!(_dir & USB_EP_CAPS_DIR_OUT), \ + } + /** * struct usb_ep - device side representation of USB endpoint * @name:identifier for the endpoint, such as "ep-a" or "ep9in-bulk" -- cgit v1.2.3-70-g09d2 From 6abc8ca19df0078de17dc38340db3002ed489ce7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Aug 2015 15:20:55 -0400 Subject: cgroup: define controller file conventions Traditionally, each cgroup controller implemented whatever interface it wanted leading to interfaces which are widely inconsistent. Examining the requirements of the controllers readily yield that there are only a few control schemes shared among all. Two major controllers already had to implement new interface for the unified hierarchy due to significant structural changes. Let's take the chance to establish common conventions throughout all controllers. 
This patch defines CGROUP_WEIGHT_MIN/DFL/MAX to be used on all weight based control knobs and documents the conventions that controllers should follow on the unified hierarchy. Except for io.weight knob, all existing unified hierarchy knobs are already compliant. A follow-up patch will update io.weight. v2: Added descriptions of min, low and high knobs. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner Cc: Li Zefan Cc: Peter Zijlstra --- Documentation/cgroups/unified-hierarchy.txt | 80 ++++++++++++++++++++++++++--- include/linux/cgroup.h | 9 ++++ 2 files changed, 81 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 86847a7647ab..1ee9caf29e57 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -23,10 +23,13 @@ CONTENTS 5. Other Changes 5-1. [Un]populated Notification 5-2. Other Core Changes - 5-3. Per-Controller Changes - 5-3-1. blkio - 5-3-2. cpuset - 5-3-3. memory + 5-3. Controller File Conventions + 5-3-1. Format + 5-3-2. Control Knobs + 5-4. Per-Controller Changes + 5-4-1. blkio + 5-4-2. cpuset + 5-4-3. memory 6. Planned Changes 6-1. CAP for resource control @@ -372,14 +375,75 @@ supported and the interface files "release_agent" and - The "cgroup.clone_children" file is removed. -5-3. Per-Controller Changes +5-3. Controller File Conventions -5-3-1. blkio +5-3-1. Format + +In general, all controller files should be in one of the following +formats whenever possible. + +- Values only files + + VAL0 VAL1...\n + +- Flat keyed files + + KEY0 VAL0\n + KEY1 VAL1\n + ... + +- Nested keyed files + + KEY0 SUB_KEY0=VAL00 SUB_KEY1=VAL01... + KEY1 SUB_KEY0=VAL10 SUB_KEY1=VAL11... + ... + +For a writeable file, the format for writing should generally match +reading; however, controllers may allow omitting later fields or +implement restricted shortcuts for most common use cases. 
+ +For both flat and nested keyed files, only the values for a single key +can be written at a time. For nested keyed files, the sub key pairs +may be specified in any order and not all pairs have to be specified. + + +5-3-2. Control Knobs + +- Settings for a single feature should generally be implemented in a + single file. + +- In general, the root cgroup should be exempt from resource control + and thus shouldn't have resource control knobs. + +- If a controller implements ratio based resource distribution, the + control knob should be named "weight" and have the range [1, 10000] + and 100 should be the default value. The values are chosen to allow + enough and symmetric bias in both directions while keeping it + intuitive (the default is 100%). + +- If a controller implements an absolute resource guarantee and/or + limit, the control knobs should be named "min" and "max" + respectively. If a controller implements best effort resource + guarantee and/or limit, the control knobs should be named "low" and + "high" respectively. + + In the above four control files, the special token "max" should be + used to represent upward infinity for both reading and writing. + +- If a setting has configurable default value and specific overrides, + the default settings should be keyed with "default" and appear as + the first entry in the file. Specific entries can use "default" as + its value to indicate inheritance of the default value. + + +5-4. Per-Controller Changes + +5-4-1. blkio - blk-throttle becomes properly hierarchical. -5-3-2. cpuset +5-4-2. cpuset - Tasks are kept in empty cpusets after hotplug and take on the masks of the nearest non-empty ancestor, instead of being moved to it. @@ -388,7 +452,7 @@ supported and the interface files "release_agent" and masks of the nearest non-empty ancestor. -5-3-3. memory +5-4-3. memory - use_hierarchy is on by default and the cgroup file for the flag is not created.
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a593e299162e..c6bf9d30c270 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -22,6 +22,15 @@ #ifdef CONFIG_CGROUPS +/* + * All weight knobs on the default hierarchy should use the following min, + * default and max values. The default value is the logarithmic center of + * MIN and MAX and allows 100x to be expressed in both directions. + */ +#define CGROUP_WEIGHT_MIN 1 +#define CGROUP_WEIGHT_DFL 100 +#define CGROUP_WEIGHT_MAX 10000 + /* a css_task_iter should be treated as an opaque object */ struct css_task_iter { struct cgroup_subsys *ss; -- cgit v1.2.3-70-g09d2 From 7f3884f7de89c49439fdaa115f6d1caec3256cc3 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Tue, 4 Aug 2015 16:36:29 -0700 Subject: Input: atmel_mxt_ts - use deep sleep mode when stopped The hardcoded 0x83 CTRL setting overrides other settings in that byte, enabling extra reporting that may not be useful on a particular platform. Implement improved suspend mechanism via deep sleep. By writing zero to both the active and idle cycle times the maXTouch device can be put into a deep sleep mode, using minimal power. It is necessary to issue a calibrate command after the chip has spent any time in deep sleep, however a soft reset is unnecessary. Use the old method on Chromebook Pixel via platform data option. This patch also deals with the situation where the power configuration is zero on probe, which would mean that the device never wakes up to execute commands. After a config download, the T7 power configuration may have changed so it is necessary to re-read it. 
Signed-off-by: Nick Dyer Acked-by: Benson Leung Acked-by: Yufeng Shen Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 119 ++++++++++++++++++++++++++--- drivers/platform/chrome/chromeos_laptop.c | 4 +- include/linux/i2c/atmel_mxt_ts.h | 25 ------ include/linux/platform_data/atmel_mxt_ts.h | 31 ++++++++ 4 files changed, 142 insertions(+), 37 deletions(-) delete mode 100644 include/linux/i2c/atmel_mxt_ts.h create mode 100644 include/linux/platform_data/atmel_mxt_ts.h (limited to 'include/linux') diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 8efe7a002f1e..0e743b3a691b 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -103,9 +103,13 @@ #define MXT_T6_STATUS_COMSERR (1 << 2) /* MXT_GEN_POWER_T7 field */ -#define MXT_POWER_IDLEACQINT 0 -#define MXT_POWER_ACTVACQINT 1 -#define MXT_POWER_ACTV2IDLETO 2 +struct t7_config { + u8 idle; + u8 active; +} __packed; + +#define MXT_POWER_CFG_RUN 0 +#define MXT_POWER_CFG_DEEPSLEEP 1 /* MXT_GEN_ACQUIRE_T8 field */ #define MXT_ACQUIRE_CHRGTIME 0 @@ -117,7 +121,7 @@ #define MXT_ACQUIRE_ATCHCALSTHR 7 /* MXT_TOUCH_MULTI_T9 field */ -#define MXT_TOUCH_CTRL 0 +#define MXT_T9_CTRL 0 #define MXT_T9_ORIENT 9 #define MXT_T9_RANGE 18 @@ -291,6 +295,7 @@ struct mxt_data { u8 last_message_count; u8 num_touchids; u8 multitouch; + struct t7_config t7_cfg; /* Cached parameters from object table */ u16 T5_address; @@ -1361,6 +1366,8 @@ static int mxt_upload_cfg_mem(struct mxt_data *data, unsigned int cfg_start, return 0; } +static int mxt_init_t7_power_cfg(struct mxt_data *data); + /* * mxt_update_cfg - download configuration to chip * @@ -1508,6 +1515,9 @@ static int mxt_update_cfg(struct mxt_data *data, const struct firmware *cfg) dev_info(dev, "Config successfully updated\n"); + /* T7 config may have changed */ + 
mxt_init_t7_power_cfg(data); + release_mem: kfree(config_mem); return ret; @@ -2051,6 +2061,60 @@ err_free_object_table: return error; } +static int mxt_set_t7_power_cfg(struct mxt_data *data, u8 sleep) +{ + struct device *dev = &data->client->dev; + int error; + struct t7_config *new_config; + struct t7_config deepsleep = { .active = 0, .idle = 0 }; + + if (sleep == MXT_POWER_CFG_DEEPSLEEP) + new_config = &deepsleep; + else + new_config = &data->t7_cfg; + + error = __mxt_write_reg(data->client, data->T7_address, + sizeof(data->t7_cfg), new_config); + if (error) + return error; + + dev_dbg(dev, "Set T7 ACTV:%d IDLE:%d\n", + new_config->active, new_config->idle); + + return 0; +} + +static int mxt_init_t7_power_cfg(struct mxt_data *data) +{ + struct device *dev = &data->client->dev; + int error; + bool retry = false; + +recheck: + error = __mxt_read_reg(data->client, data->T7_address, + sizeof(data->t7_cfg), &data->t7_cfg); + if (error) + return error; + + if (data->t7_cfg.active == 0 || data->t7_cfg.idle == 0) { + if (!retry) { + dev_dbg(dev, "T7 cfg zero, resetting\n"); + mxt_soft_reset(data); + retry = true; + goto recheck; + } else { + dev_dbg(dev, "T7 cfg zero after reset, overriding\n"); + data->t7_cfg.active = 20; + data->t7_cfg.idle = 100; + return mxt_set_t7_power_cfg(data, MXT_POWER_CFG_RUN); + } + } + + dev_dbg(dev, "Initialized power cfg: ACTV %d, IDLE %d\n", + data->t7_cfg.active, data->t7_cfg.idle); + return 0; +} + static int mxt_configure_objects(struct mxt_data *data, const struct firmware *cfg) { @@ -2058,6 +2122,12 @@ static int mxt_configure_objects(struct mxt_data *data, struct mxt_info *info = &data->info; int error; + error = mxt_init_t7_power_cfg(data); + if (error) { + dev_err(dev, "Failed to initialize power cfg\n"); + return error; + } + if (cfg) { error = mxt_update_cfg(data, cfg); if (error) @@ -2346,14 +2416,41 @@ static const struct attribute_group mxt_attr_group = { static void mxt_start(struct mxt_data *data) { - /* Touch enable */ - 
mxt_write_object(data, data->multitouch, MXT_TOUCH_CTRL, 0x83); + switch (data->pdata->suspend_mode) { + case MXT_SUSPEND_T9_CTRL: + mxt_soft_reset(data); + + /* Touch enable */ + /* 0x83 = SCANEN | RPTEN | ENABLE */ + mxt_write_object(data, + MXT_TOUCH_MULTI_T9, MXT_T9_CTRL, 0x83); + break; + + case MXT_SUSPEND_DEEP_SLEEP: + default: + mxt_set_t7_power_cfg(data, MXT_POWER_CFG_RUN); + + /* Recalibrate since chip has been in deep sleep */ + mxt_t6_command(data, MXT_COMMAND_CALIBRATE, 1, false); + break; + } + } static void mxt_stop(struct mxt_data *data) { - /* Touch disable */ - mxt_write_object(data, data->multitouch, MXT_TOUCH_CTRL, 0); + switch (data->pdata->suspend_mode) { + case MXT_SUSPEND_T9_CTRL: + /* Touch disable */ + mxt_write_object(data, + MXT_TOUCH_MULTI_T9, MXT_T9_CTRL, 0); + break; + + case MXT_SUSPEND_DEEP_SLEEP: + default: + mxt_set_t7_power_cfg(data, MXT_POWER_CFG_DEEPSLEEP); + break; + } } static int mxt_input_open(struct input_dev *dev) @@ -2409,6 +2506,8 @@ static const struct mxt_platform_data *mxt_parse_dt(struct i2c_client *client) pdata->t19_keymap = keymap; } + pdata->suspend_mode = MXT_SUSPEND_DEEP_SLEEP; + return pdata; } #else @@ -2625,8 +2724,6 @@ static int __maybe_unused mxt_resume(struct device *dev) struct mxt_data *data = i2c_get_clientdata(client); struct input_dev *input_dev = data->input_dev; - mxt_soft_reset(data); - mutex_lock(&input_dev->mutex); if (input_dev->users) diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c index a04019ab9feb..02072749fff3 100644 --- a/drivers/platform/chrome/chromeos_laptop.c +++ b/drivers/platform/chrome/chromeos_laptop.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include @@ -111,6 +111,7 @@ static struct mxt_platform_data atmel_224s_tp_platform_data = { .irqflags = IRQF_TRIGGER_FALLING, .t19_num_keys = ARRAY_SIZE(mxt_t19_keys), .t19_keymap = mxt_t19_keys, + .suspend_mode = MXT_SUSPEND_T9_CTRL, }; static struct 
i2c_board_info atmel_224s_tp_device = { @@ -121,6 +122,7 @@ static struct i2c_board_info atmel_224s_tp_device = { static struct mxt_platform_data atmel_1664s_platform_data = { .irqflags = IRQF_TRIGGER_FALLING, + .suspend_mode = MXT_SUSPEND_T9_CTRL, }; static struct i2c_board_info atmel_1664s_device = { diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h deleted file mode 100644 index 02bf6ea31701..000000000000 --- a/include/linux/i2c/atmel_mxt_ts.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Atmel maXTouch Touchscreen driver - * - * Copyright (C) 2010 Samsung Electronics Co.Ltd - * Author: Joonyoung Shim - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __LINUX_ATMEL_MXT_TS_H -#define __LINUX_ATMEL_MXT_TS_H - -#include - -/* The platform data for the Atmel maXTouch touchscreen driver */ -struct mxt_platform_data { - unsigned long irqflags; - u8 t19_num_keys; - const unsigned int *t19_keymap; -}; - -#endif /* __LINUX_ATMEL_MXT_TS_H */ diff --git a/include/linux/platform_data/atmel_mxt_ts.h b/include/linux/platform_data/atmel_mxt_ts.h new file mode 100644 index 000000000000..695035a8d7fb --- /dev/null +++ b/include/linux/platform_data/atmel_mxt_ts.h @@ -0,0 +1,31 @@ +/* + * Atmel maXTouch Touchscreen driver + * + * Copyright (C) 2010 Samsung Electronics Co.Ltd + * Author: Joonyoung Shim + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#ifndef __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H +#define __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H + +#include + +enum mxt_suspend_mode { + MXT_SUSPEND_DEEP_SLEEP = 0, + MXT_SUSPEND_T9_CTRL = 1, +}; + +/* The platform data for the Atmel maXTouch touchscreen driver */ +struct mxt_platform_data { + unsigned long irqflags; + u8 t19_num_keys; + const unsigned int *t19_keymap; + enum mxt_suspend_mode suspend_mode; +}; + +#endif /* __LINUX_PLATFORM_DATA_ATMEL_MXT_TS_H */ -- cgit v1.2.3-70-g09d2 From 056f6c87028544de934f27caf95aa1545d585767 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 10 Jul 2015 12:07:25 +0200 Subject: dmaengine: shdma: Make dummy shdma_chan_filter() always return false If CONFIG_SH_DMAE_BASE (which is required for DMA engine support for legacy SH, SH/R-Mobile, and R-Car Gen1, but not for R-Car Gen2) is not enabled, but CONFIG_RCAR_DMAC (for R-Car Gen2 DMA engine support) is, and the DTS doesn't provide a "dmas" property for a device, dma_request_slave_channel_compat() incorrectly succeeds, and returns a DMA channel. However, when trying to use that DMA channel later, it fails with: rcar-dmac e6700000.dma-controller: rcar_dmac_prep_slave_sg: bad parameter: len=1, id=-22 (Fortunately most drivers can handle this failure, and fall back to PIO) The reason for this is that a NULL legacy filter function is used, which actually means "all channels are OK", not "do not match". If CONFIG_SH_DMAE_BASE is enabled (like in shmobile_defconfig, which supports other SoCs besides R-Car Gen2), shdma_chan_filter() correctly returns false, as no available channel on R-Car Gen2 matches a shdma-base channel. If the DTS does provide a "dmas" property, dma_request_slave_channel() succeeds, and legacy filter-based matching is not used. To fix this, change shdma_chan_filter from being NULL to a dummy function that always returns false, like is done on other platforms. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Vinod Koul --- include/linux/shdma-base.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h index dd0ba502ccb3..d927647e6350 100644 --- a/include/linux/shdma-base.h +++ b/include/linux/shdma-base.h @@ -128,7 +128,10 @@ void shdma_cleanup(struct shdma_dev *sdev); #if IS_ENABLED(CONFIG_SH_DMAE_BASE) bool shdma_chan_filter(struct dma_chan *chan, void *arg); #else -#define shdma_chan_filter NULL +static inline bool shdma_chan_filter(struct dma_chan *chan, void *arg) +{ + return false; +} #endif #endif -- cgit v1.2.3-70-g09d2 From 8cd90e50d1408c65c355084b1c7f8f9085f49c6b Mon Sep 17 00:00:00 2001 From: Jun Nie Date: Fri, 31 Jul 2015 15:49:19 +0800 Subject: uart: pl011: Add support to ZTE ZX296702 uart Support ZTE uart with some registers differing offset. Probe as platform device for not AMBA IP ID is available on ZTE uart. Signed-off-by: Jun Nie Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 4 +- drivers/tty/serial/amba-pl011.c | 195 +++++++++++++++++++++++++++++++++++++--- include/linux/amba/serial.h | 14 +++ 3 files changed, 197 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 687b1ea294b7..ed299b9e6375 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -47,12 +47,12 @@ config SERIAL_AMBA_PL010_CONSOLE config SERIAL_AMBA_PL011 tristate "ARM AMBA PL011 serial port support" - depends on ARM_AMBA + depends on ARM_AMBA || SOC_ZX296702 select SERIAL_CORE help This selects the ARM(R) AMBA(R) PrimeCell PL011 UART. If you have an Integrator/PP2, Integrator/CP or Versatile platform, say Y or M - here. + here. Say Y or M if you have SOC_ZX296702. If unsure, say N. 
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 017443d092c1..2af09ab153b6 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -74,6 +74,10 @@ /* There is by now at least one vendor with differing details, so handle it */ struct vendor_data { unsigned int ifls; + unsigned int fr_busy; + unsigned int fr_dsr; + unsigned int fr_cts; + unsigned int fr_ri; unsigned int lcrh_tx; unsigned int lcrh_rx; u16 *reg_lut; @@ -127,6 +131,7 @@ static u16 arm_reg[] = { [REG_DMACR] = UART011_DMACR, }; +#ifdef CONFIG_ARM_AMBA static unsigned int get_fifosize_arm(struct amba_device *dev) { return amba_rev(dev) < 3 ? 16 : 32; @@ -134,6 +139,10 @@ static unsigned int get_fifosize_arm(struct amba_device *dev) static struct vendor_data vendor_arm = { .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, + .fr_busy = UART01x_FR_BUSY, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, .lcrh_tx = REG_LCRH, .lcrh_rx = REG_LCRH, .reg_lut = arm_reg, @@ -144,8 +153,13 @@ static struct vendor_data vendor_arm = { .fixed_options = false, .get_fifosize = get_fifosize_arm, }; +#endif static struct vendor_data vendor_sbsa = { + .fr_busy = UART01x_FR_BUSY, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, .reg_lut = arm_reg, .oversampling = false, .dma_threshold = false, @@ -154,6 +168,7 @@ static struct vendor_data vendor_sbsa = { .fixed_options = true, }; +#ifdef CONFIG_ARM_AMBA static u16 st_reg[] = { [REG_DR] = UART01x_DR, [REG_RSR] = UART01x_RSR, @@ -180,6 +195,10 @@ static unsigned int get_fifosize_st(struct amba_device *dev) static struct vendor_data vendor_st = { .ifls = UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF, + .fr_busy = UART01x_FR_BUSY, + .fr_dsr = UART01x_FR_DSR, + .fr_cts = UART01x_FR_CTS, + .fr_ri = UART011_FR_RI, .lcrh_tx = REG_LCRH, .lcrh_rx = REG_ST_LCRH_RX, .reg_lut = st_reg, @@ -190,6 +209,43 @@ static struct vendor_data vendor_st = { .fixed_options = 
false, .get_fifosize = get_fifosize_st, }; +#endif + +#ifdef CONFIG_SOC_ZX296702 +static u16 zte_reg[] = { + [REG_DR] = ZX_UART01x_DR, + [REG_RSR] = UART01x_RSR, + [REG_ST_DMAWM] = ST_UART011_DMAWM, + [REG_FR] = ZX_UART01x_FR, + [REG_ST_LCRH_RX] = ST_UART011_LCRH_RX, + [REG_ILPR] = UART01x_ILPR, + [REG_IBRD] = UART011_IBRD, + [REG_FBRD] = UART011_FBRD, + [REG_LCRH] = ZX_UART011_LCRH_TX, + [REG_CR] = ZX_UART011_CR, + [REG_IFLS] = ZX_UART011_IFLS, + [REG_IMSC] = ZX_UART011_IMSC, + [REG_RIS] = ZX_UART011_RIS, + [REG_MIS] = ZX_UART011_MIS, + [REG_ICR] = ZX_UART011_ICR, + [REG_DMACR] = ZX_UART011_DMACR, +}; + +static struct vendor_data vendor_zte = { + .ifls = UART011_IFLS_RX4_8|UART011_IFLS_TX4_8, + .fr_busy = ZX_UART01x_FR_BUSY, + .fr_dsr = ZX_UART01x_FR_DSR, + .fr_cts = ZX_UART01x_FR_CTS, + .fr_ri = ZX_UART011_FR_RI, + .lcrh_tx = REG_LCRH, + .lcrh_rx = REG_ST_LCRH_RX, + .reg_lut = zte_reg, + .oversampling = false, + .dma_threshold = false, + .cts_event_workaround = false, + .fixed_options = false, +}; +#endif /* Deals with DMA transactions */ @@ -233,6 +289,10 @@ struct uart_amba_port { unsigned int im; /* interrupt mask */ unsigned int old_status; unsigned int fifosize; /* vendor-specific */ + unsigned int fr_busy; /* vendor-specific */ + unsigned int fr_dsr; /* vendor-specific */ + unsigned int fr_cts; /* vendor-specific */ + unsigned int fr_ri; /* vendor-specific */ unsigned int lcrh_tx; /* vendor-specific */ unsigned int lcrh_rx; /* vendor-specific */ unsigned int old_cr; /* state during shutdown */ @@ -1163,7 +1223,7 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap) return; /* Disable RX and TX DMA */ - while (pl011_readw(uap, REG_FR) & UART01x_FR_BUSY) + while (pl011_readw(uap, REG_FR) & uap->fr_busy) barrier(); spin_lock_irq(&uap->port.lock); @@ -1412,11 +1472,11 @@ static void pl011_modem_status(struct uart_amba_port *uap) if (delta & UART01x_FR_DCD) uart_handle_dcd_change(&uap->port, status & UART01x_FR_DCD); - if (delta & UART01x_FR_DSR) + if 
(delta & uap->fr_dsr) uap->port.icount.dsr++; - if (delta & UART01x_FR_CTS) - uart_handle_cts_change(&uap->port, status & UART01x_FR_CTS); + if (delta & uap->fr_cts) + uart_handle_cts_change(&uap->port, status & uap->fr_cts); wake_up_interruptible(&uap->port.state->port.delta_msr_wait); } @@ -1487,7 +1547,7 @@ static unsigned int pl011_tx_empty(struct uart_port *port) struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port); unsigned int status = pl011_readw(uap, REG_FR); - return status & (UART01x_FR_BUSY|UART01x_FR_TXFF) ? 0 : TIOCSER_TEMT; + return status & (uap->fr_busy|UART01x_FR_TXFF) ? 0 : TIOCSER_TEMT; } static unsigned int pl011_get_mctrl(struct uart_port *port) @@ -1502,9 +1562,9 @@ static unsigned int pl011_get_mctrl(struct uart_port *port) result |= tiocmbit TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR); - TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR); - TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS); - TIOCMBIT(UART011_FR_RI, TIOCM_RNG); + TIOCMBIT(uap->fr_dsr, TIOCM_DSR); + TIOCMBIT(uap->fr_cts, TIOCM_CTS); + TIOCMBIT(uap->fr_ri, TIOCM_RNG); #undef TIOCMBIT return result; } @@ -1720,8 +1780,7 @@ static int pl011_startup(struct uart_port *port) /* * initialise the old status of the modem signals */ - uap->old_status = pl011_readw(uap, REG_FR) & - UART01x_FR_MODEM_ANY; + uap->old_status = pl011_readw(uap, REG_FR) & UART01x_FR_MODEM_ANY; /* Startup DMA */ pl011_dma_startup(uap); @@ -1800,7 +1859,7 @@ static void pl011_disable_interrupts(struct uart_amba_port *uap) /* mask all interrupts and clear all pending ones */ uap->im = 0; pl011_writew(uap, uap->im, REG_IMSC); - pl011_writew(0xffff, REG_ICR); + pl011_writew(uap, 0xffff, REG_ICR); spin_unlock_irq(&uap->port.lock); } @@ -2178,7 +2237,7 @@ pl011_console_write(struct console *co, const char *s, unsigned int count) */ do { status = pl011_readw(uap, REG_FR); - } while (status & UART01x_FR_BUSY); + } while (status & uap->fr_busy); if (!uap->vendor->always_enabled) pl011_writew(uap, old_cr, REG_CR); @@ -2295,7 
+2354,7 @@ static void pl011_putc(struct uart_port *port, int c) while (pl011_readw(uap, REG_FR) & UART01x_FR_TXFF) ; pl011_writeb(uap, c, REG_DR); - while (pl011_readw(uap, REG_FR) & UART01x_FR_BUSY) + while (pl011_readw(uap, REG_FR) & uap->fr_busy) ; } @@ -2441,6 +2500,7 @@ static int pl011_register_port(struct uart_amba_port *uap) return ret; } +#ifdef CONFIG_ARM_AMBA static int pl011_probe(struct amba_device *dev, const struct amba_id *id) { struct uart_amba_port *uap; @@ -2464,6 +2524,10 @@ static int pl011_probe(struct amba_device *dev, const struct amba_id *id) uap->reg_lut = vendor->reg_lut; uap->lcrh_rx = vendor->lcrh_rx; uap->lcrh_tx = vendor->lcrh_tx; + uap->fr_busy = vendor->fr_busy; + uap->fr_dsr = vendor->fr_dsr; + uap->fr_cts = vendor->fr_cts; + uap->fr_ri = vendor->fr_ri; uap->fifosize = vendor->get_fifosize(dev); uap->port.irq = dev->irq[0]; uap->port.ops = &amba_pl011_pops; @@ -2487,6 +2551,67 @@ static int pl011_remove(struct amba_device *dev) pl011_unregister_port(uap); return 0; } +#endif + +#ifdef CONFIG_SOC_ZX296702 +static int zx_uart_probe(struct platform_device *pdev) +{ + struct uart_amba_port *uap; + struct vendor_data *vendor = &vendor_zte; + struct resource *res; + int portnr, ret; + + portnr = pl011_find_free_port(); + if (portnr < 0) + return portnr; + + uap = devm_kzalloc(&pdev->dev, sizeof(struct uart_amba_port), + GFP_KERNEL); + if (!uap) { + ret = -ENOMEM; + goto out; + } + + uap->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(uap->clk)) { + ret = PTR_ERR(uap->clk); + goto out; + } + + uap->vendor = vendor; + uap->reg_lut = vendor->reg_lut; + uap->lcrh_rx = vendor->lcrh_rx; + uap->lcrh_tx = vendor->lcrh_tx; + uap->fr_busy = vendor->fr_busy; + uap->fr_dsr = vendor->fr_dsr; + uap->fr_cts = vendor->fr_cts; + uap->fr_ri = vendor->fr_ri; + uap->fifosize = 16; + uap->port.irq = platform_get_irq(pdev, 0); + uap->port.ops = &amba_pl011_pops; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + ret = 
pl011_setup_port(&pdev->dev, uap, res, portnr); + if (ret) + return ret; + + platform_set_drvdata(pdev, uap); + + return pl011_register_port(uap); +out: + return ret; +} + +static int zx_uart_remove(struct platform_device *pdev) +{ + struct uart_amba_port *uap = platform_get_drvdata(pdev); + + uart_remove_one_port(&amba_reg, &uap->port); + pl011_unregister_port(uap); + return 0; +} +#endif #ifdef CONFIG_PM_SLEEP static int pl011_suspend(struct device *dev) @@ -2544,6 +2669,10 @@ static int sbsa_uart_probe(struct platform_device *pdev) uap->vendor = &vendor_sbsa; uap->reg_lut = vendor_sbsa.reg_lut; + uap->fr_busy = vendor_sbsa.fr_busy; + uap->fr_dsr = vendor_sbsa.fr_dsr; + uap->fr_cts = vendor_sbsa.fr_cts; + uap->fr_ri = vendor_sbsa.fr_ri; uap->fifosize = 32; uap->port.irq = platform_get_irq(pdev, 0); uap->port.ops = &sbsa_uart_pops; @@ -2593,6 +2722,7 @@ static struct platform_driver arm_sbsa_uart_platform_driver = { }, }; +#ifdef CONFIG_ARM_AMBA static struct amba_id pl011_ids[] = { { .id = 0x00041011, @@ -2618,20 +2748,57 @@ static struct amba_driver pl011_driver = { .probe = pl011_probe, .remove = pl011_remove, }; +#endif + +#ifdef CONFIG_SOC_ZX296702 +static const struct of_device_id zx_uart_dt_ids[] = { + { .compatible = "zte,zx296702-uart", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, zx_uart_dt_ids); + +static struct platform_driver zx_uart_driver = { + .driver = { + .name = "zx-uart", + .owner = THIS_MODULE, + .pm = &pl011_dev_pm_ops, + .of_match_table = zx_uart_dt_ids, + }, + .probe = zx_uart_probe, + .remove = zx_uart_remove, +}; +#endif + static int __init pl011_init(void) { + int ret; printk(KERN_INFO "Serial: AMBA PL011 UART driver\n"); if (platform_driver_register(&arm_sbsa_uart_platform_driver)) pr_warn("could not register SBSA UART platform driver\n"); - return amba_driver_register(&pl011_driver); + +#ifdef CONFIG_SOC_ZX296702 + ret = platform_driver_register(&zx_uart_driver); + if (ret) + pr_warn("could not register ZX UART platform 
driver\n"); +#endif + +#ifdef CONFIG_ARM_AMBA + ret = amba_driver_register(&pl011_driver); +#endif + return ret; } static void __exit pl011_exit(void) { platform_driver_unregister(&arm_sbsa_uart_platform_driver); +#ifdef CONFIG_SOC_ZX296702 + platform_driver_unregister(&zx_uart_driver); +#endif +#ifdef CONFIG_ARM_AMBA amba_driver_unregister(&pl011_driver); +#endif } /* diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h index 0ddb5c02ad8b..6a0a89ed7f81 100644 --- a/include/linux/amba/serial.h +++ b/include/linux/amba/serial.h @@ -33,12 +33,14 @@ #define UART01x_DR 0x00 /* Data read or written from the interface. */ #define UART01x_RSR 0x04 /* Receive status register (Read). */ #define UART01x_ECR 0x04 /* Error clear register (Write). */ +#define ZX_UART01x_DR 0x04 /* Data read or written from the interface. */ #define UART010_LCRH 0x08 /* Line control register, high byte. */ #define ST_UART011_DMAWM 0x08 /* DMA watermark configure register. */ #define UART010_LCRM 0x0C /* Line control register, middle byte. */ #define ST_UART011_TIMEOUT 0x0C /* Timeout period register. */ #define UART010_LCRL 0x10 /* Line control register, low byte. */ #define UART010_CR 0x14 /* Control register. */ +#define ZX_UART01x_FR 0x14 /* Flag register (Read only). */ #define UART01x_FR 0x18 /* Flag register (Read only). */ #define UART010_IIR 0x1C /* Interrupt identification register (Read). */ #define UART010_ICR 0x1C /* Interrupt clear register (Write). */ @@ -49,13 +51,21 @@ #define UART011_LCRH 0x2c /* Line control register. */ #define ST_UART011_LCRH_TX 0x2c /* Tx Line control register. */ #define UART011_CR 0x30 /* Control register. */ +#define ZX_UART011_LCRH_TX 0x30 /* Tx Line control register. */ #define UART011_IFLS 0x34 /* Interrupt fifo level select. */ +#define ZX_UART011_CR 0x34 /* Control register. */ +#define ZX_UART011_IFLS 0x38 /* Interrupt fifo level select. */ #define UART011_IMSC 0x38 /* Interrupt mask. 
*/ #define UART011_RIS 0x3c /* Raw interrupt status. */ #define UART011_MIS 0x40 /* Masked interrupt status. */ +#define ZX_UART011_IMSC 0x40 /* Interrupt mask. */ #define UART011_ICR 0x44 /* Interrupt clear register. */ +#define ZX_UART011_RIS 0x44 /* Raw interrupt status. */ #define UART011_DMACR 0x48 /* DMA control register. */ +#define ZX_UART011_MIS 0x48 /* Masked interrupt status. */ +#define ZX_UART011_ICR 0x4c /* Interrupt clear register. */ #define ST_UART011_XFCR 0x50 /* XON/XOFF control register. */ +#define ZX_UART011_DMACR 0x50 /* DMA control register. */ #define ST_UART011_XON1 0x54 /* XON1 register. */ #define ST_UART011_XON2 0x58 /* XON2 register. */ #define ST_UART011_XOFF1 0x5C /* XON1 register. */ @@ -75,15 +85,19 @@ #define UART01x_RSR_PE 0x02 #define UART01x_RSR_FE 0x01 +#define ZX_UART01x_FR_BUSY 0x300 #define UART011_FR_RI 0x100 #define UART011_FR_TXFE 0x080 #define UART011_FR_RXFF 0x040 #define UART01x_FR_TXFF 0x020 #define UART01x_FR_RXFE 0x010 #define UART01x_FR_BUSY 0x008 +#define ZX_UART01x_FR_DSR 0x008 #define UART01x_FR_DCD 0x004 #define UART01x_FR_DSR 0x002 +#define ZX_UART01x_FR_CTS 0x002 #define UART01x_FR_CTS 0x001 +#define ZX_UART011_FR_RI 0x001 #define UART01x_FR_TMSK (UART01x_FR_TXFF + UART01x_FR_BUSY) #define UART011_CR_CTSEN 0x8000 /* CTS hardware flow control */ -- cgit v1.2.3-70-g09d2 From 77a68e56aae141d3e9c740a0ac43362af75d4890 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 20 Jul 2015 10:41:32 +0200 Subject: dmaengine: Add an enum for the dmaengine alignment constraints Most drivers need to set constraints on the buffer alignment for async tx operations. However, even though it is documented, some drivers either use a defined constant that is not matching what the alignment variable expects (like DMA_BUSWIDTH_* constants) or fill the alignment in bytes instead of power of two. Add a new enum for these alignments that matches what the framework expects, and convert the drivers to it. 
Signed-off-by: Maxime Ripard Signed-off-by: Vinod Koul --- drivers/dma/coh901318.c | 2 +- drivers/dma/dma-jz4780.c | 2 +- drivers/dma/edma.c | 2 +- drivers/dma/imx-dma.c | 2 +- drivers/dma/k3dma.c | 3 +-- drivers/dma/mic_x100_dma.h | 2 +- drivers/dma/mmp_pdma.c | 3 +-- drivers/dma/mmp_tdma.c | 3 +-- drivers/dma/ste_dma40.c | 2 +- drivers/dma/sun6i-dma.c | 2 +- drivers/dma/xgene-dma.c | 5 ++--- include/linux/dmaengine.h | 25 ++++++++++++++++++++----- 12 files changed, 32 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index fd22dd36985f..c340ca9bd2b5 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -2730,7 +2730,7 @@ static int __init coh901318_probe(struct platform_device *pdev) * This controller can only access address at even 32bit boundaries, * i.e. 2^2 */ - base->dma_memcpy.copy_align = 2; + base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; err = dma_async_device_register(&base->dma_memcpy); if (err) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 26d2f0e09ea3..c29569ac9e4f 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -775,7 +775,7 @@ static int jz4780_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_CYCLIC, dd->cap_mask); dd->dev = dev; - dd->copy_align = 2; /* 2^2 = 4 byte alignment */ + dd->copy_align = DMAENGINE_ALIGN_4_BYTES; dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources; dd->device_free_chan_resources = jz4780_dma_free_chan_resources; dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg; diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 88853af69489..3e5d4f193005 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -1000,7 +1000,7 @@ static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma, * code using dma memcpy must make sure alignment of * length is at dma->copy_align boundary. 
*/ - dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma->copy_align = DMAENGINE_ALIGN_4_BYTES; INIT_LIST_HEAD(&dma->channels); } diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 139c5676cd74..48d85f8b95fe 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -1187,7 +1187,7 @@ static int __init imxdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxdma); - imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */ + imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES; imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms; dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 647e362f01fd..1ba2fd73852d 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -24,7 +24,6 @@ #include "virt-dma.h" #define DRIVER_NAME "k3-dma" -#define DMA_ALIGN 3 #define DMA_MAX_SIZE 0x1ffc #define INT_STAT 0x00 @@ -732,7 +731,7 @@ static int k3_dma_probe(struct platform_device *op) d->slave.device_pause = k3_dma_transfer_pause; d->slave.device_resume = k3_dma_transfer_resume; d->slave.device_terminate_all = k3_dma_terminate_all; - d->slave.copy_align = DMA_ALIGN; + d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES; /* init virtual channel */ d->chans = devm_kzalloc(&op->dev, diff --git a/drivers/dma/mic_x100_dma.h b/drivers/dma/mic_x100_dma.h index f663b0bdd11d..d89982034e68 100644 --- a/drivers/dma/mic_x100_dma.h +++ b/drivers/dma/mic_x100_dma.h @@ -39,7 +39,7 @@ */ #define MIC_DMA_MAX_NUM_CHAN 8 #define MIC_DMA_NUM_CHAN 4 -#define MIC_DMA_ALIGN_SHIFT 6 +#define MIC_DMA_ALIGN_SHIFT DMAENGINE_ALIGN_64_BYTES #define MIC_DMA_ALIGN_BYTES (1 << MIC_DMA_ALIGN_SHIFT) #define MIC_DMA_DESC_RX_SIZE (128 * 1024 - 4) diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index 462a0229a743..e39457f13d4d 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -72,7 +72,6 @@ #define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ #define DCMD_LENGTH 0x01fff /* 
length mask (max = 8K - 1) */ -#define PDMA_ALIGNMENT 3 #define PDMA_MAX_DESC_BYTES DCMD_LENGTH struct mmp_pdma_desc_hw { @@ -1071,7 +1070,7 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->device.device_issue_pending = mmp_pdma_issue_pending; pdev->device.device_config = mmp_pdma_config; pdev->device.device_terminate_all = mmp_pdma_terminate_all; - pdev->device.copy_align = PDMA_ALIGNMENT; + pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; pdev->device.src_addr_widths = widths; pdev->device.dst_addr_widths = widths; pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c index e683761e0f8f..3df0422607d5 100644 --- a/drivers/dma/mmp_tdma.c +++ b/drivers/dma/mmp_tdma.c @@ -100,7 +100,6 @@ enum mmp_tdma_type { PXA910_SQU, }; -#define TDMA_ALIGNMENT 3 #define TDMA_MAX_XFER_BYTES SZ_64K struct mmp_tdma_chan { @@ -695,7 +694,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) tdev->device.device_pause = mmp_tdma_pause_chan; tdev->device.device_resume = mmp_tdma_resume_chan; tdev->device.device_terminate_all = mmp_tdma_terminate_all; - tdev->device.copy_align = TDMA_ALIGNMENT; + tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); platform_set_drvdata(pdev, tdev); diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 3c10f034d4b9..750d1b313684 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -2853,7 +2853,7 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) * This controller can only access address at even * 32bit boundaries, i.e. 
2^2 */ - dev->copy_align = 2; + dev->copy_align = DMAENGINE_ALIGN_4_BYTES; } if (dma_has_cap(DMA_SG, dev->cap_mask)) diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c index 842ff97c2cfb..73e0be6e2100 100644 --- a/drivers/dma/sun6i-dma.c +++ b/drivers/dma/sun6i-dma.c @@ -969,7 +969,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; - sdc->slave.copy_align = 4; + sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; sdc->slave.device_resume = sun6i_dma_resume; diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c index 620fd55ec766..fe87a634b145 100644 --- a/drivers/dma/xgene-dma.c +++ b/drivers/dma/xgene-dma.c @@ -150,7 +150,6 @@ #define XGENE_DMA_PQ_CHANNEL 1 #define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ #define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ -#define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 #define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL @@ -1740,13 +1739,13 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC; - dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES; } if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { dma_dev->device_prep_dma_pq = xgene_dma_prep_pq; dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC; - dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES; } } diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index e2f5eb419976..03ed832adbc2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -584,6 
+584,20 @@ struct dma_tx_state { u32 residue; }; +/** + * enum dmaengine_alignment - defines alignment of the DMA async tx + * buffers + */ +enum dmaengine_alignment { + DMAENGINE_ALIGN_1_BYTE = 0, + DMAENGINE_ALIGN_2_BYTES = 1, + DMAENGINE_ALIGN_4_BYTES = 2, + DMAENGINE_ALIGN_8_BYTES = 3, + DMAENGINE_ALIGN_16_BYTES = 4, + DMAENGINE_ALIGN_32_BYTES = 5, + DMAENGINE_ALIGN_64_BYTES = 6, +}; + /** * struct dma_device - info on the entity supplying DMA services * @chancnt: how many DMA channels are supported @@ -645,10 +659,10 @@ struct dma_device { dma_cap_mask_t cap_mask; unsigned short max_xor; unsigned short max_pq; - u8 copy_align; - u8 xor_align; - u8 pq_align; - u8 fill_align; + enum dmaengine_alignment copy_align; + enum dmaengine_alignment xor_align; + enum dmaengine_alignment pq_align; + enum dmaengine_alignment fill_align; #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; @@ -833,7 +847,8 @@ static inline dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc return desc->tx_submit(desc); } -static inline bool dmaengine_check_align(u8 align, size_t off1, size_t off2, size_t len) +static inline bool dmaengine_check_align(enum dmaengine_alignment align, + size_t off1, size_t off2, size_t len) { size_t mask; -- cgit v1.2.3-70-g09d2 From 2b94ed245861a7d378dcde6eef7fa7717e06e349 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 1 Aug 2015 16:08:06 -0700 Subject: kexec: define kexec_in_progress in !CONFIG_KEXEC case If some piece of code wants to check kexec_in_progress it has to be put in #ifdef CONFIG_KEXEC block to not break the build in !CONFIG_KEXEC case. Overcome this limitation by defining kexec_in_progress to false. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/kexec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index e804306ef5e8..b63218f68c4b 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -323,6 +323,7 @@ struct pt_regs; struct task_struct; static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } +#define kexec_in_progress false #endif /* CONFIG_KEXEC */ #endif /* !defined(__ASSEBMLY__) */ -- cgit v1.2.3-70-g09d2 From 1a1d48a4a8fde49aedc045d894efe67173d59fe0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 4 Aug 2015 16:15:14 +0200 Subject: linux/bitmap: Force inlining of bitmap weight functions With this config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os gcc-4.7.2 generates many copies of these tiny functions: bitmap_weight (55 copies): 55 push %rbp 48 89 e5 mov %rsp,%rbp e8 3f 3a 8b 00 callq __bitmap_weight 5d pop %rbp c3 retq hweight_long (23 copies): 55 push %rbp e8 b5 65 8e 00 callq __sw_hweight64 48 89 e5 mov %rsp,%rbp 5d pop %rbp c3 retq See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 This patch fixes this via s/inline/__always_inline/ While at it, replaced two "__inline__" with usual "inline" (the rest of the source file uses the latter). 
text data bss dec filename 86971357 17195880 36659200 140826437 vmlinux.before 86971120 17195912 36659200 140826232 vmlinux Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Graf Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1438697716-28121-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- include/linux/bitmap.h | 2 +- include/linux/bitops.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index ea17cca9e685..9653fdb76a42 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -295,7 +295,7 @@ static inline int bitmap_full(const unsigned long *src, unsigned int nbits) return find_first_zero_bit(src, nbits) == nbits; } -static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) +static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 297f5bda4fdf..e63553386ae7 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -57,7 +57,7 @@ extern unsigned long __sw_hweight64(__u64 w); (bit) < (size); \ (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) -static __inline__ int get_bitmask_order(unsigned int count) +static inline int get_bitmask_order(unsigned int count) { int order; @@ -65,7 +65,7 @@ static __inline__ int get_bitmask_order(unsigned int count) return order; /* We could be slightly more clever with -1 here... 
*/ } -static __inline__ int get_count_order(unsigned int count) +static inline int get_count_order(unsigned int count) { int order; @@ -75,7 +75,7 @@ static __inline__ int get_count_order(unsigned int count) return order; } -static inline unsigned long hweight_long(unsigned long w) +static __always_inline unsigned long hweight_long(unsigned long w) { return sizeof(w) == 4 ? hweight32(w) : hweight64(w); } -- cgit v1.2.3-70-g09d2 From accd0b9ec015d611eb7783dd86f1bb31bf8d62ab Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 4 Aug 2015 16:15:16 +0200 Subject: jiffies: Force inlining of {m,u}msecs_to_jiffies() With this config: http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os gcc-4.7.2 generates many copies of these tiny functions: msecs_to_jiffies (45 copies): 55 push %rbp 48 89 e5 mov %rsp,%rbp e8 59 ec 03 00 callq __msecs_to_jiffies 5d pop %rbp c3 retq usecs_to_jiffies (10 copies): 55 push %rbp 48 89 e5 mov %rsp,%rbp e8 5d 54 5e ff callq __usecs_to_jiffies 5d pop %rbp c3 retq See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 This patch fixes this via s/inline/__always_inline/ text data bss dec filename 86970954 17195912 36659200 140826066 vmlinux.before 86966150 17195912 36659200 140821262 vmlinux Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Graf Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1438697716-28121-3-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- include/linux/jiffies.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 535fd3bb1ba8..1ba48a18c1d7 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -351,7 +351,7 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m) * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. 
*/ -static inline unsigned long msecs_to_jiffies(const unsigned int m) +static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) { if (__builtin_constant_p(m)) { if ((int)m < 0) @@ -405,7 +405,7 @@ static inline unsigned long _usecs_to_jiffies(const unsigned int u) * directly here and from __msecs_to_jiffies() in the case where * constant folding is not possible. */ -static inline unsigned long usecs_to_jiffies(const unsigned int u) +static __always_inline unsigned long usecs_to_jiffies(const unsigned int u) { if (__builtin_constant_p(u)) { if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) -- cgit v1.2.3-70-g09d2 From 3f3af97d8225a58ecdcde7217c030b17e5198226 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Aug 2015 12:54:46 +0100 Subject: ASN.1: Fix actions on CHOICE elements with IMPLICIT tags In an ASN.1 description where there is a CHOICE construct that contains elements with IMPLICIT tags that refer to constructed types, actions to be taken on those elements should be conditional on the corresponding element actually being matched. Currently, however, such actions are performed unconditionally in the middle of processing the CHOICE. For example, look at elements 'b' and 'e' here: A ::= SEQUENCE { CHOICE { b [0] IMPLICIT B ({ do_XXXXXXXXXXXX_b }), c [1] EXPLICIT C ({ do_XXXXXXXXXXXX_c }), d [2] EXPLICIT B ({ do_XXXXXXXXXXXX_d }), e [3] IMPLICIT C ({ do_XXXXXXXXXXXX_e }), f [4] IMPLICIT INTEGER ({ do_XXXXXXXXXXXX_f }) } } ({ do_XXXXXXXXXXXX_A }) B ::= SET OF OBJECT IDENTIFIER ({ do_XXXXXXXXXXXX_oid }) C ::= SET OF INTEGER ({ do_XXXXXXXXXXXX_int }) They each have an action (do_XXXXXXXXXXXX_b and do_XXXXXXXXXXXX_e) that should only be processed if that element is matched. The problem is that there's no easy place to hang the action off in the subclause (type B for element 'b' and type C for element 'e') because subclause opcode sequences can be shared. 
To fix this, introduce a conditional action opcode(ASN1_OP_MAYBE_ACT) that the decoder only processes if the preceding match was successful. This can be seen in an excerpt from the output of the fixed ASN.1 compiler for the above ASN.1 description: [ 13] = ASN1_OP_COND_MATCH_JUMP_OR_SKIP, // e [ 14] = _tagn(CONT, CONS, 3), [ 15] = _jump_target(45), // --> C [ 16] = ASN1_OP_MAYBE_ACT, [ 17] = _action(ACT_do_XXXXXXXXXXXX_e), In this, if the op at [13] is matched (ie. element 'e' above) then the action at [16] will be performed. However, if the op at [13] doesn't match or is skipped because it is conditional and some previous op matched, then the action at [16] will be ignored. Note that to make this work in the decoder, the ASN1_OP_RETURN op must set the flag to indicate that a match happened. This is necessary because the _jump_target() seen above introduces a subclause (in this case an object of type 'C') which is likely to alter the flag. Setting the flag here is okay because to process a subclause, a match must have happened and caused a jump. This cannot be tested with the code as it stands, but rather affects future code. 
Signed-off-by: David Howells Reviewed-by: David Woodhouse --- include/linux/asn1_ber_bytecode.h | 3 ++- lib/asn1_decoder.c | 14 +++++++++++++- scripts/asn1_compiler.c | 3 ++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/asn1_ber_bytecode.h b/include/linux/asn1_ber_bytecode.h index 945d44ae529c..27f35780aecf 100644 --- a/include/linux/asn1_ber_bytecode.h +++ b/include/linux/asn1_ber_bytecode.h @@ -61,7 +61,8 @@ enum asn1_opcode { ASN1_OP_COND_FAIL = 0x1b, ASN1_OP_COMPLETE = 0x1c, ASN1_OP_ACT = 0x1d, - ASN1_OP_RETURN = 0x1e, + ASN1_OP_MAYBE_ACT = 0x1e, + ASN1_OP_RETURN = 0x1f, /* The following eight have bit 0 -> SET, 1 -> OF, 2 -> ACT */ ASN1_OP_END_SEQ = 0x20, diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 1a000bb050f9..55980d7e1ac0 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -33,6 +33,7 @@ static const unsigned char asn1_op_lengths[ASN1_OP__NR] = { [ASN1_OP_COND_FAIL] = 1, [ASN1_OP_COMPLETE] = 1, [ASN1_OP_ACT] = 1 + 1, + [ASN1_OP_MAYBE_ACT] = 1 + 1, [ASN1_OP_RETURN] = 1, [ASN1_OP_END_SEQ] = 1, [ASN1_OP_END_SEQ_OF] = 1 + 1, @@ -177,6 +178,7 @@ int asn1_ber_decoder(const struct asn1_decoder *decoder, unsigned char flags = 0; #define FLAG_INDEFINITE_LENGTH 0x01 #define FLAG_MATCHED 0x02 +#define FLAG_LAST_MATCHED 0x04 /* Last tag matched */ #define FLAG_CONS 0x20 /* Corresponds to CONS bit in the opcode tag * - ie. whether or not we are going to parse * a compound type. 
@@ -211,6 +213,7 @@ next_op: if ((op & ASN1_OP_MATCH__COND && flags & FLAG_MATCHED) || dp == datalen) { + flags &= ~FLAG_LAST_MATCHED; pc += asn1_op_lengths[op]; goto next_op; } @@ -422,8 +425,15 @@ next_op: pc += asn1_op_lengths[op]; goto next_op; + case ASN1_OP_MAYBE_ACT: + if (!(flags & FLAG_LAST_MATCHED)) { + pc += asn1_op_lengths[op]; + goto next_op; + } case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); + if (ret < 0) + return ret; pc += asn1_op_lengths[op]; goto next_op; @@ -431,6 +441,7 @@ next_op: if (unlikely(jsp <= 0)) goto jump_stack_underflow; pc = jump_stack[--jsp]; + flags |= FLAG_MATCHED | FLAG_LAST_MATCHED; goto next_op; default: @@ -438,7 +449,8 @@ next_op: } /* Shouldn't reach here */ - pr_err("ASN.1 decoder error: Found reserved opcode (%u)\n", op); + pr_err("ASN.1 decoder error: Found reserved opcode (%u) pc=%zu\n", + op, pc); return -EBADMSG; data_overrun_error: diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index e87359cd23c0..0515bced929a 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -1468,7 +1468,8 @@ dont_render_tag: case TYPE_REF: render_element(out, e->type->type->element, tag); if (e->action) - render_opcode(out, "ASN1_OP_ACT,\n"); + render_opcode(out, "ASN1_OP_%sACT,\n", + skippable ? "MAYBE_" : ""); break; case SEQUENCE: -- cgit v1.2.3-70-g09d2 From 233ce79db4b23a174bcf30bde5d6ad913d5f46d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 5 Aug 2015 12:54:46 +0100 Subject: ASN.1: Handle 'ANY OPTIONAL' in grammar An ANY object in an ASN.1 grammar that is marked OPTIONAL should be skipped if there is no more data to be had. This can be tested by editing X.509 certificates or PKCS#7 messages to remove the NULL from subobjects that look like the following: SEQUENCE { OBJECT(2a864886f70d01010b); NULL(); } This is an algorithm identifier plus an optional parameter. 
The modified DER can be passed to one of: keyctl padd asymmetric "" @s Tested-by: Marcel Holtmann Reviewed-by: David Woodhouse --- include/linux/asn1_ber_bytecode.h | 17 +++++++++++------ lib/asn1_decoder.c | 8 ++++++++ scripts/asn1_compiler.c | 3 ++- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/asn1_ber_bytecode.h b/include/linux/asn1_ber_bytecode.h index 27f35780aecf..ab3a6c002f7b 100644 --- a/include/linux/asn1_ber_bytecode.h +++ b/include/linux/asn1_ber_bytecode.h @@ -45,24 +45,27 @@ enum asn1_opcode { ASN1_OP_MATCH_JUMP = 0x04, ASN1_OP_MATCH_JUMP_OR_SKIP = 0x05, ASN1_OP_MATCH_ANY = 0x08, + ASN1_OP_MATCH_ANY_OR_SKIP = 0x09, ASN1_OP_MATCH_ANY_ACT = 0x0a, + ASN1_OP_MATCH_ANY_ACT_OR_SKIP = 0x0b, /* Everything before here matches unconditionally */ ASN1_OP_COND_MATCH_OR_SKIP = 0x11, ASN1_OP_COND_MATCH_ACT_OR_SKIP = 0x13, ASN1_OP_COND_MATCH_JUMP_OR_SKIP = 0x15, ASN1_OP_COND_MATCH_ANY = 0x18, + ASN1_OP_COND_MATCH_ANY_OR_SKIP = 0x19, ASN1_OP_COND_MATCH_ANY_ACT = 0x1a, + ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP = 0x1b, /* Everything before here will want a tag from the data */ -#define ASN1_OP__MATCHES_TAG ASN1_OP_COND_MATCH_ANY_ACT +#define ASN1_OP__MATCHES_TAG ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP /* These are here to help fill up space */ - ASN1_OP_COND_FAIL = 0x1b, - ASN1_OP_COMPLETE = 0x1c, - ASN1_OP_ACT = 0x1d, - ASN1_OP_MAYBE_ACT = 0x1e, - ASN1_OP_RETURN = 0x1f, + ASN1_OP_COND_FAIL = 0x1c, + ASN1_OP_COMPLETE = 0x1d, + ASN1_OP_ACT = 0x1e, + ASN1_OP_MAYBE_ACT = 0x1f, /* The following eight have bit 0 -> SET, 1 -> OF, 2 -> ACT */ ASN1_OP_END_SEQ = 0x20, @@ -77,6 +80,8 @@ enum asn1_opcode { #define ASN1_OP_END__OF 0x02 #define ASN1_OP_END__ACT 0x04 + ASN1_OP_RETURN = 0x28, + ASN1_OP__NR }; diff --git a/lib/asn1_decoder.c b/lib/asn1_decoder.c index 3f74dd3e2910..2b3f46c049d4 100644 --- a/lib/asn1_decoder.c +++ b/lib/asn1_decoder.c @@ -24,12 +24,16 @@ static const unsigned char asn1_op_lengths[ASN1_OP__NR] = { 
[ASN1_OP_MATCH_JUMP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_ANY] = 1, + [ASN1_OP_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_MATCH_ANY_ACT] = 1 + 1, + [ASN1_OP_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_ANY] = 1, + [ASN1_OP_COND_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_COND_MATCH_ANY_ACT] = 1 + 1, + [ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_FAIL] = 1, [ASN1_OP_COMPLETE] = 1, [ASN1_OP_ACT] = 1 + 1, @@ -304,7 +308,9 @@ next_op: /* Decide how to handle the operation */ switch (op) { case ASN1_OP_MATCH_ANY_ACT: + case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_ANY_ACT: + case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: ret = actions[machine[pc + 1]](context, hdr, tag, data + dp, len); if (ret < 0) return ret; @@ -321,8 +327,10 @@ next_op: case ASN1_OP_MATCH: case ASN1_OP_MATCH_OR_SKIP: case ASN1_OP_MATCH_ANY: + case ASN1_OP_MATCH_ANY_OR_SKIP: case ASN1_OP_COND_MATCH_OR_SKIP: case ASN1_OP_COND_MATCH_ANY: + case ASN1_OP_COND_MATCH_ANY_OR_SKIP: skip_data: if (!(flags & FLAG_CONS)) { if (flags & FLAG_INDEFINITE_LENGTH) { diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index 0515bced929a..1c75e22b6385 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -1401,7 +1401,8 @@ static void render_element(FILE *out, struct element *e, struct element *tag) act = e->action ? "_ACT" : ""; switch (e->compound) { case ANY: - render_opcode(out, "ASN1_OP_%sMATCH_ANY%s,", cond, act); + render_opcode(out, "ASN1_OP_%sMATCH_ANY%s%s,", + cond, act, skippable ? 
"_OR_SKIP" : ""); if (e->name) render_more(out, "\t\t// %*.*s", (int)e->name->size, (int)e->name->size, -- cgit v1.2.3-70-g09d2 From 9f55eb92441883a1afca48dc8d32bf62c4d8e833 Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Tue, 28 Jul 2015 15:30:39 +0800 Subject: ARM: imx6ul: add fec bits to GPR syscon definition FEC requires additional bits to select refrence clock. Signed-off-by: Fugang Duan Signed-off-by: Shawn Guo --- include/linux/mfd/syscon/imx6q-iomuxc-gpr.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h index d16f4c82c568..558a485d03ab 100644 --- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h @@ -435,4 +435,12 @@ #define IMX6SX_GPR5_DISP_MUX_DCIC1_LVDS (0x1 << 1) #define IMX6SX_GPR5_DISP_MUX_DCIC1_MASK (0x1 << 1) +/* For imx6ul iomux gpr register field define */ +#define IMX6UL_GPR1_ENET1_CLK_DIR (0x1 << 17) +#define IMX6UL_GPR1_ENET2_CLK_DIR (0x1 << 18) +#define IMX6UL_GPR1_ENET1_CLK_OUTPUT (0x1 << 17) +#define IMX6UL_GPR1_ENET2_CLK_OUTPUT (0x1 << 18) +#define IMX6UL_GPR1_ENET_CLK_DIR (0x3 << 17) +#define IMX6UL_GPR1_ENET_CLK_OUTPUT (0x3 << 17) + #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */ -- cgit v1.2.3-70-g09d2 From 44e259ac909f3b41786cf732a44b5cf8444e098a Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Jul 2015 19:59:36 +0100 Subject: ARM: dove: create a proper PMU driver for power domains, PMU IRQs and resets The PMU device contains an interrupt controller, power control and resets. The interrupt controller is a little sub-standard in that there is no race free way to clear down pending interrupts, so we try to avoid problems by reducing the window as much as possible, and clearing as infrequently as possible. The interrupt support is implemented using an IRQ domain, and the parent interrupt referenced in the standard DT way. 
The power domains and reset support is closely related - there is a defined sequence for powering down a domain which is tightly coupled with asserting the reset. Hence, it makes sense to group these two together, and in order to avoid any locking contention disrupting this sequence, we avoid the use of syscon or regmap. This patch adds the core PMU driver: power domains must be defined in the DT file in order to make use of them. The reset controller can be referenced in the standard way for reset controllers. Signed-off-by: Russell King Signed-off-by: Andrew Lunn Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/Kconfig | 1 + arch/arm/mach-mvebu/dove.c | 2 + drivers/soc/Makefile | 1 + drivers/soc/dove/Makefile | 1 + drivers/soc/dove/pmu.c | 412 +++++++++++++++++++++++++++++++++++++++++++ include/linux/soc/dove/pmu.h | 6 + 6 files changed, 423 insertions(+) create mode 100644 drivers/soc/dove/Makefile create mode 100644 drivers/soc/dove/pmu.c create mode 100644 include/linux/soc/dove/pmu.h (limited to 'include/linux') diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 97473168d6b6..c86a5a0aefac 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -96,6 +96,7 @@ config MACH_DOVE select MACH_MVEBU_ANY select ORION_IRQCHIP select ORION_TIMER + select PM_GENERIC_DOMAINS if PM select PINCTRL_DOVE help Say 'Y' here if you want your kernel to support the diff --git a/arch/arm/mach-mvebu/dove.c b/arch/arm/mach-mvebu/dove.c index 5a1741500a30..1aebb82e3d7b 100644 --- a/arch/arm/mach-mvebu/dove.c +++ b/arch/arm/mach-mvebu/dove.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include "common.h" @@ -24,6 +25,7 @@ static void __init dove_init(void) tauros2_init(0); #endif BUG_ON(mvebu_mbus_dt_init(false)); + dove_init_pmu(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); } diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile index 7dc7c0d8a2c1..0b12d777d3c4 100644 --- 
a/drivers/soc/Makefile +++ b/drivers/soc/Makefile @@ -2,6 +2,7 @@ # Makefile for the Linux Kernel SOC specific device drivers. # +obj-$(CONFIG_MACH_DOVE) += dove/ obj-$(CONFIG_ARCH_MEDIATEK) += mediatek/ obj-$(CONFIG_ARCH_QCOM) += qcom/ obj-$(CONFIG_ARCH_SUNXI) += sunxi/ diff --git a/drivers/soc/dove/Makefile b/drivers/soc/dove/Makefile new file mode 100644 index 000000000000..2db8e65513a3 --- /dev/null +++ b/drivers/soc/dove/Makefile @@ -0,0 +1 @@ +obj-y += pmu.o diff --git a/drivers/soc/dove/pmu.c b/drivers/soc/dove/pmu.c new file mode 100644 index 000000000000..6792aae9e2e5 --- /dev/null +++ b/drivers/soc/dove/pmu.c @@ -0,0 +1,412 @@ +/* + * Marvell Dove PMU support + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NR_PMU_IRQS 7 + +#define PMC_SW_RST 0x30 +#define PMC_IRQ_CAUSE 0x50 +#define PMC_IRQ_MASK 0x54 + +#define PMU_PWR 0x10 +#define PMU_ISO 0x58 + +struct pmu_data { + spinlock_t lock; + struct device_node *of_node; + void __iomem *pmc_base; + void __iomem *pmu_base; + struct irq_chip_generic *irq_gc; + struct irq_domain *irq_domain; +#ifdef CONFIG_RESET_CONTROLLER + struct reset_controller_dev reset; +#endif +}; + +/* + * The PMU contains a register to reset various subsystems within the + * SoC. Export this as a reset controller. 
+ */ +#ifdef CONFIG_RESET_CONTROLLER +#define rcdev_to_pmu(rcdev) container_of(rcdev, struct pmu_data, reset) + +static int pmu_reset_reset(struct reset_controller_dev *rc, unsigned long id) +{ + struct pmu_data *pmu = rcdev_to_pmu(rc); + unsigned long flags; + u32 val; + + spin_lock_irqsave(&pmu->lock, flags); + val = readl_relaxed(pmu->pmc_base + PMC_SW_RST); + writel_relaxed(val & ~BIT(id), pmu->pmc_base + PMC_SW_RST); + writel_relaxed(val | BIT(id), pmu->pmc_base + PMC_SW_RST); + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static int pmu_reset_assert(struct reset_controller_dev *rc, unsigned long id) +{ + struct pmu_data *pmu = rcdev_to_pmu(rc); + unsigned long flags; + u32 val = ~BIT(id); + + spin_lock_irqsave(&pmu->lock, flags); + val &= readl_relaxed(pmu->pmc_base + PMC_SW_RST); + writel_relaxed(val, pmu->pmc_base + PMC_SW_RST); + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static int pmu_reset_deassert(struct reset_controller_dev *rc, unsigned long id) +{ + struct pmu_data *pmu = rcdev_to_pmu(rc); + unsigned long flags; + u32 val = BIT(id); + + spin_lock_irqsave(&pmu->lock, flags); + val |= readl_relaxed(pmu->pmc_base + PMC_SW_RST); + writel_relaxed(val, pmu->pmc_base + PMC_SW_RST); + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static struct reset_control_ops pmu_reset_ops = { + .reset = pmu_reset_reset, + .assert = pmu_reset_assert, + .deassert = pmu_reset_deassert, +}; + +static struct reset_controller_dev pmu_reset __initdata = { + .ops = &pmu_reset_ops, + .owner = THIS_MODULE, + .nr_resets = 32, +}; + +static void __init pmu_reset_init(struct pmu_data *pmu) +{ + int ret; + + pmu->reset = pmu_reset; + pmu->reset.of_node = pmu->of_node; + + ret = reset_controller_register(&pmu->reset); + if (ret) + pr_err("pmu: %s failed: %d\n", "reset_controller_register", ret); +} +#else +static void __init pmu_reset_init(struct pmu_data *pmu) +{ +} +#endif + +struct pmu_domain { + struct pmu_data *pmu; + u32 
pwr_mask; + u32 rst_mask; + u32 iso_mask; + struct generic_pm_domain base; +}; + +#define to_pmu_domain(dom) container_of(dom, struct pmu_domain, base) + +/* + * This deals with the "old" Marvell sequence of bringing a power domain + * down/up, which is: apply power, release reset, disable isolators. + * + * Later devices apparantly use a different sequence: power up, disable + * isolators, assert repair signal, enable SRMA clock, enable AXI clock, + * enable module clock, deassert reset. + * + * Note: reading the assembly, it seems that the IO accessors have an + * unfortunate side-effect - they cause memory already read into registers + * for the if () to be re-read for the bit-set or bit-clear operation. + * The code is written to avoid this. + */ +static int pmu_domain_power_off(struct generic_pm_domain *domain) +{ + struct pmu_domain *pmu_dom = to_pmu_domain(domain); + struct pmu_data *pmu = pmu_dom->pmu; + unsigned long flags; + unsigned int val; + void __iomem *pmu_base = pmu->pmu_base; + void __iomem *pmc_base = pmu->pmc_base; + + spin_lock_irqsave(&pmu->lock, flags); + + /* Enable isolators */ + if (pmu_dom->iso_mask) { + val = ~pmu_dom->iso_mask; + val &= readl_relaxed(pmu_base + PMU_ISO); + writel_relaxed(val, pmu_base + PMU_ISO); + } + + /* Reset unit */ + if (pmu_dom->rst_mask) { + val = ~pmu_dom->rst_mask; + val &= readl_relaxed(pmc_base + PMC_SW_RST); + writel_relaxed(val, pmc_base + PMC_SW_RST); + } + + /* Power down */ + val = readl_relaxed(pmu_base + PMU_PWR) | pmu_dom->pwr_mask; + writel_relaxed(val, pmu_base + PMU_PWR); + + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static int pmu_domain_power_on(struct generic_pm_domain *domain) +{ + struct pmu_domain *pmu_dom = to_pmu_domain(domain); + struct pmu_data *pmu = pmu_dom->pmu; + unsigned long flags; + unsigned int val; + void __iomem *pmu_base = pmu->pmu_base; + void __iomem *pmc_base = pmu->pmc_base; + + spin_lock_irqsave(&pmu->lock, flags); + + /* Power on */ + val = 
~pmu_dom->pwr_mask & readl_relaxed(pmu_base + PMU_PWR); + writel_relaxed(val, pmu_base + PMU_PWR); + + /* Release reset */ + if (pmu_dom->rst_mask) { + val = pmu_dom->rst_mask; + val |= readl_relaxed(pmc_base + PMC_SW_RST); + writel_relaxed(val, pmc_base + PMC_SW_RST); + } + + /* Disable isolators */ + if (pmu_dom->iso_mask) { + val = pmu_dom->iso_mask; + val |= readl_relaxed(pmu_base + PMU_ISO); + writel_relaxed(val, pmu_base + PMU_ISO); + } + + spin_unlock_irqrestore(&pmu->lock, flags); + + return 0; +} + +static void __pmu_domain_register(struct pmu_domain *domain, + struct device_node *np) +{ + unsigned int val = readl_relaxed(domain->pmu->pmu_base + PMU_PWR); + + domain->base.power_off = pmu_domain_power_off; + domain->base.power_on = pmu_domain_power_on; + + pm_genpd_init(&domain->base, NULL, !(val & domain->pwr_mask)); + + if (np) + of_genpd_add_provider_simple(np, &domain->base); +} + +/* PMU IRQ controller */ +static void pmu_irq_handler(unsigned int irq, struct irq_desc *desc) +{ + struct pmu_data *pmu = irq_get_handler_data(irq); + struct irq_chip_generic *gc = pmu->irq_gc; + struct irq_domain *domain = pmu->irq_domain; + void __iomem *base = gc->reg_base; + u32 stat = readl_relaxed(base + PMC_IRQ_CAUSE) & gc->mask_cache; + u32 done = ~0; + + if (stat == 0) { + handle_bad_irq(irq, desc); + return; + } + + while (stat) { + u32 hwirq = fls(stat) - 1; + + stat &= ~(1 << hwirq); + done &= ~(1 << hwirq); + + generic_handle_irq(irq_find_mapping(domain, hwirq)); + } + + /* + * The PMU mask register is not RW0C: it is RW. This means that + * the bits take whatever value is written to them; if you write + * a '1', you will set the interrupt. + * + * Unfortunately this means there is NO race free way to clear + * these interrupts. + * + * So, let's structure the code so that the window is as small as + * possible. 
+ */ + irq_gc_lock(gc); + done &= readl_relaxed(base + PMC_IRQ_CAUSE); + writel_relaxed(done, base + PMC_IRQ_CAUSE); + irq_gc_unlock(gc); +} + +static int __init dove_init_pmu_irq(struct pmu_data *pmu, int irq) +{ + const char *name = "pmu_irq"; + struct irq_chip_generic *gc; + struct irq_domain *domain; + int ret; + + /* mask and clear all interrupts */ + writel(0, pmu->pmc_base + PMC_IRQ_MASK); + writel(0, pmu->pmc_base + PMC_IRQ_CAUSE); + + domain = irq_domain_add_linear(pmu->of_node, NR_PMU_IRQS, + &irq_generic_chip_ops, NULL); + if (!domain) { + pr_err("%s: unable to add irq domain\n", name); + return -ENOMEM; + } + + ret = irq_alloc_domain_generic_chips(domain, NR_PMU_IRQS, 1, name, + handle_level_irq, + IRQ_NOREQUEST | IRQ_NOPROBE, 0, + IRQ_GC_INIT_MASK_CACHE); + if (ret) { + pr_err("%s: unable to alloc irq domain gc: %d\n", name, ret); + irq_domain_remove(domain); + return ret; + } + + gc = irq_get_domain_generic_chip(domain, 0); + gc->reg_base = pmu->pmc_base; + gc->chip_types[0].regs.mask = PMC_IRQ_MASK; + gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit; + gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; + + pmu->irq_domain = domain; + pmu->irq_gc = gc; + + irq_set_handler_data(irq, pmu); + irq_set_chained_handler(irq, pmu_irq_handler); + + return 0; +} + +/* + * pmu: power-manager@d0000 { + * compatible = "marvell,dove-pmu"; + * reg = <0xd0000 0x8000> <0xd8000 0x8000>; + * interrupts = <33>; + * interrupt-controller; + * #reset-cells = 1; + * vpu_domain: vpu-domain { + * #power-domain-cells = <0>; + * marvell,pmu_pwr_mask = <0x00000008>; + * marvell,pmu_iso_mask = <0x00000001>; + * resets = <&pmu 16>; + * }; + * gpu_domain: gpu-domain { + * #power-domain-cells = <0>; + * marvell,pmu_pwr_mask = <0x00000004>; + * marvell,pmu_iso_mask = <0x00000002>; + * resets = <&pmu 18>; + * }; + * }; + */ +int __init dove_init_pmu(void) +{ + struct device_node *np_pmu, *domains_node, *np; + struct pmu_data *pmu; + int ret, parent_irq; + + /* Lookup the PMU 
node */ + np_pmu = of_find_compatible_node(NULL, NULL, "marvell,dove-pmu"); + if (!np_pmu) + return 0; + + domains_node = of_get_child_by_name(np_pmu, "domains"); + if (!domains_node) { + pr_err("%s: failed to find domains sub-node\n", np_pmu->name); + return 0; + } + + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + spin_lock_init(&pmu->lock); + pmu->of_node = np_pmu; + pmu->pmc_base = of_iomap(pmu->of_node, 0); + pmu->pmu_base = of_iomap(pmu->of_node, 1); + if (!pmu->pmc_base || !pmu->pmu_base) { + pr_err("%s: failed to map PMU\n", np_pmu->name); + iounmap(pmu->pmu_base); + iounmap(pmu->pmc_base); + kfree(pmu); + return -ENOMEM; + } + + pmu_reset_init(pmu); + + for_each_available_child_of_node(domains_node, np) { + struct of_phandle_args args; + struct pmu_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + break; + + domain->pmu = pmu; + domain->base.name = kstrdup(np->name, GFP_KERNEL); + if (!domain->base.name) { + kfree(domain); + break; + } + + of_property_read_u32(np, "marvell,pmu_pwr_mask", + &domain->pwr_mask); + of_property_read_u32(np, "marvell,pmu_iso_mask", + &domain->iso_mask); + + /* + * We parse the reset controller property directly here + * to ensure that we can operate when the reset controller + * support is not configured into the kernel. + */ + ret = of_parse_phandle_with_args(np, "resets", "#reset-cells", + 0, &args); + if (ret == 0) { + if (args.np == pmu->of_node) + domain->rst_mask = BIT(args.args[0]); + of_node_put(args.np); + } + + __pmu_domain_register(domain, np); + } + pm_genpd_poweroff_unused(); + + /* Loss of the interrupt controller is not a fatal error. 
*/ + parent_irq = irq_of_parse_and_map(pmu->of_node, 0); + if (!parent_irq) { + pr_err("%s: no interrupt specified\n", np_pmu->name); + } else { + ret = dove_init_pmu_irq(pmu, parent_irq); + if (ret) + pr_err("dove_init_pmu_irq() failed: %d\n", ret); + } + + return 0; +} diff --git a/include/linux/soc/dove/pmu.h b/include/linux/soc/dove/pmu.h new file mode 100644 index 000000000000..9c99f84bcc0e --- /dev/null +++ b/include/linux/soc/dove/pmu.h @@ -0,0 +1,6 @@ +#ifndef LINUX_SOC_DOVE_PMU_H +#define LINUX_SOC_DOVE_PMU_H + +int dove_init_pmu(void); + +#endif -- cgit v1.2.3-70-g09d2 From f368ed6088ae9c1fbe1c897bb5f215ce5e63fa1e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 30 Jul 2015 15:59:57 -0700 Subject: char: make misc_deregister a void function With well over 200+ users of this api, there are a mere 12 users that actually checked the return value of this function. And all of them really didn't do anything with that information as the system or module was shutting down no matter what. So stop pretending like it matters, and just return void from misc_deregister(). If something goes wrong in the call, you will get a WARNING splat in the syslog so you know how to fix up your driver. Other than that, there's nothing that can go wrong. Cc: Alasdair Kergon Cc: Neil Brown Cc: Oleg Drokin Cc: Andreas Dilger Cc: "Michael S. 
Tsirkin" Cc: Wim Van Sebroeck Cc: Christine Caulfield Cc: David Teigland Cc: Mark Fasheh Acked-by: Joel Becker Acked-by: Alexandre Belloni Acked-by: Alessandro Zummo Acked-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/char/misc.c | 9 +++------ drivers/md/dm-ioctl.c | 4 +--- drivers/misc/vmw_vmci/vmci_host.c | 7 +------ drivers/rtc/rtc-ds1374.c | 5 ++--- drivers/staging/android/ashmem.c | 7 +------ drivers/staging/android/ion/ion_test.c | 3 ++- drivers/staging/lustre/lustre/libcfs/module.c | 4 +--- drivers/vhost/scsi.c | 4 ++-- drivers/watchdog/at91rm9200_wdt.c | 5 ++--- drivers/watchdog/ks8695_wdt.c | 9 +++------ drivers/watchdog/ts72xx_wdt.c | 3 ++- fs/btrfs/super.c | 3 +-- fs/dlm/plock.c | 3 +-- fs/dlm/user.c | 9 +++------ fs/ocfs2/stack_user.c | 9 +-------- include/linux/miscdevice.h | 2 +- 16 files changed, 27 insertions(+), 59 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/misc.c b/drivers/char/misc.c index c83ef9652bc9..8069b361b8dd 100644 --- a/drivers/char/misc.c +++ b/drivers/char/misc.c @@ -243,17 +243,15 @@ int misc_register(struct miscdevice * misc) * @misc: device to unregister * * Unregister a miscellaneous device that was previously - * successfully registered with misc_register(). Success - * is indicated by a zero return, a negative errno code - * indicates an error. + * successfully registered with misc_register(). 
*/ -int misc_deregister(struct miscdevice *misc) +void misc_deregister(struct miscdevice *misc) { int i = DYNAMIC_MINORS - misc->minor - 1; if (WARN_ON(list_empty(&misc->list))) - return -EINVAL; + return; mutex_lock(&misc_mtx); list_del(&misc->list); @@ -261,7 +259,6 @@ int misc_deregister(struct miscdevice *misc) if (i < DYNAMIC_MINORS && i >= 0) clear_bit(i, misc_minors); mutex_unlock(&misc_mtx); - return 0; } EXPORT_SYMBOL(misc_register); diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 720ceeb7fa9b..80a439543259 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1919,9 +1919,7 @@ int __init dm_interface_init(void) void dm_interface_exit(void) { - if (misc_deregister(&_dm_misc) < 0) - DMERR("misc_deregister failed for control device"); - + misc_deregister(&_dm_misc); dm_hash_exit(); } diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index a721b5d8a9da..9ec262a52656 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -1031,14 +1031,9 @@ int __init vmci_host_init(void) void __exit vmci_host_exit(void) { - int error; - vmci_host_device_initialized = false; - error = misc_deregister(&vmci_host_miscdev); - if (error) - pr_warn("Error unregistering character device: %d\n", error); - + misc_deregister(&vmci_host_miscdev); vmci_ctx_destroy(host_context); vmci_qp_broker_exit(); diff --git a/drivers/rtc/rtc-ds1374.c b/drivers/rtc/rtc-ds1374.c index 167783fa7ac1..72c933375233 100644 --- a/drivers/rtc/rtc-ds1374.c +++ b/drivers/rtc/rtc-ds1374.c @@ -666,9 +666,8 @@ static int ds1374_remove(struct i2c_client *client) #ifdef CONFIG_RTC_DRV_DS1374_WDT int res; - res = misc_deregister(&ds1374_miscdev); - if (!res) - ds1374_miscdev.parent = NULL; + misc_deregister(&ds1374_miscdev); + ds1374_miscdev.parent = NULL; unregister_reboot_notifier(&ds1374_wdt_notifier); #endif diff --git a/drivers/staging/android/ashmem.c b/drivers/staging/android/ashmem.c index 
c5c037ccf32c..2c75d90b26c8 100644 --- a/drivers/staging/android/ashmem.c +++ b/drivers/staging/android/ashmem.c @@ -863,14 +863,9 @@ static int __init ashmem_init(void) static void __exit ashmem_exit(void) { - int ret; - unregister_shrinker(&ashmem_shrinker); - ret = misc_deregister(&ashmem_misc); - if (unlikely(ret)) - pr_err("failed to unregister misc device!\n"); - + misc_deregister(&ashmem_misc); kmem_cache_destroy(ashmem_range_cachep); kmem_cache_destroy(ashmem_area_cachep); diff --git a/drivers/staging/android/ion/ion_test.c b/drivers/staging/android/ion/ion_test.c index 7d6e6b6bc894..b8dcf5a26cc4 100644 --- a/drivers/staging/android/ion/ion_test.c +++ b/drivers/staging/android/ion/ion_test.c @@ -269,7 +269,8 @@ static int ion_test_remove(struct platform_device *pdev) if (!testdev) return -ENODATA; - return misc_deregister(&testdev->misc); + misc_deregister(&testdev->misc); + return 0; } static struct platform_device *ion_test_pdev; diff --git a/drivers/staging/lustre/lustre/libcfs/module.c b/drivers/staging/lustre/lustre/libcfs/module.c index e60b2e9b9194..e7074006e41b 100644 --- a/drivers/staging/lustre/lustre/libcfs/module.c +++ b/drivers/staging/lustre/lustre/libcfs/module.c @@ -467,9 +467,7 @@ static void exit_libcfs_module(void) cfs_crypto_unregister(); cfs_wi_shutdown(); - rc = misc_deregister(&libcfs_dev); - if (rc) - CERROR("misc_deregister error %d\n", rc); + misc_deregister(&libcfs_dev); cfs_cpu_fini(); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index dfcc02c93648..f114a9dbb48f 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1573,9 +1573,9 @@ static int __init vhost_scsi_register(void) return misc_register(&vhost_scsi_misc); } -static int vhost_scsi_deregister(void) +static void vhost_scsi_deregister(void) { - return misc_deregister(&vhost_scsi_misc); + misc_deregister(&vhost_scsi_misc); } static char *vhost_scsi_dump_proto_id(struct vhost_scsi_tport *tport) diff --git a/drivers/watchdog/at91rm9200_wdt.c 
b/drivers/watchdog/at91rm9200_wdt.c index 41cecb55766c..9ba1153465ae 100644 --- a/drivers/watchdog/at91rm9200_wdt.c +++ b/drivers/watchdog/at91rm9200_wdt.c @@ -269,9 +269,8 @@ static int at91wdt_remove(struct platform_device *pdev) if (res) dev_warn(dev, "failed to unregister restart handler\n"); - res = misc_deregister(&at91wdt_miscdev); - if (!res) - at91wdt_miscdev.parent = NULL; + misc_deregister(&at91wdt_miscdev); + at91wdt_miscdev.parent = NULL; return res; } diff --git a/drivers/watchdog/ks8695_wdt.c b/drivers/watchdog/ks8695_wdt.c index b7ea39b455c8..1e41818a44bc 100644 --- a/drivers/watchdog/ks8695_wdt.c +++ b/drivers/watchdog/ks8695_wdt.c @@ -254,13 +254,10 @@ static int ks8695wdt_probe(struct platform_device *pdev) static int ks8695wdt_remove(struct platform_device *pdev) { - int res; - - res = misc_deregister(&ks8695wdt_miscdev); - if (!res) - ks8695wdt_miscdev.parent = NULL; + misc_deregister(&ks8695wdt_miscdev); + ks8695wdt_miscdev.parent = NULL; - return res; + return 0; } static void ks8695wdt_shutdown(struct platform_device *pdev) diff --git a/drivers/watchdog/ts72xx_wdt.c b/drivers/watchdog/ts72xx_wdt.c index 119beb7f6017..4b541934b6c5 100644 --- a/drivers/watchdog/ts72xx_wdt.c +++ b/drivers/watchdog/ts72xx_wdt.c @@ -428,7 +428,8 @@ static int ts72xx_wdt_probe(struct platform_device *pdev) static int ts72xx_wdt_remove(struct platform_device *pdev) { - return misc_deregister(&ts72xx_wdt_miscdev); + misc_deregister(&ts72xx_wdt_miscdev); + return 0; } static struct platform_driver ts72xx_wdt_driver = { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index cd7ef34d2dce..6bad63379a4c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2163,8 +2163,7 @@ static int btrfs_interface_init(void) static void btrfs_interface_exit(void) { - if (misc_deregister(&btrfs_misc) < 0) - printk(KERN_INFO "BTRFS: misc_deregister failed for control device\n"); + misc_deregister(&btrfs_misc); } static void btrfs_print_info(void) diff --git a/fs/dlm/plock.c 
b/fs/dlm/plock.c index e0ab3a93eeff..5532f097f6da 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -509,7 +509,6 @@ int dlm_plock_init(void) void dlm_plock_exit(void) { - if (misc_deregister(&plock_dev_misc) < 0) - log_print("dlm_plock_exit: misc_deregister failed"); + misc_deregister(&plock_dev_misc); } diff --git a/fs/dlm/user.c b/fs/dlm/user.c index fb85f32e9eca..75ecc0d3bc85 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -362,18 +362,15 @@ fail: int dlm_device_deregister(struct dlm_ls *ls) { - int error; - /* The device is not registered. This happens when the lockspace was never used from userspace, or when device_create_lockspace() calls dlm_release_lockspace() after the register fails. */ if (!ls->ls_device.name) return 0; - error = misc_deregister(&ls->ls_device); - if (!error) - kfree(ls->ls_device.name); - return error; + misc_deregister(&ls->ls_device); + kfree(ls->ls_device.name); + return 0; } static int device_user_purge(struct dlm_user_proc *proc, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 2768eb1da2b8..ced70c8139f7 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -655,14 +655,7 @@ static int ocfs2_control_init(void) static void ocfs2_control_exit(void) { - int rc; - - rc = misc_deregister(&ocfs2_control_device); - if (rc) - printk(KERN_ERR - "ocfs2: Unable to deregister ocfs2_control device " - "(errno %d)\n", - -rc); + misc_deregister(&ocfs2_control_device); } static void fsdlm_lock_ast_wrapper(void *astarg) diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 819077c32690..81f6e427ba6b 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -67,7 +67,7 @@ struct miscdevice { }; extern int misc_register(struct miscdevice *misc); -extern int misc_deregister(struct miscdevice *misc); +extern void misc_deregister(struct miscdevice *misc); #define MODULE_ALIAS_MISCDEV(minor) \ MODULE_ALIAS("char-major-" __stringify(MISC_MAJOR) \ -- cgit v1.2.3-70-g09d2 From 
7f163a6fd957a85f7f66a129db1ad243a44399ee Mon Sep 17 00:00:00 2001 From: Jake Oshins Date: Wed, 5 Aug 2015 00:52:36 -0700 Subject: drivers:hv: Modify hv_vmbus to search for all MMIO ranges available. This patch changes the logic in hv_vmbus to record all of the ranges in the VM's firmware (BIOS or UEFI) that offer regions of memory-mapped I/O space for use by paravirtual front-end drivers. The old logic just found one range above 4GB and called it good. This logic will find any ranges above 1MB. It would have been possible with this patch to just use existing resource allocation functions, rather than keep track of the entire set of Hyper-V related MMIO regions in VMBus. This strategy, however, is not sufficient when the resource allocator needs to be aware of the constraints of a Hyper-V virtual machine, which is what happens in the next patch in the series. So this first patch exists to show the first steps in reworking the MMIO allocation paths for Hyper-V front-end drivers. Signed-off-by: Jake Oshins Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 116 +++++++++++++++++++++++++++++++--------- drivers/video/fbdev/hyperv_fb.c | 2 +- include/linux/hyperv.h | 2 +- 3 files changed, 92 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index e7b0bcd453e7..ee59e06c2194 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -102,10 +102,7 @@ static struct notifier_block hyperv_panic_block = { .notifier_call = hyperv_panic_event, }; -struct resource hyperv_mmio = { - .name = "hyperv mmio", - .flags = IORESOURCE_MEM, -}; +struct resource *hyperv_mmio; EXPORT_SYMBOL_GPL(hyperv_mmio); static int vmbus_exists(void) @@ -1013,30 +1010,105 @@ void vmbus_device_unregister(struct hv_device *device_obj) /* - * VMBUS is an acpi enumerated device. Get the the information we + * VMBUS is an acpi enumerated device. Get the information we * need from DSDT. 
*/ - +#define VTPM_BASE_ADDRESS 0xfed40000 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) { + resource_size_t start = 0; + resource_size_t end = 0; + struct resource *new_res; + struct resource **old_res = &hyperv_mmio; + struct resource **prev_res = NULL; + switch (res->type) { case ACPI_RESOURCE_TYPE_IRQ: irq = res->data.irq.interrupts[0]; + return AE_OK; + + /* + * "Address" descriptors are for bus windows. Ignore + * "memory" descriptors, which are for registers on + * devices. + */ + case ACPI_RESOURCE_TYPE_ADDRESS32: + start = res->data.address32.address.minimum; + end = res->data.address32.address.maximum; break; case ACPI_RESOURCE_TYPE_ADDRESS64: - hyperv_mmio.start = res->data.address64.address.minimum; - hyperv_mmio.end = res->data.address64.address.maximum; + start = res->data.address64.address.minimum; + end = res->data.address64.address.maximum; break; + + default: + /* Unused resource type */ + return AE_OK; + } + /* + * Ignore ranges that are below 1MB, as they're not + * necessary or useful here. + */ + if (end < 0x100000) + return AE_OK; + + new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); + if (!new_res) + return AE_NO_MEMORY; + + /* If this range overlaps the virtual TPM, truncate it. 
*/ + if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) + end = VTPM_BASE_ADDRESS; + + new_res->name = "hyperv mmio"; + new_res->flags = IORESOURCE_MEM; + new_res->start = start; + new_res->end = end; + + do { + if (!*old_res) { + *old_res = new_res; + break; + } + + if ((*old_res)->end < new_res->start) { + new_res->sibling = *old_res; + if (prev_res) + (*prev_res)->sibling = new_res; + *old_res = new_res; + break; + } + + prev_res = old_res; + old_res = &(*old_res)->sibling; + + } while (1); return AE_OK; } +static int vmbus_acpi_remove(struct acpi_device *device) +{ + struct resource *cur_res; + struct resource *next_res; + + if (hyperv_mmio) { + for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { + next_res = cur_res->sibling; + kfree(cur_res); + } + } + + return 0; +} + static int vmbus_acpi_add(struct acpi_device *device) { acpi_status result; int ret_val = -ENODEV; + struct acpi_device *ancestor; hv_acpi_dev = device; @@ -1046,35 +1118,27 @@ static int vmbus_acpi_add(struct acpi_device *device) if (ACPI_FAILURE(result)) goto acpi_walk_err; /* - * The parent of the vmbus acpi device (Gen2 firmware) is the VMOD that - * has the mmio ranges. Get that. + * Some ancestor of the vmbus acpi device (Gen1 or Gen2 + * firmware) is the VMOD that has the mmio ranges. Get that. 
*/ - if (device->parent) { - result = acpi_walk_resources(device->parent->handle, - METHOD_NAME__CRS, - vmbus_walk_resources, NULL); + for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { + result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, + vmbus_walk_resources, NULL); if (ACPI_FAILURE(result)) - goto acpi_walk_err; - if (hyperv_mmio.start && hyperv_mmio.end) - request_resource(&iomem_resource, &hyperv_mmio); + continue; + if (hyperv_mmio) + break; } ret_val = 0; acpi_walk_err: complete(&probe_event); + if (ret_val) + vmbus_acpi_remove(device); return ret_val; } -static int vmbus_acpi_remove(struct acpi_device *device) -{ - int ret = 0; - - if (hyperv_mmio.start && hyperv_mmio.end) - ret = release_resource(&hyperv_mmio); - return ret; -} - static const struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, {"VMBus", 0}, diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 807ee22ef229..b54ee1c05a5f 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -688,7 +688,7 @@ static int hvfb_getmem(struct fb_info *info) par->mem.name = KBUILD_MODNAME; par->mem.flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (gen2vm) { - ret = allocate_resource(&hyperv_mmio, &par->mem, + ret = allocate_resource(hyperv_mmio, &par->mem, screen_fb_size, 0, -1, screen_fb_size, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 30d3a1f79450..217e14be77b9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1233,7 +1233,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); -extern struct resource hyperv_mmio; +extern struct resource *hyperv_mmio; /* * Negotiated version with the Host. 
-- cgit v1.2.3-70-g09d2 From 3546448338e76a52d4f86eb3680cb2934e22d89b Mon Sep 17 00:00:00 2001 From: Jake Oshins Date: Wed, 5 Aug 2015 00:52:37 -0700 Subject: drivers:hv: Move MMIO range picking from hyperv_fb to hv_vmbus This patch deletes the logic from hyperv_fb which picked a range of MMIO space for the frame buffer and adds new logic to hv_vmbus which picks ranges for child drivers. The new logic isn't quite the same as the old, as it considers more possible ranges. Signed-off-by: Jake Oshins Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 88 +++++++++++++++++++++++++++++++++++++++-- drivers/video/fbdev/hyperv_fb.c | 46 ++++++++++----------- include/linux/hyperv.h | 7 +++- 3 files changed, 110 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index ee59e06c2194..8c3eaee8c54c 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" @@ -103,7 +104,6 @@ static struct notifier_block hyperv_panic_block = { }; struct resource *hyperv_mmio; -EXPORT_SYMBOL_GPL(hyperv_mmio); static int vmbus_exists(void) { @@ -891,8 +891,8 @@ err_cleanup: } /** - * __vmbus_child_driver_register - Register a vmbus's driver - * @drv: Pointer to driver structure you want to register + * __vmbus_child_driver_register() - Register a vmbus's driver + * @hv_driver: Pointer to driver structure you want to register * @owner: owner module of the drv * @mod_name: module name string * @@ -924,7 +924,8 @@ EXPORT_SYMBOL_GPL(__vmbus_driver_register); /** * vmbus_driver_unregister() - Unregister a vmbus's driver - * @drv: Pointer to driver structure you want to un-register + * @hv_driver: Pointer to driver structure you want to + * un-register * * Un-register the given driver that was previous registered with a call to * vmbus_driver_register() @@ -1104,6 +1105,85 @@ static int
vmbus_acpi_remove(struct acpi_device *device) return 0; } +/** + * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. + * @new: If successful, supplies a pointer to the + * allocated MMIO space. + * @device_obj: Identifies the caller + * @min: Minimum guest physical address of the + * allocation + * @max: Maximum guest physical address + * @size: Size of the range to be allocated + * @align: Alignment of the range to be allocated + * @fb_overlap_ok: Whether this allocation can be allowed + * to overlap the video frame buffer. + * + * This function walks the resources granted to VMBus by the + * _CRS object in the ACPI namespace underneath the parent + * "bridge" whether that's a root PCI bus in the Generation 1 + * case or a Module Device in the Generation 2 case. It then + * attempts to allocate from the global MMIO pool in a way that + * matches the constraints supplied in these parameters and by + * that _CRS. + * + * Return: 0 on success, -errno on failure + */ +int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, + bool fb_overlap_ok) +{ + struct resource *iter; + resource_size_t range_min, range_max, start, local_min, local_max; + const char *dev_n = dev_name(&device_obj->device); + u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1); + int i; + + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= max) || (iter->end <= min)) + continue; + + range_min = iter->start; + range_max = iter->end; + + /* If this range overlaps the frame buffer, split it into + two tries.
*/ + for (i = 0; i < 2; i++) { + local_min = range_min; + local_max = range_max; + if (fb_overlap_ok || (range_min >= fb_end) || + (range_max <= screen_info.lfb_base)) { + i++; + } else { + if ((range_min <= screen_info.lfb_base) && + (range_max >= screen_info.lfb_base)) { + /* + * The frame buffer is in this window, + * so trim this into the part that + * precedes the frame buffer. + */ + local_max = screen_info.lfb_base - 1; + range_min = fb_end; + } else { + range_min = fb_end; + continue; + } + } + + start = (local_min + align - 1) & ~(align - 1); + for (; start + size - 1 <= local_max; start += align) { + *new = request_mem_region_exclusive(start, size, + dev_n); + if (*new) + return 0; + } + } + } + + return -ENXIO; +} +EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); + static int vmbus_acpi_add(struct acpi_device *device) { acpi_status result; diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index b54ee1c05a5f..e2451bdb4525 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -213,7 +213,7 @@ struct synthvid_msg { struct hvfb_par { struct fb_info *info; - struct resource mem; + struct resource *mem; bool fb_ready; /* fb device is ready */ struct completion wait; u32 synthvid_version; @@ -677,26 +677,18 @@ static void hvfb_get_option(struct fb_info *info) /* Get framebuffer memory from Hyper-V video pci space */ -static int hvfb_getmem(struct fb_info *info) +static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info) { struct hvfb_par *par = info->par; struct pci_dev *pdev = NULL; void __iomem *fb_virt; int gen2vm = efi_enabled(EFI_BOOT); + resource_size_t pot_start, pot_end; int ret; - par->mem.name = KBUILD_MODNAME; - par->mem.flags = IORESOURCE_MEM | IORESOURCE_BUSY; if (gen2vm) { - ret = allocate_resource(hyperv_mmio, &par->mem, - screen_fb_size, - 0, -1, - screen_fb_size, - NULL, NULL); - if (ret != 0) { - pr_err("Unable to allocate framebuffer memory\n"); - return -ENODEV; - } + pot_start =
0; + pot_end = -1; } else { pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, PCI_DEVICE_ID_HYPERV_VIDEO, NULL); @@ -709,16 +701,18 @@ static int hvfb_getmem(struct fb_info *info) pci_resource_len(pdev, 0) < screen_fb_size) goto err1; - par->mem.end = pci_resource_end(pdev, 0); - par->mem.start = par->mem.end - screen_fb_size + 1; - ret = request_resource(&pdev->resource[0], &par->mem); - if (ret != 0) { - pr_err("Unable to request framebuffer memory\n"); - goto err1; - } + pot_end = pci_resource_end(pdev, 0); + pot_start = pot_end - screen_fb_size + 1; + } + + ret = vmbus_allocate_mmio(&par->mem, hdev, pot_start, pot_end, + screen_fb_size, 0x100000, true); + if (ret != 0) { + pr_err("Unable to allocate framebuffer memory\n"); + goto err1; } - fb_virt = ioremap(par->mem.start, screen_fb_size); + fb_virt = ioremap(par->mem->start, screen_fb_size); if (!fb_virt) goto err2; @@ -736,7 +730,7 @@ static int hvfb_getmem(struct fb_info *info) info->apertures->ranges[0].size = pci_resource_len(pdev, 0); } - info->fix.smem_start = par->mem.start; + info->fix.smem_start = par->mem->start; info->fix.smem_len = screen_fb_size; info->screen_base = fb_virt; info->screen_size = screen_fb_size; @@ -749,7 +743,8 @@ static int hvfb_getmem(struct fb_info *info) err3: iounmap(fb_virt); err2: - release_resource(&par->mem); + release_mem_region(par->mem->start, screen_fb_size); + par->mem = NULL; err1: if (!gen2vm) pci_dev_put(pdev); @@ -763,7 +758,8 @@ static void hvfb_putmem(struct fb_info *info) struct hvfb_par *par = info->par; iounmap(info->screen_base); - release_resource(&par->mem); + release_mem_region(par->mem->start, screen_fb_size); + par->mem = NULL; } @@ -794,7 +790,7 @@ static int hvfb_probe(struct hv_device *hdev, goto error1; } - ret = hvfb_getmem(info); + ret = hvfb_getmem(hdev, info); if (ret) { pr_err("No memory for framebuffer\n"); goto error2; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 217e14be77b9..54733d5b503e 100644 --- 
a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -977,6 +977,11 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); +int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, + bool fb_overlap_ok); + /** * VMBUS_DEVICE - macro used to describe a specific hyperv vmbus device * @@ -1233,8 +1238,6 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); -extern struct resource *hyperv_mmio; - /* * Negotiated version with the Host. */ -- cgit v1.2.3-70-g09d2 From 9f01ec53458d9e9b68f1c555e773b5d1a1f66e94 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 5 Aug 2015 00:52:38 -0700 Subject: Drivers: hv: vmbus: Improve the CPU affiliation for channels The current code tracks the assigned CPUs within a NUMA node in the context of the primary channel. So, if we have a VM with a single NUMA node with 8 VCPUs, we may end up unevenly distributing the channel load. Fix the issue by tracking affiliations globally. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 11 ++++++----- drivers/hv/hv.c | 9 +++++++++ drivers/hv/hyperv_vmbus.h | 5 +++++ include/linux/hyperv.h | 1 - 4 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 30613dfa38b3..39c5afc7970c 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -392,6 +392,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; + struct cpumask *alloced_mask; for (i = IDE; i < MAX_PERF_CHN; i++) { if (!memcmp(type_guid->b, hp_devs[i].guid, @@ -409,7 +410,6 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui * channel, bind it to cpu 0. */ channel->numa_node = 0; - cpumask_set_cpu(0, &channel->alloced_cpus_in_node); channel->target_cpu = 0; channel->target_vp = hv_context.vp_index[0]; return; @@ -434,21 +434,22 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui channel->numa_node = next_node; primary = channel; } + alloced_mask = &hv_context.hv_numa_map[primary->numa_node]; - if (cpumask_weight(&primary->alloced_cpus_in_node) == + if (cpumask_weight(alloced_mask) == cpumask_weight(cpumask_of_node(primary->numa_node))) { /* * We have cycled through all the CPUs in the node; * reset the alloced map. 
*/ - cpumask_clear(&primary->alloced_cpus_in_node); + cpumask_clear(alloced_mask); } - cpumask_xor(&available_mask, &primary->alloced_cpus_in_node, + cpumask_xor(&available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); cur_cpu = cpumask_next(-1, &available_mask); - cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node); + cpumask_set_cpu(cur_cpu, alloced_mask); channel->target_cpu = cur_cpu; channel->target_vp = hv_context.vp_index[cur_cpu]; diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 41d8072d61d9..fd93cfde96d0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -332,6 +332,13 @@ int hv_synic_alloc(void) size_t ced_size = sizeof(struct clock_event_device); int cpu; + hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids, + GFP_ATOMIC); + if (hv_context.hv_numa_map == NULL) { + pr_err("Unable to allocate NUMA map\n"); + goto err; + } + for_each_online_cpu(cpu) { hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC); if (hv_context.event_dpc[cpu] == NULL) { @@ -345,6 +352,7 @@ int hv_synic_alloc(void) pr_err("Unable to allocate clock event device\n"); goto err; } + hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu); hv_context.synic_message_page[cpu] = @@ -393,6 +401,7 @@ void hv_synic_free(void) { int cpu; + kfree(hv_context.hv_numa_map); for_each_online_cpu(cpu) hv_synic_free_cpu(cpu); } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 638370701657..6f258255ac94 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -551,6 +551,11 @@ struct hv_context { * Support PV clockevent device. */ struct clock_event_device *clk_evt[NR_CPUS]; + /* + * To manage allocations in a NUMA node. + * Array indexed by numa node ID. 
+ */ + struct cpumask *hv_numa_map; }; extern struct hv_context hv_context; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5b503e..5a3df5a47c8f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -699,7 +699,6 @@ struct vmbus_channel { /* * State to manage the CPU affiliation of channels. */ - struct cpumask alloced_cpus_in_node; int numa_node; /* * Support for sub-channels. For high performance devices, -- cgit v1.2.3-70-g09d2 From 3b71107d73b16074afa7658f3f0fcf837aabfe24 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 5 Aug 2015 00:52:39 -0700 Subject: Drivers: hv: vmbus: Further improve CPU affiliation logic Keep track of CPU affiliations of sub-channels within the scope of the primary channel. This will allow us to better distribute the load amongst available CPUs. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 20 ++++++++++++++++++-- include/linux/hyperv.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 39c5afc7970c..2f9aead4ecfc 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -448,8 +448,24 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui cpumask_xor(&available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); - cur_cpu = cpumask_next(-1, &available_mask); - cpumask_set_cpu(cur_cpu, alloced_mask); + cur_cpu = -1; + while (true) { + cur_cpu = cpumask_next(cur_cpu, &available_mask); + if (cur_cpu >= nr_cpu_ids) { + cur_cpu = -1; + cpumask_copy(&available_mask, + cpumask_of_node(primary->numa_node)); + continue; + } + + if (!cpumask_test_cpu(cur_cpu, + &primary->alloced_cpus_in_node)) { + cpumask_set_cpu(cur_cpu, + &primary->alloced_cpus_in_node); + cpumask_set_cpu(cur_cpu, alloced_mask); + break; + } + } channel->target_cpu = cur_cpu; channel->target_vp = 
hv_context.vp_index[cur_cpu]; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 5a3df5a47c8f..54733d5b503e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -699,6 +699,7 @@ struct vmbus_channel { /* * State to manage the CPU affiliation of channels. */ + struct cpumask alloced_cpus_in_node; int numa_node; /* * Support for sub-channels. For high performance devices, -- cgit v1.2.3-70-g09d2 From 061dff29f8f62c21c9222897e4d121b4a5fa50da Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 3 Aug 2015 13:02:59 -0400 Subject: xprtrdma: Increase default credit limit In preparation for similar increases on NFS/RDMA servers, bump the advertised credit limit for RPC/RDMA to 128. This allocates some extra resources, but the client will continue to allow only the number of RPCs in flight that the server requests via its advertised credit limit. Signed-off-by: Chuck Lever Reviewed-By: Sagi Grimberg Tested-by: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index b17613052cc3..b7b279b54504 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -49,7 +49,7 @@ * a single chunk type per message is supported currently. */ #define RPCRDMA_MIN_SLOT_TABLE (2U) -#define RPCRDMA_DEF_SLOT_TABLE (32U) +#define RPCRDMA_DEF_SLOT_TABLE (128U) #define RPCRDMA_MAX_SLOT_TABLE (256U) #define RPCRDMA_DEF_INLINE (1024) /* default inline max */ -- cgit v1.2.3-70-g09d2 From c0bd1b9e58959c51a4c939505f89721dfbc73c44 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 22 Jul 2015 13:17:15 -0500 Subject: Revert "ti-st: add device tree support" This reverts commit 46d0d33350e9b32642d745a8b46a954910196b4d. This binding is horrible and never should have been merged. 
It is not documented nor are there any in tree users, so reverting it will not break anything we care about. Lets revert it before we do have users. The problems with it are: - It is not documented. - The GPIO connection is described with a custom property and uses Linux GPIO numbering. - The UART connection is described using the Linux tty device name. Cc: Gigi Joseph Cc: Greg Kroah-Hartman Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 94 ++++---------------------------------------- drivers/misc/ti-st/st_ll.c | 17 +------- include/linux/ti_wilink_st.h | 1 - 3 files changed, 9 insertions(+), 103 deletions(-) (limited to 'include/linux') diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index c84093e639e0..c828282af38a 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -36,8 +36,6 @@ #include #include #include -#include -#include #define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */ static struct platform_device *st_kim_devices[MAX_ST_DEVICES]; @@ -45,9 +43,6 @@ static struct platform_device *st_kim_devices[MAX_ST_DEVICES]; /**********************************************************************/ /* internal functions */ -struct ti_st_plat_data *dt_pdata; -static struct ti_st_plat_data *get_platform_data(struct device *dev); - /** * st_get_plat_device - * function which returns the reference to the platform device @@ -469,12 +464,7 @@ long st_kim_start(void *kim_data) struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; pr_info(" %s", __func__); - if (kim_gdata->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = kim_gdata->kim_pdev->dev.platform_data; - } + pdata = kim_gdata->kim_pdev->dev.platform_data; do { /* platform specific enabling code here */ @@ -534,18 +524,12 @@ long st_kim_stop(void *kim_data) { long err = 0; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; - struct 
ti_st_plat_data *pdata; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; struct tty_struct *tty = kim_gdata->core_data->tty; reinit_completion(&kim_gdata->ldisc_installed); - if (kim_gdata->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else - pdata = kim_gdata->kim_pdev->dev.platform_data; - - if (tty) { /* can be called before ldisc is installed */ /* Flush any pending characters in the driver and discipline. */ tty_ldisc_flush(tty); @@ -737,52 +721,13 @@ static const struct file_operations list_debugfs_fops = { * board-*.c file */ -static const struct of_device_id kim_of_match[] = { -{ - .compatible = "kim", - }, - {} -}; -MODULE_DEVICE_TABLE(of, kim_of_match); - -static struct ti_st_plat_data *get_platform_data(struct device *dev) -{ - struct device_node *np = dev->of_node; - const u32 *dt_property; - int len; - - dt_pdata = kzalloc(sizeof(*dt_pdata), GFP_KERNEL); - if (!dt_pdata) - return NULL; - - dt_property = of_get_property(np, "dev_name", &len); - if (dt_property) - memcpy(&dt_pdata->dev_name, dt_property, len); - of_property_read_u32(np, "nshutdown_gpio", - &dt_pdata->nshutdown_gpio); - of_property_read_u32(np, "flow_cntrl", &dt_pdata->flow_cntrl); - of_property_read_u32(np, "baud_rate", &dt_pdata->baud_rate); - - return dt_pdata; -} - static struct dentry *kim_debugfs_dir; static int kim_probe(struct platform_device *pdev) { struct kim_data_s *kim_gdata; - struct ti_st_plat_data *pdata; + struct ti_st_plat_data *pdata = pdev->dev.platform_data; int err; - if (pdev->dev.of_node) - pdata = get_platform_data(&pdev->dev); - else - pdata = pdev->dev.platform_data; - - if (pdata == NULL) { - dev_err(&pdev->dev, "Platform Data is missing\n"); - return -ENXIO; - } - if ((pdev->id != -1) && (pdev->id < MAX_ST_DEVICES)) { /* multiple devices could exist */ st_kim_devices[pdev->id] = pdev; @@ -863,16 +808,9 @@ err_core_init: static int kim_remove(struct platform_device *pdev) { /* free the GPIOs 
requested */ - struct ti_st_plat_data *pdata; + struct ti_st_plat_data *pdata = pdev->dev.platform_data; struct kim_data_s *kim_gdata; - if (pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = pdev->dev.platform_data; - } - kim_gdata = platform_get_drvdata(pdev); /* Free the Bluetooth/FM/GPIO @@ -890,22 +828,12 @@ static int kim_remove(struct platform_device *pdev) kfree(kim_gdata); kim_gdata = NULL; - kfree(dt_pdata); - dt_pdata = NULL; - return 0; } static int kim_suspend(struct platform_device *pdev, pm_message_t state) { - struct ti_st_plat_data *pdata; - - if (pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = pdev->dev.platform_data; - } + struct ti_st_plat_data *pdata = pdev->dev.platform_data; if (pdata->suspend) return pdata->suspend(pdev, state); @@ -915,14 +843,7 @@ static int kim_suspend(struct platform_device *pdev, pm_message_t state) static int kim_resume(struct platform_device *pdev) { - struct ti_st_plat_data *pdata; - - if (pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = pdev->dev.platform_data; - } + struct ti_st_plat_data *pdata = pdev->dev.platform_data; if (pdata->resume) return pdata->resume(pdev); @@ -939,7 +860,6 @@ static struct platform_driver kim_platform_driver = { .resume = kim_resume, .driver = { .name = "kim", - .of_match_table = of_match_ptr(kim_of_match), }, }; diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c index 518e1b7f2f95..93b4d67cc4a3 100644 --- a/drivers/misc/ti-st/st_ll.c +++ b/drivers/misc/ti-st/st_ll.c @@ -26,7 +26,6 @@ #include /**********************************************************************/ - /* internal functions */ static void send_ll_cmd(struct st_data_s *st_data, unsigned char cmd) @@ -54,13 +53,7 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data) /* communicate to platform about chip asleep */ kim_data = st_data->kim_data; - if 
 (kim_data->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = kim_data->kim_pdev->dev.platform_data; - } - + pdata = kim_data->kim_pdev->dev.platform_data; if (pdata->chip_asleep) pdata->chip_asleep(NULL); } @@ -93,13 +86,7 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data) /* communicate to platform about chip wakeup */ kim_data = st_data->kim_data; - if (kim_data->kim_pdev->dev.of_node) { - pr_debug("use device tree data"); - pdata = dt_pdata; - } else { - pdata = kim_data->kim_pdev->dev.platform_data; - } - + pdata = kim_data->kim_pdev->dev.platform_data; if (pdata->chip_awake) pdata->chip_awake(NULL); } diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index c78dcfeaf25f..d4217eff489f 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -86,7 +86,6 @@ struct st_proto_s { extern long st_register(struct st_proto_s *); extern long st_unregister(struct st_proto_s *); -extern struct ti_st_plat_data *dt_pdata; /* * header information used by st_core.c -- cgit v1.2.3-70-g09d2 From 722102274ab1206634d31abe4f438a911a0945d2 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 31 Jul 2015 09:37:26 -0600 Subject: Coresight: Add an interface for supporting ETM3/4 Context ID tracing If PID namespace is enabled, every time users configure the Context ID register to trace the specific process, there needs to be a translation between the real PID seen from the kernel and VPID seen from the namespace in which the user's process resides. This patch just adds the translation interface for ETMs.
Signed-off-by: Chunyan Zhang Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 3486b9082adb..626da6948ca2 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -248,4 +248,24 @@ static inline struct coresight_platform_data *of_get_coresight_platform_data( struct device *dev, struct device_node *node) { return NULL; } #endif +#ifdef CONFIG_PID_NS +static inline unsigned long +coresight_vpid_to_pid(unsigned long vpid) +{ + struct task_struct *task = NULL; + unsigned long pid = 0; + + rcu_read_lock(); + task = find_task_by_vpid(vpid); + if (task) + pid = task_pid_nr(task); + rcu_read_unlock(); + + return pid; +} +#else +static inline unsigned long +coresight_vpid_to_pid(unsigned long vpid) { return vpid; } +#endif + #endif -- cgit v1.2.3-70-g09d2 From ff63ec1312dabd28876c9c03b5ed172a879bfb60 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 31 Jul 2015 09:37:30 -0600 Subject: coresight: Fix implicit inclusion of linux/sched.h The patch "Coresight: Add an interface for supporting ETM3/4 Context ID tracing" adds uses of find_task_by_vpid() and task_pid_nr() from linux/sched.h but does not include that header causing build errors in at least an ARM allmodconfig where it is not implicitly included. Add an explicit include to fix that. 
 Signed-off-by: Mark Brown Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 626da6948ca2..c69e1b932809 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -14,6 +14,7 @@ #define _LINUX_CORESIGHT_H #include +#include <linux/sched.h> /* Peripheral id registers (0xFD0-0xFEC) */ #define CORESIGHT_PERIPHIDR4 0xfd0 -- cgit v1.2.3-70-g09d2 From eace75cfdcf7d9937d8c1fb226780123c64d72c4 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 27 Jul 2015 12:13:19 +0100 Subject: nvmem: Add a simple NVMEM framework for nvmem providers This patch adds only the providers part of the framework, just to enable easy review. Up until now, NVMEM drivers like eeprom were stored in drivers/misc, where they all had to duplicate pretty much the same code to register a sysfs file, allow in-kernel users to access the content of the devices they were driving, etc. This was also a problem as far as other in-kernel users were involved, since the solutions used were pretty much different from one driver to another, there was a rather big abstraction leak. The introduction of this framework aims at solving this. It also introduces DT representation for consumer devices to go get the data they require (MAC Addresses, SoC/Revision ID, part numbers, and so on) from the nvmems. Having a regmap interface to this framework would give much better abstraction for nvmems on different buses.
 Signed-off-by: Maxime Ripard [Maxime Ripard: initial version of eeprom framework] Signed-off-by: Srinivas Kandagatla Tested-by: Stefan Wahren Tested-by: Philipp Zabel Tested-by: Rajendra Nayak Signed-off-by: Greg Kroah-Hartman --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/nvmem/Kconfig | 13 ++ drivers/nvmem/Makefile | 6 + drivers/nvmem/core.c | 406 +++++++++++++++++++++++++++++++++++++++++ include/linux/nvmem-consumer.h | 23 +++ include/linux/nvmem-provider.h | 47 +++++ 7 files changed, 498 insertions(+) create mode 100644 drivers/nvmem/Kconfig create mode 100644 drivers/nvmem/Makefile create mode 100644 drivers/nvmem/core.c create mode 100644 include/linux/nvmem-consumer.h create mode 100644 include/linux/nvmem-provider.h (limited to 'include/linux') diff --git a/drivers/Kconfig b/drivers/Kconfig index 6e973b8e3a3b..4e2e6aaf0b88 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -184,4 +184,6 @@ source "drivers/android/Kconfig" source "drivers/nvdimm/Kconfig" +source "drivers/nvmem/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index b64b49f6e01b..4c270f5414f0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -165,3 +165,4 @@ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ obj-$(CONFIG_ANDROID) += android/ +obj-$(CONFIG_NVMEM) += nvmem/ diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig new file mode 100644 index 000000000000..de90c82d891b --- /dev/null +++ b/drivers/nvmem/Kconfig @@ -0,0 +1,13 @@ +menuconfig NVMEM + tristate "NVMEM Support" + select REGMAP + help + Support for NVMEM(Non Volatile Memory) devices like EEPROM, EFUSES... + + This framework is designed to provide a generic interface to NVMEM + from both the Linux Kernel and the userspace. + + This driver can also be built as a module. If so, the module + will be called nvmem_core. + + If unsure, say no.
diff --git a/drivers/nvmem/Makefile b/drivers/nvmem/Makefile new file mode 100644 index 000000000000..6df2c6952ad5 --- /dev/null +++ b/drivers/nvmem/Makefile @@ -0,0 +1,6 @@ +# +# Makefile for nvmem drivers. +# + +obj-$(CONFIG_NVMEM) += nvmem_core.o +nvmem_core-y := core.o diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c new file mode 100644 index 000000000000..2b024915e224 --- /dev/null +++ b/drivers/nvmem/core.c @@ -0,0 +1,406 @@ +/* + * nvmem framework core. + * + * Copyright (C) 2015 Srinivas Kandagatla + * Copyright (C) 2013 Maxime Ripard + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct nvmem_device { + const char *name; + struct regmap *regmap; + struct module *owner; + struct device dev; + int stride; + int word_size; + int ncells; + int id; + int users; + size_t size; + bool read_only; +}; + +struct nvmem_cell { + const char *name; + int offset; + int bytes; + int bit_offset; + int nbits; + struct nvmem_device *nvmem; + struct list_head node; +}; + +static DEFINE_MUTEX(nvmem_mutex); +static DEFINE_IDA(nvmem_ida); + +static LIST_HEAD(nvmem_cells); +static DEFINE_MUTEX(nvmem_cells_mutex); + +#define to_nvmem_device(d) container_of(d, struct nvmem_device, dev) + +static ssize_t bin_attr_nvmem_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvmem_device *nvmem = to_nvmem_device(dev); + int rc; + + /* Stop the user from reading */ + if (pos > nvmem->size) + return 0; + + if (pos + count > nvmem->size) + count = nvmem->size - pos; + + count = round_down(count, nvmem->word_size); + + rc = regmap_raw_read(nvmem->regmap, pos, buf, count); + + if (IS_ERR_VALUE(rc)) + return rc; + + return count; +} + +static ssize_t bin_attr_nvmem_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t pos, size_t count) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct nvmem_device *nvmem = to_nvmem_device(dev); + int rc; + + /* Stop the user from writing */ + if (pos > nvmem->size) + return 0; + + if (pos + count > nvmem->size) + count = nvmem->size - pos; + + count = round_down(count, nvmem->word_size); + + rc = regmap_raw_write(nvmem->regmap, pos, buf, count); + + if (IS_ERR_VALUE(rc)) + return rc; + + return count; +} + +/* default read/write permissions */ +static struct bin_attribute bin_attr_rw_nvmem = { + .attr = { + .name = 
"nvmem", + .mode = S_IWUSR | S_IRUGO, + }, + .read = bin_attr_nvmem_read, + .write = bin_attr_nvmem_write, +}; + +static struct bin_attribute *nvmem_bin_rw_attributes[] = { + &bin_attr_rw_nvmem, + NULL, +}; + +static const struct attribute_group nvmem_bin_rw_group = { + .bin_attrs = nvmem_bin_rw_attributes, +}; + +static const struct attribute_group *nvmem_rw_dev_groups[] = { + &nvmem_bin_rw_group, + NULL, +}; + +/* read only permission */ +static struct bin_attribute bin_attr_ro_nvmem = { + .attr = { + .name = "nvmem", + .mode = S_IRUGO, + }, + .read = bin_attr_nvmem_read, +}; + +static struct bin_attribute *nvmem_bin_ro_attributes[] = { + &bin_attr_ro_nvmem, + NULL, +}; + +static const struct attribute_group nvmem_bin_ro_group = { + .bin_attrs = nvmem_bin_ro_attributes, +}; + +static const struct attribute_group *nvmem_ro_dev_groups[] = { + &nvmem_bin_ro_group, + NULL, +}; + +static void nvmem_release(struct device *dev) +{ + struct nvmem_device *nvmem = to_nvmem_device(dev); + + ida_simple_remove(&nvmem_ida, nvmem->id); + kfree(nvmem); +} + +static const struct device_type nvmem_provider_type = { + .release = nvmem_release, +}; + +static struct bus_type nvmem_bus_type = { + .name = "nvmem", +}; + +static int of_nvmem_match(struct device *dev, void *nvmem_np) +{ + return dev->of_node == nvmem_np; +} + +static struct nvmem_device *of_nvmem_find(struct device_node *nvmem_np) +{ + struct device *d; + + if (!nvmem_np) + return NULL; + + d = bus_find_device(&nvmem_bus_type, NULL, nvmem_np, of_nvmem_match); + + if (!d) + return NULL; + + return to_nvmem_device(d); +} + +static struct nvmem_cell *nvmem_find_cell(const char *cell_id) +{ + struct nvmem_cell *p; + + list_for_each_entry(p, &nvmem_cells, node) + if (p && !strcmp(p->name, cell_id)) + return p; + + return NULL; +} + +static void nvmem_cell_drop(struct nvmem_cell *cell) +{ + mutex_lock(&nvmem_cells_mutex); + list_del(&cell->node); + mutex_unlock(&nvmem_cells_mutex); + kfree(cell); +} + +static void 
nvmem_device_remove_all_cells(const struct nvmem_device *nvmem) +{ + struct nvmem_cell *cell; + struct list_head *p, *n; + + list_for_each_safe(p, n, &nvmem_cells) { + cell = list_entry(p, struct nvmem_cell, node); + if (cell->nvmem == nvmem) + nvmem_cell_drop(cell); + } +} + +static void nvmem_cell_add(struct nvmem_cell *cell) +{ + mutex_lock(&nvmem_cells_mutex); + list_add_tail(&cell->node, &nvmem_cells); + mutex_unlock(&nvmem_cells_mutex); +} + +static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem, + const struct nvmem_cell_info *info, + struct nvmem_cell *cell) +{ + cell->nvmem = nvmem; + cell->offset = info->offset; + cell->bytes = info->bytes; + cell->name = info->name; + + cell->bit_offset = info->bit_offset; + cell->nbits = info->nbits; + + if (cell->nbits) + cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, + BITS_PER_BYTE); + + if (!IS_ALIGNED(cell->offset, nvmem->stride)) { + dev_err(&nvmem->dev, + "cell %s unaligned to nvmem stride %d\n", + cell->name, nvmem->stride); + return -EINVAL; + } + + return 0; +} + +static int nvmem_add_cells(struct nvmem_device *nvmem, + const struct nvmem_config *cfg) +{ + struct nvmem_cell **cells; + const struct nvmem_cell_info *info = cfg->cells; + int i, rval; + + cells = kcalloc(cfg->ncells, sizeof(*cells), GFP_KERNEL); + if (!cells) + return -ENOMEM; + + for (i = 0; i < cfg->ncells; i++) { + cells[i] = kzalloc(sizeof(**cells), GFP_KERNEL); + if (!cells[i]) { + rval = -ENOMEM; + goto err; + } + + rval = nvmem_cell_info_to_nvmem_cell(nvmem, &info[i], cells[i]); + if (IS_ERR_VALUE(rval)) { + kfree(cells[i]); + goto err; + } + + nvmem_cell_add(cells[i]); + } + + nvmem->ncells = cfg->ncells; + /* remove tmp array */ + kfree(cells); + + return 0; +err: + while (--i) + nvmem_cell_drop(cells[i]); + + return rval; +} + +/** + * nvmem_register() - Register a nvmem device for given nvmem_config. 
+ * Also creates an binary entry in /sys/bus/nvmem/devices/dev-name/nvmem + * + * @config: nvmem device configuration with which nvmem device is created. + * + * Return: Will be an ERR_PTR() on error or a valid pointer to nvmem_device + * on success. + */ + +struct nvmem_device *nvmem_register(const struct nvmem_config *config) +{ + struct nvmem_device *nvmem; + struct device_node *np; + struct regmap *rm; + int rval; + + if (!config->dev) + return ERR_PTR(-EINVAL); + + rm = dev_get_regmap(config->dev, NULL); + if (!rm) { + dev_err(config->dev, "Regmap not found\n"); + return ERR_PTR(-EINVAL); + } + + nvmem = kzalloc(sizeof(*nvmem), GFP_KERNEL); + if (!nvmem) + return ERR_PTR(-ENOMEM); + + rval = ida_simple_get(&nvmem_ida, 0, 0, GFP_KERNEL); + if (rval < 0) { + kfree(nvmem); + return ERR_PTR(rval); + } + + nvmem->id = rval; + nvmem->regmap = rm; + nvmem->owner = config->owner; + nvmem->stride = regmap_get_reg_stride(rm); + nvmem->word_size = regmap_get_val_bytes(rm); + nvmem->size = regmap_get_max_register(rm) + nvmem->stride; + nvmem->dev.type = &nvmem_provider_type; + nvmem->dev.bus = &nvmem_bus_type; + nvmem->dev.parent = config->dev; + np = config->dev->of_node; + nvmem->dev.of_node = np; + dev_set_name(&nvmem->dev, "%s%d", + config->name ? : "nvmem", config->id); + + nvmem->read_only = of_property_read_bool(np, "read-only") | + config->read_only; + + nvmem->dev.groups = nvmem->read_only ? nvmem_ro_dev_groups : + nvmem_rw_dev_groups; + + device_initialize(&nvmem->dev); + + dev_dbg(&nvmem->dev, "Registering nvmem device %s\n", config->name); + + rval = device_add(&nvmem->dev); + if (rval) { + ida_simple_remove(&nvmem_ida, nvmem->id); + kfree(nvmem); + return ERR_PTR(rval); + } + + if (config->cells) + nvmem_add_cells(nvmem, config); + + return nvmem; +} +EXPORT_SYMBOL_GPL(nvmem_register); + +/** + * nvmem_unregister() - Unregister previously registered nvmem device + * + * @nvmem: Pointer to previously registered nvmem device. 
+ * + * Return: Will be an negative on error or a zero on success. + */ +int nvmem_unregister(struct nvmem_device *nvmem) +{ + if (nvmem->users) + return -EBUSY; + + nvmem_device_remove_all_cells(nvmem); + device_del(&nvmem->dev); + + return 0; +} +EXPORT_SYMBOL_GPL(nvmem_unregister); + +static int __init nvmem_init(void) +{ + return bus_register(&nvmem_bus_type); +} + +static void __exit nvmem_exit(void) +{ + bus_unregister(&nvmem_bus_type); +} + +subsys_initcall(nvmem_init); +module_exit(nvmem_exit); + +MODULE_AUTHOR("Srinivas Kandagatla + * Copyright (C) 2013 Maxime Ripard + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef _LINUX_NVMEM_CONSUMER_H +#define _LINUX_NVMEM_CONSUMER_H + +struct nvmem_cell_info { + const char *name; + unsigned int offset; + unsigned int bytes; + unsigned int bit_offset; + unsigned int nbits; +}; + +#endif /* ifndef _LINUX_NVMEM_CONSUMER_H */ diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h new file mode 100644 index 000000000000..0b68caff1b3c --- /dev/null +++ b/include/linux/nvmem-provider.h @@ -0,0 +1,47 @@ +/* + * nvmem framework provider. + * + * Copyright (C) 2015 Srinivas Kandagatla + * Copyright (C) 2013 Maxime Ripard + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. 
+ */ + +#ifndef _LINUX_NVMEM_PROVIDER_H +#define _LINUX_NVMEM_PROVIDER_H + +struct nvmem_device; +struct nvmem_cell_info; + +struct nvmem_config { + struct device *dev; + const char *name; + int id; + struct module *owner; + const struct nvmem_cell_info *cells; + int ncells; + bool read_only; +}; + +#if IS_ENABLED(CONFIG_NVMEM) + +struct nvmem_device *nvmem_register(const struct nvmem_config *cfg); +int nvmem_unregister(struct nvmem_device *nvmem); + +#else + +static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c) +{ + return ERR_PTR(-ENOSYS); +} + +static inline int nvmem_unregister(struct nvmem_device *nvmem) +{ + return -ENOSYS; +} + +#endif /* CONFIG_NVMEM */ + +#endif /* ifndef _LINUX_NVMEM_PROVIDER_H */ -- cgit v1.2.3-70-g09d2 From 69aba7948cbe53f2f1827e84e9dd0ae470a5072e Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 27 Jul 2015 12:13:34 +0100 Subject: nvmem: Add a simple NVMEM framework for consumers This patch adds just consumers part of the framework just to enable easy review. Up until now, nvmem drivers were stored in drivers/misc, where they all had to duplicate pretty much the same code to register a sysfs file, allow in-kernel users to access the content of the devices they were driving, etc. This was also a problem as far as other in-kernel users were involved, since the solutions used were pretty much different from on driver to another, there was a rather big abstraction leak. This introduction of this framework aims at solving this. It also introduces DT representation for consumer devices to go get the data they require (MAC Addresses, SoC/Revision ID, part numbers, and so on) from the nvmems. Having regmap interface to this framework would give much better abstraction for nvmems on different buses. 
Signed-off-by: Maxime Ripard [Maxime Ripard: intial version of the framework] Signed-off-by: Srinivas Kandagatla Tested-by: Stefan Wahren Tested-by: Philipp Zabel Tested-by: Rajendra Nayak Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 421 ++++++++++++++++++++++++++++++++++++++++- include/linux/nvmem-consumer.h | 61 ++++++ 2 files changed, 481 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 2b024915e224..8c16ae2e1308 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -377,8 +377,12 @@ EXPORT_SYMBOL_GPL(nvmem_register); */ int nvmem_unregister(struct nvmem_device *nvmem) { - if (nvmem->users) + mutex_lock(&nvmem_mutex); + if (nvmem->users) { + mutex_unlock(&nvmem_mutex); return -EBUSY; + } + mutex_unlock(&nvmem_mutex); nvmem_device_remove_all_cells(nvmem); device_del(&nvmem->dev); @@ -387,6 +391,421 @@ int nvmem_unregister(struct nvmem_device *nvmem) } EXPORT_SYMBOL_GPL(nvmem_unregister); +static struct nvmem_device *__nvmem_device_get(struct device_node *np, + struct nvmem_cell **cellp, + const char *cell_id) +{ + struct nvmem_device *nvmem = NULL; + + mutex_lock(&nvmem_mutex); + + if (np) { + nvmem = of_nvmem_find(np); + if (!nvmem) { + mutex_unlock(&nvmem_mutex); + return ERR_PTR(-EPROBE_DEFER); + } + } else { + struct nvmem_cell *cell = nvmem_find_cell(cell_id); + + if (cell) { + nvmem = cell->nvmem; + *cellp = cell; + } + + if (!nvmem) { + mutex_unlock(&nvmem_mutex); + return ERR_PTR(-ENOENT); + } + } + + nvmem->users++; + mutex_unlock(&nvmem_mutex); + + if (!try_module_get(nvmem->owner)) { + dev_err(&nvmem->dev, + "could not increase module refcount for cell %s\n", + nvmem->name); + + mutex_lock(&nvmem_mutex); + nvmem->users--; + mutex_unlock(&nvmem_mutex); + + return ERR_PTR(-EINVAL); + } + + return nvmem; +} + +static void __nvmem_device_put(struct nvmem_device *nvmem) +{ + module_put(nvmem->owner); + mutex_lock(&nvmem_mutex); + nvmem->users--; + 
mutex_unlock(&nvmem_mutex); +} + +static struct nvmem_cell *nvmem_cell_get_from_list(const char *cell_id) +{ + struct nvmem_cell *cell = NULL; + struct nvmem_device *nvmem; + + nvmem = __nvmem_device_get(NULL, &cell, cell_id); + if (IS_ERR(nvmem)) + return ERR_CAST(nvmem); + + return cell; +} + +#if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) +/** + * of_nvmem_cell_get() - Get a nvmem cell from given device node and cell id + * + * @dev node: Device tree node that uses the nvmem cell + * @id: nvmem cell name from nvmem-cell-names property. + * + * Return: Will be an ERR_PTR() on error or a valid pointer + * to a struct nvmem_cell. The nvmem_cell will be freed by the + * nvmem_cell_put(). + */ +struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, + const char *name) +{ + struct device_node *cell_np, *nvmem_np; + struct nvmem_cell *cell; + struct nvmem_device *nvmem; + const __be32 *addr; + int rval, len, index; + + index = of_property_match_string(np, "nvmem-cell-names", name); + + cell_np = of_parse_phandle(np, "nvmem-cells", index); + if (!cell_np) + return ERR_PTR(-EINVAL); + + nvmem_np = of_get_next_parent(cell_np); + if (!nvmem_np) + return ERR_PTR(-EINVAL); + + nvmem = __nvmem_device_get(nvmem_np, NULL, NULL); + if (IS_ERR(nvmem)) + return ERR_CAST(nvmem); + + addr = of_get_property(cell_np, "reg", &len); + if (!addr || (len < 2 * sizeof(u32))) { + dev_err(&nvmem->dev, "nvmem: invalid reg on %s\n", + cell_np->full_name); + rval = -EINVAL; + goto err_mem; + } + + cell = kzalloc(sizeof(*cell), GFP_KERNEL); + if (!cell) { + rval = -ENOMEM; + goto err_mem; + } + + cell->nvmem = nvmem; + cell->offset = be32_to_cpup(addr++); + cell->bytes = be32_to_cpup(addr); + cell->name = cell_np->name; + + addr = of_get_property(cell_np, "bits", &len); + if (addr && len == (2 * sizeof(u32))) { + cell->bit_offset = be32_to_cpup(addr++); + cell->nbits = be32_to_cpup(addr); + } + + if (cell->nbits) + cell->bytes = DIV_ROUND_UP(cell->nbits + cell->bit_offset, + 
BITS_PER_BYTE); + + if (!IS_ALIGNED(cell->offset, nvmem->stride)) { + dev_err(&nvmem->dev, + "cell %s unaligned to nvmem stride %d\n", + cell->name, nvmem->stride); + rval = -EINVAL; + goto err_sanity; + } + + nvmem_cell_add(cell); + + return cell; + +err_sanity: + kfree(cell); + +err_mem: + __nvmem_device_put(nvmem); + + return ERR_PTR(rval); +} +EXPORT_SYMBOL_GPL(of_nvmem_cell_get); +#endif + +/** + * nvmem_cell_get() - Get nvmem cell of device form a given cell name + * + * @dev node: Device tree node that uses the nvmem cell + * @id: nvmem cell name to get. + * + * Return: Will be an ERR_PTR() on error or a valid pointer + * to a struct nvmem_cell. The nvmem_cell will be freed by the + * nvmem_cell_put(). + */ +struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id) +{ + struct nvmem_cell *cell; + + if (dev->of_node) { /* try dt first */ + cell = of_nvmem_cell_get(dev->of_node, cell_id); + if (!IS_ERR(cell) || PTR_ERR(cell) == -EPROBE_DEFER) + return cell; + } + + return nvmem_cell_get_from_list(cell_id); +} +EXPORT_SYMBOL_GPL(nvmem_cell_get); + +static void devm_nvmem_cell_release(struct device *dev, void *res) +{ + nvmem_cell_put(*(struct nvmem_cell **)res); +} + +/** + * devm_nvmem_cell_get() - Get nvmem cell of device form a given id + * + * @dev node: Device tree node that uses the nvmem cell + * @id: nvmem id in nvmem-names property. + * + * Return: Will be an ERR_PTR() on error or a valid pointer + * to a struct nvmem_cell. The nvmem_cell will be freed by the + * automatically once the device is freed. 
+ */ +struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *id) +{ + struct nvmem_cell **ptr, *cell; + + ptr = devres_alloc(devm_nvmem_cell_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + cell = nvmem_cell_get(dev, id); + if (!IS_ERR(cell)) { + *ptr = cell; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return cell; +} +EXPORT_SYMBOL_GPL(devm_nvmem_cell_get); + +static int devm_nvmem_cell_match(struct device *dev, void *res, void *data) +{ + struct nvmem_cell **c = res; + + if (WARN_ON(!c || !*c)) + return 0; + + return *c == data; +} + +/** + * devm_nvmem_cell_put() - Release previously allocated nvmem cell + * from devm_nvmem_cell_get. + * + * @cell: Previously allocated nvmem cell by devm_nvmem_cell_get() + */ +void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell) +{ + int ret; + + ret = devres_release(dev, devm_nvmem_cell_release, + devm_nvmem_cell_match, cell); + + WARN_ON(ret); +} +EXPORT_SYMBOL(devm_nvmem_cell_put); + +/** + * nvmem_cell_put() - Release previously allocated nvmem cell. 
+ * + * @cell: Previously allocated nvmem cell by nvmem_cell_get() + */ +void nvmem_cell_put(struct nvmem_cell *cell) +{ + struct nvmem_device *nvmem = cell->nvmem; + + __nvmem_device_put(nvmem); + nvmem_cell_drop(cell); +} +EXPORT_SYMBOL_GPL(nvmem_cell_put); + +static inline void nvmem_shift_read_buffer_in_place(struct nvmem_cell *cell, + void *buf) +{ + u8 *p, *b; + int i, bit_offset = cell->bit_offset; + + p = b = buf; + if (bit_offset) { + /* First shift */ + *b++ >>= bit_offset; + + /* setup rest of the bytes if any */ + for (i = 1; i < cell->bytes; i++) { + /* Get bits from next byte and shift them towards msb */ + *p |= *b << (BITS_PER_BYTE - bit_offset); + + p = b; + *b++ >>= bit_offset; + } + + /* result fits in less bytes */ + if (cell->bytes != DIV_ROUND_UP(cell->nbits, BITS_PER_BYTE)) + *p-- = 0; + } + /* clear msb bits if any leftover in the last byte */ + *p &= GENMASK((cell->nbits%BITS_PER_BYTE) - 1, 0); +} + +static int __nvmem_cell_read(struct nvmem_device *nvmem, + struct nvmem_cell *cell, + void *buf, size_t *len) +{ + int rc; + + rc = regmap_raw_read(nvmem->regmap, cell->offset, buf, cell->bytes); + + if (IS_ERR_VALUE(rc)) + return rc; + + /* shift bits in-place */ + if (cell->bit_offset || cell->bit_offset) + nvmem_shift_read_buffer_in_place(cell, buf); + + *len = cell->bytes; + + return 0; +} + +/** + * nvmem_cell_read() - Read a given nvmem cell + * + * @cell: nvmem cell to be read. + * @len: pointer to length of cell which will be populated on successful read. + * + * Return: ERR_PTR() on error or a valid pointer to a char * buffer on success. + * The buffer should be freed by the consumer with a kfree(). 
+ */ +void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) +{ + struct nvmem_device *nvmem = cell->nvmem; + u8 *buf; + int rc; + + if (!nvmem || !nvmem->regmap) + return ERR_PTR(-EINVAL); + + buf = kzalloc(cell->bytes, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + rc = __nvmem_cell_read(nvmem, cell, buf, len); + if (IS_ERR_VALUE(rc)) { + kfree(buf); + return ERR_PTR(rc); + } + + return buf; +} +EXPORT_SYMBOL_GPL(nvmem_cell_read); + +static inline void *nvmem_cell_prepare_write_buffer(struct nvmem_cell *cell, + u8 *_buf, int len) +{ + struct nvmem_device *nvmem = cell->nvmem; + int i, rc, nbits, bit_offset = cell->bit_offset; + u8 v, *p, *buf, *b, pbyte, pbits; + + nbits = cell->nbits; + buf = kzalloc(cell->bytes, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + memcpy(buf, _buf, len); + p = b = buf; + + if (bit_offset) { + pbyte = *b; + *b <<= bit_offset; + + /* setup the first byte with lsb bits from nvmem */ + rc = regmap_raw_read(nvmem->regmap, cell->offset, &v, 1); + *b++ |= GENMASK(bit_offset - 1, 0) & v; + + /* setup rest of the byte if any */ + for (i = 1; i < cell->bytes; i++) { + /* Get last byte bits and shift them towards lsb */ + pbits = pbyte >> (BITS_PER_BYTE - 1 - bit_offset); + pbyte = *b; + p = b; + *b <<= bit_offset; + *b++ |= pbits; + } + } + + /* if it's not end on byte boundary */ + if ((nbits + bit_offset) % BITS_PER_BYTE) { + /* setup the last byte with msb bits from nvmem */ + rc = regmap_raw_read(nvmem->regmap, + cell->offset + cell->bytes - 1, &v, 1); + *p |= GENMASK(7, (nbits + bit_offset) % BITS_PER_BYTE) & v; + + } + + return buf; +} + +/** + * nvmem_cell_write() - Write to a given nvmem cell + * + * @cell: nvmem cell to be written. + * @buf: Buffer to be written. + * @len: length of buffer to be written to nvmem cell. + * + * Return: length of bytes written or negative on failure. 
+ */ +int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len) +{ + struct nvmem_device *nvmem = cell->nvmem; + int rc; + + if (!nvmem || !nvmem->regmap || nvmem->read_only || + (cell->bit_offset == 0 && len != cell->bytes)) + return -EINVAL; + + if (cell->bit_offset || cell->nbits) { + buf = nvmem_cell_prepare_write_buffer(cell, buf, len); + if (IS_ERR(buf)) + return PTR_ERR(buf); + } + + rc = regmap_raw_write(nvmem->regmap, cell->offset, buf, cell->bytes); + + /* free the tmp buffer */ + if (cell->bit_offset) + kfree(buf); + + if (IS_ERR_VALUE(rc)) + return rc; + + return len; +} +EXPORT_SYMBOL_GPL(nvmem_cell_write); + static int __init nvmem_init(void) { return bus_register(&nvmem_bus_type); diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 1e9e7678a501..297cc67b7211 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -12,6 +12,11 @@ #ifndef _LINUX_NVMEM_CONSUMER_H #define _LINUX_NVMEM_CONSUMER_H +struct device; +struct device_node; +/* consumer cookie */ +struct nvmem_cell; + struct nvmem_cell_info { const char *name; unsigned int offset; @@ -20,4 +25,60 @@ struct nvmem_cell_info { unsigned int nbits; }; +#if IS_ENABLED(CONFIG_NVMEM) + +/* Cell based interface */ +struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name); +struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name); +void nvmem_cell_put(struct nvmem_cell *cell); +void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); +void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len); +int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len); + +#else + +static inline struct nvmem_cell *nvmem_cell_get(struct device *dev, + const char *name) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, + const char *name) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void devm_nvmem_cell_put(struct device *dev, + 
struct nvmem_cell *cell) +{ + +} +static inline void nvmem_cell_put(struct nvmem_cell *cell) +{ +} + +static inline char *nvmem_cell_read(struct nvmem_cell *cell, size_t *len) +{ + return ERR_PTR(-ENOSYS); +} + +static inline int nvmem_cell_write(struct nvmem_cell *cell, + const char *buf, size_t len) +{ + return -ENOSYS; +} +#endif /* CONFIG_NVMEM */ + +#if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) +struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, + const char *name); +#else +static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, + const char *name) +{ + return ERR_PTR(-ENOSYS); +} +#endif /* CONFIG_NVMEM && CONFIG_OF */ + #endif /* ifndef _LINUX_NVMEM_CONSUMER_H */ -- cgit v1.2.3-70-g09d2 From e2a5402ec7c6d0442cca370a0097e75750f81398 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 27 Jul 2015 12:13:45 +0100 Subject: nvmem: Add nvmem_device based consumer apis. This patch adds read/write apis which are based on nvmem_device. It is common for drivers like the omap cape manager or the qcom cpr driver to access bytes directly at a particular offset in the eeprom and not from nvmem cell info in DT. These drivers would need to get access to the nvmem directly, which is what these new APIs provide. These wrapper apis would help such users to avoid code duplication in their drivers and also avoid them reading a big eeprom blob and parsing it internally in their driver.
Signed-off-by: Srinivas Kandagatla Tested-by: Stefan Wahren Tested-by: Philipp Zabel Tested-by: Rajendra Nayak Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 258 +++++++++++++++++++++++++++++++++++++++++ include/linux/nvmem-consumer.h | 73 ++++++++++++ 2 files changed, 331 insertions(+) (limited to 'include/linux') diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 8c16ae2e1308..d3c6676b3c0c 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -445,6 +445,148 @@ static void __nvmem_device_put(struct nvmem_device *nvmem) mutex_unlock(&nvmem_mutex); } +static int nvmem_match(struct device *dev, void *data) +{ + return !strcmp(dev_name(dev), data); +} + +static struct nvmem_device *nvmem_find(const char *name) +{ + struct device *d; + + d = bus_find_device(&nvmem_bus_type, NULL, (void *)name, nvmem_match); + + if (!d) + return NULL; + + return to_nvmem_device(d); +} + +#if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) +/** + * of_nvmem_device_get() - Get nvmem device from a given id + * + * @dev node: Device tree node that uses the nvmem device + * @id: nvmem name from nvmem-names property. + * + * Return: ERR_PTR() on error or a valid pointer to a struct nvmem_device + * on success. + */ +struct nvmem_device *of_nvmem_device_get(struct device_node *np, const char *id) +{ + + struct device_node *nvmem_np; + int index; + + index = of_property_match_string(np, "nvmem-names", id); + + nvmem_np = of_parse_phandle(np, "nvmem", index); + if (!nvmem_np) + return ERR_PTR(-EINVAL); + + return __nvmem_device_get(nvmem_np, NULL, NULL); +} +EXPORT_SYMBOL_GPL(of_nvmem_device_get); +#endif + +/** + * nvmem_device_get() - Get nvmem device from a given id + * + * @dev : Device that uses the nvmem device + * @id: nvmem name from nvmem-names property. + * + * Return: ERR_PTR() on error or a valid pointer to a struct nvmem_device + * on success. 
+ */ +struct nvmem_device *nvmem_device_get(struct device *dev, const char *dev_name) +{ + if (dev->of_node) { /* try dt first */ + struct nvmem_device *nvmem; + + nvmem = of_nvmem_device_get(dev->of_node, dev_name); + + if (!IS_ERR(nvmem) || PTR_ERR(nvmem) == -EPROBE_DEFER) + return nvmem; + + } + + return nvmem_find(dev_name); +} +EXPORT_SYMBOL_GPL(nvmem_device_get); + +static int devm_nvmem_device_match(struct device *dev, void *res, void *data) +{ + struct nvmem_device **nvmem = res; + + if (WARN_ON(!nvmem || !*nvmem)) + return 0; + + return *nvmem == data; +} + +static void devm_nvmem_device_release(struct device *dev, void *res) +{ + nvmem_device_put(*(struct nvmem_device **)res); +} + +/** + * devm_nvmem_device_put() - put alredy got nvmem device + * + * @nvmem: pointer to nvmem device allocated by devm_nvmem_cell_get(), + * that needs to be released. + */ +void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem) +{ + int ret; + + ret = devres_release(dev, devm_nvmem_device_release, + devm_nvmem_device_match, nvmem); + + WARN_ON(ret); +} +EXPORT_SYMBOL_GPL(devm_nvmem_device_put); + +/** + * nvmem_device_put() - put alredy got nvmem device + * + * @nvmem: pointer to nvmem device that needs to be released. + */ +void nvmem_device_put(struct nvmem_device *nvmem) +{ + __nvmem_device_put(nvmem); +} +EXPORT_SYMBOL_GPL(nvmem_device_put); + +/** + * devm_nvmem_device_get() - Get nvmem cell of device form a given id + * + * @dev node: Device tree node that uses the nvmem cell + * @id: nvmem name in nvmems property. + * + * Return: ERR_PTR() on error or a valid pointer to a struct nvmem_cell + * on success. The nvmem_cell will be freed by the automatically once the + * device is freed. 
+ */ +struct nvmem_device *devm_nvmem_device_get(struct device *dev, const char *id) +{ + struct nvmem_device **ptr, *nvmem; + + ptr = devres_alloc(devm_nvmem_device_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return ERR_PTR(-ENOMEM); + + nvmem = nvmem_device_get(dev, id); + if (!IS_ERR(nvmem)) { + *ptr = nvmem; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return nvmem; +} +EXPORT_SYMBOL_GPL(devm_nvmem_device_get); + static struct nvmem_cell *nvmem_cell_get_from_list(const char *cell_id) { struct nvmem_cell *cell = NULL; @@ -806,6 +948,122 @@ int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len) } EXPORT_SYMBOL_GPL(nvmem_cell_write); +/** + * nvmem_device_cell_read() - Read a given nvmem device and cell + * + * @nvmem: nvmem device to read from. + * @info: nvmem cell info to be read. + * @buf: buffer pointer which will be populated on successful read. + * + * Return: length of successful bytes read on success and negative + * error code on error. + */ +ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf) +{ + struct nvmem_cell cell; + int rc; + ssize_t len; + + if (!nvmem || !nvmem->regmap) + return -EINVAL; + + rc = nvmem_cell_info_to_nvmem_cell(nvmem, info, &cell); + if (IS_ERR_VALUE(rc)) + return rc; + + rc = __nvmem_cell_read(nvmem, &cell, buf, &len); + if (IS_ERR_VALUE(rc)) + return rc; + + return len; +} +EXPORT_SYMBOL_GPL(nvmem_device_cell_read); + +/** + * nvmem_device_cell_write() - Write cell to a given nvmem device + * + * @nvmem: nvmem device to be written to. + * @info: nvmem cell info to be written + * @buf: buffer to be written to cell. + * + * Return: length of bytes written or negative error code on failure. 
+ * */ +int nvmem_device_cell_write(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf) +{ + struct nvmem_cell cell; + int rc; + + if (!nvmem || !nvmem->regmap) + return -EINVAL; + + rc = nvmem_cell_info_to_nvmem_cell(nvmem, info, &cell); + if (IS_ERR_VALUE(rc)) + return rc; + + return nvmem_cell_write(&cell, buf, cell.bytes); +} +EXPORT_SYMBOL_GPL(nvmem_device_cell_write); + +/** + * nvmem_device_read() - Read from a given nvmem device + * + * @nvmem: nvmem device to read from. + * @offset: offset in nvmem device. + * @bytes: number of bytes to read. + * @buf: buffer pointer which will be populated on successful read. + * + * Return: length of successful bytes read on success and negative + * error code on error. + */ +int nvmem_device_read(struct nvmem_device *nvmem, + unsigned int offset, + size_t bytes, void *buf) +{ + int rc; + + if (!nvmem || !nvmem->regmap) + return -EINVAL; + + rc = regmap_raw_read(nvmem->regmap, offset, buf, bytes); + + if (IS_ERR_VALUE(rc)) + return rc; + + return bytes; +} +EXPORT_SYMBOL_GPL(nvmem_device_read); + +/** + * nvmem_device_write() - Write cell to a given nvmem device + * + * @nvmem: nvmem device to be written to. + * @offset: offset in nvmem device. + * @bytes: number of bytes to write. + * @buf: buffer to be written. + * + * Return: length of bytes written or negative error code on failure. 
+ * */ +int nvmem_device_write(struct nvmem_device *nvmem, + unsigned int offset, + size_t bytes, void *buf) +{ + int rc; + + if (!nvmem || !nvmem->regmap) + return -EINVAL; + + rc = regmap_raw_write(nvmem->regmap, offset, buf, bytes); + + if (IS_ERR_VALUE(rc)) + return rc; + + + return bytes; +} +EXPORT_SYMBOL_GPL(nvmem_device_write); + static int __init nvmem_init(void) { return bus_register(&nvmem_bus_type); diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h index 297cc67b7211..9bb77d3ed6e0 100644 --- a/include/linux/nvmem-consumer.h +++ b/include/linux/nvmem-consumer.h @@ -16,6 +16,7 @@ struct device; struct device_node; /* consumer cookie */ struct nvmem_cell; +struct nvmem_device; struct nvmem_cell_info { const char *name; @@ -35,6 +36,21 @@ void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell); void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len); int nvmem_cell_write(struct nvmem_cell *cell, void *buf, size_t len); +/* direct nvmem device read/write interface */ +struct nvmem_device *nvmem_device_get(struct device *dev, const char *name); +struct nvmem_device *devm_nvmem_device_get(struct device *dev, + const char *name); +void nvmem_device_put(struct nvmem_device *nvmem); +void devm_nvmem_device_put(struct device *dev, struct nvmem_device *nvmem); +int nvmem_device_read(struct nvmem_device *nvmem, unsigned int offset, + size_t bytes, void *buf); +int nvmem_device_write(struct nvmem_device *nvmem, unsigned int offset, + size_t bytes, void *buf); +ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf); +int nvmem_device_cell_write(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, void *buf); + #else static inline struct nvmem_cell *nvmem_cell_get(struct device *dev, @@ -68,17 +84,74 @@ static inline int nvmem_cell_write(struct nvmem_cell *cell, { return -ENOSYS; } + +static inline struct nvmem_device *nvmem_device_get(struct device *dev, + const char 
*name) +{ + return ERR_PTR(-ENOSYS); +} + +static inline struct nvmem_device *devm_nvmem_device_get(struct device *dev, + const char *name) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void nvmem_device_put(struct nvmem_device *nvmem) +{ +} + +static inline void devm_nvmem_device_put(struct device *dev, + struct nvmem_device *nvmem) +{ +} + +static inline ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, + void *buf) +{ + return -ENOSYS; +} + +static inline int nvmem_device_cell_write(struct nvmem_device *nvmem, + struct nvmem_cell_info *info, + void *buf) +{ + return -ENOSYS; +} + +static inline int nvmem_device_read(struct nvmem_device *nvmem, + unsigned int offset, size_t bytes, + void *buf) +{ + return -ENOSYS; +} + +static inline int nvmem_device_write(struct nvmem_device *nvmem, + unsigned int offset, size_t bytes, + void *buf) +{ + return -ENOSYS; +} #endif /* CONFIG_NVMEM */ #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF) struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *name); +struct nvmem_device *of_nvmem_device_get(struct device_node *np, + const char *name); #else static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *name) { return ERR_PTR(-ENOSYS); } + +static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np, + const char *name) +{ + return ERR_PTR(-ENOSYS); +} #endif /* CONFIG_NVMEM && CONFIG_OF */ #endif /* ifndef _LINUX_NVMEM_CONSUMER_H */ -- cgit v1.2.3-70-g09d2 From f61ae4fb66a4f7ae49e3456003fc4328d6db09c9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 2 Aug 2015 20:38:26 +0000 Subject: genirq: Provide irq_desc_has_action If we have a reference to irq_desc already, there is no point to do another lookup. 
Signed-off-by: Thomas Gleixner Cc: Jiang Liu Cc: Peter Zijlstra Cc: Rusty Russell Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/20150802203609.638130301@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/irqdesc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index fcea4e48e21f..5acfa26602e1 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -166,12 +166,16 @@ static inline int handle_domain_irq(struct irq_domain *domain, #endif /* Test to see if a driver has successfully requested an irq */ -static inline int irq_has_action(unsigned int irq) +static inline int irq_desc_has_action(struct irq_desc *desc) { - struct irq_desc *desc = irq_to_desc(irq); return desc->action != NULL; } +static inline int irq_has_action(unsigned int irq) +{ + return irq_desc_has_action(irq_to_desc(irq)); +} + /* caller has locked the irq_desc and both params are valid */ static inline void __irq_set_handler_locked(unsigned int irq, irq_flow_handler_t handler) -- cgit v1.2.3-70-g09d2 From 71db87ba570038497db1227b7dc61113c4156565 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 30 Jul 2015 15:04:01 +0530 Subject: bus: subsys: update return type of ->remove_dev() to void Its return value is not used by the subsys core and nothing meaningful can be done with it, even if we want to use it. The subsys device is anyway getting removed. Update prototype of ->remove_dev() to make its return type as void. Fix all usage sites as well. 
Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- arch/sh/kernel/cpu/sh4/sq.c | 3 +-- arch/tile/kernel/sysfs.c | 11 ++++------- arch/x86/kernel/cpu/microcode/core.c | 5 ++--- drivers/cpufreq/cpufreq.c | 12 +++++------- drivers/net/rionet.c | 4 +--- include/linux/device.h | 2 +- 6 files changed, 14 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 0a47bd3e7bee..4ca78ed71ad2 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -355,13 +355,12 @@ static int sq_dev_add(struct device *dev, struct subsys_interface *sif) return error; } -static int sq_dev_remove(struct device *dev, struct subsys_interface *sif) +static void sq_dev_remove(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; struct kobject *kobj = sq_kobject[cpu]; kobject_put(kobj); - return 0; } static struct subsys_interface sq_interface = { diff --git a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c index a3ed12f8f83b..825867c53853 100644 --- a/arch/tile/kernel/sysfs.c +++ b/arch/tile/kernel/sysfs.c @@ -198,16 +198,13 @@ static int hv_stats_device_add(struct device *dev, struct subsys_interface *sif) return err; } -static int hv_stats_device_remove(struct device *dev, - struct subsys_interface *sif) +static void hv_stats_device_remove(struct device *dev, + struct subsys_interface *sif) { int cpu = dev->id; - if (!cpu_online(cpu)) - return 0; - - sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr); - return 0; + if (cpu_online(cpu)) + sysfs_remove_file(&dev->kobj, &dev_attr_hv_stats.attr); } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 6236a54a63f4..3c986390058a 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -377,17 +377,16 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) return err; } -static int 
mc_device_remove(struct device *dev, struct subsys_interface *sif) +static void mc_device_remove(struct device *dev, struct subsys_interface *sif) { int cpu = dev->id; if (!cpu_online(cpu)) - return 0; + return; pr_debug("CPU%d removed\n", cpu); microcode_fini_cpu(cpu); sysfs_remove_group(&dev->kobj, &mc_attr_group); - return 0; } static struct subsys_interface mc_cpu_interface = { diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 26063afb3eba..6da25c10bdfd 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1518,7 +1518,7 @@ static int __cpufreq_remove_dev_finish(struct device *dev, * * Removes the cpufreq interface for a CPU device. */ -static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) +static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) { unsigned int cpu = dev->id; int ret; @@ -1533,7 +1533,7 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) struct cpumask mask; if (!policy) - return 0; + return; cpumask_copy(&mask, policy->related_cpus); cpumask_clear_cpu(cpu, &mask); @@ -1544,19 +1544,17 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) */ if (cpumask_intersects(&mask, cpu_present_mask)) { remove_cpu_dev_symlink(policy, cpu); - return 0; + return; } cpufreq_policy_free(policy, true); - return 0; + return; } ret = __cpufreq_remove_dev_prepare(dev, sif); if (!ret) - ret = __cpufreq_remove_dev_finish(dev, sif); - - return ret; + __cpufreq_remove_dev_finish(dev, sif); } static void handle_update(struct work_struct *work) diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index dac7a0d9bb46..01f08a7751f7 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -396,7 +396,7 @@ static int rionet_close(struct net_device *ndev) return 0; } -static int rionet_remove_dev(struct device *dev, struct subsys_interface *sif) +static void rionet_remove_dev(struct device *dev, struct 
subsys_interface *sif) { struct rio_dev *rdev = to_rio_dev(dev); unsigned char netid = rdev->net->hport->id; @@ -416,8 +416,6 @@ static int rionet_remove_dev(struct device *dev, struct subsys_interface *sif) } } } - - return 0; } static void rionet_get_drvinfo(struct net_device *ndev, diff --git a/include/linux/device.h b/include/linux/device.h index a2b4ea70a946..1225f98e9240 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -341,7 +341,7 @@ struct subsys_interface { struct bus_type *subsys; struct list_head node; int (*add_dev)(struct device *dev, struct subsys_interface *sif); - int (*remove_dev)(struct device *dev, struct subsys_interface *sif); + void (*remove_dev)(struct device *dev, struct subsys_interface *sif); }; int subsys_interface_register(struct subsys_interface *sif); -- cgit v1.2.3-70-g09d2 From 50c7cd2bd3786258606c6c7c8356064c08ab2383 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 6 Jul 2015 12:19:23 +0200 Subject: dmaengine: Add scatter-gathered memset The current API allows the driver to accelerate memset by using the DMA controller. However, it does so over a contiguous memory area, which might prove inefficient when you have to do it over a non-contiguous yet repetitive pattern, since you have to create a number of descriptors and then submit each of them. Add a memset operation going over a scatter list to handle such cases in a single call.
Signed-off-by: Maxime Ripard Acked-by: Ludovic Desroches Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 03ed832adbc2..8ad9a4e839f6 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -66,6 +66,7 @@ enum dma_transaction_type { DMA_XOR_VAL, DMA_PQ_VAL, DMA_MEMSET, + DMA_MEMSET_SG, DMA_INTERRUPT, DMA_SG, DMA_PRIVATE, @@ -630,6 +631,7 @@ enum dmaengine_alignment { * @device_prep_dma_pq: prepares a pq operation * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation + * @device_prep_dma_memset_sg: prepares a memset operation over a scatter list * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio. @@ -696,6 +698,9 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_memset_sg)( + struct dma_chan *chan, struct scatterlist *sg, + unsigned int nents, int value, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_sg)( -- cgit v1.2.3-70-g09d2 From 596c154d62330ea0bb4e3c3e50afa3682e50b617 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Thu, 6 Aug 2015 14:11:10 +0200 Subject: usb: gadget: add 'ep_match' callback to usb_gadget_ops Add a callback that is called by epautoconf to allow the UDC driver to match the best endpoint for a specific descriptor. It's intended to supply a mechanism that makes it possible to get rid of chip-specific endpoint matching code from epautoconf.
If gadget has set 'ep_match' callback we prefer to call it first, and if it fails to find matching endpoint, then we try to use default matching algorithm. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/epautoconf.c | 6 ++++++ include/linux/usb/gadget.h | 3 +++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index 95e12759af4d..f000c73319f4 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -165,6 +165,12 @@ struct usb_ep *usb_ep_autoconfig_ss( type = desc->bmAttributes & USB_ENDPOINT_XFERTYPE_MASK; + if (gadget->ops->match_ep) { + ep = gadget->ops->match_ep(gadget, desc, ep_comp); + if (ep) + goto found_ep; + } + /* First, apply chip-specific "best usage" knowledge. * This might make a good usb_gadget_ops hook ... */ diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 82b5bcbd2c98..303214bb2f8b 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -534,6 +534,9 @@ struct usb_gadget_ops { int (*udc_start)(struct usb_gadget *, struct usb_gadget_driver *); int (*udc_stop)(struct usb_gadget *); + struct usb_ep *(*match_ep)(struct usb_gadget *, + struct usb_endpoint_descriptor *, + struct usb_ss_ep_comp_descriptor *); }; /** -- cgit v1.2.3-70-g09d2 From 4278c687f697b651ab0c771114564da5ed006f22 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Thu, 6 Aug 2015 14:11:11 +0200 Subject: usb: gadget: move ep_matches() from epautoconf to udc-core Move ep_matches() function to udc-core and rename it to usb_gadget_ep_match_desc(). This function can be used by UDC drivers in 'match_ep' callback to avoid writing lots of repetitive code. Replace all calls of ep_matches() with usb_gadget_ep_match_desc(). 
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/epautoconf.c | 95 +++++---------------------------------- drivers/usb/gadget/udc/udc-core.c | 69 ++++++++++++++++++++++++++++ include/linux/usb/gadget.h | 8 ++++ 3 files changed, 88 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index f000c73319f4..d49af4fc8667 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -22,82 +22,6 @@ #include "gadget_chips.h" -static int -ep_matches ( - struct usb_gadget *gadget, - struct usb_ep *ep, - struct usb_endpoint_descriptor *desc, - struct usb_ss_ep_comp_descriptor *ep_comp -) -{ - u8 type; - u16 max; - int num_req_streams = 0; - - /* endpoint already claimed? */ - if (ep->claimed) - return 0; - - type = usb_endpoint_type(desc); - max = 0x7ff & usb_endpoint_maxp(desc); - - if (usb_endpoint_dir_in(desc) && !ep->caps.dir_in) - return 0; - if (usb_endpoint_dir_out(desc) && !ep->caps.dir_out) - return 0; - - if (max > ep->maxpacket_limit) - return 0; - - /* "high bandwidth" works only at high speed */ - if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11)) - return 0; - - switch (type) { - case USB_ENDPOINT_XFER_CONTROL: - /* only support ep0 for portable CONTROL traffic */ - return 0; - case USB_ENDPOINT_XFER_ISOC: - if (!ep->caps.type_iso) - return 0; - /* ISO: limit 1023 bytes full speed, - * 1024 high/super speed - */ - if (!gadget_is_dualspeed(gadget) && max > 1023) - return 0; - break; - case USB_ENDPOINT_XFER_BULK: - if (!ep->caps.type_bulk) - return 0; - if (ep_comp && gadget_is_superspeed(gadget)) { - /* Get the number of required streams from the - * EP companion descriptor and see if the EP - * matches it - */ - num_req_streams = ep_comp->bmAttributes & 0x1f; - if (num_req_streams > ep->max_streams) - return 0; - } - break; - case USB_ENDPOINT_XFER_INT: - /* Bulk endpoints handle interrupt transfers, - 
* except the toggle-quirky iso-synch kind - */ - if (!ep->caps.type_int && !ep->caps.type_bulk) - return 0; - /* INT: limit 64 bytes full speed, - * 1024 high/super speed - */ - if (!gadget_is_dualspeed(gadget) && max > 64) - return 0; - break; - } - - /* MATCH!! */ - - return 1; -} - static struct usb_ep * find_ep (struct usb_gadget *gadget, const char *name) { @@ -180,10 +104,12 @@ struct usb_ep *usb_ep_autoconfig_ss( if (type == USB_ENDPOINT_XFER_INT) { /* ep-e, ep-f are PIO with only 64 byte fifos */ ep = find_ep(gadget, "ep-e"); - if (ep && ep_matches(gadget, ep, desc, ep_comp)) + if (ep && usb_gadget_ep_match_desc(gadget, + ep, desc, ep_comp)) goto found_ep; ep = find_ep(gadget, "ep-f"); - if (ep && ep_matches(gadget, ep, desc, ep_comp)) + if (ep && usb_gadget_ep_match_desc(gadget, + ep, desc, ep_comp)) goto found_ep; } @@ -191,20 +117,21 @@ struct usb_ep *usb_ep_autoconfig_ss( snprintf(name, sizeof(name), "ep%d%s", usb_endpoint_num(desc), usb_endpoint_dir_in(desc) ? "in" : "out"); ep = find_ep(gadget, name); - if (ep && ep_matches(gadget, ep, desc, ep_comp)) + if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; } else if (gadget_is_goku (gadget)) { if (USB_ENDPOINT_XFER_INT == type) { /* single buffering is enough */ ep = find_ep(gadget, "ep3-bulk"); - if (ep && ep_matches(gadget, ep, desc, ep_comp)) + if (ep && usb_gadget_ep_match_desc(gadget, + ep, desc, ep_comp)) goto found_ep; } else if (USB_ENDPOINT_XFER_BULK == type && (USB_DIR_IN & desc->bEndpointAddress)) { /* DMA may be available */ ep = find_ep(gadget, "ep2-bulk"); - if (ep && ep_matches(gadget, ep, desc, - ep_comp)) + if (ep && usb_gadget_ep_match_desc(gadget, + ep, desc, ep_comp)) goto found_ep; } @@ -223,14 +150,14 @@ struct usb_ep *usb_ep_autoconfig_ss( ep = find_ep(gadget, "ep2out"); } else ep = NULL; - if (ep && ep_matches(gadget, ep, desc, ep_comp)) + if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; #endif } /* Second, look at endpoints 
until an unclaimed one looks usable */ list_for_each_entry (ep, &gadget->ep_list, ep_list) { - if (ep_matches(gadget, ep, desc, ep_comp)) + if (usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; } diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index 362ee8af5fce..b6427d1e9a6c 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -131,6 +131,75 @@ EXPORT_SYMBOL_GPL(usb_gadget_giveback_request); /* ------------------------------------------------------------------------- */ +int usb_gadget_ep_match_desc(struct usb_gadget *gadget, + struct usb_ep *ep, struct usb_endpoint_descriptor *desc, + struct usb_ss_ep_comp_descriptor *ep_comp) +{ + u8 type; + u16 max; + int num_req_streams = 0; + + /* endpoint already claimed? */ + if (ep->claimed) + return 0; + + type = usb_endpoint_type(desc); + max = 0x7ff & usb_endpoint_maxp(desc); + + if (usb_endpoint_dir_in(desc) && !ep->caps.dir_in) + return 0; + if (usb_endpoint_dir_out(desc) && !ep->caps.dir_out) + return 0; + + if (max > ep->maxpacket_limit) + return 0; + + /* "high bandwidth" works only at high speed */ + if (!gadget_is_dualspeed(gadget) && usb_endpoint_maxp(desc) & (3<<11)) + return 0; + + switch (type) { + case USB_ENDPOINT_XFER_CONTROL: + /* only support ep0 for portable CONTROL traffic */ + return 0; + case USB_ENDPOINT_XFER_ISOC: + if (!ep->caps.type_iso) + return 0; + /* ISO: limit 1023 bytes full speed, 1024 high/super speed */ + if (!gadget_is_dualspeed(gadget) && max > 1023) + return 0; + break; + case USB_ENDPOINT_XFER_BULK: + if (!ep->caps.type_bulk) + return 0; + if (ep_comp && gadget_is_superspeed(gadget)) { + /* Get the number of required streams from the + * EP companion descriptor and see if the EP + * matches it + */ + num_req_streams = ep_comp->bmAttributes & 0x1f; + if (num_req_streams > ep->max_streams) + return 0; + } + break; + case USB_ENDPOINT_XFER_INT: + /* Bulk endpoints handle interrupt transfers, + * 
except the toggle-quirky iso-synch kind + */ + if (!ep->caps.type_int && !ep->caps.type_bulk) + return 0; + /* INT: limit 64 bytes full speed, 1024 high/super speed */ + if (!gadget_is_dualspeed(gadget) && max > 64) + return 0; + break; + } + + return 1; +} +EXPORT_SYMBOL_GPL(usb_gadget_ep_match_desc); + +/* ------------------------------------------------------------------------- */ + static void usb_gadget_state_work(struct work_struct *work) { struct usb_gadget *gadget = work_to_gadget(work); diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 303214bb2f8b..e04fd6381ae8 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1204,6 +1204,14 @@ extern void usb_gadget_giveback_request(struct usb_ep *ep, /*-------------------------------------------------------------------------*/ +/* utility to check if endpoint caps match descriptor needs */ + +extern int usb_gadget_ep_match_desc(struct usb_gadget *gadget, + struct usb_ep *ep, struct usb_endpoint_descriptor *desc, + struct usb_ss_ep_comp_descriptor *ep_comp); + +/*-------------------------------------------------------------------------*/ + /* utility to update vbus status for udc core, it may be scheduled */ extern void usb_udc_vbus_handler(struct usb_gadget *gadget, bool status); -- cgit v1.2.3-70-g09d2 From b0aea0037c8896b8e69cad3f6e828782789c1edf Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Thu, 6 Aug 2015 14:11:12 +0200 Subject: usb: gadget: move find_ep() from epautoconf to udc-core Move find_ep() to udc-core and rename it to gadget_find_ep_by_name(). It can be used in UDC drivers, especially in 'match_ep' callback after moving chip-specific endpoint matching logic from epautoconf to UDC drivers. Replace all calls of find_ep() function with gadget_find_ep_by_name(). 
Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- drivers/usb/gadget/epautoconf.c | 30 +++++++++--------------------- drivers/usb/gadget/udc/udc-core.c | 21 +++++++++++++++++++++ include/linux/usb/gadget.h | 8 +++++++- 3 files changed, 37 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/gadget/epautoconf.c b/drivers/usb/gadget/epautoconf.c index d49af4fc8667..a39ca033b9ce 100644 --- a/drivers/usb/gadget/epautoconf.c +++ b/drivers/usb/gadget/epautoconf.c @@ -22,18 +22,6 @@ #include "gadget_chips.h" -static struct usb_ep * -find_ep (struct usb_gadget *gadget, const char *name) -{ - struct usb_ep *ep; - - list_for_each_entry (ep, &gadget->ep_list, ep_list) { - if (0 == strcmp (ep->name, name)) - return ep; - } - return NULL; -} - /** * usb_ep_autoconfig_ss() - choose an endpoint matching the ep * descriptor and ep companion descriptor @@ -103,11 +91,11 @@ struct usb_ep *usb_ep_autoconfig_ss( if (type == USB_ENDPOINT_XFER_INT) { /* ep-e, ep-f are PIO with only 64 byte fifos */ - ep = find_ep(gadget, "ep-e"); + ep = gadget_find_ep_by_name(gadget, "ep-e"); if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; - ep = find_ep(gadget, "ep-f"); + ep = gadget_find_ep_by_name(gadget, "ep-f"); if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; @@ -116,20 +104,20 @@ struct usb_ep *usb_ep_autoconfig_ss( /* USB3380: use same address for usb and hardware endpoints */ snprintf(name, sizeof(name), "ep%d%s", usb_endpoint_num(desc), usb_endpoint_dir_in(desc) ? 
"in" : "out"); - ep = find_ep(gadget, name); + ep = gadget_find_ep_by_name(gadget, name); if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; } else if (gadget_is_goku (gadget)) { if (USB_ENDPOINT_XFER_INT == type) { /* single buffering is enough */ - ep = find_ep(gadget, "ep3-bulk"); + ep = gadget_find_ep_by_name(gadget, "ep3-bulk"); if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; } else if (USB_ENDPOINT_XFER_BULK == type && (USB_DIR_IN & desc->bEndpointAddress)) { /* DMA may be available */ - ep = find_ep(gadget, "ep2-bulk"); + ep = gadget_find_ep_by_name(gadget, "ep2-bulk"); if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) goto found_ep; @@ -140,14 +128,14 @@ struct usb_ep *usb_ep_autoconfig_ss( if ((USB_ENDPOINT_XFER_BULK == type) || (USB_ENDPOINT_XFER_ISOC == type)) { if (USB_DIR_IN & desc->bEndpointAddress) - ep = find_ep (gadget, "ep5in"); + ep = gadget_find_ep_by_name(gadget, "ep5in"); else - ep = find_ep (gadget, "ep6out"); + ep = gadget_find_ep_by_name(gadget, "ep6out"); } else if (USB_ENDPOINT_XFER_INT == type) { if (USB_DIR_IN & desc->bEndpointAddress) - ep = find_ep(gadget, "ep1in"); + ep = gadget_find_ep_by_name(gadget, "ep1in"); else - ep = find_ep(gadget, "ep2out"); + ep = gadget_find_ep_by_name(gadget, "ep2out"); } else ep = NULL; if (ep && usb_gadget_ep_match_desc(gadget, ep, desc, ep_comp)) diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index b6427d1e9a6c..3c954b5fe4f3 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -131,6 +131,27 @@ EXPORT_SYMBOL_GPL(usb_gadget_giveback_request); /* ------------------------------------------------------------------------- */ +/** + * gadget_find_ep_by_name - returns ep whose name is the same as sting passed + * in second parameter or NULL if searched endpoint not found + * @g: controller to check for quirk + * @name: name of searched endpoint + */ +struct usb_ep 
*gadget_find_ep_by_name(struct usb_gadget *g, const char *name) +{ + struct usb_ep *ep; + + gadget_for_each_ep(ep, g) { + if (!strcmp(ep->name, name)) + return ep; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(gadget_find_ep_by_name); + +/* ------------------------------------------------------------------------- */ + int usb_gadget_ep_match_desc(struct usb_gadget *gadget, struct usb_ep *ep, struct usb_endpoint_descriptor *desc, struct usb_ss_ep_comp_descriptor *ep_comp) diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e04fd6381ae8..c14a69b36d27 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -639,7 +639,6 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev) #define gadget_for_each_ep(tmp, gadget) \ list_for_each_entry(tmp, &(gadget)->ep_list, ep_list) - /** * usb_ep_align_maybe - returns @len aligned to ep's maxpacketsize if gadget * requires quirk_ep_out_aligned_size, otherwise reguens len. @@ -1204,6 +1203,13 @@ extern void usb_gadget_giveback_request(struct usb_ep *ep, /*-------------------------------------------------------------------------*/ +/* utility to find endpoint by name */ + +extern struct usb_ep *gadget_find_ep_by_name(struct usb_gadget *g, + const char *name); + +/*-------------------------------------------------------------------------*/ + /* utility to check if endpoint caps match descriptor needs */ extern int usb_gadget_ep_match_desc(struct usb_gadget *gadget, -- cgit v1.2.3-70-g09d2 From 098d2164e3441c252eaa28906d45e16b7bf1bd2b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 1 Jul 2015 02:13:49 +0000 Subject: bpf: Use correct #ifdef controller for trace_call_bpf() Commit e1abf2cc8d5d80b41c4419368ec743ccadbb131e ("bpf: Fix the build on BPF_SYSCALL=y && !CONFIG_TRACING kernels, make it more configurable") updated the building condition of bpf_trace.o from CONFIG_BPF_SYSCALL to CONFIG_BPF_EVENTS, but the corresponding #ifdef controller in trace_events.h for 
trace_call_bpf() was not changed. Which, in theory, is incorrect. With current Kconfigs, we can create a .config with CONFIG_BPF_SYSCALL=y and CONFIG_BPF_EVENTS=n by unselecting CONFIG_KPROBE_EVENT and selecting CONFIG_BPF_SYSCALL. With these options, trace_call_bpf() will be defined as an extern function, but if anyone calls it a symbol missing error will be triggered since bpf_trace.o was not built. This patch changes the #ifdef controller for trace_call_bpf() from CONFIG_BPF_SYSCALL to CONFIG_BPF_EVENTS. I'll show its correctness: Before this patch: BPF_SYSCALL BPF_EVENTS trace_call_bpf bpf_trace.o y y normal compiled n n inline not compiled y n normal not compiled (incorrect) n y impossible (BPF_EVENTS depends on BPF_SYSCALL) After this patch: BPF_SYSCALL BPF_EVENTS trace_call_bpf bpf_trace.o y y normal compiled n n inline not compiled y n inline not compiled (fixed) n y impossible (BPF_EVENTS depends on BPF_SYSCALL) So this patch doesn't break anything. QED. Signed-off-by: Wang Nan Cc: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: David Ahern Cc: He Kuang Cc: Jiri Olsa Cc: Kaixu Xia Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1435716878-189507-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/trace_events.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 1063c850dbab..180dbf8720f9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -542,7 +542,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, event_triggers_post_call(file, tt); } -#ifdef CONFIG_BPF_SYSCALL +#ifdef CONFIG_BPF_EVENTS unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); #else static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) -- cgit v1.2.3-70-g09d2 From 
04a22fae4cbc1f7d3f7471e9b36359f98bd3f043 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 1 Jul 2015 02:13:50 +0000 Subject: tracing, perf: Implement BPF programs attached to uprobes By copying BPF related operation to uprobe processing path, this patch allow users attach BPF programs to uprobes like what they are already doing on kprobes. After this patch, users are allowed to use PERF_EVENT_IOC_SET_BPF on a uprobe perf event. Which make it possible to profile user space programs and kernel events together using BPF. Because of this patch, CONFIG_BPF_EVENTS should be selected by CONFIG_UPROBE_EVENT to ensure trace_call_bpf() is compiled even if KPROBE_EVENT is not set. Signed-off-by: Wang Nan Acked-by: Alexei Starovoitov Cc: Brendan Gregg Cc: Daniel Borkmann Cc: David Ahern Cc: He Kuang Cc: Jiri Olsa Cc: Kaixu Xia Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Zefan Li Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1435716878-189507-3-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/trace_events.h | 5 +++++ kernel/events/core.c | 4 ++-- kernel/trace/Kconfig | 2 +- kernel/trace/trace_uprobe.c | 5 +++++ 4 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 180dbf8720f9..ed27917cabc9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -243,6 +243,7 @@ enum { TRACE_EVENT_FL_USE_CALL_FILTER_BIT, TRACE_EVENT_FL_TRACEPOINT_BIT, TRACE_EVENT_FL_KPROBE_BIT, + TRACE_EVENT_FL_UPROBE_BIT, }; /* @@ -257,6 +258,7 @@ enum { * USE_CALL_FILTER - For trace internal events, don't use file filter * TRACEPOINT - Event is a tracepoint * KPROBE - Event is a kprobe + * UPROBE - Event is a uprobe */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -267,8 +269,11 @@ enum { TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), TRACE_EVENT_FL_TRACEPOINT = 
(1 << TRACE_EVENT_FL_TRACEPOINT_BIT), TRACE_EVENT_FL_KPROBE = (1 << TRACE_EVENT_FL_KPROBE_BIT), + TRACE_EVENT_FL_UPROBE = (1 << TRACE_EVENT_FL_UPROBE_BIT), }; +#define TRACE_EVENT_FL_UKPROBE (TRACE_EVENT_FL_KPROBE | TRACE_EVENT_FL_UPROBE) + struct trace_event_call { struct list_head list; struct trace_event_class *class; diff --git a/kernel/events/core.c b/kernel/events/core.c index bdea12924b11..77f9e5d0e2d1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6846,8 +6846,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) if (event->tp_event->prog) return -EEXIST; - if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) - /* bpf programs can only be attached to kprobes */ + if (!(event->tp_event->flags & TRACE_EVENT_FL_UKPROBE)) + /* bpf programs can only be attached to u/kprobes */ return -EINVAL; prog = bpf_prog_get(prog_fd); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 3b9a48ae153a..1153c43428f3 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -434,7 +434,7 @@ config UPROBE_EVENT config BPF_EVENTS depends on BPF_SYSCALL - depends on KPROBE_EVENT + depends on KPROBE_EVENT || UPROBE_EVENT bool default y help diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index aa1ea7b36fa8..f97479f1ce35 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1095,11 +1095,15 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; + struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; + if (prog && !trace_call_bpf(prog, regs)) + return; + esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + tu->tp.size + dsize; @@ -1289,6 +1293,7 @@ static int register_uprobe_event(struct trace_uprobe *tu) return -ENODEV; } + call->flags = TRACE_EVENT_FL_UPROBE; call->class->reg = trace_uprobe_register; call->data = tu; ret = 
trace_add_event_call(call); -- cgit v1.2.3-70-g09d2 From 84cb777e67814f2e06a99ff228f743409e9617e9 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 5 Aug 2015 23:48:20 -0400 Subject: audit: use macros for unset inode and device values Clean up a number of places where casted magic numbers are used to represent unset inode and device numbers in preparation for the audit by executable path patch set. Signed-off-by: Richard Guy Briggs [PM: enclosed the _UNSET macros in parentheses for ./scripts/checkpatch] Signed-off-by: Paul Moore --- include/linux/audit.h | 3 +++ kernel/audit.c | 2 +- kernel/audit_watch.c | 8 ++++---- kernel/auditsc.c | 6 +++--- 4 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index c2e7e3a83965..759feb0e9d13 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -27,6 +27,9 @@ #include #include +#define AUDIT_INO_UNSET ((unsigned long)-1) +#define AUDIT_DEV_UNSET ((dev_t)-1) + struct audit_sig_info { uid_t uid; pid_t pid; diff --git a/kernel/audit.c b/kernel/audit.c index 7497a5a0fac0..060153dc47d4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1759,7 +1759,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n, } else audit_log_format(ab, " name=(null)"); - if (n->ino != (unsigned long)-1) { + if (n->ino != AUDIT_INO_UNSET) { audit_log_format(ab, " inode=%lu" " dev=%02x:%02x mode=%#ho" " ouid=%u ogid=%u rdev=%02x:%02x", diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index b81ad5bc7485..645c6884cee5 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -138,7 +138,7 @@ char *audit_watch_path(struct audit_watch *watch) int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev) { - return (watch->ino != (unsigned long)-1) && + return (watch->ino != AUDIT_INO_UNSET) && (watch->ino == ino) && (watch->dev == dev); } @@ -179,8 +179,8 @@ static struct audit_watch 
*audit_init_watch(char *path) INIT_LIST_HEAD(&watch->rules); atomic_set(&watch->count, 1); watch->path = path; - watch->dev = (dev_t)-1; - watch->ino = (unsigned long)-1; + watch->dev = AUDIT_DEV_UNSET; + watch->ino = AUDIT_INO_UNSET; return watch; } @@ -493,7 +493,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group, if (mask & (FS_CREATE|FS_MOVED_TO) && inode) audit_update_watch(parent, dname, inode->i_sb->s_dev, inode->i_ino, 0); else if (mask & (FS_DELETE|FS_MOVED_FROM)) - audit_update_watch(parent, dname, (dev_t)-1, (unsigned long)-1, 1); + audit_update_watch(parent, dname, AUDIT_DEV_UNSET, AUDIT_INO_UNSET, 1); else if (mask & (FS_DELETE_SELF|FS_UNMOUNT|FS_MOVE_SELF)) audit_remove_parent_watches(parent); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index f6bc31e7dca9..ea3fe2b748a8 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -180,7 +180,7 @@ static int audit_match_filetype(struct audit_context *ctx, int val) return 0; list_for_each_entry(n, &ctx->names_list, list) { - if ((n->ino != -1) && + if ((n->ino != AUDIT_INO_UNSET) && ((n->mode & S_IFMT) == mode)) return 1; } @@ -1681,7 +1681,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context, aname->should_free = true; } - aname->ino = (unsigned long)-1; + aname->ino = AUDIT_INO_UNSET; aname->type = type; list_add_tail(&aname->list, &context->names_list); @@ -1923,7 +1923,7 @@ void __audit_inode_child(const struct inode *parent, if (inode) audit_copy_inode(found_child, dentry, inode); else - found_child->ino = (unsigned long)-1; + found_child->ino = AUDIT_INO_UNSET; } EXPORT_SYMBOL_GPL(__audit_inode_child); -- cgit v1.2.3-70-g09d2 From 34d99af52ad40bd498ba66970579a5bc1fb1a3bc Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Wed, 5 Aug 2015 16:29:37 -0400 Subject: audit: implement audit by executable This adds the ability to audit the actions of a not-yet-running process. This patch implements the ability to filter on the executable path. 
Instead of just hard coding the ino and dev of the executable we care about at the moment the rule is inserted into the kernel, use the new audit_fsnotify infrastructure to manage this dynamically. This means that if the filename does not yet exist but the containing directory does, or if the inode in question is unlinked and creat'd (aka updated) the rule will just continue to work. If the containing directory is moved or deleted or the filesystem is unmounted, the rule is deleted automatically. A future enhancement would be to have the rule survive across directory disruptions. This is a heavily modified version of a patch originally submitted by Eric Paris with some ideas from Peter Moody. Cc: Peter Moody Cc: Eric Paris Signed-off-by: Richard Guy Briggs [PM: minor whitespace clean to satisfy ./scripts/checkpatch] Signed-off-by: Paul Moore --- include/linux/audit.h | 1 + include/uapi/linux/audit.h | 5 ++++- kernel/audit.h | 4 ++++ kernel/audit_tree.c | 2 ++ kernel/audit_watch.c | 31 +++++++++++++++++++++++++++ kernel/auditfilter.c | 53 +++++++++++++++++++++++++++++++++++++++++++++- kernel/auditsc.c | 3 +++ 7 files changed, 97 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 759feb0e9d13..b2abc996c25d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -62,6 +62,7 @@ struct audit_krule { struct audit_field *inode_f; /* quick access to an inode field */ struct audit_watch *watch; /* associated watch */ struct audit_tree *tree; /* associated watched tree */ + struct audit_fsnotify_mark *exe; struct list_head rlist; /* entry in audit_{watch,tree}.rules list */ struct list_head list; /* for AUDIT_LIST* purposes only */ u64 prio; diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index d3475e1f15ec..f6ff62c24aba 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -266,6 +266,7 @@ #define AUDIT_OBJ_UID 109 #define AUDIT_OBJ_GID 110 #define 
AUDIT_FIELD_COMPARE 111 +#define AUDIT_EXE 112 #define AUDIT_ARG0 200 #define AUDIT_ARG1 (AUDIT_ARG0+1) @@ -324,8 +325,10 @@ enum { #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT 0x00000001 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME 0x00000002 +#define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH 0x00000004 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \ - AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME) + AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \ + AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH) /* deprecated: AUDIT_VERSION_* */ #define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL diff --git a/kernel/audit.h b/kernel/audit.h index 7102d538737b..24ec86145667 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -274,6 +274,8 @@ extern char *audit_mark_path(struct audit_fsnotify_mark *mark); extern void audit_remove_mark(struct audit_fsnotify_mark *audit_mark); extern void audit_remove_mark_rule(struct audit_krule *krule); extern int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, dev_t dev); +extern int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old); +extern int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark); #else #define audit_put_watch(w) {} @@ -289,6 +291,8 @@ extern int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long in #define audit_remove_mark(m) #define audit_remove_mark_rule(k) #define audit_mark_compare(m, i, d) 0 +#define audit_exe_compare(t, m) (-EINVAL) +#define audit_dupe_exe(n, o) (-EINVAL) #endif /* CONFIG_AUDIT_WATCH */ #ifdef CONFIG_AUDIT_TREE diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 2e0c97427b33..f41722506808 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -478,6 +478,8 @@ static void kill_rules(struct audit_tree *tree) if (rule->tree) { /* not a half-baked one */ audit_tree_log_remove_rule(rule); + if (entry->rule.exe) + audit_remove_mark(entry->rule.exe); rule->tree = NULL; list_del_rcu(&entry->list); 
list_del(&entry->rule.list); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 645c6884cee5..27ef8dcf7cd8 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -312,6 +312,8 @@ static void audit_update_watch(struct audit_parent *parent, list_replace(&oentry->rule.list, &nentry->rule.list); } + if (oentry->rule.exe) + audit_remove_mark(oentry->rule.exe); audit_watch_log_rule_change(r, owatch, "updated_rules"); @@ -342,6 +344,8 @@ static void audit_remove_parent_watches(struct audit_parent *parent) list_for_each_entry_safe(r, nextr, &w->rules, rlist) { e = container_of(r, struct audit_entry, rule); audit_watch_log_rule_change(r, w, "remove_rule"); + if (e->rule.exe) + audit_remove_mark(e->rule.exe); list_del(&r->rlist); list_del(&r->list); list_del_rcu(&e->list); @@ -514,3 +518,30 @@ static int __init audit_watch_init(void) return 0; } device_initcall(audit_watch_init); + +int audit_dupe_exe(struct audit_krule *new, struct audit_krule *old) +{ + struct audit_fsnotify_mark *audit_mark; + char *pathname; + + pathname = kstrdup(audit_mark_path(old->exe), GFP_KERNEL); + if (!pathname) + return -ENOMEM; + + audit_mark = audit_alloc_mark(new, pathname, strlen(pathname)); + if (IS_ERR(audit_mark)) { + kfree(pathname); + return PTR_ERR(audit_mark); + } + new->exe = audit_mark; + + return 0; +} + +int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark) +{ + unsigned long ino = tsk->mm->exe_file->f_inode->i_ino; + dev_t dev = tsk->mm->exe_file->f_inode->i_sb->s_dev; + + return audit_mark_compare(mark, ino, dev); +} diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index b4d8c366ec30..7714d93edb85 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -405,6 +405,12 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f) if (f->val > AUDIT_MAX_FIELD_COMPARE) return -EINVAL; break; + case AUDIT_EXE: + if (f->op != Audit_equal) + return -EINVAL; + if (entry->rule.listnr != AUDIT_FILTER_EXIT) 
+ return -EINVAL; + break; }; return 0; } @@ -419,6 +425,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, size_t remain = datasz - sizeof(struct audit_rule_data); int i; char *str; + struct audit_fsnotify_mark *audit_mark; entry = audit_to_entry_common(data); if (IS_ERR(entry)) @@ -539,6 +546,24 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, entry->rule.buflen += f->val; entry->rule.filterkey = str; break; + case AUDIT_EXE: + if (entry->rule.exe || f->val > PATH_MAX) + goto exit_free; + str = audit_unpack_string(&bufp, &remain, f->val); + if (IS_ERR(str)) { + err = PTR_ERR(str); + goto exit_free; + } + entry->rule.buflen += f->val; + + audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + if (IS_ERR(audit_mark)) { + kfree(str); + err = PTR_ERR(audit_mark); + goto exit_free; + } + entry->rule.exe = audit_mark; + break; } } @@ -551,6 +576,8 @@ exit_nofree: exit_free: if (entry->rule.tree) audit_put_tree(entry->rule.tree); /* that's the temporary one */ + if (entry->rule.exe) + audit_remove_mark(entry->rule.exe); /* that's the template one */ audit_free_rule(entry); return ERR_PTR(err); } @@ -615,6 +642,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule) data->buflen += data->values[i] = audit_pack_string(&bufp, krule->filterkey); break; + case AUDIT_EXE: + data->buflen += data->values[i] = + audit_pack_string(&bufp, audit_mark_path(krule->exe)); + break; case AUDIT_LOGINUID_SET: if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) { data->fields[i] = AUDIT_LOGINUID; @@ -678,6 +709,12 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b) if (strcmp(a->filterkey, b->filterkey)) return 1; break; + case AUDIT_EXE: + /* both paths exist based on above type compare */ + if (strcmp(audit_mark_path(a->exe), + audit_mark_path(b->exe))) + return 1; + break; case AUDIT_UID: case AUDIT_EUID: case AUDIT_SUID: @@ -799,8 +836,14 @@ struct audit_entry 
*audit_dupe_rule(struct audit_krule *old) err = -ENOMEM; else new->filterkey = fk; + break; + case AUDIT_EXE: + err = audit_dupe_exe(new, old); + break; } if (err) { + if (new->exe) + audit_remove_mark(new->exe); audit_free_rule(entry); return ERR_PTR(err); } @@ -963,6 +1006,9 @@ int audit_del_rule(struct audit_entry *entry) if (e->rule.tree) audit_remove_tree_rule(&e->rule); + if (e->rule.exe) + audit_remove_mark_rule(&e->rule); + #ifdef CONFIG_AUDITSYSCALL if (!dont_count) audit_n_rules--; @@ -1067,8 +1113,11 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data, WARN_ON(1); } - if (err || type == AUDIT_DEL_RULE) + if (err || type == AUDIT_DEL_RULE) { + if (entry->rule.exe) + audit_remove_mark(entry->rule.exe); audit_free_rule(entry); + } return err; } @@ -1360,6 +1409,8 @@ static int update_lsm_rule(struct audit_krule *r) return 0; nentry = audit_dupe_rule(r); + if (entry->rule.exe) + audit_remove_mark(entry->rule.exe); if (IS_ERR(nentry)) { /* save the first error encountered for the * return value */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index ea3fe2b748a8..9b56b7ae053f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -466,6 +466,9 @@ static int audit_filter_rules(struct task_struct *tsk, result = audit_comparator(ctx->ppid, f->op, f->val); } break; + case AUDIT_EXE: + result = audit_exe_compare(tsk, rule->exe); + break; case AUDIT_UID: result = audit_uid_comparator(cred->uid, f->op, f->uid); break; -- cgit v1.2.3-70-g09d2 From 3ca9bb33c627f22640cfca97fdf88eec0a120dfd Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 29 Jul 2015 16:23:03 +0530 Subject: PM / OPP: Add clock-latency-ns support With "operating-points-v2" bindings, clock-latency is defined per OPP. Users of this value expect a single value which defines the latency to switch to any clock rate. Find maximum clock-latency-ns from the OPP table to service requests from such users. 
Reviewed-by: Stephen Boyd Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 41 +++++++++++++++++++++++++++++++++++++++-- include/linux/pm_opp.h | 6 ++++++ 2 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 0e0eff4d9299..8638204c457e 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -57,6 +57,8 @@ * @u_volt_min: Minimum voltage in microvolts corresponding to this OPP * @u_volt_max: Maximum voltage in microvolts corresponding to this OPP * @u_amp: Maximum current drawn by the device in microamperes + * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's + * frequency from any other OPP's frequency. * @dev_opp: points back to the device_opp struct this opp belongs to * @rcu_head: RCU callback head used for deferred freeing * @np: OPP's device node. @@ -75,6 +77,7 @@ struct dev_pm_opp { unsigned long u_volt_min; unsigned long u_volt_max; unsigned long u_amp; + unsigned long clock_latency_ns; struct device_opp *dev_opp; struct rcu_head rcu_head; @@ -109,6 +112,8 @@ struct device_opp { struct srcu_notifier_head srcu_head; struct rcu_head rcu_head; struct list_head opp_list; + + unsigned long clock_latency_ns_max; }; /* @@ -225,6 +230,32 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); +/** + * dev_pm_opp_get_max_clock_latency() - Get max clock latency in nanoseconds + * @dev: device for which we do this operation + * + * Return: This function returns the max clock latency in nanoseconds. + * + * Locking: This function takes rcu_read_lock(). 
+ */ +unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) +{ + struct device_opp *dev_opp; + unsigned long clock_latency_ns; + + rcu_read_lock(); + + dev_opp = _find_device_opp(dev); + if (IS_ERR(dev_opp)) + clock_latency_ns = 0; + else + clock_latency_ns = dev_opp->clock_latency_ns_max; + + rcu_read_unlock(); + return clock_latency_ns; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_get_max_clock_latency); + /** * dev_pm_opp_get_opp_count() - Get number of opps available in the opp list * @dev: device for which we do this operation @@ -779,6 +810,8 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) new_opp->np = np; new_opp->dynamic = false; new_opp->available = true; + of_property_read_u32(np, "clock-latency-ns", + (u32 *)&new_opp->clock_latency_ns); ret = opp_get_microvolt(new_opp, dev); if (ret) @@ -790,11 +823,15 @@ static int _opp_add_static_v2(struct device *dev, struct device_node *np) if (ret) goto free_opp; + if (new_opp->clock_latency_ns > dev_opp->clock_latency_ns_max) + dev_opp->clock_latency_ns_max = new_opp->clock_latency_ns; + mutex_unlock(&dev_opp_list_lock); - pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu\n", + pr_debug("%s: turbo:%d rate:%lu uv:%lu uvmin:%lu uvmax:%lu latency:%lu\n", __func__, new_opp->turbo, new_opp->rate, new_opp->u_volt, - new_opp->u_volt_min, new_opp->u_volt_max); + new_opp->u_volt_min, new_opp->u_volt_max, + new_opp->clock_latency_ns); /* * Notify the changes in the availability of the operable diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index cec2d4540914..20324b579adc 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -31,6 +31,7 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); int dev_pm_opp_get_opp_count(struct device *dev); +unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long 
freq, @@ -67,6 +68,11 @@ static inline int dev_pm_opp_get_opp_count(struct device *dev) return 0; } +static inline unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev) +{ + return 0; +} + static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev, unsigned long freq, bool available) { -- cgit v1.2.3-70-g09d2 From 8d4d4e98acd68c31435ebb7beea591dbf60b9eb2 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 12 Jun 2015 17:10:38 +0530 Subject: PM / OPP: Add helpers for initializing CPU OPPs With "operating-points-v2" it's possible to tell which devices share OPPs. We already have infrastructure to decode that information. This patch adds the following APIs: - of_get_cpus_sharing_opps: Returns cpumask of CPUs sharing OPPs (only valid with v2 bindings). - of_cpumask_init_opp_table: Initializes OPPs for all CPUs present in cpumask. - of_cpumask_free_opp_table: Frees OPPs for all CPUs present in cpumask. - set_cpus_sharing_opps: Sets which CPUs share OPPs (only valid with old OPP bindings, as this information isn't present in DT). Reviewed-by: Stephen Boyd Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 23 +++++++ 2 files changed, 198 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 0ebcea49145a..663aae1c9834 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -11,6 +11,7 @@ * published by the Free Software Foundation.
*/ +#include #include #include #include @@ -1195,6 +1196,26 @@ unlock: } EXPORT_SYMBOL_GPL(of_free_opp_table); +void of_cpumask_free_opp_table(cpumask_var_t cpumask) +{ + struct device *cpu_dev; + int cpu; + + WARN_ON(cpumask_empty(cpumask)); + + for_each_cpu(cpu, cpumask) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_err("%s: failed to get cpu%d device\n", __func__, + cpu); + continue; + } + + of_free_opp_table(cpu_dev); + } +} +EXPORT_SYMBOL_GPL(of_cpumask_free_opp_table); + /* Returns opp descriptor node from its phandle. Caller must do of_node_put() */ static struct device_node * _of_get_opp_desc_node_from_prop(struct device *dev, const struct property *prop) @@ -1211,6 +1232,31 @@ _of_get_opp_desc_node_from_prop(struct device *dev, const struct property *prop) return opp_np; } +/* Returns opp descriptor node for a device. Caller must do of_node_put() */ +static struct device_node *_of_get_opp_desc_node(struct device *dev) +{ + const struct property *prop; + + prop = of_find_property(dev->of_node, "operating-points-v2", NULL); + if (!prop) + return ERR_PTR(-ENODEV); + if (!prop->value) + return ERR_PTR(-ENODATA); + + /* + * TODO: Support for multiple OPP tables. + * + * There should be only ONE phandle present in "operating-points-v2" + * property. 
+ */ + if (prop->length != sizeof(__be32)) { + dev_err(dev, "%s: Invalid opp desc phandle\n", __func__); + return ERR_PTR(-EINVAL); + } + + return _of_get_opp_desc_node_from_prop(dev, prop); +} + /* Initializes OPP tables based on new bindings */ static int _of_init_opp_table_v2(struct device *dev, const struct property *prop) @@ -1351,4 +1397,133 @@ int of_init_opp_table(struct device *dev) return _of_init_opp_table_v2(dev, prop); } EXPORT_SYMBOL_GPL(of_init_opp_table); + +int of_cpumask_init_opp_table(cpumask_var_t cpumask) +{ + struct device *cpu_dev; + int cpu, ret = 0; + + WARN_ON(cpumask_empty(cpumask)); + + for_each_cpu(cpu, cpumask) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) { + pr_err("%s: failed to get cpu%d device\n", __func__, + cpu); + continue; + } + + ret = of_init_opp_table(cpu_dev); + if (ret) { + pr_err("%s: couldn't find opp table for cpu:%d, %d\n", + __func__, cpu, ret); + + /* Free all other OPPs */ + of_cpumask_free_opp_table(cpumask); + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(of_cpumask_init_opp_table); + +/* Required only for V1 bindings, as v2 can manage it from DT itself */ +int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +{ + struct device_list_opp *list_dev; + struct device_opp *dev_opp; + struct device *dev; + int cpu, ret = 0; + + rcu_read_lock(); + + dev_opp = _find_device_opp(cpu_dev); + if (IS_ERR(dev_opp)) { + ret = -EINVAL; + goto out_rcu_read_unlock; + } + + for_each_cpu(cpu, cpumask) { + if (cpu == cpu_dev->id) + continue; + + dev = get_cpu_device(cpu); + if (!dev) { + dev_err(cpu_dev, "%s: failed to get cpu%d device\n", + __func__, cpu); + continue; + } + + list_dev = _add_list_dev(dev, dev_opp); + if (!list_dev) { + dev_err(dev, "%s: failed to add list-dev for cpu%d device\n", + __func__, cpu); + continue; + } + } +out_rcu_read_unlock: + rcu_read_unlock(); + + return 0; +} +EXPORT_SYMBOL_GPL(set_cpus_sharing_opps); + +/* + * Works only for OPP v2 bindings. 
+ * + * cpumask should be already set to mask of cpu_dev->id. + * Returns -ENOENT if operating-points-v2 bindings aren't supported. + */ +int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +{ + struct device_node *np, *tmp_np; + struct device *tcpu_dev; + int cpu, ret = 0; + + /* Get OPP descriptor node */ + np = _of_get_opp_desc_node(cpu_dev); + if (IS_ERR(np)) { + dev_dbg(cpu_dev, "%s: Couldn't find opp node: %ld\n", __func__, + PTR_ERR(np)); + return -ENOENT; + } + + /* OPPs are shared ? */ + if (!of_property_read_bool(np, "opp-shared")) + goto put_cpu_node; + + for_each_possible_cpu(cpu) { + if (cpu == cpu_dev->id) + continue; + + tcpu_dev = get_cpu_device(cpu); + if (!tcpu_dev) { + dev_err(cpu_dev, "%s: failed to get cpu%d device\n", + __func__, cpu); + ret = -ENODEV; + goto put_cpu_node; + } + + /* Get OPP descriptor node */ + tmp_np = _of_get_opp_desc_node(tcpu_dev); + if (IS_ERR(tmp_np)) { + dev_err(tcpu_dev, "%s: Couldn't find opp node: %ld\n", + __func__, PTR_ERR(tmp_np)); + ret = PTR_ERR(tmp_np); + goto put_cpu_node; + } + + /* CPUs are sharing opp node */ + if (np == tmp_np) + cpumask_set_cpu(cpu, cpumask); + + of_node_put(tmp_np); + } + +put_cpu_node: + of_node_put(np); + return ret; +} +EXPORT_SYMBOL_GPL(of_get_cpus_sharing_opps); #endif diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 20324b579adc..bb52fae5b921 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -121,6 +121,10 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) int of_init_opp_table(struct device *dev); void of_free_opp_table(struct device *dev); +int of_cpumask_init_opp_table(cpumask_var_t cpumask); +void of_cpumask_free_opp_table(cpumask_var_t cpumask); +int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); +int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); #else static inline int of_init_opp_table(struct device 
*dev) { @@ -130,6 +134,25 @@ static inline int of_init_opp_table(struct device *dev) static inline void of_free_opp_table(struct device *dev) { } + +static inline int of_cpumask_init_opp_table(cpumask_var_t cpumask) +{ + return -ENOSYS; +} + +static inline void of_cpumask_free_opp_table(cpumask_var_t cpumask) +{ +} + +static inline int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +{ + return -ENOSYS; +} + +static inline int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +{ + return -ENOSYS; +} #endif #endif /* __LINUX_OPP_H__ */ -- cgit v1.2.3-70-g09d2 From 19445b25e350ebebaa304bb2135619f643302947 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 9 Jul 2015 17:43:35 +0200 Subject: PM / OPP: add dev_pm_opp_is_turbo() helper Add dev_pm_opp_is_turbo() helper to verify if an opp is to be used only for turbo mode or not. Reviewed-by: Stephen Boyd Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/opp.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/pm_opp.h | 7 +++++++ 2 files changed, 41 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c index 663aae1c9834..204c6c945168 100644 --- a/drivers/base/power/opp.c +++ b/drivers/base/power/opp.c @@ -280,6 +280,40 @@ unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) } EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq); +/** + * dev_pm_opp_is_turbo() - Returns if opp is turbo OPP or not + * @opp: opp for which turbo mode is being verified + * + * Turbo OPPs are not for normal use, and can be enabled (under certain + * conditions) for short duration of times to finish high throughput work + * quickly. Running on them for longer times may overheat the chip. + * + * Return: true if opp is turbo opp, else false. + * + * Locking: This function must be called under rcu_read_lock(). opp is a rcu + * protected pointer. 
This means that opp which could have been fetched by + * opp_find_freq_{exact,ceil,floor} functions is valid as long as we are + * under RCU lock. The pointer returned by the opp_find_freq family must be + * used in the same section as the usage of this function with the pointer + * prior to unlocking with rcu_read_unlock() to maintain the integrity of the + * pointer. + */ +bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp) +{ + struct dev_pm_opp *tmp_opp; + + opp_rcu_lockdep_assert(); + + tmp_opp = rcu_dereference(opp); + if (IS_ERR_OR_NULL(tmp_opp) || !tmp_opp->available) { + pr_err("%s: Invalid parameters\n", __func__); + return false; + } + + return tmp_opp->turbo; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_is_turbo); + /** * dev_pm_opp_get_max_clock_latency() - Get max clock latency in nanoseconds * @dev: device for which we do this operation diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index bb52fae5b921..cab7ba55bedb 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -30,6 +30,8 @@ unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp); unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp); +bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp); + int dev_pm_opp_get_opp_count(struct device *dev); unsigned long dev_pm_opp_get_max_clock_latency(struct device *dev); @@ -63,6 +65,11 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return 0; } +static inline bool dev_pm_opp_is_turbo(struct dev_pm_opp *opp) +{ + return false; +} + static inline int dev_pm_opp_get_opp_count(struct device *dev) { return 0; -- cgit v1.2.3-70-g09d2 From 44139ed4943ee8ec186eea3e9072ca16d2b48133 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 29 Jul 2015 16:23:09 +0530 Subject: cpufreq: Allow drivers to enable boost support after registering driver In some cases it wouldn't be known at time of driver registration, if the driver needs to support boost frequencies. 
For example, while getting boost information from DT with opp-v2 bindings, we need to parse the bindings for all the CPUs to know if turbo/boost OPPs are supported or not. One way to do that efficiently is to delay supporting boost mode (i.e. creating /sys/devices/system/cpu/cpufreq/boost file), until the time OPP bindings are parsed. At that point, the driver can enable boost support. This can be done at ->init(), where the frequency table is created. To do that, the driver requires a few APIs from the cpufreq core that let it do this. This patch provides these APIs. Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 68 +++++++++++++++++++++++++++++++------------- drivers/cpufreq/freq_table.c | 15 ++++++++++ include/linux/cpufreq.h | 12 ++++++++ 3 files changed, 75 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 76a26609d96b..e48242119d77 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2437,6 +2437,49 @@ int cpufreq_boost_supported(void) } EXPORT_SYMBOL_GPL(cpufreq_boost_supported); +static int create_boost_sysfs_file(void) +{ + int ret; + + if (!cpufreq_boost_supported()) + return 0; + + /* + * Check if driver provides function to enable boost - + * if not, use cpufreq_boost_set_sw as default + */ + if (!cpufreq_driver->set_boost) + cpufreq_driver->set_boost = cpufreq_boost_set_sw; + + ret = cpufreq_sysfs_create_file(&boost.attr); + if (ret) + pr_err("%s: cannot register global BOOST sysfs file\n", + __func__); + + return ret; +} + +static void remove_boost_sysfs_file(void) +{ + if (cpufreq_boost_supported()) + cpufreq_sysfs_remove_file(&boost.attr); +} + +int cpufreq_enable_boost_support(void) +{ + if (!cpufreq_driver) + return -EINVAL; + + if (cpufreq_boost_supported()) + return 0; + + cpufreq_driver->boost_supported = true; + + /* This will get removed on driver unregister */ + return
create_boost_sysfs_file(); +} +EXPORT_SYMBOL_GPL(cpufreq_enable_boost_support); + int cpufreq_boost_enabled(void) { return cpufreq_driver->boost_enabled; @@ -2490,21 +2533,9 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (driver_data->setpolicy) driver_data->flags |= CPUFREQ_CONST_LOOPS; - if (cpufreq_boost_supported()) { - /* - * Check if driver provides function to enable boost - - * if not, use cpufreq_boost_set_sw as default - */ - if (!cpufreq_driver->set_boost) - cpufreq_driver->set_boost = cpufreq_boost_set_sw; - - ret = cpufreq_sysfs_create_file(&boost.attr); - if (ret) { - pr_err("%s: cannot register global BOOST sysfs file\n", - __func__); - goto err_null_driver; - } - } + ret = create_boost_sysfs_file(); + if (ret) + goto err_null_driver; ret = subsys_interface_register(&cpufreq_interface); if (ret) @@ -2528,8 +2559,7 @@ out: err_if_unreg: subsys_interface_unregister(&cpufreq_interface); err_boost_unreg: - if (cpufreq_boost_supported()) - cpufreq_sysfs_remove_file(&boost.attr); + remove_boost_sysfs_file(); err_null_driver: write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; @@ -2558,9 +2588,7 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) /* Protect against concurrent cpu hotplug */ get_online_cpus(); subsys_interface_unregister(&cpufreq_interface); - if (cpufreq_boost_supported()) - cpufreq_sysfs_remove_file(&boost.attr); - + remove_boost_sysfs_file(); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); write_lock_irqsave(&cpufreq_driver_lock, flags); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index dfbbf981ed56..a8f1daffc9bc 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -18,6 +18,21 @@ * FREQUENCY TABLE HELPERS * *********************************************************************/ +bool policy_has_boost_freq(struct cpufreq_policy *policy) +{ + struct cpufreq_frequency_table *pos, *table = policy->freq_table; + + if (!table) + 
return false; + + cpufreq_for_each_valid_entry(pos, table) + if (pos->flags & CPUFREQ_BOOST_FREQ) + return true; + + return false; +} +EXPORT_SYMBOL_GPL(policy_has_boost_freq); + int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index bde1e567b3a9..95f018649abf 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -578,6 +578,8 @@ ssize_t cpufreq_show_cpus(const struct cpumask *mask, char *buf); int cpufreq_boost_trigger_state(int state); int cpufreq_boost_supported(void); int cpufreq_boost_enabled(void); +int cpufreq_enable_boost_support(void); +bool policy_has_boost_freq(struct cpufreq_policy *policy); #else static inline int cpufreq_boost_trigger_state(int state) { @@ -591,6 +593,16 @@ static inline int cpufreq_boost_enabled(void) { return 0; } + +static inline int cpufreq_enable_boost_support(void) +{ + return -EINVAL; +} + +static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) +{ + return false; +} #endif /* the following funtion is for cpufreq core use only */ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); -- cgit v1.2.3-70-g09d2 From 4248b0da460839e30eaaad78992b9a1dd3e63e21 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 6 Aug 2015 15:46:20 -0700 Subject: fs, file table: reinit files_stat.max_files after deferred memory initialisation Dave Hansen reported the following; My laptop has been behaving strangely with 4.2-rc2. Once I log in to my X session, I start getting all kinds of strange errors from applications and see this in my dmesg: VFS: file-max limit 8192 reached The problem is that the file-max is calculated before memory is fully initialised and miscalculates how much memory the kernel is using. This patch recalculates file-max after deferred memory initialisation. 
Note that using memory hotplug infrastructure would not have avoided this problem as the value is not recalculated after memory hot-add. 4.1: files_stat.max_files = 6582781 4.2-rc2: files_stat.max_files = 8192 4.2-rc2 patched: files_stat.max_files = 6562467 Small differences with the patch applied and 4.1 but not enough to matter. Signed-off-by: Mel Gorman Reported-by: Dave Hansen Cc: Nicolai Stange Cc: Dave Hansen Cc: Alex Ng Cc: Fengguang Wu Cc: Peter Zijlstra (Intel) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dcache.c | 13 +++---------- fs/file_table.c | 24 +++++++++++++++--------- include/linux/fs.h | 5 +++-- init/main.c | 2 +- mm/page_alloc.c | 3 +++ 5 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 5c8ea15e73a5..9b5fe503f6cb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3442,22 +3442,15 @@ void __init vfs_caches_init_early(void) inode_init_early(); } -void __init vfs_caches_init(unsigned long mempages) +void __init vfs_caches_init(void) { - unsigned long reserve; - - /* Base hash sizes on available memory, with a reserve equal to - 150% of current kernel size */ - - reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); - mempages -= reserve; - names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); dcache_init(); inode_init(); - files_init(mempages); + files_init(); + files_maxfiles_init(); mnt_init(); bdev_cache_init(); chrdev_init(); diff --git a/fs/file_table.c b/fs/file_table.c index 7f9d407c7595..ad17e05ebf95 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -308,19 +309,24 @@ void put_filp(struct file *file) } } -void __init files_init(unsigned long mempages) +void __init files_init(void) { - unsigned long n; - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); + 
percpu_counter_init(&nr_files, 0, GFP_KERNEL); +} - /* - * One file with associated inode and dcache is very roughly 1K. - * Per default don't use more than 10% of our memory for files. - */ +/* + * One file with associated inode and dcache is very roughly 1K. Per default + * do not use more than 10% of our memory for files. + */ +void __init files_maxfiles_init(void) +{ + unsigned long n; + unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2; + + memreserve = min(memreserve, totalram_pages - 1); + n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; - n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/include/linux/fs.h b/include/linux/fs.h index cc008c338f5a..84b783f277f7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -55,7 +55,8 @@ struct vm_fault; extern void __init inode_init(void); extern void __init inode_init_early(void); -extern void __init files_init(unsigned long); +extern void __init files_init(void); +extern void __init files_maxfiles_init(void); extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); @@ -2245,7 +2246,7 @@ extern int ioctl_preallocate(struct file *filp, void __user *argp); /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); -extern void __init vfs_caches_init(unsigned long); +extern void __init vfs_caches_init(void); extern struct kmem_cache *names_cachep; diff --git a/init/main.c b/init/main.c index c5d5626289ce..56506553d4d8 100644 --- a/init/main.c +++ b/init/main.c @@ -656,7 +656,7 @@ asmlinkage __visible void __init start_kernel(void) key_init(); security_init(); dbg_late_init(); - vfs_caches_init(totalram_pages); + vfs_caches_init(); signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 322628278ae4..cb61f44eb3fc 100644 --- a/mm/page_alloc.c +++ 
b/mm/page_alloc.c @@ -1201,6 +1201,9 @@ void __init page_alloc_init_late(void) /* Block until all are initialised */ wait_for_completion(&pgdat_init_all_done_comp); + + /* Reinit limits that are based on free pages after the kernel is up */ + files_maxfiles_init(); } #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ -- cgit v1.2.3-70-g09d2 From f4c18e6f7b5bbb5b528b3334115806b0d76f50f9 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 6 Aug 2015 15:47:08 -0700 Subject: mm: check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_* The race condition addressed in commit add05cecef80 ("mm: soft-offline: don't free target page in successful page migration") was not closed completely, because that can happen not only for soft-offline, but also for hard-offline. Consider that a slab page is about to be freed into buddy pool, and then an uncorrected memory error hits the page just after entering __free_one_page(), then VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP) is triggered, despite the fact that it's not necessary because the data on the affected page is not consumed. To solve it, this patch drops __PG_HWPOISON from page flag checks at allocation/free time. I think it's justified because __PG_HWPOISON flags is defined to prevent the page from being reused, and setting it outside the page's alloc-free cycle is a designed behavior (not a bug.) For recent months, I was annoyed about BUG_ON when soft-offlined page remains on lru cache list for a while, which is avoided by calling put_page() instead of putback_lru_page() in page migration's success path. This means that this patch reverts a major change from commit add05cecef80 about the new refcounting rule of soft-offlined pages, so "reuse window" revives. This will be closed by a subsequent patch. Signed-off-by: Naoya Horiguchi Cc: Andi Kleen Cc: Dean Nelson Cc: Tony Luck Cc: "Kirill A. 
Shutemov" Cc: Hugh Dickins Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 10 +++++++--- mm/huge_memory.c | 7 +------ mm/migrate.c | 5 ++++- mm/page_alloc.c | 4 ++++ 4 files changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f34e040b34e9..41c93844fb1d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) 1 << PG_private | 1 << PG_private_2 | \ 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ - 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \ + 1 << PG_unevictable | __PG_MLOCKED | \ __PG_COMPOUND_LOCK) /* * Flags checked when a page is prepped for return by the page allocator. - * Pages being prepped should not have any flags set. It they are set, + * Pages being prepped should not have these flags set. It they are set, * there has been a kernel bug or struct page corruption. + * + * __PG_HWPOISON is exceptional because it needs to be kept beyond page's + * alloc-free cycle to prevent from reusing the page. */ -#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1) +#define PAGE_FLAGS_CHECK_AT_PREP \ + (((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) #define PAGE_FLAGS_PRIVATE \ (1 << PG_private | 1 << PG_private_2) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c107094f79ba..097c7a4bfbd9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page, /* after clearing PageTail the gup refcount can be released */ smp_mb__after_atomic(); - /* - * retain hwpoison flag of the poisoned tail page: - * fix for the unsuitable process killed on Guest Machine(KVM) - * by the memory-failure. 
- */ - page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON; + page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; page_tail->flags |= (page->flags & ((1L << PG_referenced) | (1L << PG_swapbacked) | diff --git a/mm/migrate.c b/mm/migrate.c index ee401e4e5ef1..f2415be7d93b 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -950,7 +950,10 @@ out: list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - if (reason != MR_MEMORY_FAILURE) + /* Soft-offlined page shouldn't go through lru cache list */ + if (reason == MR_MEMORY_FAILURE) + put_page(page); + else putback_lru_page(page); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cb61f44eb3fc..beda41710802 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1296,6 +1296,10 @@ static inline int check_new_page(struct page *page) bad_reason = "non-NULL mapping"; if (unlikely(atomic_read(&page->_count) != 0)) bad_reason = "nonzero _count"; + if (unlikely(page->flags & __PG_HWPOISON)) { + bad_reason = "HWPoisoned (hardware-corrupted)"; + bad_flags = __PG_HWPOISON; + } if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) { bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set"; bad_flags = PAGE_FLAGS_CHECK_AT_PREP; -- cgit v1.2.3-70-g09d2 From d9eea403ca81f60cd535d354c77ada4c2bee8d66 Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Tue, 4 Aug 2015 14:05:42 +0300 Subject: net/mlx5_core: Introduce access function to modify RSS/LRO params To be used by the mlx5 Eth driver in following commit. This is in preparation for netdev "light-weight" open/stop flow change described in previous commit. Signed-off-by: Achiad Shochat Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 12 ++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 9 ++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index c4f3f74908ec..e6453f61141e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -163,6 +163,18 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_tir_out)]; + + MLX5_SET(modify_tir_in, in, tirn, tirn); + MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) { u32 in[MLX5_ST_SZ_DW(destroy_tir_out)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h index 10bd75e7d9b1..d436c2d8b527 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -45,6 +45,8 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn); +int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, + int inlen); void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index c60a62bba652..469b7bda3304 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ 
b/include/linux/mlx5/mlx5_ifc.h @@ -4050,6 +4050,13 @@ struct mlx5_ifc_modify_tis_in_bits { struct mlx5_ifc_tisc_bits ctx; }; +struct mlx5_ifc_modify_tir_bitmask_bits { + u8 reserved[0x20]; + + u8 reserved1[0x1f]; + u8 lro[0x1]; +}; + struct mlx5_ifc_modify_tir_out_bits { u8 status[0x8]; u8 reserved_0[0x18]; @@ -4071,7 +4078,7 @@ struct mlx5_ifc_modify_tir_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_modify_tir_bitmask_bits bitmask; u8 reserved_4[0x40]; -- cgit v1.2.3-70-g09d2 From 5c50368f38317627421bf24a0b66b1af0d44eddc Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Tue, 4 Aug 2015 14:05:43 +0300 Subject: net/mlx5e: Light-weight netdev open/stop Create/destroy TIRs, TISs and flow tables upon PCI probe/remove rather than upon the netdev ndo_open/stop. Upon ndo_stop(), redirect all RX traffic to the (lately introduced) "Drop RQ" and then close only the RX/TX rings, leaving the TIRs, TISs and flow tables alive. Signed-off-by: Achiad Shochat Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 237 ++++++++++++++------- drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 12 ++ drivers/net/ethernet/mellanox/mlx5/core/transobj.h | 2 + include/linux/mlx5/mlx5_ifc.h | 9 +- 4 files changed, 184 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index baa7a69bb694..33d08bb11f84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1301,14 +1301,18 @@ static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, void *rqtc, ix = ix % priv->params.num_channels; MLX5_SET(rqtc, rqtc, rq_num[i], - priv->channel[ix]->rq.rqn); + test_bit(MLX5E_STATE_OPENED, &priv->state) ? 
+ priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn); } break; default: /* MLX5E_SINGLE_RQ_RQT */ MLX5_SET(rqtc, rqtc, rq_num[0], - priv->channel[0]->rq.rqn); + test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[0]->rq.rqn : + priv->drop_rq.rqn); break; } @@ -1347,19 +1351,95 @@ static int mlx5e_open_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) return err; } +static int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 *in; + void *rqtc; + int inlen; + int log_sz; + int sz; + int err; + + log_sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 0 : + priv->params.rx_hash_log_tbl_sz; + sz = 1 << log_sz; + + inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); + + MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); + + mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + + MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); + + err = mlx5_core_modify_rqt(mdev, priv->rqtn[rqt_ix], in, inlen); + + kvfree(in); + + return err; +} + static void mlx5e_close_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) { mlx5_core_destroy_rqt(priv->mdev, priv->rqtn[rqt_ix]); } +static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) +{ + if (!priv->params.lro_en) + return; + +#define ROUGH_MAX_L2_L3_HDR_SZ 256 + + MLX5_SET(tirc, tirc, lro_enable_mask, + MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | + MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); + MLX5_SET(tirc, tirc, lro_max_ip_payload_size, + (priv->params.lro_wqe_sz - + ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, + MLX5_CAP_ETH(priv->mdev, + lro_timer_supported_periods[3])); +} + +static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) +{ + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *tirc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + 
return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.lro, 1); + tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); + + mlx5e_build_tir_ctx_lro(tirc, priv); + + err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + + kvfree(in); + + return err; +} + static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); MLX5_SET(tirc, tirc, transport_domain, priv->tdn); -#define ROUGH_MAX_L2_L3_HDR_SZ 256 - #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -1372,17 +1452,7 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) - if (priv->params.lro_en) { - MLX5_SET(tirc, tirc, lro_enable_mask, - MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | - MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); - MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (priv->params.lro_wqe_sz - - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, - MLX5_CAP_ETH(priv->mdev, - lro_timer_supported_periods[3])); - } + mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); @@ -1568,12 +1638,20 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev) return 0; } +static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) +{ + mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_redirect_rqt(priv, MLX5E_SINGLE_RQ_RQT); +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int num_txqs; int err; + set_bit(MLX5E_STATE_OPENED, &priv->state); + num_txqs = priv->params.num_channels * priv->params.num_tc; netif_set_real_num_tx_queues(netdev, num_txqs); netif_set_real_num_rx_queues(netdev, priv->params.num_channels); @@ -1582,83 +1660,32 @@ int mlx5e_open_locked(struct net_device *netdev) if (err) return err; - err = mlx5e_open_tises(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_tises failed, 
%d\n", - __func__, err); - return err; - } - err = mlx5e_open_channels(priv); if (err) { netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", __func__, err); - goto err_close_tises; - } - - err = mlx5e_open_rqt(priv, MLX5E_INDIRECTION_RQT); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_rqt(INDIR) failed, %d\n", - __func__, err); - goto err_close_channels; - } - - err = mlx5e_open_rqt(priv, MLX5E_SINGLE_RQ_RQT); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_rqt(SINGLE) failed, %d\n", - __func__, err); - goto err_close_rqt_indir; - } - - err = mlx5e_open_tirs(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_tir failed, %d\n", - __func__, err); - goto err_close_rqt_single; - } - - err = mlx5e_open_flow_table(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_flow_table failed, %d\n", - __func__, err); - goto err_close_tirs; + return err; } err = mlx5e_add_all_vlan_rules(priv); if (err) { netdev_err(netdev, "%s: mlx5e_add_all_vlan_rules failed, %d\n", __func__, err); - goto err_close_flow_table; + goto err_close_channels; } mlx5e_init_eth_addr(priv); - set_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_update_carrier(priv); + mlx5e_redirect_rqts(priv); mlx5e_set_rx_mode_core(priv); schedule_delayed_work(&priv->update_stats_work, 0); return 0; -err_close_flow_table: - mlx5e_close_flow_table(priv); - -err_close_tirs: - mlx5e_close_tirs(priv); - -err_close_rqt_single: - mlx5e_close_rqt(priv, MLX5E_SINGLE_RQ_RQT); - -err_close_rqt_indir: - mlx5e_close_rqt(priv, MLX5E_INDIRECTION_RQT); - err_close_channels: mlx5e_close_channels(priv); -err_close_tises: - mlx5e_close_tises(priv); - return err; } @@ -1682,13 +1709,9 @@ int mlx5e_close_locked(struct net_device *netdev) mlx5e_set_rx_mode_core(priv); mlx5e_del_all_vlan_rules(priv); + mlx5e_redirect_rqts(priv); netif_carrier_off(priv->netdev); - mlx5e_close_flow_table(priv); - mlx5e_close_tirs(priv); - mlx5e_close_rqt(priv, MLX5E_SINGLE_RQ_RQT); - mlx5e_close_rqt(priv, MLX5E_INDIRECTION_RQT); 
mlx5e_close_channels(priv); - mlx5e_close_tises(priv); return 0; } @@ -1766,6 +1789,8 @@ static int mlx5e_set_features(struct net_device *netdev, mlx5e_close_locked(priv->netdev); priv->params.lro_en = !!(features & NETIF_F_LRO); + mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP); + mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP); if (was_opened) err = mlx5e_open_locked(priv->netdev); @@ -2026,16 +2051,72 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5e_open_tises(priv); + if (err) { + mlx5_core_warn(mdev, "open tises failed, %d\n", err); + goto err_destroy_mkey; + } + + err = mlx5e_open_drop_rq(priv); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_close_tises; + } + + err = mlx5e_open_rqt(priv, MLX5E_INDIRECTION_RQT); + if (err) { + mlx5_core_warn(mdev, "open rqt(INDIR) failed, %d\n", err); + goto err_close_drop_rq; + } + + err = mlx5e_open_rqt(priv, MLX5E_SINGLE_RQ_RQT); + if (err) { + mlx5_core_warn(mdev, "open rqt(SINGLE) failed, %d\n", err); + goto err_close_rqt_indir; + } + + err = mlx5e_open_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "open tirs failed, %d\n", err); + goto err_close_rqt_single; + } + + err = mlx5e_open_flow_table(priv); + if (err) { + mlx5_core_warn(mdev, "open flow table failed, %d\n", err); + goto err_close_tirs; + } + + mlx5e_init_eth_addr(priv); + err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_destroy_mkey; + goto err_close_flow_table; } mlx5e_enable_async_events(priv); return priv; +err_close_flow_table: + mlx5e_close_flow_table(priv); + +err_close_tirs: + mlx5e_close_tirs(priv); + +err_close_rqt_single: + mlx5e_close_rqt(priv, MLX5E_SINGLE_RQ_RQT); + +err_close_rqt_indir: + mlx5e_close_rqt(priv, MLX5E_INDIRECTION_RQT); + +err_close_drop_rq: + mlx5e_close_drop_rq(priv); + +err_close_tises: + mlx5e_close_tises(priv); + err_destroy_mkey: mlx5_core_destroy_mkey(mdev, &priv->mr); 
@@ -2060,6 +2141,12 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) struct net_device *netdev = priv->netdev; unregister_netdev(netdev); + mlx5e_close_flow_table(priv); + mlx5e_close_tirs(priv); + mlx5e_close_rqt(priv, MLX5E_SINGLE_RQ_RQT); + mlx5e_close_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_close_drop_rq(priv); + mlx5e_close_tises(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index e6453f61141e..b4c87c7b0cf0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -387,6 +387,18 @@ int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; } +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen) +{ + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)]; + + MLX5_SET(modify_rqt_in, in, rqtn, rqtn); + MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); + + memset(out, 0, sizeof(out)); + return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); +} + void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h index d436c2d8b527..74cae51436e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.h @@ -65,6 +65,8 @@ int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm); int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn); +int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, + int inlen); void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn); #endif /* __TRANSOBJ_H__ */ diff --git 
a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 469b7bda3304..dd2097455a2e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4123,6 +4123,13 @@ struct mlx5_ifc_modify_rqt_out_bits { u8 reserved_1[0x40]; }; +struct mlx5_ifc_rqt_bitmask_bits { + u8 reserved[0x20]; + + u8 reserved1[0x1f]; + u8 rqn_list[0x1]; +}; + struct mlx5_ifc_modify_rqt_in_bits { u8 opcode[0x10]; u8 reserved_0[0x10]; @@ -4135,7 +4142,7 @@ struct mlx5_ifc_modify_rqt_in_bits { u8 reserved_3[0x20]; - u8 modify_bitmask[0x40]; + struct mlx5_ifc_rqt_bitmask_bits bitmask; u8 reserved_4[0x40]; -- cgit v1.2.3-70-g09d2 From efea389d3cc6427a9a94e92b2d7bf4c862f2cfcf Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 4 Aug 2015 14:05:47 +0300 Subject: net/mlx5_core: Support physical port counters Added physical port counters in the following standard formats to ethtool statistics: - IEEE 802.3 - RFC2863 - RFC2819 Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Amir Vadai Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 75 ++++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 10 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 42 ++++++++++++ include/linux/mlx5/device.h | 10 +++ include/linux/mlx5/driver.h | 1 + 5 files changed, 137 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 35c33907a9ff..e9d7d90363a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -138,6 +138,80 @@ struct mlx5e_vport_stats { #define NUM_VPORT_COUNTERS 31 }; +static const char pport_strings[][ETH_GSTRING_LEN] = { + /* IEEE802.3 counters */ + "frames_tx", + "frames_rx", + "check_seq_err", + "alignment_err", + "octets_tx", + "octets_received", + "multicast_xmitted", + "broadcast_xmitted", + "multicast_rx", + "broadcast_rx", + "in_range_len_errors", + "out_of_range_len", + "too_long_errors", + "symbol_err", + "mac_control_tx", + "mac_control_rx", + "unsupported_op_rx", + "pause_ctrl_rx", + "pause_ctrl_tx", + + /* RFC2863 counters */ + "in_octets", + "in_ucast_pkts", + "in_discards", + "in_errors", + "in_unknown_protos", + "out_octets", + "out_ucast_pkts", + "out_discards", + "out_errors", + "in_multicast_pkts", + "in_broadcast_pkts", + "out_multicast_pkts", + "out_broadcast_pkts", + + /* RFC2819 counters */ + "drop_events", + "octets", + "pkts", + "broadcast_pkts", + "multicast_pkts", + "crc_align_errors", + "undersize_pkts", + "oversize_pkts", + "fragments", + "jabbers", + "collisions", + "p64octets", + "p65to127octets", + "p128to255octets", + "p256to511octets", + "p512to1023octets", + "p1024to1518octets", + "p1519to2047octets", + "p2048to4095octets", + "p4096to8191octets", + "p8192to10239octets", +}; + +#define NUM_IEEE_802_3_COUNTERS 19 +#define NUM_RFC_2863_COUNTERS 13 +#define NUM_RFC_2819_COUNTERS 21 +#define NUM_PPORT_COUNTERS 
(NUM_IEEE_802_3_COUNTERS + \ + NUM_RFC_2863_COUNTERS + \ + NUM_RFC_2819_COUNTERS) + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS]; + __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS]; + __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; +}; + static const char rq_stats_strings[][ETH_GSTRING_LEN] = { "packets", "csum_none", @@ -180,6 +254,7 @@ struct mlx5e_sq_stats { struct mlx5e_stats { struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; }; struct mlx5e_params { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b95aa3384c36..b549797b315f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -171,7 +171,7 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_COUNTERS + + return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + priv->params.num_channels * NUM_RQ_STATS + priv->params.num_channels * priv->params.num_tc * NUM_SQ_STATS; @@ -200,6 +200,11 @@ static void mlx5e_get_strings(struct net_device *dev, strcpy(data + (idx++) * ETH_GSTRING_LEN, vport_strings[i]); + /* PPORT counters */ + for (i = 0; i < NUM_PPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_strings[i]); + /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) @@ -234,6 +239,9 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, for (i = 0; i < NUM_VPORT_COUNTERS; i++) data[idx++] = ((u64 *)&priv->stats.vport)[i]; + for (i = 0; i < NUM_PPORT_COUNTERS; i++) + data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); + /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 
b8023a7484e0..111427b33ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -82,6 +82,47 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *s = &priv->stats.pport; + u32 *in; + u32 *out; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + + in = mlx5_vzalloc(sz); + out = mlx5_vzalloc(sz); + if (!in || !out) + goto free_out; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + memcpy(s->IEEE_802_3_counters, + MLX5_ADDR_OF(ppcnt_reg, out, counter_set), + sizeof(s->IEEE_802_3_counters)); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + memcpy(s->RFC_2863_counters, + MLX5_ADDR_OF(ppcnt_reg, out, counter_set), + sizeof(s->RFC_2863_counters)); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, + sz, MLX5_REG_PPCNT, 0, 0); + memcpy(s->RFC_2819_counters, + MLX5_ADDR_OF(ppcnt_reg, out, counter_set), + sizeof(s->RFC_2819_counters)); + +free_out: + kvfree(in); + kvfree(out); +} + void mlx5e_update_stats(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -202,6 +243,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->tx_csum_offload = s->tx_packets - tx_offload_none; s->rx_csum_good = s->rx_packets - s->rx_csum_none; + mlx5e_update_pport_counters(priv); free_out: kvfree(out); } diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index b943cd9e2097..250b1ff8b48d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1182,6 +1182,16 @@ enum { MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR = 0x40, }; +enum 
{ + MLX5_IEEE_802_3_COUNTERS_GROUP = 0x0, + MLX5_RFC_2863_COUNTERS_GROUP = 0x1, + MLX5_RFC_2819_COUNTERS_GROUP = 0x2, + MLX5_RFC_3635_COUNTERS_GROUP = 0x3, + MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP = 0x5, + MLX5_PER_PRIORITY_COUNTERS_GROUP = 0x10, + MLX5_PER_TRAFFIC_CLASS_COUNTERS_GROUP = 0x11 +}; + static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz) { if (pkey_sz > MLX5_MAX_LOG_PKEY_TABLE) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5fe0cae1a515..2039546b0ec6 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -103,6 +103,7 @@ enum { MLX5_REG_PMTU = 0x5003, MLX5_REG_PTYS = 0x5004, MLX5_REG_PAOS = 0x5006, + MLX5_REG_PPCNT = 0x5008, MLX5_REG_PMAOS = 0x5012, MLX5_REG_PUDE = 0x5009, MLX5_REG_PMPE = 0x5010, -- cgit v1.2.3-70-g09d2 From d92cff89a0c80e7e49796366e441d97f07b5d321 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Tue, 4 Aug 2015 18:26:19 +0200 Subject: net_dbg_ratelimited: turn into no-op when !DEBUG The pr_debug family of functions turns into a no-op when -DDEBUG is not specified, opting instead to call "no_printk", which gets compiled to a no-op (but retains gcc's nice warnings about printf-style arguments). The problem with net_dbg_ratelimited is that it is defined to be a variant of net_ratelimited_function, which expands to essentially: if (net_ratelimit()) pr_debug(fmt, ...); When DEBUG is not defined, then this becomes, if (net_ratelimit()) ; This seems benign, except it isn't. Firstly, there's the obvious overhead of calling net_ratelimit needlessly, which does quite some book keeping for the rate limiting. Given that the pr_debug and net_dbg_ratelimited family of functions are sprinkled liberally through performance critical code, with developers assuming they'll be compiled out to a no-op most of the time, we certainly do not want this needless book keeping. 
Secondly, and most visibly, even though no debug message is printed when DEBUG is not defined, if there is a flood of invocations, dmesg winds up peppered with messages such as "net_ratelimit: 320 callbacks suppressed". This is because our aforementioned net_ratelimit() function actually prints this text in some circumstances. It's especially odd to see this when there isn't any other accompanying debug message. So, in sum, it doesn't make sense to have this function's current behavior, and instead it should match what every other debug family of functions in the kernel does with !DEBUG -- nothing. This patch replaces calls to net_dbg_ratelimited when !DEBUG with no_printk, keeping with the idiom of all the other debug print helpers. Also, though not strictly necessary, it guards the call with an if (0) so that all evaluation of any arguments are sure to be compiled out. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- include/linux/net.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 04aa06852771..049d4b03c4c4 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -239,8 +239,16 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) +#if defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) +#else +#define net_dbg_ratelimited(fmt, ...) 
\ + do { \ + if (0) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + } while (0) +#endif bool __net_get_random_once(void *buf, int nbytes, bool *done, struct static_key *done_key); -- cgit v1.2.3-70-g09d2 From 3499abb249bb5ed9d21031944bc3059ec4aa2909 Mon Sep 17 00:00:00 2001 From: Andreas Schultz Date: Wed, 5 Aug 2015 17:51:45 +0200 Subject: netfilter: nfacct: per network namespace support - Move the nfnl_acct_list into the network namespace, initialize and destroy it per namespace - Keep track of refcnt on nfacct objects, the old logic no longer works with a per namespace list - Adjust xt_nfacct to pass the namespace when registering objects Signed-off-by: Andreas Schultz Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink_acct.h | 3 +- include/net/net_namespace.h | 3 ++ net/netfilter/nfnetlink_acct.c | 71 ++++++++++++++++++++++---------- net/netfilter/xt_nfacct.c | 2 +- 4 files changed, 56 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index 6ec975748742..80ca889b164e 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -2,6 +2,7 @@ #define _NFNL_ACCT_H_ #include +#include enum { NFACCT_NO_QUOTA = -1, @@ -11,7 +12,7 @@ enum { struct nf_acct; -struct nf_acct *nfnl_acct_find_get(const char *filter_name); +struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name); void nfnl_acct_put(struct nf_acct *acct); void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); extern int nfnl_acct_overquota(const struct sk_buff *skb, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index e951453e0a23..2dcea635ecce 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -118,6 +118,9 @@ struct net { #endif struct sock *nfnl; struct sock *nfnl_stash; +#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) + struct list_head 
nfnl_acct_list; +#endif #endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index c18af2f63eef..fefbf5f0b28d 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -27,8 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso "); MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); -static LIST_HEAD(nfnl_acct_list); - struct nf_acct { atomic64_t pkts; atomic64_t bytes; @@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { struct nf_acct *nfacct, *matching = NULL; + struct net *net = sock_net(nfnl); char *acct_name; unsigned int size = 0; u32 flags = 0; @@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, if (strlen(acct_name) == 0) return -EINVAL; - list_for_each_entry(nfacct, &nfnl_acct_list, head) { + list_for_each_entry(nfacct, &net->nfnl_acct_list, head) { if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS]))); } atomic_set(&nfacct->refcnt, 1); - list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list); return 0; } @@ -185,6 +184,7 @@ nla_put_failure: static int nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = sock_net(skb->sk); struct nf_acct *cur, *last; const struct nfacct_filter *filter = cb->data; @@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = 0; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (last) { if (cur != last) continue; @@ -257,6 +257,7 @@ static int nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct 
nlattr * const tb[]) { + struct net *net = sock_net(nfnl); int ret = -ENOENT; struct nf_acct *cur; char *acct_name; @@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, return -EINVAL; acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) @@ -336,19 +337,20 @@ static int nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const tb[]) { + struct net *net = sock_net(nfnl); char *acct_name; struct nf_acct *cur; int ret = -ENOENT; if (!tb[NFACCT_NAME]) { - list_for_each_entry(cur, &nfnl_acct_list, head) + list_for_each_entry(cur, &net->nfnl_acct_list, head) nfnl_acct_try_del(cur); return 0; } acct_name = nla_data(tb[NFACCT_NAME]); - list_for_each_entry(cur, &nfnl_acct_list, head) { + list_for_each_entry(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) continue; @@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = { MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); -struct nf_acct *nfnl_acct_find_get(const char *acct_name) +struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name) { struct nf_acct *cur, *acct = NULL; rcu_read_lock(); - list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) { if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) continue; @@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get); void nfnl_acct_put(struct nf_acct *acct) { - atomic_dec(&acct->refcnt); + if (atomic_dec_and_test(&acct->refcnt)) + kfree_rcu(acct, rcu_head); + module_put(THIS_MODULE); } EXPORT_SYMBOL_GPL(nfnl_acct_put); @@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct) } EXPORT_SYMBOL_GPL(nfnl_acct_overquota); +static int __net_init 
nfnl_acct_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->nfnl_acct_list); + + return 0; +} + +static void __net_exit nfnl_acct_net_exit(struct net *net) +{ + struct nf_acct *cur, *tmp; + + list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) { + list_del_rcu(&cur->head); + + if (atomic_dec_and_test(&cur->refcnt)) + kfree_rcu(cur, rcu_head); + } +} + +static struct pernet_operations nfnl_acct_ops = { + .init = nfnl_acct_net_init, + .exit = nfnl_acct_net_exit, +}; + static int __init nfnl_acct_init(void) { int ret; + ret = register_pernet_subsys(&nfnl_acct_ops); + if (ret < 0) { + pr_err("nfnl_acct_init: failed to register pernet ops\n"); + goto err_out; + } + pr_info("nfnl_acct: registering with nfnetlink.\n"); ret = nfnetlink_subsys_register(&nfnl_acct_subsys); if (ret < 0) { pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); - goto err_out; + goto cleanup_pernet; } return 0; + +cleanup_pernet: + unregister_pernet_subsys(&nfnl_acct_ops); err_out: return ret; } static void __exit nfnl_acct_exit(void) { - struct nf_acct *cur, *tmp; - pr_info("nfnl_acct: unregistering from nfnetlink.\n"); nfnetlink_subsys_unregister(&nfnl_acct_subsys); - - list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { - list_del_rcu(&cur->head); - /* We are sure that our objects have no clients at this point, - * it's safe to release them all without checking refcnt. 
*/ - kfree_rcu(cur, rcu_head); - } + unregister_pernet_subsys(&nfnl_acct_ops); } module_init(nfnl_acct_init); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 8c646ed9c921..3048a7e3a90a 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) struct xt_nfacct_match_info *info = par->matchinfo; struct nf_acct *nfacct; - nfacct = nfnl_acct_find_get(info->name); + nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { pr_info("xt_nfacct: accounting object with name `%s' " "does not exists\n", info->name); -- cgit v1.2.3-70-g09d2 From 377a51a6b8c09b39b540e5a80ad7029a450cee71 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 6 Jul 2015 15:40:22 +0200 Subject: proportions: Spelling s/consitent/consistent/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Jiri Kosina --- include/linux/proportions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 00e8e8fa7358..5440f64d2942 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -33,7 +33,7 @@ struct prop_global { /* * global proportion descriptor * - * this is needed to consitently flip prop_global structures. + * this is needed to consistently flip prop_global structures. */ struct prop_descriptor { int index; -- cgit v1.2.3-70-g09d2 From 99d49e3af6dfde62caffb2913807fdaf293a9e3d Mon Sep 17 00:00:00 2001 From: Frans Klaver Date: Thu, 4 Sep 2014 00:58:23 +0200 Subject: mod_devicetable: add space before */ Match the style of the other one-line comments. 
Signed-off-by: Frans Klaver Reviewed-by: Hannes Reinecke Signed-off-by: Jiri Kosina --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 34f25b7bf642..688997a24aad 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -253,7 +253,7 @@ struct pcmcia_device_id { __u32 prod_id_hash[4]; - /* not matched against in kernelspace*/ + /* not matched against in kernelspace */ const char * prod_id[4]; /* not matched against */ -- cgit v1.2.3-70-g09d2 From a568231f463225eb31593f71446a267a03ae0528 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Fri, 7 Aug 2015 15:19:50 +0800 Subject: spi: mediatek: Add spi bus for Mediatek MT8173 This patch adds basic spi bus for MT8173. Signed-off-by: Leilk Liu Signed-off-by: Mark Brown --- drivers/spi/Kconfig | 9 + drivers/spi/Makefile | 1 + drivers/spi/spi-mt65xx.c | 749 +++++++++++++++++++++++++++++++ include/linux/platform_data/spi-mt65xx.h | 22 + 4 files changed, 781 insertions(+) create mode 100644 drivers/spi/spi-mt65xx.c create mode 100644 include/linux/platform_data/spi-mt65xx.h (limited to 'include/linux') diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 0cae1694014d..38ddfba49d76 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -326,6 +326,15 @@ config SPI_MESON_SPIFC This enables master mode support for the SPIFC (SPI flash controller) available in Amlogic Meson SoCs. +config SPI_MT65XX + tristate "MediaTek SPI controller" + depends on ARCH_MEDIATEK || COMPILE_TEST + help + This selects the MediaTek(R) SPI bus driver. + If you want to use MediaTek(R) SPI interface, + say Y or M here.If you are not sure, say N. + SPI drivers for Mediatek MT65XX and MT81XX series ARM SoCs. 
+ config SPI_OC_TINY tristate "OpenCores tiny SPI" depends on GPIOLIB || COMPILE_TEST diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index 1154dbac8f2c..9746beb21769 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_SPI_MESON_SPIFC) += spi-meson-spifc.o obj-$(CONFIG_SPI_MPC512x_PSC) += spi-mpc512x-psc.o obj-$(CONFIG_SPI_MPC52xx_PSC) += spi-mpc52xx-psc.o obj-$(CONFIG_SPI_MPC52xx) += spi-mpc52xx.o +obj-$(CONFIG_SPI_MT65XX) += spi-mt65xx.o obj-$(CONFIG_SPI_MXS) += spi-mxs.o obj-$(CONFIG_SPI_NUC900) += spi-nuc900.o obj-$(CONFIG_SPI_OC_TINY) += spi-oc-tiny.o diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c new file mode 100644 index 000000000000..4676b0122b89 --- /dev/null +++ b/drivers/spi/spi-mt65xx.c @@ -0,0 +1,749 @@ +/* + * Copyright (c) 2015 MediaTek Inc. + * Author: Leilk Liu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SPI_CFG0_REG 0x0000 +#define SPI_CFG1_REG 0x0004 +#define SPI_TX_SRC_REG 0x0008 +#define SPI_RX_DST_REG 0x000c +#define SPI_TX_DATA_REG 0x0010 +#define SPI_RX_DATA_REG 0x0014 +#define SPI_CMD_REG 0x0018 +#define SPI_STATUS0_REG 0x001c +#define SPI_PAD_SEL_REG 0x0024 + +#define SPI_CFG0_SCK_HIGH_OFFSET 0 +#define SPI_CFG0_SCK_LOW_OFFSET 8 +#define SPI_CFG0_CS_HOLD_OFFSET 16 +#define SPI_CFG0_CS_SETUP_OFFSET 24 + +#define SPI_CFG1_CS_IDLE_OFFSET 0 +#define SPI_CFG1_PACKET_LOOP_OFFSET 8 +#define SPI_CFG1_PACKET_LENGTH_OFFSET 16 +#define SPI_CFG1_GET_TICK_DLY_OFFSET 30 + +#define SPI_CFG1_CS_IDLE_MASK 0xff +#define SPI_CFG1_PACKET_LOOP_MASK 0xff00 +#define SPI_CFG1_PACKET_LENGTH_MASK 0x3ff0000 + +#define SPI_CMD_ACT_OFFSET 0 +#define SPI_CMD_RESUME_OFFSET 1 +#define SPI_CMD_CPHA_OFFSET 8 +#define SPI_CMD_CPOL_OFFSET 9 +#define SPI_CMD_TXMSBF_OFFSET 12 +#define SPI_CMD_RXMSBF_OFFSET 13 +#define SPI_CMD_RX_ENDIAN_OFFSET 14 +#define SPI_CMD_TX_ENDIAN_OFFSET 15 + +#define SPI_CMD_RST BIT(2) +#define SPI_CMD_PAUSE_EN BIT(4) +#define SPI_CMD_DEASSERT BIT(5) +#define SPI_CMD_CPHA BIT(8) +#define SPI_CMD_CPOL BIT(9) +#define SPI_CMD_RX_DMA BIT(10) +#define SPI_CMD_TX_DMA BIT(11) +#define SPI_CMD_TXMSBF BIT(12) +#define SPI_CMD_RXMSBF BIT(13) +#define SPI_CMD_RX_ENDIAN BIT(14) +#define SPI_CMD_TX_ENDIAN BIT(15) +#define SPI_CMD_FINISH_IE BIT(16) +#define SPI_CMD_PAUSE_IE BIT(17) + +#define MTK_SPI_QUIRK_PAD_SELECT 1 +/* Must explicitly send dummy Tx bytes to do Rx only transfer */ +#define MTK_SPI_QUIRK_MUST_TX 1 + +#define MT8173_SPI_MAX_PAD_SEL 3 + +#define MTK_SPI_IDLE 0 +#define MTK_SPI_PAUSED 1 + +#define MTK_SPI_MAX_FIFO_SIZE 32 +#define MTK_SPI_PACKET_SIZE 1024 + +struct mtk_spi_compatible { + u32 need_pad_sel; + u32 must_tx; +}; + +struct mtk_spi { + void __iomem *base; + u32 state; + u32 pad_sel; + struct clk *spi_clk, *parent_clk; + struct 
spi_transfer *cur_transfer; + u32 xfer_len; + struct scatterlist *tx_sgl, *rx_sgl; + u32 tx_sgl_len, rx_sgl_len; + const struct mtk_spi_compatible *dev_comp; +}; + +static const struct mtk_spi_compatible mt6589_compat = { + .need_pad_sel = 0, + .must_tx = 0, +}; + +static const struct mtk_spi_compatible mt8135_compat = { + .need_pad_sel = 0, + .must_tx = 0, +}; + +static const struct mtk_spi_compatible mt8173_compat = { + .need_pad_sel = MTK_SPI_QUIRK_PAD_SELECT, + .must_tx = MTK_SPI_QUIRK_MUST_TX, +}; + +/* + * A piece of default chip info unless the platform + * supplies it. + */ +static const struct mtk_chip_config mtk_default_chip_info = { + .rx_mlsb = 1, + .tx_mlsb = 1, + .tx_endian = 0, + .rx_endian = 0, +}; + +static const struct of_device_id mtk_spi_of_match[] = { + { .compatible = "mediatek,mt6589-spi", .data = (void *)&mt6589_compat }, + { .compatible = "mediatek,mt8135-spi", .data = (void *)&mt8135_compat }, + { .compatible = "mediatek,mt8173-spi", .data = (void *)&mt8173_compat }, + {} +}; +MODULE_DEVICE_TABLE(of, mtk_spi_of_match); + +static void mtk_spi_reset(struct mtk_spi *mdata) +{ + u32 reg_val; + + /* set the software reset bit in SPI_CMD_REG. 
*/ + reg_val = readl(mdata->base + SPI_CMD_REG); + reg_val |= SPI_CMD_RST; + writel(reg_val, mdata->base + SPI_CMD_REG); + + reg_val = readl(mdata->base + SPI_CMD_REG); + reg_val &= ~SPI_CMD_RST; + writel(reg_val, mdata->base + SPI_CMD_REG); +} + +static void mtk_spi_config(struct mtk_spi *mdata, + struct mtk_chip_config *chip_config) +{ + u32 reg_val; + + reg_val = readl(mdata->base + SPI_CMD_REG); + + /* set the mlsbx and mlsbtx */ + reg_val &= ~(SPI_CMD_TXMSBF | SPI_CMD_RXMSBF); + reg_val |= (chip_config->tx_mlsb << SPI_CMD_TXMSBF_OFFSET); + reg_val |= (chip_config->rx_mlsb << SPI_CMD_RXMSBF_OFFSET); + + /* set the tx/rx endian */ + reg_val &= ~(SPI_CMD_TX_ENDIAN | SPI_CMD_RX_ENDIAN); + reg_val |= (chip_config->tx_endian << SPI_CMD_TX_ENDIAN_OFFSET); + reg_val |= (chip_config->rx_endian << SPI_CMD_RX_ENDIAN_OFFSET); + + /* set finish and pause interrupt always enable */ + reg_val |= SPI_CMD_FINISH_IE | SPI_CMD_PAUSE_EN; + + /* disable dma mode */ + reg_val &= ~(SPI_CMD_TX_DMA | SPI_CMD_RX_DMA); + + /* disable deassert mode */ + reg_val &= ~SPI_CMD_DEASSERT; + + writel(reg_val, mdata->base + SPI_CMD_REG); + + /* pad select */ + if (mdata->dev_comp->need_pad_sel) + writel(mdata->pad_sel, mdata->base + SPI_PAD_SEL_REG); +} + +static int mtk_spi_prepare_hardware(struct spi_master *master) +{ + struct spi_transfer *trans; + struct mtk_spi *mdata = spi_master_get_devdata(master); + struct spi_message *msg = master->cur_msg; + int ret; + + ret = clk_prepare_enable(mdata->spi_clk); + if (ret < 0) { + dev_err(&master->dev, "failed to enable clock (%d)\n", ret); + return ret; + } + + trans = list_first_entry(&msg->transfers, struct spi_transfer, + transfer_list); + if (trans->cs_change == 0) { + mdata->state = MTK_SPI_IDLE; + mtk_spi_reset(mdata); + } + + return ret; +} + +static int mtk_spi_unprepare_hardware(struct spi_master *master) +{ + struct mtk_spi *mdata = spi_master_get_devdata(master); + + clk_disable_unprepare(mdata->spi_clk); + + return 0; +} + +static int 
mtk_spi_prepare_message(struct spi_master *master, + struct spi_message *msg) +{ + u32 reg_val; + u8 cpha, cpol; + struct mtk_chip_config *chip_config; + struct spi_device *spi = msg->spi; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + cpha = spi->mode & SPI_CPHA ? 1 : 0; + cpol = spi->mode & SPI_CPOL ? 1 : 0; + + reg_val = readl(mdata->base + SPI_CMD_REG); + reg_val &= ~(SPI_CMD_CPHA | SPI_CMD_CPOL); + reg_val |= (cpha << SPI_CMD_CPHA_OFFSET); + reg_val |= (cpol << SPI_CMD_CPOL_OFFSET); + writel(reg_val, mdata->base + SPI_CMD_REG); + + chip_config = spi->controller_data; + if (!chip_config) { + chip_config = (void *)&mtk_default_chip_info; + spi->controller_data = chip_config; + } + mtk_spi_config(mdata, chip_config); + + return 0; +} + +static void mtk_spi_set_cs(struct spi_device *spi, bool enable) +{ + u32 reg_val; + struct mtk_spi *mdata = spi_master_get_devdata(spi->master); + + reg_val = readl(mdata->base + SPI_CMD_REG); + if (!enable) + reg_val |= SPI_CMD_PAUSE_EN; + else + reg_val &= ~SPI_CMD_PAUSE_EN; + writel(reg_val, mdata->base + SPI_CMD_REG); +} + +static void mtk_spi_prepare_transfer(struct spi_master *master, + struct spi_transfer *xfer) +{ + u32 spi_clk_hz, div, high_time, low_time, holdtime, + setuptime, cs_idletime, reg_val = 0; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + spi_clk_hz = clk_get_rate(mdata->spi_clk); + if (xfer->speed_hz < spi_clk_hz / 2) + div = DIV_ROUND_UP(spi_clk_hz, xfer->speed_hz); + else + div = 1; + + high_time = (div + 1) / 2; + low_time = (div + 1) / 2; + holdtime = (div + 1) / 2 * 2; + setuptime = (div + 1) / 2 * 2; + cs_idletime = (div + 1) / 2 * 2; + + reg_val |= (((high_time - 1) & 0xff) << SPI_CFG0_SCK_HIGH_OFFSET); + reg_val |= (((low_time - 1) & 0xff) << SPI_CFG0_SCK_LOW_OFFSET); + reg_val |= (((holdtime - 1) & 0xff) << SPI_CFG0_CS_HOLD_OFFSET); + reg_val |= (((setuptime - 1) & 0xff) << SPI_CFG0_CS_SETUP_OFFSET); + writel(reg_val, mdata->base + SPI_CFG0_REG); + + reg_val = 
readl(mdata->base + SPI_CFG1_REG); + reg_val &= ~SPI_CFG1_CS_IDLE_MASK; + reg_val |= (((cs_idletime - 1) & 0xff) << SPI_CFG1_CS_IDLE_OFFSET); + writel(reg_val, mdata->base + SPI_CFG1_REG); +} + +static void mtk_spi_setup_packet(struct spi_master *master) +{ + u32 packet_size, packet_loop, reg_val; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + packet_size = min_t(unsigned, mdata->xfer_len, MTK_SPI_PACKET_SIZE); + packet_loop = mdata->xfer_len / packet_size; + + reg_val = readl(mdata->base + SPI_CFG1_REG); + reg_val &= ~(SPI_CFG1_PACKET_LENGTH_MASK + SPI_CFG1_PACKET_LOOP_MASK); + reg_val |= (packet_size - 1) << SPI_CFG1_PACKET_LENGTH_OFFSET; + reg_val |= (packet_loop - 1) << SPI_CFG1_PACKET_LOOP_OFFSET; + writel(reg_val, mdata->base + SPI_CFG1_REG); +} + +static void mtk_spi_enable_transfer(struct spi_master *master) +{ + int cmd; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + cmd = readl(mdata->base + SPI_CMD_REG); + if (mdata->state == MTK_SPI_IDLE) + cmd |= 1 << SPI_CMD_ACT_OFFSET; + else + cmd |= 1 << SPI_CMD_RESUME_OFFSET; + writel(cmd, mdata->base + SPI_CMD_REG); +} + +static int mtk_spi_get_mult_delta(int xfer_len) +{ + int mult_delta; + + if (xfer_len > MTK_SPI_PACKET_SIZE) + mult_delta = xfer_len % MTK_SPI_PACKET_SIZE; + else + mult_delta = 0; + + return mult_delta; +} + +static void mtk_spi_update_mdata_len(struct spi_master *master) +{ + int mult_delta; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + if (mdata->tx_sgl_len && mdata->rx_sgl_len) { + if (mdata->tx_sgl_len > mdata->rx_sgl_len) { + mult_delta = mtk_spi_get_mult_delta(mdata->rx_sgl_len); + mdata->xfer_len = mdata->rx_sgl_len - mult_delta; + mdata->rx_sgl_len = mult_delta; + mdata->tx_sgl_len -= mdata->xfer_len; + } else { + mult_delta = mtk_spi_get_mult_delta(mdata->tx_sgl_len); + mdata->xfer_len = mdata->tx_sgl_len - mult_delta; + mdata->tx_sgl_len = mult_delta; + mdata->rx_sgl_len -= mdata->xfer_len; + } + } else if (mdata->tx_sgl_len) { + 
mult_delta = mtk_spi_get_mult_delta(mdata->tx_sgl_len); + mdata->xfer_len = mdata->tx_sgl_len - mult_delta; + mdata->tx_sgl_len = mult_delta; + } else if (mdata->rx_sgl_len) { + mult_delta = mtk_spi_get_mult_delta(mdata->rx_sgl_len); + mdata->xfer_len = mdata->rx_sgl_len - mult_delta; + mdata->rx_sgl_len = mult_delta; + } +} + +static void mtk_spi_setup_dma_addr(struct spi_master *master, + struct spi_transfer *xfer) +{ + struct mtk_spi *mdata = spi_master_get_devdata(master); + + if (mdata->tx_sgl) + writel(cpu_to_le32(xfer->tx_dma), mdata->base + SPI_TX_SRC_REG); + if (mdata->rx_sgl) + writel(cpu_to_le32(xfer->rx_dma), mdata->base + SPI_RX_DST_REG); +} + +static int mtk_spi_fifo_transfer(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + int cnt, i; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + mdata->cur_transfer = xfer; + mdata->xfer_len = xfer->len; + mtk_spi_prepare_transfer(master, xfer); + mtk_spi_setup_packet(master); + + if (xfer->len % 4) + cnt = xfer->len / 4 + 1; + else + cnt = xfer->len / 4; + + for (i = 0; i < cnt; i++) + writel(*((u32 *)xfer->tx_buf + i), + mdata->base + SPI_TX_DATA_REG); + + mtk_spi_enable_transfer(master); + + return 1; +} + +static int mtk_spi_dma_transfer(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + int cmd; + struct mtk_spi *mdata = spi_master_get_devdata(master); + + mdata->tx_sgl = NULL; + mdata->rx_sgl = NULL; + mdata->tx_sgl_len = 0; + mdata->rx_sgl_len = 0; + mdata->cur_transfer = xfer; + + mtk_spi_prepare_transfer(master, xfer); + + cmd = readl(mdata->base + SPI_CMD_REG); + if (xfer->tx_buf) + cmd |= SPI_CMD_TX_DMA; + if (xfer->rx_buf) + cmd |= SPI_CMD_RX_DMA; + writel(cmd, mdata->base + SPI_CMD_REG); + + if (xfer->tx_buf) + mdata->tx_sgl = xfer->tx_sg.sgl; + if (xfer->rx_buf) + mdata->rx_sgl = xfer->rx_sg.sgl; + + if (mdata->tx_sgl) { + xfer->tx_dma = sg_dma_address(mdata->tx_sgl); + mdata->tx_sgl_len = 
sg_dma_len(mdata->tx_sgl); + } + if (mdata->rx_sgl) { + xfer->rx_dma = sg_dma_address(mdata->rx_sgl); + mdata->rx_sgl_len = sg_dma_len(mdata->rx_sgl); + } + + mtk_spi_update_mdata_len(master); + mtk_spi_setup_packet(master); + mtk_spi_setup_dma_addr(master, xfer); + mtk_spi_enable_transfer(master); + + return 1; +} + +static int mtk_spi_transfer_one(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + if (master->can_dma(master, spi, xfer)) + return mtk_spi_dma_transfer(master, spi, xfer); + else + return mtk_spi_fifo_transfer(master, spi, xfer); +} + +static bool mtk_spi_can_dma(struct spi_master *master, + struct spi_device *spi, + struct spi_transfer *xfer) +{ + return xfer->len > MTK_SPI_MAX_FIFO_SIZE; +} + +static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) +{ + u32 cmd, reg_val, i; + struct spi_master *master = dev_id; + struct mtk_spi *mdata = spi_master_get_devdata(master); + struct spi_transfer *trans = mdata->cur_transfer; + + reg_val = readl(mdata->base + SPI_STATUS0_REG); + if (reg_val & 0x2) + mdata->state = MTK_SPI_PAUSED; + else + mdata->state = MTK_SPI_IDLE; + + if (!master->can_dma(master, master->cur_msg->spi, trans)) { + /* xfer len is not N*4 bytes every time in a transfer, + * but SPI_RX_DATA_REG must reads 4 bytes once, + * so rx buffer byte by byte. 
+ */ + if (trans->rx_buf) { + for (i = 0; i < mdata->xfer_len; i++) { + if (i % 4 == 0) + reg_val = + readl(mdata->base + SPI_RX_DATA_REG); + *((u8 *)(trans->rx_buf + i)) = + (reg_val >> ((i % 4) * 8)) & 0xff; + } + } + spi_finalize_current_transfer(master); + return IRQ_HANDLED; + } + + if (mdata->tx_sgl) + trans->tx_dma += mdata->xfer_len; + if (mdata->rx_sgl) + trans->rx_dma += mdata->xfer_len; + + if (mdata->tx_sgl && (mdata->tx_sgl_len == 0)) { + mdata->tx_sgl = sg_next(mdata->tx_sgl); + if (mdata->tx_sgl) { + trans->tx_dma = sg_dma_address(mdata->tx_sgl); + mdata->tx_sgl_len = sg_dma_len(mdata->tx_sgl); + } + } + if (mdata->rx_sgl && (mdata->rx_sgl_len == 0)) { + mdata->rx_sgl = sg_next(mdata->rx_sgl); + if (mdata->rx_sgl) { + trans->rx_dma = sg_dma_address(mdata->rx_sgl); + mdata->rx_sgl_len = sg_dma_len(mdata->rx_sgl); + } + } + + if (!mdata->tx_sgl && !mdata->rx_sgl) { + /* spi disable dma */ + cmd = readl(mdata->base + SPI_CMD_REG); + cmd &= ~SPI_CMD_TX_DMA; + cmd &= ~SPI_CMD_RX_DMA; + writel(cmd, mdata->base + SPI_CMD_REG); + + spi_finalize_current_transfer(master); + return IRQ_HANDLED; + } + + mtk_spi_update_mdata_len(master); + mtk_spi_setup_packet(master); + mtk_spi_setup_dma_addr(master, trans); + mtk_spi_enable_transfer(master); + + return IRQ_HANDLED; +} + +static int mtk_spi_probe(struct platform_device *pdev) +{ + struct spi_master *master; + struct mtk_spi *mdata; + const struct of_device_id *of_id; + struct resource *res; + int irq, ret; + + master = spi_alloc_master(&pdev->dev, sizeof(*mdata)); + if (!master) { + dev_err(&pdev->dev, "failed to alloc spi master\n"); + return -ENOMEM; + } + + master->auto_runtime_pm = true; + master->dev.of_node = pdev->dev.of_node; + master->mode_bits = SPI_CPOL | SPI_CPHA; + + master->set_cs = mtk_spi_set_cs; + master->prepare_transfer_hardware = mtk_spi_prepare_hardware; + master->unprepare_transfer_hardware = mtk_spi_unprepare_hardware; + master->prepare_message = mtk_spi_prepare_message; + 
master->transfer_one = mtk_spi_transfer_one; + master->can_dma = mtk_spi_can_dma; + + of_id = of_match_node(mtk_spi_of_match, pdev->dev.of_node); + if (!of_id) { + dev_err(&pdev->dev, "failed to probe of_node\n"); + ret = -EINVAL; + goto err_put_master; + } + + mdata = spi_master_get_devdata(master); + mdata->dev_comp = of_id->data; + if (mdata->dev_comp->must_tx) + master->flags = SPI_MASTER_MUST_TX; + + if (mdata->dev_comp->need_pad_sel) { + ret = of_property_read_u32(pdev->dev.of_node, + "mediatek,pad-select", + &mdata->pad_sel); + if (ret) { + dev_err(&pdev->dev, "failed to read pad select: %d\n", + ret); + goto err_put_master; + } + + if (mdata->pad_sel > MT8173_SPI_MAX_PAD_SEL) { + dev_err(&pdev->dev, "wrong pad-select: %u\n", + mdata->pad_sel); + ret = -EINVAL; + goto err_put_master; + } + } + + platform_set_drvdata(pdev, master); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -ENODEV; + dev_err(&pdev->dev, "failed to determine base address\n"); + goto err_put_master; + } + + mdata->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(mdata->base)) { + ret = PTR_ERR(mdata->base); + goto err_put_master; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "failed to get irq (%d)\n", irq); + ret = irq; + goto err_put_master; + } + + if (!pdev->dev.dma_mask) + pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; + + ret = devm_request_irq(&pdev->dev, irq, mtk_spi_interrupt, + IRQF_TRIGGER_NONE, dev_name(&pdev->dev), master); + if (ret) { + dev_err(&pdev->dev, "failed to register irq (%d)\n", ret); + goto err_put_master; + } + + mdata->spi_clk = devm_clk_get(&pdev->dev, "spi-clk"); + if (IS_ERR(mdata->spi_clk)) { + ret = PTR_ERR(mdata->spi_clk); + dev_err(&pdev->dev, "failed to get spi-clk: %d\n", ret); + goto err_put_master; + } + + mdata->parent_clk = devm_clk_get(&pdev->dev, "parent-clk"); + if (IS_ERR(mdata->parent_clk)) { + ret = PTR_ERR(mdata->parent_clk); + dev_err(&pdev->dev, "failed to get 
parent-clk: %d\n", ret); + goto err_put_master; + } + + ret = clk_prepare_enable(mdata->spi_clk); + if (ret < 0) { + dev_err(&pdev->dev, "failed to enable spi_clk (%d)\n", ret); + goto err_put_master; + } + + ret = clk_set_parent(mdata->spi_clk, mdata->parent_clk); + if (ret < 0) { + dev_err(&pdev->dev, "failed to clk_set_parent (%d)\n", ret); + goto err_disable_clk; + } + + clk_disable_unprepare(mdata->spi_clk); + + pm_runtime_enable(&pdev->dev); + + ret = devm_spi_register_master(&pdev->dev, master); + if (ret) { + dev_err(&pdev->dev, "failed to register master (%d)\n", ret); + goto err_put_master; + } + + return 0; + +err_disable_clk: + clk_disable_unprepare(mdata->spi_clk); +err_put_master: + spi_master_put(master); + + return ret; +} + +static int mtk_spi_remove(struct platform_device *pdev) +{ + struct spi_master *master = platform_get_drvdata(pdev); + struct mtk_spi *mdata = spi_master_get_devdata(master); + + pm_runtime_disable(&pdev->dev); + + mtk_spi_reset(mdata); + clk_disable_unprepare(mdata->spi_clk); + spi_master_put(master); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int mtk_spi_suspend(struct device *dev) +{ + int ret; + struct spi_master *master = dev_get_drvdata(dev); + struct mtk_spi *mdata = spi_master_get_devdata(master); + + ret = spi_master_suspend(master); + if (ret) + return ret; + + if (!pm_runtime_suspended(dev)) + clk_disable_unprepare(mdata->spi_clk); + + return ret; +} + +static int mtk_spi_resume(struct device *dev) +{ + int ret; + struct spi_master *master = dev_get_drvdata(dev); + struct mtk_spi *mdata = spi_master_get_devdata(master); + + if (!pm_runtime_suspended(dev)) { + ret = clk_prepare_enable(mdata->spi_clk); + if (ret < 0) + return ret; + } + + ret = spi_master_resume(master); + if (ret < 0) + clk_disable_unprepare(mdata->spi_clk); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +#ifdef CONFIG_PM +static int mtk_spi_runtime_suspend(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct 
mtk_spi *mdata = spi_master_get_devdata(master); + + clk_disable_unprepare(mdata->spi_clk); + + return 0; +} + +static int mtk_spi_runtime_resume(struct device *dev) +{ + struct spi_master *master = dev_get_drvdata(dev); + struct mtk_spi *mdata = spi_master_get_devdata(master); + + return clk_prepare_enable(mdata->spi_clk); +} +#endif /* CONFIG_PM */ + +static const struct dev_pm_ops mtk_spi_pm = { + SET_SYSTEM_SLEEP_PM_OPS(mtk_spi_suspend, mtk_spi_resume) + SET_RUNTIME_PM_OPS(mtk_spi_runtime_suspend, + mtk_spi_runtime_resume, NULL) +}; + +struct platform_driver mtk_spi_driver = { + .driver = { + .name = "mtk-spi", + .pm = &mtk_spi_pm, + .of_match_table = mtk_spi_of_match, + }, + .probe = mtk_spi_probe, + .remove = mtk_spi_remove, +}; + +module_platform_driver(mtk_spi_driver); + +MODULE_DESCRIPTION("MTK SPI Controller driver"); +MODULE_AUTHOR("Leilk Liu "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform: mtk_spi"); diff --git a/include/linux/platform_data/spi-mt65xx.h b/include/linux/platform_data/spi-mt65xx.h new file mode 100644 index 000000000000..751225569d27 --- /dev/null +++ b/include/linux/platform_data/spi-mt65xx.h @@ -0,0 +1,22 @@ +/* + * MTK SPI bus driver definitions + * + * Copyright (c) 2015 MediaTek Inc. + * Author: Leilk Liu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef ____LINUX_PLATFORM_DATA_SPI_MTK_H +#define ____LINUX_PLATFORM_DATA_SPI_MTK_H + +/* Board specific platform_data */ +struct mtk_chip_config { + u32 tx_mlsb; + u32 rx_mlsb; + u32 tx_endian; + u32 rx_endian; +}; +#endif -- cgit v1.2.3-70-g09d2 From 3cfe7a74d42b7e3644f8b2b26aa20146d4f90f0f Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Wed, 8 Jul 2015 14:30:18 +0800 Subject: regmap: Use different lockdep class for each regmap init call Lockdep validator complains about recursive locking and deadlock when two different regmap instances are called in a nested order. That happens anytime a regmap read/write call needs to access another regmap. This is because, for performance reasons, lockdep groups all locks initialized by the same mutex_init() in the same lock class. Therefore all regmap mutexes are in the same lock class, leading to lockdep "nested locking" warnings if a regmap accesses another regmap. In general, it is impossible to establish in advance the hierarchy of regmaps, so we make sure that each regmap init call initializes its own static lock_class_key. This is done by wrapping all regmap_init calls into macros. This also allows us to give meaningful names to the lock_class_key. For example, in the rt5677 case, we have in /proc/lockdep_chains: irq_context: 0 [ffffffc0018d2198] &dev->mutex [ffffffc0018d2198] &dev->mutex [ffffffc001bd7f60] rt5677:5104:(&rt5677_regmap)->_lock [ffffffc001bd7f58] rt5677:5096:(&rt5677_regmap_physical)->_lock [ffffffc001b95448] &(&base->lock)->rlock The above would have resulted in a lockdep recursive warning previously. This is not the case anymore as the lockdep validator now clearly identifies the 2 regmaps as separate.
Signed-off-by: Nicolas Boichat Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-ac97.c | 22 +++-- drivers/base/regmap/regmap-i2c.c | 22 +++-- drivers/base/regmap/regmap-mmio.c | 27 ++++-- drivers/base/regmap/regmap-spi.c | 22 +++-- drivers/base/regmap/regmap-spmi.c | 44 +++++---- drivers/base/regmap/regmap.c | 31 +++--- include/linux/regmap.h | 192 ++++++++++++++++++++++++++++---------- 7 files changed, 250 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-ac97.c b/drivers/base/regmap/regmap-ac97.c index 8d304e2a943d..aa631be8b821 100644 --- a/drivers/base/regmap/regmap-ac97.c +++ b/drivers/base/regmap/regmap-ac97.c @@ -87,12 +87,15 @@ static const struct regmap_bus ac97_regmap_bus = { * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_ac97(struct snd_ac97 *ac97, - const struct regmap_config *config) +struct regmap *__regmap_init_ac97(struct snd_ac97 *ac97, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return regmap_init(&ac97->dev, &ac97_regmap_bus, ac97, config); + return __regmap_init(&ac97->dev, &ac97_regmap_bus, ac97, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(regmap_init_ac97); +EXPORT_SYMBOL_GPL(__regmap_init_ac97); /** * devm_regmap_init_ac97(): Initialise AC'97 register map @@ -104,11 +107,14 @@ EXPORT_SYMBOL_GPL(regmap_init_ac97); * to a struct regmap. The regmap will be automatically freed by the * device management code. 
*/ -struct regmap *devm_regmap_init_ac97(struct snd_ac97 *ac97, - const struct regmap_config *config) +struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return devm_regmap_init(&ac97->dev, &ac97_regmap_bus, ac97, config); + return __devm_regmap_init(&ac97->dev, &ac97_regmap_bus, ac97, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(devm_regmap_init_ac97); +EXPORT_SYMBOL_GPL(__devm_regmap_init_ac97); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index 4b76e33110a2..3163b22e2baf 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -242,17 +242,20 @@ static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c, * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_i2c(struct i2c_client *i2c, - const struct regmap_config *config) +struct regmap *__regmap_init_i2c(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { const struct regmap_bus *bus = regmap_get_i2c_bus(i2c, config); if (IS_ERR(bus)) return ERR_CAST(bus); - return regmap_init(&i2c->dev, bus, &i2c->dev, config); + return __regmap_init(&i2c->dev, bus, &i2c->dev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(regmap_init_i2c); +EXPORT_SYMBOL_GPL(__regmap_init_i2c); /** * devm_regmap_init_i2c(): Initialise managed register map @@ -264,16 +267,19 @@ EXPORT_SYMBOL_GPL(regmap_init_i2c); * to a struct regmap. The regmap will be automatically freed by the * device management code. 
*/ -struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c, - const struct regmap_config *config) +struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { const struct regmap_bus *bus = regmap_get_i2c_bus(i2c, config); if (IS_ERR(bus)) return ERR_CAST(bus); - return devm_regmap_init(&i2c->dev, bus, &i2c->dev, config); + return __devm_regmap_init(&i2c->dev, bus, &i2c->dev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(devm_regmap_init_i2c); +EXPORT_SYMBOL_GPL(__devm_regmap_init_i2c); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-mmio.c b/drivers/base/regmap/regmap-mmio.c index 04a329a377e9..a1b2b270e4bc 100644 --- a/drivers/base/regmap/regmap-mmio.c +++ b/drivers/base/regmap/regmap-mmio.c @@ -307,9 +307,11 @@ err_free: * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id, - void __iomem *regs, - const struct regmap_config *config) +struct regmap *__regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { struct regmap_mmio_context *ctx; @@ -317,9 +319,10 @@ struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id, if (IS_ERR(ctx)) return ERR_CAST(ctx); - return regmap_init(dev, ®map_mmio, ctx, config); + return __regmap_init(dev, ®map_mmio, ctx, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(regmap_init_mmio_clk); +EXPORT_SYMBOL_GPL(__regmap_init_mmio_clk); /** * devm_regmap_init_mmio_clk(): Initialise managed register map with clock @@ -333,9 +336,12 @@ EXPORT_SYMBOL_GPL(regmap_init_mmio_clk); * to a struct regmap. The regmap will be automatically freed by the * device management code. 
*/ -struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, - void __iomem *regs, - const struct regmap_config *config) +struct regmap *__devm_regmap_init_mmio_clk(struct device *dev, + const char *clk_id, + void __iomem *regs, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { struct regmap_mmio_context *ctx; @@ -343,8 +349,9 @@ struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, if (IS_ERR(ctx)) return ERR_CAST(ctx); - return devm_regmap_init(dev, ®map_mmio, ctx, config); + return __devm_regmap_init(dev, ®map_mmio, ctx, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(devm_regmap_init_mmio_clk); +EXPORT_SYMBOL_GPL(__devm_regmap_init_mmio_clk); MODULE_LICENSE("GPL v2"); diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c index 53d1148e80a0..4c7850d660d1 100644 --- a/drivers/base/regmap/regmap-spi.c +++ b/drivers/base/regmap/regmap-spi.c @@ -122,12 +122,15 @@ static struct regmap_bus regmap_spi = { * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_spi(struct spi_device *spi, - const struct regmap_config *config) +struct regmap *__regmap_init_spi(struct spi_device *spi, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return regmap_init(&spi->dev, ®map_spi, &spi->dev, config); + return __regmap_init(&spi->dev, ®map_spi, &spi->dev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(regmap_init_spi); +EXPORT_SYMBOL_GPL(__regmap_init_spi); /** * devm_regmap_init_spi(): Initialise register map @@ -139,11 +142,14 @@ EXPORT_SYMBOL_GPL(regmap_init_spi); * to a struct regmap. The map will be automatically freed by the * device management code. 
*/ -struct regmap *devm_regmap_init_spi(struct spi_device *spi, - const struct regmap_config *config) +struct regmap *__devm_regmap_init_spi(struct spi_device *spi, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return devm_regmap_init(&spi->dev, ®map_spi, &spi->dev, config); + return __devm_regmap_init(&spi->dev, ®map_spi, &spi->dev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(devm_regmap_init_spi); +EXPORT_SYMBOL_GPL(__devm_regmap_init_spi); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-spmi.c b/drivers/base/regmap/regmap-spmi.c index d7026dc33388..7f50f5862d39 100644 --- a/drivers/base/regmap/regmap-spmi.c +++ b/drivers/base/regmap/regmap-spmi.c @@ -99,12 +99,15 @@ static struct regmap_bus regmap_spmi_base = { * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_spmi_base(struct spmi_device *sdev, - const struct regmap_config *config) +struct regmap *__regmap_init_spmi_base(struct spmi_device *sdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return regmap_init(&sdev->dev, ®map_spmi_base, sdev, config); + return __regmap_init(&sdev->dev, ®map_spmi_base, sdev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(regmap_init_spmi_base); +EXPORT_SYMBOL_GPL(__regmap_init_spmi_base); /** * devm_regmap_init_spmi_base(): Create managed regmap for Base register space @@ -115,12 +118,15 @@ EXPORT_SYMBOL_GPL(regmap_init_spmi_base); * to a struct regmap. The regmap will be automatically freed by the * device management code. 
*/ -struct regmap *devm_regmap_init_spmi_base(struct spmi_device *sdev, - const struct regmap_config *config) +struct regmap *__devm_regmap_init_spmi_base(struct spmi_device *sdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return devm_regmap_init(&sdev->dev, ®map_spmi_base, sdev, config); + return __devm_regmap_init(&sdev->dev, ®map_spmi_base, sdev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(devm_regmap_init_spmi_base); +EXPORT_SYMBOL_GPL(__devm_regmap_init_spmi_base); static int regmap_spmi_ext_read(void *context, const void *reg, size_t reg_size, @@ -230,12 +236,15 @@ static struct regmap_bus regmap_spmi_ext = { * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -struct regmap *regmap_init_spmi_ext(struct spmi_device *sdev, - const struct regmap_config *config) +struct regmap *__regmap_init_spmi_ext(struct spmi_device *sdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return regmap_init(&sdev->dev, ®map_spmi_ext, sdev, config); + return __regmap_init(&sdev->dev, ®map_spmi_ext, sdev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(regmap_init_spmi_ext); +EXPORT_SYMBOL_GPL(__regmap_init_spmi_ext); /** * devm_regmap_init_spmi_ext(): Create managed regmap for Ext register space @@ -246,11 +255,14 @@ EXPORT_SYMBOL_GPL(regmap_init_spmi_ext); * to a struct regmap. The regmap will be automatically freed by the * device management code. 
*/ -struct regmap *devm_regmap_init_spmi_ext(struct spmi_device *sdev, - const struct regmap_config *config) +struct regmap *__devm_regmap_init_spmi_ext(struct spmi_device *sdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { - return devm_regmap_init(&sdev->dev, ®map_spmi_ext, sdev, config); + return __devm_regmap_init(&sdev->dev, ®map_spmi_ext, sdev, config, + lock_key, lock_name); } -EXPORT_SYMBOL_GPL(devm_regmap_init_spmi_ext); +EXPORT_SYMBOL_GPL(__devm_regmap_init_spmi_ext); MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 7111d04f2621..b9fddccd6e06 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -527,10 +527,12 @@ EXPORT_SYMBOL_GPL(regmap_get_val_endian); * a struct regmap. This function should generally not be called * directly, it should be called by bus-specific init functions. */ -struct regmap *regmap_init(struct device *dev, - const struct regmap_bus *bus, - void *bus_context, - const struct regmap_config *config) +struct regmap *__regmap_init(struct device *dev, + const struct regmap_bus *bus, + void *bus_context, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { struct regmap *map; int ret = -EINVAL; @@ -556,10 +558,14 @@ struct regmap *regmap_init(struct device *dev, spin_lock_init(&map->spinlock); map->lock = regmap_lock_spinlock; map->unlock = regmap_unlock_spinlock; + lockdep_set_class_and_name(&map->spinlock, + lock_key, lock_name); } else { mutex_init(&map->mutex); map->lock = regmap_lock_mutex; map->unlock = regmap_unlock_mutex; + lockdep_set_class_and_name(&map->mutex, + lock_key, lock_name); } map->lock_arg = map; } @@ -899,7 +905,7 @@ err_map: err: return ERR_PTR(ret); } -EXPORT_SYMBOL_GPL(regmap_init); +EXPORT_SYMBOL_GPL(__regmap_init); static void devm_regmap_release(struct device *dev, void *res) { @@ -919,10 +925,12 @@ static void 
devm_regmap_release(struct device *dev, void *res) * directly, it should be called by bus-specific init functions. The * map will be automatically freed by the device management code. */ -struct regmap *devm_regmap_init(struct device *dev, - const struct regmap_bus *bus, - void *bus_context, - const struct regmap_config *config) +struct regmap *__devm_regmap_init(struct device *dev, + const struct regmap_bus *bus, + void *bus_context, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name) { struct regmap **ptr, *regmap; @@ -930,7 +938,8 @@ struct regmap *devm_regmap_init(struct device *dev, if (!ptr) return ERR_PTR(-ENOMEM); - regmap = regmap_init(dev, bus, bus_context, config); + regmap = __regmap_init(dev, bus, bus_context, config, + lock_key, lock_name); if (!IS_ERR(regmap)) { *ptr = regmap; devres_add(dev, ptr); @@ -940,7 +949,7 @@ struct regmap *devm_regmap_init(struct device *dev, return regmap; } -EXPORT_SYMBOL_GPL(devm_regmap_init); +EXPORT_SYMBOL_GPL(__devm_regmap_init); static void regmap_field_init(struct regmap_field *rm_field, struct regmap *regmap, struct reg_field reg_field) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 59c55ea0f0b5..5d7027286032 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -17,6 +17,7 @@ #include #include #include +#include struct module; struct device; @@ -324,46 +325,147 @@ struct regmap_bus { enum regmap_endian val_format_endian_default; }; -struct regmap *regmap_init(struct device *dev, - const struct regmap_bus *bus, - void *bus_context, - const struct regmap_config *config); -int regmap_attach_dev(struct device *dev, struct regmap *map, - const struct regmap_config *config); -struct regmap *regmap_init_i2c(struct i2c_client *i2c, - const struct regmap_config *config); -struct regmap *regmap_init_spi(struct spi_device *dev, - const struct regmap_config *config); -struct regmap *regmap_init_spmi_base(struct spmi_device *dev, - const struct 
regmap_config *config); -struct regmap *regmap_init_spmi_ext(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *regmap_init_mmio_clk(struct device *dev, const char *clk_id, - void __iomem *regs, - const struct regmap_config *config); -struct regmap *regmap_init_ac97(struct snd_ac97 *ac97, - const struct regmap_config *config); - -struct regmap *devm_regmap_init(struct device *dev, - const struct regmap_bus *bus, - void *bus_context, - const struct regmap_config *config); -struct regmap *devm_regmap_init_i2c(struct i2c_client *i2c, - const struct regmap_config *config); -struct regmap *devm_regmap_init_spi(struct spi_device *dev, - const struct regmap_config *config); -struct regmap *devm_regmap_init_spmi_base(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *devm_regmap_init_spmi_ext(struct spmi_device *dev, - const struct regmap_config *config); -struct regmap *devm_regmap_init_mmio_clk(struct device *dev, const char *clk_id, - void __iomem *regs, - const struct regmap_config *config); -struct regmap *devm_regmap_init_ac97(struct snd_ac97 *ac97, - const struct regmap_config *config); +/* + * __regmap_init functions. + * + * These functions take a lock key and name parameter, and should not be called + * directly. Instead, use the regmap_init macros that generate a key and name + * for each call. 
+ */ +struct regmap *__regmap_init(struct device *dev, + const struct regmap_bus *bus, + void *bus_context, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_i2c(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_spi(struct spi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_spmi_base(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_spmi_ext(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_mmio_clk(struct device *dev, const char *clk_id, + void __iomem *regs, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__regmap_init_ac97(struct snd_ac97 *ac97, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + +struct regmap *__devm_regmap_init(struct device *dev, + const struct regmap_bus *bus, + void *bus_context, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_i2c(struct i2c_client *i2c, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_spi(struct spi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_spmi_base(struct spmi_device *dev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_spmi_ext(struct spmi_device *dev, + const struct 
regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_mmio_clk(struct device *dev, + const char *clk_id, + void __iomem *regs, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +struct regmap *__devm_regmap_init_ac97(struct snd_ac97 *ac97, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); +/* + * Wrapper for regmap_init macros to include a unique lockdep key and name + * for each call. No-op if CONFIG_LOCKDEP is not set. + * + * @fn: Real function to call (in the form __[*_]regmap_init[_*]) + * @name: Config variable name (#config in the calling macro) + **/ +#ifdef CONFIG_LOCKDEP +#define __regmap_lockdep_wrapper(fn, name, ...) \ +( \ + ({ \ + static struct lock_class_key _key; \ + fn(__VA_ARGS__, &_key, \ + KBUILD_BASENAME ":" \ + __stringify(__LINE__) ":" \ + "(" name ")->lock"); \ + }) \ +) +#else +#define __regmap_lockdep_wrapper(fn, name, ...) 
fn(__VA_ARGS__, NULL, NULL) +#endif + +#define regmap_init(dev, bus, bus_context, config) \ + __regmap_lockdep_wrapper(__regmap_init, #config, \ + dev, bus, bus_context, config) +int regmap_attach_dev(struct device *dev, struct regmap *map, + const struct regmap_config *config); +#define regmap_init_i2c(i2c, config) \ + __regmap_lockdep_wrapper(__regmap_init_i2c, #config, \ + i2c, config) +#define regmap_init_spi(dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_spi, #config, \ + dev, config) +#define regmap_init_spmi_base(dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_spmi_base, #config, \ + dev, config) +#define regmap_init_spmi_ext(dev, config) \ + __regmap_lockdep_wrapper(__regmap_init_spmi_ext, #config, \ + dev, config) +#define regmap_init_mmio_clk(dev, clk_id, regs, config) \ + __regmap_lockdep_wrapper(__regmap_init_mmio_clk, #config, \ + dev, clk_id, regs, config) +#define regmap_init_ac97(ac97, config) \ + __regmap_lockdep_wrapper(__regmap_init_ac97, #config, \ + ac97, config) bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); +#define devm_regmap_init(dev, bus, bus_context, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init, #config, \ + dev, bus, bus_context, config) +#define devm_regmap_init_i2c(i2c, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_i2c, #config, \ + i2c, config) +#define devm_regmap_init_spi(dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spi, #config, \ + dev, config) +#define devm_regmap_init_spmi_base(dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spmi_base, #config, \ + dev, config) +#define devm_regmap_init_spmi_ext(dev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_spmi_ext, #config, \ + dev, config) +#define devm_regmap_init_mmio_clk(dev, clk_id, regs, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_mmio_clk, #config, \ + dev, clk_id, regs, config) +#define devm_regmap_init_ac97(ac97, config) \ + 
__regmap_lockdep_wrapper(__devm_regmap_init_ac97, #config, \ + ac97, config) + /** * regmap_init_mmio(): Initialise register map * @@ -374,12 +476,8 @@ bool regmap_ac97_default_volatile(struct device *dev, unsigned int reg); * The return value will be an ERR_PTR() on error or a valid pointer to * a struct regmap. */ -static inline struct regmap *regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config) -{ - return regmap_init_mmio_clk(dev, NULL, regs, config); -} +#define regmap_init_mmio(dev, regs, config) \ + regmap_init_mmio_clk(dev, NULL, regs, config) /** * devm_regmap_init_mmio(): Initialise managed register map @@ -392,12 +490,8 @@ static inline struct regmap *regmap_init_mmio(struct device *dev, * to a struct regmap. The regmap will be automatically freed by the * device management code. */ -static inline struct regmap *devm_regmap_init_mmio(struct device *dev, - void __iomem *regs, - const struct regmap_config *config) -{ - return devm_regmap_init_mmio_clk(dev, NULL, regs, config); -} +#define devm_regmap_init_mmio(dev, regs, config) \ + devm_regmap_init_mmio_clk(dev, NULL, regs, config) void regmap_exit(struct regmap *map); int regmap_reinit_cache(struct regmap *map, -- cgit v1.2.3-70-g09d2 From 21c36d35711d24a7689b7fb9606ce78f3b4c3d3b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 7 Aug 2015 13:59:16 +0200 Subject: cpufreq-dt: make scaling_boost_freqs sysfs attr available when boost is enabled Make scaling_boost_freqs sysfs attribute is available when cpufreq-dt driver is used and boost support is enabled. Suggested-by: Viresh Kumar Acked-by: Viresh Kumar Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. 
Wysocki --- drivers/cpufreq/cpufreq-dt.c | 9 ++++++++- include/linux/cpufreq.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index b9259abd25d4..c3583cdfadbd 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -36,6 +36,12 @@ struct private_data { unsigned int voltage_tolerance; /* in percentage */ }; +static struct freq_attr *cpufreq_dt_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, /* Extra space for boost-attr if required */ + NULL, +}; + static int set_target(struct cpufreq_policy *policy, unsigned int index) { struct dev_pm_opp *opp; @@ -336,6 +342,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) ret = cpufreq_enable_boost_support(); if (ret) goto out_free_cpufreq_table; + cpufreq_dt_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; } policy->cpuinfo.transition_latency = transition_latency; @@ -411,7 +418,7 @@ static struct cpufreq_driver dt_cpufreq_driver = { .exit = cpufreq_exit, .ready = cpufreq_ready, .name = "cpufreq-dt", - .attr = cpufreq_generic_attr, + .attr = cpufreq_dt_attr, }; static int dt_cpufreq_probe(struct platform_device *pdev) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 95f018649abf..657542d3e23b 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -609,6 +609,7 @@ struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu); /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; +extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; extern struct freq_attr *cpufreq_generic_attr[]; int cpufreq_table_validate_and_show(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table); -- cgit v1.2.3-70-g09d2 From cf184dc2dd33847f4b211b01d8c7ec0526e6c5e4 Mon Sep 17 00:00:00 2001 From: Jaiprakash Singh Date: Wed, 20 May 2015 21:17:11 -0500 Subject: 
fsl_ifc: Change IO accessor based on endianness IFC IO accessors are set at run time based on IFC IP registers endianness. IFC node in DTS file contains information about endianness. Signed-off-by: Jaiprakash Singh Signed-off-by: Scott Wood Acked-by: Brian Norris --- .../bindings/memory-controllers/fsl/ifc.txt | 3 + drivers/memory/fsl_ifc.c | 43 ++-- drivers/mtd/nand/fsl_ifc_nand.c | 258 +++++++++++---------- include/linux/fsl_ifc.h | 50 ++++ 4 files changed, 213 insertions(+), 141 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt b/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt index d5e370450ac0..89427b018ba7 100644 --- a/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt +++ b/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt @@ -18,6 +18,8 @@ Properties: interrupt (NAND_EVTER_STAT). If there is only one, that interrupt reports both types of event. +- little-endian : If this property is absent, the big-endian mode will + be in use as default for registers. - ranges : Each range corresponds to a single chipselect, and covers the entire access window as configured.
@@ -34,6 +36,7 @@ Example: #size-cells = <1>; reg = <0x0 0xffe1e000 0 0x2000>; interrupts = <16 2 19 2>; + little-endian; /* NOR, NAND Flashes and CPLD on board */ ranges = <0x0 0x0 0x0 0xee000000 0x02000000 diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index 410c39749872..e87459f6d686 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -62,7 +62,7 @@ int fsl_ifc_find(phys_addr_t addr_base) return -ENODEV; for (i = 0; i < fsl_ifc_ctrl_dev->banks; i++) { - u32 cspr = in_be32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr); + u32 cspr = ifc_in32(&fsl_ifc_ctrl_dev->regs->cspr_cs[i].cspr); if (cspr & CSPR_V && (cspr & CSPR_BA) == convert_ifc_address(addr_base)) return i; @@ -79,16 +79,16 @@ static int fsl_ifc_ctrl_init(struct fsl_ifc_ctrl *ctrl) /* * Clear all the common status and event registers */ - if (in_be32(&ifc->cm_evter_stat) & IFC_CM_EVTER_STAT_CSER) - out_be32(&ifc->cm_evter_stat, IFC_CM_EVTER_STAT_CSER); + if (ifc_in32(&ifc->cm_evter_stat) & IFC_CM_EVTER_STAT_CSER) + ifc_out32(IFC_CM_EVTER_STAT_CSER, &ifc->cm_evter_stat); /* enable all error and events */ - out_be32(&ifc->cm_evter_en, IFC_CM_EVTER_EN_CSEREN); + ifc_out32(IFC_CM_EVTER_EN_CSEREN, &ifc->cm_evter_en); /* enable all error and event interrupts */ - out_be32(&ifc->cm_evter_intr_en, IFC_CM_EVTER_INTR_EN_CSERIREN); - out_be32(&ifc->cm_erattr0, 0x0); - out_be32(&ifc->cm_erattr1, 0x0); + ifc_out32(IFC_CM_EVTER_INTR_EN_CSERIREN, &ifc->cm_evter_intr_en); + ifc_out32(0x0, &ifc->cm_erattr0); + ifc_out32(0x0, &ifc->cm_erattr1); return 0; } @@ -127,9 +127,9 @@ static u32 check_nand_stat(struct fsl_ifc_ctrl *ctrl) spin_lock_irqsave(&nand_irq_lock, flags); - stat = in_be32(&ifc->ifc_nand.nand_evter_stat); + stat = ifc_in32(&ifc->ifc_nand.nand_evter_stat); if (stat) { - out_be32(&ifc->ifc_nand.nand_evter_stat, stat); + ifc_out32(stat, &ifc->ifc_nand.nand_evter_stat); ctrl->nand_stat = stat; wake_up(&ctrl->nand_wait); } @@ -161,16 +161,16 @@ static irqreturn_t 
fsl_ifc_ctrl_irq(int irqno, void *data) irqreturn_t ret = IRQ_NONE; /* read for chip select error */ - cs_err = in_be32(&ifc->cm_evter_stat); + cs_err = ifc_in32(&ifc->cm_evter_stat); if (cs_err) { dev_err(ctrl->dev, "transaction sent to IFC is not mapped to" "any memory bank 0x%08X\n", cs_err); /* clear the chip select error */ - out_be32(&ifc->cm_evter_stat, IFC_CM_EVTER_STAT_CSER); + ifc_out32(IFC_CM_EVTER_STAT_CSER, &ifc->cm_evter_stat); /* read error attribute registers print the error information */ - status = in_be32(&ifc->cm_erattr0); - err_addr = in_be32(&ifc->cm_erattr1); + status = ifc_in32(&ifc->cm_erattr0); + err_addr = ifc_in32(&ifc->cm_erattr1); if (status & IFC_CM_ERATTR0_ERTYP_READ) dev_err(ctrl->dev, "Read transaction error" @@ -231,6 +231,23 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) goto err; } + version = ifc_in32(&fsl_ifc_ctrl_dev->regs->ifc_rev) & + FSL_IFC_VERSION_MASK; + banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8; + dev_info(&dev->dev, "IFC version %d.%d, %d banks\n", + version >> 24, (version >> 16) & 0xf, banks); + + fsl_ifc_ctrl_dev->version = version; + fsl_ifc_ctrl_dev->banks = banks; + + if (of_property_read_bool(dev->dev.of_node, "little-endian")) { + fsl_ifc_ctrl_dev->little_endian = true; + dev_dbg(&dev->dev, "IFC REGISTERS are LITTLE endian\n"); + } else { + fsl_ifc_ctrl_dev->little_endian = false; + dev_dbg(&dev->dev, "IFC REGISTERS are BIG endian\n"); + } + version = ioread32be(&fsl_ifc_ctrl_dev->regs->ifc_rev) & FSL_IFC_VERSION_MASK; banks = (version == FSL_IFC_VERSION_1_0_0) ? 4 : 8; diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c index 51394e59901b..a4e27e891153 100644 --- a/drivers/mtd/nand/fsl_ifc_nand.c +++ b/drivers/mtd/nand/fsl_ifc_nand.c @@ -238,8 +238,8 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr, int oob) ifc_nand_ctrl->page = page_addr; /* Program ROW0/COL0 */ - iowrite32be(page_addr, &ifc->ifc_nand.row0); - iowrite32be((oob ? 
IFC_NAND_COL_MS : 0) | column, &ifc->ifc_nand.col0); + ifc_out32(page_addr, &ifc->ifc_nand.row0); + ifc_out32((oob ? IFC_NAND_COL_MS : 0) | column, &ifc->ifc_nand.col0); buf_num = page_addr & priv->bufnum_mask; @@ -301,19 +301,19 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int i; /* set the chip select for NAND Transaction */ - iowrite32be(priv->bank << IFC_NAND_CSEL_SHIFT, - &ifc->ifc_nand.nand_csel); + ifc_out32(priv->bank << IFC_NAND_CSEL_SHIFT, + &ifc->ifc_nand.nand_csel); dev_vdbg(priv->dev, "%s: fir0=%08x fcr0=%08x\n", __func__, - ioread32be(&ifc->ifc_nand.nand_fir0), - ioread32be(&ifc->ifc_nand.nand_fcr0)); + ifc_in32(&ifc->ifc_nand.nand_fir0), + ifc_in32(&ifc->ifc_nand.nand_fcr0)); ctrl->nand_stat = 0; /* start read/write seq */ - iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); + ifc_out32(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -336,7 +336,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd) int sector_end = sector + chip->ecc.steps - 1; for (i = sector / 4; i <= sector_end / 4; i++) - eccstat[i] = ioread32be(&ifc->ifc_nand.nand_eccstat[i]); + eccstat[i] = ifc_in32(&ifc->ifc_nand.nand_eccstat[i]); for (i = sector; i <= sector_end; i++) { errors = check_read_ecc(mtd, ctrl, eccstat, i); @@ -376,33 +376,33 @@ static void fsl_ifc_do_read(struct nand_chip *chip, /* Program FIR/IFC_NAND_FCR0 for Small/Large page */ if (mtd->writesize > 512) { - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT), - &ifc->ifc_nand.nand_fir0); - iowrite32be(0x0, &ifc->ifc_nand.nand_fir1); - - iowrite32be((NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) | - (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT), - 
&ifc->ifc_nand.nand_fcr0); + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32(0x0, &ifc->ifc_nand.nand_fir1); + + ifc_out32((NAND_CMD_READ0 << IFC_NAND_FCR0_CMD0_SHIFT) | + (NAND_CMD_READSTART << IFC_NAND_FCR0_CMD1_SHIFT), + &ifc->ifc_nand.nand_fcr0); } else { - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT), - &ifc->ifc_nand.nand_fir0); - iowrite32be(0x0, &ifc->ifc_nand.nand_fir1); + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_RBCD << IFC_NAND_FIR0_OP3_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32(0x0, &ifc->ifc_nand.nand_fir1); if (oob) - iowrite32be(NAND_CMD_READOOB << - IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); + ifc_out32(NAND_CMD_READOOB << + IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); else - iowrite32be(NAND_CMD_READ0 << - IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); + ifc_out32(NAND_CMD_READ0 << + IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); } } @@ -422,7 +422,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, switch (command) { /* READ0 read the entire buffer to use hardware ECC. */ case NAND_CMD_READ0: - iowrite32be(0, &ifc->ifc_nand.nand_fbcr); + ifc_out32(0, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, page_addr, 0); ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; @@ -437,7 +437,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* READOOB reads only the OOB because no ECC is performed. 
*/ case NAND_CMD_READOOB: - iowrite32be(mtd->oobsize - column, &ifc->ifc_nand.nand_fbcr); + ifc_out32(mtd->oobsize - column, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, column, page_addr, 1); ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; @@ -453,19 +453,19 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, if (command == NAND_CMD_PARAM) timing = IFC_FIR_OP_RBCD; - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (timing << IFC_NAND_FIR0_OP2_SHIFT), - &ifc->ifc_nand.nand_fir0); - iowrite32be(command << IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); - iowrite32be(column, &ifc->ifc_nand.row3); + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (timing << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32(command << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + ifc_out32(column, &ifc->ifc_nand.row3); /* * although currently it's 8 bytes for READID, we always read * the maximum 256 bytes(for PARAM) */ - iowrite32be(256, &ifc->ifc_nand.nand_fbcr); + ifc_out32(256, &ifc->ifc_nand.nand_fbcr); ifc_nand_ctrl->read_bytes = 256; set_addr(mtd, 0, 0, 0); @@ -480,16 +480,16 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* ERASE2 uses the block and page address from ERASE1 */ case NAND_CMD_ERASE2: - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT), - &ifc->ifc_nand.nand_fir0); + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); - iowrite32be((NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) | - (NAND_CMD_ERASE2 << IFC_NAND_FCR0_CMD1_SHIFT), - &ifc->ifc_nand.nand_fcr0); + ifc_out32((NAND_CMD_ERASE1 << IFC_NAND_FCR0_CMD0_SHIFT) | + (NAND_CMD_ERASE2 
<< IFC_NAND_FCR0_CMD1_SHIFT), + &ifc->ifc_nand.nand_fcr0); - iowrite32be(0, &ifc->ifc_nand.nand_fbcr); + ifc_out32(0, &ifc->ifc_nand.nand_fbcr); ifc_nand_ctrl->read_bytes = 0; fsl_ifc_run_command(mtd); return; @@ -506,19 +506,18 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, (NAND_CMD_STATUS << IFC_NAND_FCR0_CMD1_SHIFT) | (NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD2_SHIFT); - iowrite32be( - (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | - (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) | - (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP4_SHIFT), - &ifc->ifc_nand.nand_fir0); - iowrite32be( - (IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT) | - (IFC_FIR_OP_RDSTAT << - IFC_NAND_FIR1_OP6_SHIFT) | - (IFC_FIR_OP_NOP << IFC_NAND_FIR1_OP7_SHIFT), - &ifc->ifc_nand.nand_fir1); + ifc_out32( + (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP2_SHIFT) | + (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP3_SHIFT) | + (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP4_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32( + (IFC_FIR_OP_CW1 << IFC_NAND_FIR1_OP5_SHIFT) | + (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR1_OP6_SHIFT) | + (IFC_FIR_OP_NOP << IFC_NAND_FIR1_OP7_SHIFT), + &ifc->ifc_nand.nand_fir1); } else { nand_fcr0 = ((NAND_CMD_PAGEPROG << IFC_NAND_FCR0_CMD1_SHIFT) | @@ -527,20 +526,19 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, (NAND_CMD_STATUS << IFC_NAND_FCR0_CMD3_SHIFT)); - iowrite32be( + ifc_out32( (IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | (IFC_FIR_OP_CMD2 << IFC_NAND_FIR0_OP1_SHIFT) | (IFC_FIR_OP_CA0 << IFC_NAND_FIR0_OP2_SHIFT) | (IFC_FIR_OP_RA0 << IFC_NAND_FIR0_OP3_SHIFT) | (IFC_FIR_OP_WBCD << IFC_NAND_FIR0_OP4_SHIFT), &ifc->ifc_nand.nand_fir0); - iowrite32be( - (IFC_FIR_OP_CMD1 << IFC_NAND_FIR1_OP5_SHIFT) | - (IFC_FIR_OP_CW3 << IFC_NAND_FIR1_OP6_SHIFT) | - (IFC_FIR_OP_RDSTAT << - 
IFC_NAND_FIR1_OP7_SHIFT) | - (IFC_FIR_OP_NOP << IFC_NAND_FIR1_OP8_SHIFT), - &ifc->ifc_nand.nand_fir1); + ifc_out32( + (IFC_FIR_OP_CMD1 << IFC_NAND_FIR1_OP5_SHIFT) | + (IFC_FIR_OP_CW3 << IFC_NAND_FIR1_OP6_SHIFT) | + (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR1_OP7_SHIFT) | + (IFC_FIR_OP_NOP << IFC_NAND_FIR1_OP8_SHIFT), + &ifc->ifc_nand.nand_fir1); if (column >= mtd->writesize) nand_fcr0 |= @@ -555,7 +553,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, column -= mtd->writesize; ifc_nand_ctrl->oob = 1; } - iowrite32be(nand_fcr0, &ifc->ifc_nand.nand_fcr0); + ifc_out32(nand_fcr0, &ifc->ifc_nand.nand_fcr0); set_addr(mtd, column, page_addr, ifc_nand_ctrl->oob); return; } @@ -563,24 +561,26 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, /* PAGEPROG reuses all of the setup from SEQIN and adds the length */ case NAND_CMD_PAGEPROG: { if (ifc_nand_ctrl->oob) { - iowrite32be(ifc_nand_ctrl->index - - ifc_nand_ctrl->column, - &ifc->ifc_nand.nand_fbcr); + ifc_out32(ifc_nand_ctrl->index - + ifc_nand_ctrl->column, + &ifc->ifc_nand.nand_fbcr); } else { - iowrite32be(0, &ifc->ifc_nand.nand_fbcr); + ifc_out32(0, &ifc->ifc_nand.nand_fbcr); } fsl_ifc_run_command(mtd); return; } - case NAND_CMD_STATUS: - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT), - &ifc->ifc_nand.nand_fir0); - iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); - iowrite32be(1, &ifc->ifc_nand.nand_fbcr); + case NAND_CMD_STATUS: { + void __iomem *addr; + + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP1_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + ifc_out32(1, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, 0, 0); ifc_nand_ctrl->read_bytes = 1; @@ -590,17 +590,19 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command, * The chip always seems to 
report that it is * write-protected, even when it is not. */ + addr = ifc_nand_ctrl->addr; if (chip->options & NAND_BUSWIDTH_16) - setbits16(ifc_nand_ctrl->addr, NAND_STATUS_WP); + ifc_out16(ifc_in16(addr) | (NAND_STATUS_WP), addr); else - setbits8(ifc_nand_ctrl->addr, NAND_STATUS_WP); + ifc_out8(ifc_in8(addr) | (NAND_STATUS_WP), addr); return; + } case NAND_CMD_RESET: - iowrite32be(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT, - &ifc->ifc_nand.nand_fir0); - iowrite32be(NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); + ifc_out32(IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT, + &ifc->ifc_nand.nand_fir0); + ifc_out32(NAND_CMD_RESET << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); fsl_ifc_run_command(mtd); return; @@ -658,7 +660,7 @@ static uint8_t fsl_ifc_read_byte(struct mtd_info *mtd) */ if (ifc_nand_ctrl->index < ifc_nand_ctrl->read_bytes) { offset = ifc_nand_ctrl->index++; - return in_8(ifc_nand_ctrl->addr + offset); + return ifc_in8(ifc_nand_ctrl->addr + offset); } dev_err(priv->dev, "%s: beyond end of buffer\n", __func__); @@ -680,7 +682,7 @@ static uint8_t fsl_ifc_read_byte16(struct mtd_info *mtd) * next byte. 
*/ if (ifc_nand_ctrl->index < ifc_nand_ctrl->read_bytes) { - data = in_be16(ifc_nand_ctrl->addr + ifc_nand_ctrl->index); + data = ifc_in16(ifc_nand_ctrl->addr + ifc_nand_ctrl->index); ifc_nand_ctrl->index += 2; return (uint8_t) data; } @@ -726,18 +728,18 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip) u32 nand_fsr; /* Use READ_STATUS command, but wait for the device to be ready */ - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT), - &ifc->ifc_nand.nand_fir0); - iowrite32be(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); - iowrite32be(1, &ifc->ifc_nand.nand_fbcr); + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_RDSTAT << IFC_NAND_FIR0_OP1_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32(NAND_CMD_STATUS << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + ifc_out32(1, &ifc->ifc_nand.nand_fbcr); set_addr(mtd, 0, 0, 0); ifc_nand_ctrl->read_bytes = 1; fsl_ifc_run_command(mtd); - nand_fsr = ioread32be(&ifc->ifc_nand.nand_fsr); + nand_fsr = ifc_in32(&ifc->ifc_nand.nand_fsr); /* * The chip always seems to report that it is @@ -829,34 +831,34 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) uint32_t cs = priv->bank; /* Save CSOR and CSOR_ext */ - csor = ioread32be(&ifc->csor_cs[cs].csor); - csor_ext = ioread32be(&ifc->csor_cs[cs].csor_ext); + csor = ifc_in32(&ifc->csor_cs[cs].csor); + csor_ext = ifc_in32(&ifc->csor_cs[cs].csor_ext); /* chage PageSize 8K and SpareSize 1K*/ csor_8k = (csor & ~(CSOR_NAND_PGS_MASK)) | 0x0018C000; - iowrite32be(csor_8k, &ifc->csor_cs[cs].csor); - iowrite32be(0x0000400, &ifc->csor_cs[cs].csor_ext); + ifc_out32(csor_8k, &ifc->csor_cs[cs].csor); + ifc_out32(0x0000400, &ifc->csor_cs[cs].csor_ext); /* READID */ - iowrite32be((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | - (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | - (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT), - &ifc->ifc_nand.nand_fir0); - 
iowrite32be(NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT, - &ifc->ifc_nand.nand_fcr0); - iowrite32be(0x0, &ifc->ifc_nand.row3); + ifc_out32((IFC_FIR_OP_CW0 << IFC_NAND_FIR0_OP0_SHIFT) | + (IFC_FIR_OP_UA << IFC_NAND_FIR0_OP1_SHIFT) | + (IFC_FIR_OP_RB << IFC_NAND_FIR0_OP2_SHIFT), + &ifc->ifc_nand.nand_fir0); + ifc_out32(NAND_CMD_READID << IFC_NAND_FCR0_CMD0_SHIFT, + &ifc->ifc_nand.nand_fcr0); + ifc_out32(0x0, &ifc->ifc_nand.row3); - iowrite32be(0x0, &ifc->ifc_nand.nand_fbcr); + ifc_out32(0x0, &ifc->ifc_nand.nand_fbcr); /* Program ROW0/COL0 */ - iowrite32be(0x0, &ifc->ifc_nand.row0); - iowrite32be(0x0, &ifc->ifc_nand.col0); + ifc_out32(0x0, &ifc->ifc_nand.row0); + ifc_out32(0x0, &ifc->ifc_nand.col0); /* set the chip select for NAND Transaction */ - iowrite32be(cs << IFC_NAND_CSEL_SHIFT, &ifc->ifc_nand.nand_csel); + ifc_out32(cs << IFC_NAND_CSEL_SHIFT, &ifc->ifc_nand.nand_csel); /* start read seq */ - iowrite32be(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); + ifc_out32(IFC_NAND_SEQ_STRT_FIR_STRT, &ifc->ifc_nand.nandseq_strt); /* wait for command complete flag or timeout */ wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat, @@ -866,8 +868,8 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv) printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n"); /* Restore CSOR and CSOR_ext */ - iowrite32be(csor, &ifc->csor_cs[cs].csor); - iowrite32be(csor_ext, &ifc->csor_cs[cs].csor_ext); + ifc_out32(csor, &ifc->csor_cs[cs].csor); + ifc_out32(csor_ext, &ifc->csor_cs[cs].csor_ext); } static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) @@ -884,7 +886,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) /* fill in nand_chip structure */ /* set up function call table */ - if ((ioread32be(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) + if ((ifc_in32(&ifc->cspr_cs[priv->bank].cspr)) & CSPR_PORT_SIZE_16) chip->read_byte = fsl_ifc_read_byte16; else chip->read_byte = fsl_ifc_read_byte; @@ -898,13 +900,13 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd 
*priv) chip->bbt_td = &bbt_main_descr; chip->bbt_md = &bbt_mirror_descr; - iowrite32be(0x0, &ifc->ifc_nand.ncfgr); + ifc_out32(0x0, &ifc->ifc_nand.ncfgr); /* set up nand options */ chip->bbt_options = NAND_BBT_USE_FLASH; chip->options = NAND_NO_SUBPAGE_WRITE; - if (ioread32be(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { + if (ifc_in32(&ifc->cspr_cs[priv->bank].cspr) & CSPR_PORT_SIZE_16) { chip->read_byte = fsl_ifc_read_byte16; chip->options |= NAND_BUSWIDTH_16; } else { @@ -917,7 +919,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.read_page = fsl_ifc_read_page; chip->ecc.write_page = fsl_ifc_write_page; - csor = ioread32be(&ifc->csor_cs[priv->bank].csor); + csor = ifc_in32(&ifc->csor_cs[priv->bank].csor); /* Hardware generates ECC per 512 Bytes */ chip->ecc.size = 512; @@ -1006,7 +1008,7 @@ static int fsl_ifc_chip_remove(struct fsl_ifc_mtd *priv) static int match_bank(struct fsl_ifc_regs __iomem *ifc, int bank, phys_addr_t addr) { - u32 cspr = ioread32be(&ifc->cspr_cs[bank].cspr); + u32 cspr = ifc_in32(&ifc->cspr_cs[bank].cspr); if (!(cspr & CSPR_V)) return 0; @@ -1092,16 +1094,16 @@ static int fsl_ifc_nand_probe(struct platform_device *dev) dev_set_drvdata(priv->dev, priv); - iowrite32be(IFC_NAND_EVTER_EN_OPC_EN | - IFC_NAND_EVTER_EN_FTOER_EN | - IFC_NAND_EVTER_EN_WPER_EN, - &ifc->ifc_nand.nand_evter_en); + ifc_out32(IFC_NAND_EVTER_EN_OPC_EN | + IFC_NAND_EVTER_EN_FTOER_EN | + IFC_NAND_EVTER_EN_WPER_EN, + &ifc->ifc_nand.nand_evter_en); /* enable NAND Machine Interrupts */ - iowrite32be(IFC_NAND_EVTER_INTR_OPCIR_EN | - IFC_NAND_EVTER_INTR_FTOERIR_EN | - IFC_NAND_EVTER_INTR_WPERIR_EN, - &ifc->ifc_nand.nand_evter_intr_en); + ifc_out32(IFC_NAND_EVTER_INTR_OPCIR_EN | + IFC_NAND_EVTER_INTR_FTOERIR_EN | + IFC_NAND_EVTER_INTR_WPERIR_EN, + &ifc->ifc_nand.nand_evter_intr_en); priv->mtd.name = kasprintf(GFP_KERNEL, "%llx.flash", (u64)res.start); if (!priv->mtd.name) { ret = -ENOMEM; diff --git a/include/linux/fsl_ifc.h 
b/include/linux/fsl_ifc.h index bf0321eabbda..0023088b253b 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -841,9 +841,59 @@ struct fsl_ifc_ctrl { u32 nand_stat; wait_queue_head_t nand_wait; + bool little_endian; }; extern struct fsl_ifc_ctrl *fsl_ifc_ctrl_dev; +static inline u32 ifc_in32(void __iomem *addr) +{ + u32 val; + + if (fsl_ifc_ctrl_dev->little_endian) + val = ioread32(addr); + else + val = ioread32be(addr); + + return val; +} + +static inline u16 ifc_in16(void __iomem *addr) +{ + u16 val; + + if (fsl_ifc_ctrl_dev->little_endian) + val = ioread16(addr); + else + val = ioread16be(addr); + + return val; +} + +static inline u8 ifc_in8(void __iomem *addr) +{ + return ioread8(addr); +} + +static inline void ifc_out32(u32 val, void __iomem *addr) +{ + if (fsl_ifc_ctrl_dev->little_endian) + iowrite32(val, addr); + else + iowrite32be(val, addr); +} + +static inline void ifc_out16(u16 val, void __iomem *addr) +{ + if (fsl_ifc_ctrl_dev->little_endian) + iowrite16(val, addr); + else + iowrite16be(val, addr); +} + +static inline void ifc_out8(u8 val, void __iomem *addr) +{ + iowrite8(val, addr); +} #endif /* __ASM_FSL_IFC_H */ -- cgit v1.2.3-70-g09d2 From ad30bad3a5474f2585a7f2a35d4705c7f85210f3 Mon Sep 17 00:00:00 2001 From: Pengyu Ma Date: Tue, 4 Aug 2015 16:32:18 +0800 Subject: iio: declare struct to fix warning When compile iio related driver the following warning shown: include/linux/iio/trigger.h:35:34: warning: 'struct iio_trigger' declared inside parameter list int (*set_trigger_state)(struct iio_trigger *trig, bool state); include/linux/iio/trigger.h:38:18: warning: 'struct iio_dev' declared inside parameter list struct iio_dev *indio_dev); 'struct iio_dev' and 'struct iio_trigger' was used before declaration, forward declaration for these structs to fix warning. 
Signed-off-by: Pengyu Ma Acked-by: Daniel Baluta Signed-off-by: Jonathan Cameron --- include/linux/iio/trigger.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index fa76c79a52a1..1c9e028e0d4a 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -18,6 +18,9 @@ struct iio_subirq { bool enabled; }; +struct iio_dev; +struct iio_trigger; + /** * struct iio_trigger_ops - operations structure for an iio_trigger. * @owner: used to monitor usage count of the trigger. -- cgit v1.2.3-70-g09d2 From c689a923c867eac40ed3826c1d9328edea8b6bc7 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Wed, 5 Aug 2015 15:38:14 +0200 Subject: iio: Add inverse unit conversion macros Add inverse unit conversion macros to convert from standard IIO units to units that might be used by some devices. Those are useful in combination with scale factors that are specified as IIO_VAL_FRACTIONAL. Typically the denominator for those specifications will contain the maximum raw value the sensor will generate and the numerator the value it maps to in a specific unit. Sometimes datasheets specify those in different units than the standard IIO units (e.g. degree/s instead of rad/s) and so we need to do a unit conversion. From a mathematical point of view it does not make a difference whether we apply the unit conversion to the numerator or the inverse unit conversion to the denominator since (x / y) / z = x / (y * z). But as the denominator is typically a larger value and we are rounding both the numerator and denominator to integer values using the latter method gives us a better precision (E.g. the relative error is smaller if we round 8000.3 to 8000 rather than rounding 8.3 to 8). This is where the inverse unit conversion macros will be used. Marked for stable as used by some upcoming fixes.
Signed-off-by: Lars-Peter Clausen Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index f79148261d16..7bb7f673cb3f 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -644,6 +644,15 @@ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, */ #define IIO_DEGREE_TO_RAD(deg) (((deg) * 314159ULL + 9000000ULL) / 18000000ULL) +/** + * IIO_RAD_TO_DEGREE() - Convert rad to degree + * @rad: A value in rad + * + * Returns the given value converted from rad to degree + */ +#define IIO_RAD_TO_DEGREE(rad) \ + (((rad) * 18000000ULL + 314159ULL / 2) / 314159ULL) + /** * IIO_G_TO_M_S_2() - Convert g to meter / second**2 * @g: A value in g @@ -652,4 +661,12 @@ int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, */ #define IIO_G_TO_M_S_2(g) ((g) * 980665ULL / 100000ULL) +/** + * IIO_M_S_2_TO_G() - Convert meter / second**2 to g + * @ms2: A value in meter / second**2 + * + * Returns the given value converted from meter / second**2 to g + */ +#define IIO_M_S_2_TO_G(ms2) (((ms2) * 100000ULL + 980665ULL / 2) / 980665ULL) + #endif /* _INDUSTRIAL_IO_H_ */ -- cgit v1.2.3-70-g09d2 From 7e34d70a7163b236f520ef4fc0d7c50093dd3746 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Fri, 7 Aug 2015 22:35:51 +0300 Subject: usb: hcd.h: Fix the values of SetHubDepth and GetPortErrorCount to match USB 3.1 specification From the USB 3.1 spec available at http://www.usb.org/developers/docs/ table 10-7 (Hub Class Requests) specifies the values for SetHubDepth and GetPortErrorCount as: Request bmRequestType bRequest wValue wIndex wLength Data SetHubDepth 00100000B SET_HUB_DEPTH Hub Depth Zero Zero None GetPortErrorCount 10100011B GET_PORT_ERR_COUNT Zero Port Two Number of Link Errors on this port Fix these two values to match the spec.
Signed-off-by: Tal Shorer Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index c9aa7792de10..d2784c10bfe2 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -564,9 +564,9 @@ extern void usb_ep0_reinit(struct usb_device *); /*-------------------------------------------------------------------------*/ -/* class requests from USB 3.0 hub spec, table 10-5 */ -#define SetHubDepth (0x3000 | HUB_SET_DEPTH) -#define GetPortErrorCount (0x8000 | HUB_GET_PORT_ERR_COUNT) +/* class requests from USB 3.1 hub spec, table 10-7 */ +#define SetHubDepth (0x2000 | HUB_SET_DEPTH) +#define GetPortErrorCount (0xa300 | HUB_GET_PORT_ERR_COUNT) /* * Generic bandwidth allocation constants/support -- cgit v1.2.3-70-g09d2 From 85a77ff0160ab0a70eb4e8b14200e29b4d35c355 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 11 Jun 2015 14:58:47 +0900 Subject: extcon: Remove duplicate header file in extcon.h This patch removes the duplicate header file in extcon.h. 
Signed-off-by: Chanwoo Choi --- include/linux/extcon.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index b16d929fa75f..1656c98175f5 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -27,8 +27,6 @@ #define __LINUX_EXTCON_H__ #include -#include -#include /* * Define the unique id of supported external connectors -- cgit v1.2.3-70-g09d2 From 2519b7650e99d90643a7a20d623513de9c95a817 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 11 Jun 2015 20:17:02 +0900 Subject: extcon: Remove optional print_state() function pointer of struct extcon_dev This patch removes the optional print_state() function pointer which included in 'struct extcon_dev' because the extcon must maintain the consistent name of extcon device on sysfs instead of inconsistent state of external connectors. Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-gpio.c | 18 ------------------ drivers/extcon/extcon.c | 8 -------- include/linux/extcon.h | 5 ----- 3 files changed, 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c index 355459a54e8b..57c24fa52edb 100644 --- a/drivers/extcon/extcon-gpio.c +++ b/drivers/extcon/extcon-gpio.c @@ -65,22 +65,6 @@ static irqreturn_t gpio_irq_handler(int irq, void *dev_id) return IRQ_HANDLED; } -static ssize_t extcon_gpio_print_state(struct extcon_dev *edev, char *buf) -{ - struct device *dev = edev->dev.parent; - struct gpio_extcon_data *extcon_data = dev_get_drvdata(dev); - const char *state; - - if (extcon_get_state(edev)) - state = extcon_data->state_on; - else - state = extcon_data->state_off; - - if (state) - return sprintf(buf, "%s\n", state); - return -EINVAL; -} - static int gpio_extcon_probe(struct platform_device *pdev) { struct gpio_extcon_platform_data *pdata = dev_get_platdata(&pdev->dev); @@ -110,8 +94,6 @@ static int gpio_extcon_probe(struct platform_device *pdev) 
extcon_data->state_on = pdata->state_on; extcon_data->state_off = pdata->state_off; extcon_data->check_on_resume = pdata->check_on_resume; - if (pdata->state_on && pdata->state_off) - extcon_data->edev->print_state = extcon_gpio_print_state; ret = devm_gpio_request_one(&pdev->dev, extcon_data->gpio, GPIOF_DIR_IN, pdev->name); diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index 43b57b02d050..d1fb5b4d024a 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -172,14 +172,6 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr, int i, count = 0; struct extcon_dev *edev = dev_get_drvdata(dev); - if (edev->print_state) { - int ret = edev->print_state(edev, buf); - - if (ret >= 0) - return ret; - /* Use default if failed */ - } - if (edev->max_supported == 0) return sprintf(buf, "%u\n", edev->state); diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 1656c98175f5..c0f8c4fc5d45 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -75,8 +75,6 @@ struct extcon_cable; * be attached simulataneously. {0x7, 0} is equivalent to * {0x3, 0x6, 0x5, 0}. If it is {0xFFFFFFFF, 0}, there * can be no simultaneous connections. - * @print_state: An optional callback to override the method to print the - * status of the extcon device. * @dev: Device of this extcon. * @state: Attach/detach state of this extcon. Do not provide at * register-time. @@ -100,9 +98,6 @@ struct extcon_dev { const unsigned int *supported_cable; const u32 *mutually_exclusive; - /* Optional callbacks to override class functions */ - ssize_t (*print_state)(struct extcon_dev *edev, char *buf); - /* Internal data. Please do not set. 
*/ struct device dev; struct raw_notifier_head *nh; -- cgit v1.2.3-70-g09d2 From ffe8690c85b8426db7783064724d106702f1b1e8 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:32 +0000 Subject: perf: add the necessary core perf APIs when accessing events counters in eBPF programs This patch adds three core perf APIs: - perf_event_attrs(): export the struct perf_event_attr from struct perf_event; - perf_event_get(): get the struct perf_event from the given fd; - perf_event_read_local(): read the events counters active on the current CPU; These APIs are needed when accessing events counters in eBPF programs. The API perf_event_read_local() comes from Peter and I add the corresponding SOB. Signed-off-by: Kaixu Xia Signed-off-by: Peter Zijlstra Signed-off-by: David S. Miller --- include/linux/perf_event.h | 10 ++++++ kernel/events/core.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2027809433b3..092a0e8a479a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -641,6 +641,8 @@ extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); extern void perf_event_delayed_put(struct task_struct *task); +extern struct perf_event *perf_event_get(unsigned int fd); +extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event); extern void perf_event_print_debug(void); extern void perf_pmu_disable(struct pmu *pmu); extern void perf_pmu_enable(struct pmu *pmu); @@ -659,6 +661,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); +extern u64 perf_event_read_local(struct perf_event *event); extern u64 perf_event_read_value(struct perf_event *event, u64
*enabled, u64 *running); @@ -979,6 +982,12 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } static inline void perf_event_delayed_put(struct task_struct *task) { } +static inline struct perf_event *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); } +static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event) +{ + return ERR_PTR(-EINVAL); +} +static inline u64 perf_event_read_local(struct perf_event *event) { return -EINVAL; } static inline void perf_event_print_debug(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } @@ -1011,6 +1020,7 @@ static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } +static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL) diff --git a/kernel/events/core.c b/kernel/events/core.c index d3dae3419b99..e2c6a8886d4d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3212,6 +3212,59 @@ static inline u64 perf_event_count(struct perf_event *event) return __perf_event_count(event); } +/* + * NMI-safe method to read a local event, that is an event that + * is: + * - either for the current task, or for this CPU + * - does not have inherit set, for inherited task events + * will not be local and we cannot read them atomically + * - must not have a pmu::count method + */ +u64 perf_event_read_local(struct perf_event *event) +{ + unsigned long flags; + u64 val; + + /* + * Disabling interrupts avoids all counter scheduling (context + * switches, timer 
based rotation and IPIs). + */ + local_irq_save(flags); + + /* If this is a per-task event, it must be for current */ + WARN_ON_ONCE((event->attach_state & PERF_ATTACH_TASK) && + event->hw.target != current); + + /* If this is a per-CPU event, it must be for this CPU */ + WARN_ON_ONCE(!(event->attach_state & PERF_ATTACH_TASK) && + event->cpu != smp_processor_id()); + + /* + * It must not be an event with inherit set, we cannot read + * all child counters from atomic context. + */ + WARN_ON_ONCE(event->attr.inherit); + + /* + * It must not have a pmu::count method, those are not + * NMI safe. + */ + WARN_ON_ONCE(event->pmu->count); + + /* + * If the event is currently on this CPU, its either a per-task event, + * or local to this CPU. Furthermore it means its ACTIVE (otherwise + * oncpu == -1). + */ + if (event->oncpu == smp_processor_id()) + event->pmu->read(event); + + val = local64_read(&event->count); + local_irq_restore(flags); + + return val; +} + static u64 perf_event_read(struct perf_event *event) { /* @@ -8574,6 +8627,31 @@ void perf_event_delayed_put(struct task_struct *task) WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); } +struct perf_event *perf_event_get(unsigned int fd) +{ + int err; + struct fd f; + struct perf_event *event; + + err = perf_fget_light(fd, &f); + if (err) + return ERR_PTR(err); + + event = f.file->private_data; + atomic_long_inc(&event->refcount); + fdput(f); + + return event; +} + +const struct perf_event_attr *perf_event_attrs(struct perf_event *event) +{ + if (!event) + return ERR_PTR(-EINVAL); + + return &event->attr; +} + /* * inherit a event from parent task to child task: */ -- cgit v1.2.3-70-g09d2 From 2a36f0b92eb638dd023870574eb471b1c56be9ad Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Thu, 6 Aug 2015 07:02:33 +0000 Subject: bpf: Make the bpf_prog_array_map more generic All the map backends are of generic nature. 
In order to avoid adding much special code into the eBPF core, rewrite part of the bpf_prog_array map code and make it more generic. So the new perf_event_array map type can reuse most of code with bpf_prog_array map and add fewer lines of special code. Signed-off-by: Wang Nan Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 6 ++-- include/linux/bpf.h | 8 +++-- kernel/bpf/arraymap.c | 80 +++++++++++++++++++++++++++------------------ kernel/bpf/core.c | 2 +- kernel/bpf/syscall.c | 2 +- 5 files changed, 60 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ec5214f39aa8..70efcd0940f9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -246,7 +246,7 @@ static void emit_prologue(u8 **pprog) * goto out; * if (++tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; - * prog = array->prog[index]; + * prog = array->ptrs[index]; * if (prog == NULL) * goto out; * goto *(prog->bpf_func + prologue_size); @@ -284,9 +284,9 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ EMIT2_off32(0x89, 0x85, -STACKSIZE + 36); /* mov dword ptr [rbp - 516], eax */ - /* prog = array->prog[index]; */ + /* prog = array->ptrs[index]; */ EMIT4_off32(0x48, 0x8D, 0x84, 0xD6, /* lea rax, [rsi + rdx * 8 + offsetof(...)] */ - offsetof(struct bpf_array, prog)); + offsetof(struct bpf_array, ptrs)); EMIT3(0x48, 0x8B, 0x00); /* mov rax, qword ptr [rax] */ /* if (prog == NULL) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 139d6d2e123f..d495211d63d1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,10 @@ struct bpf_map_ops { void *(*map_lookup_elem)(struct bpf_map *map, void *key); int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); + + /* funcs called by prog_array and perf_event_array map */ + void 
*(*map_fd_get_ptr) (struct bpf_map *map, int fd); + void (*map_fd_put_ptr) (void *ptr); }; struct bpf_map { @@ -142,13 +146,13 @@ struct bpf_array { bool owner_jited; union { char value[0] __aligned(8); - struct bpf_prog *prog[0] __aligned(8); + void *ptrs[0] __aligned(8); }; }; #define MAX_TAIL_CALL_CNT 32 u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -void bpf_prog_array_map_clear(struct bpf_map *map); +void bpf_fd_array_map_clear(struct bpf_map *map); bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index cb31229a6fa4..45df6572ecfd 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -150,15 +150,15 @@ static int __init register_array_map(void) } late_initcall(register_array_map); -static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) +static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) { - /* only bpf_prog file descriptors can be stored in prog_array map */ + /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) return ERR_PTR(-EINVAL); return array_map_alloc(attr); } -static void prog_array_map_free(struct bpf_map *map) +static void fd_array_map_free(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -167,21 +167,21 @@ static void prog_array_map_free(struct bpf_map *map) /* make sure it's empty */ for (i = 0; i < array->map.max_entries; i++) - BUG_ON(array->prog[i] != NULL); + BUG_ON(array->ptrs[i] != NULL); kvfree(array); } -static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key) +static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) { return NULL; } /* only called from syscall */ -static int prog_array_map_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags) +static int fd_array_map_update_elem(struct 
bpf_map *map, void *key, + void *value, u64 map_flags) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *prog, *old_prog; + void *new_ptr, *old_ptr; u32 index = *(u32 *)key, ufd; if (map_flags != BPF_ANY) @@ -191,57 +191,75 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key, return -E2BIG; ufd = *(u32 *)value; - prog = bpf_prog_get(ufd); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - if (!bpf_prog_array_compatible(array, prog)) { - bpf_prog_put(prog); - return -EINVAL; - } + new_ptr = map->ops->map_fd_get_ptr(map, ufd); + if (IS_ERR(new_ptr)) + return PTR_ERR(new_ptr); - old_prog = xchg(array->prog + index, prog); - if (old_prog) - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, new_ptr); + if (old_ptr) + map->ops->map_fd_put_ptr(old_ptr); return 0; } -static int prog_array_map_delete_elem(struct bpf_map *map, void *key) +static int fd_array_map_delete_elem(struct bpf_map *map, void *key) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *old_prog; + void *old_ptr; u32 index = *(u32 *)key; if (index >= array->map.max_entries) return -E2BIG; - old_prog = xchg(array->prog + index, NULL); - if (old_prog) { - bpf_prog_put_rcu(old_prog); + old_ptr = xchg(array->ptrs + index, NULL); + if (old_ptr) { + map->ops->map_fd_put_ptr(old_ptr); return 0; } else { return -ENOENT; } } +static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct bpf_prog *prog = bpf_prog_get(fd); + if (IS_ERR(prog)) + return prog; + + if (!bpf_prog_array_compatible(array, prog)) { + bpf_prog_put(prog); + return ERR_PTR(-EINVAL); + } + return prog; +} + +static void prog_fd_array_put_ptr(void *ptr) +{ + struct bpf_prog *prog = ptr; + + bpf_prog_put_rcu(prog); +} + /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_prog_array_map_clear(struct bpf_map *map) +void 
bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; for (i = 0; i < array->map.max_entries; i++) - prog_array_map_delete_elem(map, &i); + fd_array_map_delete_elem(map, &i); } static const struct bpf_map_ops prog_array_ops = { - .map_alloc = prog_array_map_alloc, - .map_free = prog_array_map_free, + .map_alloc = fd_array_map_alloc, + .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, - .map_lookup_elem = prog_array_map_lookup_elem, - .map_update_elem = prog_array_map_update_elem, - .map_delete_elem = prog_array_map_delete_elem, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = prog_fd_array_get_ptr, + .map_fd_put_ptr = prog_fd_array_put_ptr, }; static struct bpf_map_type_list prog_array_type __read_mostly = { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fafa74161445..67c380cfa9ca 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -450,7 +450,7 @@ select_insn: tail_call_cnt++; - prog = READ_ONCE(array->prog[index]); + prog = READ_ONCE(array->ptrs[index]); if (unlikely(!prog)) goto out; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a1b14d197a4f..dc9b464fefa9 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -72,7 +72,7 @@ static int bpf_map_release(struct inode *inode, struct file *filp) /* prog_array stores refcnt-ed bpf_prog pointers * release them all when user space closes prog_array_fd */ - bpf_prog_array_map_clear(map); + bpf_fd_array_map_clear(map); bpf_map_put(map); return 0; -- cgit v1.2.3-70-g09d2 From ea317b267e9d03a8241893aa176fba7661d07579 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:34 +0000 Subject: bpf: Add new bpf map type to store the pointer to struct perf_event Introduce a new bpf map type 'BPF_MAP_TYPE_PERF_EVENT_ARRAY'. This map only stores the pointer to struct perf_event. 
The user space event FDs from perf_event_open() syscall are converted to the pointer to struct perf_event and stored in map. Signed-off-by: Kaixu Xia Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/arraymap.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d495211d63d1..4fc1f4070789 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,6 +10,7 @@ #include #include #include +#include struct bpf_map; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2ce13c109b00..a1814e8e53a7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -114,6 +114,7 @@ enum bpf_map_type { BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PROG_ARRAY, + BPF_MAP_TYPE_PERF_EVENT_ARRAY, }; enum bpf_prog_type { diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 45df6572ecfd..29ace107f236 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -273,3 +273,60 @@ static int __init register_prog_array_map(void) return 0; } late_initcall(register_prog_array_map); + +static void perf_event_array_map_free(struct bpf_map *map) +{ + bpf_fd_array_map_clear(map); + fd_array_map_free(map); +} + +static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd) +{ + struct perf_event *event; + const struct perf_event_attr *attr; + + event = perf_event_get(fd); + if (IS_ERR(event)) + return event; + + attr = perf_event_attrs(event); + if (IS_ERR(attr)) + return (void *)attr; + + if (attr->type != PERF_TYPE_RAW && + attr->type != PERF_TYPE_HARDWARE) { + perf_event_release_kernel(event); + return ERR_PTR(-EINVAL); + } + return event; +} + +static void perf_event_fd_array_put_ptr(void *ptr) +{ + struct perf_event *event = ptr; + + perf_event_release_kernel(event); +} + +static const struct bpf_map_ops perf_event_array_ops = { + .map_alloc = 
fd_array_map_alloc, + .map_free = perf_event_array_map_free, + .map_get_next_key = array_map_get_next_key, + .map_lookup_elem = fd_array_map_lookup_elem, + .map_update_elem = fd_array_map_update_elem, + .map_delete_elem = fd_array_map_delete_elem, + .map_fd_get_ptr = perf_event_fd_array_get_ptr, + .map_fd_put_ptr = perf_event_fd_array_put_ptr, +}; + +static struct bpf_map_type_list perf_event_array_type __read_mostly = { + .ops = &perf_event_array_ops, + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, +}; + +static int __init register_perf_event_array_map(void) +{ + bpf_register_map_type(&perf_event_array_type); + return 0; +} +late_initcall(register_perf_event_array_map); -- cgit v1.2.3-70-g09d2 From 35578d7984003097af2b1e34502bc943d40c1804 Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Thu, 6 Aug 2015 07:02:35 +0000 Subject: bpf: Implement function bpf_perf_event_read() that gets the selected hardware PMU counter According to the perf_event_map_fd and index, the function bpf_perf_event_read() can convert the corresponding map value to the pointer to struct perf_event and return the Hardware PMU counter value. Signed-off-by: Kaixu Xia Signed-off-by: David S.
Miller --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/verifier.c | 48 +++++++++++++++++++++++++++++++++--------------- kernel/trace/bpf_trace.c | 31 +++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fc1f4070789..f57d7fed9ec3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -190,6 +190,7 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_perf_event_read_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a1814e8e53a7..92a48e2d5461 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -271,6 +271,7 @@ enum bpf_func_id { */ BPF_FUNC_skb_get_tunnel_key, BPF_FUNC_skb_set_tunnel_key, + BPF_FUNC_perf_event_read, /* u64 bpf_perf_event_read(&map, index) */ __BPF_FUNC_MAX_ID, }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index cd307df98cb3..48e1c7192560 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -238,6 +238,14 @@ static const char * const reg_type_str[] = { [CONST_IMM] = "imm", }; +static const struct { + int map_type; + int func_id; +} func_limit[] = { + {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, + {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, +}; + static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -837,6 +845,28 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return err; } +static int check_map_func_compatibility(struct bpf_map *map, int func_id) +{ + bool bool_map, bool_func; + int i; + + if (!map) + return 
0; + + for (i = 0; i <= ARRAY_SIZE(func_limit); i++) { + bool_map = (map->map_type == func_limit[i].map_type); + bool_func = (func_id == func_limit[i].func_id); + /* only when map & func pair match it can continue. + * don't allow any other map type to be passed into + * the special func; + */ + if (bool_map != bool_func) + return -EINVAL; + } + + return 0; +} + static int check_call(struct verifier_env *env, int func_id) { struct verifier_state *state = &env->cur_state; @@ -912,21 +942,9 @@ static int check_call(struct verifier_env *env, int func_id) return -EINVAL; } - if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY && - func_id != BPF_FUNC_tail_call) - /* prog_array map type needs extra care: - * only allow to pass it into bpf_tail_call() for now. - * bpf_map_delete_elem() can be allowed in the future, - * while bpf_map_update_elem() must only be done via syscall - */ - return -EINVAL; - - if (func_id == BPF_FUNC_tail_call && - map->map_type != BPF_MAP_TYPE_PROG_ARRAY) - /* don't allow any other map type to be passed into - * bpf_tail_call() - */ - return -EINVAL; + err = check_map_func_compatibility(map, func_id); + if (err) + return err; return 0; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 88a041adee90..ef9936df1b04 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -158,6 +158,35 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } +static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) +{ + struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct perf_event *event; + + if (unlikely(index >= array->map.max_entries)) + return -E2BIG; + + event = (struct perf_event *)array->ptrs[index]; + if (!event) + return -ENOENT; + + /* + * we don't know if the function is run successfully by the + * return value. It can be judged in other places, such as + * eBPF programs. 
+ */ + return perf_event_read_local(event); +} + +const struct bpf_func_proto bpf_perf_event_read_proto = { + .func = bpf_perf_event_read, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -183,6 +212,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return bpf_get_trace_printk_proto(); case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; + case BPF_FUNC_perf_event_read: + return &bpf_perf_event_read_proto; default: return NULL; } -- cgit v1.2.3-70-g09d2 From 48e9743dd6483c5fd3f10c8e42c60d52d64b0e27 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 27 Jul 2015 17:30:50 +0300 Subject: i2c: core: add and export of_get_i2c_adapter_by_node() interface of_find_i2c_adapter_by_node() call requires quite often missing put_device(), and i2c_put_adapter() releases a device locked by i2c_get_adapter() only. In general module_put(adapter->owner) and put_device(dev) are not interchangeable. 
This is a common error reproduction scenario as a result of the misusage described above (for clearness this is run on iMX6 platform with HDMI and I2C bus drivers compiled as kernel modules): root@mx6q:~# lsmod | grep i2c i2c_imx 10213 0 root@mx6q:~# lsmod | grep dw_hdmi_imx dw_hdmi_imx 3631 0 dw_hdmi 11846 1 dw_hdmi_imx imxdrm 8674 3 dw_hdmi_imx,imx_ipuv3_crtc,imx_ldb drm_kms_helper 113765 5 dw_hdmi,imxdrm,imx_ipuv3_crtc,imx_ldb root@mx6q:~# rmmod dw_hdmi_imx root@mx6q:~# lsmod | grep i2c i2c_imx 10213 -1 ^^^^^ root@mx6q:~# rmmod i2c_imx rmmod: ERROR: Module i2c_imx is in use To fix existing users of these interfaces and to avoid any further confusion and misusage in future, add one more interface of_get_i2c_adapter_by_node(), it is similar to i2c_get_adapter() in sense that an I2C bus device driver found and locked by user can be correctly unlocked by i2c_put_adapter(). Signed-off-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 18 ++++++++++++++++++ include/linux/i2c.h | 7 +++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index f80992d0a608..07a83f34ed58 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1371,6 +1371,24 @@ struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node) return adapter; } EXPORT_SYMBOL(of_find_i2c_adapter_by_node); + +/* must call i2c_put_adapter() when done with returned i2c_adapter device */ +struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) +{ + struct i2c_adapter *adapter; + + adapter = of_find_i2c_adapter_by_node(node); + if (!adapter) + return NULL; + + if (!try_module_get(adapter->owner)) { + put_device(&adapter->dev); + adapter = NULL; + } + + return adapter; +} +EXPORT_SYMBOL(of_get_i2c_adapter_by_node); #else static void of_i2c_register_devices(struct i2c_adapter *adap) { } #endif /* CONFIG_OF */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 
e83a738a3b87..e2c859b74f8b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -638,6 +638,8 @@ extern struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); /* must call put_device() when done with returned i2c_adapter device */ extern struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node *node); +/* must call i2c_put_adapter() when done with returned i2c_adapter device */ +struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node); #else static inline struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) @@ -649,6 +651,11 @@ static inline struct i2c_adapter *of_find_i2c_adapter_by_node(struct device_node { return NULL; } + +static inline struct i2c_adapter *of_get_i2c_adapter_by_node(struct device_node *node) +{ + return NULL; +} #endif /* CONFIG_OF */ #endif /* _LINUX_I2C_H */ -- cgit v1.2.3-70-g09d2 From 0cc67945ea5933d53db69606312cf52f553d1b81 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 31 Jul 2015 11:48:05 +0100 Subject: mailbox: switch to hrtimer for tx_complete polling The mailbox core uses a jiffy-based timer to handle polling for the transmit completion. If the client/protocol have/support notification of the last packet transmit completion via ACK packet, then we tick the Tx state machine immediately in the callback. However if the client doesn't support that mechanism we might end up waiting for at least a jiffy even though the remote is ready to receive the next request. This patch switches the timer used for that polling from jiffy-based to hrtimer-based so that we can support polling at much higher time resolution.
Reported-and-suggested-by: Juri Lelli Signed-off-by: Sudeep Holla Signed-off-by: Jassi Brar --- drivers/mailbox/mailbox.c | 27 +++++++++++++++------------ include/linux/mailbox_controller.h | 7 ++++--- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index c7fdb57fd166..6a4811f85705 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -26,8 +26,6 @@ static LIST_HEAD(mbox_cons); static DEFINE_MUTEX(con_mutex); -static void poll_txdone(unsigned long data); - static int add_to_rbuf(struct mbox_chan *chan, void *mssg) { int idx; @@ -88,7 +86,9 @@ exit: spin_unlock_irqrestore(&chan->lock, flags); if (!err && (chan->txdone_method & TXDONE_BY_POLL)) - poll_txdone((unsigned long)chan->mbox); + /* kick start the timer immediately to avoid delays */ + hrtimer_start(&chan->mbox->poll_hrt, ktime_set(0, 0), + HRTIMER_MODE_REL); } static void tx_tick(struct mbox_chan *chan, int r) @@ -112,9 +112,10 @@ static void tx_tick(struct mbox_chan *chan, int r) complete(&chan->tx_complete); } -static void poll_txdone(unsigned long data) +static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer) { - struct mbox_controller *mbox = (struct mbox_controller *)data; + struct mbox_controller *mbox = + container_of(hrtimer, struct mbox_controller, poll_hrt); bool txdone, resched = false; int i; @@ -130,9 +131,11 @@ static void poll_txdone(unsigned long data) } } - if (resched) - mod_timer(&mbox->poll, jiffies + - msecs_to_jiffies(mbox->txpoll_period)); + if (resched) { + hrtimer_forward_now(hrtimer, ms_to_ktime(mbox->txpoll_period)); + return HRTIMER_RESTART; + } + return HRTIMER_NORESTART; } /** @@ -451,9 +454,9 @@ int mbox_controller_register(struct mbox_controller *mbox) txdone = TXDONE_BY_ACK; if (txdone == TXDONE_BY_POLL) { - mbox->poll.function = &poll_txdone; - mbox->poll.data = (unsigned long)mbox; - init_timer(&mbox->poll); + hrtimer_init(&mbox->poll_hrt, 
CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + mbox->poll_hrt.function = txdone_hrtimer; } for (i = 0; i < mbox->num_chans; i++) { @@ -495,7 +498,7 @@ void mbox_controller_unregister(struct mbox_controller *mbox) mbox_free_channel(&mbox->chans[i]); if (mbox->txdone_poll) - del_timer_sync(&mbox->poll); + hrtimer_cancel(&mbox->poll_hrt); mutex_unlock(&con_mutex); } diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h index 68c42454439b..74deadb42d76 100644 --- a/include/linux/mailbox_controller.h +++ b/include/linux/mailbox_controller.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include @@ -67,7 +67,8 @@ struct mbox_chan_ops { * @txpoll_period: If 'txdone_poll' is in effect, the API polls for * last TX's status after these many millisecs * @of_xlate: Controller driver specific mapping of channel via DT - * @poll: API private. Used to poll for TXDONE on all channels. + * @poll_hrt: API private. hrtimer used to poll for TXDONE on all + * channels. * @node: API private. To hook into list of controllers. */ struct mbox_controller { @@ -81,7 +82,7 @@ struct mbox_controller { struct mbox_chan *(*of_xlate)(struct mbox_controller *mbox, const struct of_phandle_args *sp); /* Internal to API */ - struct timer_list poll; + struct hrtimer poll_hrt; struct list_head node; }; -- cgit v1.2.3-70-g09d2 From 92b7cb5dc885b38b21093eefed8028b615952965 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Mon, 3 Aug 2015 17:40:51 +0300 Subject: extcon: palmas: Support GPIO based USB ID detection Some palmas based chip variants do not have OTG based ID logic. For these variants we rely on GPIO based USB ID detection. These chips do have VBUS comparator for VBUS detection so we continue to use the old way of detecting VBUS. 
Signed-off-by: Roger Quadros Acked-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- .../devicetree/bindings/extcon/extcon-palmas.txt | 5 +- drivers/extcon/extcon-palmas.c | 129 ++++++++++++++++++--- drivers/extcon/extcon-usb-gpio.c | 1 + include/linux/mfd/palmas.h | 7 ++ 4 files changed, 126 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/extcon/extcon-palmas.txt b/Documentation/devicetree/bindings/extcon/extcon-palmas.txt index 45414bbcd945..f61d5af44a27 100644 --- a/Documentation/devicetree/bindings/extcon/extcon-palmas.txt +++ b/Documentation/devicetree/bindings/extcon/extcon-palmas.txt @@ -10,8 +10,11 @@ Required Properties: Optional Properties: - ti,wakeup : To enable the wakeup comparator in probe - - ti,enable-id-detection: Perform ID detection. + - ti,enable-id-detection: Perform ID detection. If id-gpio is specified + it performs id-detection using GPIO else using OTG core. - ti,enable-vbus-detection: Perform VBUS detection. + - id-gpio: gpio for GPIO ID detection. See gpio binding. + - debounce-delay-ms: debounce delay for GPIO ID pin in milliseconds. 
palmas-usb { compatible = "ti,twl6035-usb", "ti,palmas-usb"; diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c index 8933e7e0d9da..662e91778cb0 100644 --- a/drivers/extcon/extcon-palmas.c +++ b/drivers/extcon/extcon-palmas.c @@ -28,6 +28,10 @@ #include #include #include +#include +#include + +#define USB_GPIO_DEBOUNCE_MS 20 /* ms */ static const unsigned int palmas_extcon_cable[] = { EXTCON_USB, @@ -118,19 +122,54 @@ static irqreturn_t palmas_id_irq_handler(int irq, void *_palmas_usb) return IRQ_HANDLED; } +static void palmas_gpio_id_detect(struct work_struct *work) +{ + int id; + struct palmas_usb *palmas_usb = container_of(to_delayed_work(work), + struct palmas_usb, + wq_detectid); + struct extcon_dev *edev = palmas_usb->edev; + + if (!palmas_usb->id_gpiod) + return; + + id = gpiod_get_value_cansleep(palmas_usb->id_gpiod); + + if (id) { + extcon_set_cable_state_(edev, EXTCON_USB_HOST, false); + dev_info(palmas_usb->dev, "USB-HOST cable is detached\n"); + } else { + extcon_set_cable_state_(edev, EXTCON_USB_HOST, true); + dev_info(palmas_usb->dev, "USB-HOST cable is attached\n"); + } +} + +static irqreturn_t palmas_gpio_id_irq_handler(int irq, void *_palmas_usb) +{ + struct palmas_usb *palmas_usb = _palmas_usb; + + queue_delayed_work(system_power_efficient_wq, &palmas_usb->wq_detectid, + palmas_usb->sw_debounce_jiffies); + + return IRQ_HANDLED; +} + static void palmas_enable_irq(struct palmas_usb *palmas_usb) { palmas_write(palmas_usb->palmas, PALMAS_USB_OTG_BASE, PALMAS_USB_VBUS_CTRL_SET, PALMAS_USB_VBUS_CTRL_SET_VBUS_ACT_COMP); - palmas_write(palmas_usb->palmas, PALMAS_USB_OTG_BASE, - PALMAS_USB_ID_CTRL_SET, PALMAS_USB_ID_CTRL_SET_ID_ACT_COMP); + if (palmas_usb->enable_id_detection) { + palmas_write(palmas_usb->palmas, PALMAS_USB_OTG_BASE, + PALMAS_USB_ID_CTRL_SET, + PALMAS_USB_ID_CTRL_SET_ID_ACT_COMP); - palmas_write(palmas_usb->palmas, PALMAS_USB_OTG_BASE, - PALMAS_USB_ID_INT_EN_HI_SET, - PALMAS_USB_ID_INT_EN_HI_SET_ID_GND | - 
PALMAS_USB_ID_INT_EN_HI_SET_ID_FLOAT); + palmas_write(palmas_usb->palmas, PALMAS_USB_OTG_BASE, + PALMAS_USB_ID_INT_EN_HI_SET, + PALMAS_USB_ID_INT_EN_HI_SET_ID_GND | + PALMAS_USB_ID_INT_EN_HI_SET_ID_FLOAT); + } if (palmas_usb->enable_vbus_detection) palmas_vbus_irq_handler(palmas_usb->vbus_irq, palmas_usb); @@ -169,20 +208,36 @@ static int palmas_usb_probe(struct platform_device *pdev) palmas_usb->wakeup = pdata->wakeup; } + palmas_usb->id_gpiod = devm_gpiod_get_optional(&pdev->dev, "id"); + if (IS_ERR(palmas_usb->id_gpiod)) { + dev_err(&pdev->dev, "failed to get id gpio\n"); + return PTR_ERR(palmas_usb->id_gpiod); + } + + if (palmas_usb->enable_id_detection && palmas_usb->id_gpiod) { + palmas_usb->enable_id_detection = false; + palmas_usb->enable_gpio_id_detection = true; + } + + if (palmas_usb->enable_gpio_id_detection) { + u32 debounce; + + if (of_property_read_u32(node, "debounce-delay-ms", &debounce)) + debounce = USB_GPIO_DEBOUNCE_MS; + + status = gpiod_set_debounce(palmas_usb->id_gpiod, + debounce * 1000); + if (status < 0) + palmas_usb->sw_debounce_jiffies = msecs_to_jiffies(debounce); + } + + INIT_DELAYED_WORK(&palmas_usb->wq_detectid, palmas_gpio_id_detect); + palmas->usb = palmas_usb; palmas_usb->palmas = palmas; palmas_usb->dev = &pdev->dev; - palmas_usb->id_otg_irq = regmap_irq_get_virq(palmas->irq_data, - PALMAS_ID_OTG_IRQ); - palmas_usb->id_irq = regmap_irq_get_virq(palmas->irq_data, - PALMAS_ID_IRQ); - palmas_usb->vbus_otg_irq = regmap_irq_get_virq(palmas->irq_data, - PALMAS_VBUS_OTG_IRQ); - palmas_usb->vbus_irq = regmap_irq_get_virq(palmas->irq_data, - PALMAS_VBUS_IRQ); - palmas_usb_wakeup(palmas, palmas_usb->wakeup); platform_set_drvdata(pdev, palmas_usb); @@ -201,6 +256,10 @@ static int palmas_usb_probe(struct platform_device *pdev) } if (palmas_usb->enable_id_detection) { + palmas_usb->id_otg_irq = regmap_irq_get_virq(palmas->irq_data, + PALMAS_ID_OTG_IRQ); + palmas_usb->id_irq = regmap_irq_get_virq(palmas->irq_data, + PALMAS_ID_IRQ); status = 
devm_request_threaded_irq(palmas_usb->dev, palmas_usb->id_irq, NULL, palmas_id_irq_handler, @@ -212,9 +271,33 @@ static int palmas_usb_probe(struct platform_device *pdev) palmas_usb->id_irq, status); return status; } + } else if (palmas_usb->enable_gpio_id_detection) { + palmas_usb->gpio_id_irq = gpiod_to_irq(palmas_usb->id_gpiod); + if (palmas_usb->gpio_id_irq < 0) { + dev_err(&pdev->dev, "failed to get id irq\n"); + return palmas_usb->gpio_id_irq; + } + status = devm_request_threaded_irq(&pdev->dev, + palmas_usb->gpio_id_irq, + NULL, + palmas_gpio_id_irq_handler, + IRQF_TRIGGER_RISING | + IRQF_TRIGGER_FALLING | + IRQF_ONESHOT, + "palmas_usb_id", + palmas_usb); + if (status < 0) { + dev_err(&pdev->dev, + "failed to request handler for id irq\n"); + return status; + } } if (palmas_usb->enable_vbus_detection) { + palmas_usb->vbus_otg_irq = regmap_irq_get_virq(palmas->irq_data, + PALMAS_VBUS_OTG_IRQ); + palmas_usb->vbus_irq = regmap_irq_get_virq(palmas->irq_data, + PALMAS_VBUS_IRQ); status = devm_request_threaded_irq(palmas_usb->dev, palmas_usb->vbus_irq, NULL, palmas_vbus_irq_handler, @@ -229,10 +312,21 @@ static int palmas_usb_probe(struct platform_device *pdev) } palmas_enable_irq(palmas_usb); + /* perform initial detection */ + palmas_gpio_id_detect(&palmas_usb->wq_detectid.work); device_set_wakeup_capable(&pdev->dev, true); return 0; } +static int palmas_usb_remove(struct platform_device *pdev) +{ + struct palmas_usb *palmas_usb = platform_get_drvdata(pdev); + + cancel_delayed_work_sync(&palmas_usb->wq_detectid); + + return 0; +} + #ifdef CONFIG_PM_SLEEP static int palmas_usb_suspend(struct device *dev) { @@ -243,6 +337,8 @@ static int palmas_usb_suspend(struct device *dev) enable_irq_wake(palmas_usb->vbus_irq); if (palmas_usb->enable_id_detection) enable_irq_wake(palmas_usb->id_irq); + if (palmas_usb->enable_gpio_id_detection) + enable_irq_wake(palmas_usb->gpio_id_irq); } return 0; } @@ -256,6 +352,8 @@ static int palmas_usb_resume(struct device *dev) 
disable_irq_wake(palmas_usb->vbus_irq); if (palmas_usb->enable_id_detection) disable_irq_wake(palmas_usb->id_irq); + if (palmas_usb->enable_gpio_id_detection) + disable_irq_wake(palmas_usb->gpio_id_irq); } return 0; }; @@ -273,6 +371,7 @@ static const struct of_device_id of_palmas_match_tbl[] = { static struct platform_driver palmas_usb_driver = { .probe = palmas_usb_probe, + .remove = palmas_usb_remove, .driver = { .name = "palmas-usb", .of_match_table = of_palmas_match_tbl, diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c index a2a44536a608..2b2fecffb1ad 100644 --- a/drivers/extcon/extcon-usb-gpio.c +++ b/drivers/extcon/extcon-usb-gpio.c @@ -15,6 +15,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index bb270bd03eed..13e1d96935ed 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #define PALMAS_NUM_CLIENTS 3 @@ -551,10 +552,16 @@ struct palmas_usb { int vbus_otg_irq; int vbus_irq; + int gpio_id_irq; + struct gpio_desc *id_gpiod; + unsigned long sw_debounce_jiffies; + struct delayed_work wq_detectid; + enum palmas_usb_state linkstat; int wakeup; bool enable_vbus_detection; bool enable_id_detection; + bool enable_gpio_id_detection; }; #define comparator_to_palmas(x) container_of((x), struct palmas_usb, comparator) -- cgit v1.2.3-70-g09d2 From cc9a903d915c21626b6b2fbf8ed0ff16a7f82210 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 7 Aug 2015 16:55:46 -0400 Subject: svcrdma: Change maximum server payload back to RPCSVC_MAXPAYLOAD Both commit 0380a3f375 ("svcrdma: Add a separate "max data segs" macro for svcrdma") and commit 7e5be28827bf ("svcrdma: advertise the correct max payload") are incorrect. This commit reverts both changes, restoring the server's maximum payload size to 1MB. 
Commit 7e5be28827bf based the server's maximum payload on the _client's_ RPCRDMA_MAX_DATA_SEGS value. That was wrong. Commit 0380a3f375 tried to fix this so that the client maximum payload size could be raised without affecting the server, but managed to confuse matters more on the server side. More importantly, limiting the advertised maximum payload size was meant to be a workaround, not the actual fix. We need to revisit https://bugzilla.linux-nfs.org/show_bug.cgi?id=270 A Linux client on a platform with 64KB pages can overrun and crash an x86_64 NFS/RDMA server when the r/wsize is 1MB. An x86/64 Linux client seems to work fine using 1MB reads and writes when the Linux server's maximum payload size is restored to 1MB. BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=270 Fixes: 0380a3f375 ("svcrdma: Add a separate "max data segs" macro") Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 9 ++------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 3 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 13af61b70417..d5ee6d8b7c58 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -172,13 +172,6 @@ struct svcxprt_rdma { #define RDMAXPRT_SQ_PENDING 2 #define RDMAXPRT_CONN_PENDING 3 -#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */ -#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) -#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD -#else -#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT) -#endif - #define RPCRDMA_LISTEN_BACKLOG 10 /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. 
*/ @@ -187,6 +180,8 @@ struct svcxprt_rdma { #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 +#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD + /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4054a9de6a91..21e40365042c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = { .xcl_name = "rdma", .xcl_owner = THIS_MODULE, .xcl_ops = &svc_rdma_ops, - .xcl_max_payload = RPCRDMA_MAXPAYLOAD, + .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA, .xcl_ident = XPRT_TRANSPORT_RDMA, }; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f49dd8b38122..e718d0959af3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,7 +51,6 @@ #include /* rpc_xprt */ #include /* RPC/RDMA protocol */ #include /* xprt parameters */ -#include /* RPCSVC_MAXPAYLOAD */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ -- cgit v1.2.3-70-g09d2 From ea126e74353453d15fc0a181910ae1e25162f2a1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:03:32 -0700 Subject: nfsd/sunrpc: add a new svc_serv_ops struct and move sv_shutdown into it In later patches we'll need to abstract out more operations on a per-service level, besides sv_shutdown and sv_function. Declare a new svc_serv_ops struct to hold these operations, and move sv_shutdown into this struct. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/lockd/svc.c | 6 +++++- fs/nfs/callback.c | 5 ++++- fs/nfsd/nfssvc.c | 6 +++++- include/linux/sunrpc/svc.h | 20 ++++++++++---------- net/sunrpc/svc.c | 18 +++++++++--------- 5 files changed, 33 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 55505cbe11af..4182b2f925cd 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -322,6 +322,10 @@ out_rqst: return error; } +static struct svc_serv_ops lockd_sv_ops = { + .svo_shutdown = svc_rpcb_cleanup, +}; + static struct svc_serv *lockd_create_svc(void) { struct svc_serv *serv; @@ -350,7 +354,7 @@ static struct svc_serv *lockd_create_svc(void) nlm_timeout = LOCKD_DFLT_TIMEO; nlmsvc_timeout = nlm_timeout * HZ; - serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup); + serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, &lockd_sv_ops); if (!serv) { printk(KERN_WARNING "lockd_up: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 682529c00996..182792d115fc 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -308,6 +308,9 @@ err_bind: return ret; } +static struct svc_serv_ops nfs_cb_sv_ops = { +}; + static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; @@ -333,7 +336,7 @@ static struct svc_serv *nfs_callback_create_svc(int minorversion) printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n", cb_info->users); - serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, &nfs_cb_sv_ops); if (!serv) { printk(KERN_ERR "nfs_callback_create_svc: create service failed\n"); return ERR_PTR(-ENOMEM); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9277cc91c21b..7311677330b2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -391,6 +391,10 @@ static int nfsd_get_default_max_blksize(void) return 
ret; } +static struct svc_serv_ops nfsd_sv_ops = { + .svo_shutdown = nfsd_last_thread, +}; + int nfsd_create_serv(struct net *net) { int error; @@ -405,7 +409,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - nfsd_last_thread, nfsd, THIS_MODULE); + &nfsd_sv_ops, nfsd, THIS_MODULE); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fae6fb947fc8..2e682f636b13 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -54,6 +54,13 @@ struct svc_pool { unsigned long sp_flags; } ____cacheline_aligned_in_smp; +struct svc_serv; + +struct svc_serv_ops { + /* Callback to use when last thread exits. */ + void (*svo_shutdown)(struct svc_serv *serv, struct net *net); +}; + /* * RPC service. * @@ -85,13 +92,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - - void (*sv_shutdown)(struct svc_serv *serv, - struct net *net); - /* Callback to use when last thread - * exits. 
- */ - + struct svc_serv_ops *sv_ops; /* server operations */ struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ @@ -429,13 +430,12 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net)); + struct svc_serv_ops *); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv *, struct net *net), - svc_thread_fn, struct module *); + struct svc_serv_ops *, svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a16d8d8c831..36eee907696b 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -423,7 +423,7 @@ EXPORT_SYMBOL_GPL(svc_bind); */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -440,7 +440,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, bufsize = RPCSVC_MAXPAYLOAD; serv->sv_max_payload = bufsize? 
bufsize : 4096; serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); - serv->sv_shutdown = shutdown; + serv->sv_ops = ops; xdrsize = 0; while (prog) { prog->pg_lovers = prog->pg_nvers-1; @@ -486,21 +486,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net)) + struct svc_serv_ops *ops) { - return __svc_create(prog, bufsize, /*npools*/1, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, ops); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv, struct net *net), - svc_thread_fn func, struct module *mod) + struct svc_serv_ops *ops, svc_thread_fn func, + struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, shutdown); + serv = __svc_create(prog, bufsize, npools, ops); if (!serv) goto out_err; @@ -517,8 +517,8 @@ void svc_shutdown_net(struct svc_serv *serv, struct net *net) { svc_close_net(serv, net); - if (serv->sv_shutdown) - serv->sv_shutdown(serv, net); + if (serv->sv_ops->svo_shutdown) + serv->sv_ops->svo_shutdown(serv, net); } EXPORT_SYMBOL_GPL(svc_shutdown_net); -- cgit v1.2.3-70-g09d2 From c369014f1776367269c8fbb5ea8932826d89ce2f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:04:46 -0700 Subject: nfsd/sunrpc: move sv_function into sv_ops Since we now have a container for holding svc_serv operations, move the sv_function into it as well. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 3 ++- include/linux/sunrpc/svc.h | 11 +++-------- net/sunrpc/svc.c | 8 +++----- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7311677330b2..bd03968363ff 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -393,6 +393,7 @@ static int nfsd_get_default_max_blksize(void) static struct svc_serv_ops nfsd_sv_ops = { .svo_shutdown = nfsd_last_thread, + .svo_function = nfsd, }; int nfsd_create_serv(struct net *net) @@ -409,7 +410,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_sv_ops, nfsd, THIS_MODULE); + &nfsd_sv_ops, THIS_MODULE); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 2e682f636b13..7c51b21ce9d6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -19,11 +19,6 @@ #include #include -/* - * This is the RPC server thread function prototype - */ -typedef int (*svc_thread_fn)(void *); - /* statistics for svc_pool structures */ struct svc_pool_stats { atomic_long_t packets; @@ -58,7 +53,8 @@ struct svc_serv; struct svc_serv_ops { /* Callback to use when last thread exits. 
*/ - void (*svo_shutdown)(struct svc_serv *serv, struct net *net); + void (*svo_shutdown)(struct svc_serv *, struct net *); + int (*svo_function)(void *); }; /* @@ -95,7 +91,6 @@ struct svc_serv { struct svc_serv_ops *sv_ops; /* server operations */ struct module * sv_module; /* optional module to count when * adding threads */ - svc_thread_fn sv_function; /* main function for threads */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -435,7 +430,7 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *, svc_thread_fn, struct module *); + struct svc_serv_ops *, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 36eee907696b..5b8726030c24 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -34,7 +34,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); -#define svc_serv_is_pooled(serv) ((serv)->sv_function) +#define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) /* * Mode for mapping cpus to pools. 
@@ -494,8 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops, svc_thread_fn func, - struct module *mod) + struct svc_serv_ops *ops, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); @@ -504,7 +503,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, if (!serv) goto out_err; - serv->sv_function = func; serv->sv_module = mod; return serv; out_err: @@ -740,7 +738,7 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) } __module_get(serv->sv_module); - task = kthread_create_on_node(serv->sv_function, rqstp, + task = kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); -- cgit v1.2.3-70-g09d2 From 758f62fff9ad630f05866a1dd6ae9453a7730c2e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:05:56 -0700 Subject: nfsd/sunrpc: move sv_module parm into sv_ops ...not technically an operation, but it's more convenient and cleaner to pass the module pointer in this struct. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 3 ++- include/linux/sunrpc/svc.h | 9 ++++++--- net/sunrpc/svc.c | 8 +++----- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index bd03968363ff..17ceaad5f80a 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -394,6 +394,7 @@ static int nfsd_get_default_max_blksize(void) static struct svc_serv_ops nfsd_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, + .svo_module = THIS_MODULE, }; int nfsd_create_serv(struct net *net) @@ -410,7 +411,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_sv_ops, THIS_MODULE); + &nfsd_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 7c51b21ce9d6..0150003d584b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -54,7 +54,12 @@ struct svc_serv; struct svc_serv_ops { /* Callback to use when last thread exits. 
*/ void (*svo_shutdown)(struct svc_serv *, struct net *); + + /* function for service threads to run */ int (*svo_function)(void *); + + /* optional module to count when adding threads (pooled svcs only) */ + struct module *svo_module; }; /* @@ -89,8 +94,6 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ struct svc_serv_ops *sv_ops; /* server operations */ - struct module * sv_module; /* optional module to count when - * adding threads */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -430,7 +433,7 @@ struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *, struct module *); + struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5b8726030c24..5a6be22a7904 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops, struct module *mod) + struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); @@ -502,8 +502,6 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize, serv = __svc_create(prog, bufsize, npools, ops); if (!serv) goto out_err; - - serv->sv_module = mod; return serv; out_err: svc_pool_map_put(); @@ -737,12 +735,12 @@ svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) break; } - __module_get(serv->sv_module); + __module_get(serv->sv_ops->svo_module); task = 
kthread_create_on_node(serv->sv_ops->svo_function, rqstp, node, "%s", serv->sv_name); if (IS_ERR(task)) { error = PTR_ERR(task); - module_put(serv->sv_module); + module_put(serv->sv_ops->svo_module); svc_exit_thread(rqstp); break; } -- cgit v1.2.3-70-g09d2 From b9e13cdfac70e38ade17b53810a36968c5842339 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:06:51 -0700 Subject: nfsd/sunrpc: turn enqueueing a svc_xprt into a svc_serv operation For now, all services use svc_xprt_do_enqueue, but once we add workqueue-based service support, we'll need to do something different. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 3 ++- fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 11 ++++++----- include/linux/sunrpc/svc.h | 3 +++ include/linux/sunrpc/svc_xprt.h | 1 + net/sunrpc/svc_xprt.c | 10 +++++----- 6 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 4182b2f925cd..530914b5c455 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -323,7 +323,8 @@ out_rqst: } static struct svc_serv_ops lockd_sv_ops = { - .svo_shutdown = svc_rpcb_cleanup, + .svo_shutdown = svc_rpcb_cleanup, + .svo_enqueue_xprt = svc_xprt_do_enqueue, }; static struct svc_serv *lockd_create_svc(void) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 182792d115fc..2c4a0b565d28 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -309,6 +309,7 @@ err_bind: } static struct svc_serv_ops nfs_cb_sv_ops = { + .svo_enqueue_xprt = svc_xprt_do_enqueue, }; static struct svc_serv *nfs_callback_create_svc(int minorversion) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 17ceaad5f80a..d8b9b4cd37c6 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -391,10 +391,11 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static struct svc_serv_ops nfsd_sv_ops = { - .svo_shutdown = nfsd_last_thread, - .svo_function = nfsd, - .svo_module 
= THIS_MODULE, +static struct svc_serv_ops nfsd_thread_sv_ops = { + .svo_shutdown = nfsd_last_thread, + .svo_function = nfsd, + .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_module = THIS_MODULE, }; int nfsd_create_serv(struct net *net) @@ -411,7 +412,7 @@ int nfsd_create_serv(struct net *net) nfsd_max_blksize = nfsd_get_default_max_blksize(); nfsd_reset_versions(); nn->nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, - &nfsd_sv_ops); + &nfsd_thread_sv_ops); if (nn->nfsd_serv == NULL) return -ENOMEM; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 0150003d584b..97609d0f68f6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -58,6 +58,9 @@ struct svc_serv_ops { /* function for service threads to run */ int (*svo_function)(void *); + /* queue up a transport for servicing */ + void (*svo_enqueue_xprt)(struct svc_xprt *); + /* optional module to count when adding threads (pooled svcs only) */ struct module *svo_module; }; diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 79f6f8f3dc0a..78512cfe1fe6 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -116,6 +116,7 @@ void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, struct svc_serv *); int svc_create_xprt(struct svc_serv *, const char *, struct net *, const int, const unsigned short, int); +void svc_xprt_do_enqueue(struct svc_xprt *xprt); void svc_xprt_enqueue(struct svc_xprt *xprt); void svc_xprt_put(struct svc_xprt *xprt); void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 163ac45c3639..a6cbb2104667 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -24,7 +24,6 @@ static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); static void svc_age_temp_xprts(unsigned long closure); static void 
svc_delete_xprt(struct svc_xprt *xprt); -static void svc_xprt_do_enqueue(struct svc_xprt *xprt); /* apparently the "standard" is that clients close * idle connections after 5 minutes, servers after @@ -225,12 +224,12 @@ static void svc_xprt_received(struct svc_xprt *xprt) } /* As soon as we clear busy, the xprt could be closed and - * 'put', so we need a reference to call svc_xprt_do_enqueue with: + * 'put', so we need a reference to call svc_enqueue_xprt with: */ svc_xprt_get(xprt); smp_mb__before_atomic(); clear_bit(XPT_BUSY, &xprt->xpt_flags); - svc_xprt_do_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); svc_xprt_put(xprt); } @@ -320,7 +319,7 @@ static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt) return false; } -static void svc_xprt_do_enqueue(struct svc_xprt *xprt) +void svc_xprt_do_enqueue(struct svc_xprt *xprt) { struct svc_pool *pool; struct svc_rqst *rqstp = NULL; @@ -402,6 +401,7 @@ redo_search: out: trace_svc_xprt_do_enqueue(xprt, rqstp); } +EXPORT_SYMBOL_GPL(svc_xprt_do_enqueue); /* * Queue up a transport with data pending. If there are idle nfsd @@ -412,7 +412,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) { if (test_bit(XPT_BUSY, &xprt->xpt_flags)) return; - svc_xprt_do_enqueue(xprt); + xprt->xpt_server->sv_ops->svo_enqueue_xprt(xprt); } EXPORT_SYMBOL_GPL(svc_xprt_enqueue); -- cgit v1.2.3-70-g09d2 From 598e2359090d393b01a8e10386dc3056ccfa47ae Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:08:33 -0700 Subject: nfsd/sunrpc: abstract out svc_set_num_threads to sv_ops Add an operation that will do setup of the service. In the case of a classic thread-based service that means starting up threads. In the case of a workqueue-based service, the setup will do something different. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 8 +++++--- include/linux/sunrpc/svc.h | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d8b9b4cd37c6..ad4e2377dd63 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -395,6 +395,7 @@ static struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_enqueue_xprt = svc_xprt_do_enqueue, + .svo_setup = svc_set_num_threads, .svo_module = THIS_MODULE, }; @@ -507,8 +508,8 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net) /* apply the new numbers */ svc_get(nn->nfsd_serv); for (i = 0; i < n; i++) { - err = svc_set_num_threads(nn->nfsd_serv, &nn->nfsd_serv->sv_pools[i], - nthreads[i]); + err = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, + &nn->nfsd_serv->sv_pools[i], nthreads[i]); if (err) break; } @@ -547,7 +548,8 @@ nfsd_svc(int nrservs, struct net *net) error = nfsd_startup_net(nrservs, net); if (error) goto out_destroy; - error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs); + error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv, + NULL, nrservs); if (error) goto out_shutdown; /* We are holding a reference to nn->nfsd_serv which diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 97609d0f68f6..fd5bb9922545 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -61,6 +61,9 @@ struct svc_serv_ops { /* queue up a transport for servicing */ void (*svo_enqueue_xprt)(struct svc_xprt *); + /* set up thread (or whatever) execution context */ + int (*svo_setup)(struct svc_serv *, struct svc_pool *, int); + /* optional module to count when adding threads (pooled svcs only) */ struct module *svo_module; }; -- cgit v1.2.3-70-g09d2 From d70bc0c67c7aaf0d00084b2f91b44fe1a8ae4e15 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:09:54 -0700 Subject: nfsd/sunrpc: move pool_mode definitions into svc.h In later patches, we're 
going to need to allow code external to svc.c to figure out what pool_mode is in use. Move these definitions into svc.h to prepare for that. Also, make the svc_pool_map object available and exported so that other modules can peek in there to get insight into what pool mode is in use. Likewise, export the svc_pool_map_get/put functions to make it safe to do so. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 25 +++++++++++++++++++++++++ net/sunrpc/svc.c | 31 +++++++------------------------ 2 files changed, 32 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd5bb9922545..3a9baead5c3e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -427,6 +427,29 @@ struct svc_procedure { unsigned int pc_xdrressize; /* maximum size of XDR reply */ }; +/* + * Mode for mapping cpus to pools. + */ +enum { + SVC_POOL_AUTO = -1, /* choose one of the others */ + SVC_POOL_GLOBAL, /* no mapping, just a single global pool + * (legacy & UP mode) */ + SVC_POOL_PERCPU, /* one pool per cpu */ + SVC_POOL_PERNODE /* one pool per numa node */ +}; + +struct svc_pool_map { + int count; /* How many svc_servs use us */ + int mode; /* Note: int not enum to avoid + * warnings about "enumeration value + * not handled in switch" */ + unsigned int npools; + unsigned int *pool_to; /* maps pool id to cpu or node */ + unsigned int *to_pool; /* maps cpu or node to pool id */ +}; + +extern struct svc_pool_map svc_pool_map; + /* * Function prototypes. 
*/ @@ -438,6 +461,8 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int, struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); void svc_exit_thread(struct svc_rqst *); +unsigned int svc_pool_map_get(void); +void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a6be22a7904..486c14bf4e49 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -36,34 +36,17 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net); #define svc_serv_is_pooled(serv) ((serv)->sv_ops->svo_function) -/* - * Mode for mapping cpus to pools. - */ -enum { - SVC_POOL_AUTO = -1, /* choose one of the others */ - SVC_POOL_GLOBAL, /* no mapping, just a single global pool - * (legacy & UP mode) */ - SVC_POOL_PERCPU, /* one pool per cpu */ - SVC_POOL_PERNODE /* one pool per numa node */ -}; #define SVC_POOL_DEFAULT SVC_POOL_GLOBAL /* * Structure for mapping cpus to pools and vice versa. * Setup once during sunrpc initialisation. */ -static struct svc_pool_map { - int count; /* How many svc_servs use us */ - int mode; /* Note: int not enum to avoid - * warnings about "enumeration value - * not handled in switch" */ - unsigned int npools; - unsigned int *pool_to; /* maps pool id to cpu or node */ - unsigned int *to_pool; /* maps cpu or node to pool id */ -} svc_pool_map = { - .count = 0, +struct svc_pool_map svc_pool_map = { .mode = SVC_POOL_DEFAULT }; +EXPORT_SYMBOL_GPL(svc_pool_map); + static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ static int @@ -236,7 +219,7 @@ svc_pool_map_init_pernode(struct svc_pool_map *m) * vice versa). Initialise the map if we're the first user. * Returns the number of pools. 
*/ -static unsigned int +unsigned int svc_pool_map_get(void) { struct svc_pool_map *m = &svc_pool_map; @@ -271,7 +254,7 @@ svc_pool_map_get(void) mutex_unlock(&svc_pool_map_mutex); return m->npools; } - +EXPORT_SYMBOL_GPL(svc_pool_map_get); /* * Drop a reference to the global map of cpus to pools. @@ -280,7 +263,7 @@ svc_pool_map_get(void) * mode using the pool_mode module option without * rebooting or re-loading sunrpc.ko. */ -static void +void svc_pool_map_put(void) { struct svc_pool_map *m = &svc_pool_map; @@ -297,7 +280,7 @@ svc_pool_map_put(void) mutex_unlock(&svc_pool_map_mutex); } - +EXPORT_SYMBOL_GPL(svc_pool_map_put); static int svc_pool_map_get_node(unsigned int pidx) { -- cgit v1.2.3-70-g09d2 From 1b6dc1dffbb142de60eb65f6155276ac31ff5474 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Jun 2015 12:11:10 -0700 Subject: nfsd/sunrpc: factor svc_rqst allocation and freeing from sv_nrthreads refcounting In later patches, we'll want to be able to allocate and free svc_rqst structures without monkeying with the serv->sv_nrthreads refcount. Factor those pieces out of their respective functions. Signed-off-by: Shirley Ma Acked-by: Jeff Layton Tested-by: Shirley Ma Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc.h | 3 +++ net/sunrpc/svc.c | 54 ++++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3a9baead5c3e..cc0fc712bb82 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -458,8 +458,11 @@ void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, struct svc_serv_ops *); +struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, + struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node); +void svc_rqst_free(struct svc_rqst *); void svc_exit_thread(struct svc_rqst *); unsigned int svc_pool_map_get(void); void svc_pool_map_put(void); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 486c14bf4e49..a8f579df14d8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -583,40 +583,52 @@ svc_release_buffer(struct svc_rqst *rqstp) } struct svc_rqst * -svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) +svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) { struct svc_rqst *rqstp; rqstp = kzalloc_node(sizeof(*rqstp), GFP_KERNEL, node); if (!rqstp) - goto out_enomem; + return rqstp; - serv->sv_nrthreads++; __set_bit(RQ_BUSY, &rqstp->rq_flags); spin_lock_init(&rqstp->rq_lock); rqstp->rq_server = serv; rqstp->rq_pool = pool; - spin_lock_bh(&pool->sp_lock); - pool->sp_nrthreads++; - list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); - spin_unlock_bh(&pool->sp_lock); rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_argp) - goto out_thread; + goto out_enomem; rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node); if (!rqstp->rq_resp) - goto out_thread; + goto out_enomem; if (!svc_init_buffer(rqstp, 
serv->sv_max_mesg, node)) - goto out_thread; + goto out_enomem; return rqstp; -out_thread: - svc_exit_thread(rqstp); out_enomem: - return ERR_PTR(-ENOMEM); + svc_rqst_free(rqstp); + return NULL; +} +EXPORT_SYMBOL_GPL(svc_rqst_alloc); + +struct svc_rqst * +svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) +{ + struct svc_rqst *rqstp; + + rqstp = svc_rqst_alloc(serv, pool, node); + if (!rqstp) + return ERR_PTR(-ENOMEM); + + serv->sv_nrthreads++; + spin_lock_bh(&pool->sp_lock); + pool->sp_nrthreads++; + list_add_rcu(&rqstp->rq_all, &pool->sp_all_threads); + spin_unlock_bh(&pool->sp_lock); + return rqstp; } EXPORT_SYMBOL_GPL(svc_prepare_thread); @@ -751,15 +763,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); * mutex" for the service. */ void -svc_exit_thread(struct svc_rqst *rqstp) +svc_rqst_free(struct svc_rqst *rqstp) { - struct svc_serv *serv = rqstp->rq_server; - struct svc_pool *pool = rqstp->rq_pool; - svc_release_buffer(rqstp); kfree(rqstp->rq_resp); kfree(rqstp->rq_argp); kfree(rqstp->rq_auth_data); + kfree_rcu(rqstp, rq_rcu_head); +} +EXPORT_SYMBOL_GPL(svc_rqst_free); + +void +svc_exit_thread(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + struct svc_pool *pool = rqstp->rq_pool; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads--; @@ -767,7 +785,7 @@ svc_exit_thread(struct svc_rqst *rqstp) list_del_rcu(&rqstp->rq_all); spin_unlock_bh(&pool->sp_lock); - kfree_rcu(rqstp, rq_rcu_head); + svc_rqst_free(rqstp); /* Release the server */ if (serv) -- cgit v1.2.3-70-g09d2 From 7a76a021cd5a292be875fbc616daf03eab1e6996 Mon Sep 17 00:00:00 2001 From: Benjamin Poirier Date: Fri, 7 Aug 2015 09:32:21 -0700 Subject: net-timestamp: Update skb_complete_tx_timestamp comment After "62bccb8 net-timestamp: Make the clone operation stand-alone from phy timestamping" the hwtstamps parameter of skb_complete_tx_timestamp() may no longer be NULL. Signed-off-by: Benjamin Poirier Cc: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d6cdd6e87d53..22b6d9ca1654 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2884,11 +2884,11 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) * * PHY drivers may accept clones of transmitted packets for * timestamping via their phy_driver.txtstamp method. These drivers - * must call this function to return the skb back to the stack, with - * or without a timestamp. + * must call this function to return the skb back to the stack with a + * timestamp. * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps, may be NULL if not available + * @hwtstamps: hardware time stamps * */ void skb_complete_tx_timestamp(struct sk_buff *skb, -- cgit v1.2.3-70-g09d2 From 124fe20d94630b6f173dae5eb815e6e6e350c72d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 10 Aug 2015 23:07:05 -0400 Subject: mm: enhance region_is_ram() to region_intersects() region_is_ram() is used to prevent the establishment of aliased mappings to physical "System RAM" with incompatible cache settings. However, it uses "-1" to indicate both "unknown" memory ranges (ranges not described by platform firmware) and "mixed" ranges (where the parameters describe a range that partially overlaps "System RAM"). Fix this up by explicitly tracking the "unknown" vs "mixed" resource cases and returning REGION_INTERSECTS, REGION_MIXED, or REGION_DISJOINT. This re-write also adds support for detecting when the requested region completely eclipses all of a resource. Note, the implementation treats overlaps between "unknown" and the requested memory type as REGION_INTERSECTS. Finally, other memory types can be passed in by name; for now the only type in use is "System RAM". Suggested-by: Luis R. 
Rodriguez Reviewed-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/mm.h | 9 +++++++- kernel/resource.c | 61 ++++++++++++++++++++++++++++++++---------------------- 2 files changed, 44 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e872f92dbac..84b05ebedb2d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -369,7 +369,14 @@ static inline int put_page_unless_one(struct page *page) } extern int page_is_ram(unsigned long pfn); -extern int region_is_ram(resource_size_t phys_addr, unsigned long size); + +enum { + REGION_INTERSECTS, + REGION_DISJOINT, + REGION_MIXED, +}; + +int region_intersects(resource_size_t offset, size_t size, const char *type); /* Support for virtually mapped pages */ struct page *vmalloc_to_page(const void *addr); diff --git a/kernel/resource.c b/kernel/resource.c index fed052a1bc9f..f150dbbe6f62 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -492,40 +492,51 @@ int __weak page_is_ram(unsigned long pfn) } EXPORT_SYMBOL_GPL(page_is_ram); -/* - * Search for a resouce entry that fully contains the specified region. - * If found, return 1 if it is RAM, 0 if not. - * If not found, or region is not fully contained, return -1 +/** + * region_intersects() - determine intersection of region with known resources + * @start: region start address + * @size: size of region + * @name: name of resource (in iomem_resource) * - * Used by the ioremap functions to ensure the user is not remapping RAM and is - * a vast speed up over walking through the resource table page by page. + * Check if the specified region partially overlaps or fully eclipses a + * resource identified by @name. Return REGION_DISJOINT if the region + * does not overlap @name, return REGION_MIXED if the region overlaps + * @type and another resource, and return REGION_INTERSECTS if the + * region overlaps @type and no other defined resource. 
Note, that + * REGION_INTERSECTS is also returned in the case when the specified + * region overlaps RAM and undefined memory holes. + * + * region_intersect() is used by memory remapping functions to ensure + * the user is not remapping RAM and is a vast speed up over walking + * through the resource table page by page. */ -int region_is_ram(resource_size_t start, unsigned long size) +int region_intersects(resource_size_t start, size_t size, const char *name) { - struct resource *p; - resource_size_t end = start + size - 1; unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; - const char *name = "System RAM"; - int ret = -1; + resource_size_t end = start + size - 1; + int type = 0; int other = 0; + struct resource *p; read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { - if (p->end < start) - continue; - - if (p->start <= start && end <= p->end) { - /* resource fully contains region */ - if ((p->flags != flags) || strcmp(p->name, name)) - ret = 0; - else - ret = 1; - break; - } - if (end < p->start) - break; /* not found */ + bool is_type = strcmp(p->name, name) == 0 && p->flags == flags; + + if (start >= p->start && start <= p->end) + is_type ? type++ : other++; + if (end >= p->start && end <= p->end) + is_type ? type++ : other++; + if (p->start >= start && p->end <= end) + is_type ? type++ : other++; } read_unlock(&resource_lock); - return ret; + + if (other == 0) + return type ? REGION_INTERSECTS : REGION_DISJOINT; + + if (type) + return REGION_MIXED; + + return REGION_DISJOINT; } void __weak arch_remove_reservations(struct resource *avail) -- cgit v1.2.3-70-g09d2 From 2584cf83578c26db144730ef498f4070f82ee3ea Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 10 Aug 2015 23:07:05 -0400 Subject: arch, drivers: don't include <asm/io.h> directly, use <linux/io.h> instead Preparation for uniform definition of ioremap, ioremap_wc, ioremap_wt, and ioremap_cache, tree-wide. 
Acked-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/arm/mach-shmobile/pm-rcar.c | 2 +- arch/ia64/kernel/cyclone.c | 2 +- drivers/isdn/icn/icn.h | 2 +- drivers/mtd/devices/slram.c | 2 +- drivers/mtd/nand/diskonchip.c | 2 +- drivers/mtd/onenand/generic.c | 2 +- drivers/scsi/sun3x_esp.c | 2 +- drivers/staging/comedi/drivers/ii_pci20kc.c | 1 + drivers/tty/serial/8250/8250_core.c | 2 +- drivers/video/fbdev/s1d13xxxfb.c | 3 +-- drivers/video/fbdev/stifb.c | 1 + include/linux/io-mapping.h | 2 +- include/linux/mtd/map.h | 2 +- include/video/vga.h | 2 +- 14 files changed, 14 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/pm-rcar.c b/arch/arm/mach-shmobile/pm-rcar.c index 00022ee56f80..9d3dde00c2fe 100644 --- a/arch/arm/mach-shmobile/pm-rcar.c +++ b/arch/arm/mach-shmobile/pm-rcar.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "pm-rcar.h" /* SYSC */ diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 4826ff957a3d..5fa3848ba224 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include /* IBM Summit (EXA) Cyclone counter code*/ #define CYCLONE_CBAR_ADDR 0xFEB00CD0 diff --git a/drivers/isdn/icn/icn.h b/drivers/isdn/icn/icn.h index b713466997a0..f8f2e76d34bf 100644 --- a/drivers/isdn/icn/icn.h +++ b/drivers/isdn/icn/icn.h @@ -38,7 +38,7 @@ typedef struct icn_cdef { #include #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/devices/slram.c b/drivers/mtd/devices/slram.c index 2fc4957cbe7f..a70eb83e68f1 100644 --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include diff --git a/drivers/mtd/nand/diskonchip.c b/drivers/mtd/nand/diskonchip.c index 7da266a53979..0802158a3f75 100644 --- a/drivers/mtd/nand/diskonchip.c +++ b/drivers/mtd/nand/diskonchip.c @@ -24,7 +24,7 @@ 
#include #include #include -#include +#include #include #include diff --git a/drivers/mtd/onenand/generic.c b/drivers/mtd/onenand/generic.c index 32a216d31141..ab7bda0bb245 100644 --- a/drivers/mtd/onenand/generic.c +++ b/drivers/mtd/onenand/generic.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include /* * Note: Driver name and platform data format have been updated! diff --git a/drivers/scsi/sun3x_esp.c b/drivers/scsi/sun3x_esp.c index e26e81de7c45..d50c5ed8f428 100644 --- a/drivers/scsi/sun3x_esp.c +++ b/drivers/scsi/sun3x_esp.c @@ -12,9 +12,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/drivers/staging/comedi/drivers/ii_pci20kc.c b/drivers/staging/comedi/drivers/ii_pci20kc.c index 0768bc42a5db..14ef1f67dd42 100644 --- a/drivers/staging/comedi/drivers/ii_pci20kc.c +++ b/drivers/staging/comedi/drivers/ii_pci20kc.c @@ -28,6 +28,7 @@ */ #include +#include #include "../comedidev.h" /* diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 37fff12dd4d0..fe902ff52e58 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -38,11 +38,11 @@ #include #include #include +#include #ifdef CONFIG_SPARC #include #endif -#include #include #include "8250.h" diff --git a/drivers/video/fbdev/s1d13xxxfb.c b/drivers/video/fbdev/s1d13xxxfb.c index 83433cb0dfba..96aa46dc696c 100644 --- a/drivers/video/fbdev/s1d13xxxfb.c +++ b/drivers/video/fbdev/s1d13xxxfb.c @@ -32,8 +32,7 @@ #include #include #include - -#include +#include #include