summaryrefslogtreecommitdiff
path: root/arch/arm64
diff options
context:
space:
mode:
authorArd Biesheuvel <ard.biesheuvel@linaro.org>2018-11-22 09:46:46 +0100
committerWill Deacon <will.deacon@arm.com>2018-11-27 19:00:45 +0000
commitbdb85cd1d20669dfae813555dddb745ad09323ba (patch)
treeaf519f4e76f6d913d7ed665a12c4cdc51512edb6 /arch/arm64
parent7aaf7b2fd26c3a069472dd9778367b2f941dd866 (diff)
arm64/module: switch to ADRP/ADD sequences for PLT entries
Now that we have switched to the small code model entirely, and reduced the extended KASLR range to 4 GB, we can be sure that the targets of relative branches that are out of range are in range for a ADRP/ADD pair, which is one instruction shorter than our current MOVN/MOVK/MOVK sequence, and is more idiomatic and so it is more likely to be implemented efficiently by micro-architectures. So switch over the ordinary PLT code and the special handling of the Cortex-A53 ADRP errata, as well as the ftrace trampline handling. Reviewed-by: Torsten Duwe <duwe@lst.de> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> [will: Added a couple of comments in the plt equality check] Signed-off-by: Will Deacon <will.deacon@arm.com>
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/include/asm/module.h36
-rw-r--r--arch/arm64/kernel/ftrace.c2
-rw-r--r--arch/arm64/kernel/module-plts.c99
-rw-r--r--arch/arm64/kernel/module.c4
4 files changed, 88 insertions, 53 deletions
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index f81c1847312d..905e1bb0e7bd 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -58,39 +58,19 @@ struct plt_entry {
* is exactly what we are dealing with here, we are free to use x16
* as a scratch register in the PLT veneers.
*/
- __le32 mov0; /* movn x16, #0x.... */
- __le32 mov1; /* movk x16, #0x...., lsl #16 */
- __le32 mov2; /* movk x16, #0x...., lsl #32 */
+ __le32 adrp; /* adrp x16, .... */
+ __le32 add; /* add x16, x16, #0x.... */
__le32 br; /* br x16 */
};
-static inline struct plt_entry get_plt_entry(u64 val)
+static inline bool is_forbidden_offset_for_adrp(void *place)
{
- /*
- * MOVK/MOVN/MOVZ opcode:
- * +--------+------------+--------+-----------+-------------+---------+
- * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
- * +--------+------------+--------+-----------+-------------+---------+
- *
- * Rd := 0x10 (x16)
- * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
- * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
- * sf := 1 (64-bit variant)
- */
- return (struct plt_entry){
- cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
- cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
- cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
- cpu_to_le32(0xd61f0200)
- };
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+ ((u64)place & 0xfff) >= 0xff8;
}
-static inline bool plt_entries_equal(const struct plt_entry *a,
- const struct plt_entry *b)
-{
- return a->mov0 == b->mov0 &&
- a->mov1 == b->mov1 &&
- a->mov2 == b->mov2;
-}
+struct plt_entry get_plt_entry(u64 dst, void *pc);
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 50986e388d2b..2135665a8ab3 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -104,7 +104,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
*/
- trampoline = get_plt_entry(addr);
+ trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
&trampoline)) {
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index a0efe30211d6..255941394941 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,6 +11,61 @@
#include <linux/module.h>
#include <linux/sort.h>
+static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
+ enum aarch64_insn_register reg)
+{
+ u32 adrp, add;
+
+ adrp = aarch64_insn_gen_adr(pc, dst, reg, AARCH64_INSN_ADR_TYPE_ADRP);
+ add = aarch64_insn_gen_add_sub_imm(reg, reg, dst % SZ_4K,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+
+ return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add) };
+}
+
+struct plt_entry get_plt_entry(u64 dst, void *pc)
+{
+ struct plt_entry plt;
+ static u32 br;
+
+ if (!br)
+ br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
+ AARCH64_INSN_BRANCH_NOLINK);
+
+ plt = __get_adrp_add_pair(dst, (u64)pc, AARCH64_INSN_REG_16);
+ plt.br = cpu_to_le32(br);
+
+ return plt;
+}
+
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
+{
+ u64 p, q;
+
+ /*
+ * Check whether both entries refer to the same target:
+ * do the cheapest checks first.
+ * If the 'add' or 'br' opcodes are different, then the target
+ * cannot be the same.
+ */
+ if (a->add != b->add || a->br != b->br)
+ return false;
+
+ p = ALIGN_DOWN((u64)a, SZ_4K);
+ q = ALIGN_DOWN((u64)b, SZ_4K);
+
+ /*
+ * If the 'adrp' opcodes are the same then we just need to check
+ * that they refer to the same 4k region.
+ */
+ if (a->adrp == b->adrp && p == q)
+ return true;
+
+ return (p + aarch64_insn_adrp_get_offset(le32_to_cpu(a->adrp))) ==
+ (q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp)));
+}
+
static bool in_init(const struct module *mod, void *loc)
{
return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -24,19 +79,23 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
&mod->arch.init;
struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries;
+ int j = i - 1;
u64 val = sym->st_value + rela->r_addend;
- plt[i] = get_plt_entry(val);
+ if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+ i++;
+
+ plt[i] = get_plt_entry(val, &plt[i]);
/*
* Check if the entry we just created is a duplicate. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
- if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
- return (u64)&plt[i - 1];
+ if (j >= 0 && plt_entries_equal(plt + i, plt + j))
+ return (u64)&plt[j];
- pltsec->plt_num_entries++;
+ pltsec->plt_num_entries += i - j;
if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
return 0;
@@ -51,35 +110,24 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
&mod->arch.init;
struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries++;
- u32 mov0, mov1, mov2, br;
+ u32 br;
int rd;
if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
return 0;
+ if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+ i = pltsec->plt_num_entries++;
+
/* get the destination register of the ADRP instruction */
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
le32_to_cpup((__le32 *)loc));
- /* generate the veneer instructions */
- mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_MOVEWIDE_INVERSE);
- mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_MOVEWIDE_KEEP);
- mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_MOVEWIDE_KEEP);
br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
AARCH64_INSN_BRANCH_NOLINK);
- plt[i] = (struct plt_entry){
- cpu_to_le32(mov0),
- cpu_to_le32(mov1),
- cpu_to_le32(mov2),
- cpu_to_le32(br)
- };
+ plt[i] = __get_adrp_add_pair(val, (u64)&plt[i], rd);
+ plt[i].br = cpu_to_le32(br);
return (u64)&plt[i];
}
@@ -195,6 +243,15 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
break;
}
}
+
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_843419))
+ /*
+ * Add some slack so we can skip PLT slots that may trigger
+ * the erratum due to the placement of the ADRP instruction.
+ */
+ ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
+
return ret;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index c2abe59c6f8f..f713e2fc4d75 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -203,9 +203,7 @@ static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
{
u32 insn;
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
- !cpus_have_const_cap(ARM64_WORKAROUND_843419) ||
- ((u64)place & 0xfff) < 0xff8)
+ if (!is_forbidden_offset_for_adrp(place))
return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
AARCH64_INSN_IMM_ADR);