| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
 | // SPDX-License-Identifier: GPL-2.0-only
#include <linux/extable.h>
#include <linux/uaccess.h>
#include <linux/sched/debug.h>
#include <linux/bitfield.h>
#include <xen/xen.h>
#include <asm/fpu/api.h>
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
#include <asm/insn-eval.h>
#include <asm/sgx.h>
static inline unsigned long *pt_regs_nr(struct pt_regs *regs, int nr)
{
	int reg_offset = pt_regs_offset(regs, nr);
	static unsigned long __dummy;
	if (WARN_ON_ONCE(reg_offset < 0))
		return &__dummy;
	return (unsigned long *)((unsigned long)regs + reg_offset);
}
static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
{
	return (unsigned long)&x->fixup + x->fixup;
}
static bool ex_handler_default(const struct exception_table_entry *e,
			       struct pt_regs *regs)
{
	if (e->data & EX_FLAG_CLEAR_AX)
		regs->ax = 0;
	if (e->data & EX_FLAG_CLEAR_DX)
		regs->dx = 0;
	regs->ip = ex_fixup_addr(e);
	return true;
}
/*
 * This is the *very* rare case where we do a "load_unaligned_zeropad()"
 * and it's a page crosser into a non-existent page.
 *
 * This happens when we optimistically load a pathname a word-at-a-time
 * and the name is less than the full word and the  next page is not
 * mapped. Typically that only happens for CONFIG_DEBUG_PAGEALLOC.
 *
 * NOTE! The faulting address is always a 'mov mem,reg' type instruction
 * of size 'long', and the exception fixup must always point to right
 * after the instruction.
 */
static bool ex_handler_zeropad(const struct exception_table_entry *e,
			       struct pt_regs *regs,
			       unsigned long fault_addr)
{
	struct insn insn;
	const unsigned long mask = sizeof(long) - 1;
	unsigned long offset, addr, next_ip, len;
	unsigned long *reg;
	next_ip = ex_fixup_addr(e);
	len = next_ip - regs->ip;
	if (len > MAX_INSN_SIZE)
		return false;
	if (insn_decode(&insn, (void *) regs->ip, len, INSN_MODE_KERN))
		return false;
	if (insn.length != len)
		return false;
	if (insn.opcode.bytes[0] != 0x8b)
		return false;
	if (insn.opnd_bytes != sizeof(long))
		return false;
	addr = (unsigned long) insn_get_addr_ref(&insn, regs);
	if (addr == ~0ul)
		return false;
	offset = addr & mask;
	addr = addr & ~mask;
	if (fault_addr != addr + sizeof(long))
		return false;
	reg = insn_get_modrm_reg_ptr(&insn, regs);
	if (!reg)
		return false;
	*reg = *(unsigned long *)addr >> (offset * 8);
	return ex_handler_default(e, regs);
}
static bool ex_handler_fault(const struct exception_table_entry *fixup,
			     struct pt_regs *regs, int trapnr)
{
	regs->ax = trapnr;
	return ex_handler_default(fixup, regs);
}
static bool ex_handler_sgx(const struct exception_table_entry *fixup,
			   struct pt_regs *regs, int trapnr)
{
	regs->ax = trapnr | SGX_ENCLS_FAULT_FLAG;
	return ex_handler_default(fixup, regs);
}
/*
 * Handler for when we fail to restore a task's FPU state.  We should never get
 * here because the FPU state of a task using the FPU (task->thread.fpu.state)
 * should always be valid.  However, past bugs have allowed userspace to set
 * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn().
 * These caused XRSTOR to fail when switching to the task, leaking the FPU
 * registers of the task previously executing on the CPU.  Mitigate this class
 * of vulnerability by restoring from the initial state (essentially, zeroing
 * out all the FPU registers) if we can't restore from the task's FPU state.
 */
static bool ex_handler_fprestore(const struct exception_table_entry *fixup,
				 struct pt_regs *regs)
{
	regs->ip = ex_fixup_addr(fixup);
	WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
		  (void *)instruction_pointer(regs));
	fpu_reset_from_exception_fixup();
	return true;
}
static bool ex_handler_uaccess(const struct exception_table_entry *fixup,
			       struct pt_regs *regs, int trapnr)
{
	WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
	return ex_handler_default(fixup, regs);
}
static bool ex_handler_copy(const struct exception_table_entry *fixup,
			    struct pt_regs *regs, int trapnr)
{
	WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
	return ex_handler_fault(fixup, regs, trapnr);
}
static bool ex_handler_msr(const struct exception_table_entry *fixup,
			   struct pt_regs *regs, bool wrmsr, bool safe, int reg)
{
	if (__ONCE_LITE_IF(!safe && wrmsr)) {
		pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
			(unsigned int)regs->cx, (unsigned int)regs->dx,
			(unsigned int)regs->ax,  regs->ip, (void *)regs->ip);
		show_stack_regs(regs);
	}
	if (__ONCE_LITE_IF(!safe && !wrmsr)) {
		pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
			(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
		show_stack_regs(regs);
	}
	if (!wrmsr) {
		/* Pretend that the read succeeded and returned 0. */
		regs->ax = 0;
		regs->dx = 0;
	}
	if (safe)
		*pt_regs_nr(regs, reg) = -EIO;
	return ex_handler_default(fixup, regs);
}
static bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
				struct pt_regs *regs)
{
	if (static_cpu_has(X86_BUG_NULL_SEG))
		asm volatile ("mov %0, %%fs" : : "rm" (__USER_DS));
	asm volatile ("mov %0, %%fs" : : "rm" (0));
	return ex_handler_default(fixup, regs);
}
static bool ex_handler_imm_reg(const struct exception_table_entry *fixup,
			       struct pt_regs *regs, int reg, int imm)
{
	*pt_regs_nr(regs, reg) = (long)imm;
	return ex_handler_default(fixup, regs);
}
static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup,
				  struct pt_regs *regs, int trapnr, int reg, int imm)
{
	regs->cx = imm * regs->cx + *pt_regs_nr(regs, reg);
	return ex_handler_uaccess(fixup, regs, trapnr);
}
int ex_get_fixup_type(unsigned long ip)
{
	const struct exception_table_entry *e = search_exception_tables(ip);
	return e ? FIELD_GET(EX_DATA_TYPE_MASK, e->data) : EX_TYPE_NONE;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
		    unsigned long fault_addr)
{
	const struct exception_table_entry *e;
	int type, reg, imm;
#ifdef CONFIG_PNPBIOS
	if (unlikely(SEGMENT_IS_PNP_CODE(regs->cs))) {
		extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
		extern u32 pnp_bios_is_utter_crap;
		pnp_bios_is_utter_crap = 1;
		printk(KERN_CRIT "PNPBIOS fault.. attempting recovery.\n");
		__asm__ volatile(
			"movl %0, %%esp\n\t"
			"jmp *%1\n\t"
			: : "g" (pnp_bios_fault_esp), "g" (pnp_bios_fault_eip));
		panic("do_trap: can't hit this");
	}
#endif
	e = search_exception_tables(regs->ip);
	if (!e)
		return 0;
	type = FIELD_GET(EX_DATA_TYPE_MASK, e->data);
	reg  = FIELD_GET(EX_DATA_REG_MASK,  e->data);
	imm  = FIELD_GET(EX_DATA_IMM_MASK,  e->data);
	switch (type) {
	case EX_TYPE_DEFAULT:
	case EX_TYPE_DEFAULT_MCE_SAFE:
		return ex_handler_default(e, regs);
	case EX_TYPE_FAULT:
	case EX_TYPE_FAULT_MCE_SAFE:
		return ex_handler_fault(e, regs, trapnr);
	case EX_TYPE_UACCESS:
		return ex_handler_uaccess(e, regs, trapnr);
	case EX_TYPE_COPY:
		return ex_handler_copy(e, regs, trapnr);
	case EX_TYPE_CLEAR_FS:
		return ex_handler_clear_fs(e, regs);
	case EX_TYPE_FPU_RESTORE:
		return ex_handler_fprestore(e, regs);
	case EX_TYPE_BPF:
		return ex_handler_bpf(e, regs);
	case EX_TYPE_WRMSR:
		return ex_handler_msr(e, regs, true, false, reg);
	case EX_TYPE_RDMSR:
		return ex_handler_msr(e, regs, false, false, reg);
	case EX_TYPE_WRMSR_SAFE:
		return ex_handler_msr(e, regs, true, true, reg);
	case EX_TYPE_RDMSR_SAFE:
		return ex_handler_msr(e, regs, false, true, reg);
	case EX_TYPE_WRMSR_IN_MCE:
		ex_handler_msr_mce(regs, true);
		break;
	case EX_TYPE_RDMSR_IN_MCE:
		ex_handler_msr_mce(regs, false);
		break;
	case EX_TYPE_POP_REG:
		regs->sp += sizeof(long);
		fallthrough;
	case EX_TYPE_IMM_REG:
		return ex_handler_imm_reg(e, regs, reg, imm);
	case EX_TYPE_FAULT_SGX:
		return ex_handler_sgx(e, regs, trapnr);
	case EX_TYPE_UCOPY_LEN:
		return ex_handler_ucopy_len(e, regs, trapnr, reg, imm);
	case EX_TYPE_ZEROPAD:
		return ex_handler_zeropad(e, regs, fault_addr);
	}
	BUG();
}
extern unsigned int early_recursion_flag;
/* Restricted version used during very early boot */
void __init early_fixup_exception(struct pt_regs *regs, int trapnr)
{
	/* Ignore early NMIs. */
	if (trapnr == X86_TRAP_NMI)
		return;
	if (early_recursion_flag > 2)
		goto halt_loop;
	/*
	 * Old CPUs leave the high bits of CS on the stack
	 * undefined.  I'm not sure which CPUs do this, but at least
	 * the 486 DX works this way.
	 * Xen pv domains are not using the default __KERNEL_CS.
	 */
	if (!xen_pv_domain() && regs->cs != __KERNEL_CS)
		goto fail;
	/*
	 * The full exception fixup machinery is available as soon as
	 * the early IDT is loaded.  This means that it is the
	 * responsibility of extable users to either function correctly
	 * when handlers are invoked early or to simply avoid causing
	 * exceptions before they're ready to handle them.
	 *
	 * This is better than filtering which handlers can be used,
	 * because refusing to call a handler here is guaranteed to
	 * result in a hard-to-debug panic.
	 *
	 * Keep in mind that not all vectors actually get here.  Early
	 * page faults, for example, are special.
	 */
	if (fixup_exception(regs, trapnr, regs->orig_ax, 0))
		return;
	if (trapnr == X86_TRAP_UD) {
		if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN) {
			/* Skip the ud2. */
			regs->ip += LEN_UD2;
			return;
		}
		/*
		 * If this was a BUG and report_bug returns or if this
		 * was just a normal #UD, we want to continue onward and
		 * crash.
		 */
	}
fail:
	early_printk("PANIC: early exception 0x%02x IP %lx:%lx error %lx cr2 0x%lx\n",
		     (unsigned)trapnr, (unsigned long)regs->cs, regs->ip,
		     regs->orig_ax, read_cr2());
	show_regs(regs);
halt_loop:
	while (true)
		halt();
}
 |