~ubuntu-branches/ubuntu/precise/linux-lowlatency/precise

Viewing changes to arch/x86/kernel/machine_kexec_64.c

  • Committer: Package Import Robot
  • Author(s): Alessio Igor Bogani
  • Date: 2011-10-26 11:13:05 UTC
  • Revision ID: package-import@ubuntu.com-20111026111305-tz023xykf0i6eosh
Tags: upstream-3.2.0
Import upstream version 3.2.0

/*
 * handle transition of Linux booting another kernel
 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/string.h>
#include <linux/gfp.h>
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
#include <linux/io.h>
#include <linux/suspend.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
 
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
                                unsigned long addr)
{
        pud_t *pud;
        pmd_t *pmd;
        struct page *page;
        int result = -ENOMEM;

        addr &= PMD_MASK;
        pgd += pgd_index(addr);
        if (!pgd_present(*pgd)) {
                page = kimage_alloc_control_pages(image, 0);
                if (!page)
                        goto out;
                pud = (pud_t *)page_address(page);
                clear_page(pud);
                set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
        }
        pud = pud_offset(pgd, addr);
        if (!pud_present(*pud)) {
                page = kimage_alloc_control_pages(image, 0);
                if (!page)
                        goto out;
                pmd = (pmd_t *)page_address(page);
                clear_page(pmd);
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
        }
        pmd = pmd_offset(pud, addr);
        if (!pmd_present(*pmd))
                set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
        result = 0;
out:
        return result;
}
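
A standalone sketch (not part of this file) of the arithmetic the walk above relies on: how a virtual address is split into 4-level x86-64 page-table indices and rounded down to a 2 MiB boundary. The shift constants assume the standard 4 KiB-page, 9-bits-per-level layout.

/* Standalone sketch: 4-level x86-64 address splitting, as used above. */
#include <stdio.h>
#include <stdint.h>

#define PMD_SHIFT   21          /* each PMD entry maps 2 MiB */
#define PUD_SHIFT   30          /* each PUD entry maps 1 GiB */
#define PGDIR_SHIFT 39          /* each PGD entry maps 512 GiB */
#define PTRS        512         /* 9-bit index per level */
#define PMD_MASK    (~((1ULL << PMD_SHIFT) - 1))

int main(void)
{
        uint64_t addr = 0x123456789abcULL;   /* hypothetical address */

        printf("pgd index: %llu\n",
               (unsigned long long)((addr >> PGDIR_SHIFT) & (PTRS - 1)));
        printf("pud index: %llu\n",
               (unsigned long long)((addr >> PUD_SHIFT) & (PTRS - 1)));
        printf("pmd index: %llu\n",
               (unsigned long long)((addr >> PMD_SHIFT) & (PTRS - 1)));
        printf("2 MiB-aligned: %#llx\n",
               (unsigned long long)(addr & PMD_MASK));
        return 0;
}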
 
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
        unsigned long end_addr;

        addr &= PAGE_MASK;
        end_addr = addr + PUD_SIZE;
        while (addr < end_addr) {
                set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
                addr += PMD_SIZE;
        }
}
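
The loop above fills exactly one page of PMD entries with identity-mapped 2 MiB large pages. A quick standalone check of that arithmetic, assuming the usual x86-64 values of PUD_SIZE (1 GiB) and PMD_SIZE (2 MiB):

#include <assert.h>

#define PMD_SIZE (1UL << 21)    /* 2 MiB mapped per large PMD entry */
#define PUD_SIZE (1UL << 30)    /* 1 GiB spanned by one PUD entry */

int main(void)
{
        /* 512 entries of 2 MiB each cover the 1 GiB region,
         * i.e. one full 4 KiB page of 8-byte PMD entries */
        assert(PUD_SIZE / PMD_SIZE == 512);
        return 0;
}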
 
static int init_level3_page(struct kimage *image, pud_t *level3p,
                                unsigned long addr, unsigned long last_addr)
{
        unsigned long end_addr;
        int result;

        result = 0;
        addr &= PAGE_MASK;
        end_addr = addr + PGDIR_SIZE;
        while ((addr < last_addr) && (addr < end_addr)) {
                struct page *page;
                pmd_t *level2p;

                page = kimage_alloc_control_pages(image, 0);
                if (!page) {
                        result = -ENOMEM;
                        goto out;
                }
                level2p = (pmd_t *)page_address(page);
                init_level2_page(level2p, addr);
                set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
                addr += PUD_SIZE;
        }
        /* clear the unused entries */
        while (addr < end_addr) {
                pud_clear(level3p++);
                addr += PUD_SIZE;
        }
out:
        return result;
}
 
static int init_level4_page(struct kimage *image, pgd_t *level4p,
                                unsigned long addr, unsigned long last_addr)
{
        unsigned long end_addr;
        int result;

        result = 0;
        addr &= PAGE_MASK;
        end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
        while ((addr < last_addr) && (addr < end_addr)) {
                struct page *page;
                pud_t *level3p;

                page = kimage_alloc_control_pages(image, 0);
                if (!page) {
                        result = -ENOMEM;
                        goto out;
                }
                level3p = (pud_t *)page_address(page);
                result = init_level3_page(image, level3p, addr, last_addr);
                if (result)
                        goto out;
                set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
                addr += PGDIR_SIZE;
        }
        /* clear the unused entries */
        while (addr < end_addr) {
                pgd_clear(level4p++);
                addr += PGDIR_SIZE;
        }
out:
        return result;
}
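
For scale: init_level4_page() above bounds the walk at PTRS_PER_PGD * PGDIR_SIZE, clamped in practice to last_addr. A standalone check of what that ceiling comes to under the standard x86-64 constants (512 entries of 512 GiB each):

#include <stdint.h>
#include <stdio.h>

#define PGDIR_SHIFT  39
#define PGDIR_SIZE   (1ULL << PGDIR_SHIFT)   /* 512 GiB per PGD entry */
#define PTRS_PER_PGD 512

int main(void)
{
        uint64_t span = (uint64_t)PTRS_PER_PGD * PGDIR_SIZE;
        printf("max identity-map span: %llu TiB\n",
               (unsigned long long)(span >> 40));   /* prints 256 */
        return 0;
}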
 
static void free_transition_pgtable(struct kimage *image)
{
        free_page((unsigned long)image->arch.pud);
        free_page((unsigned long)image->arch.pmd);
        free_page((unsigned long)image->arch.pte);
}
 
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
{
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        unsigned long vaddr, paddr;
        int result = -ENOMEM;

        vaddr = (unsigned long)relocate_kernel;
        paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
        pgd += pgd_index(vaddr);
        if (!pgd_present(*pgd)) {
                pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
                if (!pud)
                        goto err;
                image->arch.pud = pud;
                set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
        }
        pud = pud_offset(pgd, vaddr);
        if (!pud_present(*pud)) {
                pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
                if (!pmd)
                        goto err;
                image->arch.pmd = pmd;
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
        }
        pmd = pmd_offset(pud, vaddr);
        if (!pmd_present(*pmd)) {
                pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
                if (!pte)
                        goto err;
                image->arch.pte = pte;
                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
        }
        pte = pte_offset_kernel(pmd, vaddr);
        set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
        return 0;
err:
        free_transition_pgtable(image);
        return result;
}
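
The final set_pte(pfn_pte(...)) above maps relocate_kernel's virtual address onto the control page, so execution survives the page-table switch. A standalone sketch of how such an entry is composed, using illustrative flag bits only (the real PAGE_KERNEL_EXEC value carries more bits):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT  12
#define PTE_PRESENT (1ULL << 0)   /* x86 PTE bit 0: present */
#define PTE_WRITE   (1ULL << 1)   /* x86 PTE bit 1: writable */

static uint64_t make_pte(uint64_t pfn, uint64_t flags)
{
        /* like pfn_pte(): frame number in the high bits, flags low */
        return (pfn << PAGE_SHIFT) | flags;
}

int main(void)
{
        uint64_t paddr = 0x1234f000ULL;   /* hypothetical control page */
        uint64_t pte = make_pte(paddr >> PAGE_SHIFT,
                                PTE_PRESENT | PTE_WRITE);
        printf("pte = %#llx\n", (unsigned long long)pte);
        return 0;
}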
 
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
        pgd_t *level4p;
        int result;
        level4p = (pgd_t *)__va(start_pgtable);
        result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
        if (result)
                return result;
        /*
         * image->start may lie outside 0 ~ max_pfn, for example when
         * jumping back to the original kernel from the kexec'ed kernel
         */
        result = init_one_level2_page(image, level4p, image->start);
        if (result)
                return result;
        return init_transition_pgtable(image, level4p);
}
 
static void set_idt(void *newidt, u16 limit)
{
        struct desc_ptr curidt;

        /* x86-64 supports unaligned loads & stores */
        curidt.size    = limit;
        curidt.address = (unsigned long)newidt;

        __asm__ __volatile__ (
                "lidtq %0\n"
                : : "m" (curidt)
                );
}
 
static void set_gdt(void *newgdt, u16 limit)
{
        struct desc_ptr curgdt;

        /* x86-64 supports unaligned loads & stores */
        curgdt.size    = limit;
        curgdt.address = (unsigned long)newgdt;

        __asm__ __volatile__ (
                "lgdtq %0\n"
                : : "m" (curgdt)
                );
}
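
Both loaders above hand lidtq/lgdtq a 10-byte memory operand: a 16-bit limit followed immediately by a 64-bit base. A standalone sketch of that layout (mirroring the kernel's struct desc_ptr, which is declared packed for exactly this reason):

#include <stdint.h>

struct desc_ptr_sketch {
        uint16_t size;      /* table limit: size in bytes minus one */
        uint64_t address;   /* linear base address of the IDT/GDT */
} __attribute__((packed));

/* packed: 2 + 8 = 10 bytes, the exact operand format lidt/lgdt expect */
_Static_assert(sizeof(struct desc_ptr_sketch) == 10,
               "descriptor pointer must be 10 bytes");

int main(void)
{
        return 0;
}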
 
static void load_segments(void)
{
        __asm__ __volatile__ (
                "\tmovl %0,%%ds\n"
                "\tmovl %0,%%es\n"
                "\tmovl %0,%%ss\n"
                "\tmovl %0,%%fs\n"
                "\tmovl %0,%%gs\n"
                : : "a" (__KERNEL_DS) : "memory"
                );
}
 
int machine_kexec_prepare(struct kimage *image)
{
        unsigned long start_pgtable;
        int result;

        /* Calculate the offsets */
        start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;

        /* Set up the identity-mapped 64-bit page table */
        result = init_pgtable(image, start_pgtable);
        if (result)
                return result;

        return 0;
}
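
start_pgtable above converts a page frame number back into a physical address by shifting. A standalone check of that conversion, assuming 4 KiB pages:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
        uint64_t pfn = 0x1234;                 /* hypothetical frame number */
        uint64_t phys = pfn << PAGE_SHIFT;     /* first byte of that frame */
        printf("pfn %#llx -> phys %#llx\n",
               (unsigned long long)pfn, (unsigned long long)phys);
        return 0;
}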
 
void machine_kexec_cleanup(struct kimage *image)
{
        free_transition_pgtable(image);
}
 
/*
 * Do not allocate memory (or fail in any way) in machine_kexec().
 * We are past the point of no return, committed to rebooting now.
 */
void machine_kexec(struct kimage *image)
{
        unsigned long page_list[PAGES_NR];
        void *control_page;
        int save_ftrace_enabled;

#ifdef CONFIG_KEXEC_JUMP
        if (image->preserve_context)
                save_processor_state();
#endif

        save_ftrace_enabled = __ftrace_enabled_save();

        /* Interrupts aren't acceptable while we reboot */
        local_irq_disable();
        hw_breakpoint_disable();

        if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
                /*
                 * We need to put APICs in legacy mode so that we can
                 * get timer interrupts in the second kernel. The
                 * kexec/kdump paths already have calls to
                 * disable_IO_APIC() in one form or other; the kexec
                 * jump path also needs one.
                 */
                disable_IO_APIC();
#endif
        }

        control_page = page_address(image->control_code_page) + PAGE_SIZE;
        memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);

        page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
        page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
        page_list[PA_TABLE_PAGE] =
          (unsigned long)__pa(page_address(image->control_code_page));

        if (image->type == KEXEC_TYPE_DEFAULT)
                page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
                                                << PAGE_SHIFT);

        /*
         * The segment registers are funny things; they have both a
         * visible and an invisible part.  Whenever the visible part is
         * set to a specific selector, the invisible part is loaded
         * from a table in memory.  At no other time is the
         * descriptor table in memory accessed.
         *
         * I take advantage of this here by force-loading the
         * segments, before I zap the gdt with an invalid value.
         */
        load_segments();
        /*
         * The gdt & idt are now invalid.
         * If you want to load them you must set up your own idt & gdt.
         */
        set_gdt(phys_to_virt(0), 0);
        set_idt(phys_to_virt(0), 0);

        /* now call it */
        image->start = relocate_kernel((unsigned long)image->head,
                                       (unsigned long)page_list,
                                       image->start,
                                       image->preserve_context);

#ifdef CONFIG_KEXEC_JUMP
        if (image->preserve_context)
                restore_processor_state();
#endif

        __ftrace_enabled_restore(save_ftrace_enabled);
}
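
The heart of machine_kexec() above is copying relocate_kernel into the control page and jumping to it, so the running code no longer depends on the old kernel's mappings. A userspace analogy of that copy-and-jump step, offered as a sketch only (hardened systems may refuse writable+executable mappings):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/* position-independent x86-64 machine code: mov $42, %eax; ret */
static const unsigned char stub[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };

int main(void)
{
        void *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (page == MAP_FAILED)
                return 1;

        memcpy(page, stub, sizeof(stub));    /* like copying relocate_kernel */
        int (*fn)(void) = (int (*)(void))page;
        printf("stub returned %d\n", fn());  /* like jumping to the control page */
        return 0;
}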
 
void arch_crash_save_vmcoreinfo(void)
{
        VMCOREINFO_SYMBOL(phys_base);
        VMCOREINFO_SYMBOL(init_level4_pgt);

#ifdef CONFIG_NUMA
        VMCOREINFO_SYMBOL(node_data);
        VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
}