Creation of Cybook 2416 (actually Gen4) repository

This commit is contained in:
mlt
2009-12-18 17:10:00 +00:00
committed by godzil
commit 76f20f4d40
13791 changed files with 6812321 additions and 0 deletions

37
drivers/kvm/Kconfig Normal file
View File

@@ -0,0 +1,37 @@
#
# KVM configuration
#
menu "Virtualization"
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent
processor equipped with virtualization extensions. You will also
need to select one or more of the processor modules below.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
To compile this as a module, choose M here: the module
will be called kvm.
If unsure, say N.
config KVM_INTEL
tristate "KVM for Intel processors support"
depends on KVM
---help---
Provides support for KVM on Intel processors equipped with the VT
extensions.
config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
---help---
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.
endmenu

10
drivers/kvm/Makefile Normal file
View File

@@ -0,0 +1,10 @@
#
# Makefile for Kernel-based Virtual Machine module
#
kvm-objs := kvm_main.o mmu.o x86_emulate.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
kvm-amd-objs = svm.o
obj-$(CONFIG_KVM_AMD) += kvm-amd.o

641
drivers/kvm/kvm.h Normal file
View File

@@ -0,0 +1,641 @@
#ifndef __KVM_H
#define __KVM_H
/*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*/
#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include "vmx.h"
#include <linux/kvm.h>
#include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0)
#define CR0_TS_MASK (1ULL << 3)
#define CR0_NE_MASK (1ULL << 5)
#define CR0_WP_MASK (1ULL << 16)
#define CR0_NW_MASK (1ULL << 29)
#define CR0_CD_MASK (1ULL << 30)
#define CR0_PG_MASK (1ULL << 31)
#define CR3_WPT_MASK (1ULL << 3)
#define CR3_PCD_MASK (1ULL << 4)
#define CR3_RESEVED_BITS 0x07ULL
#define CR3_L_MODE_RESEVED_BITS (~((1ULL << 40) - 1) | 0x0fe7ULL)
#define CR3_FLAGS_MASK ((1ULL << 5) - 1)
#define CR4_VME_MASK (1ULL << 0)
#define CR4_PSE_MASK (1ULL << 4)
#define CR4_PAE_MASK (1ULL << 5)
#define CR4_PGE_MASK (1ULL << 7)
#define CR4_VMXE_MASK (1ULL << 13)
#define KVM_GUEST_CR0_MASK \
(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \
| CR0_NW_MASK | CR0_CD_MASK)
#define KVM_VM_CR0_ALWAYS_ON \
(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK)
#define KVM_GUEST_CR4_MASK \
(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK | CR4_VME_MASK)
#define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 1
#define KVM_MEMORY_SLOTS 4
#define KVM_NUM_MMU_PAGES 256
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define FX_IMAGE_SIZE 512
#define FX_IMAGE_ALIGN 16
#define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
#define DE_VECTOR 0
#define DF_VECTOR 8
#define TS_VECTOR 10
#define NP_VECTOR 11
#define SS_VECTOR 12
#define GP_VECTOR 13
#define PF_VECTOR 14
#define SELECTOR_TI_MASK (1 << 2)
#define SELECTOR_RPL_MASK 0x03
#define IOPL_SHIFT 12
/*
* Address types:
*
* gva - guest virtual address
* gpa - guest physical address
* gfn - guest frame number
* hva - host virtual address
* hpa - host physical address
* hfn - host frame number
*/
typedef unsigned long gva_t;
typedef u64 gpa_t;
typedef unsigned long gfn_t;
typedef unsigned long hva_t;
typedef u64 hpa_t;
typedef unsigned long hfn_t;
#define NR_PTE_CHAIN_ENTRIES 5
struct kvm_pte_chain {
u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
struct hlist_node link;
};
/*
* kvm_mmu_page_role, below, is defined as:
*
* bits 0:3 - total guest paging levels (2-4, or zero for real mode)
* bits 4:7 - page table level for this shadow (1-4)
* bits 8:9 - page table quadrant for 2-level guests
* bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
*/
union kvm_mmu_page_role {
unsigned word;
struct {
unsigned glevels : 4;
unsigned level : 4;
unsigned quadrant : 2;
unsigned pad_for_nice_hex_output : 6;
unsigned metaphysical : 1;
};
};
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
/*
* The following two entries are used to key the shadow page in the
* hash table.
*/
gfn_t gfn;
union kvm_mmu_page_role role;
hpa_t page_hpa;
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
int global; /* Set if all ptes in this page are global */
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
union {
u64 *parent_pte; /* !multimapped */
struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
};
};
struct vmcs {
u32 revision_id;
u32 abort;
char data[0];
};
#define vmx_msr_entry kvm_msr_entry
struct kvm_vcpu;
/*
* x86 supports 3 paging modes (4-level 64-bit, 3-level 64-bit, and 2-level
* 32-bit). The kvm_mmu structure abstracts the details of the current mmu
* mode.
*/
struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
u64 *pae_root;
};
#define KVM_NR_MEM_OBJS 20
struct kvm_mmu_memory_cache {
int nobjs;
void *objects[KVM_NR_MEM_OBJS];
};
/*
* We don't want allocation failures within the mmu code, so we preallocate
* enough memory for a single page fault in a cache.
*/
struct kvm_guest_debug {
int enabled;
unsigned long bp[4];
int singlestep;
};
enum {
VCPU_REGS_RAX = 0,
VCPU_REGS_RCX = 1,
VCPU_REGS_RDX = 2,
VCPU_REGS_RBX = 3,
VCPU_REGS_RSP = 4,
VCPU_REGS_RBP = 5,
VCPU_REGS_RSI = 6,
VCPU_REGS_RDI = 7,
#ifdef CONFIG_X86_64
VCPU_REGS_R8 = 8,
VCPU_REGS_R9 = 9,
VCPU_REGS_R10 = 10,
VCPU_REGS_R11 = 11,
VCPU_REGS_R12 = 12,
VCPU_REGS_R13 = 13,
VCPU_REGS_R14 = 14,
VCPU_REGS_R15 = 15,
#endif
NR_VCPU_REGS
};
enum {
VCPU_SREG_CS,
VCPU_SREG_DS,
VCPU_SREG_ES,
VCPU_SREG_FS,
VCPU_SREG_GS,
VCPU_SREG_SS,
VCPU_SREG_TR,
VCPU_SREG_LDTR,
};
struct kvm_vcpu {
struct kvm *kvm;
union {
struct vmcs *vmcs;
struct vcpu_svm *svm;
};
struct mutex mutex;
int cpu;
int launched;
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
unsigned long irq_pending[NR_IRQ_WORDS];
unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
unsigned long rip; /* needs vcpu_load_rsp_rip() */
unsigned long cr0;
unsigned long cr2;
unsigned long cr3;
gpa_t para_state_gpa;
struct page *para_state_page;
gpa_t hypercall_gpa;
unsigned long cr4;
unsigned long cr8;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 apic_base;
u64 ia32_misc_enable_msr;
int nmsrs;
struct vmx_msr_entry *guest_msrs;
struct vmx_msr_entry *host_msrs;
struct list_head free_pages;
struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES];
struct kvm_mmu mmu;
struct kvm_mmu_memory_cache mmu_pte_chain_cache;
struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
gfn_t last_pt_write_gfn;
int last_pt_write_count;
struct kvm_guest_debug guest_debug;
char fx_buf[FX_BUF_SIZE];
char *host_fx_image;
char *guest_fx_image;
int mmio_needed;
int mmio_read_completed;
int mmio_is_write;
int mmio_size;
unsigned char mmio_data[8];
gpa_t mmio_phys_addr;
struct {
int active;
u8 save_iopl;
struct kvm_save_segment {
u16 selector;
unsigned long base;
u32 limit;
u32 ar;
} tr, es, ds, fs, gs;
} rmode;
};
struct kvm_memory_slot {
gfn_t base_gfn;
unsigned long npages;
unsigned long flags;
struct page **phys_mem;
unsigned long *dirty_bitmap;
};
struct kvm {
spinlock_t lock; /* protects everything except vcpus */
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
/*
* Hash table of struct kvm_mmu_page.
*/
struct list_head active_mmu_pages;
int n_free_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
int memory_config_version;
int busy;
unsigned long rmap_overflow;
struct list_head vm_list;
struct file *filp;
};
struct kvm_stat {
u32 pf_fixed;
u32 pf_guest;
u32 tlb_flush;
u32 invlpg;
u32 exits;
u32 io_exits;
u32 mmio_exits;
u32 signal_exits;
u32 irq_window_exits;
u32 halt_exits;
u32 request_irq_exits;
u32 irq_exits;
};
struct descriptor_table {
u16 limit;
unsigned long base;
} __attribute__((packed));
struct kvm_arch_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
void (*hardware_enable)(void *dummy); /* __init */
void (*hardware_disable)(void *dummy);
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*vcpu_decache)(struct kvm_vcpu *vcpu);
int (*set_guest_debug)(struct kvm_vcpu *vcpu,
struct kvm_debug_guest *dbg);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value,
int *exception);
void (*cache_regs)(struct kvm_vcpu *vcpu);
void (*decache_regs)(struct kvm_vcpu *vcpu);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*invlpg)(struct kvm_vcpu *vcpu, gva_t addr);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
void (*inject_page_fault)(struct kvm_vcpu *vcpu,
unsigned long addr, u32 err_code);
void (*inject_gp)(struct kvm_vcpu *vcpu, unsigned err_code);
int (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
};
extern struct kvm_stat kvm_stat;
extern struct kvm_arch_ops *kvm_arch_ops;
#define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
#define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
void kvm_exit_arch(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_setup(struct kvm_vcpu *vcpu);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
#define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_emulator_want_group7_invlpg(void);
extern hpa_t bad_page_address;
static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn)
{
return slot->phys_mem[gfn - slot->base_gfn];
}
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
enum emulation_result {
EMULATE_DONE, /* no further processing */
EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
EMULATE_FAIL, /* can't emulate this instruction */
};
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long cr2, u16 error_code);
void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
unsigned long *rflags);
unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr);
void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
unsigned long *rflags);
struct x86_emulate_ctxt;
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu);
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
unsigned long *dest);
int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value);
void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
void fx_init(struct kvm_vcpu *vcpu);
void load_msrs(struct vmx_msr_entry *e, int n);
void save_msrs(struct vmx_msr_entry *e, int n);
void kvm_resched(struct kvm_vcpu *vcpu);
int kvm_read_guest(struct kvm_vcpu *vcpu,
gva_t addr,
unsigned long size,
void *dest);
int kvm_write_guest(struct kvm_vcpu *vcpu,
gva_t addr,
unsigned long size,
void *data);
unsigned long segment_base(u16 selector);
void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
u32 error_code)
{
if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
kvm_mmu_free_some_pages(vcpu);
return vcpu->mmu.page_fault(vcpu, gva, error_code);
}
static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL;
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return vcpu->shadow_efer & EFER_LME;
#else
return 0;
#endif
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & CR4_PAE_MASK;
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return vcpu->cr4 & CR4_PSE_MASK;
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return vcpu->cr0 & CR0_PG_MASK;
}
static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
{
return slot - kvm->memslots;
}
static inline struct kvm_mmu_page *page_header(hpa_t shadow_page)
{
struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
return (struct kvm_mmu_page *)page_private(page);
}
static inline u16 read_fs(void)
{
u16 seg;
asm ("mov %%fs, %0" : "=g"(seg));
return seg;
}
static inline u16 read_gs(void)
{
u16 seg;
asm ("mov %%gs, %0" : "=g"(seg));
return seg;
}
static inline u16 read_ldt(void)
{
u16 ldt;
asm ("sldt %0" : "=g"(ldt));
return ldt;
}
static inline void load_fs(u16 sel)
{
asm ("mov %0, %%fs" : : "rm"(sel));
}
static inline void load_gs(u16 sel)
{
asm ("mov %0, %%gs" : : "rm"(sel));
}
#ifndef load_ldt
static inline void load_ldt(u16 sel)
{
asm ("lldt %0" : : "rm"(sel));
}
#endif
static inline void get_idt(struct descriptor_table *table)
{
asm ("sidt %0" : "=m"(*table));
}
static inline void get_gdt(struct descriptor_table *table)
{
asm ("sgdt %0" : "=m"(*table));
}
static inline unsigned long read_tr_base(void)
{
u16 tr;
asm ("str %0" : "=g"(tr));
return segment_base(tr);
}
#ifdef CONFIG_X86_64
static inline unsigned long read_msr(unsigned long msr)
{
u64 value;
rdmsrl(msr, value);
return value;
}
#endif
static inline void fx_save(void *image)
{
asm ("fxsave (%0)":: "r" (image));
}
static inline void fx_restore(void *image)
{
asm ("fxrstor (%0)":: "r" (image));
}
static inline void fpu_init(void)
{
asm ("finit");
}
static inline u32 get_rdx_init_val(void)
{
return 0x600; /* P6 family */
}
#define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2"
#define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3"
#define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30"
#define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0"
#define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0"
#define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4"
#define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4"
#define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30"
#define MSR_IA32_TIME_STAMP_COUNTER 0x010
#define TSS_IOPB_BASE_OFFSET 0x66
#define TSS_BASE_SIZE 0x68
#define TSS_IOPB_SIZE (65536 / 8)
#define TSS_REDIRECTION_SIZE (256 / 8)
#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
#endif

2568
drivers/kvm/kvm_main.c Normal file

File diff suppressed because it is too large Load Diff

45
drivers/kvm/kvm_svm.h Normal file
View File

@@ -0,0 +1,45 @@
#ifndef __KVM_SVM_H
#define __KVM_SVM_H
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/list.h>
#include <asm/msr.h>
#include "svm.h"
#include "kvm.h"
static const u32 host_save_msrs[] = {
#ifdef CONFIG_X86_64
MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
MSR_FS_BASE, MSR_GS_BASE,
#endif
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_IA32_DEBUGCTLMSR, /*MSR_IA32_LASTBRANCHFROMIP,
MSR_IA32_LASTBRANCHTOIP, MSR_IA32_LASTINTFROMIP,MSR_IA32_LASTINTTOIP,*/
};
#define NR_HOST_SAVE_MSRS ARRAY_SIZE(host_save_msrs)
#define NUM_DB_REGS 4
struct vcpu_svm {
struct vmcb *vmcb;
unsigned long vmcb_pa;
struct svm_cpu_data *svm_data;
uint64_t asid_generation;
unsigned long cr0;
unsigned long cr4;
unsigned long db_regs[NUM_DB_REGS];
u64 next_rip;
u64 host_msrs[NR_HOST_SAVE_MSRS];
unsigned long host_cr2;
unsigned long host_db_regs[NUM_DB_REGS];
unsigned long host_dr6;
unsigned long host_dr7;
};
#endif

14
drivers/kvm/kvm_vmx.h Normal file
View File

@@ -0,0 +1,14 @@
#ifndef __KVM_VMX_H
#define __KVM_VMX_H
#ifdef CONFIG_X86_64
/*
* avoid save/load MSR_SYSCALL_MASK and MSR_LSTAR by std vt
* mechanism (cpu bug AA24)
*/
#define NR_BAD_MSRS 2
#else
#define NR_BAD_MSRS 0
#endif
#endif

1477
drivers/kvm/mmu.c Normal file

File diff suppressed because it is too large Load Diff

480
drivers/kvm/paging_tmpl.h Normal file
View File

@@ -0,0 +1,480 @@
/*
* Kernel-based Virtual Machine driver for Linux
*
* This module enables machines with Intel VT-x extensions to run virtual
* machines without emulation or binary translation.
*
* MMU support
*
* Copyright (C) 2006 Qumranet, Inc.
*
* Authors:
* Yaniv Kamay <yaniv@qumranet.com>
* Avi Kivity <avi@qumranet.com>
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
*/
/*
* We need the mmu code to access both 32-bit and 64-bit guest ptes,
* so the code in this file is compiled twice, once per pte size.
*/
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
#define FNAME(name) paging##64_##name
#define PT_BASE_ADDR_MASK PT64_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT64_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
#define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#else
#define PT_MAX_FULL_LEVELS 2
#endif
#elif PTTYPE == 32
#define pt_element_t u32
#define guest_walker guest_walker32
#define FNAME(name) paging##32_##name
#define PT_BASE_ADDR_MASK PT32_BASE_ADDR_MASK
#define PT_DIR_BASE_ADDR_MASK PT32_DIR_BASE_ADDR_MASK
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
#define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
#define PT_MAX_FULL_LEVELS 2
#else
#error Invalid PTTYPE value
#endif
/*
* The guest_walker structure emulates the behavior of the hardware page
* table walker.
*/
struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
pt_element_t *table;
pt_element_t *ptep;
pt_element_t inherited_ar;
gfn_t gfn;
u32 error_code;
};
/*
* Fetch a guest pte for a guest virtual address
*/
static int FNAME(walk_addr)(struct guest_walker *walker,
struct kvm_vcpu *vcpu, gva_t addr,
int write_fault, int user_fault, int fetch_fault)
{
hpa_t hpa;
struct kvm_memory_slot *slot;
pt_element_t *ptep;
pt_element_t root;
gfn_t table_gfn;
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level;
walker->table = NULL;
root = vcpu->cr3;
#if PTTYPE == 64
if (!is_long_mode(vcpu)) {
walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
root = *walker->ptep;
if (!(root & PT_PRESENT_MASK))
goto not_present;
--walker->level;
}
#endif
table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
walker->table_gfn[walker->level - 1] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
for (;;) {
int index = PT_INDEX(addr, walker->level);
hpa_t paddr;
ptep = &walker->table[index];
ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
((unsigned long)ptep & PAGE_MASK));
if (!is_present_pte(*ptep))
goto not_present;
if (write_fault && !is_writeble_pte(*ptep))
if (user_fault || is_write_protection(vcpu))
goto access_error;
if (user_fault && !(*ptep & PT_USER_MASK))
goto access_error;
#if PTTYPE == 64
if (fetch_fault && is_nx(vcpu) && (*ptep & PT64_NX_MASK))
goto access_error;
#endif
if (!(*ptep & PT_ACCESSED_MASK)) {
mark_page_dirty(vcpu->kvm, table_gfn);
*ptep |= PT_ACCESSED_MASK;
}
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
break;
}
if (walker->level == PT_DIRECTORY_LEVEL
&& (*ptep & PT_PAGE_SIZE_MASK)
&& (PTTYPE == 64 || is_pse(vcpu))) {
walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
>> PAGE_SHIFT;
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
break;
}
if (walker->level != 3 || is_long_mode(vcpu))
walker->inherited_ar &= walker->table[index];
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
kunmap_atomic(walker->table, KM_USER0);
walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
KM_USER0);
--walker->level;
walker->table_gfn[walker->level - 1 ] = table_gfn;
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
walker->level - 1, table_gfn);
}
walker->ptep = ptep;
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
return 1;
not_present:
walker->error_code = 0;
goto err;
access_error:
walker->error_code = PFERR_PRESENT_MASK;
err:
if (write_fault)
walker->error_code |= PFERR_WRITE_MASK;
if (user_fault)
walker->error_code |= PFERR_USER_MASK;
if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK;
return 0;
}
static void FNAME(release_walker)(struct guest_walker *walker)
{
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
}
static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
struct guest_walker *walker)
{
mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
}
static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
u64 *shadow_pte, u64 access_bits, gfn_t gfn)
{
ASSERT(*shadow_pte == 0);
access_bits &= guest_pte;
*shadow_pte = (guest_pte & PT_PTE_COPY_MASK);
set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK,
guest_pte & PT_DIRTY_MASK, access_bits, gfn);
}
static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde,
u64 *shadow_pte, u64 access_bits, gfn_t gfn)
{
gpa_t gaddr;
ASSERT(*shadow_pte == 0);
access_bits &= guest_pde;
gaddr = (gpa_t)gfn << PAGE_SHIFT;
if (PTTYPE == 32 && is_cpuid_PSE36())
gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) <<
(32 - PT32_DIR_PSE36_SHIFT);
*shadow_pte = guest_pde & PT_PTE_COPY_MASK;
set_pte_common(vcpu, shadow_pte, gaddr,
guest_pde & PT_DIRTY_MASK, access_bits, gfn);
}
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
*/
static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *walker)
{
hpa_t shadow_addr;
int level;
u64 *prev_shadow_ent = NULL;
pt_element_t *guest_ent = walker->ptep;
if (!is_present_pte(*guest_ent))
return NULL;
shadow_addr = vcpu->mmu.root_hpa;
level = vcpu->mmu.shadow_root_level;
if (level == PT32E_ROOT_LEVEL) {
shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
shadow_addr &= PT64_BASE_ADDR_MASK;
--level;
}
for (; ; level--) {
u32 index = SHADOW_PT_INDEX(addr, level);
u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
struct kvm_mmu_page *shadow_page;
u64 shadow_pte;
int metaphysical;
gfn_t table_gfn;
if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
if (level == PT_PAGE_TABLE_LEVEL)
return shadow_ent;
shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
prev_shadow_ent = shadow_ent;
continue;
}
if (level == PT_PAGE_TABLE_LEVEL) {
if (walker->level == PT_DIRECTORY_LEVEL) {
if (prev_shadow_ent)
*prev_shadow_ent |= PT_SHADOW_PS_MARK;
FNAME(set_pde)(vcpu, *guest_ent, shadow_ent,
walker->inherited_ar,
walker->gfn);
} else {
ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
FNAME(set_pte)(vcpu, *guest_ent, shadow_ent,
walker->inherited_ar,
walker->gfn);
}
return shadow_ent;
}
if (level - 1 == PT_PAGE_TABLE_LEVEL
&& walker->level == PT_DIRECTORY_LEVEL) {
metaphysical = 1;
table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
} else {
metaphysical = 0;
table_gfn = walker->table_gfn[level - 2];
}
shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
metaphysical, shadow_ent);
shadow_addr = shadow_page->page_hpa;
shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
| PT_WRITABLE_MASK | PT_USER_MASK;
*shadow_ent = shadow_pte;
prev_shadow_ent = shadow_ent;
}
}
/*
* The guest faulted for write. We need to
*
* - check write permissions
* - update the guest pte dirty bit
* - update our own dirty page tracking structures
*/
static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
u64 *shadow_ent,
struct guest_walker *walker,
gva_t addr,
int user,
int *write_pt)
{
pt_element_t *guest_ent;
int writable_shadow;
gfn_t gfn;
struct kvm_mmu_page *page;
if (is_writeble_pte(*shadow_ent))
return !user || (*shadow_ent & PT_USER_MASK);
writable_shadow = *shadow_ent & PT_SHADOW_WRITABLE_MASK;
if (user) {
/*
* User mode access. Fail if it's a kernel page or a read-only
* page.
*/
if (!(*shadow_ent & PT_SHADOW_USER_MASK) || !writable_shadow)
return 0;
ASSERT(*shadow_ent & PT_USER_MASK);
} else
/*
* Kernel mode access. Fail if it's a read-only page and
* supervisor write protection is enabled.
*/
if (!writable_shadow) {
if (is_write_protection(vcpu))
return 0;
*shadow_ent &= ~PT_USER_MASK;
}
guest_ent = walker->ptep;
if (!is_present_pte(*guest_ent)) {
*shadow_ent = 0;
return 0;
}
gfn = walker->gfn;
if (user) {
/*
* Usermode page faults won't be for page table updates.
*/
while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
pgprintk("%s: zap %lx %x\n",
__FUNCTION__, gfn, page->role.word);
kvm_mmu_zap_page(vcpu, page);
}
} else if (kvm_mmu_lookup_page(vcpu, gfn)) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
__FUNCTION__, gfn);
mark_page_dirty(vcpu->kvm, gfn);
FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
*guest_ent |= PT_DIRTY_MASK;
*write_pt = 1;
return 0;
}
mark_page_dirty(vcpu->kvm, gfn);
*shadow_ent |= PT_WRITABLE_MASK;
FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
*guest_ent |= PT_DIRTY_MASK;
rmap_add(vcpu, shadow_ent);
return 1;
}
/*
* Page fault handler. There are several causes for a page fault:
* - there is no shadow pte for the guest pte
* - write access through a shadow pte marked read only so that we can set
* the dirty bit
* - write access to a shadow pte marked read only so we can update the page
* dirty bitmap, when userspace requests it
* - mmio access; in this case we will never install a present shadow pte
* - normal guest page fault due to the guest pte marked not present, not
* writable, or not executable
*
* Returns: 1 if we need to emulate the instruction, 0 otherwise, or
* a negative value on error.
*/
static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
u32 error_code)
{
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
int fetch_fault = error_code & PFERR_FETCH_MASK;
struct guest_walker walker;
u64 *shadow_pte;
int fixed;
int write_pt = 0;
int r;
pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
kvm_mmu_audit(vcpu, "pre page fault");
r = mmu_topup_memory_caches(vcpu);
if (r)
return r;
/*
* Look up the shadow pte for the faulting address.
*/
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
fetch_fault);
/*
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
pgprintk("%s: guest page fault\n", __FUNCTION__);
inject_page_fault(vcpu, addr, walker.error_code);
FNAME(release_walker)(&walker);
return 0;
}
shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
shadow_pte, *shadow_pte);
/*
* Update the shadow pte.
*/
if (write_fault)
fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
user_fault, &write_pt);
else
fixed = fix_read_pf(shadow_pte);
pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
shadow_pte, *shadow_pte);
FNAME(release_walker)(&walker);
/*
* mmio: emulate if accessible, otherwise its a guest fault.
*/
if (is_io_pte(*shadow_pte))
return 1;
++kvm_stat.pf_fixed;
kvm_mmu_audit(vcpu, "post page fault (fixed)");
return write_pt;
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
if (r) {
gpa = (gpa_t)walker.gfn << PAGE_SHIFT;
gpa |= vaddr & ~PAGE_MASK;
}
FNAME(release_walker)(&walker);
return gpa;
}
#undef pt_element_t
#undef guest_walker
#undef FNAME
#undef PT_BASE_ADDR_MASK
#undef PT_INDEX
#undef SHADOW_PT_INDEX
#undef PT_LEVEL_MASK
#undef PT_PTE_COPY_MASK
#undef PT_NON_PTE_COPY_MASK
#undef PT_DIR_BASE_ADDR_MASK
#undef PT_MAX_FULL_LEVELS

View File

@@ -0,0 +1,17 @@
struct segment_descriptor {
u16 limit_low;
u16 base_low;
u8 base_mid;
u8 type : 4;
u8 system : 1;
u8 dpl : 2;
u8 present : 1;
u8 limit_high : 4;
u8 avl : 1;
u8 long_mode : 1;
u8 default_op : 1;
u8 granularity : 1;
u8 base_high;
} __attribute__((packed));

1752
drivers/kvm/svm.c Normal file

File diff suppressed because it is too large Load Diff

315
drivers/kvm/svm.h Normal file
View File

@@ -0,0 +1,315 @@
#ifndef __SVM_H
#define __SVM_H
enum {
INTERCEPT_INTR,
INTERCEPT_NMI,
INTERCEPT_SMI,
INTERCEPT_INIT,
INTERCEPT_VINTR,
INTERCEPT_SELECTIVE_CR0,
INTERCEPT_STORE_IDTR,
INTERCEPT_STORE_GDTR,
INTERCEPT_STORE_LDTR,
INTERCEPT_STORE_TR,
INTERCEPT_LOAD_IDTR,
INTERCEPT_LOAD_GDTR,
INTERCEPT_LOAD_LDTR,
INTERCEPT_LOAD_TR,
INTERCEPT_RDTSC,
INTERCEPT_RDPMC,
INTERCEPT_PUSHF,
INTERCEPT_POPF,
INTERCEPT_CPUID,
INTERCEPT_RSM,
INTERCEPT_IRET,
INTERCEPT_INTn,
INTERCEPT_INVD,
INTERCEPT_PAUSE,
INTERCEPT_HLT,
INTERCEPT_INVLPG,
INTERCEPT_INVLPGA,
INTERCEPT_IOIO_PROT,
INTERCEPT_MSR_PROT,
INTERCEPT_TASK_SWITCH,
INTERCEPT_FERR_FREEZE,
INTERCEPT_SHUTDOWN,
INTERCEPT_VMRUN,
INTERCEPT_VMMCALL,
INTERCEPT_VMLOAD,
INTERCEPT_VMSAVE,
INTERCEPT_STGI,
INTERCEPT_CLGI,
INTERCEPT_SKINIT,
INTERCEPT_RDTSCP,
INTERCEPT_ICEBP,
INTERCEPT_WBINVD,
};
struct __attribute__ ((__packed__)) vmcb_control_area {
u16 intercept_cr_read;
u16 intercept_cr_write;
u16 intercept_dr_read;
u16 intercept_dr_write;
u32 intercept_exceptions;
u64 intercept;
u8 reserved_1[44];
u64 iopm_base_pa;
u64 msrpm_base_pa;
u64 tsc_offset;
u32 asid;
u8 tlb_ctl;
u8 reserved_2[3];
u32 int_ctl;
u32 int_vector;
u32 int_state;
u8 reserved_3[4];
u32 exit_code;
u32 exit_code_hi;
u64 exit_info_1;
u64 exit_info_2;
u32 exit_int_info;
u32 exit_int_info_err;
u64 nested_ctl;
u8 reserved_4[16];
u32 event_inj;
u32 event_inj_err;
u64 nested_cr3;
u64 lbr_ctl;
u8 reserved_5[832];
};
#define TLB_CONTROL_DO_NOTHING 0
#define TLB_CONTROL_FLUSH_ALL_ASID 1
#define V_TPR_MASK 0x0f
#define V_IRQ_SHIFT 8
#define V_IRQ_MASK (1 << V_IRQ_SHIFT)
#define V_INTR_PRIO_SHIFT 16
#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT)
#define V_IGN_TPR_SHIFT 20
#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT)
#define V_INTR_MASKING_SHIFT 24
#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT)
#define SVM_INTERRUPT_SHADOW_MASK 1
#define SVM_IOIO_STR_SHIFT 2
#define SVM_IOIO_REP_SHIFT 3
#define SVM_IOIO_SIZE_SHIFT 4
#define SVM_IOIO_ASIZE_SHIFT 7
#define SVM_IOIO_TYPE_MASK 1
#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT)
#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT)
#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT)
#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT)
struct __attribute__ ((__packed__)) vmcb_seg {
u16 selector;
u16 attrib;
u32 limit;
u64 base;
};
struct __attribute__ ((__packed__)) vmcb_save_area {
struct vmcb_seg es;
struct vmcb_seg cs;
struct vmcb_seg ss;
struct vmcb_seg ds;
struct vmcb_seg fs;
struct vmcb_seg gs;
struct vmcb_seg gdtr;
struct vmcb_seg ldtr;
struct vmcb_seg idtr;
struct vmcb_seg tr;
u8 reserved_1[43];
u8 cpl;
u8 reserved_2[4];
u64 efer;
u8 reserved_3[112];
u64 cr4;
u64 cr3;
u64 cr0;
u64 dr7;
u64 dr6;
u64 rflags;
u64 rip;
u8 reserved_4[88];
u64 rsp;
u8 reserved_5[24];
u64 rax;
u64 star;
u64 lstar;
u64 cstar;
u64 sfmask;
u64 kernel_gs_base;
u64 sysenter_cs;
u64 sysenter_esp;
u64 sysenter_eip;
u64 cr2;
u8 reserved_6[32];
u64 g_pat;
u64 dbgctl;
u64 br_from;
u64 br_to;
u64 last_excp_from;
u64 last_excp_to;
};
struct __attribute__ ((__packed__)) vmcb {
struct vmcb_control_area control;
struct vmcb_save_area save;
};
#define SVM_CPUID_FEATURE_SHIFT 2
#define SVM_CPUID_FUNC 0x8000000a
#define MSR_EFER_SVME_MASK (1ULL << 12)
#define MSR_VM_HSAVE_PA 0xc0010117ULL
#define SVM_SELECTOR_S_SHIFT 4
#define SVM_SELECTOR_DPL_SHIFT 5
#define SVM_SELECTOR_P_SHIFT 7
#define SVM_SELECTOR_AVL_SHIFT 8
#define SVM_SELECTOR_L_SHIFT 9
#define SVM_SELECTOR_DB_SHIFT 10
#define SVM_SELECTOR_G_SHIFT 11
#define SVM_SELECTOR_TYPE_MASK (0xf)
#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT)
#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT)
#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT)
#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT)
#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT)
#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT)
#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT)
#define SVM_SELECTOR_WRITE_MASK (1 << 1)
#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
#define SVM_SELECTOR_CODE_MASK (1 << 3)
#define INTERCEPT_CR0_MASK 1
#define INTERCEPT_CR3_MASK (1 << 3)
#define INTERCEPT_CR4_MASK (1 << 4)
#define INTERCEPT_DR0_MASK 1
#define INTERCEPT_DR1_MASK (1 << 1)
#define INTERCEPT_DR2_MASK (1 << 2)
#define INTERCEPT_DR3_MASK (1 << 3)
#define INTERCEPT_DR4_MASK (1 << 4)
#define INTERCEPT_DR5_MASK (1 << 5)
#define INTERCEPT_DR6_MASK (1 << 6)
#define INTERCEPT_DR7_MASK (1 << 7)
#define SVM_EVTINJ_VEC_MASK 0xff
#define SVM_EVTINJ_TYPE_SHIFT 8
#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT)
#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT)
#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT)
#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT)
#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT)
#define SVM_EVTINJ_VALID (1 << 31)
#define SVM_EVTINJ_VALID_ERR (1 << 11)
#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT
#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT
#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
#define SVM_EXIT_READ_CR0 0x000
#define SVM_EXIT_READ_CR3 0x003
#define SVM_EXIT_READ_CR4 0x004
#define SVM_EXIT_READ_CR8 0x008
#define SVM_EXIT_WRITE_CR0 0x010
#define SVM_EXIT_WRITE_CR3 0x013
#define SVM_EXIT_WRITE_CR4 0x014
#define SVM_EXIT_WRITE_CR8 0x018
#define SVM_EXIT_READ_DR0 0x020
#define SVM_EXIT_READ_DR1 0x021
#define SVM_EXIT_READ_DR2 0x022
#define SVM_EXIT_READ_DR3 0x023
#define SVM_EXIT_READ_DR4 0x024
#define SVM_EXIT_READ_DR5 0x025
#define SVM_EXIT_READ_DR6 0x026
#define SVM_EXIT_READ_DR7 0x027
#define SVM_EXIT_WRITE_DR0 0x030
#define SVM_EXIT_WRITE_DR1 0x031
#define SVM_EXIT_WRITE_DR2 0x032
#define SVM_EXIT_WRITE_DR3 0x033
#define SVM_EXIT_WRITE_DR4 0x034
#define SVM_EXIT_WRITE_DR5 0x035
#define SVM_EXIT_WRITE_DR6 0x036
#define SVM_EXIT_WRITE_DR7 0x037
#define SVM_EXIT_EXCP_BASE 0x040
#define SVM_EXIT_INTR 0x060
#define SVM_EXIT_NMI 0x061
#define SVM_EXIT_SMI 0x062
#define SVM_EXIT_INIT 0x063
#define SVM_EXIT_VINTR 0x064
#define SVM_EXIT_CR0_SEL_WRITE 0x065
#define SVM_EXIT_IDTR_READ 0x066
#define SVM_EXIT_GDTR_READ 0x067
#define SVM_EXIT_LDTR_READ 0x068
#define SVM_EXIT_TR_READ 0x069
#define SVM_EXIT_IDTR_WRITE 0x06a
#define SVM_EXIT_GDTR_WRITE 0x06b
#define SVM_EXIT_LDTR_WRITE 0x06c
#define SVM_EXIT_TR_WRITE 0x06d
#define SVM_EXIT_RDTSC 0x06e
#define SVM_EXIT_RDPMC 0x06f
#define SVM_EXIT_PUSHF 0x070
#define SVM_EXIT_POPF 0x071
#define SVM_EXIT_CPUID 0x072
#define SVM_EXIT_RSM 0x073
#define SVM_EXIT_IRET 0x074
#define SVM_EXIT_SWINT 0x075
#define SVM_EXIT_INVD 0x076
#define SVM_EXIT_PAUSE 0x077
#define SVM_EXIT_HLT 0x078
#define SVM_EXIT_INVLPG 0x079
#define SVM_EXIT_INVLPGA 0x07a
#define SVM_EXIT_IOIO 0x07b
#define SVM_EXIT_MSR 0x07c
#define SVM_EXIT_TASK_SWITCH 0x07d
#define SVM_EXIT_FERR_FREEZE 0x07e
#define SVM_EXIT_SHUTDOWN 0x07f
#define SVM_EXIT_VMRUN 0x080
#define SVM_EXIT_VMMCALL 0x081
#define SVM_EXIT_VMLOAD 0x082
#define SVM_EXIT_VMSAVE 0x083
#define SVM_EXIT_STGI 0x084
#define SVM_EXIT_CLGI 0x085
#define SVM_EXIT_SKINIT 0x086
#define SVM_EXIT_RDTSCP 0x087
#define SVM_EXIT_ICEBP 0x088
#define SVM_EXIT_WBINVD 0x089
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_ERR -1
#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) // TS and MP
#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
#define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
#define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
#define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
#endif

2104
drivers/kvm/vmx.c Normal file

File diff suppressed because it is too large Load Diff

297
drivers/kvm/vmx.h Normal file
View File

@@ -0,0 +1,297 @@
#ifndef VMX_H
#define VMX_H
/*
* vmx.h: VMX Architecture related definitions
* Copyright (c) 2004, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* A few random additions are:
* Copyright (C) 2006 Qumranet
* Avi Kivity <avi@qumranet.com>
* Yaniv Kamay <yaniv@qumranet.com>
*
*/
#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
#define CPU_BASED_HLT_EXITING 0x00000080
#define CPU_BASED_INVDPG_EXITING 0x00000200
#define CPU_BASED_MWAIT_EXITING 0x00000400
#define CPU_BASED_RDPMC_EXITING 0x00000800
#define CPU_BASED_RDTSC_EXITING 0x00001000
#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
#define CPU_BASED_CR8_STORE_EXITING 0x00100000
#define CPU_BASED_TPR_SHADOW 0x00200000
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000
#define CPU_BASED_MSR_BITMAPS 0x10000000
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
#define PIN_BASED_EXT_INTR_MASK 0x1
#define PIN_BASED_NMI_EXITING 0x8
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200
/* VMCS Encodings */
enum vmcs_field {
GUEST_ES_SELECTOR = 0x00000800,
GUEST_CS_SELECTOR = 0x00000802,
GUEST_SS_SELECTOR = 0x00000804,
GUEST_DS_SELECTOR = 0x00000806,
GUEST_FS_SELECTOR = 0x00000808,
GUEST_GS_SELECTOR = 0x0000080a,
GUEST_LDTR_SELECTOR = 0x0000080c,
GUEST_TR_SELECTOR = 0x0000080e,
HOST_ES_SELECTOR = 0x00000c00,
HOST_CS_SELECTOR = 0x00000c02,
HOST_SS_SELECTOR = 0x00000c04,
HOST_DS_SELECTOR = 0x00000c06,
HOST_FS_SELECTOR = 0x00000c08,
HOST_GS_SELECTOR = 0x00000c0a,
HOST_TR_SELECTOR = 0x00000c0c,
IO_BITMAP_A = 0x00002000,
IO_BITMAP_A_HIGH = 0x00002001,
IO_BITMAP_B = 0x00002002,
IO_BITMAP_B_HIGH = 0x00002003,
MSR_BITMAP = 0x00002004,
MSR_BITMAP_HIGH = 0x00002005,
VM_EXIT_MSR_STORE_ADDR = 0x00002006,
VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
TSC_OFFSET = 0x00002010,
TSC_OFFSET_HIGH = 0x00002011,
VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
VMCS_LINK_POINTER = 0x00002800,
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
CR3_TARGET_COUNT = 0x0000400a,
VM_EXIT_CONTROLS = 0x0000400c,
VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
VM_ENTRY_CONTROLS = 0x00004012,
VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
TPR_THRESHOLD = 0x0000401c,
SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
VM_INSTRUCTION_ERROR = 0x00004400,
VM_EXIT_REASON = 0x00004402,
VM_EXIT_INTR_INFO = 0x00004404,
VM_EXIT_INTR_ERROR_CODE = 0x00004406,
IDT_VECTORING_INFO_FIELD = 0x00004408,
IDT_VECTORING_ERROR_CODE = 0x0000440a,
VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
VMX_INSTRUCTION_INFO = 0x0000440e,
GUEST_ES_LIMIT = 0x00004800,
GUEST_CS_LIMIT = 0x00004802,
GUEST_SS_LIMIT = 0x00004804,
GUEST_DS_LIMIT = 0x00004806,
GUEST_FS_LIMIT = 0x00004808,
GUEST_GS_LIMIT = 0x0000480a,
GUEST_LDTR_LIMIT = 0x0000480c,
GUEST_TR_LIMIT = 0x0000480e,
GUEST_GDTR_LIMIT = 0x00004810,
GUEST_IDTR_LIMIT = 0x00004812,
GUEST_ES_AR_BYTES = 0x00004814,
GUEST_CS_AR_BYTES = 0x00004816,
GUEST_SS_AR_BYTES = 0x00004818,
GUEST_DS_AR_BYTES = 0x0000481a,
GUEST_FS_AR_BYTES = 0x0000481c,
GUEST_GS_AR_BYTES = 0x0000481e,
GUEST_LDTR_AR_BYTES = 0x00004820,
GUEST_TR_AR_BYTES = 0x00004822,
GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
GUEST_ACTIVITY_STATE = 0X00004826,
GUEST_SYSENTER_CS = 0x0000482A,
HOST_IA32_SYSENTER_CS = 0x00004c00,
CR0_GUEST_HOST_MASK = 0x00006000,
CR4_GUEST_HOST_MASK = 0x00006002,
CR0_READ_SHADOW = 0x00006004,
CR4_READ_SHADOW = 0x00006006,
CR3_TARGET_VALUE0 = 0x00006008,
CR3_TARGET_VALUE1 = 0x0000600a,
CR3_TARGET_VALUE2 = 0x0000600c,
CR3_TARGET_VALUE3 = 0x0000600e,
EXIT_QUALIFICATION = 0x00006400,
GUEST_LINEAR_ADDRESS = 0x0000640a,
GUEST_CR0 = 0x00006800,
GUEST_CR3 = 0x00006802,
GUEST_CR4 = 0x00006804,
GUEST_ES_BASE = 0x00006806,
GUEST_CS_BASE = 0x00006808,
GUEST_SS_BASE = 0x0000680a,
GUEST_DS_BASE = 0x0000680c,
GUEST_FS_BASE = 0x0000680e,
GUEST_GS_BASE = 0x00006810,
GUEST_LDTR_BASE = 0x00006812,
GUEST_TR_BASE = 0x00006814,
GUEST_GDTR_BASE = 0x00006816,
GUEST_IDTR_BASE = 0x00006818,
GUEST_DR7 = 0x0000681a,
GUEST_RSP = 0x0000681c,
GUEST_RIP = 0x0000681e,
GUEST_RFLAGS = 0x00006820,
GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
GUEST_SYSENTER_ESP = 0x00006824,
GUEST_SYSENTER_EIP = 0x00006826,
HOST_CR0 = 0x00006c00,
HOST_CR3 = 0x00006c02,
HOST_CR4 = 0x00006c04,
HOST_FS_BASE = 0x00006c06,
HOST_GS_BASE = 0x00006c08,
HOST_TR_BASE = 0x00006c0a,
HOST_GDTR_BASE = 0x00006c0c,
HOST_IDTR_BASE = 0x00006c0e,
HOST_IA32_SYSENTER_ESP = 0x00006c10,
HOST_IA32_SYSENTER_EIP = 0x00006c12,
HOST_RSP = 0x00006c14,
HOST_RIP = 0x00006c16,
};
#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
#define EXIT_REASON_TRIPLE_FAULT 2
#define EXIT_REASON_PENDING_INTERRUPT 7
#define EXIT_REASON_TASK_SWITCH 9
#define EXIT_REASON_CPUID 10
#define EXIT_REASON_HLT 12
#define EXIT_REASON_INVLPG 14
#define EXIT_REASON_RDPMC 15
#define EXIT_REASON_RDTSC 16
#define EXIT_REASON_VMCALL 18
#define EXIT_REASON_VMCLEAR 19
#define EXIT_REASON_VMLAUNCH 20
#define EXIT_REASON_VMPTRLD 21
#define EXIT_REASON_VMPTRST 22
#define EXIT_REASON_VMREAD 23
#define EXIT_REASON_VMRESUME 24
#define EXIT_REASON_VMWRITE 25
#define EXIT_REASON_VMOFF 26
#define EXIT_REASON_VMON 27
#define EXIT_REASON_CR_ACCESS 28
#define EXIT_REASON_DR_ACCESS 29
#define EXIT_REASON_IO_INSTRUCTION 30
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
/*
* Interruption-information format
*/
#define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */
#define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */
#define INTR_INFO_DELIEVER_CODE_MASK 0x800 /* 11 */
#define INTR_INFO_VALID_MASK 0x80000000 /* 31 */
#define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK
#define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK
#define VECTORING_INFO_DELIEVER_CODE_MASK INTR_INFO_DELIEVER_CODE_MASK
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
#define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
/*
* Exit Qualifications for MOV for Control Register Access
*/
#define CONTROL_REG_ACCESS_NUM 0x7 /* 2:0, number of control register */
#define CONTROL_REG_ACCESS_TYPE 0x30 /* 5:4, access type */
#define CONTROL_REG_ACCESS_REG 0xf00 /* 10:8, general purpose register */
#define LMSW_SOURCE_DATA_SHIFT 16
#define LMSW_SOURCE_DATA (0xFFFF << LMSW_SOURCE_DATA_SHIFT) /* 16:31 lmsw source */
#define REG_EAX (0 << 8)
#define REG_ECX (1 << 8)
#define REG_EDX (2 << 8)
#define REG_EBX (3 << 8)
#define REG_ESP (4 << 8)
#define REG_EBP (5 << 8)
#define REG_ESI (6 << 8)
#define REG_EDI (7 << 8)
#define REG_R8 (8 << 8)
#define REG_R9 (9 << 8)
#define REG_R10 (10 << 8)
#define REG_R11 (11 << 8)
#define REG_R12 (12 << 8)
#define REG_R13 (13 << 8)
#define REG_R14 (14 << 8)
#define REG_R15 (15 << 8)
/*
* Exit Qualifications for MOV for Debug Register Access
*/
#define DEBUG_REG_ACCESS_NUM 0x7 /* 2:0, number of debug register */
#define DEBUG_REG_ACCESS_TYPE 0x10 /* 4, direction of access */
#define TYPE_MOV_TO_DR (0 << 4)
#define TYPE_MOV_FROM_DR (1 << 4)
#define DEBUG_REG_ACCESS_REG 0xf00 /* 11:8, general purpose register */
/* segment AR */
#define SEGMENT_AR_L_MASK (1 << 13)
/* entry controls */
#define VM_ENTRY_CONTROLS_IA32E_MASK (1 << 9)
#define AR_TYPE_ACCESSES_MASK 1
#define AR_TYPE_READABLE_MASK (1 << 1)
#define AR_TYPE_WRITEABLE_MASK (1 << 2)
#define AR_TYPE_CODE_MASK (1 << 3)
#define AR_TYPE_MASK 0x0f
#define AR_TYPE_BUSY_64_TSS 11
#define AR_TYPE_BUSY_32_TSS 11
#define AR_TYPE_BUSY_16_TSS 3
#define AR_TYPE_LDT 2
#define AR_UNUSABLE_MASK (1 << 16)
#define AR_S_MASK (1 << 4)
#define AR_P_MASK (1 << 7)
#define AR_L_MASK (1 << 13)
#define AR_DB_MASK (1 << 14)
#define AR_G_MASK (1 << 15)
#define AR_DPL_SHIFT 5
#define AR_DPL(ar) (((ar) >> AR_DPL_SHIFT) & 3)
#define AR_RESERVD_MASK 0xfffe0f00
#define CR4_VMXE 0x2000
#define MSR_IA32_VMX_BASIC 0x480
#define MSR_IA32_FEATURE_CONTROL 0x03a
#define MSR_IA32_VMX_PINBASED_CTLS 0x481
#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
#define MSR_IA32_VMX_EXIT_CTLS 0x483
#define MSR_IA32_VMX_ENTRY_CTLS 0x484
#endif

1415
drivers/kvm/x86_emulate.c Normal file

File diff suppressed because it is too large Load Diff

185
drivers/kvm/x86_emulate.h Normal file
View File

@@ -0,0 +1,185 @@
/******************************************************************************
* x86_emulate.h
*
* Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
*
* Copyright (c) 2005 Keir Fraser
*
* From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
*/
#ifndef __X86_EMULATE_H__
#define __X86_EMULATE_H__
struct x86_emulate_ctxt;
/*
* x86_emulate_ops:
*
* These operations represent the instruction emulator's interface to memory.
* There are two categories of operation: those that act on ordinary memory
* regions (*_std), and those that act on memory regions known to require
* special treatment or emulation (*_emulated).
*
* The emulator assumes that an instruction accesses only one 'emulated memory'
* location, that this location is the given linear faulting address (cr2), and
* that this is one of the instruction's data operands. Instruction fetches and
* stack operations are assumed never to access emulated memory. The emulator
* automatically deduces which operand of a string-move operation is accessing
* emulated memory, and assumes that the other operand accesses normal memory.
*
* NOTES:
* 1. The emulator isn't very smart about emulated vs. standard memory.
* 'Emulated memory' access addresses should be checked for sanity.
* 'Normal memory' accesses may fault, and the caller must arrange to
* detect and handle reentrancy into the emulator via recursive faults.
* Accesses may be unaligned and may cross page boundaries.
* 2. If the access fails (cannot emulate, or a standard access faults) then
* it is up to the memop to propagate the fault to the guest VM via
* some out-of-band mechanism, unknown to the emulator. The memop signals
* failure by returning X86EMUL_PROPAGATE_FAULT to the emulator, which will
* then immediately bail.
* 3. Valid access sizes are 1, 2, 4 and 8 bytes. On x86/32 systems only
* cmpxchg8b_emulated need support 8-byte accesses.
* 4. The emulator cannot handle 64-bit mode emulation on an x86/32 system.
*/
/* Access completed successfully: continue emulation as normal. */
#define X86EMUL_CONTINUE 0
/* Access is unhandleable: bail from emulation and return error to caller. */
#define X86EMUL_UNHANDLEABLE 1
/* Terminate emulation but return success to the caller. */
#define X86EMUL_PROPAGATE_FAULT 2 /* propagate a generated fault to guest */
#define X86EMUL_RETRY_INSTR 2 /* retry the instruction for some reason */
#define X86EMUL_CMPXCHG_FAILED 2 /* cmpxchg did not see expected value */
struct x86_emulate_ops {
/*
* read_std: Read bytes of standard (non-emulated/special) memory.
* Used for instruction fetch, stack operations, and others.
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr,
unsigned long *val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt);
/*
* write_std: Write bytes of standard (non-emulated/special) memory.
* Used for stack operations, and others.
* @addr: [IN ] Linear address to which to write.
* @val: [IN ] Value to write to memory (low-order bytes used as
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_std)(unsigned long addr,
unsigned long val,
unsigned int bytes, struct x86_emulate_ctxt * ctxt);
/*
* read_emulated: Read bytes from emulated/special memory area.
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_emulated) (unsigned long addr,
unsigned long *val,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
/*
* write_emulated: Read bytes from emulated/special memory area.
* @addr: [IN ] Linear address to which to write.
* @val: [IN ] Value to write to memory (low-order bytes used as
* required).
* @bytes: [IN ] Number of bytes to write to memory.
*/
int (*write_emulated) (unsigned long addr,
unsigned long val,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
/*
* cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
* emulated/special memory area.
* @addr: [IN ] Linear address to access.
* @old: [IN ] Value expected to be current at @addr.
* @new: [IN ] Value to write to @addr.
* @bytes: [IN ] Number of bytes to access using CMPXCHG.
*/
int (*cmpxchg_emulated) (unsigned long addr,
unsigned long old,
unsigned long new,
unsigned int bytes,
struct x86_emulate_ctxt * ctxt);
/*
* cmpxchg8b_emulated: Emulate an atomic (LOCKed) CMPXCHG8B operation on an
* emulated/special memory area.
* @addr: [IN ] Linear address to access.
* @old: [IN ] Value expected to be current at @addr.
* @new: [IN ] Value to write to @addr.
* NOTES:
* 1. This function is only ever called when emulating a real CMPXCHG8B.
* 2. This function is *never* called on x86/64 systems.
* 2. Not defining this function (i.e., specifying NULL) is equivalent
* to defining a function that always returns X86EMUL_UNHANDLEABLE.
*/
int (*cmpxchg8b_emulated) (unsigned long addr,
unsigned long old_lo,
unsigned long old_hi,
unsigned long new_lo,
unsigned long new_hi,
struct x86_emulate_ctxt * ctxt);
};
struct cpu_user_regs;
struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
/* Linear faulting address (if emulating a page-faulting instruction). */
unsigned long eflags;
unsigned long cr2;
/* Emulated execution mode, represented by an X86EMUL_MODE value. */
int mode;
unsigned long cs_base;
unsigned long ds_base;
unsigned long es_base;
unsigned long ss_base;
unsigned long gs_base;
unsigned long fs_base;
};
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */
/* Host execution mode. */
#if defined(__i386__)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32
#elif defined(CONFIG_X86_64)
#define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64
#endif
/*
* x86_emulate_memop: Emulate an instruction that faulted attempting to
* read/write a 'special' memory area.
* Returns -1 on failure, 0 on success.
*/
int x86_emulate_memop(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops);
/*
* Given the 'reg' portion of a ModRM byte, and a register block, return a
* pointer into the block that addresses the relevant register.
* @highbyte_regs specifies whether to decode AH,CH,DH,BH.
*/
void *decode_register(u8 modrm_reg, unsigned long *regs,
int highbyte_regs);
#endif /* __X86_EMULATE_H__ */