
KVM VM execution (the KVM_RUN path)



User space issues the KVM_RUN command as an ioctl on the vCPU file descriptor (not the VM fd). The call lands in kvm_vcpu_ioctl() in virt/kvm/kvm_main.c.
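For context, here is a minimal, deliberately incomplete sketch of that userspace side. Guest memory setup (KVM_SET_USER_MEMORY_REGION) and register initialization are omitted, and error handling is skipped; the ioctls and the mmap of the kvm_run area are the standard KVM API.

/* Minimal sketch of the userspace side of KVM_RUN (no error handling,
 * no guest memory or register setup). */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

int main(void)
{
    int kvm_fd  = open("/dev/kvm", O_RDWR | O_CLOEXEC);
    int vm_fd   = ioctl(kvm_fd, KVM_CREATE_VM, 0);    /* VM file descriptor */
    int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);   /* vCPU file descriptor */

    /* The shared kvm_run communication area is mmap()ed from the vCPU fd. */
    int mmap_size = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
    struct kvm_run *run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
                               MAP_SHARED, vcpu_fd, 0);

    /* Guest memory and register setup would go here. */

    /* KVM_RUN is issued on the vCPU fd; the argument must be 0, since
     * kvm_vcpu_ioctl() below rejects a non-zero arg with -EINVAL. */
    ioctl(vcpu_fd, KVM_RUN, 0);
    (void)run;
    return 0;
}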

static long kvm_vcpu_ioctl(struct file *filp,
                           unsigned int ioctl, unsigned long arg)
{
    struct kvm_vcpu *vcpu = filp->private_data;
    void __user *argp = (void __user *)arg;
    int r;
    struct kvm_fpu *fpu = NULL;
    struct kvm_sregs *kvm_sregs = NULL;

    if (vcpu->kvm->mm != current->mm)
        return -EIO;

    if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
        return -EINVAL;

#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
    /*
     * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
     * so vcpu_load() would break it.
     */
    if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
        return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
#endif

    r = vcpu_load(vcpu);
    if (r)
        return r;
    switch (ioctl) {
    case KVM_RUN:
        r = -EINVAL;
        if (arg)
            goto out;
        if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
            /* The thread running this VCPU changed. */
            struct pid *oldpid = vcpu->pid;
            struct pid *newpid = get_task_pid(current, PIDTYPE_PID);

            rcu_assign_pointer(vcpu->pid, newpid);
            if (oldpid)
                synchronize_rcu();
            put_pid(oldpid);
        }
        r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
        trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
        break;
    case KVM_GET_REGS: {
        struct kvm_regs *kvm_regs;

        r = -ENOMEM;
        kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
        if (!kvm_regs)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_regs(vcpu, kvm_regs);
        if (r)
            goto out_free1;
        r = -EFAULT;
        if (copy_to_user(argp, kvm_regs, sizeof(struct kvm_regs)))
            goto out_free1;
        r = 0;
out_free1:
        kfree(kvm_regs);
        break;
    }
    case KVM_SET_REGS: {
        struct kvm_regs *kvm_regs;

        r = -ENOMEM;
        kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
        if (IS_ERR(kvm_regs)) {
            r = PTR_ERR(kvm_regs);
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
        kfree(kvm_regs);
        break;
    }
    case KVM_GET_SREGS: {
        kvm_sregs = kzalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
        r = -ENOMEM;
        if (!kvm_sregs)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, kvm_sregs, sizeof(struct kvm_sregs)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_SREGS: {
        kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
        if (IS_ERR(kvm_sregs)) {
            r = PTR_ERR(kvm_sregs);
            kvm_sregs = NULL;
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
        break;
    }
    case KVM_GET_MP_STATE: {
        struct kvm_mp_state mp_state;

        r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, &mp_state);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_MP_STATE: {
        struct kvm_mp_state mp_state;

        r = -EFAULT;
        if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
            goto out;
        r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
        break;
    }
    case KVM_TRANSLATE: {
        struct kvm_translation tr;

        r = -EFAULT;
        if (copy_from_user(&tr, argp, sizeof(tr)))
            goto out;
        r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, &tr, sizeof(tr)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_GUEST_DEBUG: {
        struct kvm_guest_debug dbg;

        r = -EFAULT;
        if (copy_from_user(&dbg, argp, sizeof(dbg)))
            goto out;
        r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
        break;
    }
    case KVM_SET_SIGNAL_MASK: {
        struct kvm_signal_mask __user *sigmask_arg = argp;
        struct kvm_signal_mask kvm_sigmask;
        sigset_t sigset, *p;

        p = NULL;
        if (argp) {
            r = -EFAULT;
            if (copy_from_user(&kvm_sigmask, argp,
                               sizeof(kvm_sigmask)))
                goto out;
            r = -EINVAL;
            if (kvm_sigmask.len != sizeof(sigset))
                goto out;
            r = -EFAULT;
            if (copy_from_user(&sigset, sigmask_arg->sigset,
                               sizeof(sigset)))
                goto out;
            p = &sigset;
        }
        r = kvm_vcpu_ioctl_set_sigmask(vcpu, p);
        break;
    }
    case KVM_GET_FPU: {
        fpu = kzalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
        r = -ENOMEM;
        if (!fpu)
            goto out;
        r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
        if (r)
            goto out;
        r = -EFAULT;
        if (copy_to_user(argp, fpu, sizeof(struct kvm_fpu)))
            goto out;
        r = 0;
        break;
    }
    case KVM_SET_FPU: {
        fpu = memdup_user(argp, sizeof(*fpu));
        if (IS_ERR(fpu)) {
            r = PTR_ERR(fpu);
            fpu = NULL;
            goto out;
        }
        r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
        break;
    }
    default:
        r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
    }
out:
    vcpu_put(vcpu);
    kfree(fpu);
    kfree(kvm_sregs);
    return r;
}
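When kvm_arch_vcpu_ioctl_run() returns, the exit reason recorded in vcpu->run (and traced by trace_kvm_userspace_exit above) tells the VMM why the guest stopped. Below is a rough, illustrative sketch of the userspace dispatch loop, reusing vcpu_fd and run from the earlier sketch; the KVM_EXIT_* values and kvm_run fields come from <linux/kvm.h>, while the handling itself is only outlined in comments.

#include <errno.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative VMM run loop: keep re-entering the guest until an exit
 * that ends the VM. */
static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
{
    for (;;) {
        if (ioctl(vcpu_fd, KVM_RUN, 0) < 0) {
            if (errno == EINTR)   /* signal during KVM_RUN: KVM_EXIT_INTR path */
                continue;
            return;
        }
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            /* Trapped port I/O: direction/port/size/count in run->io,
             * payload at (char *)run + run->io.data_offset. */
            break;
        case KVM_EXIT_MMIO:
            /* run->mmio.phys_addr, .data, .len, .is_write. */
            break;
        case KVM_EXIT_HLT:
            /* Guest executed HLT with nothing pending. */
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            /* Only with a userspace irqchip; see vcpu_run() below. */
            break;
        case KVM_EXIT_SHUTDOWN:   /* e.g. triple fault, set in vcpu_enter_guest() */
        default:
            return;
        }
    }
}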




int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
    struct fpu *fpu = &current->thread.fpu;
    int r;
    sigset_t sigsaved;

    fpu__activate_curr(fpu);

    if (vcpu->sigset_active)
        sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

    if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
        kvm_vcpu_block(vcpu);
        kvm_apic_accept_events(vcpu);
        clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
        r = -EAGAIN;
        goto out;
    }

    /* re-sync apic's tpr */
    if (!lapic_in_kernel(vcpu)) {
        if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
            r = -EINVAL;
            goto out;
        }
    }

    if (unlikely(vcpu->arch.complete_userspace_io)) {
        int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
        vcpu->arch.complete_userspace_io = NULL;
        r = cui(vcpu);
        if (r <= 0)
            goto out;
    } else
        WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);

    r = vcpu_run(vcpu);

out:
    post_kvm_run_save(vcpu);
    if (vcpu->sigset_active)
        sigprocmask(SIG_SETMASK, &sigsaved, NULL);

    return r;
}
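The sigprocmask() dance around vcpu_run() only happens if userspace previously installed a mask with KVM_SET_SIGNAL_MASK (the vcpu->sigset_active case). The usual pattern, as QEMU uses it, is to keep a "kick" signal blocked in the vCPU thread and let it through only while the guest runs, so it interrupts KVM_RUN with -EINTR. A hedged sketch follows; SIGUSR1 as the kick signal and the 8-byte mask length are assumptions about an x86-64 host (the KVM_SET_SIGNAL_MASK handler above requires len to equal the kernel's sigset size, not glibc's).

#include <pthread.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: let SIGUSR1 interrupt KVM_RUN while staying blocked for
 * ordinary userspace code in this vCPU thread. */
static int install_vcpu_sigmask(int vcpu_fd)
{
    sigset_t block, run_mask;
    struct kvm_signal_mask *kmask;
    int r;

    /* Keep SIGUSR1 blocked outside of KVM_RUN. */
    sigemptyset(&block);
    sigaddset(&block, SIGUSR1);
    pthread_sigmask(SIG_BLOCK, &block, NULL);

    /* Build the mask KVM applies while the vCPU runs: the current mask
     * minus SIGUSR1, so the signal is delivered only inside KVM_RUN. */
    pthread_sigmask(SIG_BLOCK, NULL, &run_mask);
    sigdelset(&run_mask, SIGUSR1);

    kmask = malloc(sizeof(*kmask) + 8);
    if (!kmask)
        return -1;
    kmask->len = 8;                      /* kernel sigset size on x86-64 */
    memcpy(kmask->sigset, &run_mask, 8); /* low 64 signal bits */

    r = ioctl(vcpu_fd, KVM_SET_SIGNAL_MASK, kmask);
    free(kmask);
    return r;
}

After this, a pthread_kill(vcpu_thread, SIGUSR1) from another thread pops the vCPU out of guest mode: KVM_RUN fails with EINTR and run->exit_reason is KVM_EXIT_INTR, which is how VMMs implement "kick the vCPU".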
Both kvm_arch_vcpu_ioctl_run() above and the vcpu_run() loop it calls are defined in arch/x86/kvm/x86.c:
static int vcpu_run(struct kvm_vcpu *vcpu)
{
    int r;
    struct kvm *kvm = vcpu->kvm;

    vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);

    for (;;) {
        if (kvm_vcpu_running(vcpu)) {
            r = vcpu_enter_guest(vcpu);
        } else {
            r = vcpu_block(kvm, vcpu);
        }

        if (r <= 0)
            break;

        clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
        if (kvm_cpu_has_pending_timer(vcpu))
            kvm_inject_pending_timer_irqs(vcpu);

        if (dm_request_for_irq_injection(vcpu) &&
            kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
            r = 0;
            vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
            ++vcpu->stat.request_irq_exits;
            break;
        }

        kvm_check_async_pf_completion(vcpu);

        if (signal_pending(current)) {
            r = -EINTR;
            vcpu->run->exit_reason = KVM_EXIT_INTR;
            ++vcpu->stat.signal_exits;
            break;
        }
        if (need_resched()) {
            srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
            cond_resched();
            vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
        }
    }

    srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);

    return r;
}
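The KVM_EXIT_IRQ_WINDOW_OPEN branch above only fires when userspace emulates the interrupt controller itself and has asked for the exit via kvm_run->request_interrupt_window. A rough sketch of that userspace side follows; the function name is illustrative, but the kvm_run fields and the KVM_INTERRUPT ioctl are the real API for VMs created without an in-kernel irqchip.

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: injecting an external interrupt with a userspace-emulated PIC. */
static void maybe_inject_irq(int vcpu_fd, struct kvm_run *run, int pending_vector)
{
    if (pending_vector < 0)
        return;

    if (run->ready_for_interrupt_injection && run->if_flag) {
        /* Window is open: inject the vector directly. */
        struct kvm_interrupt irq = { .irq = (unsigned int)pending_vector };
        ioctl(vcpu_fd, KVM_INTERRUPT, &irq);
        run->request_interrupt_window = 0;
    } else {
        /* Window closed: ask KVM to exit with KVM_EXIT_IRQ_WINDOW_OPEN
         * as soon as interrupts can be injected again. */
        run->request_interrupt_window = 1;
    }
}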

static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
    int r;
    bool req_int_win =
        dm_request_for_irq_injection(vcpu) &&
        kvm_cpu_accept_dm_intr(vcpu);

    bool req_immediate_exit = false;

    if (vcpu->requests) {
        if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
            kvm_mmu_unload(vcpu);
        if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
            __kvm_migrate_timers(vcpu);
        if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
            kvm_gen_update_masterclock(vcpu->kvm);
        if (kvm_check_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu))
            kvm_gen_kvmclock_update(vcpu);
        if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
            r = kvm_guest_time_update(vcpu);
            if (unlikely(r))
                goto out;
        }
        if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu))
            kvm_mmu_sync_roots(vcpu);
        if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
            kvm_vcpu_flush_tlb(vcpu);
        if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
            vcpu->fpu_active = 0;
            kvm_x86_ops->fpu_deactivate(vcpu);
        }
        if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
            /* Page is swapped out. Do synthetic halt */
            vcpu->arch.apf.halted = true;
            r = 1;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
            record_steal_time(vcpu);
        if (kvm_check_request(KVM_REQ_SMI, vcpu))
            process_smi(vcpu);
        if (kvm_check_request(KVM_REQ_NMI, vcpu))
            process_nmi(vcpu);
        if (kvm_check_request(KVM_REQ_PMU, vcpu))
            kvm_pmu_handle_event(vcpu);
        if (kvm_check_request(KVM_REQ_PMI, vcpu))
            kvm_pmu_deliver_pmi(vcpu);
        if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
            BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
            if (test_bit(vcpu->arch.pending_ioapic_eoi,
                         vcpu->arch.ioapic_handled_vectors)) {
                vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
                vcpu->run->eoi.vector =
                    vcpu->arch.pending_ioapic_eoi;
                r = 0;
                goto out;
            }
        }
        if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
            vcpu_scan_ioapic(vcpu);
        if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
            kvm_vcpu_reload_apic_access_page(vcpu);
        if (kvm_check_request(KVM_REQ_HV_CRASH, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
            vcpu->run->system_event.type = KVM_SYSTEM_EVENT_CRASH;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
            vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
            r = 0;
            goto out;
        }
        if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
            vcpu->run->exit_reason = KVM_EXIT_HYPERV;
            vcpu->run->hyperv = vcpu->arch.hyperv.exit;
            r = 0;
            goto out;
        }

        /*
         * KVM_REQ_HV_STIMER has to be processed after
         * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers
         * depend on the guest clock being up-to-date
         */
        if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu))
            kvm_hv_process_stimers(vcpu);
    }

    /*
     * KVM_REQ_EVENT is not set when posted interrupts are set by
     * VT-d hardware, so we have to update RVI unconditionally.
     */
    if (kvm_lapic_enabled(vcpu)) {
        /*
         * Update architecture specific hints for APIC
         * virtual interrupt delivery.
         */
        if (vcpu->arch.apicv_active)
            kvm_x86_ops->hwapic_irr_update(vcpu,
                kvm_lapic_find_highest_irr(vcpu));
    }

    if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
        kvm_apic_accept_events(vcpu);
        if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
            r = 1;
            goto out;
        }

        if (inject_pending_event(vcpu, req_int_win) != 0)
            req_immediate_exit = true;
        else {
            /* Enable NMI/IRQ window open exits if needed.
             *
             * SMIs have two cases: 1) they can be nested, and
             * then there is nothing to do here because RSM will
             * cause a vmexit anyway; 2) or the SMI can be pending
             * because inject_pending_event has completed the
             * injection of an IRQ or NMI from the previous vmexit,
             * and then we request an immediate exit to inject the SMI.
             */
            if (vcpu->arch.smi_pending && !is_smm(vcpu))
                req_immediate_exit = true;
            if (vcpu->arch.nmi_pending)
                kvm_x86_ops->enable_nmi_window(vcpu);
            if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
                kvm_x86_ops->enable_irq_window(vcpu);
        }

        if (kvm_lapic_enabled(vcpu)) {
            update_cr8_intercept(vcpu);
            kvm_lapic_sync_to_vapic(vcpu);
        }
    }

    r = kvm_mmu_reload(vcpu);
    if (unlikely(r)) {
        goto cancel_injection;
    }

    preempt_disable();

    kvm_x86_ops->prepare_guest_switch(vcpu);
    if (vcpu->fpu_active)
        kvm_load_guest_fpu(vcpu);
    vcpu->mode = IN_GUEST_MODE;

    srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);

    /*
     * We should set ->mode before check ->requests,
     * Please see the comment in kvm_make_all_cpus_request.
     * This also orders the write to mode from any reads
     * to the page tables done while the VCPU is running.
     * Please see the comment in kvm_flush_remote_tlbs.
     */
    smp_mb__after_srcu_read_unlock();

    local_irq_disable();

    if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests
        || need_resched() || signal_pending(current)) {
        vcpu->mode = OUTSIDE_GUEST_MODE;
        smp_wmb();
        local_irq_enable();
        preempt_enable();
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
        r = 1;
        goto cancel_injection;
    }

    kvm_load_guest_xcr0(vcpu);

    if (req_immediate_exit) {
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        smp_send_reschedule(vcpu->cpu);
    }

    trace_kvm_entry(vcpu->vcpu_id);
    wait_lapic_expire(vcpu);
    guest_enter_irqoff();

    if (unlikely(vcpu->arch.switch_db_regs)) {
        set_debugreg(0, 7);
        set_debugreg(vcpu->arch.eff_db[0], 0);
        set_debugreg(vcpu->arch.eff_db[1], 1);
        set_debugreg(vcpu->arch.eff_db[2], 2);
        set_debugreg(vcpu->arch.eff_db[3], 3);
        set_debugreg(vcpu->arch.dr6, 6);
        vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
    }
    kvm_x86_ops->run(vcpu);

    /*
     * Do this here before restoring debug registers on the host.  And
     * since we do this before handling the vmexit, a DR access vmexit
     * can (a) read the correct value of the debug registers, (b) set
     * KVM_DEBUGREG_WONT_EXIT again.
     */
    if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
        WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
        kvm_x86_ops->sync_dirty_debug_regs(vcpu);
        kvm_update_dr0123(vcpu);
        kvm_update_dr6(vcpu);
        kvm_update_dr7(vcpu);
        vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
    }

    /*
     * If the guest has used debug registers, at least dr7
     * will be disabled while returning to the host.
     * If we don't have active breakpoints in the host, we don't
     * care about the messed up debug address registers. But if
     * we have some of them active, restore the old state.
     */
    if (hw_breakpoint_active())
        hw_breakpoint_restore();

    vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());

    vcpu->mode = OUTSIDE_GUEST_MODE;
    smp_wmb();

    kvm_put_guest_xcr0(vcpu);

    kvm_x86_ops->handle_external_intr(vcpu);

    ++vcpu->stat.exits;

    guest_exit_irqoff();

    local_irq_enable();
    preempt_enable();

    vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

    /*
     * Profile KVM exit RIPs:
     */
    if (unlikely(prof_on == KVM_PROFILING)) {
        unsigned long rip = kvm_rip_read(vcpu);
        profile_hit(KVM_PROFILING, (void *)rip);
    }

    if (unlikely(vcpu->arch.tsc_always_catchup))
        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);

    if (vcpu->arch.apic_attention)
        kvm_lapic_sync_from_vapic(vcpu);

    r = kvm_x86_ops->handle_exit(vcpu);
    return r;

cancel_injection:
    kvm_x86_ops->cancel_injection(vcpu);
    if (unlikely(vcpu->arch.apic_attention))
        kvm_lapic_sync_from_vapic(vcpu);
out:
    return r;
}
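kvm_x86_ops->run is a function pointer filled in by the vendor module: with Intel VT-x it points at vmx_vcpu_run() shown next (kvm_amd wires up svm_vcpu_run() instead). The snippet below is an abridged, non-verbatim reminder of how the vmx_x86_ops table in vmx.c registers those callbacks; only a few of the many fields are shown.

/* Abridged sketch of the VMX backend's ops table (not the full definition). */
static struct kvm_x86_ops vmx_x86_ops = {
    /* ... many other callbacks ... */
    .prepare_guest_switch  = vmx_save_host_state,
    .run                   = vmx_vcpu_run,
    .handle_exit           = vmx_handle_exit,
    .handle_external_intr  = vmx_handle_external_intr,
    /* ... */
};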


static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr4;


/* Record the guest's net vcpu time for enforced NMI injections. */
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
vmx->entry_time = ktime_get();


/* Don't enter VMX if guest state is invalid, let the exit handler
  start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
return;


if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}


if (vmx->nested.sync_shadow_vmcs) {
copy_vmcs12_to_shadow(vmx);
vmx->nested.sync_shadow_vmcs = false;
}


if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]);


cr4 = cr4_read_shadow();
if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) {
vmcs_writel(HOST_CR4, cr4);
vmx->host_state.vmcs_host_cr4 = cr4;
}


/* When single-stepping over STI and MOV SS, we must clear the
* corresponding interruptibility bits in the guest state. Otherwise
* vmentry fails as it then expects bit 14 (BS) in pending debug
* exceptions being set, but that's not correct for the guest debugging
* case. */
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
vmx_set_interrupt_shadow(vcpu, 0);


if (vmx->guest_pkru_valid)
__write_pkru(vmx->guest_pkru);


atomic_switch_perf_msrs(vmx);
debugctlmsr = get_debugctlmsr();


vmx_arm_hv_timer(vcpu);


vmx->__launched = vmx->loaded_vmcs->launched;
asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
"push %%" _ASM_CX " \n\t"
"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
"je 1f \n\t"
"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
"1: \n\t"
/* Reload cr2 if changed */
"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
"mov %%cr2, %%" _ASM_DX " \n\t"
"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
"je 2f \n\t"
"mov %%" _ASM_AX", %%cr2 \n\t"
"2: \n\t"
/* Check if vmlaunch of vmresume is needed */
"cmpl $0, %c[launched](%0) \n\t"
/* Load guest registers.  Don't clobber flags. */
"mov %c[rax](%0), %%" _ASM_AX " \n\t"
"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
"mov %c[r8](%0),  %%r8  \n\t"
"mov %c[r9](%0),  %%r9  \n\t"
"mov %c[r10](%0), %%r10 \n\t"
"mov %c[r11](%0), %%r11 \n\t"
"mov %c[r12](%0), %%r12 \n\t"
"mov %c[r13](%0), %%r13 \n\t"
"mov %c[r14](%0), %%r14 \n\t"
"mov %c[r15](%0), %%r15 \n\t"
 #endif
"mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */


/* Enter guest mode */
"jne 1f \n\t"
__ex(ASM_VMX_VMLAUNCH) "\n\t"
"jmp 2f \n\t"
"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
"2: "
/* Save guest registers, load host registers, keep flags */
"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
"pop %0 \n\t"
"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
"mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
"mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
"mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
"mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
 #ifdef CONFIG_X86_64
"mov %%r8,  %c[r8](%0) \n\t"
"mov %%r9,  %c[r9](%0) \n\t"
"mov %%r10, %c[r10](%0) \n\t"
"mov %%r11, %c[r11](%0) \n\t"
"mov %%r12, %c[r12](%0) \n\t"
"mov %%r13, %c[r13](%0) \n\t"
"mov %%r14, %c[r14](%0) \n\t"
"mov %%r15, %c[r15](%0) \n\t"
 #endif
"mov %%cr2, %%" _ASM_AX "   \n\t"
"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"


"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
"setbe %c[fail](%0) \n\t"
".pushsection .rodata \n\t"
".global vmx_return \n\t"
"vmx_return: " _ASM_PTR " 2b \n\t"
".popsection"
     : : "c"(vmx), "d"((unsigned long)HOST_RSP),
[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
[fail]"i"(offsetof(struct vcpu_vmx, fail)),
[host_rsp]"i"(offsetof(struct vcpu_vmx, host_rsp)),
[rax]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RAX])),
[rbx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBX])),
[rcx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RCX])),
[rdx]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDX])),
[rsi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RSI])),
[rdi]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RDI])),
[rbp]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_RBP])),
 #ifdef CONFIG_X86_64
[r8]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R8])),
[r9]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R9])),
[r10]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R10])),
[r11]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R11])),
[r12]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R12])),
[r13]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R13])),
[r14]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R14])),
[r15]"i"(offsetof(struct vcpu_vmx, vcpu.arch.regs[VCPU_REGS_R15])),
 #endif
[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
[wordsize]"i"(sizeof(ulong))
     : "cc", "memory"
 #ifdef CONFIG_X86_64
, "rax", "rbx", "rdi", "rsi"
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
 #else
, "eax", "ebx", "edi", "esi"
 #endif
     );


/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
if (debugctlmsr)
update_debugctlmsr(debugctlmsr);


 #ifndef CONFIG_X86_64
/*
* The sysexit path does not restore ds/es, so we must set them to
* a reasonable value ourselves.
*
* We can't defer this to vmx_load_host_state() since that function
* may be executed in interrupt context, which saves and restore segments
* around it, nullifying its effect.
*/
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
 #endif


vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP)
 | (1 << VCPU_EXREG_RFLAGS)
 | (1 << VCPU_EXREG_PDPTR)
 | (1 << VCPU_EXREG_SEGMENTS)
 | (1 << VCPU_EXREG_CR3));
vcpu->arch.regs_dirty = 0;


vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);


vmx->loaded_vmcs->launched = 1;


vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);


/*
* eager fpu is enabled if PKEY is supported and CR4 is switched
* back on host, so it is safe to read guest PKRU from current
* XSAVE.
*/
if (boot_cpu_has(X86_FEATURE_OSPKE)) {
vmx->guest_pkru = __read_pkru();
if (vmx->guest_pkru != vmx->host_pkru) {
vmx->guest_pkru_valid = true;
__write_pkru(vmx->host_pkru);
} else
vmx->guest_pkru_valid = false;
}


/*
* the KVM_REQ_EVENT optimization bit is only on for one entry, and if
* we did not inject a still-pending event to L1 now because of
* nested_run_pending, we need to re-enable this bit.
*/
if (vmx->nested.nested_run_pending)
kvm_make_request(KVM_REQ_EVENT, vcpu);


vmx->nested.nested_run_pending = 0;


vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
 }
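After vmx_vcpu_run() returns to vcpu_enter_guest(), the exit_reason saved from VM_EXIT_REASON drives kvm_x86_ops->handle_exit() (vmx_handle_exit() here), which for ordinary exits indexes a table of per-reason handlers. A heavily abridged sketch of that table from vmx.c follows; only a handful of entries are listed, and the selection is illustrative.

/* Abridged sketch of the VMX exit-handler table. */
static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
    [EXIT_REASON_EXCEPTION_NMI]      = handle_exception,
    [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
    [EXIT_REASON_IO_INSTRUCTION]     = handle_io,
    [EXIT_REASON_CR_ACCESS]          = handle_cr,
    [EXIT_REASON_CPUID]              = handle_cpuid,
    [EXIT_REASON_HLT]                = handle_halt,
    [EXIT_REASON_EPT_VIOLATION]      = handle_ept_violation,
    /* ... dozens more ... */
};

Handlers that can be completed in the kernel return a positive value, so vcpu_run()'s loop re-enters the guest; a return of 0 propagates the exit to userspace, where the KVM_EXIT_* dispatch from the beginning of this walk-through takes over.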

From: https://blog.51cto.com/u_11860992/6392679
