The VMs are not always resumed on reboot
The “libvirt-guests” service can conflict with “onevm-all” and we don't need to wait for each VM to boot during start. * posttemplate/10-libvirt-guests: disable the “libvirt-guests” service. * init/onenode.service (ExecStart): use default wait timeout (60s). (TimeoutStartSec): wait for longer than the default timeout. (ExecReload): just try to resume any remaining VMs. (ExecStop): wait longer for VM to suspend. (TimeoutStopSec): wait for longer than the stop timeout. * scripts/onevm-all: schedule actions in parallel and wait globally for their executions. Ref: #22155
This commit is contained in:
parent
d9e9d2e81c
commit
340dd409e2
|
@ -8,11 +8,15 @@ After=multi-user.target
|
|||
Type=oneshot
|
||||
Environment=CREDS=/var/lib/one/.one/one_auth
|
||||
Environment=ENDPOINT=http://127.0.0.1:2633/RPC2
|
||||
TimeoutSec=1min
|
||||
RemainAfterExit=yes
|
||||
Restart=no
|
||||
ExecStart=/usr/share/eole/sbin/onevm-all -t 20 -w -c ${CREDS} -e ${ENDPOINT} -a "resume"
|
||||
ExecStop=/usr/share/eole/sbin/onevm-all -t 20 -w -c ${CREDS} -e ${ENDPOINT} -a "suspend"
|
||||
ExecStart=/usr/share/eole/sbin/onevm-all -c ${CREDS} -e ${ENDPOINT} -a "resume"
|
||||
# Allow the remaining VMs to be resumed remotely with a simple reload
|
||||
ExecReload=/usr/share/eole/sbin/onevm-all -c ${CREDS} -e ${ENDPOINT} -a "resume"
|
||||
ExecStop=/usr/share/eole/sbin/onevm-all -w 300 -c ${CREDS} -e ${ENDPOINT} -a "suspend"
|
||||
# Keep some margin over the wait timeouts used above
|
||||
TimeoutStartSec=120s
|
||||
TimeoutStopSec=360s
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
#!/bin/sh

# The "libvirt-guests" service can conflict with "onevm-all", so run
# systemctl stop, disable and mask on it, in that order.
# stderr of each call is discarded and the script always exits 0.
echo "Disable and mask libvirt-guests service"

for verb in stop disable mask; do
    systemctl "${verb}" libvirt-guests.service 2> /dev/null
done

exit 0
|
|
@ -1,5 +1,8 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# Do not buffer output
|
||||
STDOUT.sync = TRUE
|
||||
|
||||
##############################################################################
|
||||
# Environment Configuration
|
||||
##############################################################################
|
||||
|
@ -30,19 +33,30 @@ include OpenNebula
|
|||
MAXWAIT=60
|
||||
INTERVAL=1
|
||||
|
||||
def _wait(vm, st)
|
||||
wait = 0
|
||||
while vm.status != st
|
||||
vm.info
|
||||
if vm.status == 'unkn'
|
||||
break
|
||||
end
|
||||
wait += INTERVAL
|
||||
sleep(INTERVAL)
|
||||
if wait >= MAXWAIT
|
||||
break
|
||||
end
|
||||
# List of supported actions (validated against the -a/--action option).
# Frozen so the list cannot be altered by accident at runtime.
ACTIONS = [
    'status',   # Print the name and status of every VM in the OpenNebula VM pool
    'suspend',  # Suspend every VM whose status is "runn"
    'resume',   # Resume the VMs recorded in the running-VMs file
].freeze

# Map each action to the VM status that marks it as completed.
# 'status' has no target state. 'boot' is not listed in ACTIONS above;
# its entry is kept so existing lookups keep working.
EXPECTED_STATUS_MAP = {
    'status'  => nil,
    'boot'    => 'runn',
    'suspend' => 'susp',
    'resume'  => 'runn'
}.freeze
|
||||
|
||||
# Read the entries stored in RUNVMFILE.
#
# Returns the lines of the file with duplicates removed (line endings are
# kept as read), or an empty array when the file does not exist.
def dump_running_vms_file()
    return [] unless File.exist?(RUNVMFILE)

    File.readlines(RUNVMFILE).uniq
end
|
||||
|
||||
def CreoleGet(variable)
|
||||
|
@ -54,19 +68,48 @@ def CreoleGet(variable)
|
|||
end
|
||||
end
|
||||
|
||||
#
# NAME: _do_wait
# PARAM: vms     - array of VM objects (must respond to #info and #status);
#                  modified in place: VMs that reached the target state are
#                  removed, so what remains after the call are the failures
#        action  - action name, used as key in EXPECTED_STATUS_MAP
#        maxwait - maximum number of seconds to wait
# AIM: Poll the VMs once per second until all of them reach the status
#      expected for the action, or until maxwait seconds have elapsed
# RETURN: 0 when every VM reached the expected status (or when waiting was
#         explicitly disabled for a resume), -1 otherwise
#
def _do_wait(vms, action, maxwait)
    if maxwait == 0 && action == 'resume'
        # The user explicitly does not want to wait
        vms.clear
        return 0
    end

    # Loop invariant: look the target status up once
    expected = EXPECTED_STATUS_MAP[action]

    print "Wait #{maxwait}s for VMs to #{action}"
    (0..maxwait).each do |elapsed|
        vms.delete_if do |vm|
            # Refresh the VM data before checking its status
            vm.info
            vm.status == expected
        end
        # Do not sleep after the last poll: nothing is checked afterwards,
        # it would only make the call last longer than the announced maxwait
        break if vms.empty? || elapsed == maxwait
        print "."
        sleep(1)
    end

    if vms.empty?
        puts " OK"
        return 0
    end

    puts " FAIL"
    -1
end
|
||||
|
||||
|
||||
#
|
||||
# NAME: _do_suspend
|
||||
# PARAM: OpenNebula::VirtualMachine object
|
||||
# AIM: Suspend a virtual machine
|
||||
#
|
||||
def _do_suspend(vm, wait)
|
||||
def _do_suspend(vm)
|
||||
fd = File.open(RUNVMFILE,'a')
|
||||
if vm.status == "runn"
|
||||
puts("Suspending #{vm.name} ...")
|
||||
puts("Suspending #{vm.id} - #{vm.name}... ")
|
||||
fd.write("#{vm.id}\n")
|
||||
vm.suspend
|
||||
if wait
|
||||
_wait(vm, "susp")
|
||||
rc = vm.suspend
|
||||
if OpenNebula.is_error?(rc)
|
||||
puts rc.message
|
||||
else
|
||||
puts "scheduled"
|
||||
end
|
||||
end
|
||||
fd.close
|
||||
|
@ -77,28 +120,13 @@ end
|
|||
# PARAM: OpenNebula::VirtualMachine object
|
||||
# AIM: Resume a suspended virtual machine
|
||||
#
|
||||
def _do_resume(vm, wait, force=FALSE)
|
||||
if force
|
||||
vm.resume
|
||||
def _do_resume(vm)
|
||||
print("Resume #{vm.id} - #{vm.name}... ")
|
||||
rc = vm.resume
|
||||
if OpenNebula.is_error?(rc)
|
||||
puts rc.message
|
||||
else
|
||||
if vm.status == "susp"
|
||||
puts("Resume on #{vm.name}")
|
||||
vm.resume
|
||||
# elsif vm.status == 'save'
|
||||
# puts("Recover on #{vm.name}")
|
||||
# # Try to recover VM with retry action
|
||||
# vm.recover(2)
|
||||
# vm.resume
|
||||
elsif vm.status == 'unkn'
|
||||
puts("Resume on #{vm.name}")
|
||||
vm.resume
|
||||
else
|
||||
return -1
|
||||
end
|
||||
end
|
||||
|
||||
if wait
|
||||
_wait(vm, "runn")
|
||||
puts "scheduled"
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -107,65 +135,65 @@ options = {:creds => nil, :action => nil, :endpoint => nil,
|
|||
:timeout => nil}
|
||||
|
||||
parser = OptionParser.new do|opts|
|
||||
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
||||
opts.on('-c', '--creds file', 'Crediential file') do |value|
|
||||
options[:creds] = value;
|
||||
end
|
||||
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
||||
opts.on('-c', '--creds file', 'Crediential file') do |value|
|
||||
options[:creds] = value;
|
||||
end
|
||||
|
||||
opts.on('-a', '--action action', 'Action to run') do |value|
|
||||
options[:action] = value;
|
||||
end
|
||||
opts.on('-a', '--action action', 'Action to run') do |value|
|
||||
options[:action] = value;
|
||||
end
|
||||
|
||||
opts.on('-e', '--end-point url', 'End point URL') do |value|
|
||||
options[:endpoint] = value;
|
||||
end
|
||||
opts.on('-e', '--end-point url', 'End point URL') do |value|
|
||||
options[:endpoint] = value;
|
||||
end
|
||||
|
||||
opts.on('-t', '--timeout timeout', 'Timeout for opennebula connection') do |value|
|
||||
options[:timeout] = value.to_i;
|
||||
end
|
||||
opts.on('-t', '--timeout timeout', 'Timeout for opennebula connection') do |value|
|
||||
options[:timeout] = value.to_i;
|
||||
end
|
||||
|
||||
opts.on('-w', '--wait', 'Wait for action ends') do |w|
|
||||
options[:wait] = w
|
||||
end
|
||||
|
||||
opts.on('-h', '--help', 'Displays Help') do
|
||||
puts opts
|
||||
exit
|
||||
end
|
||||
opts.on('-w', '--wait timeout', 'Wait for action ends') do |value|
|
||||
options[:wait] = value.to_i
|
||||
end
|
||||
|
||||
opts.on('-h', '--help', 'Displays Help') do
|
||||
puts opts
|
||||
exit
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
parser.parse!
|
||||
|
||||
# OpenNebula credentials
|
||||
|
||||
if not options[:creds]
|
||||
options[:creds] = "/var/lib/one/.one/one_auth"
|
||||
end
|
||||
|
||||
if not options[:action]
|
||||
options[:action] = "status"
|
||||
options[:action] = "status"
|
||||
end
|
||||
|
||||
if not options[:endpoint]
|
||||
ip = CreoleGet('adresse_ip_eth0').chomp
|
||||
options[:endpoint] = "http://#{ip}:2633/RPC2"
|
||||
ip = CreoleGet('adresse_ip_eth0').chomp
|
||||
options[:endpoint] = "http://#{ip}:2633/RPC2"
|
||||
end
|
||||
|
||||
if not options[:timeout]
|
||||
options[:timeout] = TIMEOUT
|
||||
options[:timeout] = TIMEOUT
|
||||
end
|
||||
|
||||
# Actions
|
||||
SUPPORTED = ['status', 'boot', 'resume', 'shutdown', 'suspend']
|
||||
|
||||
|
||||
if not SUPPORTED.include?(options[:action])
|
||||
puts("Action : #{options[:action]}) is not supported")
|
||||
exit(-1)
|
||||
if not options[:wait]
|
||||
options[:wait] = MAXWAIT
|
||||
end
|
||||
|
||||
|
||||
if not ACTIONS.include?(options[:action])
|
||||
puts("Action : #{options[:action]}) is not supported")
|
||||
exit(-1)
|
||||
end
|
||||
|
||||
|
||||
begin
|
||||
File.readlines(options[:creds]).each do |line|
|
||||
CREDENTIALS = line
|
||||
|
@ -175,54 +203,77 @@ rescue
|
|||
exit(-1)
|
||||
end
|
||||
|
||||
|
||||
exit_code = 0
|
||||
begin
|
||||
client = Client.new(CREDENTIALS, options[:endpoint])
|
||||
client = Client.new(CREDENTIALS, options[:endpoint])
|
||||
|
||||
vm_pool = VirtualMachinePool.new(client, USERFLAG)
|
||||
vm_pool = VirtualMachinePool.new(client, USERFLAG)
|
||||
|
||||
if File.exist?(RUNVMFILE)
|
||||
running_vms = File.readlines(RUNVMFILE)
|
||||
else
|
||||
running_vms = []
|
||||
end
|
||||
|
||||
rc = vm_pool.info
|
||||
cnt = 0
|
||||
while OpenNebula.is_error?(rc)
|
||||
if cnt == options[:timeout]
|
||||
puts rc.message
|
||||
exit(-1)
|
||||
end
|
||||
# Try to load vm pool infos from OpenNebula until timeout expires
|
||||
rc = vm_pool.info
|
||||
sleep(1)
|
||||
cnt += 1
|
||||
end
|
||||
|
||||
vm_pool.each do |vm|
|
||||
case options[:action]
|
||||
when "status"
|
||||
puts("#{vm.name}\t#{vm.status}")
|
||||
when "boot"
|
||||
puts("DEBUG #{vm.status}")
|
||||
if vm.status == "unkn"
|
||||
puts("Booting #{vm.name} ...")
|
||||
vm.boot
|
||||
end
|
||||
when "suspend"
|
||||
_do_suspend(vm, options[:wait])
|
||||
when "resume"
|
||||
if running_vms.include?("#{vm.id}\n")
|
||||
_do_resume(vm, options[:wait], TRUE)
|
||||
end
|
||||
else
|
||||
puts("#{vm.name}\t#{vm.status}")
|
||||
cnt = 0
|
||||
while OpenNebula.is_error?(rc)
|
||||
if cnt == options[:timeout]
|
||||
puts rc.message
|
||||
exit(-1)
|
||||
end
|
||||
rc = vm_pool.info
|
||||
sleep(1)
|
||||
cnt += 1
|
||||
end
|
||||
end
|
||||
if options[:action] == "resume"
|
||||
File.truncate(RUNVMFILE, 0) if File.exists?(RUNVMFILE)
|
||||
end
|
||||
|
||||
if options[:action] == "resume"
|
||||
running_vms = dump_running_vms_file()
|
||||
running_vms.each do |vmid|
|
||||
vm = VirtualMachine.new_with_id(vmid, client)
|
||||
vm.info
|
||||
_do_resume(vm)
|
||||
end
|
||||
|
||||
else
|
||||
vm_pool.each do |vm|
|
||||
case options[:action]
|
||||
when "status"
|
||||
puts "#{vm.name}\t#{vm.status}"
|
||||
|
||||
when "suspend"
|
||||
_do_suspend(vm)
|
||||
end
|
||||
end
|
||||
|
||||
# Update list of suspended VMs
|
||||
running_vms = dump_running_vms_file()
|
||||
end
|
||||
|
||||
if options[:action] != 'status'
|
||||
vms = []
|
||||
running_vms.each do |vmid|
|
||||
vm = VirtualMachine.new_with_id(vmid, client)
|
||||
vms.push(vm)
|
||||
end
|
||||
exit_code = _do_wait(vms, options[:action], options[:wait])
|
||||
end
|
||||
|
||||
if options[:action] == "resume"
|
||||
if vms.empty?
|
||||
File.truncate(RUNVMFILE, 0) if File.exists?(RUNVMFILE)
|
||||
else
|
||||
fd = File.open(RUNVMFILE,'w')
|
||||
vms.each do |vm|
|
||||
fd.write("#{vm.id}\n")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
rescue Exception => e
|
||||
puts e.message
|
||||
exit(-1)
|
||||
puts e.message
|
||||
puts e.backtrace
|
||||
exit(-1)
|
||||
end
|
||||
exit 0
|
||||
|
||||
exit(exit_code)
|
||||
|
||||
# Local Variables:
|
||||
# ruby-indent-level: 4
|
||||
# End:
|
||||
|
|
Loading…
Reference in New Issue