The VMs are not always resumed on reboot
The “libvirt-guests” service can conflict with “onevm-all” and we don't need to wait for each VM to boot during start. * posttemplate/10-libvirt-guests: disable the “libvirt-guests” service. * init/onenode.service (ExecStart): use default wait timeout (60s). (TimeoutStartSec): wait for longer than the default timeout. (ExecReload): just try to resume any remaining VMs. (ExecStop): wait longer for VM to suspend. (TimeoutStopSec): wait for longer than the stop timeout. * scripts/onevm-all: schedule actions in parallel and wait globally for their executions. Ref: #22155
This commit is contained in:
parent
d9e9d2e81c
commit
340dd409e2
|
@ -8,11 +8,15 @@ After=multi-user.target
|
||||||
Type=oneshot
|
Type=oneshot
|
||||||
Environment=CREDS=/var/lib/one/.one/one_auth
|
Environment=CREDS=/var/lib/one/.one/one_auth
|
||||||
Environment=ENDPOINT=http://127.0.0.1:2633/RPC2
|
Environment=ENDPOINT=http://127.0.0.1:2633/RPC2
|
||||||
TimeoutSec=1min
|
|
||||||
RemainAfterExit=yes
|
RemainAfterExit=yes
|
||||||
Restart=no
|
Restart=no
|
||||||
ExecStart=/usr/share/eole/sbin/onevm-all -t 20 -w -c ${CREDS} -e ${ENDPOINT} -a "resume"
|
ExecStart=/usr/share/eole/sbin/onevm-all -c ${CREDS} -e ${ENDPOINT} -a "resume"
|
||||||
ExecStop=/usr/share/eole/sbin/onevm-all -t 20 -w -c ${CREDS} -e ${ENDPOINT} -a "suspend"
|
# Allow resuming any remaining VMs remotely with a simple reload
|
||||||
|
ExecReload=/usr/share/eole/sbin/onevm-all -c ${CREDS} -e ${ENDPOINT} -a "resume"
|
||||||
|
ExecStop=/usr/share/eole/sbin/onevm-all -w 300 -c ${CREDS} -e ${ENDPOINT} -a "suspend"
|
||||||
|
# Keep some margin above the wait timeouts
|
||||||
|
TimeoutStartSec=120s
|
||||||
|
TimeoutStopSec=360s
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=multi-user.target
|
WantedBy=multi-user.target
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
echo "Disable and mask libvirt-guests service"
|
||||||
|
for action in stop disable mask
|
||||||
|
do
|
||||||
|
systemctl ${action} libvirt-guests.service 2> /dev/null
|
||||||
|
done
|
||||||
|
|
||||||
|
exit 0
|
|
@ -1,5 +1,8 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
# Do not buffer output
|
||||||
|
STDOUT.sync = TRUE
|
||||||
|
|
||||||
##############################################################################
|
##############################################################################
|
||||||
# Environment Configuration
|
# Environment Configuration
|
||||||
##############################################################################
|
##############################################################################
|
||||||
|
@ -30,19 +33,30 @@ include OpenNebula
|
||||||
MAXWAIT=60
|
MAXWAIT=60
|
||||||
INTERVAL=1
|
INTERVAL=1
|
||||||
|
|
||||||
def _wait(vm, st)
|
# List of supported actions
|
||||||
wait = 0
|
ACTIONS = [
|
||||||
while vm.status != st
|
'status', # Get the status of all VMs in OpenNebula VM pool
|
||||||
vm.info
|
'suspend', # Suspend all VMs in RUNNING state
|
||||||
if vm.status == 'unkn'
|
'resume', # Resume all VMs in SUSPENDED or UNKNOWN state
|
||||||
break
|
]
|
||||||
end
|
|
||||||
wait += INTERVAL
|
|
||||||
sleep(INTERVAL)
|
# Map each action with a target state
|
||||||
if wait >= MAXWAIT
|
EXPECTED_STATUS_MAP = {
|
||||||
break
|
'status' => nil,
|
||||||
end
|
'boot' => 'runn',
|
||||||
|
'suspend' => 'susp',
|
||||||
|
'resume' => 'runn'
|
||||||
|
}
|
||||||
|
|
||||||
|
def dump_running_vms_file()
|
||||||
|
if File.exist?(RUNVMFILE)
|
||||||
|
running_vms = File.readlines(RUNVMFILE).uniq
|
||||||
|
else
|
||||||
|
running_vms = []
|
||||||
end
|
end
|
||||||
|
|
||||||
|
return running_vms
|
||||||
end
|
end
|
||||||
|
|
||||||
def CreoleGet(variable)
|
def CreoleGet(variable)
|
||||||
|
@ -54,19 +68,48 @@ def CreoleGet(variable)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
#
# NAME: _do_wait
# PARAM: vms     Array of objects responding to #info and #status
#        action  String, key of EXPECTED_STATUS_MAP
#        maxwait Integer, maximum number of seconds to wait
# AIM: Poll the VMs once per second until each one reports the status
#      expected for the action, or until maxwait seconds have elapsed
# RETURN: 0 when no VM is left pending, -1 otherwise
#
# The list is modified in place: VMs that reached the expected status
# are removed, so on return it only holds the VMs still pending.
#
def _do_wait(vms, action, maxwait)
    if maxwait == 0 && action == 'resume'
        # The user explicitly does not want to wait for resumed VMs
        vms.clear
        return 0
    end

    expected = EXPECTED_STATUS_MAP[action]

    print "Wait #{maxwait}s for VMs to #{action}"
    (0..maxwait).each do |elapsed|
        # Refresh every VM and drop the ones that reached the expected status
        vms.delete_if do |vm|
            vm.info
            vm.status == expected
        end
        # Do not sleep once nothing is pending or the time budget is spent
        break if vms.empty? || elapsed == maxwait
        print "."
        sleep(1)
    end

    if vms.empty?
        puts " OK"
        0
    else
        puts " FAIL"
        -1
    end
end
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# NAME: _do_suspend
|
# NAME: _do_suspend
|
||||||
# PARAM: OpenNebula::VirtualMachine object
|
# PARAM: OpenNebula::VirtualMachine object
|
||||||
# AIM: Suspend a virtual machine
|
# AIM: Suspend a virtual machine
|
||||||
#
|
#
|
||||||
def _do_suspend(vm, wait)
|
def _do_suspend(vm)
|
||||||
fd = File.open(RUNVMFILE,'a')
|
fd = File.open(RUNVMFILE,'a')
|
||||||
if vm.status == "runn"
|
if vm.status == "runn"
|
||||||
puts("Suspending #{vm.name} ...")
|
puts("Suspending #{vm.id} - #{vm.name}... ")
|
||||||
fd.write("#{vm.id}\n")
|
fd.write("#{vm.id}\n")
|
||||||
vm.suspend
|
rc = vm.suspend
|
||||||
if wait
|
if OpenNebula.is_error?(rc)
|
||||||
_wait(vm, "susp")
|
puts rc.message
|
||||||
|
else
|
||||||
|
puts "scheduled"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
fd.close
|
fd.close
|
||||||
|
@ -77,28 +120,13 @@ end
|
||||||
# PARAM: OpenNebula::VirtualMachine object
|
# PARAM: OpenNebula::VirtualMachine object
|
||||||
# AIM: Resum a suspended virtual machines
|
# AIM: Resum a suspended virtual machines
|
||||||
#
|
#
|
||||||
def _do_resume(vm, wait, force=FALSE)
|
def _do_resume(vm)
|
||||||
if force
|
print("Resume #{vm.id} - #{vm.name}... ")
|
||||||
vm.resume
|
rc = vm.resume
|
||||||
|
if OpenNebula.is_error?(rc)
|
||||||
|
puts rc.message
|
||||||
else
|
else
|
||||||
if vm.status == "susp"
|
puts "scheduled"
|
||||||
puts("Resume on #{vm.name}")
|
|
||||||
vm.resume
|
|
||||||
# elsif vm.status == 'save'
|
|
||||||
# puts("Recover on #{vm.name}")
|
|
||||||
# # Try to recover VM with retry action
|
|
||||||
# vm.recover(2)
|
|
||||||
# vm.resume
|
|
||||||
elsif vm.status == 'unkn'
|
|
||||||
puts("Resume on #{vm.name}")
|
|
||||||
vm.resume
|
|
||||||
else
|
|
||||||
return -1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
if wait
|
|
||||||
_wait(vm, "runn")
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -107,65 +135,65 @@ options = {:creds => nil, :action => nil, :endpoint => nil,
|
||||||
:timeout => nil}
|
:timeout => nil}
|
||||||
|
|
||||||
parser = OptionParser.new do|opts|
|
parser = OptionParser.new do|opts|
|
||||||
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
|
||||||
opts.on('-c', '--creds file', 'Crediential file') do |value|
|
opts.on('-c', '--creds file', 'Crediential file') do |value|
|
||||||
options[:creds] = value;
|
options[:creds] = value;
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-a', '--action action', 'Action to run') do |value|
|
opts.on('-a', '--action action', 'Action to run') do |value|
|
||||||
options[:action] = value;
|
options[:action] = value;
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-e', '--end-point url', 'End point URL') do |value|
|
opts.on('-e', '--end-point url', 'End point URL') do |value|
|
||||||
options[:endpoint] = value;
|
options[:endpoint] = value;
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-t', '--timeout timeout', 'Timeout for opennebula connection') do |value|
|
opts.on('-t', '--timeout timeout', 'Timeout for opennebula connection') do |value|
|
||||||
options[:timeout] = value.to_i;
|
options[:timeout] = value.to_i;
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-w', '--wait', 'Wait for action ends') do |w|
|
opts.on('-w', '--wait timeout', 'Wait for action ends') do |value|
|
||||||
options[:wait] = w
|
options[:wait] = value.to_i
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-h', '--help', 'Displays Help') do
|
|
||||||
puts opts
|
|
||||||
exit
|
|
||||||
end
|
|
||||||
|
|
||||||
|
opts.on('-h', '--help', 'Displays Help') do
|
||||||
|
puts opts
|
||||||
|
exit
|
||||||
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
parser.parse!
|
parser.parse!
|
||||||
|
|
||||||
# OpenNebula credentials
|
# OpenNebula credentials
|
||||||
|
|
||||||
if not options[:creds]
|
if not options[:creds]
|
||||||
options[:creds] = "/var/lib/one/.one/one_auth"
|
options[:creds] = "/var/lib/one/.one/one_auth"
|
||||||
end
|
end
|
||||||
|
|
||||||
if not options[:action]
|
if not options[:action]
|
||||||
options[:action] = "status"
|
options[:action] = "status"
|
||||||
end
|
end
|
||||||
|
|
||||||
if not options[:endpoint]
|
if not options[:endpoint]
|
||||||
ip = CreoleGet('adresse_ip_eth0').chomp
|
ip = CreoleGet('adresse_ip_eth0').chomp
|
||||||
options[:endpoint] = "http://#{ip}:2633/RPC2"
|
options[:endpoint] = "http://#{ip}:2633/RPC2"
|
||||||
end
|
end
|
||||||
|
|
||||||
if not options[:timeout]
|
if not options[:timeout]
|
||||||
options[:timeout] = TIMEOUT
|
options[:timeout] = TIMEOUT
|
||||||
end
|
end
|
||||||
|
|
||||||
# Actions
|
if not options[:wait]
|
||||||
SUPPORTED = ['status', 'boot', 'resume', 'shutdown', 'suspend']
|
options[:wait] = MAXWAIT
|
||||||
|
|
||||||
|
|
||||||
if not SUPPORTED.include?(options[:action])
|
|
||||||
puts("Action : #{options[:action]}) is not supported")
|
|
||||||
exit(-1)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
if not ACTIONS.include?(options[:action])
|
||||||
|
puts("Action : #{options[:action]}) is not supported")
|
||||||
|
exit(-1)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
begin
|
begin
|
||||||
File.readlines(options[:creds]).each do |line|
|
File.readlines(options[:creds]).each do |line|
|
||||||
CREDENTIALS = line
|
CREDENTIALS = line
|
||||||
|
@ -175,54 +203,77 @@ rescue
|
||||||
exit(-1)
|
exit(-1)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
exit_code = 0
|
||||||
begin
|
begin
|
||||||
client = Client.new(CREDENTIALS, options[:endpoint])
|
client = Client.new(CREDENTIALS, options[:endpoint])
|
||||||
|
|
||||||
vm_pool = VirtualMachinePool.new(client, USERFLAG)
|
vm_pool = VirtualMachinePool.new(client, USERFLAG)
|
||||||
|
|
||||||
if File.exist?(RUNVMFILE)
|
# Try to load vm pool infos from OpenNebula until timeout expires
|
||||||
running_vms = File.readlines(RUNVMFILE)
|
|
||||||
else
|
|
||||||
running_vms = []
|
|
||||||
end
|
|
||||||
|
|
||||||
rc = vm_pool.info
|
|
||||||
cnt = 0
|
|
||||||
while OpenNebula.is_error?(rc)
|
|
||||||
if cnt == options[:timeout]
|
|
||||||
puts rc.message
|
|
||||||
exit(-1)
|
|
||||||
end
|
|
||||||
rc = vm_pool.info
|
rc = vm_pool.info
|
||||||
sleep(1)
|
cnt = 0
|
||||||
cnt += 1
|
while OpenNebula.is_error?(rc)
|
||||||
end
|
if cnt == options[:timeout]
|
||||||
|
puts rc.message
|
||||||
vm_pool.each do |vm|
|
exit(-1)
|
||||||
case options[:action]
|
end
|
||||||
when "status"
|
rc = vm_pool.info
|
||||||
puts("#{vm.name}\t#{vm.status}")
|
sleep(1)
|
||||||
when "boot"
|
cnt += 1
|
||||||
puts("DEBUG #{vm.status}")
|
|
||||||
if vm.status == "unkn"
|
|
||||||
puts("Booting #{vm.name} ...")
|
|
||||||
vm.boot
|
|
||||||
end
|
|
||||||
when "suspend"
|
|
||||||
_do_suspend(vm, options[:wait])
|
|
||||||
when "resume"
|
|
||||||
if running_vms.include?("#{vm.id}\n")
|
|
||||||
_do_resume(vm, options[:wait], TRUE)
|
|
||||||
end
|
|
||||||
else
|
|
||||||
puts("#{vm.name}\t#{vm.status}")
|
|
||||||
end
|
end
|
||||||
end
|
|
||||||
if options[:action] == "resume"
|
if options[:action] == "resume"
|
||||||
File.truncate(RUNVMFILE, 0) if File.exists?(RUNVMFILE)
|
running_vms = dump_running_vms_file()
|
||||||
end
|
running_vms.each do |vmid|
|
||||||
|
vm = VirtualMachine.new_with_id(vmid, client)
|
||||||
|
vm.info
|
||||||
|
_do_resume(vm)
|
||||||
|
end
|
||||||
|
|
||||||
|
else
|
||||||
|
vm_pool.each do |vm|
|
||||||
|
case options[:action]
|
||||||
|
when "status"
|
||||||
|
puts "#{vm.name}\t#{vm.status}"
|
||||||
|
|
||||||
|
when "suspend"
|
||||||
|
_do_suspend(vm)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Update list of suspended VMs
|
||||||
|
running_vms = dump_running_vms_file()
|
||||||
|
end
|
||||||
|
|
||||||
|
if options[:action] != 'status'
|
||||||
|
vms = []
|
||||||
|
running_vms.each do |vmid|
|
||||||
|
vm = VirtualMachine.new_with_id(vmid, client)
|
||||||
|
vms.push(vm)
|
||||||
|
end
|
||||||
|
exit_code = _do_wait(vms, options[:action], options[:wait])
|
||||||
|
end
|
||||||
|
|
||||||
|
if options[:action] == "resume"
|
||||||
|
if vms.empty?
|
||||||
|
File.truncate(RUNVMFILE, 0) if File.exists?(RUNVMFILE)
|
||||||
|
else
|
||||||
|
fd = File.open(RUNVMFILE,'w')
|
||||||
|
vms.each do |vm|
|
||||||
|
fd.write("#{vm.id}\n")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
rescue Exception => e
|
rescue Exception => e
|
||||||
puts e.message
|
puts e.message
|
||||||
exit(-1)
|
puts e.backtrace
|
||||||
|
exit(-1)
|
||||||
end
|
end
|
||||||
exit 0
|
|
||||||
|
exit(exit_code)
|
||||||
|
|
||||||
|
# Local Variables:
|
||||||
|
# ruby-indent-level: 4
|
||||||
|
# End:
|
||||||
|
|
Loading…
Reference in New Issue