Correction du lancement du service onenode

Le service onenode ne se lance pas à cause d'une
boucle dans les dépendances de services.

Pour régler le problème, on le lance après multi-user.target.

De plus, au moment où le service se lance, OpenNebula n'est pas
complètement lancé ; le script essaye donc d'ouvrir des connexions
pendant 20 secondes avant de remonter un problème.

Enfin, on garde une liste des machines qui sont "running" avant l'arrêt
du serveur pour pouvoir les relancer proprement.

Contribution de Cadoles (https://www.cadoles.com)

ref #20338 @6h
This commit is contained in:
Philippe Caseiro 2017-05-03 11:40:08 +02:00 committed by Daniel Dehennin
parent edc07ce939
commit e9ff4ad34c
2 changed files with 64 additions and 20 deletions

View File

@ -2,6 +2,7 @@
Description=OpenNebula Node starter Description=OpenNebula Node starter
After=opennebula.service opennebula-sunstone.service libvirt-bin.service After=opennebula.service opennebula-sunstone.service libvirt-bin.service
ConditionPathExists=/etc/eole/release ConditionPathExists=/etc/eole/release
After=multi-user.target
[Service] [Service]
Type=oneshot Type=oneshot
@ -10,8 +11,8 @@ Environment=ENDPOINT=http://127.0.0.1:2633/RPC2
TimeoutSec=1min TimeoutSec=1min
RemainAfterExit=yes RemainAfterExit=yes
Restart=no Restart=no
ExecStart=/usr/share/eole/sbin/onevm-all -w -c ${CREDS} -e ${ENDPOINT} -a "resume" ExecStart=/usr/share/eole/sbin/onevm-all -t 20 -w -c ${CREDS} -e ${ENDPOINT} -a "resume"
ExecStop=/usr/share/eole/sbin/onevm-all -w -c ${CREDS} -e ${ENDPOINT} -a "suspend" ExecStop=/usr/share/eole/sbin/onevm-all -t 20 -w -c ${CREDS} -e ${ENDPOINT} -a "suspend"
[Install] [Install]
WantedBy=multi-user.target WantedBy=multi-user.target

View File

@ -5,6 +5,11 @@
############################################################################## ##############################################################################
ONE_LOCATION=ENV["ONE_LOCATION"] ONE_LOCATION=ENV["ONE_LOCATION"]
USER=ENV["user"] USER=ENV["user"]
RUNVMFILE="/var/lib/one/running.bck"
TIMEOUT=20
# oneadmin user flag value
USERFLAG=-2
if !ONE_LOCATION if !ONE_LOCATION
RUBY_LIB_LOCATION="/usr/lib/one/ruby" RUBY_LIB_LOCATION="/usr/lib/one/ruby"
@ -55,13 +60,16 @@ end
# AIM: Suspend a virtual machine # AIM: Suspend a virtual machine
# #
# Suspend a virtual machine if it is currently running, recording its id in
# RUNVMFILE first.
#
# vm   - object responding to #status, #name, #id and #suspend
# wait - when truthy, block in _wait until the VM reports "susp"
#
# RUNVMFILE is opened in append mode on every call, even when the VM is not
# running, so the file always exists afterwards; the "resume" action of this
# script truncates it unconditionally once it is done.
#
# Always returns nil.
def _do_suspend(vm, wait)
  running = vm.status == "runn"

  # Block form guarantees the handle is flushed and closed before the
  # (possibly long, possibly failing) suspend/wait sequence starts.
  File.open(RUNVMFILE, 'a') do |fd|
    fd.write("#{vm.id}\n") if running
  end
  return unless running

  puts("Suspending #{vm.name} ...")
  vm.suspend
  _wait(vm, "susp") if wait
  nil
end
# #
@ -69,28 +77,34 @@ end
# PARAM: OpenNebula::VirtualMachine object # PARAM: OpenNebula::VirtualMachine object
# AIM: Resum a suspended virtual machines # AIM: Resum a suspended virtual machines
# #
# Resume a virtual machine and optionally wait until it is running again.
#
# vm    - object responding to #status, #name and #resume
# wait  - when truthy, block in _wait until the VM reports "runn"
# force - when truthy, call vm.resume whatever the current status is
#         (no message is printed in that case)
#
# Without force, only VMs whose status is "susp" or "unkn" are resumed; for
# any other status the method returns -1 without touching the VM.
# Recovery of VMs in the 'save' state (vm.recover(2) followed by vm.resume)
# is intentionally left disabled.
#
# Returns -1 when the VM was skipped, otherwise the result of _wait (or nil
# when wait is falsy).
def _do_resume(vm, wait, force=false)
  # The default used to be the FALSE constant, which no longer exists since
  # Ruby 3.2; the literal works on every Ruby version.
  unless force
    return -1 unless ["susp", "unkn"].include?(vm.status)

    puts("Resume on #{vm.name}")
  end

  vm.resume
  _wait(vm, "runn") if wait
end
options = {:creds => nil, :action => nil, :endpoint => nil} options = {:creds => nil, :action => nil, :endpoint => nil,
:timeout => nil}
parser = OptionParser.new do|opts| parser = OptionParser.new do|opts|
opts.banner = "Usage: #{File.basename(__FILE__)} [options]" opts.banner = "Usage: #{File.basename(__FILE__)} [options]"
@ -106,6 +120,10 @@ parser = OptionParser.new do|opts|
options[:endpoint] = value; options[:endpoint] = value;
end end
opts.on('-t', '--timeout timeout', 'Timeout for opennebula connection') do |value|
options[:timeout] = value.to_i;
end
opts.on('-w', '--wait', 'Wait for action ends') do |w| opts.on('-w', '--wait', 'Wait for action ends') do |w|
options[:wait] = w options[:wait] = w
end end
@ -114,6 +132,8 @@ parser = OptionParser.new do|opts|
puts opts puts opts
exit exit
end end
end end
parser.parse! parser.parse!
@ -133,6 +153,10 @@ if not options[:endpoint]
options[:endpoint] = "http://#{ip}:2633/RPC2" options[:endpoint] = "http://#{ip}:2633/RPC2"
end end
if not options[:timeout]
options[:timeout] = TIMEOUT
end
# Actions # Actions
SUPPORTED = ['status', 'boot', 'resume', 'shutdown', 'suspend'] SUPPORTED = ['status', 'boot', 'resume', 'shutdown', 'suspend']
@ -154,12 +178,24 @@ end
begin begin
client = Client.new(CREDENTIALS, options[:endpoint]) client = Client.new(CREDENTIALS, options[:endpoint])
vm_pool = VirtualMachinePool.new(client, -1) vm_pool = VirtualMachinePool.new(client, USERFLAG)
if File.exist?(RUNVMFILE)
running_vms = File.open(RUNVMFILE,'r')
else
running_vms = []
end
rc = vm_pool.info rc = vm_pool.info
if OpenNebula.is_error?(rc) cnt = 0
puts rc.message while OpenNebula.is_error?(rc)
exit(-1) if cnt == options[:timeout]
puts rc.message
exit(-1)
end
rc = vm_pool.info
sleep(1)
cnt += 1
end end
vm_pool.each do |vm| vm_pool.each do |vm|
@ -175,11 +211,18 @@ begin
when "suspend" when "suspend"
_do_suspend(vm, options[:wait]) _do_suspend(vm, options[:wait])
when "resume" when "resume"
_do_resume(vm, options[:wait]) force = FALSE
if running_vms.include?('vm.id')
force = TRUE
end
_do_resume(vm, options[:wait], force)
else else
puts("#{vm.name}\t#{vm.status}") puts("#{vm.name}\t#{vm.status}")
end end
end end
if options[:action] == "resume"
File.truncate(RUNVMFILE, 0)
end
rescue Exception => e rescue Exception => e
puts e.message puts e.message
exit(-1) exit(-1)