Skip to content

Commit

Permalink
Download & use versioned images
Browse files Browse the repository at this point in the history
In #1524 we changed rhizome to be able to download versioned boot
images. In this change, we actually use that infrastructure.

To download a versioned boot image, one can do:

```
> vmh.download_boot_image("ubuntu-jammy", version: "20240319")
```

After this has been done, all new VMs on that host which want to use the
`ubuntu-jammy` image will use the latest downloaded version of that image.

This allows us to download new image versions without impacting
existing VMs at all. Existing VMs will continue to use the older
images.

As follow up items:
* VmHost setup will include downloading some default images (e.g.
  `ubuntu-jammy`) with explicit versions. vm_setup won't download the
  images automatically.
* We will always enforce the use of versioned images
* Add a program to remove unused images
  • Loading branch information
pykello committed May 9, 2024
1 parent 830c023 commit f78b995
Show file tree
Hide file tree
Showing 11 changed files with 190 additions and 61 deletions.
14 changes: 14 additions & 0 deletions model/boot_image.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# frozen_string_literal: true

require_relative "../model"

# A boot image associated with a VM host. Each record belongs to one VmHost
# and may be referenced by VmStorageVolume rows through their boot_image_id.
class BootImage < Sequel::Model
# The host this image record belongs to.
many_to_one :vm_host, key: :vm_host_id, class: :VmHost
# Storage volumes that reference this image via boot_image_id.
one_to_many :vm_storage_volumes, key: :boot_image_id, class: :VmStorageVolume

include ResourceMethods

# UBID type used for this model's identifiers.
def self.ubid_type
UBID::TYPE_ETC
end
end
8 changes: 4 additions & 4 deletions model/vm_host.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ class VmHost < Sequel::Model
one_to_many :spdk_installations, key: :vm_host_id
one_to_many :storage_devices, key: :vm_host_id
one_to_many :pci_devices, key: :vm_host_id
one_to_many :boot_images, key: :vm_host_id

plugin :association_dependencies, assigned_host_addresses: :destroy, assigned_subnets: :destroy, hetzner_host: :destroy, spdk_installations: :destroy, storage_devices: :destroy
plugin :association_dependencies, assigned_host_addresses: :destroy, assigned_subnets: :destroy, hetzner_host: :destroy, spdk_installations: :destroy, storage_devices: :destroy, boot_images: :destroy

include ResourceMethods
include SemaphoreMethods
Expand Down Expand Up @@ -183,9 +184,8 @@ def install_rhizome(install_specs: false)
end

# Introduced for downloading a new boot image via REPL.
#
# Schedules a DownloadBootImage strand for this host, starting at the "start"
# label and due immediately.
#
# image_name - name of the image to download (e.g. "ubuntu-jammy").
# version:    - required image version (e.g. "20240319").
# custom_url: - optional URL to download the image from; nil when omitted.
#
# Returns whatever Strand.create_with_id returns.
# Raises ArgumentError when version: is not given.
def download_boot_image(image_name, version:, custom_url: nil)
  Strand.create_with_id(schedule: Time.now, prog: "DownloadBootImage", label: "start", stack: [{subject_id: id, image_name: image_name, custom_url: custom_url, version: version}])
end

def hetznerify(server_id)
Expand Down
1 change: 1 addition & 0 deletions model/vm_storage_volume.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class VmStorageVolume < Sequel::Model
many_to_one :storage_device
many_to_one :key_encryption_key_1, class: :StorageKeyEncryptionKey
many_to_one :key_encryption_key_2, class: :StorageKeyEncryptionKey
many_to_one :boot_image

plugin :association_dependencies, key_encryption_key_1: :destroy, key_encryption_key_2: :destroy

Expand Down
75 changes: 59 additions & 16 deletions prog/download_boot_image.rb
Original file line number Diff line number Diff line change
@@ -1,14 +1,42 @@
# frozen_string_literal: true

require "json"

class Prog::DownloadBootImage < Prog::Base
subject_is :sshable, :vm_host

# Name of the boot image to download, read from the current frame and cached.
# Raises KeyError when the frame has no "image_name" entry.
def image_name
  return @image_name if @image_name

  @image_name = frame.fetch("image_name")
end

# Optional download URL supplied in the frame; nil when none was given.
def custom_url
  @custom_url ||= frame["custom_url"]
end

# Version of the boot image to download, read from the current frame and
# cached. Raises KeyError when the frame has no "version" entry.
def version
  @version ||= frame.fetch("version")
end

# URL the image should be downloaded from, or nil when none is determined
# here.
#
# A "custom_url" in the frame always wins. Otherwise a presigned blob-storage
# GET URL (valid for one hour) is generated for images whose name starts with
# "github" or "postgres". For every other image the result is nil.
def url
  # YYY: Should we get ubuntu & almalinux urls here? Since we might start
  # putting all images into the blob storage in future, we're postponing the
  # decision and keeping the current logic (i.e. formula based URL in the
  # rhizome side).
  return @url if @url

  explicit_url = frame["custom_url"]
  @url =
    if explicit_url
      explicit_url
    elsif image_name.start_with?("github", "postgres")
      object_key = "#{image_name}-#{vm_host.arch}.raw"
      blob_storage_client.get_presigned_url("GET", Config.ubicloud_images_bucket_name, object_key, 60 * 60).to_s
    end
end

# Expected SHA-256 checksum for the image being downloaded, or nil when no
# checksum is recorded for this image/architecture/version combination.
def sha256_sum
  # Keyed by [image name, host architecture, image version].
  known_checksums = {
    ["ubuntu-jammy", "x64", "20240319"] => "304983616fcba6ee1452e9f38993d7d3b8a90e1eb65fb0054d672ce23294d812",
    ["ubuntu-jammy", "arm64", "20240319"] => "40ea1181447b9395fa03f6f2c405482fe532a348cc46fbb876effcfbbb35336f",
    ["almalinux-9.3", "x64", "20231113"] => "6bbd060c971fd827a544c7e5e991a7d9e44460a449d2d058a0bb1290dec5a114",
    ["almalinux-9.3", "arm64", "20231113"] => "a064715bc755346d5a8e1a4c6b1b7abffe4de03f1b0584942d5483ed32aafd67"
  }

  # YYY: In future all images should be checked for sha256 sum, so the nil
  # default will be removed.
  lookup_key = [image_name, vm_host.arch, version]
  known_checksums[lookup_key]
end

def blob_storage_client
Expand All @@ -21,39 +49,54 @@ def blob_storage_client
end

# Entry label: registers the image/version on this host and starts the
# download.
#
# Fails with "Image already exists on host" when a BootImage row with the
# same name and version already exists for this host. Otherwise a new row is
# created with activated_at left nil, then the prog hops to the download
# label.
label def start
  fail "Image already exists on host" if vm_host.boot_images_dataset.where(name: image_name, version: version).count > 0
  BootImage.create_with_id(
    vm_host_id: vm_host.id,
    name: image_name,
    version: version,
    activated_at: nil
  )
  hop_download
end

# Drives the download on the host through the daemonizer and polls its
# status every 15 seconds.
#
# - "Succeeded":  cleans the daemonizer unit and hops to
#                 update_available_storage_space.
# - "NotStarted": launches host/bin/download-boot-image with a JSON parameter
#                 blob (image name, url, version, sha256sum); the blob-storage
#                 certificates are passed on stdin.
# - "Failed":     removes the BootImage row for this name/version on this
#                 host and fails.
# - anything else: falls through and naps.
label def download
  # The daemon name includes the version, so each image version gets its own
  # daemonizer unit name.
  q_daemon_name = "download_#{image_name}_#{version}".shellescape
  case sshable.cmd("common/bin/daemonizer --check #{q_daemon_name}")
  when "Succeeded"
    sshable.cmd("common/bin/daemonizer --clean #{q_daemon_name}")
    hop_update_available_storage_space
  when "NotStarted"
    params_json = {
      image_name: image_name,
      url: url,
      version: version,
      sha256sum: sha256_sum
    }.to_json
    # NOTE(review): the shell-escaped JSON is embedded inside single quotes; a
    # single quote in any value would end that quoted string early. Confirm
    # inputs never contain one.
    sshable.cmd("common/bin/daemonizer 'host/bin/download-boot-image #{params_json.shellescape}' #{q_daemon_name}", stdin: Config.ubicloud_images_blob_storage_certs)
  when "Failed"
    BootImage.where(vm_host_id: vm_host.id, name: image_name, version: version).destroy
    fail "Failed to download '#{image_name}' image on #{vm_host}"
  end

  nap 15
end

# Subtracts the downloaded image's size from the available space of the
# host's "DEFAULT" storage device, then hops to activate_boot_image.
label def update_available_storage_space
  # The version is part of the image's file name.
  image_path = "/var/storage/images/#{image_name}-#{version}.raw"
  # Escape the path like every other shell argument built in this prog.
  image_size_bytes = sshable.cmd("stat -c %s #{image_path.shellescape}").to_i
  # Round up to whole GiB before deducting.
  image_size_gib = (image_size_bytes / 1024.0**3).ceil
  StorageDevice.where(vm_host_id: vm_host.id, name: "DEFAULT").update(
    available_storage_gib: Sequel[:available_storage_gib] - image_size_gib
  )
  hop_activate_boot_image
end

# Final label: stamps activated_at on the BootImage row(s) matching this
# host, image name and version, then exits the prog with a summary message.
label def activate_boot_image
  downloaded_image = BootImage.where(vm_host_id: vm_host.id, name: image_name, version: version)
  downloaded_image.update(activated_at: Time.now)

  pop "image=#{image_name} version=#{version} downloaded"
end
end
1 change: 1 addition & 0 deletions prog/vm/nexus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def storage_volumes
{
"boot" => s.boot,
"image" => s.boot ? vm.boot_image : nil,
"image_version" => s.boot_image&.version,
"size_gib" => s.size_gib,
"device_id" => s.device_id,
"disk_index" => s.disk_index,
Expand Down
25 changes: 19 additions & 6 deletions rhizome/host/bin/download-boot-image
Original file line number Diff line number Diff line change
@@ -1,18 +1,31 @@
#!/bin/env ruby
# frozen_string_literal: true

# Downloads a boot image onto this host.
#
# Usage: download-boot-image '<params as JSON>'
#   params keys: "image_name" (required), "version", "url", "sha256sum"
#   stdin: CA certificates for the image blob storage

require "json"
require_relative "../../common/lib/util"
require_relative "../lib/boot_image"

unless (params = ARGV.shift)
  puts "need params as argument"
  exit 1
end

params_json = JSON.parse(params)

unless (boot_image = params_json["image_name"])
  puts "need boot_image in params"
  exit 1
end

# YYY: version will be mandatory in the future
version = params_json["version"]
url = params_json["url"]
sha256sum = params_json["sha256sum"]

# The certificates arrive on stdin and are written to ca_path, which is then
# handed to the download.
certs = $stdin.read
ca_path = "/usr/lib/ssl/certs/ubicloud_images_blob_storage_certs.crt"
safe_write_to_file(ca_path, certs)

BootImage.new(boot_image, version).download(
  url: url, ca_path: ca_path, sha256sum: sha256sum
)
10 changes: 10 additions & 0 deletions scheduling/allocator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,15 @@ def self.allocate_spdk_installation(spdk_installations)

private

# Picks the boot image a new boot volume on vm_host should use.
#
# Only images with a non-nil activated_at are considered. Among those with
# the requested name, the one with the highest version (descending order on
# the version column) is chosen.
#
# Returns the id of the chosen BootImage, or nil when the host has no
# activated image with that name.
def allocate_boot_image(vm_host, boot_image_name)
  newest_activated = BootImage
    .where(vm_host_id: vm_host.id, name: boot_image_name)
    .exclude(activated_at: nil)
    .order_by(Sequel.desc(:version))
    .first

  newest_activated&.id
end

def map_volumes_to_devices
return false if @candidate_host[:available_storage_gib] < @request.storage_gib
@storage_device_allocations = @candidate_host[:storage_devices].map { StorageDeviceAllocation.new(_1["id"], _1["available_storage_gib"]) }
Expand Down Expand Up @@ -260,6 +269,7 @@ def create_storage_volumes(vm, vm_host)
boot: volume["boot"],
size_gib: volume["size_gib"],
use_bdev_ubi: SpdkInstallation[spdk_installation_id].supports_bdev_ubi? && volume["boot"],
boot_image_id: volume["boot"] ? allocate_boot_image(vm_host, vm.boot_image) : nil,
skip_sync: volume["skip_sync"],
disk_index: disk_index,
key_encryption_key_1_id: key_encryption_key&.id,
Expand Down
4 changes: 2 additions & 2 deletions spec/model/vm_host_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,9 @@
vh.id = "46683a25-acb1-4371-afe9-d39f303e44b4"
expect(Strand).to receive(:create) do |args|
expect(args[:prog]).to eq("DownloadBootImage")
expect(args[:stack]).to eq([subject_id: vh.id, image_name: "my-image", custom_url: "https://example.com/my-image.raw"])
expect(args[:stack]).to eq([subject_id: vh.id, image_name: "my-image", custom_url: "https://example.com/my-image.raw", version: "20230303"])
end
vh.download_boot_image("my-image", "https://example.com/my-image.raw")
vh.download_boot_image("my-image", custom_url: "https://example.com/my-image.raw", version: "20230303")
end

it "assigned_subnets returns the assigned subnets" do
Expand Down
85 changes: 55 additions & 30 deletions spec/prog/download_boot_image_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
require_relative "../model/spec_helper"

RSpec.describe Prog::DownloadBootImage do
subject(:dbi) { described_class.new(Strand.new(stack: [{"image_name" => "my-image", "custom_url" => "https://example.com/my-image.raw"}])) }
subject(:dbi) { described_class.new(Strand.new(stack: [{"image_name" => "my-image", "custom_url" => "https://example.com/my-image.raw", "version" => "20230303"}])) }

let(:sshable) { Sshable.create_with_id }
let(:vm_host) { VmHost.create(location: "hetzner-hel1") { _1.id = sshable.id } }
Expand All @@ -13,57 +13,71 @@
end

describe "#start" do
it "drains vm host and hops" do
expect {
expect { dbi.start }.to hop("wait_draining")
}.to change { vm_host.reload.allocation_state }.from("unprepared").to("draining")
end
end

describe "#wait_draining" do
it "waits draining" do
dataset = instance_double(Sequel::Dataset)
expect(vm_host).to receive(:vms_dataset).and_return(dataset)
expect(dataset).to receive(:where).with(boot_image: "my-image").and_return([instance_double(Vm)])
expect { dbi.wait_draining }.to nap(15)
it "creates database record and hops" do
expect { dbi.start }.to hop("download")
expect(BootImage.where(vm_host_id: vm_host.id, name: "my-image", version: "20230303").count).to eq(1)
end

it "hops if it's drained" do
expect(vm_host).to receive(:vms_dataset).and_return(instance_double(Sequel::Dataset, where: []))
expect(sshable).to receive(:cmd).with("sudo rm -f /var/storage/images/my-image.raw")
expect { dbi.wait_draining }.to hop("download")
it "fails if image already exists" do
BootImage.create(vm_host_id: vm_host.id, name: "my-image", version: "20230303") { _1.id = vm_host.id }
expect { dbi.start }.to raise_error RuntimeError, "Image already exists on host"
end
end

describe "#download" do
it "starts to download image if it's not started yet" do
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image").and_return("NotStarted")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'host/bin/download-boot-image my-image https://example.com/my-image.raw' download_my-image", stdin: nil)
params_json = {
"image_name" => "my-image",
"url" => "https://example.com/my-image.raw",
"version" => "20230303",
"sha256sum" => nil
}.to_json
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image_20230303").and_return("NotStarted")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'host/bin/download-boot-image #{params_json.shellescape}' download_my-image_20230303", stdin: nil)
expect { dbi.download }.to nap(15)
end

it "generates presigned URL if a custom_url not provided" do
expect(dbi).to receive(:frame).and_return({"image_name" => "my-image"}).at_least(:once)
it "generates presigned URL for github-runners images if a custom_url not provided" do
params_json = {
"image_name" => "github-runners-image",
"url" => "https://minio.example.com/my-image.raw",
"version" => "20230303",
"sha256sum" => nil
}.to_json
expect(dbi).to receive(:frame).and_return({"image_name" => "github-runners-image", "version" => "20230303"}).at_least(:once)
expect(Minio::Client).to receive(:new).and_return(instance_double(Minio::Client, get_presigned_url: "https://minio.example.com/my-image.raw"))
expect(Config).to receive(:ubicloud_images_blob_storage_certs).and_return("certs").at_least(:once)
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image").and_return("NotStarted")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'host/bin/download-boot-image my-image https://minio.example.com/my-image.raw' download_my-image", stdin: "certs")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_github-runners-image_20230303").and_return("NotStarted")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'host/bin/download-boot-image #{params_json.shellescape}' download_github-runners-image_20230303", stdin: "certs")
expect { dbi.download }.to nap(15)
end

it "doesn't send a url for non-github-runners images by default" do
params_json = {
"image_name" => "my-image",
"url" => nil,
"version" => "20230303",
"sha256sum" => nil
}.to_json
expect(dbi).to receive(:frame).and_return({"image_name" => "my-image", "version" => "20230303"}).at_least(:once)
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image_20230303").and_return("NotStarted")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'host/bin/download-boot-image #{params_json.shellescape}' download_my-image_20230303", stdin: nil)
expect { dbi.download }.to nap(15)
end

it "waits manual intervation if it's failed" do
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image").and_return("Failed")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image_20230303").and_return("Failed")
expect { dbi.download }.to raise_error RuntimeError, "Failed to download 'my-image' image on VmHost[#{vm_host.ubid}]"
end

it "waits for the download to complete" do
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image").and_return("InProgess")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image_20230303").and_return("InProgess")
expect { dbi.download }.to nap(15)
end

it "hops if it's succeeded" do
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image").and_return("Succeeded")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean download_my-image")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check download_my-image_20230303").and_return("Succeeded")
expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean download_my-image_20230303")
expect { dbi.download }.to hop("update_available_storage_space")
end
end
Expand All @@ -77,9 +91,20 @@
available_storage_gib: 35,
enabled: true
)
expect(sshable).to receive(:cmd).with("stat -c %s /var/storage/images/my-image.raw").and_return("2361393152")
expect { dbi.update_available_storage_space }.to exit({"msg" => "my-image downloaded"})
expect(sshable).to receive(:cmd).with("stat -c %s /var/storage/images/my-image-20230303.raw").and_return("2361393152")
expect { dbi.update_available_storage_space }.to hop("activate_boot_image")
expect(sd.reload.available_storage_gib).to eq(32)
end
end

describe "#activate_boot_image" do
it "activates the boot image" do
dataset = instance_double(Sequel::Dataset)
expect(BootImage).to receive(:where).with(vm_host_id: vm_host.id, name: "my-image", version: "20230303").and_return(dataset)
expect(dataset).to receive(:update) do |args|
expect(args[:activated_at]).to be <= Time.now
end
expect { dbi.activate_boot_image }.to exit({"msg" => "image=my-image version=20230303 downloaded"})
end
end
end

0 comments on commit f78b995

Please sign in to comment.