ubicloud · fdr · Apr 23, 2024 · furkansahin · Apr 23, 2024 · fdr
diff --git a/model/vm.rb b/model/vm.rb
@@ -22,6 +22,7 @@ class Vm < Sequel::Model
   include SemaphoreMethods
   include HealthMonitorMethods
   semaphore :destroy, :start_after_host_reboot, :prevent_destroy, :update_firewall_rules, :checkup
+  semaphore :suspend, :unsuspend
 
   include Authorization::HyperTagMethods
 

diff --git a/prog/vm/nexus.rb b/prog/vm/nexus.rb
@@ -9,6 +9,7 @@
 class Prog::Vm::Nexus < Prog::Base
   subject_is :vm
   semaphore :destroy, :start_after_host_reboot, :prevent_destroy, :update_firewall_rules, :checkup
+  semaphore :suspend, :unsuspend
 
   def self.assemble(public_key, project_id, name: nil, size: "standard-2",
     unix_user: "ubi", location: "hetzner-hel1", boot_image: "ubuntu-jammy",
@@ -489,6 +490,11 @@ def before_run
   end
 
   label def wait
+    when_suspend_set? do
+      register_deadline(:suspended, 5 * 60)
+      hop_suspending
+    end
+
     when_start_after_host_reboot_set? do
       register_deadline(:wait, 5 * 60)
       hop_start_after_host_reboot
@@ -545,6 +551,44 @@ def before_run
     nap 30
   end
 
+  # Stops the VM's systemd units on its host, subtracts vm.cores and vm.mem_gib
+  # from the host's used_cores / used_hugepages_1g, then hops to "suspended".
+  # NOTE(review): if another teardown path subtracts the same counters later, a
+  # suspended VM would be deducted twice -- confirm against that path.
+  label def suspending
+    # :nocov:
+    ["", "-dnsmasq"].each do |suffix|
+      host.sshable.cmd("sudo systemctl stop #{q_vm}#{suffix}")
+    rescue Sshable::SshError => ex
+      # Only the "unit not loaded" failure is tolerated; anything else propagates.
+      raise unless /Failed to stop .* Unit .* not loaded\./.match?(ex.stderr)
+    end
+    # :nocov:
+
+    released = {
+      used_cores: Sequel[:used_cores] - vm.cores,
+      used_hugepages_1g: Sequel[:used_hugepages_1g] - vm.mem_gib
+    }
+    VmHost.dataset.where(id: vm.vm_host_id).update(released)
+
+    decr_suspend
+    hop_suspended
+  end
+
+  # Resting label for a suspended VM; only acts once the unsuspend semaphore is set.
+  label def suspended
+    when_unsuspend_set? do
+      units = "#{q_vm} #{q_vm}-dnsmasq"
+      host.sshable.cmd("sudo systemctl start #{units}")
+      reclaimed = {used_cores: Sequel[:used_cores] + vm.cores, used_hugepages_1g: Sequel[:used_hugepages_1g] + vm.mem_gib}
+      VmHost.dataset.where(id: vm.vm_host_id).update(reclaimed)
+      decr_unsuspend
+      hop_wait
+    end
+
+    nap 2**35 # very long nap; presumably an incoming semaphore wakes the strand -- confirm
+  end
+
   label def destroy
     decr_destroy
 

diff --git a/spec/prog/vm/nexus_spec.rb b/spec/prog/vm/nexus_spec.rb
@@ -781,11 +781,49 @@ def new_host(**args)
     end
   end
 
+  context "with vmhost with allocated resources" do
+    # Creates a VmHost with known usage counters (sharing its id with a fresh Sshable) and assigns vm to it.
+    create_host = ->(vm) {
+      sshable = Sshable.create { _1.id = VmHost.generate_uuid }
+      VmHost.create(location: "xyz",
+        used_cores: 9, used_hugepages_1g: 10,
+        total_hugepages_1g: 18) { _1.id = sshable.id }.tap {
+        vm.update(vm_host_id: _1.id, unix_user: "test_unix_user", public_key: "test_public_key", boot_image: "test-boot-image")
+      }
+    }
+
+    describe "#suspending" do
+      it "deducts the core and memory count" do
+        vmh = create_host.call(vm)
+        expect(nx.host.sshable).to receive(:cmd).at_least(:once)
+        expect { nx.suspending }.to hop("suspended").and change { vmh.reload.used_cores }.from(9).to(8).and change { vmh.reload.used_hugepages_1g }.from(10).to(2)
+      end
+    end
+
+    describe "#suspended" do
+      it "naps when there's nothing to do" do
+        expect { nx.suspended }.to nap
+      end
+
+      it "unsuspending reallocates the resources" do
+        vmh = create_host.call(vm)
+        expect(nx).to receive(:when_unsuspend_set?).and_yield
+        expect(nx.host.sshable).to receive(:cmd).at_least(:once)
+        expect { nx.suspended }.to hop("wait").and change { vmh.reload.used_cores }.from(9).to(10).and change { vmh.reload.used_hugepages_1g }.from(10).to(18)
+      end
+    end
+  end
+
   describe "#wait" do
     it "naps when nothing to do" do
       expect { nx.wait }.to nap(30)
     end
 
+    it "hops to suspending when needed" do
+      expect(nx).to receive(:when_suspend_set?).and_yield # stub the semaphore check so its block runs
+      expect { nx.wait }.to hop("suspending")
+    end
+
     it "hops to start_after_host_reboot when needed" do
       expect(nx).to receive(:when_start_after_host_reboot_set?).and_yield
       expect { nx.wait }.to hop("start_after_host_reboot")