Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API, CLI: Add feature to rebalance (migrate-data) #290

Merged
merged 4 commits into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/on-pr-submit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ jobs:
run: |
cd tests && ./build-container.sh
- name: Setup Test environment
run: binnacle -v tests/setup.t
run: binnacle -vv tests/setup.t
- name: Build and Install Storage manager to nodes/containers
run: VERSION="${{ github.ref_name }}" binnacle -v tests/install.t
run: VERSION="${{ github.ref_name }}" binnacle -vv tests/install.t
- name: Run all Tests
run: binnacle -v tests/all
run: binnacle -vv tests/all
2 changes: 0 additions & 2 deletions mgr/src/cmds/rebalance_process.cr
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,7 @@ class Rebalancer
all_dirs << rel_path
next
end

mnt_full_path = Path.new(@mount_dir, rel_path)

# Stat the file from the @backend_dir to check the size
begin
file_info = File.info(backend_full_path, follow_symlinks: false)
Expand Down
47 changes: 47 additions & 0 deletions mgr/src/cmds/rebalance_start_stop.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
require "./helpers"

command "rebalance.start", "Start Rebalancing Kadalu Storage volume" do |parser, _|
parser.banner = "Usage: kadalu rebalance start POOL/VOLNAME [arguments]"
end

handler "rebalance.start" do |args|
begin
command_error "Pool/Volname is required" if args.pos_args.size == 0
args.pool_name, volume_name = pool_and_volume_name(args.pos_args.size > 0 ? args.pos_args[0] : "")
api_call(args, "Failed to start rebalancing of volume") do |client|
volume = client.pool(args.pool_name).volume(volume_name).rebalance_start

handle_json_output(volume, args)

puts "Rebalance of Volume #{volume.name} started"
end
rescue ex : InvalidVolumeRequest
STDERR.puts "Starting of volume rebalance failed"
STDERR.puts ex
exit 1
end
end

command "rebalance.stop", "Stop Rebalancing Kadalu Storage volume" do |parser, _|
parser.banner = "Usage: kadalu rebalance stop POOL/VOLNAME [arguments]"
end

handler "rebalance.stop" do |args|
begin
command_error "Pool/Volname is required" if args.pos_args.size == 0
args.pool_name, volume_name = pool_and_volume_name(args.pos_args.size > 0 ? args.pos_args[0] : "")
next unless (args.script_mode || yes("Are you sure you want to stop rebalancing of volume? [y/N]"))

api_call(args, "Failed to stop rebalancing of volume.") do |client|
volume = client.pool(args.pool_name).volume(volume_name).rebalance_stop

handle_json_output(volume, args)

puts "Rebalancing of Volume #{volume.name} stopped"
end
rescue ex : InvalidVolumeRequest
STDERR.puts "Stopping of volume rebalance failed"
STDERR.puts ex
exit 1
end
end
4 changes: 4 additions & 0 deletions mgr/src/cmds/volumes.cr
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,10 @@ handler "volume.expand" do |args|

puts "Volume #{req.name} expanded successfully"
puts "ID: #{volume.id}"

puts "Proceed to the rebalancing of volume #{req.name} by following the below steps."
puts "To start the rebalancing of volume: `kadalu volume rebalnce-start #{args.pool_name}/#{req.name}`"
puts "To force stop the rebalancing of volume: `kadalu volume rebalnce-stop #{args.pool_name}/#{req.name}`"
end
rescue ex : InvalidVolumeRequest
STDERR.puts "Volume expand failed"
Expand Down
15 changes: 5 additions & 10 deletions mgr/src/server/plugins/volume_expand.cr
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,6 @@ require "../datastore/*"
require "./ping"
require "./volume_utils.cr"

ACTION_MANAGE_SERVICES = "manage_services"

node_action ACTION_MANAGE_SERVICES do |data, _env|
services, volfiles, _ = VolumeRequestToNode.from_json(data)
save_volfiles(volfiles)
sighup_processes(services)
restart_shd_service_and_start_fix_layout_service(services)
end

put "/api/v1/pools/:pool_name/volumes" do |env|
pool_name = env.params.url["pool_name"]

Expand Down Expand Up @@ -151,6 +142,10 @@ put "/api/v1/pools/:pool_name/volumes" do |env|

existing_nodes = participating_nodes(pool_name, volume)

# Add only the first existing node for fix-layout service
services = add_fix_layout_service(services, pool.not_nil!.name, req.name, existing_nodes[0],
volume.not_nil!.distribute_groups[0].storage_units[0])

# Remove duplicated node objects to avoid multiple node_actions to same node.
all_unique_nodes = (existing_nodes + nodes).uniq(&.id)

Expand All @@ -163,7 +158,7 @@ put "/api/v1/pools/:pool_name/volumes" do |env|
ACTION_MANAGE_SERVICES,
pool_name,
all_unique_nodes,
{services, volfiles, rollback_volume}.to_json
{services, volfiles, rollback_volume, "start"}.to_json
)

api_exception(!resp.ok, node_errors("Failed to restart SHD/start fix-layout service", resp.node_responses).to_json)
Expand Down
73 changes: 73 additions & 0 deletions mgr/src/server/plugins/volume_rebalance_start_stop.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
require "moana_types"

require "../conf"
require "./helpers"
require "../datastore/*"
require "./ping"
require "./volume_utils.cr"

def rebalance_start_stop(env, action)
pool_name = env.params.url["pool_name"]
volume_name = env.params.url["volume_name"]

forbidden_api_exception(!Datastore.maintainer?(env.user_id, pool_name, volume_name))

volume = Datastore.get_volume(pool_name, volume_name)
api_exception(volume.nil?, {"error": "Volume doesn't exists"}.to_json)
volume = volume.not_nil!
pool = volume.not_nil!.pool

nodes = participating_nodes(pool_name, volume)

# TODO: Add to missed_ops if a node is not reachable [Check if this is required, since node ping check is done]

# Validate if all the nodes are reachable.
resp = dispatch_action(ACTION_PING, pool_name, nodes, "")
api_exception(!resp.ok, node_errors("Not all participant nodes are reachable", resp.node_responses).to_json)

# Generate Services and Volfiles if Volume to be started
services, volfiles = services_and_volfiles(volume)

# Node list where migrate data process is to be run.
migrate_data_nodes = [] of String

# Add node of first storage_unit of every distribute group
volume.not_nil!.distribute_groups.each do |dist_grp|
services = add_migrate_data_service(services, pool.not_nil!.name, volume.not_nil!.name,
dist_grp.storage_units[0].node, dist_grp.storage_units[0])
migrate_data_nodes.push(dist_grp.storage_units[0].node.name)
end

resp = dispatch_action(
ACTION_MANAGE_SERVICES,
pool_name,
Datastore.get_nodes(pool_name, migrate_data_nodes.uniq),
{services, volfiles, volume, action}.to_json
)

api_exception(!resp.ok, node_errors("Failed to #{action} rebalancing of Volume", resp.node_responses).to_json)

# Save Services details
services.each do |node_id, svcs|
svcs.each do |svc|
if action == "start"
# Enable each Services
Datastore.enable_service(pool.id, node_id, svc)
else
# Disable each Services
Datastore.disable_service(pool.id, node_id, svc)
end
end
end

env.response.status_code = 200
volume.to_json
end

post "/api/v1/pools/:pool_name/volumes/:volume_name/rebalance_start" do |env|
rebalance_start_stop(env, "start")
end

post "/api/v1/pools/:pool_name/volumes/:volume_name/rebalance_stop" do |env|
rebalance_start_stop(env, "stop")
end
28 changes: 25 additions & 3 deletions mgr/src/server/plugins/volume_utils.cr
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@ TEST_XATTR_VALUE = "testvalue"
VOLUME_ID_XATTR_NAME = "trusted.glusterfs.volume-id"

alias VolumeRequestToNode = Tuple(Hash(String, Array(MoanaTypes::ServiceUnit)), Hash(String, Array(MoanaTypes::Volfile)), MoanaTypes::Volume)
alias VolumeRequestToNodeWithAction = Tuple(Hash(String, Array(MoanaTypes::ServiceUnit)), Hash(String, Array(MoanaTypes::Volfile)), MoanaTypes::Volume, String)
alias ServiceRequestToNode = Tuple(Hash(String, Array(MoanaTypes::ServiceUnit)))

ACTION_VALIDATE_VOLUME_CREATE = "validate_volume_create"
ACTION_VOLUME_CREATE = "volume_create"
ACTION_VOLUME_CREATE_STOPPED = "volume_create_stopped"
ACTION_MANAGE_SERVICES = "manage_services"

node_action ACTION_VALIDATE_VOLUME_CREATE do |data, _env|
req = MoanaTypes::Volume.from_json(data)
Expand All @@ -33,6 +35,13 @@ node_action ACTION_VOLUME_CREATE_STOPPED do |data, _env|
handle_volume_create(data, stopped: true)
end

node_action ACTION_MANAGE_SERVICES do |data, _env|
services, volfiles, _, action = VolumeRequestToNodeWithAction.from_json(data)
save_volfiles(volfiles)
sighup_processes(services)
restart_shd_service_and_manage_rebalance_services(services, action)
end

def volfile_get(name)
# TODO: Add logic to read from the Templates directory
case name
Expand Down Expand Up @@ -129,14 +138,20 @@ def validate_volume_create(req)
NodeResponse.new(true, "")
end

def restart_shd_service_and_start_fix_layout_service(services)
def restart_shd_service_and_manage_rebalance_services(services, action = "start")
unless services[GlobalConfig.local_node.id]?.nil?
services[GlobalConfig.local_node.id].each do |service|
svc = Service.from_json(service.to_json)
if svc.name == "shdservice"
svc.restart
elsif svc.name == "fixlayoutservice"
svc.start
elsif svc.name == "fixlayoutservice" || svc.name == "migratedataservice"
status_file_path = "/var/lib/kadalu/#{svc.id}.json"
FileUtils.rm(status_file_path) if File.exists?(status_file_path)
if action == "start"
svc.start
else
svc.stop
end
end
end
end
Expand Down Expand Up @@ -527,3 +542,10 @@ def add_fix_layout_service(services, pool_name, volume_name, node, storage_unit)

services
end

def add_migrate_data_service(services, pool_name, volume_name, node, storage_unit)
service = MigrateDataService.new(pool_name, volume_name, storage_unit)
services[node.id] << service.unit

services
end
4 changes: 2 additions & 2 deletions mgr/src/server/services/fix_layout.cr
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ class FixLayoutService < Service
def initialize(pool_name, volume_name, storage_unit)
@create_pid_file = true

@path = Path[PROGRAM_NAME].expand.to_s
@id = "#{volume_name}-fix-layout"
@path = PROGRAM_NAME == "kadalu" ? PROGRAM_NAME : Path[PROGRAM_NAME].expand.to_s
@id = "rebalance-fix-layout-#{storage_unit.path.gsub("/", "%2F")}"
@pid_file = "/run/kadalu/#{@id}.pid"
@args = [
"_rebalance", "--fix-layout",
Expand Down
22 changes: 22 additions & 0 deletions mgr/src/server/services/migrate_data.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
require "./services"

class MigrateDataService < Service
getter path : String,
args : Array(String),
pid_file : String,
id : String

def initialize(pool_name, volume_name, storage_unit)
@create_pid_file = true

@path = PROGRAM_NAME == "kadalu" ? PROGRAM_NAME : Path[PROGRAM_NAME].expand.to_s
@id = "rebalance-migrate-data-#{storage_unit.path.gsub("/", "%2F")}"
@pid_file = "/run/kadalu/#{@id}.pid"
@args = [
"_rebalance", "--migrate-data",
"#{pool_name}/#{volume_name}",
storage_unit.path, "--volfile-servers",
"#{storage_unit.node.name}:#{storage_unit.port}",
]
end
end
19 changes: 19 additions & 0 deletions sdk/crystal/src/volumes.cr
Original file line number Diff line number Diff line change
Expand Up @@ -202,5 +202,24 @@ module StorageManager
StorageManager.error_response(response)
end
end

def volume_rebalance_start_stop(action)
url = "#{@client.url}/api/v1/pools/#{@pool_name}/volumes/#{@name}/rebalance_#{action}"

response = StorageManager.http_post(url, "{}", headers: @client.auth_header)
if response.status_code == 200
MoanaTypes::Volume.from_json(response.body)
else
StorageManager.error_response(response)
end
end

def rebalance_start
volume_rebalance_start_stop("start")
end

def rebalance_stop
volume_rebalance_start_stop("stop")
end
end
end
Loading