-
Notifications
You must be signed in to change notification settings - Fork 122
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
support draining multiple node #469
base: master
Are you sure you want to change the base?
Changes from all commits
5cd6d5a
4e8f556
5811f05
4abdd9c
d449bc4
05726d3
96a94ec
96f5a4c
319ea71
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,16 +49,16 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli | |
common.WaitForDuration(experimentsDetails.RampTime) | ||
} | ||
|
||
if experimentsDetails.TargetNode == "" { | ||
if experimentsDetails.TargetNodes == "" { | ||
//Select node for kubelet-service-kill | ||
experimentsDetails.TargetNode, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) | ||
experimentsDetails.TargetNodes, err = common.GetNodeName(experimentsDetails.AppNS, experimentsDetails.AppLabel, experimentsDetails.NodeLabel, clients) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
if experimentsDetails.EngineName != "" { | ||
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNode + " node" | ||
msg := "Injecting " + experimentsDetails.ExperimentName + " chaos on " + experimentsDetails.TargetNodes + " node" | ||
types.SetEngineEventAttributes(eventsDetails, types.ChaosInject, msg, "Normal", chaosDetails) | ||
events.GenerateEvents(eventsDetails, clients, chaosDetails, "ChaosEngine") | ||
} | ||
|
@@ -114,45 +114,65 @@ func PrepareNodeDrain(experimentsDetails *experimentTypes.ExperimentDetails, cli | |
// drainNode drain the application node | ||
func drainNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { | ||
|
||
select { | ||
case <-inject: | ||
// stopping the chaos execution, if abort signal received | ||
os.Exit(0) | ||
default: | ||
log.Infof("[Inject]: Draining the %v node", experimentsDetails.TargetNode) | ||
targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") | ||
if len(targetNodes) == 0 { | ||
return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") | ||
} | ||
|
||
command := exec.Command("kubectl", "drain", experimentsDetails.TargetNode, "--ignore-daemonsets", "--delete-local-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") | ||
var out, stderr bytes.Buffer | ||
command.Stdout = &out | ||
command.Stderr = &stderr | ||
if err := command.Run(); err != nil { | ||
log.Infof("Error String: %v", stderr.String()) | ||
return errors.Errorf("Unable to drain the %v node, err: %v", experimentsDetails.TargetNode, err) | ||
} | ||
log.Infof("Target nodes list: %v", targetNodes) | ||
for _, targetNode := range targetNodes { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we also handle the panic case when the length of the list is zero, and return an error message asking the user to provide the target node name? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. When we run chaos on multiple nodes, we could perform the pre-chaos and post-chaos checks for all the target nodes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can also change |
||
|
||
common.SetTargets(experimentsDetails.TargetNode, "injected", "node", chaosDetails) | ||
select { | ||
case <-inject: | ||
// stopping the chaos execution, if abort signal received | ||
os.Exit(0) | ||
default: | ||
log.Infof("[Inject]: Draining the %v node", targetNode) | ||
|
||
command := exec.Command("kubectl", "drain", targetNode, "--ignore-daemonsets", "--delete-emptydir-data", "--force", "--timeout", strconv.Itoa(experimentsDetails.ChaosDuration)+"s") | ||
var out, stderr bytes.Buffer | ||
command.Stdout = &out | ||
command.Stderr = &stderr | ||
if err := command.Run(); err != nil { | ||
log.Infof("Error String: %v", stderr.String()) | ||
return errors.Errorf("Unable to drain the %v node, err: %v", targetNode, err) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can pass the stderr output here, since err will contain only the exit code description, e.g. |
||
} | ||
|
||
return retry. | ||
Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). | ||
Wait(time.Duration(experimentsDetails.Delay) * time.Second). | ||
Try(func(attempt uint) error { | ||
nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(experimentsDetails.TargetNode, v1.GetOptions{}) | ||
if err != nil { | ||
return err | ||
} | ||
if !nodeSpec.Spec.Unschedulable { | ||
return errors.Errorf("%v node is not in unschedulable state", experimentsDetails.TargetNode) | ||
} | ||
return nil | ||
}) | ||
common.SetTargets(targetNode, "injected", "node", chaosDetails) | ||
|
||
err = retry. | ||
Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). | ||
Wait(time.Duration(experimentsDetails.Delay) * time.Second). | ||
Try(func(attempt uint) error { | ||
nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) | ||
if err != nil { | ||
if apierrors.IsNotFound(err) { | ||
return nil | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shall we add a log here to specify that the resource was not found? |
||
} else { | ||
return err | ||
} | ||
} | ||
if !nodeSpec.Spec.Unschedulable { | ||
return errors.Errorf("%v node is not in unschedulable state", targetNode) | ||
} | ||
return nil | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
// uncordonNode uncordon the application node | ||
func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients clients.ClientSets, chaosDetails *types.ChaosDetails) error { | ||
|
||
targetNodes := strings.Split(experimentsDetails.TargetNode, ",") | ||
targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") | ||
if len(targetNodes) == 0 { | ||
return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") | ||
} | ||
|
||
for _, targetNode := range targetNodes { | ||
|
||
//Check node exist before uncordon the node | ||
|
@@ -183,7 +203,11 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients | |
Times(uint(experimentsDetails.Timeout / experimentsDetails.Delay)). | ||
Wait(time.Duration(experimentsDetails.Delay) * time.Second). | ||
Try(func(attempt uint) error { | ||
targetNodes := strings.Split(experimentsDetails.TargetNode, ",") | ||
targetNodes := strings.Split(experimentsDetails.TargetNodes, ",") | ||
if len(targetNodes) == 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment as above: this check will not trigger for an empty string. |
||
return errors.Errorf("No target nodes provided, expected the comma-separated names of one or more nodes") | ||
} | ||
|
||
for _, targetNode := range targetNodes { | ||
nodeSpec, err := clients.KubeClient.CoreV1().Nodes().Get(targetNode, v1.GetOptions{}) | ||
if err != nil { | ||
|
@@ -194,7 +218,7 @@ func uncordonNode(experimentsDetails *experimentTypes.ExperimentDetails, clients | |
} | ||
} | ||
if nodeSpec.Spec.Unschedulable { | ||
return errors.Errorf("%v node is in unschedulable state", experimentsDetails.TargetNode) | ||
return errors.Errorf("%v node is in unschedulable state", targetNode) | ||
} | ||
} | ||
return nil | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Even for an empty string value in `experimentsDetails.TargetNodes`, `len(targetNodes)` will be equal to 1, because `strings.Split("", ",")` returns a slice containing a single empty string.
We can modify the check as follows: