Merge pull request #22726 from smarterclayton/disruption

openshift-merge-robot · web-flow · commit d35551f39c0d · 2019-05-03T07:28:36.000+02:00
Support --options on upgrade tests to abort in progress
diff --git a/cmd/openshift-tests/openshift-tests.go b/cmd/openshift-tests/openshift-tests.go
@@ -116,7 +116,6 @@ func newRunCommand() *cobra.Command {
 				if err := initProvider(opt.Provider); err != nil {
 					return err
 				}
-				os.Setenv("TEST_PROVIDER", opt.Provider)
 				e2e.AfterReadingAllFlags(exutil.TestContext)
 				return opt.Run(args)
 			})
@@ -142,9 +141,18 @@ func newRunUpgradeCommand() *cobra.Command {
 		If you specify the --dry-run argument, the actions the suite will take will be printed to the
 		output.
 
+		Supported options:
+
+		* abort-at=NUMBER - Set to a number between 0 and 100 to control the percent of operators
+		at which to stop the current upgrade and roll back to the current version.
+		* disrupt-reboot=POLICY - During upgrades, periodically reboot master nodes. If set to 'graceful'
+		the reboot will allow the node to shut down services in an orderly fashion. If set to 'force' the
+		machine will terminate immediately without clean shutdown.
+
 		`) + testginkgo.SuitesString(opt.Suites, "\n\nAvailable upgrade suites:\n\n"),
 
-		SilenceUsage: true,
+		SilenceUsage:  true,
+		SilenceErrors: true,
 		RunE: func(cmd *cobra.Command, args []string) error {
 			return mirrorToFile(opt, func() error {
 				if len(upgradeOpt.ToImage) == 0 {
@@ -156,7 +164,11 @@ func newRunUpgradeCommand() *cobra.Command {
 						if suite.Name == args[0] {
 							upgradeOpt.Suite = suite.Name
 							upgradeOpt.JUnitDir = opt.JUnitDir
-							os.Setenv("TEST_UPGRADE", upgradeOpt.ToEnv())
+							value := upgradeOpt.ToEnv()
+							if err := initUpgrade(value); err != nil {
+								return err
+							}
+							opt.SuiteOptions = value
 							break
 						}
 					}
@@ -165,7 +177,6 @@ func newRunUpgradeCommand() *cobra.Command {
 				if err := initProvider(opt.Provider); err != nil {
 					return err
 				}
-				os.Setenv("TEST_PROVIDER", opt.Provider)
 				e2e.AfterReadingAllFlags(exutil.TestContext)
 				return opt.Run(args)
 			})
@@ -198,7 +209,7 @@ func newRunTestCommand() *cobra.Command {
 			if err := initProvider(os.Getenv("TEST_PROVIDER")); err != nil {
 				return err
 			}
-			if err := initUpgrade(os.Getenv("TEST_UPGRADE")); err != nil {
+			if err := initUpgrade(os.Getenv("TEST_SUITE_OPTIONS")); err != nil {
 				return err
 			}
 			e2e.AfterReadingAllFlags(exutil.TestContext)
@@ -236,6 +247,7 @@ func mirrorToFile(opt *testginkgo.Options, fn func() error) error {
 
 func bindOptions(opt *testginkgo.Options, flags *pflag.FlagSet) {
 	flags.BoolVar(&opt.DryRun, "dry-run", opt.DryRun, "Print the tests to run without executing them.")
+	flags.BoolVar(&opt.PrintCommands, "print-commands", opt.PrintCommands, "Print the sub-commands that would be executed instead.")
 	flags.StringVar(&opt.JUnitDir, "junit-dir", opt.JUnitDir, "The directory to write test reports to.")
 	flags.StringVar(&opt.Provider, "provider", opt.Provider, "The cluster infrastructure provider. Will automatically default to the correct value.")
 	flags.StringVarP(&opt.TestFile, "file", "f", opt.TestFile, "Create a suite from the newline-delimited test names in this file.")
diff --git a/cmd/openshift-tests/upgrade.go b/cmd/openshift-tests/upgrade.go
@@ -25,7 +25,23 @@ var upgradeSuites = []*ginkgo.TestSuite{
 		`),
 		Matches: func(name string) bool { return strings.Contains(name, "[Feature:ClusterUpgrade]") },
 
-		Init:        func() error { return filterUpgrade(upgrade.AllTests(), func(name string) bool { return true }) },
+		Init: func(opt map[string]string) error {
+			for k, v := range opt {
+				switch k {
+				case "abort-at":
+					if err := upgrade.SetUpgradeAbortAt(v); err != nil {
+						return err
+					}
+				case "disrupt-reboot":
+					if err := upgrade.SetUpgradeDisruptReboot(v); err != nil {
+						return err
+					}
+				default:
+					return fmt.Errorf("unrecognized upgrade option: %s", k)
+				}
+			}
+			return filterUpgrade(upgrade.AllTests(), func(name string) bool { return true })
+		},
 		TestTimeout: 120 * time.Minute,
 	},
 }
@@ -34,6 +50,27 @@ type UpgradeOptions struct {
 	Suite    string
 	ToImage  string
 	JUnitDir string
+
+	TestOptions []string
+}
+
+// OptionsMap parses the KEY=VALUE entries in TestOptions into a map. It
+// returns an error for an entry with no "=", an empty key, or a repeated key.
+func (o *UpgradeOptions) OptionsMap() (map[string]string, error) {
+	parsed := make(map[string]string)
+	for _, raw := range o.TestOptions {
+		kv := strings.SplitN(raw, "=", 2)
+		if len(kv) != 2 || len(kv[0]) == 0 {
+			return nil, fmt.Errorf("test option %q is not valid, must be KEY=VALUE", raw)
+		}
+		key, value := kv[0], kv[1]
+		// A key may appear only once; a repeat is reported as an error.
+		if _, seen := parsed[key]; seen {
+			return nil, fmt.Errorf("option %q declared twice", key)
+		}
+		parsed[key] = value
+	}
+	return parsed, nil
 }
 
 func (o *UpgradeOptions) ToEnv() string {
@@ -57,8 +94,12 @@ func initUpgrade(value string) error {
 			exutil.TestContext.UpgradeTarget = ""
 			exutil.TestContext.UpgradeImage = opt.ToImage
 			exutil.TestContext.ReportDir = opt.JUnitDir
+			o, err := opt.OptionsMap()
+			if err != nil {
+				return err
+			}
 			if suite.Init != nil {
-				return suite.Init()
+				return suite.Init(o)
 			}
 			return nil
 		}
@@ -79,4 +120,5 @@ func filterUpgrade(tests []upgrades.Test, match func(string) bool) error {
 
 func bindUpgradeOptions(opt *UpgradeOptions, flags *pflag.FlagSet) {
 	flags.StringVar(&opt.ToImage, "to-image", opt.ToImage, "Specify the image to test an upgrade to.")
+	flags.StringSliceVar(&opt.TestOptions, "options", opt.TestOptions, "A set of KEY=VALUE options to control the test. See the help text.")
 }
diff --git a/pkg/test/ginkgo/cmd_runsuite.go b/pkg/test/ginkgo/cmd_runsuite.go
@@ -30,12 +30,21 @@ type Options struct {
 
 	IncludeSuccessOutput bool
 
-	Provider string
+	Provider     string
+	SuiteOptions string
 
 	Suites []*TestSuite
 
-	DryRun      bool
-	Out, ErrOut io.Writer
+	DryRun        bool
+	PrintCommands bool
+	Out, ErrOut   io.Writer
+}
+
+// AsEnv returns the provider and suite options as TEST_PROVIDER / TEST_SUITE_OPTIONS NAME=VALUE strings.
+func (opt *Options) AsEnv() []string {
+	provider := "TEST_PROVIDER=" + opt.Provider
+	suiteOptions := "TEST_SUITE_OPTIONS=" + opt.SuiteOptions
+	return []string{provider, suiteOptions}
 }
 
 func (opt *Options) Run(args []string) error {
@@ -104,6 +113,11 @@ func (opt *Options) Run(args []string) error {
 		return fmt.Errorf("suite %q does not contain any tests", suite.Name)
 	}
 
+	if opt.PrintCommands {
+		status := newTestStatus(opt.Out, true, len(tests), time.Minute, &monitor.Monitor{}, opt.AsEnv())
+		newParallelTestQueue(tests).Execute(context.Background(), 1, status.OutputCommand)
+		return nil
+	}
 	if opt.DryRun {
 		for _, test := range sortedTests(tests) {
 			fmt.Fprintf(opt.Out, "%q\n", test.name)
@@ -164,7 +178,7 @@ func (opt *Options) Run(args []string) error {
 	if len(tests) == 1 {
 		includeSuccess = true
 	}
-	status := newTestStatus(opt.Out, includeSuccess, len(tests), timeout, m)
+	status := newTestStatus(opt.Out, includeSuccess, len(tests), timeout, m, opt.AsEnv())
 
 	smoke, normal := splitTests(tests, func(t *testCase) bool {
 		return strings.Contains(t.name, "[Smoke]")
@@ -255,7 +269,7 @@ func (opt *Options) Run(args []string) error {
 		}
 
 		q := newParallelTestQueue(retries)
-		status := newTestStatus(ioutil.Discard, opt.IncludeSuccessOutput, len(retries), timeout, m)
+		status := newTestStatus(ioutil.Discard, opt.IncludeSuccessOutput, len(retries), timeout, m, opt.AsEnv())
 		q.Execute(ctx, parallelism, status.Run)
 		var flaky []string
 		var repeatFailures []*testCase
diff --git a/pkg/test/ginkgo/status.go b/pkg/test/ginkgo/status.go
@@ -1,12 +1,14 @@
 package ginkgo
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"io"
 	"os"
 	"os/exec"
 	"sort"
+	"strings"
 	"sync"
 	"syscall"
 	"time"
@@ -18,6 +20,7 @@ type testStatus struct {
 	out     io.Writer
 	timeout time.Duration
 	monitor monitor.Interface
+	env     []string
 
 	includeSuccessfulOutput bool
 
@@ -27,12 +30,13 @@ type testStatus struct {
 	total    int
 }
 
-func newTestStatus(out io.Writer, includeSuccessfulOutput bool, total int, timeout time.Duration, m monitor.Interface) *testStatus {
+func newTestStatus(out io.Writer, includeSuccessfulOutput bool, total int, timeout time.Duration, m monitor.Interface, testEnv []string) *testStatus {
 	return &testStatus{
 		out:     out,
 		total:   total,
 		timeout: timeout,
 		monitor: m,
+		env:     testEnv,
 
 		includeSuccessfulOutput: includeSuccessfulOutput,
 	}
@@ -53,6 +57,17 @@ func (s *testStatus) Fprintf(format string) {
 	fmt.Fprintf(s.out, format, s.failures, s.index, s.total)
 }
 
+// OutputCommand writes to s.out the command Run would execute; env entries lacking "=" are skipped.
+func (s *testStatus) OutputCommand(ctx context.Context, test *testCase) {
+	buf := &bytes.Buffer{}
+	for _, env := range s.env {
+		if parts := strings.SplitN(env, "=", 2); len(parts) == 2 {
+			fmt.Fprintf(buf, "%s=%q ", parts[0], parts[1])
+		}
+	}
+	fmt.Fprintf(s.out, "%s%s %s %q\n", buf.String(), os.Args[0], "run-test", test.name)
+}
+
 func (s *testStatus) Run(ctx context.Context, test *testCase) {
 	defer func() {
 		switch {
@@ -94,6 +109,7 @@ func (s *testStatus) Run(ctx context.Context, test *testCase) {
 
 	test.start = time.Now()
 	c := exec.Command(os.Args[0], "run-test", test.name)
+	c.Env = append(os.Environ(), s.env...)
 	s.Fprintf(fmt.Sprintf("started: (%s) %q\n\n", "%d/%d/%d", test.name))
 	out, err := runWithTimeout(ctx, c, s.timeout)
 	test.end = time.Now()
diff --git a/pkg/test/ginkgo/test.go b/pkg/test/ginkgo/test.go
@@ -63,7 +63,7 @@ type TestSuite struct {
 
 	// Init should be run once before a test in this suite is run. Not called by
 	// methods in this package.
-	Init func() error
+	Init func(map[string]string) error
 
 	Parallelism int
 	// The number of flakes that may occur before this test is marked as a failure.
diff --git a/test/e2e/upgrade/monitor.go b/test/e2e/upgrade/monitor.go
diff --git a/test/e2e/upgrade/upgrade.go b/test/e2e/upgrade/upgrade.go