Skip to content

Commit 5089636

Browse files
committed
add must-gather command
1 parent 0fc2062 commit 5089636

3 files changed

Lines changed: 316 additions & 0 deletions

File tree

pkg/oc/cli/admin/admin.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
migratehpa "github.com/openshift/origin/pkg/oc/cli/admin/migrate/legacyhpa"
3030
migratestorage "github.com/openshift/origin/pkg/oc/cli/admin/migrate/storage"
3131
migratetemplateinstances "github.com/openshift/origin/pkg/oc/cli/admin/migrate/templateinstances"
32+
"github.com/openshift/origin/pkg/oc/cli/admin/mustgather"
3233
"github.com/openshift/origin/pkg/oc/cli/admin/network"
3334
"github.com/openshift/origin/pkg/oc/cli/admin/node"
3435
"github.com/openshift/origin/pkg/oc/cli/admin/policy"
@@ -62,6 +63,7 @@ func NewCommandAdmin(name, fullName string, f kcmdutil.Factory, streams genericc
6263
Commands: []*cobra.Command{
6364
upgrade.New(f, fullName, streams),
6465
top.NewCommandTop(top.TopRecommendedName, fullName+" "+top.TopRecommendedName, f, streams),
66+
mustgather.NewMustGatherCommand(f, streams),
6567
},
6668
},
6769
{
Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,309 @@
1+
package mustgather
2+
3+
import (
4+
"fmt"
5+
"math/rand"
6+
"time"
7+
8+
"github.com/spf13/cobra"
9+
10+
corev1 "k8s.io/api/core/v1"
11+
rbacv1 "k8s.io/api/rbac/v1"
12+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
13+
"k8s.io/apimachinery/pkg/util/wait"
14+
"k8s.io/cli-runtime/pkg/genericclioptions"
15+
"k8s.io/cli-runtime/pkg/genericclioptions/printers"
16+
"k8s.io/client-go/kubernetes"
17+
"k8s.io/client-go/rest"
18+
"k8s.io/klog"
19+
kcmdutil "k8s.io/kubernetes/pkg/kubectl/cmd/util"
20+
"k8s.io/kubernetes/pkg/kubectl/scheme"
21+
"k8s.io/kubernetes/pkg/kubectl/util/templates"
22+
23+
"github.com/openshift/origin/pkg/oc/cli/rsync"
24+
)
25+
26+
var (
27+
mustGatherLong = templates.LongDesc(`
28+
Launch a pod to gather debugging information
29+
30+
This command will launch a pod in a temporary namespace on your
31+
cluster that gathers debugging information, using a copy of the active
32+
client config context, and then downloads the gathered information.
33+
34+
Experimental: This command is under active development and may change without notice.
35+
`)
36+
37+
mustGatherExample = templates.Examples(`
38+
# gather default information using the default image and command, writing into ./must-gather.local.<rand>
39+
oc adm must-gather
40+
41+
# gather default information with a specific local folder to copy to
42+
oc adm must-gather --dest-dir=/local/directory
43+
44+
# gather default information using a specific image, command, and pod-dir
45+
oc adm must-gather --image=my/image:tag --source-dir=/pod/directory -- myspecial-command.sh
46+
`)
47+
)
48+
49+
func NewMustGatherCommand(f kcmdutil.Factory, streams genericclioptions.IOStreams) *cobra.Command {
50+
o := NewMustGatherOptions(streams)
51+
rsyncCommand := rsync.NewCmdRsync(rsync.RsyncRecommendedName, "", f, streams)
52+
cmd := &cobra.Command{
53+
Use: "must-gather",
54+
Short: "Launch a new instance of a pod for gathering debug information",
55+
Long: mustGatherLong,
56+
Example: mustGatherExample,
57+
Hidden: true,
58+
Run: func(cmd *cobra.Command, args []string) {
59+
kcmdutil.CheckErr(o.Complete(f, cmd, args))
60+
kcmdutil.CheckErr(o.Run(rsyncCommand))
61+
},
62+
}
63+
64+
cmd.Flags().StringVar(&o.NodeName, "node-name", o.NodeName, "Set a specific node to use - by default a random master will be used")
65+
cmd.Flags().StringVar(&o.Image, "image", o.Image, "Set a specific image to use, by default the OpenShift's must-gather image will be used.")
66+
cmd.Flags().StringVar(&o.DestDir, "dest-dir", o.DestDir, "Set a specific directory on the local machine to write gathered data to.")
67+
cmd.Flags().BoolVar(&o.Keep, "keep", o.Keep, "Do not delete temporary resources when command completes.")
68+
cmd.Flags().MarkHidden("keep")
69+
70+
return cmd
71+
}
72+
73+
func NewMustGatherOptions(streams genericclioptions.IOStreams) *MustGatherOptions {
74+
return &MustGatherOptions{IOStreams: streams}
75+
}
76+
77+
func (o *MustGatherOptions) Complete(f kcmdutil.Factory, cmd *cobra.Command, args []string) error {
78+
if i := cmd.ArgsLenAtDash(); i != -1 && i < len(args) {
79+
o.Command = args[i:]
80+
} else {
81+
o.Command = args
82+
}
83+
var err error
84+
if o.Config, err = f.ToRESTConfig(); err != nil {
85+
return err
86+
}
87+
if o.Client, err = kubernetes.NewForConfig(o.Config); err != nil {
88+
return err
89+
}
90+
if len(o.DestDir) == 0 {
91+
o.DestDir = fmt.Sprintf("must-gather.local.%06d", rand.Int63())
92+
}
93+
if len(o.Image) == 0 {
94+
// TODO lookup cluster specific default
95+
o.Image = "quay.io/openshift/origin-must-gather:v4.0"
96+
}
97+
o.PrinterCreated, err = printers.NewTypeSetter(scheme.Scheme).WrapToPrinter(&printers.NamePrinter{Operation: "created"}, nil)
98+
if err != nil {
99+
return err
100+
}
101+
o.RsyncRshCmd = rsync.DefaultRsyncRemoteShellToUse(cmd.Parent())
102+
return nil
103+
}
104+
105+
type MustGatherOptions struct {
106+
genericclioptions.IOStreams
107+
108+
Config *rest.Config
109+
Client kubernetes.Interface
110+
111+
NodeName string
112+
DestDir string
113+
Image string
114+
Command []string
115+
Keep bool
116+
117+
RsyncRshCmd string
118+
119+
PrinterCreated printers.ResourcePrinter
120+
}
121+
122+
// Run creates and runs a must-gather pod.d
123+
func (o *MustGatherOptions) Run(rsyncCmd *cobra.Command) error {
124+
if len(o.Image) == 0 {
125+
return fmt.Errorf("missing an image")
126+
}
127+
128+
var err error
129+
130+
// create namespace
131+
ns, err := o.Client.CoreV1().Namespaces().Create(&corev1.Namespace{
132+
ObjectMeta: metav1.ObjectMeta{
133+
GenerateName: "openshift-must-gather-",
134+
Labels: map[string]string{
135+
"openshift.io/run-level": "0",
136+
},
137+
Annotations: map[string]string{
138+
"oc.openshift.io/command": "oc adm must-gather",
139+
},
140+
},
141+
})
142+
if err != nil {
143+
return err
144+
}
145+
if !o.Keep {
146+
defer func() {
147+
err = o.Client.CoreV1().Namespaces().Delete(ns.Name, nil)
148+
}()
149+
} else {
150+
o.PrinterCreated.PrintObj(ns, o.Out)
151+
}
152+
153+
clusterRoleBinding, err := o.Client.RbacV1().ClusterRoleBindings().Create(o.newClusterRoleBinding(ns.Name))
154+
if err != nil {
155+
return err
156+
}
157+
if !o.Keep {
158+
defer func() {
159+
err = o.Client.RbacV1().ClusterRoleBindings().Delete(clusterRoleBinding.Name, &metav1.DeleteOptions{})
160+
}()
161+
} else {
162+
o.PrinterCreated.PrintObj(clusterRoleBinding, o.Out)
163+
}
164+
165+
// create pod
166+
pod, err := o.Client.CoreV1().Pods(ns.Name).Create(o.newPod(o.NodeName))
167+
if err != nil {
168+
return err
169+
}
170+
171+
// wait for pod to be running (gather has completed)
172+
if err := o.waitForPodRunning(pod); err != nil {
173+
return err
174+
}
175+
176+
// copy the gathered files into the local destination dir
177+
err = o.copyFilesFromPod(pod)
178+
return err
179+
}
180+
181+
func (o *MustGatherOptions) copyFilesFromPod(pod *corev1.Pod) error {
182+
rsyncOptions := &rsync.RsyncOptions{
183+
Namespace: pod.Namespace,
184+
Source: &rsync.PathSpec{PodName: pod.Name, Path: "/must-gather/"},
185+
ContainerName: "copy",
186+
Destination: &rsync.PathSpec{PodName: "", Path: o.DestDir},
187+
Client: o.Client,
188+
Config: o.Config,
189+
RshCmd: fmt.Sprintf("%s --namespace=%s", o.RsyncRshCmd, pod.Namespace),
190+
IOStreams: o.IOStreams,
191+
}
192+
rsyncOptions.Strategy = rsync.NewDefaultCopyStrategy(rsyncOptions)
193+
return rsyncOptions.RunRsync()
194+
195+
}
196+
197+
func (o *MustGatherOptions) waitForPodRunning(pod *corev1.Pod) error {
198+
phase := pod.Status.Phase
199+
err := wait.PollImmediate(time.Second, 10*time.Minute, func() (bool, error) {
200+
var err error
201+
if pod, err = o.Client.CoreV1().Pods(pod.Namespace).Get(pod.Name, metav1.GetOptions{}); err != nil {
202+
klog.Error(err)
203+
return false, nil
204+
}
205+
phase = pod.Status.Phase
206+
return phase != corev1.PodPending, nil
207+
})
208+
if err != nil {
209+
return err
210+
}
211+
if phase != corev1.PodRunning {
212+
return fmt.Errorf("pod is not running: %v", phase)
213+
}
214+
return nil
215+
}
216+
217+
func (o *MustGatherOptions) newClusterRoleBinding(ns string) *rbacv1.ClusterRoleBinding {
218+
return &rbacv1.ClusterRoleBinding{
219+
ObjectMeta: metav1.ObjectMeta{
220+
GenerateName: "must-gather-",
221+
Annotations: map[string]string{
222+
"oc.openshift.io/command": "oc adm must-gather",
223+
},
224+
},
225+
RoleRef: rbacv1.RoleRef{
226+
APIGroup: "rbac.authorization.k8s.io",
227+
Kind: "ClusterRole",
228+
Name: "cluster-admin",
229+
},
230+
Subjects: []rbacv1.Subject{
231+
{
232+
Kind: "ServiceAccount",
233+
Name: "default",
234+
Namespace: ns,
235+
},
236+
},
237+
}
238+
}
239+
240+
// newPod creates a pod with 2 containers with a shared volume mount:
241+
// - gather: init container that runs gather command
242+
// - copy: no-op container we can exec into
243+
func (o *MustGatherOptions) newPod(node string) *corev1.Pod {
244+
zero := int64(0)
245+
ret := &corev1.Pod{
246+
ObjectMeta: metav1.ObjectMeta{
247+
GenerateName: "must-gather-",
248+
Labels: map[string]string{
249+
"app": "must-gather",
250+
},
251+
},
252+
Spec: corev1.PodSpec{
253+
NodeName: node,
254+
RestartPolicy: corev1.RestartPolicyNever,
255+
Volumes: []corev1.Volume{
256+
{
257+
Name: "must-gather-output",
258+
VolumeSource: corev1.VolumeSource{
259+
EmptyDir: &corev1.EmptyDirVolumeSource{},
260+
},
261+
},
262+
},
263+
InitContainers: []corev1.Container{
264+
{
265+
Name: "gather",
266+
Image: o.Image,
267+
Command: []string{"/bin/bash", "-c", "for resource in $RESOURCES ; do openshift-must-gather inspect ${resource} --base-dir /must-gather ; done"},
268+
Env: []corev1.EnvVar{
269+
{
270+
Name: "RESOURCES",
271+
Value: "clusteroperators certificatesigningrequests nodes machines machineconfigs ns/default ns/openshift ns/kube-system persistentvolumes volumeattachments clusternetworks hostsubnets clusterautoscaler machineautoscaler",
272+
},
273+
},
274+
VolumeMounts: []corev1.VolumeMount{
275+
{
276+
Name: "must-gather-output",
277+
MountPath: "/must-gather",
278+
ReadOnly: false,
279+
},
280+
},
281+
},
282+
},
283+
Containers: []corev1.Container{
284+
{
285+
Name: "copy",
286+
Image: o.Image,
287+
Command: []string{"/bin/bash", "-c", "trap : TERM INT; sleep infinity & wait"},
288+
VolumeMounts: []corev1.VolumeMount{
289+
{
290+
Name: "must-gather-output",
291+
MountPath: "/must-gather",
292+
ReadOnly: false,
293+
},
294+
},
295+
},
296+
},
297+
TerminationGracePeriodSeconds: &zero,
298+
Tolerations: []corev1.Toleration{
299+
{
300+
Operator: "Exists",
301+
},
302+
},
303+
},
304+
}
305+
if len(o.Command) > 0 {
306+
ret.Spec.Containers[0].Command = o.Command
307+
}
308+
return ret
309+
}

test/cmd/admin.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,4 +392,9 @@ os::cmd::expect_success_and_text "oc adm prune images" "Dry run enabled - no mod
392392
echo "images: ok"
393393
os::test::junit::declare_suite_end
394394

395+
# oc adm must-gather
396+
os::test::junit::declare_suite_start "cmd/admin/must-gather"
397+
os::cmd::expect_success "oc adm must-gather --help"
398+
os::test::junit::declare_suite_end
399+
395400
os::test::junit::declare_suite_end

0 commit comments

Comments
 (0)