-
Notifications
You must be signed in to change notification settings - Fork 1
/
dump_monitoring.sh
238 lines (212 loc) · 8.13 KB
/
dump_monitoring.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/bin/bash
#
# Collect diagnostic data about the monitoring stack running in the
# openshift-monitoring namespace.
#
# Usage: dump_monitoring.sh [component ...]
#   component  name of a component to check (the matching check_<component>
#              function is run); every known component is checked when no
#              name is given.
# Environment:
#   DEBUG   when non-empty, command tracing (set -x) is enabled
#   target  output folder; defaults to monitoring-<date>_<time>
set -euo pipefail
if [[ -n "${DEBUG:-}" ]]; then
  set -x
fi

# Components requested on the command line; fall back to the full list.
declare -a components=("$@")
if [[ ${#components[@]} -eq 0 ]]; then
  components=(
    "alertmanager"
    "cluster_monitoring_operator"
    "grafana"
    "kube_state_metrics"
    "node_exporter"
    "prometheus_adapter"
    "prometheus"
    "prometheus_operator"
    "telemeter_client"
    "project_info"
  )
fi

# Component names are later turned into function names, so only accept
# plain identifiers.
for requested in "${components[@]}"; do
  if [[ ! "$requested" =~ ^[a-z_]+$ ]]; then
    printf 'Invalid component name: %s\n' "$requested" >&2
    exit 2
  fi
done
unset requested

NAMESPACE=openshift-monitoring
DATE=$(date +%Y%m%d_%H%M%S)
target=${target:-"monitoring-$DATE"}

# One output folder per component, all below $target.
logs_folder="$target/logs"
project_folder="$target/project"
alertmanager_folder="$target/alertmanager"
cluster_monitoring_operator_folder="$target/cluster-monitoring-operator"
grafana_folder="$target/grafana"
kube_state_metrics_folder="$target/kube-state-metrics"
node_exporter_folder="$target/node-exporter"
prometheus_adapter_folder="$target/prometheus-adapter"
prometheus_folder="$target/prometheus"
prometheus_operator_folder="$target/prometheus-operator"
telemeter_client_folder="$target/telemeter-client"
# Dump every object of one resource type as YAML, one file per object.
# Arguments: $1 - resource type (e.g. pods, configmaps)
# Globals:   NAMESPACE (read), project_folder (read)
# Outputs:   writes "$project_folder/<type>/<name>" for each object found
dump_resource_items() {
  local type=$1
  local resource

  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$project_folder/$type"

  # The substitution is left unquoted on purpose: jsonpath prints the object
  # names separated by spaces and each one must become its own word.
  for resource in $(oc -n "$NAMESPACE" get "$type" -o jsonpath='{.items[*].metadata.name}'); do
    oc -n "$NAMESPACE" get "$type" "$resource" -o yaml > "$project_folder/$type/$resource"
  done
}
# Dump, as YAML, every persistent volume whose claim belongs to $NAMESPACE.
# Globals:   NAMESPACE (read), project_folder (read)
# Outputs:   a progress line on stdout; writes
#            "$project_folder/persistentvolumes/<name>" for each volume found
dump_persistent_volumes() {
  local pv
  # go-template that prints the name of each volume claimed from $NAMESPACE,
  # one per line. The namespace is spliced in inside double quotes so it is
  # never word-split by the shell.
  local template='go-template={{range $pv := .items}}{{if $pv.spec.claimRef}}{{if eq $pv.spec.claimRef.namespace "'"${NAMESPACE}"'"}}{{printf "%s\n" $pv.metadata.name}}{{end}}{{end}}{{end}}'

  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$project_folder/persistentvolumes"
  echo "-- Extracting persistentvolumes ..."

  # Unquoted on purpose: each printed volume name must become its own word.
  for pv in $(oc get persistentvolumes -o "$template"); do
    oc -n "$NAMESPACE" get persistentvolumes "$pv" -o yaml > "$project_folder/persistentvolumes/$pv"
  done
}
# Dump general information about the monitoring project: node descriptions,
# the namespace object, events, secret descriptions, every object of the
# resource types listed below and the persistent volumes.
# Globals:   NAMESPACE (read), project_folder (read)
# Calls:     dump_resource_items, dump_persistent_volumes
# Outputs:   progress lines on stdout; files below "$project_folder"
check_project_info() {
  local -a resource_types=(
    configmaps daemonsets deployment services routes
    serviceaccounts statefulset persistentvolumeclaims pods
  )
  local resource_type

  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$project_folder"
  echo "Getting general objects"

  echo "-- Nodes Description"
  oc -n "$NAMESPACE" describe nodes > "$project_folder/nodes"

  echo "-- Project Description"
  oc -n "$NAMESPACE" get namespace "$NAMESPACE" -o yaml > "$project_folder/monitoring-project-info"

  echo "-- Events"
  oc -n "$NAMESPACE" get events > "$project_folder/events"

  # Don't get the secrets content for security reasons
  echo "-- Secrets"
  oc -n "$NAMESPACE" describe secrets > "$project_folder/secrets"

  for resource_type in "${resource_types[@]}"; do
    echo "-- Extracting $resource_type ..."
    dump_resource_items "$resource_type"
  done

  dump_persistent_volumes
}
# Collect environment values and logs for every alertmanager pod.
# Pods are selected with the label app=alertmanager.
# Globals:   NAMESPACE (read), alertmanager_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$alertmanager_folder"
check_alertmanager() {
  local pods pod

  echo "-- Checking alertmanager environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l app=alertmanager -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$alertmanager_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- alertmanager pod: $pod"
    get_env "$pod" "$alertmanager_folder"
    get_pod_logs "$pod" "$alertmanager_folder"
  done
}
# Collect environment values and logs for every cluster-monitoring-operator
# pod. Pods are selected with the label app=cluster-monitoring-operator.
# Globals:   NAMESPACE (read), cluster_monitoring_operator_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below
#            "$cluster_monitoring_operator_folder"
check_cluster_monitoring_operator() {
  local pods pod

  echo "-- Checking cluster_monitoring_operator environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l app=cluster-monitoring-operator -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$cluster_monitoring_operator_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- cluster_monitoring_operator pod: $pod"
    get_env "$pod" "$cluster_monitoring_operator_folder"
    get_pod_logs "$pod" "$cluster_monitoring_operator_folder"
  done
}
# Collect environment values and logs for every grafana pod.
# Pods are selected with the label app=grafana.
# Globals:   NAMESPACE (read), grafana_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$grafana_folder"
check_grafana() {
  local pods pod

  echo "-- Checking grafana environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l app=grafana -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$grafana_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- grafana pod: $pod"
    get_env "$pod" "$grafana_folder"
    get_pod_logs "$pod" "$grafana_folder"
  done
}
# Collect environment values and logs for every kube-state-metrics pod.
# Pods are selected with the label app=kube-state-metrics.
# Globals:   NAMESPACE (read), kube_state_metrics_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$kube_state_metrics_folder"
check_kube_state_metrics() {
  local pods pod

  echo "-- Checking kube_state_metrics environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l app=kube-state-metrics -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$kube_state_metrics_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- kube_state_metrics pod: $pod"
    get_env "$pod" "$kube_state_metrics_folder"
    get_pod_logs "$pod" "$kube_state_metrics_folder"
  done
}
# Collect environment values and logs for every node-exporter pod.
# Pods are selected with the label app=node-exporter.
# Globals:   NAMESPACE (read), node_exporter_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$node_exporter_folder"
check_node_exporter() {
  local pods pod

  echo "-- Checking node_exporter environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l app=node-exporter -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$node_exporter_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- node_exporter pod: $pod"
    get_env "$pod" "$node_exporter_folder"
    get_pod_logs "$pod" "$node_exporter_folder"
  done
}
# Collect environment values and logs for every prometheus-adapter pod.
# Pods are selected with the label name=prometheus-adapter.
# Globals:   NAMESPACE (read), prometheus_adapter_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$prometheus_adapter_folder"
check_prometheus_adapter() {
  local pods pod

  echo "-- Checking prometheus_adapter environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l name=prometheus-adapter -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$prometheus_adapter_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- prometheus_adapter pod: $pod"
    get_env "$pod" "$prometheus_adapter_folder"
    get_pod_logs "$pod" "$prometheus_adapter_folder"
  done
}
# Collect environment values and logs for every prometheus pod.
# Pods are selected with the label app=prometheus.
# Globals:   NAMESPACE (read), prometheus_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$prometheus_folder"
check_prometheus() {
  local pods pod

  echo "-- Checking prometheus environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l app=prometheus -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$prometheus_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- prometheus pod: $pod"
    get_env "$pod" "$prometheus_folder"
    get_pod_logs "$pod" "$prometheus_folder"
  done
}
# Collect environment values and logs for every prometheus-operator pod.
# Pods are selected with the label k8s-app=prometheus-operator.
# Globals:   NAMESPACE (read), prometheus_operator_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below
#            "$prometheus_operator_folder"
check_prometheus_operator() {
  local pods pod

  echo "-- Checking prometheus_operator environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l k8s-app=prometheus-operator -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$prometheus_operator_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- prometheus_operator pod: $pod"
    get_env "$pod" "$prometheus_operator_folder"
    get_pod_logs "$pod" "$prometheus_operator_folder"
  done
}
# Collect environment values and logs for every telemeter-client pod.
# Pods are selected with the label k8s-app=telemeter-client.
# Globals:   NAMESPACE (read), telemeter_client_folder (read)
# Calls:     get_env and get_pod_logs, once per pod
# Outputs:   progress lines on stdout; files below "$telemeter_client_folder"
check_telemeter_client() {
  local pods pod

  echo "-- Checking telemeter_client environment values and pod logs"
  # The jsonpath is quoted so the shell never treats [*] as a glob pattern.
  pods=$(oc -n "$NAMESPACE" get pods -l k8s-app=telemeter-client -o 'jsonpath={.items[*].metadata.name}')
  # -p keeps a re-run into an existing folder from raising an error.
  mkdir -p "$telemeter_client_folder"

  # Unquoted on purpose: jsonpath prints the pod names separated by spaces.
  for pod in $pods; do
    echo "---- telemeter_client pod: $pod"
    get_env "$pod" "$telemeter_client_folder"
    get_pod_logs "$pod" "$telemeter_client_folder"
  done
}
# Record image build information and the environment variables of every
# container of a pod in a single report file.
# Arguments: $1 - pod name
#            $2 - folder that receives the report; the file is "$2/$1"
# Globals:   NAMESPACE (read)
# Outputs:   writes the report file; prints a notice on stdout when the build
#            date cannot be read from the image's Dockerfile
get_env() {
  local pod=$1
  local env_file=$2/$pod
  local containers container dockerfile
  local -a dockerfiles

  containers=$(oc -n "$NAMESPACE" get po "$pod" -o jsonpath='{.spec.containers[*].name}')

  # Start from an empty report once per pod and only append afterwards, so
  # the data of one container is never wiped by the next container.
  : > "$env_file"

  # Unquoted on purpose: jsonpath prints container names separated by spaces.
  for container in $containers; do
    # find may fail (e.g. the folder is missing in the image); that only means
    # there is no build information to record for this container.
    dockerfile=$(oc -n "$NAMESPACE" exec "$pod" -c "$container" -- \
      find /root/buildinfo -name "Dockerfile-openshift*" || :)
    if [[ -n "$dockerfile" ]]; then
      # find may print several paths; split them into separate words.
      # read returns non-zero at end of input, which is expected here.
      read -r -d '' -a dockerfiles <<< "$dockerfile" || true
      echo "Dockerfile info: ${dockerfiles[*]}" >> "$env_file"
      oc -n "$NAMESPACE" exec "$pod" -c "$container" -- \
        grep -o '"build-date"="[^[:blank:]]*"' "${dockerfiles[@]}" >> "$env_file" \
        || echo "---- Unable to get build date"
    fi
    echo "-- Environment Variables" >> "$env_file"
    oc -n "$NAMESPACE" exec "$pod" -c "$container" -- env | sort >> "$env_file"
  done
}
# Save the xz-compressed logs of every container of a pod.
# Arguments: $1 - pod name
#            $2 - component folder; archives are written to "$2/logs"
# Globals:   NAMESPACE (read)
# Outputs:   a progress line on stdout; "<pod>-<container>.log.xz" per
#            container, or "<pod>.log.xz" when the per-container request fails;
#            a notice on stdout when neither request succeeds
# Note:      spotting a failed "oc logs" relies on the pipefail option, which
#            this script enables at start-up.
get_pod_logs() {
  local pod=$1
  local logs_folder=$2/logs
  local containers container

  echo "-- POD $pod Logs"
  # -p creates missing parents and tolerates an already-existing folder.
  mkdir -p "$logs_folder"

  # Declared separately above so the status of oc is not masked by `local`.
  containers=$(oc -n "$NAMESPACE" get po "$pod" -o jsonpath='{.spec.containers[*].name}')

  # Unquoted on purpose: jsonpath prints container names separated by spaces.
  for container in $containers; do
    oc -n "$NAMESPACE" logs "$pod" -c "$container" | nice xz > "$logs_folder/$pod-$container.log.xz" \
      || oc -n "$NAMESPACE" logs "$pod" | nice xz > "$logs_folder/$pod.log.xz" \
      || echo "---- Unable to get logs from pod $pod and container $container"
  done
}
# Create the output folder and run the check function of every requested
# component. mkdir -p already tolerates an existing folder.
mkdir -p "$target"
echo "Retrieving results to $target"

for comp in "${components[@]}"; do
  # Look the function up instead of eval-ing the name, so that an unknown
  # component is told apart from a check function that merely failed.
  if ! declare -F "check_${comp}" > /dev/null; then
    echo "Unrecognized function check_${comp} to check component: ${comp}"
    continue
  fi
  # Calling the function on the left of || keeps the collection best effort:
  # errexit stays disabled inside it, so one failing command does not end
  # the whole dump.
  "check_${comp}" \
    || echo "check_${comp} reported a failure while checking component: ${comp}" >&2
done