Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIRC-9010]: Update default metric filters and developer workflow automation #108

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
36 changes: 25 additions & 11 deletions .prow/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,53 @@ ifdef IS_CI
NAME_PREFIX=$(shell git symbolic-ref --short HEAD)
endif

# Fall back to a fixed prefix whenever NAME_PREFIX expands to nothing.
ifeq ($(NAME_PREFIX),)
# Defaults NAME_PREFIX to "manual" if IS_CI is unset or set to "".
# NOTE(review): this branch is also taken when NAME_PREFIX was assigned from
# `git symbolic-ref --short HEAD` and that command printed nothing
# (presumably a detached HEAD checkout) -- confirm CI runs are not meant to
# end up sharing the "manual" prefix.
NAME_PREFIX=manual
endif

# Default image reference used when the caller does not supply FULL_IMAGE_NAME.
# `ifndef` is also true for an empty value, and `override` lets this default
# replace an empty command-line assignment (e.g. `make FULL_IMAGE_NAME=`).
# NOTE(review): the helm recipe passes FULL_IMAGE_NAME to `$(ADMIRAL) sync`
# unconditionally, so any fallback computed inside $(ADMIRAL) itself would
# never be reached -- confirm which default is the intended one.
ifndef FULL_IMAGE_NAME
override FULL_IMAGE_NAME = registry.k8s.dev.circonus.com/circonus-kubernetes-agent:latest
endif


RUNTIME_DATA_FILE=./.runtime-$(NAME_PREFIX).yaml

.PHONY: all terraform gcp runtime_data helm clean
.PHONY: all terraform runtime_data gcp kubeconfig registry proxy helm clean

all: terraform helm
all: runtime_data terraform kubeconfig registry proxy helm

# Seed $(RUNTIME_DATA_FILE) from $(DATA_FILE).
# Any prefix other than "manual" always gets a fresh copy; a "manual" run keeps
# an already existing runtime file and only copies when it is missing.
runtime_data:
ifneq ($(NAME_PREFIX),manual)
	@cp "$(DATA_FILE)" "$(RUNTIME_DATA_FILE)"
else
	@[ -f "$(RUNTIME_DATA_FILE)" ] || cp "$(DATA_FILE)" "$(RUNTIME_DATA_FILE)"
endif

terraform: gcp

gcp: runtime_data
# Runs `$(MULTIFORM) apply` and then `$(MULTIFORM) wait_apply` for the current
# NAME_PREFIX.
gcp:
@NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) apply
# The watch_apply / wait_apply switch below is commented out, so wait_apply
# runs for every NAME_PREFIX, including "manual".
# ifeq ($(NAME_PREFIX),manual)
# NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) watch_apply
# else
@NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) wait_apply
# endif

# Move an existing $(KUBECONFIG) aside once, then run `$(MULTIFORM) kubeconfig`.
# The backup is skipped when $(KUBECONFIG).bak already exists, so re-running
# the target never overwrites the first saved copy.
# Both `mv` operands are quoted, matching the `[ -f ... ]` tests, so a path
# containing spaces is moved as a single argument.
kubeconfig:
	@if [ -f "$(KUBECONFIG)" ] && ! [ -f "$(KUBECONFIG).bak" ]; then mv "$(KUBECONFIG)" "$(KUBECONFIG).bak"; fi
	@NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) kubeconfig
@NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) proxy

runtime_data:
ifeq ($(NAME_PREFIX),manual)
@if [ ! -f "$(RUNTIME_DATA_FILE)" ]; then cp "$(DATA_FILE)" "$(RUNTIME_DATA_FILE)"; fi
else
@cp "$(DATA_FILE)" "$(RUNTIME_DATA_FILE)"
endif
# Runs the `registry` subcommand of $(MULTIFORM) for the current NAME_PREFIX.
registry:
@NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) registry

# Runs the `proxy` subcommand of $(MULTIFORM) for the current NAME_PREFIX.
proxy:
@NAME_PREFIX=$(NAME_PREFIX) $(MULTIFORM) proxy

helm:
@NAME_PREFIX=$(NAME_PREFIX) $(ADMIRAL) sync
@NAME_PREFIX=$(NAME_PREFIX) FULL_IMAGE_NAME=$(FULL_IMAGE_NAME) $(ADMIRAL) sync
# ifeq ($(NAME_PREFIX),manual)
# NAME_PREFIX=$(NAME_PREFIX) $(ADMIRAL) watch_sync
# else
Expand Down
33 changes: 24 additions & 9 deletions .prow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,46 @@
The following tools are required in addition to the utilities
normally available on macOS:

- yq ()
- gcloud ()
- terraform ()
- yq ()
- helm ()
- helmfile ()
- git ()
- bash ()
- ssh ()
- gcloud ()

## Manual deploy
## Manual

### Manual deploy

A deployment process as easy as `seq 1 3`

0. Ensure your system has the required dependencies
0. Ensure the host system has the required dependencies installed
1. `cd` to this directory.
2. run `make`
2. Run `make`. **Note: the Terraform apply can take up to 20 minutes.**
3. ???

## Automatic Deploy
### Manual teardown

1. run `make clean`
2. ???

### Troubleshooting manual deploy/teardown

Just run the step again.
They should all be idempotent.

## Automatic

### Automatic Deploy

> TODO

## TODO
### TODO

- Docker container
- Artifact registry / push k8s agent docker container
- package all dependencies in a docker image (alpine based?)
- S3/GCS bucket for tfstate
- Automatic Deploy
- Automatic prow-triggered Deploy

6 changes: 5 additions & 1 deletion .prow/helm/admiral.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,14 +129,18 @@ multiplex_helmfile() {
ctx=$(yq -r '(.workspaces[] | select(.name == "'"${workspace}"'")).kubectl.context' "${RUNTIME_DATA_FILE}")
kube_contexts=$(kubectl config view | yq -r '.contexts[].name')
cluster_name=$(yq -r '(.workspaces[] | select(.name == "'"${workspace}"'")).kubectl.cluster_name' "${RUNTIME_DATA_FILE}")
registry_name=$(yq -r '(.workspaces[] | select(.name == "'"${workspace}"'")).registry_name' "${RUNTIME_DATA_FILE}")
# if we pass a FULL_IMAGE_NAME, use it. Otherwise, use the default
FULL_IMAGE_NAME="${FULL_IMAGE_NAME:-${registry_name}/circonus-kubernetes-agent:latest}"


# ensure workspace log dir exists
mkdir -p "${LOG_DIR}/${workspace}"
# ensure expected context is inside of kubeconfig
if [[ "${kube_contexts}" == *"${ctx}"* ]]; then
# background a task that starts helmfile and notifies of completion in the workspace command log
# shellcheck disable=SC2068
( (HTTPS_PROXY="localhost:${port_number}" CLUSTER_NAME="${cluster_name}" helmfile ${@} --kube-context "${ctx}" &> "${LOG_DIR}/${workspace}/helmfile_${command}.log") \
( (HTTPS_PROXY="localhost:${port_number}" CLUSTER_NAME="${cluster_name}" FULL_IMAGE_NAME="${FULL_IMAGE_NAME}" helmfile ${@} --kube-context "${ctx}" &> "${LOG_DIR}/${workspace}/helmfile_${command}.log") \
&& (echo "Helmfile ${command} complete!" > "${LOG_DIR}/${workspace}/helmfile_${command}.log") ) &
else
echo "[ERROR] Couldn't find ${ctx} in kubectl config" | tee "${LOG_DIR}/${workspace}/helmfile_${command}.log"
Expand Down
23 changes: 23 additions & 0 deletions .prow/helm/charts/kube-dns-metrics/.helmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
24 changes: 24 additions & 0 deletions .prow/helm/charts/kube-dns-metrics/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
apiVersion: v2
name: kube-dns-metrics
# Replaces the `helm create` placeholder text with what the chart's templates
# actually render.
description: Exposes the kube-dns metrics port through a Service annotated for Prometheus scraping

# A chart can be either an 'application' or a 'library' chart.
#
# Application charts are a collection of templates that can be packaged into versioned archives
# to be deployed.
#
# Library charts provide useful utilities or functions for the chart developer. They're included as
# a dependency of application charts to inject those utilities and functions into the rendering
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
type: application

# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
# NOTE(review): "1.16.0" looks like the scaffold default rather than a kube-dns
# version; it is emitted as the app.kubernetes.io/version label -- confirm.
appVersion: "1.16.0"
5 changes: 5 additions & 0 deletions .prow/helm/charts/kube-dns-metrics/templates/NOTES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{{- /*
Post-install notes.
The chart's Service selects pods labelled k8s-app=kube-dns, so no pod carries
the chart's own app.kubernetes.io labels; looking pods up by those labels
returns nothing. Forward the Service instead, using the same port the Service
exposes, and point the reader at the /metrics path.
*/ -}}
1. Read the kube-dns metrics through the Service by running these commands:
  echo "Visit http://127.0.0.1:8080/metrics to view the kube-dns metrics"
  kubectl --namespace {{ .Release.Namespace }} port-forward svc/{{ include "kube-dns-metrics.fullname" . }} 8080:{{ .Values.service.port }}
52 changes: 52 additions & 0 deletions .prow/helm/charts/kube-dns-metrics/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{{/*
Expand the name of the chart.
Uses .Values.nameOverride when it is set, otherwise .Chart.Name. The result is
cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "kube-dns-metrics.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Precedence:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. Otherwise "<release name>-<chart name or nameOverride>".
Every variant is cut to 63 characters with a trailing "-" removed.
*/}}
{{- define "kube-dns-metrics.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
Renders "<chart name>-<chart version>" with every "+" replaced by "_", cut to
63 characters with a trailing "-" removed.
*/}}
{{- define "kube-dns-metrics.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
Emits the helm.sh/chart label, the selector labels, the
app.kubernetes.io/version label (only when .Chart.AppVersion is set) and the
app.kubernetes.io/managed-by label.
*/}}
{{- define "kube-dns-metrics.labels" -}}
helm.sh/chart: {{ include "kube-dns-metrics.chart" . }}
{{ include "kube-dns-metrics.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
Emits app.kubernetes.io/name (the chart name helper) and
app.kubernetes.io/instance (the release name).
*/}}
{{- define "kube-dns-metrics.selectorLabels" -}}
app.kubernetes.io/name: {{ include "kube-dns-metrics.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

19 changes: 19 additions & 0 deletions .prow/helm/charts/kube-dns-metrics/templates/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
---
# Service that fronts the metrics port of pods labelled `k8s-app: kube-dns`.
# The selector is a fixed label, not the chart's own selector labels, so the
# Service targets pods that this chart does not create.
apiVersion: v1
kind: Service
metadata:
name: {{ include "kube-dns-metrics.fullname" . }}
labels:
{{- include "kube-dns-metrics.labels" . | nindent 4 }}
annotations:
# Scrape hints; the port annotation carries the same value as the Service port.
prometheus.io/port: "{{ .Values.service.port }}"
prometheus.io/scrape: "true"
spec:
type: {{ .Values.service.type }}
ports:
# port and targetPort both come from .Values.service.port.
- name: http-metrics-kube-dns
port: {{ .Values.service.port }}
protocol: TCP
targetPort: {{ .Values.service.port }}
selector:
k8s-app: kube-dns
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Helm test hook (`helm.sh/hook: test`): a pod with restartPolicy Never that
# runs `wget` against the /metrics path of the chart's Service.
apiVersion: v1
kind: Pod
metadata:
name: "{{ include "kube-dns-metrics.fullname" . }}-test-connection"
labels:
{{- include "kube-dns-metrics.labels" . | nindent 4 }}
annotations:
"helm.sh/hook": test
spec:
containers:
- name: wget
# NOTE(review): the image has no tag, so it resolves to `latest`; consider
# pinning a version.
image: busybox
command: ['wget']
# Target is "<service fullname>:<service port>/metrics".
args: ['{{ include "kube-dns-metrics.fullname" . }}:{{ .Values.service.port }}/metrics']
restartPolicy: Never
4 changes: 4 additions & 0 deletions .prow/helm/charts/kube-dns-metrics/values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Settings consumed by templates/service.yaml.
service:
# Rendered as the Service's spec.type.
type: ClusterIP
# Rendered as the Service port, its targetPort, and the prometheus.io/port
# annotation.
port: 10055

6 changes: 3 additions & 3 deletions .prow/helm/config/circonus-kubernetes-agent/secrets.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
circonus_api_key: ENC[AES256_GCM,data:VeWoAT5uvTcdT+6IsqRAviPj0Ag2CfWN2OUTUPCXLL1a9Jtm,iv:rKlICA/p7Qmt/iPZd7rrudE8vb/Tr5ZtM7gCnZNtOOM=,tag:LiVCG3EqjQZnYQKlihCF8Q==,type:str]
circonus_api_key: ENC[AES256_GCM,data:gEINr7R9hupERtQfG7pXoctawFXxmUiZgPGo615lvvUsErCn,iv:NV5kpe6lwXAjtpoq+d+Cp+x6um+k2HFiwcu6H11VWc4=,tag:89MpyyuwGW+J+uKrYOm0jA==,type:str]
contact_email: ENC[AES256_GCM,data:I+s1NW7NGF3PnDex6a6lwUXOj+U6SZLLKjUx8PY=,iv:yb5wF77r/UNRn4uJ4vsmOlm+jgrPMXk5zoM2948dHJ0=,tag:SBpO6IHyXLANncvnk/POxQ==,type:str]
sops:
kms: []
gcp_kms: []
azure_kv: []
hc_vault: []
age: []
lastmodified: "2022-10-04T01:06:08Z"
mac: ENC[AES256_GCM,data:ZXUgBlq+3mVnYCoBWNOcXgac451jM+Oh0xzhhpkao1PnzG00FkHVmL54TBw6ksAllJau25toTNOFe0p1TFt0dUTSYdodPQehxsqTGpYF+8J3mvLv4gJ+Z4t0smmd3L2mJgGJSmW7fPYOSKhARtFFNjM9jk0J1VI7fzZQ859np0w=,iv:ejphILKM0/cFRrO6Fiie8gcvU0tmAypK7TPl5XttXkg=,tag:tueLUtDbIgdY3KcmBjQt8Q==,type:str]
lastmodified: "2022-10-07T23:15:35Z"
mac: ENC[AES256_GCM,data:fHHKZ6QRJz7Yk6Bi/QkYFPA7ZGMdTeiINf+QSm7Zvsll+S3PiaVM85G1iCmj9INWBsC0d9K9VR3la0qtOyTSbXytpiTgZzzEcUgJUBwoiiXYzTSj0vSe1LuKMXflAUgKlOnwcVjwX8LEem2aoO9LIGp/dfDwryWXwD9/3XX+xeQ=,iv:swYu+RL1m/3S/rvETvtwECfWndu6SvGwueR37eP638U=,tag:21TSzISfMuJOUQ50AKvegw==,type:str]
pgp:
- created_at: "2022-10-03T23:53:23Z"
enc: |
Expand Down
65 changes: 65 additions & 0 deletions .prow/helm/config/circonus-kubernetes-agent/values.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,69 @@
# `image` and `kubernetes_name` are left empty here; the helmfile release that
# loads this file also sets both keys from the FULL_IMAGE_NAME and CLUSTER_NAME
# environment variables (presumably overriding these placeholders -- confirm
# helmfile value precedence).
image: ""
kubernetes_name: ""
broker_cid: "/broker/35"
dns:
# Same port number as the kube-dns-metrics chart's service.port default.
port: "10055"
# metric_filters (below) is a JSON document embedded as a block scalar, so it
# cannot carry comments of its own; review notes are kept here instead.
# NOTE(review): the patterns "^coredns*" and "^kubedns*" parse as "coredn" /
# "kubedn" followed by zero or more "s"; "^coredns.*" / "^kubedns.*" was
# presumably intended. As written, "^coredns*" may also make the two more
# specific "^coredns_..." rules after it redundant -- confirm against the
# agent's filter-matching rules.
metric_filters: |
{
"metric_filters": [
["allow", "^.+$", "tags", "and(collector:dynamic)", "NO_LOCAL_FILTER dynamically collected metrics"],
["allow", "^(Disk|Memory|PID)Pressure$", "node status"],
["allow", "^(container|node|pod)_.*$", "node metrics k8s v1.18+"],
["allow", "^(kube_)?pod_container_status_(running|terminated|waiting|ready)(_count)?$", "containers"],
["allow", "^(kube_)?pod_container_status_(terminated|waiting)_reason(_count)?$", "containers health"],
["allow", "^(kube_)?pod_init_container_status_(terminated|waiting)_reason(_count)?$", "init containers health"],
["allow", "^(kube_)?pod_status_(ready|scheduled)(_count)?$", "tags", "and(condition:true)", "pods"],
["allow", "^(kube_)?pod_status_phase(_count)?$", "tags", "and(or(phase:Running,phase:Pending,phase:Failed,phase:Succeeded))", "pods"],
["allow", "^(node|kubelet_running_pod_count|Ready)$", "nodes"],
["allow", "^(pod|node)_cpu_usage_seconds_total$", "utilization"],
["allow", "^(pod|node)_memory_working_set_bytes$", "utilization"],
["allow", "^(used|capacity)$", "tags", "and(or(units:bytes,units:percent),or(resource:memory,resource:fs,volume_name:*),not(container_name:*),not(sys_container:*))", "utilization"],
["allow", "^NetworkUnavailable$", "node status"],
["allow", "^[rt]x$", "tags", "and(resource:network,or(units:bytes,units:errors),not(container_name:*),not(sys_container:*))", "utilization"],
["allow", "^apiserver_request_total$", "tags", "and(or(code:5*,code:4*))", "api req errors"],
["allow", "^authenticated_user_requests$", "api auth"],
["allow", "^authentication_attempts$", "api auth health"],
["allow", "^cadvisor.*$", "cadvisor"],
["allow", "^capacity_.*$", "node capacity"],
["allow", "^collect_.*$", "agent collection stats"],
["allow", "^coredns*", "dns health"],
["allow", "^coredns_(dns|forward)_request_(count_total|duration_seconds_avg)$", "dns health"],
["allow", "^coredns_(dns|forward)_response_rcode_count_total$", "dns health"],
["allow", "^daemonset_scheduled_delta$", "health"],
["allow", "^deployment_generation_delta$", "health"],
["allow", "^events$", "events"],
["allow", "^kube_(service_labels|deployment_labels|pod_container_info|pod_deleted)$", "ksm inventory"],
["allow", "^kube_(service|deployment)_labels$", "ksm inventory"],
["allow", "^kube_daemonset_status_(current|desired)_number_scheduled$", "health"],
["allow", "^kube_deployment_(created|spec_replicas)$", "deployments"],
["allow", "^kube_deployment_(metadata|status_observed)_generation$", "health"],
["allow", "^kube_deployment_status_(replicas|replicas_updated|replicas_available|replicas_unavailable)$", "deployments"],
["allow", "^kube_deployment_status_replicas_unavailable$", "deployments"],
["allow", "^kube_hpa_(spec_max|status_current)_replicas$", "scale"],
["allow", "^kube_job_status_failed$", "health"],
["allow", "^kube_namespace_status_phase$", "namespaces"],
["allow", "^kube_namespace_status_phase$", "tags", "and(or(phase:Active,phase:Terminating))", "namespaces"],
["allow", "^kube_node_spec_unschedulable$", "node status"],
["allow", "^kube_node_status_allocatable$", "node status"],
["allow", "^kube_node_status_condition$", "node status health"],
["allow", "^kube_persistentvolume_status_phase$", "health"],
["allow", "^kube_pod_info$", "pods"],
["allow", "^kube_pod_start_time$", "pods"],
["allow", "^kube_pod_status_condition$", "pods"],
["allow", "^kube_statefulset_status_(replicas|replicas_ready)$", "health"],
["allow", "^kubedns*","dns health"],
["allow", "^kubelet_.*$", "node metrics k8s v1.18+"],
["allow", "^machine_.*$", "node metrics k8s v1.18+"],
["allow", "^pod_container_status$", "containers"],
["allow", "^pod_container_status_(terminated|waiting)_reason(_count)?$", "containers health"],
["allow", "^pod_init_container_status_(terminated|waiting)_reason(_count)?$", "init containers health"],
["allow", "^pod_status_(ready|scheduled)$", "pods"],
["allow", "^pod_status_phase$", "pods"],
["allow", "^prober_.*$", "node metrics/probes k8s v1.18+"],
["allow", "^resource_(request|limit)$", "resources"],
["allow", "^statefulset_replica_delta$", "health"],
["allow", "^usage(Milli|Nano)Cores$", "tags", "and(not(container_name:*),not(sys_container:*))", "utilization"],
["allow", "^utilization$", "utilization health"],
["deny", "^.+$", "all other metrics"]
]
}
4 changes: 4 additions & 0 deletions .prow/helm/helmfile.d/00-kube-dns-metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Installs the local kube-dns-metrics chart into the kube-system namespace.
# NOTE(review): the 00- file prefix presumably orders this release ahead of
# 99-kubernetes-agent.yaml -- confirm helmfile.d ordering.
releases:
- name: kube-dns-metrics
namespace: kube-system
chart: ../charts/kube-dns-metrics
1 change: 1 addition & 0 deletions .prow/helm/helmfile.d/99-kubernetes-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ releases:
values:
- ../config/{{`{{ .Release.Name }}`}}/values.yaml
- kubernetes_name: {{ requiredEnv "CLUSTER_NAME" }}
- image: {{ requiredEnv "FULL_IMAGE_NAME" }}
secrets:
- ../config/{{`{{ .Release.Name }}`}}/secrets.yaml
Loading