Skip to content

Commit

Permalink
fix(prometheus): do not restart job on template changes (#26)
Browse files Browse the repository at this point in the history
fix(prometheus): do not restart job on template changes

Signed-off-by: Bruce Becker <[email protected]>

ci: bump vault and nomad versions

Signed-off-by: Bruce Becker <[email protected]>

ci: debug some setup tasks

ci: debug ci step

Signed-off-by: Bruce Becker <[email protected]>

ci: move checkout to top of the list

Signed-off-by: Bruce Becker <[email protected]>

ci: test planning against my actual cluster

Signed-off-by: Bruce Becker <[email protected]>

ci: test planning against my actual cluster

Signed-off-by: Bruce Becker <[email protected]>

ci: check connectivity first

Signed-off-by: Bruce Becker <[email protected]>

ci: flatten stages to keep tailscale in the mix

Signed-off-by: Bruce Becker <[email protected]>

ci: change nomad to executable

Signed-off-by: Bruce Becker <[email protected]>

ci: add outputs to the job

Signed-off-by: Bruce Becker <[email protected]>

ci: fix name of dependent job

Signed-off-by: Bruce Becker <[email protected]>

feat(prometheus): request a host volume for tsdb data

Signed-off-by: Bruce Becker <[email protected]>

ci: use github environment instead of ci commands

Signed-off-by: Bruce Becker <[email protected]>

fix(prometheus): set dynamic port

Signed-off-by: Bruce Becker <[email protected]>

fix(prometheus): align health check url

Signed-off-by: Bruce Becker <[email protected]>

ci: temporarily remove conditional

Signed-off-by: Bruce Becker <[email protected]>

ci: check variables

Signed-off-by: Bruce Becker <[email protected]>

ci: use name of the step instead of the job

Signed-off-by: Bruce Becker <[email protected]>

---------

Signed-off-by: Bruce Becker <[email protected]>
  • Loading branch information
brucellino authored Apr 23, 2023
1 parent 5e46cb5 commit d660fd0
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 25 deletions.
40 changes: 27 additions & 13 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,45 @@ on:
- '**.nomad'

jobs:
tailscale:
# Job: work out which files changed in the pull request and publish them as
# job outputs (`all`, `nomad`) for downstream jobs to consume via `needs`.
changedfiles:
  runs-on: ubuntu-latest
  outputs:
    all: ${{ steps.changes.outputs.all }}
    nomad: ${{ steps.changes.outputs.nomad }}
  steps:
    - name: checkout repo
      uses: actions/checkout@v3
      with:
        # Fetch full history so the pull request base commit is available
        # locally for the `git diff` below (the default clone is shallow).
        fetch-depth: 0
    - name: Get Changed Files
      id: changes
      # Write the step outputs `all` and `nomad`, which are the names the
      # job-level `outputs` map above reads.
      run: |
        changed="$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }})"
        # The redirect must sit outside the quoted string, otherwise the text
        # is only printed and nothing reaches the output file.
        echo "all=$(echo "${changed}" | xargs)" >> "$GITHUB_OUTPUT"
        # Anchor the pattern so only paths ending in `.nomad` match; tolerate
        # an empty match instead of failing the step.
        echo "nomad=$(echo "${changed}" | grep '\.nomad$' | xargs || true)" >> "$GITHUB_OUTPUT"
nomad-plan:
needs: changedfiles
env:
NOMAD_ADDR: ${{ secrets.NOMAD_ADDR }}
runs-on: ubuntu-22.04
steps:
- name: check
  # Debug step: print the list of changed Nomad job files. The `steps`
  # context only contains steps of the current job, so the value produced by
  # the `changedfiles` job has to be read through the `needs` context.
  run: echo "${{ needs.changedfiles.outputs.nomad }}"
- name: Setup Tailscale
uses: tailscale/github-action@main
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
nomad-plan:
needs: tailscale
runs-on: ubuntu-22.04
steps:
- name: check connectivity
run: ifconfig ; host sense.orca-ordinal.ts.net
- name: Checkout change
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Get Vault
run: |
mkdir -p bin ; curl -fSL https://releases.hashicorp.com/vault/1.12.3/vault_1.12.3_linux_amd64.zip | gunzip -> bin/vault
mkdir -p bin ; curl -fSL https://releases.hashicorp.com/vault/1.13.1/vault_1.13.1_linux_amd64.zip | gunzip -> bin/vault
- name: Get token
run: chmod u+x bin/vault ; bin/vault -version
- name: Get Nomad
run: |
mkdir -p bin ; curl -fSL https://releases.hashicorp.com/nomad/1.4.4/nomad_1.4.4_linux_amd64.zip | gunzip -> bin/nomad
- name: Checkout change
uses: actions/checkout@v3
with:
fetch-depth: 0

mkdir -p bin ; curl -fSL https://releases.hashicorp.com/nomad/1.5.3/nomad_1.5.3_linux_amd64.zip | gunzip -> bin/nomad ; ls -lht bin
- name: Plan the job
run: chmod u+x bin/nomad ; bin/nomad plan ansible.nomad
run: |
  chmod a+x bin/nomad
  # Plan every changed job file reported by the `changedfiles` job. The list
  # lives in that job's outputs, so it is read via `needs`, not `steps`.
  for file in ${{ needs.changedfiles.outputs.nomad }} ; do
    # `nomad plan` exits 1 when the plan contains changes, which is expected
    # for a modified job file; any other non-zero code still fails the step.
    bin/nomad plan "${file}" || [ "$?" -eq 1 ]
  done
43 changes: 31 additions & 12 deletions prometheus.nomad
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,18 @@ job "prometheus" {

group "monitoring" {
count = 1

volume "data" {
type = "host"
read_only = false
source = "scratch"
}
network {
port "prometheus_ui" {
static = 9090
}
port "prometheus_ui" {}
}

restart {
attempts = 2
interval = "5m"
attempts = 1
interval = "7m"
delay = "1m"
mode = "fail"
}
Expand All @@ -49,8 +51,13 @@ job "prometheus" {
}
}
template {
change_mode = "restart"
change_mode = "signal"
change_signal = "SIGHUP"
destination = "local/prometheus.yml"
wait {
min = "10s"
max = "20s"
}
data = <<EOH
---
global:
Expand Down Expand Up @@ -107,10 +114,14 @@ EOH
}

template {
change_mode = "restart"
change_mode = "noop"
destination = "local/node-rules.yml"
left_delimiter = "[["
right_delimiter = "]]"
wait {
min = "10s"
max = "20s"
}
data = <<EOH
---
groups:
Expand Down Expand Up @@ -181,10 +192,18 @@ EOH
command = "local/prometheus-2.40.2.linux-arm64/prometheus"
args = [
"--config.file=local/prometheus.yml",
"--web.external-url=http://0.0.0.0:9090/prometheus"
]
"--storage.tsdb.retention.size=1GB",
"--storage.tsdb.retention.time=7d",
"--web.listen-address=:${NOMAD_PORT_prometheus_ui}",
"--web.enable-admin-api",
"--storage.tsdb.path=data"
]
}
volume_mount {
volume = "data"
destination = "data"
read_only = false
}

resources {
cpu = 250
memory = 400
Expand All @@ -198,7 +217,7 @@ EOH
check {
name = "prometheus_ui port alive"
type = "http"
path = "prometheus/-/healthy"
path = "-/healthy"
interval = "10s"
timeout = "2s"
}
Expand Down

0 comments on commit d660fd0

Please sign in to comment.