From d660fd05e3e504d716303b3efe88da2aef48db3f Mon Sep 17 00:00:00 2001 From: Bruce Becker Date: Sun, 23 Apr 2023 12:56:59 +0200 Subject: [PATCH] fix(prometheus): do not restart job on template changes (#26) fix(prometheus): do not restart job on template changes Signed-off-by: Bruce Becker ci: bump vault and nomad versions Signed-off-by: Bruce Becker ci: debug some setup tasks ci: debug ci step Signed-off-by: Bruce Becker ci: move checkout to top of the list Signed-off-by: Bruce Becker ci: test planning against my actual cluster Signed-off-by: Bruce Becker ci: test planning against my actual cluster Signed-off-by: Bruce Becker ci: check connectivity first Signed-off-by: Bruce Becker ci: flatten stages to keep tailscale in the mix Signed-off-by: Bruce Becker ci: change nomad to executable Signed-off-by: Bruce Becker ci: add outputs to the job Signed-off-by: Bruce Becker ci: fix name of dependent job Signed-off-by: Bruce Becker feat(prometheus): request a host volume for tsdb data Signed-off-by: Bruce Becker ci: use github environment instead of ci commands Signed-off-by: Bruce Becker fix(prometheus): set dynamic port Signed-off-by: Bruce Becker fix(prometheus): align health check url Signed-off-by: Bruce Becker ci: temporarily remove conditional Signed-off-by: Bruce Becker ci: check variables Signed-off-by: Bruce Becker ci: use name of the step instead of the job Signed-off-by: Bruce Becker --------- Signed-off-by: Bruce Becker --- .github/workflows/validate.yml | 40 +++++++++++++++++++++---------- prometheus.nomad | 43 ++++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 4fc7273..17d2b45 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -7,31 +7,45 @@ on: - '**.nomad' jobs: - tailscale: + changedfiles: + runs-on: ubuntu-latest + outputs: + all: ${{ steps.changes.outputs.all }} + nomad: ${{ steps.changes.outputs.nomad }} + steps: + - name: checkout repo + uses: actions/checkout@v3 + - name: Get Changed Files + id: changes + # Set outputs + run: | + echo "NOMAD_FILES_CHANGED=$(git diff --name-only --diff-filter=ACMRT ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep .nomad | xargs) >> $GITHUB_OUTPUT" + + nomad-plan: + needs: changedfiles env: NOMAD_ADDR: ${{ secrets.NOMAD_ADDR }} runs-on: ubuntu-22.04 steps: + - name: check + run: echo "${{ steps.changes.outputs.NOMAD_FILES_CHANGED }}" - name: Setup Tailscale uses: tailscale/github-action@main with: authkey: ${{ secrets.TAILSCALE_AUTHKEY }} - nomad-plan: - needs: tailscale - runs-on: ubuntu-22.04 - steps: + - name: check connectivity + run: ifconfig ; host sense.orca-ordinal.ts.net + - name: Checkout change + uses: actions/checkout@v3 + with: + fetch-depth: 0 - name: Get Vault run: | - mkdir -p bin ; curl -fSL https://releases.hashicorp.com/vault/1.12.3/vault_1.12.3_linux_amd64.zip | gunzip -> bin/vault + mkdir -p bin ; curl -fSL https://releases.hashicorp.com/vault/1.13.1/vault_1.13.1_linux_amd64.zip | gunzip -> bin/vault - name: Get token run: chmod u+x bin/vault ; bin/vault -version - name: Get Nomad run: | - mkdir -p bin ; curl -fSL https://releases.hashicorp.com/nomad/1.4.4/nomad_1.4.4_linux_amd64.zip | gunzip -> bin/nomad - - name: Checkout change - uses: actions/checkout@v3 - with: - fetch-depth: 0 - + mkdir -p bin ; curl -fSL https://releases.hashicorp.com/nomad/1.5.3/nomad_1.5.3_linux_amd64.zip | gunzip -> bin/nomad ; ls -lht bin - name: Plan the job - run: chmod u+x bin/nomad ; bin/nomad plan ansible.nomad + run: chmod a+x bin/nomad ; for file in ${{steps.changes.outputs.nomad}} ; do bin/nomad plan ${file} ; done diff --git a/prometheus.nomad b/prometheus.nomad index c77553d..4d056bb 100644 --- a/prometheus.nomad +++ b/prometheus.nomad @@ -21,16 +21,18 @@ job "prometheus" { group "monitoring" { count = 1 - + volume "data" { + type = "host" + read_only = false + source = "scratch" + } network { - port "prometheus_ui" { - static = 9090 - } + port "prometheus_ui" {} } restart { - attempts = 2 - interval = "5m" + attempts = 1 + interval = "7m" delay = "1m" mode = "fail" } @@ -49,8 +51,13 @@ job "prometheus" { } } template { - change_mode = "restart" + change_mode = "signal" + change_signal = "SIGHUP" destination = "local/prometheus.yml" + wait { + min = "10s" + max = "20s" + } data = <