From 2f0da405f92cee5db759d64b488628c09290f418 Mon Sep 17 00:00:00 2001 From: Gus Ralph Date: Mon, 4 Nov 2024 13:34:30 +0000 Subject: [PATCH 1/6] Added some validation to handle bad URLs --- internal/runner/executer.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/internal/runner/executer.go b/internal/runner/executer.go index 0a6cf515..fbc74dd8 100644 --- a/internal/runner/executer.go +++ b/internal/runner/executer.go @@ -23,12 +23,19 @@ func (r *Runner) ExecuteCrawling() error { wg := sizedwaitgroup.New(r.options.Parallelism) for _, input := range inputs { + if input == "" { + gologger.Warning().Msgf("Skipping empty input") + continue + } if !r.networkpolicy.Validate(input) { gologger.Info().Msgf("Skipping excluded host %s", input) continue } wg.Add() input = addSchemeIfNotExists(input) + if r.crawler == nil { + return errorutil.New("crawler is not initialized") + } go func(input string) { defer wg.Done() @@ -36,8 +43,8 @@ func (r *Runner) ExecuteCrawling() error { gologger.Warning().Msgf("Could not crawl %s: %s", input, err) } r.state.InFlightUrls.Delete(input) - }(input) - } + }(input) + } wg.Wait() return nil } From d21e81d59fe508a60a8cd732782f93a400c1d30f Mon Sep 17 00:00:00 2001 From: chivato <61525295+SecGus@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:51:02 +0000 Subject: [PATCH 2/6] Update options.go --- internal/runner/options.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/runner/options.go b/internal/runner/options.go index 61805e16..51afa92b 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -86,6 +86,9 @@ func readCustomFormConfig(formConfig string) error { func (r *Runner) parseInputs() []string { values := make(map[string]struct{}) for _, url := range r.options.URLs { + if input == "" { + continue + } value := normalizeInput(url) if _, ok := values[value]; !ok { values[value] = struct{}{} From e8d27217a2acae4f2a4ea9201501a613bc76fbcb Mon Sep 17 00:00:00 2001 From: chivato <61525295+SecGus@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:51:28 +0000 Subject: [PATCH 3/6] Update executer.go --- internal/runner/executer.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/internal/runner/executer.go b/internal/runner/executer.go index fbc74dd8..de653364 100644 --- a/internal/runner/executer.go +++ b/internal/runner/executer.go @@ -23,10 +23,6 @@ func (r *Runner) ExecuteCrawling() error { wg := sizedwaitgroup.New(r.options.Parallelism) for _, input := range inputs { - if input == "" { - gologger.Warning().Msgf("Skipping empty input") - continue - } if !r.networkpolicy.Validate(input) { gologger.Info().Msgf("Skipping excluded host %s", input) continue From 07e8cb5762fe8c709d6f32b19b170bc6a2365cdf Mon Sep 17 00:00:00 2001 From: chivato <61525295+SecGus@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:52:01 +0000 Subject: [PATCH 4/6] Update executer.go --- internal/runner/executer.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/internal/runner/executer.go b/internal/runner/executer.go index de653364..61fdcd03 100644 --- a/internal/runner/executer.go +++ b/internal/runner/executer.go @@ -29,9 +29,6 @@ func (r *Runner) ExecuteCrawling() error { } wg.Add() input = addSchemeIfNotExists(input) - if r.crawler == nil { - return errorutil.New("crawler is not initialized") - } go func(input string) { defer wg.Done() From bdbb4df9d5316647d7b54ca0eeda504237d39c2f Mon Sep 17 00:00:00 2001 From: chivato <61525295+SecGus@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:52:42 +0000 Subject: [PATCH 5/6] Update executer.go --- internal/runner/executer.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/internal/runner/executer.go b/internal/runner/executer.go index 61fdcd03..7dff7b8e 100644 --- a/internal/runner/executer.go +++ b/internal/runner/executer.go @@ -11,6 +11,9 @@ import ( // ExecuteCrawling executes the crawling main loop func (r *Runner) ExecuteCrawling() error { + if r.crawler == nil { + return errorutil.New("crawler is not initialized") + } inputs := r.parseInputs() if len(inputs) == 0 { return errorutil.New("no input provided for crawling") From 8cab68ee0278b387bcc8ad59ee0be18dcefa07b5 Mon Sep 17 00:00:00 2001 From: chivato <61525295+SecGus@users.noreply.github.com> Date: Thu, 7 Nov 2024 14:57:57 +0000 Subject: [PATCH 6/6] Update options.go --- internal/runner/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/runner/options.go b/internal/runner/options.go index 51afa92b..b796707d 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -86,7 +86,7 @@ func readCustomFormConfig(formConfig string) error { func (r *Runner) parseInputs() []string { values := make(map[string]struct{}) for _, url := range r.options.URLs { - if input == "" { + if url == "" { continue } value := normalizeInput(url)