diff --git a/assess.py b/assess.py
index 2d43366..ba9e532 100644
--- a/assess.py
+++ b/assess.py
@@ -185,7 +185,7 @@ def show_options(option: str, groupdir: str, operation: str, logger):
             os.system(f'ls {groupdir}/outs/')
         else:
             os.system(f'ls {groupdir}/errs/')
-    else:
+    elif option == 'labels':
         logger.info('Detecting labels from previous runs:')
         labels = glob.glob(f'{args.workdir}/groups/{args.groupID}/proj_codes*')
         for l in labels:
@@ -193,6 +193,8 @@ def show_options(option: str, groupdir: str, operation: str, logger):
             if pcode == '1':
                 pcode = 'main'
             logger.info(f'{format_str(pcode,20)} - {l}')
+    else:
+        logger.info(f'{option} not accepted - use "jobids" or "labels"')
 
 def cleanup(cleantype: str, groupdir: str, logger):
     """Remove older versions of project code files, error or output logs. Clear directories."""
@@ -343,7 +345,7 @@ def assess_main(args):
     parser.add_argument('-j','--jobid', dest='jobID', help='Identifier of job to inspect')
     parser.add_argument('-p','--phase', dest='phase', default='validate', help='Pipeline phase to inspect')
 
-    parser.add_argument('-s','--show-opts', dest='show_opts', help='Show options for jobids, repeat label')
+    parser.add_argument('-s','--show-opts', dest='show_opts', help='Show options for jobids, labels')
     parser.add_argument('-r','--repeat_label', dest='repeat_label', default=None, help='Save a selection of codes which failed on a given error - input a repeat id.')
     parser.add_argument('-i','--inspect', dest='inspect', help='Inspect error/output of a given type/label')
 
diff --git a/docs/source/assess-overview.rst b/docs/source/assess-overview.rst
index a28bc25..97b22dc 100644
--- a/docs/source/assess-overview.rst
+++ b/docs/source/assess-overview.rst
@@ -1,2 +1,43 @@
 Assessor Tool
-=============
\ No newline at end of file
+=============
+
+The assessor script ``assess.py`` is an all-purpose pipeline checking tool which can be used to assess:
+ - The current status of all datasets within a given group in the pipeline (which phase each dataset currently sits in)
+ - The errors/outputs associated with previous job runs.
+
+1. Overall Progress of the Pipeline
+-----------------------------------
+
+To see the general status of the pipeline for a given group:
+::
+
+    python assess.py progress
+
+An example use case is to write out all datasets that require scanning to a new label (repeat_label):
+::
+
+    python assess.py progress -p scan -r -W
+
+The last flag ``-W`` is required when writing an output file from this program, otherwise the program will dry-run and produce no files.
+
+2. Checking errors
+------------------
+Check what repeat labels are available already using
+::
+
+    python assess.py errors -s labels
+
+Show what jobs have previously run
+::
+
+    python assess.py errors -s jobids
+
+For showing all errors from a previous job run
+::
+
+    python assess.py errors -j
+
+For selecting a specific type of error to investigate (-i) and examine the full log for each example (-E)
+::
+
+    python assess.py test errors -j -i "type_of_error" -E
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index c203483..71a09a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,6 +22,7 @@ pandas==2.1.1
 python-dateutil==2.8.2
 pytz==2023.3.post1
 requests==2.31.0
+scipy==1.10.1
 six==1.16.0
 tzdata==2023.3
 ujson==5.8.0