Skip to content

Commit

Permalink
Merge pull request #15 from cedadev/dwest77-cci
Browse files Browse the repository at this point in the history
Version 2.4.0: Poetrization of dependencies and package installation.
  • Loading branch information
dwest77a authored Oct 29, 2024
2 parents d365338 + d4b434e commit 3aa3dca
Show file tree
Hide file tree
Showing 23 changed files with 781 additions and 133 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Continuous-integration workflow: installs the package with Poetry and runs
# the pytest suite once per Python version listed in the matrix.
name: Automatic Test

# Triggered on every push and on every pull request.
on:
  - push
  - pull_request

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so that '3.10' is not parsed as the float 3.1.
        python-version: ['3.10','3.11', '3.12']

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # Poetry is installed with pip, then used to install the project
      # and its dependencies.
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip3 install poetry
          poetry install
      # Tests run inside the environment managed by Poetry.
      - name: Run tests
        run: poetry run pytest
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
.idea/*
venv/
data/
test_code/
test_code/
.venv/
31 changes: 31 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
BSD 3-Clause License

Copyright (c) 2023-2024, Centre for Environmental Data Analysis Developers,
Science and Technology Facilities Council (STFC),
UK Research and Innovation (UKRI).
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or other
materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3 changes: 0 additions & 3 deletions __init__.py

This file was deleted.

5 changes: 4 additions & 1 deletion ceda_elasticsearch_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from .elasticsearch.ceda_elasticsearch_client import CEDAElasticsearchClient

from .index_tools.base import IndexUpdaterBase
from .index_tools.ceda_client import BulkClient
from .index_tools.ceda_client import BulkClient

from importlib.metadata import PackageNotFoundError, version

# ``version`` is a function: it has to be called with the distribution name to
# obtain the version string. Binding the function itself would hand a callable
# to every consumer of ``__version__`` (e.g. ``docopt(..., version=...)``).
try:
    __version__ = version("ceda_elasticsearch_tools")
except PackageNotFoundError:
    # No installed metadata (e.g. running from an uninstalled source
    # checkout); use a placeholder so importing the package still works.
    __version__ = "unknown"
2 changes: 0 additions & 2 deletions ceda_elasticsearch_tools/cmdline/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
import pkg_resources
__version__ = pkg_resources.require("ceda_elasticsearch_tools")[0].version
8 changes: 3 additions & 5 deletions ceda_elasticsearch_tools/cmdline/ceda_eo/coverage_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
from docopt import docopt

import simplejson as json
from ceda_elasticsearch_tools.cmdline import __version__
from ceda_elasticsearch_tools import __version__
import os
import re
from ceda_elasticsearch_tools.core.updater import ElasticsearchUpdater, ElasticsearchQuery
from ceda_elasticsearch_tools.core import util


from ceda_elasticsearch_tools.core import utils

def main():
base = os.path.dirname(__file__)
Expand Down Expand Up @@ -71,7 +69,7 @@ def main():

print(f"Group: {group} Total files: {len(file_list)}"
f" Files in: {len(results['True'])} Files out: {len(results['False'])}"
f" Coverage: {util.percent(len(file_list),len(results['True']))}")
f" Coverage: {utils.percent(len(file_list),len(results['True']))}")



Expand Down
2 changes: 1 addition & 1 deletion ceda_elasticsearch_tools/cmdline/fbs_missing_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
import os
from ceda_elasticsearch_tools.core import utils
import subprocess
from ceda_elasticsearch_tools.cmdline import __version__
from ceda_elasticsearch_tools import __version__
from time import sleep
from tqdm import tqdm

Expand Down
4 changes: 2 additions & 2 deletions ceda_elasticsearch_tools/cmdline/nla_sync_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from docopt import docopt

import requests
import pkg_resources
from ceda_elasticsearch_tools import __version__
from time import sleep
import itertools, sys
from multiprocessing import Process
Expand Down Expand Up @@ -122,7 +122,7 @@ def main():
"""

# Get command line arguments
args = docopt(__doc__, version=pkg_resources.require("ceda_elasticsearch_tools")[0].version)
args = docopt(__doc__, version=__version__)

if not args["--host"]:
host = "jasmin-es1.ceda.ac.uk"
Expand Down
2 changes: 1 addition & 1 deletion ceda_elasticsearch_tools/cmdline/secondary_scripts/md5.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from ceda_elasticsearch_tools.core import updater
from datetime import datetime
import os, logging
from ceda_elasticsearch_tools.cmdline import __version__
from ceda_elasticsearch_tools import __version__
import hashlib
import simplejson as json

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import os
import simplejson as json
from elasticsearch import Elasticsearch
from ceda_elasticsearch_tools.cmdline import __version__
from ceda_elasticsearch_tools import __version__
import hashlib


Expand Down
4 changes: 2 additions & 2 deletions ceda_elasticsearch_tools/cmdline/update_md5.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@
from ceda_elasticsearch_tools.core import log_reader
from datetime import datetime
import os, logging
from ceda_elasticsearch_tools.cmdline import __version__
from ceda_elasticsearch_tools import __version__
import subprocess
import simplejson as json
from ceda_elasticsearch_tools.core.updater import ElasticsearchUpdater
import math
from ceda_elasticsearch_tools.core.util import ProgressBar
from ceda_elasticsearch_tools.core.utils import ProgressBar


def logger_setup(log_dir):
Expand Down
3 changes: 1 addition & 2 deletions ceda_elasticsearch_tools/core/log_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
import re
import hashlib
from datetime import datetime
from ceda_elasticsearch_tools.core.utils import get_latest_log

import logging

from ceda_elasticsearch_tools.core.utils import get_latest_log

class SpotMapping(object):
"""
Expand Down
5 changes: 3 additions & 2 deletions ceda_elasticsearch_tools/core/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import json
import re
import logging
from log_reader import MD5LogFile
import utils
import hashlib

from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient
from .log_reader import MD5LogFile
from . import utils

class ElasticsearchQuery(object):
"""
Expand Down
13 changes: 13 additions & 0 deletions ceda_elasticsearch_tools/index_tools/ceda_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def get_ids(self):
return ids

def upload(self, action, content_list):
"""
Upload from the content list given a specific action.
E.g 'add', 'update' etc.
"""

content_list = self._preprocess_records(content_list)
action_list = self._generate_bulk_operation_body(
content_list,
action=action
Expand Down Expand Up @@ -91,3 +97,10 @@ def add_records(self, records):
self.upload('update', update)
if len(add) > 0:
self.upload('index', add)

def _preprocess_records(self, content_list):
    """
    Hook to override for inserting preprocessing of the list of records.

    ``upload`` passes its ``content_list`` through this method before
    building the bulk operation body. This base implementation applies
    no changes.

    :param content_list: the records about to be uploaded
    :return: the same ``content_list``, unmodified
    """
    return content_list
33 changes: 33 additions & 0 deletions ceda_elasticsearch_tools/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

def pytest_collection_modifyitems(items):
    """
    Reorder the collected tests so that listed classes run last.

    Tests whose class name appears in ``CLASS_ORDER`` are moved to the end
    of the queue, grouped in the order the names are listed. All other
    tests keep their original relative order at the front. Items that do
    not belong to a class (``item.cls`` is ``None`` or absent, e.g.
    module-level test functions) are treated as unlisted instead of
    raising ``AttributeError``.

    :param items: list of collected pytest items, reordered in place
    """

    CLASS_ORDER = [
        "TestConsistency",
        # "TestCedaDirsUpdaterDeposit",
        # "TestCedaDirsUpdaterMoles",
        # "TestCedaFbiUpdaterDeposit",
        # "TestIndexUpdaterBase",
        # "TestClusterHealth",
        # "TestMainAliases",
        # "TestEOQueries",
        # "TestEFFQueries",
        # "TestOpensearchQueries",
        # "TestIndexMapping",
        # "SpotMappingTestCase",
        # "CedaEOQueries",
        # "IndexMappings",
    ]

    # Position of each listed class; unlisted items rank before all of them.
    rank = {name: position for position, name in enumerate(CLASS_ORDER)}

    def sort_key(item):
        # Not every collected item is bound to a class, so look it up safely.
        owner = getattr(item, "cls", None)
        return rank.get(getattr(owner, "__name__", None), -1)

    # ``sorted`` is stable, so items sharing a rank keep their collected
    # order - the same result as moving each listed class to the end in turn.
    items[:] = sorted(items, key=sort_key)
92 changes: 92 additions & 0 deletions ceda_elasticsearch_tools/tests/test_consistency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@

class TestConsistency:

def test_index_tools(self):
from ceda_elasticsearch_tools import IndexUpdaterBase, BulkClient, __version__
from ceda_elasticsearch_tools.index_tools import (
CedaDirs,
CedaEo,
CedaFbi
)

assert 1==1, "Importing index tools successful."

def test_elasticsearch(self):
from ceda_elasticsearch_tools import CEDAElasticsearchClient
from ceda_elasticsearch_tools.elasticsearch.ceda_elasticsearch_client import CA_ROOT

assert 1==1, "Importing elasticsearch successful."

def test_core(self):
from ceda_elasticsearch_tools.core.log_reader import (
SpotMapping,
MD5LogFile,
DepositLog,
)

from ceda_elasticsearch_tools.core.updater import (
ElasticsearchQuery,
IndexFilter,
ElasticsearchUpdater
)

from ceda_elasticsearch_tools.core.utils import (
get_number_of_submitted_lotus_tasks,
percent,
get_latest_log,
list2file_newlines,
ProgressBar
)

assert 1==1, "Importing core successful."

def test_cmdline(self):

from ceda_elasticsearch_tools.cmdline.ceda_eo.coverage_test import main
from ceda_elasticsearch_tools.cmdline.secondary_scripts.md5 import (
logger_setup,
file_md5,
main
)
from ceda_elasticsearch_tools.cmdline.secondary_scripts.nla_sync_lotus_task import (
main,
NLASync
)

from ceda_elasticsearch_tools.cmdline.secondary_scripts.spot_checker import (
es_connection,
main,
make_query,
process_list,
get_args,
dir_exists
)

from ceda_elasticsearch_tools.cmdline.fbs_missing_files import (
submit_jobs_to_lotus,
generate_summary,
create_missing_list,
nolotus,
main,
)

from ceda_elasticsearch_tools.cmdline.nla_sync_es import (
chunk_dict,
chunks,
create_output_dir,
download_data_from_nla,
main,
loading
)

from ceda_elasticsearch_tools.cmdline.update_md5 import (
logger_setup,
update_from_logs,
extract_id,
write_page_to_file,
download_files_missing_md5,
calculate_md5s,
main
)

assert 1==1, "Importing cmdline successful."
Loading

0 comments on commit 3aa3dca

Please sign in to comment.