Skip to content

Commit

Permalink
Merge pull request #17 from cedadev/v0.1.3
Browse files Browse the repository at this point in the history
V0.1.3
  • Loading branch information
dwest77a authored Nov 19, 2024
2 parents a51d34f + 645eaba commit f951f1c
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 5 deletions.
9 changes: 9 additions & 0 deletions facet_scanner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,12 @@
__license__ = 'BSD - see LICENSE file in top-level package directory'
__contact__ = '[email protected]'

# Logger setup
#
# Package-wide logging configuration, executed once when ``facet_scanner`` is
# first imported. Submodules import ``logstream`` from here and attach it to
# their own module-level loggers.
import logging

# Configure the root logger at DEBUG. ``basicConfig`` is a no-op if the root
# logger already has handlers, so an application that configures logging
# before importing this package keeps its own root configuration.
# NOTE(review): this runs at import time and therefore affects every
# application that imports the package; the Python logging HOWTO advises
# libraries against configuring the root logger - confirm this is intended.
logging.basicConfig(level=logging.DEBUG)
# Shared stream handler (StreamHandler defaults to sys.stderr) reused by the
# package's module loggers so their output has one consistent format.
logstream = logging.StreamHandler()

# Records are rendered as "<LEVEL> [<logger name>]: <message>".
formatter = logging.Formatter('%(levelname)s [%(name)s]: %(message)s')
logstream.setFormatter(formatter)

7 changes: 7 additions & 0 deletions facet_scanner/collection_handlers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
from facet_scanner.utils import generator_grouper, Singleton
import time

from facet_scanner import logstream
import logging

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False


class CollectionHandler(metaclass=Singleton):
"""
Expand Down
13 changes: 12 additions & 1 deletion facet_scanner/collection_handlers/cci.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,19 @@
from tqdm import tqdm
import hashlib
import json
import logging

from facet_scanner.collection_handlers.base import CollectionHandler
from facet_scanner.collection_handlers.utils import CatalogueDatasets
from facet_scanner.utils import parse_key
from tag_scanner.tagger import ProcessDatasets

from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False

def nested_get(key_list, input_dict, default=None):
"""
Takes an iterable of keys and returns none if not found or the value
Expand Down Expand Up @@ -113,8 +120,11 @@ def get_facets(self, path):
:return: Dict Facet:value pairs
"""

logger.debug('Getting facets for CCI-type path')

tagged_dataset = self.pds.get_file_tags(path)

logger.debug('Translating tagging code to facet map')
# Translate between output from tagging code to map to named facets
mapped_facets = {}
for tag_name, tag_value in tagged_dataset.labels.items():
Expand All @@ -128,6 +138,7 @@ def get_facets(self, path):
if tag_name == facet and tag_name_mapping is None:
mapped_facets[facet] = tag_value

logger.debug('Obtaining moles record metadata')
# Get MOLES catalogue
moles_info = self.catalogue.get_moles_record_metadata(path)

Expand All @@ -137,6 +148,7 @@ def get_facets(self, path):
if moles_info:
mapped_facets['datasetId'] = moles_info['url'].split('uuid/')[-1]

logger.debug('Completed facet mapping')
return mapped_facets

@staticmethod
Expand Down Expand Up @@ -256,7 +268,6 @@ def _get_collection_variables(self, results, file_index):

if values:
ids = [x['key'] for x in values]
print(ids)

# Sample 1 netCDF file from each DRS
for id in ids:
Expand Down
7 changes: 7 additions & 0 deletions facet_scanner/collection_handlers/cmip5.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@

from facet_scanner.collection_handlers.base import CollectionHandler
import os
import logging

from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False

class CMIP5(CollectionHandler):
project_name = 'opensearch'
Expand Down
6 changes: 6 additions & 0 deletions facet_scanner/core/elasticsearch_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
from elasticsearch.helpers import scan, bulk
from ceda_elasticsearch_tools.elasticsearch import CEDAElasticsearchClient

import logging
from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False

class ElasticsearchConnection:
"""
Expand Down
6 changes: 6 additions & 0 deletions facet_scanner/core/facet_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
from facet_scanner.collection_handlers.utils import FacetFactory
import logging

from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False


class FacetScanner:
Expand All @@ -33,8 +37,10 @@ def get_handler(self, path, **kwargs):
:return: Mapped collection handler
:rtype: CollectionHandler
"""
logger.debug("Obtaining handler")
handler, collection_root = self.handler_factory.get_handler(path)

logger.debug('Handler Obtainment complete')
# Handle situation where handler not found
if handler is None:
logger.error(f'Unable to find a handler for: {path} in facet_scanner.collection_handlers.utils.collection_map.'
Expand Down
7 changes: 7 additions & 0 deletions facet_scanner/scripts/facet_scanner_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@
from configparser import RawConfigParser
from facet_scanner.utils import query_yes_no
from facet_scanner.core.facet_scanner import FacetScanner
import logging

from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False


class FacetExtractor(FacetScanner):
Expand Down
7 changes: 7 additions & 0 deletions facet_scanner/scripts/lotus_facet_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@
import os
import json

import logging
from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False


class LotusFacetScanner(FacetExtractor):

Expand Down
7 changes: 7 additions & 0 deletions facet_scanner/utils/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
import sys
import itertools
from collections import OrderedDict
import logging

from facet_scanner import logstream

# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
# Emit records through the shared handler imported from ``facet_scanner``.
logger.addHandler(logstream)
# Do not pass records on to ancestor loggers' handlers, so each message is
# written once (by ``logstream``) rather than again by a root handler.
logger.propagate = False


def query_yes_no(question, default="yes"):
Expand Down
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "facet-scanner"
version = "0.1.1"
version = "0.1.3"
description = "Extracts facets from datasets to add to elasticsearch."
authors = ["Daniel Westwood <[email protected]>"]
license = "{file='LICENSE'}"
Expand All @@ -24,10 +24,10 @@ python = "^3.12"
elasticsearch = "^7"
requests = "^2.32.3"
tqdm = "^4.66.6"
sphinx = "^8.1.3"
directory-tree = { git = "https://github.com/cedadev/ceda-directory-tree.git", tag = "v1.1.2" }
sphinx = "^7"
directory-tree = { git = "https://github.com/cedadev/ceda-directory-tree.git", tag = "v1.1.3" }
ceda-elasticsearch-tools = { git = "https://github.com/cedadev/ceda-elasticsearch-tools.git", tag = "v2.4.0" }
tag-scanner = { git = "https://github.com/cedadev/cci-tag-scanner.git", tag = "v2.1.3" }
tag-scanner = { git = "https://github.com/cedadev/cci-tag-scanner.git", tag = "v2.1.6" }
pytest = "^8.3.3"


Expand Down

0 comments on commit f951f1c

Please sign in to comment.