-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconfig_autoSB.py
176 lines (153 loc) · 7.74 KB
/
config_autoSB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# -*- coding: utf-8 -*-
"""
config_autoSB.py
By: Emily Sturdivant, [email protected]
Last modified: 1/10/17
OVERVIEW: Configuration file for sb_automation.py
REQUIRES: autoSB.py, pysb, lxml
"""
#%% Import packages
import sciencebasepy as pysb
import os
import glob
from lxml import etree
import json
import pickle
import sys
sys.path.append(os.path.dirname(os.path.realpath(__file__))) # Add the script location to the system path just to make sure this works.
from autoSB import *
import getpass
"""
Input variables
"""
#-----------------------
# REQUIRED
#-----------------------
# SB username (should be entire USGS email):
useremail = '[email protected]'
# SB password. If commented out, you will be prompted for your password each time you run the script.
#password =
# URL of data release landing page (e.g. 'https://www.sciencebase.gov/catalog/item/__item_ID__'):
# landing_link = "https://www.sciencebase.gov/catalog/item/5a54fbc3e4b01e7be242b917" # testing page
landing_link = "https://www.sciencebase.gov/catalog/item/5d5ece47e4b01d82ce961e36" # Deep Dive volume II
# Path to local top-level directory of data release (equivalent to landing page):
# OSX: If this is a server mounted and visible in your Volumes: r'/Volumes/[directory on server]'
parentdir = r'/Volumes/stor/Projects/DeepDive/5_datarelease_packages/vol2/release_v4_forSB' # OSX format
# parentdir = r"D:\DeepDive\5_datarelease_packages\vol1\sb_upload_test" # WINDOWS format
# DOI of data release (e.g. '10.5066/F78P5XNK'):
dr_doi = "10.5066/P9V7F6UX" # DOI for Deep Dive volume I
# Year of publication, if it needs to updated. Used as the date in citation publication date and the calendar date in time period of content.
pubdate = '2019'
# The edition element in the metadata can be modified here.
#edition = '1.0'
# Image file (with path) to be used as preview image. If commented out, the preview image will be ignored.
# File name of browse graphic; assumes that it is within parentdir
# previewImage = 'bb20160318_parentpage_browse.png'
# previewImage = os.path.join(parentdir, previewImage)
#-------------------------------------------------------------------------------
# OPTIONAL - ScienceBase page inheritance
#-------------------------------------------------------------------------------
# SB fields that will be copied (inherited) from landing page to sub-pages (subparents), which are different from data pages.
# Recommended: citation, contacts, body (='abstract' in XML; 'summary' in SB), purpose, webLinks
# body =
# relatedItems = Associated items
subparent_inherits = ['citation', 'contacts', 'body', 'webLinks', 'relatedItems'] #'purpose',
# SB fields that data pages inherit from their parent page. All other fields will be automatically populated from the XML.
# Recommended: citation, body, webLinks
data_inherits = ['citation', 'contacts', 'body', 'webLinks', 'relatedItems']
# SB fields that will be populated from the XML file in the top directory, assuming an error-free XML is present.
# Note that body = abstract. The Summary item on SB will automatically be created from body.
# Default: [].
landing_fields_from_xml = []
# qcfields_dict = {'contacts':3, 'webLinks':0, 'facets':1} # Comment out to keep it simple and save time
# qcfields_dict = {'contacts':9, 'webLinks':3, 'facets':1}
#-------------------------------------------------------------------------------
# Time-saving options
#-------------------------------------------------------------------------------
# Default True:
update_subpages = False # False to save time if page structure is already established.
delete_all_subpages = False # True to delete all child pages before running. Not necessary.
update_XML = True # False to save time if XML already has most up-to-date values.
update_data = False # False to save time if up-to-date data files have already been uploaded.
update_extent = False
verbose = True
# page_per_filename = False
max_MBsize = 2000 # 2000 mb is the suggested threshold above which to use the large file uploader.
start_xml_idx = 0 # 0 to perform for all XMLs. This is included in case a process does not complete. '25' to start upload at file 26.
# Default False:
add_preview_image_to_all = False # True to put first image file encountered in a directory on its corresponding page
restore_original_xml = False # True to restore original files saved on the first run of the code. Not necessary.
remove_original_xml = True # True to remove original files saved on the first run of the code.
# ------------------------------------------------------------------------------
# OPTIONAL - XML changes
# ------------------------------------------------------------------------------
# To change the "Suggested citation" in the Other Citation Information, choose one of two options: either use the find_and_replace variable or the new_othercit variable, see below.
# FIND AND REPLACE. {key='desired value': value=['list of','values','to replace']}
find_and_replace = {'**dr_doi**': dr_doi,
'xxx-ofr doi-***': '10.3133/ofr20191071',
# 'http:': 'https:',
'dx.doi.org': 'doi.org'
}
# REMOVE ELEMENT. If an element needs to be removed, this will occur before additions or replacements
# remove_fills = {'./idinfo/crossref':['AUTHOR', 'doi.org/10.3133/ofr20171015']}
# APPEND ELEMENT.
# Add {container: new XML element} item to metadata_additions dictionary for each element to be appended to the container element. Appending will not remove any elements.
# Example of a new cross reference:
# new_crossref = """
# <crossref><citeinfo>
# <origin>E.A. Himmelstoss</origin>
# <origin>Meredith Kratzmann</origin>
# <origin>E. Robert Thieler</origin>
# <pubdate>2017</pubdate>
# <title>National Assessment of Shoreline Change: Summary Statistics for Updated Vector Shorelines and Associated Shoreline Change Data for the Gulf of Mexico and Southeast Atlantic Coasts</title>
# <serinfo><sername>Open-File Report</sername><issue>2017-1015</issue></serinfo>
# <pubinfo>
# <pubplace>Reston, VA</pubplace>
# <publish>U.S. Geological Survey</publish>
# </pubinfo>
# <onlink>https://doi.org/10.3133/ofr20171015</onlink>
# </citeinfo></crossref>
# """
# metadata_additions = {'./idinfo':new_crossref}
# REPLACE ELEMENT. Replace suggested citation:
# new_othercit = """<othercit>
# Suggested citation: Lastname, F.M., {}, Title: U.S. Geological Survey data release, https://doi.org/{}.
# </othercit>""".format(pubdate, dr_doi)
# metadata_replacements = {'./idinfo/citation/citeinfo':new_othercit}
# Example of new distribution information:
# sb_distrib = """
# <distrib>
# <cntinfo>
# <cntorgp>
# <cntorg>U.S. Geological Survey - ScienceBase</cntorg>
# </cntorgp>
# <cntaddr>
# <addrtype>mailing and physical address</addrtype>
# <address>Denver Federal Center</address>
# <address>Building 810</address>
# <address>Mail Stop 302</address>
# <city>Denver</city>
# <state>CO</state>
# <postal>80225</postal>
# <country>USA</country>
# </cntaddr>
# <cntvoice>1-888-275-8747</cntvoice>
# <cntemail>[email protected]</cntemail>
# </cntinfo>
# </distrib>
# """
# metadata_replacements = {'./distinfo':sb_distrib}
"""
Initialize
"""
#%% Initialize SB session
password = getpass.getpass("ScienceBase password: ")
sb = log_in(useremail, password)
stash_dir = os.path.join(parentdir, '.assistants')
#%% Find landing page
if not "landing_id" in locals():
try:
landing_id = os.path.split(landing_link)[1] # get ID for parent page from link
except:
print("""Either the ID (landing_id) or the URL (landing_link) of the
ScienceBase landing page must be specified in config_autoSB.py.""")