From 823b052ee4e0725fb8864b4f368e86e6a68e1076 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 5 May 2019 00:43:42 +0530 Subject: [PATCH 01/17] [ref]:change dtype of example gctoo if all columns contain Nan --- cmapPy/pandasGEXpress/mini_gctoo_for_testing.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py b/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py index 2c94c53..45776d3 100644 --- a/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py +++ b/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py @@ -45,6 +45,9 @@ def make(convert_neg_666=True): if convert_neg_666: mini_row_metadata = mini_row_metadata.replace([-666, "-666", -666.0], [numpy.nan, numpy.nan, numpy.nan]) + # if all values in a column are nanpandas.Series(mini_row_metadata.isna().sum() == mini_row_metadata.shape[0]) convert dtype of that column to float + all_nan_columns = (mini_row_metadata.isna().sum() == mini_row_metadata.shape[0]).to_numpy().nonzero()[0] + mini_row_metadata = mini_row_metadata.astype({d: 'float' for d in mini_row_metadata.columns[all_nan_columns.tolist()]}) else: mini_row_metadata = mini_row_metadata.replace([-666, -666.0], ["-666", "-666"]) From 7dfe6b1f626814a2baa88caa36c727a3f9810a18 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 01:14:00 +0530 Subject: [PATCH 02/17] [add]: paramater to not sort cidx and ridx --- cmapPy/pandasGEXpress/parse_gctx.py | 78 +++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/cmapPy/pandasGEXpress/parse_gctx.py b/cmapPy/pandasGEXpress/parse_gctx.py index 2ec4bcc..3c758fb 100644 --- a/cmapPy/pandasGEXpress/parse_gctx.py +++ b/cmapPy/pandasGEXpress/parse_gctx.py @@ -21,7 +21,8 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, - ridx=None, cidx=None, row_meta_only=False, col_meta_only=False, make_multiindex=False): + ridx=None, cidx=None, row_meta_only=False, col_meta_only=False, make_multiindex=False, + sort_col_meta = True, sort_row_meta = True): """ Primary method of script. Reads in path to a gctx file and parses into GCToo object. @@ -44,7 +45,8 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, as pandas DataFrame - make_multiindex (bool): whether to create a multi-index df combining the 3 component dfs - + - sort_col_meta (bool) : whether to sort the column metadata by indexes. Default = True + - sort_row_meta (bool) : whether to sort the row metadata by indexes. Default = False Output: - myGCToo (GCToo): A GCToo instance containing content of parsed gctx file. Note: if meta_only = True, this will be a GCToo instance where the data_df is empty, i.e. data_df = pd.DataFrame(index=rids, @@ -74,7 +76,7 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, row_meta = parse_metadata_df("row", row_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None, sort_row_meta, sort_col_meta) gctx_file.close() @@ -87,7 +89,7 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, col_meta = parse_metadata_df("col", col_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta, sort_row_meta, sort_col_meta) gctx_file.close() @@ -104,15 +106,42 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, col_meta = parse_metadata_df("col", col_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, + sort_row_meta = True, sort_col_meta = True) data_dset = gctx_file[data_node] data_df = parse_data_df(data_dset, sorted_ridx, sorted_cidx, row_meta, col_meta) + row_mapping_df = row_meta.index.tolist() + col_mapping_df = col_meta.index.tolist() + + print("here") + print(row_mapping_df) + print(col_mapping_df) + # (if subsetting) subset metadata row_meta = row_meta.iloc[sorted_ridx] col_meta = col_meta.iloc[sorted_cidx] + if not sort_col_meta: + ## in the subsetted and re-indexed dataframe get where new indexes lie + (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, + sort_row_meta, sort_col_meta) + + #d = {i:t for (i,t) in enumerate(sorted([20,10,15]))} + #l = [d[x] for x in [20,10,15]] + #[b.index(i) for i in a] + print("unsorted cid", unsorted_cidx) + data_df = data_df.iloc[:,unsorted_cidx] + col_meta = col_meta.iloc[unsorted_cidx,:] + + if not sort_row_meta: + (unsorted_ridx, _) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, + sort_row_meta, sort_row_meta) + print("unsorted_ridx", unsorted_ridx) + data_df = data_df.iloc[unsorted_ridx,:] + row_meta = row_meta.iloc[unsorted_ridx,:] + # get version my_version = gctx_file.attrs[version_node] if type(my_version) == np.ndarray: @@ -126,7 +155,7 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, return my_gctoo -def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df): +def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df, sort_row_meta, sort_col_meta): """ Makes sure that (if entered) id inputs entered are of one type (string id or index) Input: @@ -140,11 +169,14 @@ def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df): (row_type, row_ids) = check_id_idx_exclusivity(rid, ridx) (col_type, col_ids) = check_id_idx_exclusivity(cid, cidx) - row_ids = check_and_convert_ids(row_type, row_ids, row_meta_df) - ordered_ridx = get_ordered_idx(row_type, row_ids, row_meta_df) - col_ids = check_and_convert_ids(col_type, col_ids, col_meta_df) - ordered_cidx = get_ordered_idx(col_type, col_ids, col_meta_df) + print("row_ids", row_ids) + print("col_ids", col_ids) + row_ids = check_and_convert_ids(row_type, row_ids, row_meta_df, sort_col_meta) + ordered_ridx = get_ordered_idx(row_type, row_ids, row_meta_df, sort_row_meta) + + col_ids = check_and_convert_ids(col_type, col_ids, col_meta_df, sort_col_meta) + ordered_cidx = get_ordered_idx(col_type, col_ids, col_meta_df, sort_col_meta) return (ordered_ridx, ordered_cidx) @@ -172,19 +204,20 @@ def check_id_idx_exclusivity(id, idx): return (None, []) -def check_and_convert_ids(id_type, id_list, meta_df): +def check_and_convert_ids(id_type, id_list, meta_df, sort_id): if meta_df is not None: if id_type == "id": id_list = convert_ids_to_meta_type(id_list, meta_df) check_id_validity(id_list, meta_df) else: - check_idx_validity(id_list, meta_df) + check_idx_validity(id_list, meta_df, sort_id) return id_list else: return None def check_id_validity(id_list, meta_df): + print(id_list) id_set = set(id_list) meta_set = set(meta_df.index) mismatch_ids = id_set - meta_set @@ -195,14 +228,15 @@ def check_id_validity(id_list, meta_df): raise Exception("parse_gctx check_id_validity " + msg) -def check_idx_validity(id_list, meta_df): - N = meta_df.shape[0] - out_of_range_ids = [my_id for my_id in id_list if my_id < 0 or my_id >= N] - if len(out_of_range_ids): - msg = "some of indexes being used to subset the data are not valid max N: {} out_of_range_ids: {}".format(N, - out_of_range_ids) - logger.error(msg) - raise Exception("parse_gctx check_idx_validity " + msg) +def check_idx_validity(id_list, meta_df, sort_id): + if sort_id: + N = meta_df.shape[0] + out_of_range_ids = [my_id for my_id in id_list if my_id < 0 or my_id >= N] + if len(out_of_range_ids): + msg = "some of indexes being used to subset the data are not valid max N: {} out_of_range_ids: {}".format(N, + out_of_range_ids) + logger.error(msg) + raise Exception("parse_gctx check_idx_validity " + msg) def convert_ids_to_meta_type(id_list, meta_df): @@ -216,7 +250,7 @@ def convert_ids_to_meta_type(id_list, meta_df): raise Exception("parse_gctx check_if_ids_in_meta " + msg + " ValueError ve: {}".format(ve)) -def get_ordered_idx(id_type, id_list, meta_df): +def get_ordered_idx(id_type, id_list, meta_df, sort_idx): """ Gets index values corresponding to ids to subset and orders them. Input: @@ -231,6 +265,8 @@ def get_ordered_idx(id_type, id_list, meta_df): elif id_type == "id": lookup = {x: i for (i,x) in enumerate(meta_df.index)} id_list = [lookup[str(i)] for i in id_list] + if not sort_idx: + return [sorted(id_list).index(i) for i in id_list] return sorted(id_list) else: return None From 67fc6c67d877ac6afb28c7e7833430854c7ae14b Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 01:19:33 +0530 Subject: [PATCH 03/17] [WIP]: adding test case titles for modified parse_gctx --- .../tests/python3_tests/test_parse_gctx.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py index 07622d9..18eae0d 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py @@ -135,6 +135,21 @@ def test_parse(self): mg12 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df) + # test with sort_row_meta False and ridx + mg13 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx",) + + # test with sort_col_meta False and cidx + mg14 = + + + # test with sort_row_meta and sort_col_meta both False and ridx and cidx + mg15 = + + + # test with sort_row_meta and sort_col_meta both False and string rid and cid + mg16 = + + def test_parse_rid_as_entrez_id(self): input_file = "../functional_tests/test_parse_gctx_rid_entrez_id.gctx" g = parse_gctx.parse(input_file) From 06015524a1361ca66249858fc6a419dd25d64a60 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 13:03:04 +0530 Subject: [PATCH 04/17] [ref]: add sort to row_meta_only and col_meta_only --- cmapPy/pandasGEXpress/parse_gctx.py | 33 +++++++++++++++++------------ 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/cmapPy/pandasGEXpress/parse_gctx.py b/cmapPy/pandasGEXpress/parse_gctx.py index 3c758fb..35e7238 100644 --- a/cmapPy/pandasGEXpress/parse_gctx.py +++ b/cmapPy/pandasGEXpress/parse_gctx.py @@ -76,12 +76,19 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, row_meta = parse_metadata_df("row", row_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None, sort_row_meta, sort_col_meta) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None, + sort_row_meta = True, sort_col_meta = True) gctx_file.close() # subset if specified, then return row_meta = row_meta.iloc[sorted_ridx] + + if not sort_row_meta: + (unsorted_ridx, _) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None, + sort_row_meta, sort_row_meta) + row_meta = row_meta.iloc[unsorted_ridx] + return row_meta elif col_meta_only: # read in col metadata @@ -89,12 +96,21 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, col_meta = parse_metadata_df("col", col_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta, sort_row_meta, sort_col_meta) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, + col_meta, sort_row_meta = True, sort_col_meta = True) gctx_file.close() # subset if specified, then return col_meta = col_meta.iloc[sorted_cidx] + print(col_meta.index) + + if not sort_col_meta: + (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta, + sort_row_meta, sort_col_meta) + print("unsorted cidx", unsorted_cidx) + col_meta = col_meta.iloc[unsorted_cidx, :] + return col_meta else: # read in row metadata @@ -112,26 +128,17 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, data_dset = gctx_file[data_node] data_df = parse_data_df(data_dset, sorted_ridx, sorted_cidx, row_meta, col_meta) - row_mapping_df = row_meta.index.tolist() - col_mapping_df = col_meta.index.tolist() - - print("here") - print(row_mapping_df) - print(col_mapping_df) - # (if subsetting) subset metadata row_meta = row_meta.iloc[sorted_ridx] col_meta = col_meta.iloc[sorted_cidx] + print(col_meta.index) if not sort_col_meta: ## in the subsetted and re-indexed dataframe get where new indexes lie (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, sort_row_meta, sort_col_meta) - #d = {i:t for (i,t) in enumerate(sorted([20,10,15]))} - #l = [d[x] for x in [20,10,15]] - #[b.index(i) for i in a] - print("unsorted cid", unsorted_cidx) + print("unsorted cidx", unsorted_cidx) data_df = data_df.iloc[:,unsorted_cidx] col_meta = col_meta.iloc[unsorted_cidx,:] From a14c557b65a4d11637eff2c2b26b7ddcd42f2bdb Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 13:21:32 +0530 Subject: [PATCH 05/17] [ref]:remove print stateements --- cmapPy/pandasGEXpress/parse_gctx.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cmapPy/pandasGEXpress/parse_gctx.py b/cmapPy/pandasGEXpress/parse_gctx.py index 35e7238..e35e346 100644 --- a/cmapPy/pandasGEXpress/parse_gctx.py +++ b/cmapPy/pandasGEXpress/parse_gctx.py @@ -103,12 +103,10 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, # subset if specified, then return col_meta = col_meta.iloc[sorted_cidx] - print(col_meta.index) if not sort_col_meta: (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta, sort_row_meta, sort_col_meta) - print("unsorted cidx", unsorted_cidx) col_meta = col_meta.iloc[unsorted_cidx, :] return col_meta @@ -132,20 +130,17 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, row_meta = row_meta.iloc[sorted_ridx] col_meta = col_meta.iloc[sorted_cidx] - print(col_meta.index) if not sort_col_meta: ## in the subsetted and re-indexed dataframe get where new indexes lie (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, sort_row_meta, sort_col_meta) - print("unsorted cidx", unsorted_cidx) data_df = data_df.iloc[:,unsorted_cidx] col_meta = col_meta.iloc[unsorted_cidx,:] if not sort_row_meta: (unsorted_ridx, _) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, sort_row_meta, sort_row_meta) - print("unsorted_ridx", unsorted_ridx) data_df = data_df.iloc[unsorted_ridx,:] row_meta = row_meta.iloc[unsorted_ridx,:] @@ -177,8 +172,6 @@ def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df, so (col_type, col_ids) = check_id_idx_exclusivity(cid, cidx) - print("row_ids", row_ids) - print("col_ids", col_ids) row_ids = check_and_convert_ids(row_type, row_ids, row_meta_df, sort_col_meta) ordered_ridx = get_ordered_idx(row_type, row_ids, row_meta_df, sort_row_meta) @@ -224,7 +217,6 @@ def check_and_convert_ids(id_type, id_list, meta_df, sort_id): def check_id_validity(id_list, meta_df): - print(id_list) id_set = set(id_list) meta_set = set(meta_df.index) mismatch_ids = id_set - meta_set From e95ada2a468815b30d32dd248e45288c4060ba2d Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 14:37:52 +0530 Subject: [PATCH 06/17] according to pandas version --- cmapPy/pandasGEXpress/mini_gctoo_for_testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py b/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py index 6b97c06..df98cd4 100644 --- a/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py +++ b/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py @@ -47,7 +47,7 @@ def make(convert_neg_666=True): if convert_neg_666: mini_row_metadata = mini_row_metadata.replace([-666, "-666", -666.0], [numpy.nan, numpy.nan, numpy.nan]) # if all values in a column are nanpandas.Series(mini_row_metadata.isna().sum() == mini_row_metadata.shape[0]) convert dtype of that column to float - all_nan_columns = (mini_row_metadata.isna().sum() == mini_row_metadata.shape[0]).to_numpy().nonzero()[0] + all_nan_columns = (mini_row_metadata.isnull().sum() == numpy.array(mini_row_metadata.shape[0])).nonzero()[0] mini_row_metadata = mini_row_metadata.astype({d: 'float' for d in mini_row_metadata.columns[all_nan_columns.tolist()]}) else: mini_row_metadata = mini_row_metadata.replace([-666, -666.0], ["-666", "-666"]) From a1153acd1456107d8dba80d2ad2e874c258715d8 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 14:38:36 +0530 Subject: [PATCH 07/17] [ref]:change python3 tests for write gctx; give absolute paths in tests --- .../tests/python3_tests/__init__.py | 0 .../tests/python3_tests/test_concat.py | 4 +- .../tests/python3_tests/test_edge_cases.py | 2 +- .../tests/python3_tests/test_gct2gctx.py | 12 ++--- .../tests/python3_tests/test_gctx2gct.py | 24 ++++----- .../tests/python3_tests/test_parse.py | 22 ++++---- .../tests/python3_tests/test_parse_gct.py | 2 +- .../tests/python3_tests/test_parse_gctx.py | 53 +++++++++---------- .../tests/python3_tests/test_subset.py | 10 ++-- .../tests/python3_tests/test_write_gct.py | 2 +- .../tests/python3_tests/test_write_gctx.py | 2 +- 11 files changed, 66 insertions(+), 67 deletions(-) create mode 100644 cmapPy/pandasGEXpress/tests/python3_tests/__init__.py diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/__init__.py b/cmapPy/pandasGEXpress/tests/python3_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py index abbf8c7..6cdb6d0 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py @@ -10,7 +10,7 @@ logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "../functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/pandasGEXpress/tests/functional_tests/" class TestConcat(unittest.TestCase): @@ -333,7 +333,7 @@ def test_build_mismatched_common_meta_report(self): self.assertEqual({"r3"}, set(r.orig_rid)) def test_concat_main(self): - test_dir = "../functional_tests/test_concat/test_main" + test_dir = "cmapPy/pandasGEXpress/tests/functional_tests/test_concat/test_main" g_a = pg.parse(os.path.join(test_dir, "a.gct")) logger.debug("g_a: {}".format(g_a)) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py index 64d680a..951c7ea 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py @@ -10,7 +10,7 @@ import pandas.util.testing as pandas_testing -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py index 012242b..05d1863 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py @@ -14,8 +14,8 @@ class TestGCT2GCTx(unittest.TestCase): def test_gct2gctx_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gct" - out_name = "../functional_tests/test_gct2gctx_out.gctx" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests/test_gct2gctx_out.gctx" args_string = "-f {} -o {}".format(in_name, out_name) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gct2gctx_main(self): pd.util.testing.assert_frame_equal(in_gct.col_metadata_df, out_gctx.col_metadata_df) pd.util.testing.assert_frame_equal(in_gct.row_metadata_df, out_gctx.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gct2gctx_out_annotated.gctx" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gct2gctx_out_annotated.gctx" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py index 06576c1..39f39e5 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py @@ -14,8 +14,8 @@ class TestGCTx2GCT(unittest.TestCase): def test_gctx2gct_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gctx" - out_name = "../functional_tests/test_gctx2gct_out.gct" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out.gct" args_string = "-f {} -o {}".format(in_name, out_name) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gctx2gct_main(self): pd.util.testing.assert_frame_equal(in_gctx.col_metadata_df, out_gct.col_metadata_df) pd.util.testing.assert_frame_equal(in_gctx.row_metadata_df, out_gct.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta ) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -51,9 +51,9 @@ def test_gctx2gct_main(self): def test_missing_annotations(self): with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_missing_rowmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_missing_rowmeta.txt" args_string = "-f {} -o {} -row_annot_path {}".format(no_meta, added_meta, row_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -63,9 +63,9 @@ def test_missing_annotations(self): self.assertTrue('Row ids in matrix missing from annotations file', context.exception) with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - col_meta = "../functional_tests/test_missing_colmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out_annotated.gct" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_missing_colmeta.txt" args_string = "-f {} -o {} -col_annot_path {}".format(no_meta, added_meta, col_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py index c4de20c..d25492b 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py @@ -9,7 +9,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -17,7 +17,7 @@ class TestParse(unittest.TestCase): def test_gctx_parsing(self): # parse in gctx, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -27,14 +27,14 @@ def test_gctx_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", convert_neg_666 = False) self.assertFalse(mg2_alt.col_metadata_df["mfc_plate_id"].isnull().all()) # parsing w/rids & cids specified test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -43,28 +43,28 @@ def test_gctx_parsing(self): # parsing w/ridx & cidx specified mg5 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg6 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg6 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg5.data_df, mg6.data_df) pandas_testing.assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df) pandas_testing.assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df) # parsing row metadata only - mg7 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg7 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg7, mg1.row_metadata_df) # parsing col metadata only - mg8 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg8 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg8, mg1.col_metadata_df) # parsing w/multiindex - mg9 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", make_multiindex=True) + mg9 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", make_multiindex=True) self.assertTrue(mg9.multi_index_df is not None) def test_gct_parsing(self): # parse in gct, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -74,13 +74,13 @@ def test_gct_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct", convert_neg_666 = False) self.assertCountEqual(mg2_alt.col_metadata_df["mfc_plate_id"].values.tolist(), [-666] * 6) # parse in gct with subsetting my_rid = "LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33" - mg3 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", + mg3 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct", cidx=[0, 2], rid=[my_rid]) self.assertEqual(mg3.data_df.shape, (1, 2)) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py index 4ca8946..870767e 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.GCToo as GCToo -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py index 18eae0d..2f84829 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py @@ -44,7 +44,7 @@ class TestParseGctx(unittest.TestCase): def test_parse(self): # parse whole thing mg1 = mini_gctoo_for_testing.make() - mg2 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -54,7 +54,7 @@ def test_parse(self): test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -104,14 +104,14 @@ def test_parse(self): # test with ridx/cidx mg7 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg8 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg8 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg7.data_df, mg8.data_df) pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df) pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df) # test with rid/cidx - mg9 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg9 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cidx=[4]) @@ -120,7 +120,7 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df) # test with ridx/cid - mg10 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], + mg10 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) pandas_testing.assert_frame_equal(mg7.data_df, mg10.data_df) @@ -128,30 +128,30 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df) # test with row_meta_only - mg11 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg11 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df) # test with col_meta_only - mg12 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg12 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df) # test with sort_row_meta False and ridx - mg13 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx",) + mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",) # test with sort_col_meta False and cidx - mg14 = + #mg14 = # test with sort_row_meta and sort_col_meta both False and ridx and cidx - mg15 = + #mg15 = # test with sort_row_meta and sort_col_meta both False and string rid and cid - mg16 = + #mg16 = def test_parse_rid_as_entrez_id(self): - input_file = "../functional_tests/test_parse_gctx_rid_entrez_id.gctx" + input_file = "cmapPy/pandasGEXpress/tests/functional_tests/test_parse_gctx_rid_entrez_id.gctx" g = parse_gctx.parse(input_file) self.assertEqual((5, 5), g.data_df.shape) logger.debug("g.data_df.index: {}".format(g.data_df.index)) @@ -177,19 +177,19 @@ def test_check_and_order_id_inputs(self): # case 1: row and col lists are populated and same type self.assertEqual((sorted(ridx), sorted(cidx)), - parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 2: row & col lists are populated, but of different types self.assertEqual((sorted(ridx), [0, 1, 2, 3]), - parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 3: row list and col lists are both None self.assertEqual(([0, 1, 2, 3], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 4: row list is populated, col list is None self.assertEqual(([0, 1, 2], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) def test_check_id_idx_exclusivity(self): ids = ["a", "b", "c"] @@ -218,7 +218,7 @@ def test_parse_metadata_df(self): logger.debug("mini_row_meta.columns: {}".format(mini_row_meta.columns)) logger.debug("mini_row_meta.dtypes: {}".format(mini_row_meta.dtypes)) - gctx_file = h5py.File("../functional_tests/mini_gctoo_for_testing.gctx", "r") + gctx_file = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", "r") row_dset = gctx_file[row_meta_group_node] col_dset = gctx_file[col_meta_group_node] @@ -285,19 +285,19 @@ def test_get_ordered_idx(self): mg = mini_gctoo_for_testing.make() # case 1: id_type == None - case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df) + case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df, sort_idx = True) self.assertEqual(case1, list(range(0, 6)), "Expected ordered idx to be {} but got {}".format(list(range(0, 6)), case1)) # case 2: id_type == "id" case2 = parse_gctx.get_ordered_idx("id", - ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df) + ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df, sort_idx = True) self.assertEqual(case2, [4], "Expected ordered idx to be {} but got {}".format([4], case2)) # case 3: id_type == ridx case3 = parse_gctx.get_ordered_idx("idx", - [5, 1, 3], mg.col_metadata_df) + [5, 1, 3], mg.col_metadata_df, sort_idx = True) self.assertEqual(case3, [1, 3, 5], "Expected ordered idx to be {} but got {}".format([1, 3, 5], case3)) @@ -310,12 +310,11 @@ def test_parse_data_df(self): mini_data_df.columns.name = "cid" # create h5py File instance - mini_gctx = h5py.File("../functional_tests/mini_gctx_with_metadata_n2x3.gctx", "r") + mini_gctx = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctx_with_metadata_n2x3.gctx", "r") data_dset = mini_gctx[data_node] - # get relevant metadata fields - col_meta = parse_gctx.get_column_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") - row_meta = parse_gctx.get_row_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") + col_meta = parse_gctx.get_column_metadata("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctx_with_metadata_n2x3.gctx") + row_meta = parse_gctx.get_row_metadata("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctx_with_metadata_n2x3.gctx") # case 1: no subsetting data_df1 = parse_gctx.parse_data_df(data_dset, [0, 1, 2], [0, 1], row_meta, col_meta) @@ -367,11 +366,11 @@ def test_check_idx_validity(self): id_list = [0,1,2] df = pd.DataFrame({}, index=range(5)) logger.debug("df.shape: {}".format(df.shape)) - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) id_list[0] = -1 with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[-1]", str(context.exception)) @@ -379,7 +378,7 @@ def test_check_idx_validity(self): invalid_high = df.shape[0] + 1 id_list[0] = invalid_high with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[{}]".format(invalid_high), str(context.exception)) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py index 249b630..b92b120 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py @@ -12,7 +12,7 @@ class TestSubset(unittest.TestCase): def test_read_arg(self): - arg_path = os.path.join("../functional_tests", "test_subset_rid.grp") + arg_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") rids = sg._read_arg([arg_path]) self.assertCountEqual(rids, ["a", "Bb", "c"]) @@ -27,10 +27,10 @@ def test_read_arg_bad(self): def test_subset_main(self): - in_gct_path = os.path.join("../functional_tests", "test_subset_in.gct") - rid_grp_path = os.path.join("../functional_tests", "test_subset_rid.grp") - out_name = os.path.join("../functional_tests", "test_subset_out.gct") - expected_out_path = os.path.join("../functional_tests", "test_subset_expected.gct") + in_gct_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_in.gct") + rid_grp_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") + out_name = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_out.gct") + expected_out_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_expected.gct") args_string = "-i {} --rid {} -ec {} -o {}".format( in_gct_path, rid_grp_path, "f", out_name) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py index e01319d..a5ce5da 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.parse_gct as pg import cmapPy.pandasGEXpress.write_gct as wg -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py index 6d7af8e..25f9bf2 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py @@ -12,7 +12,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" # instantiate logger logger = logging.getLogger(setup_logger.LOGGER_NAME) From 3e450a724d77a0a63f0c2351d78780ad3e64c74b Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 14:39:17 +0530 Subject: [PATCH 08/17] [ref]: add init to test dir --- cmapPy/pandasGEXpress/tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cmapPy/pandasGEXpress/tests/__init__.py diff --git a/cmapPy/pandasGEXpress/tests/__init__.py b/cmapPy/pandasGEXpress/tests/__init__.py new file mode 100644 index 0000000..e69de29 From e1508cc830d4c188c52ce0a4194354869061b68c Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 14:39:30 +0530 Subject: [PATCH 09/17] [ref]:add absolute path for gct --- .../pandasGEXpress/tests/test_python2_python3_compatibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py b/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py index e308216..9c3e0f3 100644 --- a/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py +++ b/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py @@ -15,7 +15,7 @@ __author__ = "Saksham Malhotra" __email__ = "saksham2196@gmail.com" -FUNCTIONAL_TESTS_PATH = "functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) From 5161a3ea8545610fc5c65c383bb8b272e4098bf6 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 18:38:00 +0530 Subject: [PATCH 10/17] [ref]:make changes to python2 tests --- .../tests/python2_tests/__init__.py | 0 .../tests/python2_tests/test_concat.py | 4 +- .../tests/python2_tests/test_edge_cases.py | 2 +- .../tests/python2_tests/test_gct2gctx.py | 24 +++++----- .../tests/python2_tests/test_gctx2gct.py | 24 +++++----- .../tests/python2_tests/test_parse.py | 23 +++++----- .../tests/python2_tests/test_parse_gct.py | 6 +-- .../tests/python2_tests/test_parse_gctx.py | 46 +++++++++---------- .../tests/python2_tests/test_subset.py | 10 ++-- .../tests/python2_tests/test_write_gct.py | 2 +- .../tests/python2_tests/test_write_gctx.py | 2 +- 11 files changed, 72 insertions(+), 71 deletions(-) create mode 100644 cmapPy/pandasGEXpress/tests/python2_tests/__init__.py diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/__init__.py b/cmapPy/pandasGEXpress/tests/python2_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py index abbf8c7..0513b70 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py @@ -10,7 +10,7 @@ logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "../functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/pandasGEXpress/tests/functional_tests/" class TestConcat(unittest.TestCase): @@ -333,7 +333,7 @@ def test_build_mismatched_common_meta_report(self): self.assertEqual({"r3"}, set(r.orig_rid)) def test_concat_main(self): - test_dir = "../functional_tests/test_concat/test_main" + test_dir = "cmapPy/pandasGEXpress/tests/functional_tests//test_concat/test_main" g_a = pg.parse(os.path.join(test_dir, "a.gct")) logger.debug("g_a: {}".format(g_a)) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py index 64d680a..951c7ea 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py @@ -10,7 +10,7 @@ import pandas.util.testing as pandas_testing -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py index 616e0c8..b1ad925 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py @@ -14,8 +14,8 @@ class TestGCT2GCTx(unittest.TestCase): def test_gct2gctx_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gct" - out_name = "../functional_tests/test_gct2gctx_out.gctx" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests//test_gct2gctx_out.gctx" args_string = "-f {} -o {}".format(in_name, out_name) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gct2gctx_main(self): pd.util.testing.assert_frame_equal(in_gct.col_metadata_df, out_gctx.col_metadata_df) pd.util.testing.assert_frame_equal(in_gct.row_metadata_df, out_gctx.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gct2gctx_out_annotated.gctx" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gct2gctx_out_annotated.gctx" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -51,9 +51,9 @@ def test_gct2gctx_main(self): def test_missing_annotations(self): with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gctx" - row_meta = "../functional_tests/test_missing_rowmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gctx" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_rowmeta.txt" args_string = "-f {} -o {} -row_annot_path {}".format(no_meta, added_meta, row_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -62,9 +62,9 @@ def test_missing_annotations(self): self.assertTrue('Row ids in matrix missing from annotations file' in context.exception) with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gctx" - col_meta = "../functional_tests/test_missing_colmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gctx" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_colmeta.txt" args_string = "-f {} -o {} -col_annot_path {}".format(no_meta, added_meta, col_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py index d148df2..6fca880 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py @@ -14,8 +14,8 @@ class TestGCTx2GCT(unittest.TestCase): def test_gctx2gct_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gctx" - out_name = "../functional_tests/test_gctx2gct_out.gct" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out.gct" args_string = "-f {} -o {}".format(in_name, out_name) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gctx2gct_main(self): pd.util.testing.assert_frame_equal(in_gctx.col_metadata_df, out_gct.col_metadata_df) pd.util.testing.assert_frame_equal(in_gctx.row_metadata_df, out_gct.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta ) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -51,9 +51,9 @@ def test_gctx2gct_main(self): def test_missing_annotations(self): with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_missing_rowmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_rowmeta.txt" args_string = "-f {} -o {} -row_annot_path {}".format(no_meta, added_meta, row_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -62,9 +62,9 @@ def test_missing_annotations(self): self.assertTrue('Row ids in matrix missing from annotations file' in context.exception) with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - col_meta = "../functional_tests/test_missing_colmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gct" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_colmeta.txt" args_string = "-f {} -o {} -col_annot_path {}".format(no_meta, added_meta, col_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py index 9516dcd..9817650 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py @@ -1,3 +1,4 @@ + import logging import cmapPy.pandasGEXpress.setup_GCToo_logger as setup_logger import unittest @@ -9,7 +10,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -17,7 +18,7 @@ class TestParse(unittest.TestCase): def test_gctx_parsing(self): # parse in gctx, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -27,14 +28,14 @@ def test_gctx_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", convert_neg_666 = False) self.assertFalse(mg2_alt.col_metadata_df["mfc_plate_id"].isnull().all()) # parsing w/rids & cids specified test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -43,28 +44,28 @@ def test_gctx_parsing(self): # parsing w/ridx & cidx specified mg5 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg6 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg6 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg5.data_df, mg6.data_df) pandas_testing.assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df) pandas_testing.assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df) # parsing row metadata only - mg7 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg7 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg7, mg1.row_metadata_df) # parsing col metadata only - mg8 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg8 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg8, mg1.col_metadata_df) # parsing w/multiindex - mg9 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", make_multiindex=True) + mg9 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", make_multiindex=True) self.assertTrue(mg9.multi_index_df is not None) def test_gct_parsing(self): # parse in gct, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -74,13 +75,13 @@ def test_gct_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct", convert_neg_666 = False) self.assertItemsEqual(mg2_alt.col_metadata_df["mfc_plate_id"].values.tolist(), [-666] * 6) # parse in gct with subsetting my_rid = "LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33" - mg3 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", + mg3 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct", cidx=[0, 2], rid=[my_rid]) self.assertEqual(mg3.data_df.shape, (1, 2)) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py index 64da359..7fca7d2 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.GCToo as GCToo -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -21,7 +21,7 @@ def test_read_version_and_dims(self): dims1 = ["10", "15", "3", "4"] fname1 = "testing_testing1" - f1 = open(fname1, "wb") + f1 = open(fname1, "w") f1.write(("#" + version1 + "\n")) f1.write((dims1[0] + "\t" + dims1[1] + "\t" + dims1[2] + "\t" + dims1[3] + "\n")) f1.close() @@ -40,7 +40,7 @@ def test_read_version_and_dims(self): dims2 = ["10", "15"] fname2 = "testing_testing2" - f2 = open(fname2, "wb") + f2 = open(fname2, "w") f2.write(("#" + version2 + "\n")) f2.write((dims2[0] + "\t" + dims2[1] + "\n")) f2.close() diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py index 07622d9..c74b731 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py @@ -17,7 +17,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -44,7 +44,7 @@ class TestParseGctx(unittest.TestCase): def test_parse(self): # parse whole thing mg1 = mini_gctoo_for_testing.make() - mg2 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -54,7 +54,7 @@ def test_parse(self): test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -104,14 +104,14 @@ def test_parse(self): # test with ridx/cidx mg7 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg8 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg8 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg7.data_df, mg8.data_df) pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df) pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df) # test with rid/cidx - mg9 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg9 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cidx=[4]) @@ -120,7 +120,7 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df) # test with ridx/cid - mg10 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], + mg10 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", ridx=[4], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) pandas_testing.assert_frame_equal(mg7.data_df, mg10.data_df) @@ -128,15 +128,15 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df) # test with row_meta_only - mg11 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg11 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df) # test with col_meta_only - mg12 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg12 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df) def test_parse_rid_as_entrez_id(self): - input_file = "../functional_tests/test_parse_gctx_rid_entrez_id.gctx" + input_file = "cmapPy/pandasGEXpress/tests/functional_tests//test_parse_gctx_rid_entrez_id.gctx" g = parse_gctx.parse(input_file) self.assertEqual((5, 5), g.data_df.shape) logger.debug("g.data_df.index: {}".format(g.data_df.index)) @@ -162,19 +162,19 @@ def test_check_and_order_id_inputs(self): # case 1: row and col lists are populated and same type self.assertEqual((sorted(ridx), sorted(cidx)), - parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 2: row & col lists are populated, but of different types self.assertEqual((sorted(ridx), [0, 1, 2, 3]), - parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 3: row list and col lists are both None self.assertEqual(([0, 1, 2, 3], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 4: row list is populated, col list is None self.assertEqual(([0, 1, 2], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) def test_check_id_idx_exclusivity(self): ids = ["a", "b", "c"] @@ -203,7 +203,7 @@ def test_parse_metadata_df(self): logger.debug("mini_row_meta.columns: {}".format(mini_row_meta.columns)) logger.debug("mini_row_meta.dtypes: {}".format(mini_row_meta.dtypes)) - gctx_file = h5py.File("../functional_tests/mini_gctoo_for_testing.gctx", "r") + gctx_file = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", "r") row_dset = gctx_file[row_meta_group_node] col_dset = gctx_file[col_meta_group_node] @@ -270,19 +270,19 @@ def test_get_ordered_idx(self): mg = mini_gctoo_for_testing.make() # case 1: id_type == None - case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df) + case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df, sort_idx = True) self.assertEqual(case1, list(range(0, 6)), "Expected ordered idx to be {} but got {}".format(list(range(0, 6)), case1)) # case 2: id_type == "id" case2 = parse_gctx.get_ordered_idx("id", - ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df) + ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df, sort_idx = True) self.assertEqual(case2, [4], "Expected ordered idx to be {} but got {}".format([4], case2)) # case 3: id_type == ridx case3 = parse_gctx.get_ordered_idx("idx", - [5, 1, 3], mg.col_metadata_df) + [5, 1, 3], mg.col_metadata_df, sort_idx = True) self.assertEqual(case3, [1, 3, 5], "Expected ordered idx to be {} but got {}".format([1, 3, 5], case3)) @@ -295,12 +295,12 @@ def test_parse_data_df(self): mini_data_df.columns.name = "cid" # create h5py File instance - mini_gctx = h5py.File("../functional_tests/mini_gctx_with_metadata_n2x3.gctx", "r") + mini_gctx = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctx_with_metadata_n2x3.gctx", "r") data_dset = mini_gctx[data_node] # get relevant metadata fields - col_meta = parse_gctx.get_column_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") - row_meta = parse_gctx.get_row_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") + col_meta = parse_gctx.get_column_metadata("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctx_with_metadata_n2x3.gctx") + row_meta = parse_gctx.get_row_metadata("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctx_with_metadata_n2x3.gctx") # case 1: no subsetting data_df1 = parse_gctx.parse_data_df(data_dset, [0, 1, 2], [0, 1], row_meta, col_meta) @@ -352,11 +352,11 @@ def test_check_idx_validity(self): id_list = [0,1,2] df = pd.DataFrame({}, index=range(5)) logger.debug("df.shape: {}".format(df.shape)) - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) id_list[0] = -1 with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[-1]", str(context.exception)) @@ -364,7 +364,7 @@ def test_check_idx_validity(self): invalid_high = df.shape[0] + 1 id_list[0] = invalid_high with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[{}]".format(invalid_high), str(context.exception)) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py index c8617d1..50b7b8c 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py @@ -12,7 +12,7 @@ class TestSubset(unittest.TestCase): def test_read_arg(self): - arg_path = os.path.join("../functional_tests", "test_subset_rid.grp") + arg_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") rids = sg._read_arg([arg_path]) self.assertItemsEqual(rids, ["a", "Bb", "c"]) @@ -27,10 +27,10 @@ def test_read_arg_bad(self): def test_subset_main(self): - in_gct_path = os.path.join("../functional_tests", "test_subset_in.gct") - rid_grp_path = os.path.join("../functional_tests", "test_subset_rid.grp") - out_name = os.path.join("../functional_tests", "test_subset_out.gct") - expected_out_path = os.path.join("../functional_tests", "test_subset_expected.gct") + in_gct_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_in.gct") + rid_grp_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") + out_name = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_out.gct") + expected_out_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_expected.gct") args_string = "-i {} --rid {} -ec {} -o {}".format( in_gct_path, rid_grp_path, "f", out_name) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py index 8ad097b..53bfb1d 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.parse_gct as pg import cmapPy.pandasGEXpress.write_gct as wg -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py index f613a86..39e8732 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py @@ -12,7 +12,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" # instantiate logger logger = logging.getLogger(setup_logger.LOGGER_NAME) From 2f1a51b0e5e9d08a75d01febf122287b08abf261 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 18:38:36 +0530 Subject: [PATCH 11/17] [ref]:absolute paths in set io tests --- cmapPy/set_io/tests/__init__.py | 0 cmapPy/set_io/tests/test_gmt.py | 2 +- cmapPy/set_io/tests/test_grp.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 cmapPy/set_io/tests/__init__.py diff --git a/cmapPy/set_io/tests/__init__.py b/cmapPy/set_io/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/set_io/tests/test_gmt.py b/cmapPy/set_io/tests/test_gmt.py index 6779316..d9ab6ae 100644 --- a/cmapPy/set_io/tests/test_gmt.py +++ b/cmapPy/set_io/tests/test_gmt.py @@ -5,7 +5,7 @@ import cmapPy.set_io.gmt as gmt logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/set_io/tests/functional_tests/" class TestGMT(unittest.TestCase): diff --git a/cmapPy/set_io/tests/test_grp.py b/cmapPy/set_io/tests/test_grp.py index f9c401e..bc7c87e 100644 --- a/cmapPy/set_io/tests/test_grp.py +++ b/cmapPy/set_io/tests/test_grp.py @@ -5,7 +5,7 @@ import cmapPy.set_io.grp as grp logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/set_io/tests/functional_tests/" class TestGRP(unittest.TestCase): From e6720fd9238d960cc8a1a6b79c114585074a143e Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 18:38:57 +0530 Subject: [PATCH 12/17] [ref]:add init to math tests dir --- cmapPy/math/tests/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 cmapPy/math/tests/__init__.py diff --git a/cmapPy/math/tests/__init__.py b/cmapPy/math/tests/__init__.py new file mode 100644 index 0000000..e69de29 From c5356fee6d8a2d7ba1d122a5c66822dee0c29cd0 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 19:49:05 +0530 Subject: [PATCH 13/17] [add]: add new test cases for parse gctx --- .../tests/python2_tests/test_parse_gctx.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py index c74b731..f74b214 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py @@ -135,6 +135,49 @@ def test_parse(self): mg12 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df) + # test with sort_col_meta False and cidx + mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False) + + pandas_testing.assert_frame_equal(mg13.data_df, mg1.data_df.iloc[:, [4,1,3]]) + pandas_testing.assert_frame_equal(mg13.col_metadata_df, mg1.col_metadata_df.iloc[[4,1,3],:]) + pandas_testing.assert_frame_equal(mg13.row_metadata_df, mg1.row_metadata_df) + + + # test with sort_row_meta False and ridx + mg14 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False) + + pandas_testing.assert_frame_equal(mg14.data_df, mg1.data_df.iloc[[3,0,1],:]) + pandas_testing.assert_frame_equal(mg14.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg14.row_metadata_df, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cidx and col_meta_only + mg15 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False, col_meta_only=True) + pandas_testing.assert_frame_equal(mg15, mg1.col_metadata_df.iloc[[4,1,3],:]) + + # test with sort_row_meta False and ridx and row_meta_only + mg16 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False, row_meta_only=True) + pandas_testing.assert_frame_equal(mg16, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cid + cid_unsorted = ['LJP007_MCF7_24H:TRT_POSCON:BRD-K81418486:10','LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33'] + mg17 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cid = cid_unsorted, sort_col_meta= False) + pandas_testing.assert_frame_equal(mg17.data_df, mg1.data_df.iloc[:, [2,0]]) + pandas_testing.assert_frame_equal(mg17.col_metadata_df, mg1.col_metadata_df.iloc[[2,0],:]) + pandas_testing.assert_frame_equal(mg17.row_metadata_df, mg1.row_metadata_df) + + # test with sort_row_meta False and rid + rid_unsorted = ['LJP007_MCF7_24H:TRT_CP:BRD-K64857848:10', 'MISC003_A375_24H:TRT_CP:BRD-K93918653:3.33'] + mg18 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", + rid = rid_unsorted, sort_row_meta=False) + pandas_testing.assert_frame_equal(mg18.data_df, mg1.data_df.iloc[[5,1], :]) + pandas_testing.assert_frame_equal(mg18.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg18.row_metadata_df, mg1.row_metadata_df.iloc[[5,1],:]) + def test_parse_rid_as_entrez_id(self): input_file = "cmapPy/pandasGEXpress/tests/functional_tests//test_parse_gctx_rid_entrez_id.gctx" g = parse_gctx.parse(input_file) From a1119318b095e9f6fce393eda3ee17b5707acfc0 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 19:49:18 +0530 Subject: [PATCH 14/17] [add]: add new test cases for parse gctx; python3 --- .../tests/python3_tests/test_parse_gctx.py | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py index 2f84829..ea79f43 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py @@ -139,16 +139,47 @@ def test_parse(self): mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",) # test with sort_col_meta False and cidx - #mg14 = + mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False) - - # test with sort_row_meta and sort_col_meta both False and ridx and cidx - #mg15 = + pandas_testing.assert_frame_equal(mg13.data_df, mg1.data_df.iloc[:, [4,1,3]]) + pandas_testing.assert_frame_equal(mg13.col_metadata_df, mg1.col_metadata_df.iloc[[4,1,3],:]) + pandas_testing.assert_frame_equal(mg13.row_metadata_df, mg1.row_metadata_df) - # test with sort_row_meta and sort_col_meta both False and string rid and cid - #mg16 = - + # test with sort_row_meta False and ridx + mg14 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False) + + pandas_testing.assert_frame_equal(mg14.data_df, mg1.data_df.iloc[[3,0,1],:]) + pandas_testing.assert_frame_equal(mg14.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg14.row_metadata_df, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cidx and col_meta_only + mg15 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False, col_meta_only=True) + pandas_testing.assert_frame_equal(mg15, mg1.col_metadata_df.iloc[[4,1,3],:]) + + # test with sort_row_meta False and ridx and row_meta_only + mg16 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False, row_meta_only=True) + pandas_testing.assert_frame_equal(mg16, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cid + cid_unsorted = ['LJP007_MCF7_24H:TRT_POSCON:BRD-K81418486:10','LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33'] + mg17 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cid = cid_unsorted, sort_col_meta= False) + pandas_testing.assert_frame_equal(mg17.data_df, mg1.data_df.iloc[:, [2,0]]) + pandas_testing.assert_frame_equal(mg17.col_metadata_df, mg1.col_metadata_df.iloc[[2,0],:]) + pandas_testing.assert_frame_equal(mg17.row_metadata_df, mg1.row_metadata_df) + + # test with sort_row_meta False and rid + rid_unsorted = ['LJP007_MCF7_24H:TRT_CP:BRD-K64857848:10', 'MISC003_A375_24H:TRT_CP:BRD-K93918653:3.33'] + mg18 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", + rid = rid_unsorted, sort_row_meta=False) + pandas_testing.assert_frame_equal(mg18.data_df, mg1.data_df.iloc[[5,1], :]) + pandas_testing.assert_frame_equal(mg18.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg18.row_metadata_df, mg1.row_metadata_df.iloc[[5,1],:]) def test_parse_rid_as_entrez_id(self): input_file = "cmapPy/pandasGEXpress/tests/functional_tests/test_parse_gctx_rid_entrez_id.gctx" From cc091c9a24936a654210577504ad425887acfc23 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 19:52:59 +0530 Subject: [PATCH 15/17] [ref]:update function documentations --- cmapPy/pandasGEXpress/parse_gctx.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cmapPy/pandasGEXpress/parse_gctx.py b/cmapPy/pandasGEXpress/parse_gctx.py index e35e346..ce8dbd5 100644 --- a/cmapPy/pandasGEXpress/parse_gctx.py +++ b/cmapPy/pandasGEXpress/parse_gctx.py @@ -46,7 +46,7 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, - make_multiindex (bool): whether to create a multi-index df combining the 3 component dfs - sort_col_meta (bool) : whether to sort the column metadata by indexes. Default = True - - sort_row_meta (bool) : whether to sort the row metadata by indexes. Default = False + - sort_row_meta (bool) : whether to sort the row metadata by indexes. Default = True Output: - myGCToo (GCToo): A GCToo instance containing content of parsed gctx file. Note: if meta_only = True, this will be a GCToo instance where the data_df is empty, i.e. data_df = pd.DataFrame(index=rids, @@ -165,6 +165,8 @@ def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df, so - ridx (list or None): if not None, a list of indexes - cid (list or None): if not None, a list of cids - cidx (list or None): if not None, a list of indexes + - sort_row_meta (bool): boolean indicating whether to return sorted row indexes + - sort_col_meta (bool): boolean indicating whether to return sorted column indexes Output: - a tuple of the ordered ridx and cidx """ @@ -255,6 +257,8 @@ def get_ordered_idx(id_type, id_list, meta_df, sort_idx): Input: - id_type (str): either "id", "idx" or None - id_list (list): either a list of indexes or id names + - meta_df (dataframe): dataframe + - sort_idx (bool): boolean indicating whether to return sorted indexes or not Output: - a sorted list of indexes to subset a dimension by """ From 4123f3545bc104232e8ed83578eaf1bc355a4370 Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 20:19:53 +0530 Subject: [PATCH 16/17] [ref]:change travis config --- .travis.yml | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/.travis.yml b/.travis.yml index c3c366d..8716dfe 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,54 +10,45 @@ matrix: include: # run pandasGEXpress python2_tests - python: "2.7" - env: TEST_DIR=cmapPy/pandasGEXpress/tests/python2_tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/pandasGEXpress/tests/python2_tests/ # run pandasGEXpress python3_tests - python: "3.6" - env: TEST_DIR=cmapPy/pandasGEXpress/tests/python3_tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/pandasGEXpress/tests/python3_tests/ # run set_io tests for python2 - python: "2.7" - env: TEST_DIR=cmapPy/set_io/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" cmapPy/set_io/tests/ # run set_io tests for python3 - python: "3.6" - env: TEST_DIR=cmapPy/set_io/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" cmapPy/set_io/tests/ # run math tests for python2 - python: "2.7" - env: TEST_DIR=cmapPy/math/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" cmapPy/math/tests/ # run math tests for python3 - python: "3.6" - env: TEST_DIR=cmapPy/math/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" cmapPy/math/tests/ # run python2_python3_comaptibility tests for python2 - python: "2.7" - env: TEST_DIR=cmapPy/pandasGEXpress/tests script: - - cd $TEST_DIR && python -m unittest discover -p test_python2_python3_compatibility.py + - python -m unittest cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py # run python2_python3_comaptibility tests for python3 - python: "3.6" - env: TEST_DIR=cmapPy/pandasGEXpress/tests script: - - cd $TEST_DIR && python -m unittest discover -p test_python2_python3_compatibility.py + - python -m unittest cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py # what branches of github to use branches: only: - - master - - travis_testing + - master \ No newline at end of file From 5c9ea07fe0d74ada7e8bdd32d931f42db1623daf Mon Sep 17 00:00:00 2001 From: Saksham Malhotra Date: Sun, 23 Jun 2019 20:29:39 +0530 Subject: [PATCH 17/17] [ref]:fix travis --- .travis.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8716dfe..706aa26 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,32 +21,32 @@ matrix: # run set_io tests for python2 - python: "2.7" script: - - python -m unittest discover -p "test_*.py" cmapPy/set_io/tests/ + - python -m unittest discover -p "test_*.py" -s cmapPy/set_io/tests/ # run set_io tests for python3 - python: "3.6" script: - - python -m unittest discover -p "test_*.py" cmapPy/set_io/tests/ + - python -m unittest discover -p "test_*.py" -s cmapPy/set_io/tests/ # run math tests for python2 - python: "2.7" script: - - python -m unittest discover -p "test_*.py" cmapPy/math/tests/ + - python -m unittest discover -p "test_*.py" -s cmapPy/math/tests/ # run math tests for python3 - python: "3.6" script: - - python -m unittest discover -p "test_*.py" cmapPy/math/tests/ + - python -m unittest discover -p "test_*.py" -s cmapPy/math/tests/ # run python2_python3_comaptibility tests for python2 - python: "2.7" script: - - python -m unittest cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py + - python -m unittest discover -p "test_python2_python3_*.py" -s cmapPy/pandasGEXpress/tests/ # run python2_python3_comaptibility tests for python3 - python: "3.6" script: - - python -m unittest cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py + - python -m unittest discover -p "test_python2_python3_*.py" -s cmapPy/pandasGEXpress/tests/ # what branches of github to use branches: