diff --git a/.travis.yml b/.travis.yml index c3c366d..706aa26 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,54 +10,45 @@ matrix: include: # run pandasGEXpress python2_tests - python: "2.7" - env: TEST_DIR=cmapPy/pandasGEXpress/tests/python2_tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/pandasGEXpress/tests/python2_tests/ # run pandasGEXpress python3_tests - python: "3.6" - env: TEST_DIR=cmapPy/pandasGEXpress/tests/python3_tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/pandasGEXpress/tests/python3_tests/ # run set_io tests for python2 - python: "2.7" - env: TEST_DIR=cmapPy/set_io/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/set_io/tests/ # run set_io tests for python3 - python: "3.6" - env: TEST_DIR=cmapPy/set_io/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/set_io/tests/ # run math tests for python2 - python: "2.7" - env: TEST_DIR=cmapPy/math/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/math/tests/ # run math tests for python3 - python: "3.6" - env: TEST_DIR=cmapPy/math/tests script: - - cd $TEST_DIR && python -m unittest discover -p "test_*.py" + - python -m unittest discover -p "test_*.py" -s cmapPy/math/tests/ # run python2_python3_comaptibility tests for python2 - python: "2.7" - env: TEST_DIR=cmapPy/pandasGEXpress/tests script: - - cd $TEST_DIR && python -m unittest discover -p test_python2_python3_compatibility.py + - python -m unittest discover -p "test_python2_python3_*.py" -s cmapPy/pandasGEXpress/tests/ # run python2_python3_comaptibility tests for python3 - python: "3.6" - env: TEST_DIR=cmapPy/pandasGEXpress/tests script: - - cd $TEST_DIR && python -m unittest discover -p test_python2_python3_compatibility.py + - python -m unittest discover -p "test_python2_python3_*.py" -s cmapPy/pandasGEXpress/tests/ # what branches of github to use branches: only: - - master - - travis_testing + - master \ No newline at end of file diff --git a/cmapPy/math/tests/__init__.py b/cmapPy/math/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py b/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py index f8fed81..df98cd4 100644 --- a/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py +++ b/cmapPy/pandasGEXpress/mini_gctoo_for_testing.py @@ -47,7 +47,7 @@ def make(convert_neg_666=True): if convert_neg_666: mini_row_metadata = mini_row_metadata.replace([-666, "-666", -666.0], [numpy.nan, numpy.nan, numpy.nan]) # if all values in a column are nanpandas.Series(mini_row_metadata.isna().sum() == mini_row_metadata.shape[0]) convert dtype of that column to float - all_nan_columns = numpy.array(mini_row_metadata.isnull().sum() == mini_row_metadata.shape[0]).nonzero()[0] + all_nan_columns = (mini_row_metadata.isnull().sum() == numpy.array(mini_row_metadata.shape[0])).nonzero()[0] mini_row_metadata = mini_row_metadata.astype({d: 'float' for d in mini_row_metadata.columns[all_nan_columns.tolist()]}) else: mini_row_metadata = mini_row_metadata.replace([-666, -666.0], ["-666", "-666"]) diff --git a/cmapPy/pandasGEXpress/parse_gctx.py b/cmapPy/pandasGEXpress/parse_gctx.py index 2ec4bcc..ce8dbd5 100644 --- a/cmapPy/pandasGEXpress/parse_gctx.py +++ b/cmapPy/pandasGEXpress/parse_gctx.py @@ -21,7 +21,8 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, - ridx=None, cidx=None, row_meta_only=False, col_meta_only=False, make_multiindex=False): + ridx=None, cidx=None, row_meta_only=False, col_meta_only=False, make_multiindex=False, + sort_col_meta = True, sort_row_meta = True): """ Primary method of script. Reads in path to a gctx file and parses into GCToo object. @@ -44,7 +45,8 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, as pandas DataFrame - make_multiindex (bool): whether to create a multi-index df combining the 3 component dfs - + - sort_col_meta (bool) : whether to sort the column metadata by indexes. Default = True + - sort_row_meta (bool) : whether to sort the row metadata by indexes. Default = True Output: - myGCToo (GCToo): A GCToo instance containing content of parsed gctx file. Note: if meta_only = True, this will be a GCToo instance where the data_df is empty, i.e. data_df = pd.DataFrame(index=rids, @@ -74,12 +76,19 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, row_meta = parse_metadata_df("row", row_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None, + sort_row_meta = True, sort_col_meta = True) gctx_file.close() # subset if specified, then return row_meta = row_meta.iloc[sorted_ridx] + + if not sort_row_meta: + (unsorted_ridx, _) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, None, + sort_row_meta, sort_row_meta) + row_meta = row_meta.iloc[unsorted_ridx] + return row_meta elif col_meta_only: # read in col metadata @@ -87,12 +96,19 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, col_meta = parse_metadata_df("col", col_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, + col_meta, sort_row_meta = True, sort_col_meta = True) gctx_file.close() # subset if specified, then return col_meta = col_meta.iloc[sorted_cidx] + + if not sort_col_meta: + (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, None, col_meta, + sort_row_meta, sort_col_meta) + col_meta = col_meta.iloc[unsorted_cidx, :] + return col_meta else: # read in row metadata @@ -104,7 +120,8 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, col_meta = parse_metadata_df("col", col_dset, convert_neg_666) # validate optional input ids & get indexes to subset by - (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta) + (sorted_ridx, sorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, + sort_row_meta = True, sort_col_meta = True) data_dset = gctx_file[data_node] data_df = parse_data_df(data_dset, sorted_ridx, sorted_cidx, row_meta, col_meta) @@ -113,6 +130,20 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, row_meta = row_meta.iloc[sorted_ridx] col_meta = col_meta.iloc[sorted_cidx] + if not sort_col_meta: + ## in the subsetted and re-indexed dataframe get where new indexes lie + (_, unsorted_cidx) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, + sort_row_meta, sort_col_meta) + + data_df = data_df.iloc[:,unsorted_cidx] + col_meta = col_meta.iloc[unsorted_cidx,:] + + if not sort_row_meta: + (unsorted_ridx, _) = check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta, col_meta, + sort_row_meta, sort_row_meta) + data_df = data_df.iloc[unsorted_ridx,:] + row_meta = row_meta.iloc[unsorted_ridx,:] + # get version my_version = gctx_file.attrs[version_node] if type(my_version) == np.ndarray: @@ -126,7 +157,7 @@ def parse(gctx_file_path, convert_neg_666=True, rid=None, cid=None, return my_gctoo -def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df): +def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df, sort_row_meta, sort_col_meta): """ Makes sure that (if entered) id inputs entered are of one type (string id or index) Input: @@ -134,17 +165,20 @@ def check_and_order_id_inputs(rid, ridx, cid, cidx, row_meta_df, col_meta_df): - ridx (list or None): if not None, a list of indexes - cid (list or None): if not None, a list of cids - cidx (list or None): if not None, a list of indexes + - sort_row_meta (bool): boolean indicating whether to return sorted row indexes + - sort_col_meta (bool): boolean indicating whether to return sorted column indexes Output: - a tuple of the ordered ridx and cidx """ (row_type, row_ids) = check_id_idx_exclusivity(rid, ridx) (col_type, col_ids) = check_id_idx_exclusivity(cid, cidx) - row_ids = check_and_convert_ids(row_type, row_ids, row_meta_df) - ordered_ridx = get_ordered_idx(row_type, row_ids, row_meta_df) - col_ids = check_and_convert_ids(col_type, col_ids, col_meta_df) - ordered_cidx = get_ordered_idx(col_type, col_ids, col_meta_df) + row_ids = check_and_convert_ids(row_type, row_ids, row_meta_df, sort_col_meta) + ordered_ridx = get_ordered_idx(row_type, row_ids, row_meta_df, sort_row_meta) + + col_ids = check_and_convert_ids(col_type, col_ids, col_meta_df, sort_col_meta) + ordered_cidx = get_ordered_idx(col_type, col_ids, col_meta_df, sort_col_meta) return (ordered_ridx, ordered_cidx) @@ -172,13 +206,13 @@ def check_id_idx_exclusivity(id, idx): return (None, []) -def check_and_convert_ids(id_type, id_list, meta_df): +def check_and_convert_ids(id_type, id_list, meta_df, sort_id): if meta_df is not None: if id_type == "id": id_list = convert_ids_to_meta_type(id_list, meta_df) check_id_validity(id_list, meta_df) else: - check_idx_validity(id_list, meta_df) + check_idx_validity(id_list, meta_df, sort_id) return id_list else: return None @@ -195,14 +229,15 @@ def check_id_validity(id_list, meta_df): raise Exception("parse_gctx check_id_validity " + msg) -def check_idx_validity(id_list, meta_df): - N = meta_df.shape[0] - out_of_range_ids = [my_id for my_id in id_list if my_id < 0 or my_id >= N] - if len(out_of_range_ids): - msg = "some of indexes being used to subset the data are not valid max N: {} out_of_range_ids: {}".format(N, - out_of_range_ids) - logger.error(msg) - raise Exception("parse_gctx check_idx_validity " + msg) +def check_idx_validity(id_list, meta_df, sort_id): + if sort_id: + N = meta_df.shape[0] + out_of_range_ids = [my_id for my_id in id_list if my_id < 0 or my_id >= N] + if len(out_of_range_ids): + msg = "some of indexes being used to subset the data are not valid max N: {} out_of_range_ids: {}".format(N, + out_of_range_ids) + logger.error(msg) + raise Exception("parse_gctx check_idx_validity " + msg) def convert_ids_to_meta_type(id_list, meta_df): @@ -216,12 +251,14 @@ def convert_ids_to_meta_type(id_list, meta_df): raise Exception("parse_gctx check_if_ids_in_meta " + msg + " ValueError ve: {}".format(ve)) -def get_ordered_idx(id_type, id_list, meta_df): +def get_ordered_idx(id_type, id_list, meta_df, sort_idx): """ Gets index values corresponding to ids to subset and orders them. Input: - id_type (str): either "id", "idx" or None - id_list (list): either a list of indexes or id names + - meta_df (dataframe): dataframe + - sort_idx (bool): boolean indicating whether to return sorted indexes or not Output: - a sorted list of indexes to subset a dimension by """ @@ -231,6 +268,8 @@ def get_ordered_idx(id_type, id_list, meta_df): elif id_type == "id": lookup = {x: i for (i,x) in enumerate(meta_df.index)} id_list = [lookup[str(i)] for i in id_list] + if not sort_idx: + return [sorted(id_list).index(i) for i in id_list] return sorted(id_list) else: return None diff --git a/cmapPy/pandasGEXpress/tests/__init__.py b/cmapPy/pandasGEXpress/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/__init__.py b/cmapPy/pandasGEXpress/tests/python2_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py index abbf8c7..0513b70 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_concat.py @@ -10,7 +10,7 @@ logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "../functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/pandasGEXpress/tests/functional_tests/" class TestConcat(unittest.TestCase): @@ -333,7 +333,7 @@ def test_build_mismatched_common_meta_report(self): self.assertEqual({"r3"}, set(r.orig_rid)) def test_concat_main(self): - test_dir = "../functional_tests/test_concat/test_main" + test_dir = "cmapPy/pandasGEXpress/tests/functional_tests//test_concat/test_main" g_a = pg.parse(os.path.join(test_dir, "a.gct")) logger.debug("g_a: {}".format(g_a)) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py index 64d680a..951c7ea 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_edge_cases.py @@ -10,7 +10,7 @@ import pandas.util.testing as pandas_testing -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py index 616e0c8..b1ad925 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_gct2gctx.py @@ -14,8 +14,8 @@ class TestGCT2GCTx(unittest.TestCase): def test_gct2gctx_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gct" - out_name = "../functional_tests/test_gct2gctx_out.gctx" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests//test_gct2gctx_out.gctx" args_string = "-f {} -o {}".format(in_name, out_name) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gct2gctx_main(self): pd.util.testing.assert_frame_equal(in_gct.col_metadata_df, out_gctx.col_metadata_df) pd.util.testing.assert_frame_equal(in_gct.row_metadata_df, out_gctx.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gct2gctx_out_annotated.gctx" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gct2gctx_out_annotated.gctx" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -51,9 +51,9 @@ def test_gct2gctx_main(self): def test_missing_annotations(self): with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gctx" - row_meta = "../functional_tests/test_missing_rowmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gctx" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_rowmeta.txt" args_string = "-f {} -o {} -row_annot_path {}".format(no_meta, added_meta, row_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -62,9 +62,9 @@ def test_missing_annotations(self): self.assertTrue('Row ids in matrix missing from annotations file' in context.exception) with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gctx" - col_meta = "../functional_tests/test_missing_colmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gctx" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_colmeta.txt" args_string = "-f {} -o {} -col_annot_path {}".format(no_meta, added_meta, col_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py index d148df2..6fca880 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_gctx2gct.py @@ -14,8 +14,8 @@ class TestGCTx2GCT(unittest.TestCase): def test_gctx2gct_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gctx" - out_name = "../functional_tests/test_gctx2gct_out.gct" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out.gct" args_string = "-f {} -o {}".format(in_name, out_name) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gctx2gct_main(self): pd.util.testing.assert_frame_equal(in_gctx.col_metadata_df, out_gct.col_metadata_df) pd.util.testing.assert_frame_equal(in_gctx.row_metadata_df, out_gct.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta ) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -51,9 +51,9 @@ def test_gctx2gct_main(self): def test_missing_annotations(self): with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_missing_rowmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_rowmeta.txt" args_string = "-f {} -o {} -row_annot_path {}".format(no_meta, added_meta, row_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -62,9 +62,9 @@ def test_missing_annotations(self): self.assertTrue('Row ids in matrix missing from annotations file' in context.exception) with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - col_meta = "../functional_tests/test_missing_colmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_gctx2gct_out_annotated.gct" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests//test_missing_colmeta.txt" args_string = "-f {} -o {} -col_annot_path {}".format(no_meta, added_meta, col_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py index 9516dcd..9817650 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse.py @@ -1,3 +1,4 @@ + import logging import cmapPy.pandasGEXpress.setup_GCToo_logger as setup_logger import unittest @@ -9,7 +10,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -17,7 +18,7 @@ class TestParse(unittest.TestCase): def test_gctx_parsing(self): # parse in gctx, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -27,14 +28,14 @@ def test_gctx_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", convert_neg_666 = False) self.assertFalse(mg2_alt.col_metadata_df["mfc_plate_id"].isnull().all()) # parsing w/rids & cids specified test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -43,28 +44,28 @@ def test_gctx_parsing(self): # parsing w/ridx & cidx specified mg5 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg6 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg6 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg5.data_df, mg6.data_df) pandas_testing.assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df) pandas_testing.assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df) # parsing row metadata only - mg7 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg7 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg7, mg1.row_metadata_df) # parsing col metadata only - mg8 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg8 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg8, mg1.col_metadata_df) # parsing w/multiindex - mg9 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", make_multiindex=True) + mg9 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", make_multiindex=True) self.assertTrue(mg9.multi_index_df is not None) def test_gct_parsing(self): # parse in gct, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -74,13 +75,13 @@ def test_gct_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct", convert_neg_666 = False) self.assertItemsEqual(mg2_alt.col_metadata_df["mfc_plate_id"].values.tolist(), [-666] * 6) # parse in gct with subsetting my_rid = "LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33" - mg3 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", + mg3 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct", cidx=[0, 2], rid=[my_rid]) self.assertEqual(mg3.data_df.shape, (1, 2)) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py index 64da359..7fca7d2 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.GCToo as GCToo -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -21,7 +21,7 @@ def test_read_version_and_dims(self): dims1 = ["10", "15", "3", "4"] fname1 = "testing_testing1" - f1 = open(fname1, "wb") + f1 = open(fname1, "w") f1.write(("#" + version1 + "\n")) f1.write((dims1[0] + "\t" + dims1[1] + "\t" + dims1[2] + "\t" + dims1[3] + "\n")) f1.close() @@ -40,7 +40,7 @@ def test_read_version_and_dims(self): dims2 = ["10", "15"] fname2 = "testing_testing2" - f2 = open(fname2, "wb") + f2 = open(fname2, "w") f2.write(("#" + version2 + "\n")) f2.write((dims2[0] + "\t" + dims2[1] + "\n")) f2.close() diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py index 07622d9..f74b214 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_parse_gctx.py @@ -17,7 +17,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -44,7 +44,7 @@ class TestParseGctx(unittest.TestCase): def test_parse(self): # parse whole thing mg1 = mini_gctoo_for_testing.make() - mg2 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -54,7 +54,7 @@ def test_parse(self): test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -104,14 +104,14 @@ def test_parse(self): # test with ridx/cidx mg7 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg8 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg8 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg7.data_df, mg8.data_df) pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df) pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df) # test with rid/cidx - mg9 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg9 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cidx=[4]) @@ -120,7 +120,7 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df) # test with ridx/cid - mg10 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], + mg10 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", ridx=[4], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) pandas_testing.assert_frame_equal(mg7.data_df, mg10.data_df) @@ -128,15 +128,58 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df) # test with row_meta_only - mg11 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg11 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df) # test with col_meta_only - mg12 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg12 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df) + # test with sort_col_meta False and cidx + mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False) + + pandas_testing.assert_frame_equal(mg13.data_df, mg1.data_df.iloc[:, [4,1,3]]) + pandas_testing.assert_frame_equal(mg13.col_metadata_df, mg1.col_metadata_df.iloc[[4,1,3],:]) + pandas_testing.assert_frame_equal(mg13.row_metadata_df, mg1.row_metadata_df) + + + # test with sort_row_meta False and ridx + mg14 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False) + + pandas_testing.assert_frame_equal(mg14.data_df, mg1.data_df.iloc[[3,0,1],:]) + pandas_testing.assert_frame_equal(mg14.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg14.row_metadata_df, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cidx and col_meta_only + mg15 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False, col_meta_only=True) + pandas_testing.assert_frame_equal(mg15, mg1.col_metadata_df.iloc[[4,1,3],:]) + + # test with sort_row_meta False and ridx and row_meta_only + mg16 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False, row_meta_only=True) + pandas_testing.assert_frame_equal(mg16, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cid + cid_unsorted = ['LJP007_MCF7_24H:TRT_POSCON:BRD-K81418486:10','LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33'] + mg17 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cid = cid_unsorted, sort_col_meta= False) + pandas_testing.assert_frame_equal(mg17.data_df, mg1.data_df.iloc[:, [2,0]]) + pandas_testing.assert_frame_equal(mg17.col_metadata_df, mg1.col_metadata_df.iloc[[2,0],:]) + pandas_testing.assert_frame_equal(mg17.row_metadata_df, mg1.row_metadata_df) + + # test with sort_row_meta False and rid + rid_unsorted = ['LJP007_MCF7_24H:TRT_CP:BRD-K64857848:10', 'MISC003_A375_24H:TRT_CP:BRD-K93918653:3.33'] + mg18 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", + rid = rid_unsorted, sort_row_meta=False) + pandas_testing.assert_frame_equal(mg18.data_df, mg1.data_df.iloc[[5,1], :]) + pandas_testing.assert_frame_equal(mg18.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg18.row_metadata_df, mg1.row_metadata_df.iloc[[5,1],:]) + def test_parse_rid_as_entrez_id(self): - input_file = "../functional_tests/test_parse_gctx_rid_entrez_id.gctx" + input_file = "cmapPy/pandasGEXpress/tests/functional_tests//test_parse_gctx_rid_entrez_id.gctx" g = parse_gctx.parse(input_file) self.assertEqual((5, 5), g.data_df.shape) logger.debug("g.data_df.index: {}".format(g.data_df.index)) @@ -162,19 +205,19 @@ def test_check_and_order_id_inputs(self): # case 1: row and col lists are populated and same type self.assertEqual((sorted(ridx), sorted(cidx)), - parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 2: row & col lists are populated, but of different types self.assertEqual((sorted(ridx), [0, 1, 2, 3]), - parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 3: row list and col lists are both None self.assertEqual(([0, 1, 2, 3], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 4: row list is populated, col list is None self.assertEqual(([0, 1, 2], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) def test_check_id_idx_exclusivity(self): ids = ["a", "b", "c"] @@ -203,7 +246,7 @@ def test_parse_metadata_df(self): logger.debug("mini_row_meta.columns: {}".format(mini_row_meta.columns)) logger.debug("mini_row_meta.dtypes: {}".format(mini_row_meta.dtypes)) - gctx_file = h5py.File("../functional_tests/mini_gctoo_for_testing.gctx", "r") + gctx_file = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", "r") row_dset = gctx_file[row_meta_group_node] col_dset = gctx_file[col_meta_group_node] @@ -270,19 +313,19 @@ def test_get_ordered_idx(self): mg = mini_gctoo_for_testing.make() # case 1: id_type == None - case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df) + case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df, sort_idx = True) self.assertEqual(case1, list(range(0, 6)), "Expected ordered idx to be {} but got {}".format(list(range(0, 6)), case1)) # case 2: id_type == "id" case2 = parse_gctx.get_ordered_idx("id", - ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df) + ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df, sort_idx = True) self.assertEqual(case2, [4], "Expected ordered idx to be {} but got {}".format([4], case2)) # case 3: id_type == ridx case3 = parse_gctx.get_ordered_idx("idx", - [5, 1, 3], mg.col_metadata_df) + [5, 1, 3], mg.col_metadata_df, sort_idx = True) self.assertEqual(case3, [1, 3, 5], "Expected ordered idx to be {} but got {}".format([1, 3, 5], case3)) @@ -295,12 +338,12 @@ def test_parse_data_df(self): mini_data_df.columns.name = "cid" # create h5py File instance - mini_gctx = h5py.File("../functional_tests/mini_gctx_with_metadata_n2x3.gctx", "r") + mini_gctx = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctx_with_metadata_n2x3.gctx", "r") data_dset = mini_gctx[data_node] # get relevant metadata fields - col_meta = parse_gctx.get_column_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") - row_meta = parse_gctx.get_row_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") + col_meta = parse_gctx.get_column_metadata("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctx_with_metadata_n2x3.gctx") + row_meta = parse_gctx.get_row_metadata("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctx_with_metadata_n2x3.gctx") # case 1: no subsetting data_df1 = parse_gctx.parse_data_df(data_dset, [0, 1, 2], [0, 1], row_meta, col_meta) @@ -352,11 +395,11 @@ def test_check_idx_validity(self): id_list = [0,1,2] df = pd.DataFrame({}, index=range(5)) logger.debug("df.shape: {}".format(df.shape)) - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) id_list[0] = -1 with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[-1]", str(context.exception)) @@ -364,7 +407,7 @@ def test_check_idx_validity(self): invalid_high = df.shape[0] + 1 id_list[0] = invalid_high with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[{}]".format(invalid_high), str(context.exception)) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py index c8617d1..50b7b8c 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_subset.py @@ -12,7 +12,7 @@ class TestSubset(unittest.TestCase): def test_read_arg(self): - arg_path = os.path.join("../functional_tests", "test_subset_rid.grp") + arg_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") rids = sg._read_arg([arg_path]) self.assertItemsEqual(rids, ["a", "Bb", "c"]) @@ -27,10 +27,10 @@ def test_read_arg_bad(self): def test_subset_main(self): - in_gct_path = os.path.join("../functional_tests", "test_subset_in.gct") - rid_grp_path = os.path.join("../functional_tests", "test_subset_rid.grp") - out_name = os.path.join("../functional_tests", "test_subset_out.gct") - expected_out_path = os.path.join("../functional_tests", "test_subset_expected.gct") + in_gct_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_in.gct") + rid_grp_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") + out_name = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_out.gct") + expected_out_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_expected.gct") args_string = "-i {} --rid {} -ec {} -o {}".format( in_gct_path, rid_grp_path, "f", out_name) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py index 8ad097b..53bfb1d 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.parse_gct as pg import cmapPy.pandasGEXpress.write_gct as wg -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py index f613a86..39e8732 100644 --- a/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python2_tests/test_write_gctx.py @@ -12,7 +12,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" # instantiate logger logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/__init__.py b/cmapPy/pandasGEXpress/tests/python3_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py index abbf8c7..6cdb6d0 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_concat.py @@ -10,7 +10,7 @@ logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "../functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/pandasGEXpress/tests/functional_tests/" class TestConcat(unittest.TestCase): @@ -333,7 +333,7 @@ def test_build_mismatched_common_meta_report(self): self.assertEqual({"r3"}, set(r.orig_rid)) def test_concat_main(self): - test_dir = "../functional_tests/test_concat/test_main" + test_dir = "cmapPy/pandasGEXpress/tests/functional_tests/test_concat/test_main" g_a = pg.parse(os.path.join(test_dir, "a.gct")) logger.debug("g_a: {}".format(g_a)) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py index 64d680a..951c7ea 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_edge_cases.py @@ -10,7 +10,7 @@ import pandas.util.testing as pandas_testing -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py index 012242b..05d1863 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_gct2gctx.py @@ -14,8 +14,8 @@ class TestGCT2GCTx(unittest.TestCase): def test_gct2gctx_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gct" - out_name = "../functional_tests/test_gct2gctx_out.gctx" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests/test_gct2gctx_out.gctx" args_string = "-f {} -o {}".format(in_name, out_name) args = gct2gctx.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gct2gctx_main(self): pd.util.testing.assert_frame_equal(in_gct.col_metadata_df, out_gctx.col_metadata_df) pd.util.testing.assert_frame_equal(in_gct.row_metadata_df, out_gctx.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gct" - added_meta = "../functional_tests/test_gct2gctx_out_annotated.gctx" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gct" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gct2gctx_out_annotated.gctx" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta) args = gct2gctx.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py index 06576c1..39f39e5 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_gctx2gct.py @@ -14,8 +14,8 @@ class TestGCTx2GCT(unittest.TestCase): def test_gctx2gct_main(self): - in_name = "../functional_tests/mini_gctoo_for_testing.gctx" - out_name = "../functional_tests/test_gctx2gct_out.gct" + in_name = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx" + out_name = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out.gct" args_string = "-f {} -o {}".format(in_name, out_name) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -29,10 +29,10 @@ def test_gctx2gct_main(self): pd.util.testing.assert_frame_equal(in_gctx.col_metadata_df, out_gct.col_metadata_df) pd.util.testing.assert_frame_equal(in_gctx.row_metadata_df, out_gct.row_metadata_df) - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_rowmeta_n6.txt" - col_meta = "../functional_tests/test_colmeta_n6.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_rowmeta_n6.txt" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_colmeta_n6.txt" args_string = "-f {} -o {} -row_annot_path {} -col_annot_path {}".format(no_meta, added_meta, row_meta, col_meta ) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -51,9 +51,9 @@ def test_gctx2gct_main(self): def test_missing_annotations(self): with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - row_meta = "../functional_tests/test_missing_rowmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out_annotated.gct" + row_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_missing_rowmeta.txt" args_string = "-f {} -o {} -row_annot_path {}".format(no_meta, added_meta, row_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) @@ -63,9 +63,9 @@ def test_missing_annotations(self): self.assertTrue('Row ids in matrix missing from annotations file', context.exception) with self.assertRaises(Exception) as context: - no_meta = "../functional_tests/mini_gctoo_for_testing_nometa.gctx" - added_meta = "../functional_tests/test_gctx2gct_out_annotated.gct" - col_meta = "../functional_tests/test_missing_colmeta.txt" + no_meta = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing_nometa.gctx" + added_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_gctx2gct_out_annotated.gct" + col_meta = "cmapPy/pandasGEXpress/tests/functional_tests/test_missing_colmeta.txt" args_string = "-f {} -o {} -col_annot_path {}".format(no_meta, added_meta, col_meta) args = gctx2gct.build_parser().parse_args(args_string.split()) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py index c4de20c..d25492b 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse.py @@ -9,7 +9,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) @@ -17,7 +17,7 @@ class TestParse(unittest.TestCase): def test_gctx_parsing(self): # parse in gctx, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -27,14 +27,14 @@ def test_gctx_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", convert_neg_666 = False) self.assertFalse(mg2_alt.col_metadata_df["mfc_plate_id"].isnull().all()) # parsing w/rids & cids specified test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -43,28 +43,28 @@ def test_gctx_parsing(self): # parsing w/ridx & cidx specified mg5 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg6 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg6 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg5.data_df, mg6.data_df) pandas_testing.assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df) pandas_testing.assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df) # parsing row metadata only - mg7 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg7 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg7, mg1.row_metadata_df) # parsing col metadata only - mg8 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg8 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg8, mg1.col_metadata_df) # parsing w/multiindex - mg9 = parse.parse("../functional_tests/mini_gctoo_for_testing.gctx", make_multiindex=True) + mg9 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", make_multiindex=True) self.assertTrue(mg9.multi_index_df is not None) def test_gct_parsing(self): # parse in gct, no other arguments mg1 = mini_gctoo_for_testing.make() - mg2 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct") + mg2 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -74,13 +74,13 @@ def test_gct_parsing(self): self.assertTrue(mg2.col_metadata_df["mfc_plate_id"].isnull().all()) # parse w/o convert_neg_666 - mg2_alt = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", convert_neg_666 = False) + mg2_alt = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct", convert_neg_666 = False) self.assertCountEqual(mg2_alt.col_metadata_df["mfc_plate_id"].values.tolist(), [-666] * 6) # parse in gct with subsetting my_rid = "LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33" - mg3 = parse.parse("../functional_tests/mini_gctoo_for_testing.gct", + mg3 = parse.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct", cidx=[0, 2], rid=[my_rid]) self.assertEqual(mg3.data_df.shape, (1, 2)) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py index 4ca8946..870767e 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.GCToo as GCToo -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py index 07622d9..ea79f43 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_parse_gctx.py @@ -44,7 +44,7 @@ class TestParseGctx(unittest.TestCase): def test_parse(self): # parse whole thing mg1 = mini_gctoo_for_testing.make() - mg2 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx") + mg2 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx") pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df) pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df) @@ -54,7 +54,7 @@ def test_parse(self): test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'] test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10'] mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids) - mg4 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg4 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=test_rids, cid=test_cids) pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df) pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df) @@ -104,14 +104,14 @@ def test_parse(self): # test with ridx/cidx mg7 = subset_gctoo.subset_gctoo(mg1, rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) - mg8 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) + mg8 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cidx=[4]) pandas_testing.assert_frame_equal(mg7.data_df, mg8.data_df) pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df) pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df) # test with rid/cidx - mg9 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", + mg9 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], cidx=[4]) @@ -120,7 +120,7 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df) # test with ridx/cid - mg10 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], + mg10 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", ridx=[4], cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']) pandas_testing.assert_frame_equal(mg7.data_df, mg10.data_df) @@ -128,15 +128,61 @@ def test_parse(self): pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df) # test with row_meta_only - mg11 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) + mg11 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", row_meta_only=True) pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df) # test with col_meta_only - mg12 = parse_gctx.parse("../functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) + mg12 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", col_meta_only=True) pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df) + # test with sort_row_meta False and ridx + mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",) + + # test with sort_col_meta False and cidx + mg13 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False) + + pandas_testing.assert_frame_equal(mg13.data_df, mg1.data_df.iloc[:, [4,1,3]]) + pandas_testing.assert_frame_equal(mg13.col_metadata_df, mg1.col_metadata_df.iloc[[4,1,3],:]) + pandas_testing.assert_frame_equal(mg13.row_metadata_df, mg1.row_metadata_df) + + + # test with sort_row_meta False and ridx + mg14 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False) + + pandas_testing.assert_frame_equal(mg14.data_df, mg1.data_df.iloc[[3,0,1],:]) + pandas_testing.assert_frame_equal(mg14.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg14.row_metadata_df, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cidx and col_meta_only + mg15 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cidx = [4,1,3], sort_col_meta= False, col_meta_only=True) + pandas_testing.assert_frame_equal(mg15, mg1.col_metadata_df.iloc[[4,1,3],:]) + + # test with sort_row_meta False and ridx and row_meta_only + mg16 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + ridx = [3,0,1], sort_row_meta= False, row_meta_only=True) + pandas_testing.assert_frame_equal(mg16, mg1.row_metadata_df.iloc[[3,0,1],:]) + + # test with sort_col_meta False and cid + cid_unsorted = ['LJP007_MCF7_24H:TRT_POSCON:BRD-K81418486:10','LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33'] + mg17 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx", + cid = cid_unsorted, sort_col_meta= False) + pandas_testing.assert_frame_equal(mg17.data_df, mg1.data_df.iloc[:, [2,0]]) + pandas_testing.assert_frame_equal(mg17.col_metadata_df, mg1.col_metadata_df.iloc[[2,0],:]) + pandas_testing.assert_frame_equal(mg17.row_metadata_df, mg1.row_metadata_df) + + # test with sort_row_meta False and rid + rid_unsorted = ['LJP007_MCF7_24H:TRT_CP:BRD-K64857848:10', 'MISC003_A375_24H:TRT_CP:BRD-K93918653:3.33'] + mg18 = parse_gctx.parse("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", + rid = rid_unsorted, sort_row_meta=False) + pandas_testing.assert_frame_equal(mg18.data_df, mg1.data_df.iloc[[5,1], :]) + pandas_testing.assert_frame_equal(mg18.col_metadata_df, mg1.col_metadata_df) + pandas_testing.assert_frame_equal(mg18.row_metadata_df, mg1.row_metadata_df.iloc[[5,1],:]) + def test_parse_rid_as_entrez_id(self): - input_file = "../functional_tests/test_parse_gctx_rid_entrez_id.gctx" + input_file = "cmapPy/pandasGEXpress/tests/functional_tests/test_parse_gctx_rid_entrez_id.gctx" g = parse_gctx.parse(input_file) self.assertEqual((5, 5), g.data_df.shape) logger.debug("g.data_df.index: {}".format(g.data_df.index)) @@ -162,19 +208,19 @@ def test_check_and_order_id_inputs(self): # case 1: row and col lists are populated and same type self.assertEqual((sorted(ridx), sorted(cidx)), - parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, None, cidx, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 2: row & col lists are populated, but of different types self.assertEqual((sorted(ridx), [0, 1, 2, 3]), - parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, ridx, cid, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 3: row list and col lists are both None self.assertEqual(([0, 1, 2, 3], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(None, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) # case 4: row list is populated, col list is None self.assertEqual(([0, 1, 2], [0, 1, 2, 3, 4, 5]), - parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta)) + parse_gctx.check_and_order_id_inputs(rid, None, None, None, row_meta, col_meta, sort_row_meta = True, sort_col_meta = True)) def test_check_id_idx_exclusivity(self): ids = ["a", "b", "c"] @@ -203,7 +249,7 @@ def test_parse_metadata_df(self): logger.debug("mini_row_meta.columns: {}".format(mini_row_meta.columns)) logger.debug("mini_row_meta.dtypes: {}".format(mini_row_meta.dtypes)) - gctx_file = h5py.File("../functional_tests/mini_gctoo_for_testing.gctx", "r") + gctx_file = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx", "r") row_dset = gctx_file[row_meta_group_node] col_dset = gctx_file[col_meta_group_node] @@ -270,19 +316,19 @@ def test_get_ordered_idx(self): mg = mini_gctoo_for_testing.make() # case 1: id_type == None - case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df) + case1 = parse_gctx.get_ordered_idx(None, [], mg.row_metadata_df, sort_idx = True) self.assertEqual(case1, list(range(0, 6)), "Expected ordered idx to be {} but got {}".format(list(range(0, 6)), case1)) # case 2: id_type == "id" case2 = parse_gctx.get_ordered_idx("id", - ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df) + ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'], mg.col_metadata_df, sort_idx = True) self.assertEqual(case2, [4], "Expected ordered idx to be {} but got {}".format([4], case2)) # case 3: id_type == ridx case3 = parse_gctx.get_ordered_idx("idx", - [5, 1, 3], mg.col_metadata_df) + [5, 1, 3], mg.col_metadata_df, sort_idx = True) self.assertEqual(case3, [1, 3, 5], "Expected ordered idx to be {} but got {}".format([1, 3, 5], case3)) @@ -295,12 +341,11 @@ def test_parse_data_df(self): mini_data_df.columns.name = "cid" # create h5py File instance - mini_gctx = h5py.File("../functional_tests/mini_gctx_with_metadata_n2x3.gctx", "r") + mini_gctx = h5py.File("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctx_with_metadata_n2x3.gctx", "r") data_dset = mini_gctx[data_node] - # get relevant metadata fields - col_meta = parse_gctx.get_column_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") - row_meta = parse_gctx.get_row_metadata("../functional_tests/mini_gctx_with_metadata_n2x3.gctx") + col_meta = parse_gctx.get_column_metadata("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctx_with_metadata_n2x3.gctx") + row_meta = parse_gctx.get_row_metadata("cmapPy/pandasGEXpress/tests/functional_tests/mini_gctx_with_metadata_n2x3.gctx") # case 1: no subsetting data_df1 = parse_gctx.parse_data_df(data_dset, [0, 1, 2], [0, 1], row_meta, col_meta) @@ -352,11 +397,11 @@ def test_check_idx_validity(self): id_list = [0,1,2] df = pd.DataFrame({}, index=range(5)) logger.debug("df.shape: {}".format(df.shape)) - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) id_list[0] = -1 with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[-1]", str(context.exception)) @@ -364,7 +409,7 @@ def test_check_idx_validity(self): invalid_high = df.shape[0] + 1 id_list[0] = invalid_high with self.assertRaises(Exception) as context: - parse_gctx.check_idx_validity(id_list, df) + parse_gctx.check_idx_validity(id_list, df, sort_id = True) logger.debug("context.exception: {}".format(context.exception)) self.assertIn("some of indexes being used to subset the data are not valid", str(context.exception)) self.assertIn("[{}]".format(invalid_high), str(context.exception)) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py index 249b630..b92b120 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_subset.py @@ -12,7 +12,7 @@ class TestSubset(unittest.TestCase): def test_read_arg(self): - arg_path = os.path.join("../functional_tests", "test_subset_rid.grp") + arg_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") rids = sg._read_arg([arg_path]) self.assertCountEqual(rids, ["a", "Bb", "c"]) @@ -27,10 +27,10 @@ def test_read_arg_bad(self): def test_subset_main(self): - in_gct_path = os.path.join("../functional_tests", "test_subset_in.gct") - rid_grp_path = os.path.join("../functional_tests", "test_subset_rid.grp") - out_name = os.path.join("../functional_tests", "test_subset_out.gct") - expected_out_path = os.path.join("../functional_tests", "test_subset_expected.gct") + in_gct_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_in.gct") + rid_grp_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_rid.grp") + out_name = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_out.gct") + expected_out_path = os.path.join("cmapPy/pandasGEXpress/tests/functional_tests/", "test_subset_expected.gct") args_string = "-i {} --rid {} -ec {} -o {}".format( in_gct_path, rid_grp_path, "f", out_name) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py index e01319d..a5ce5da 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gct.py @@ -8,7 +8,7 @@ import cmapPy.pandasGEXpress.parse_gct as pg import cmapPy.pandasGEXpress.write_gct as wg -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py index 6d7af8e..25f9bf2 100644 --- a/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py +++ b/cmapPy/pandasGEXpress/tests/python3_tests/test_write_gctx.py @@ -12,7 +12,7 @@ __author__ = "Oana Enache" __email__ = "oana@broadinstitute.org" -FUNCTIONAL_TESTS_PATH = "../functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" # instantiate logger logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py b/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py index e308216..9c3e0f3 100644 --- a/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py +++ b/cmapPy/pandasGEXpress/tests/test_python2_python3_compatibility.py @@ -15,7 +15,7 @@ __author__ = "Saksham Malhotra" __email__ = "saksham2196@gmail.com" -FUNCTIONAL_TESTS_PATH = "functional_tests" +FUNCTIONAL_TESTS_PATH = "cmapPy/pandasGEXpress/tests/functional_tests/" logger = logging.getLogger(setup_logger.LOGGER_NAME) diff --git a/cmapPy/set_io/tests/__init__.py b/cmapPy/set_io/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cmapPy/set_io/tests/test_gmt.py b/cmapPy/set_io/tests/test_gmt.py index 6779316..d9ab6ae 100644 --- a/cmapPy/set_io/tests/test_gmt.py +++ b/cmapPy/set_io/tests/test_gmt.py @@ -5,7 +5,7 @@ import cmapPy.set_io.gmt as gmt logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/set_io/tests/functional_tests/" class TestGMT(unittest.TestCase): diff --git a/cmapPy/set_io/tests/test_grp.py b/cmapPy/set_io/tests/test_grp.py index f9c401e..bc7c87e 100644 --- a/cmapPy/set_io/tests/test_grp.py +++ b/cmapPy/set_io/tests/test_grp.py @@ -5,7 +5,7 @@ import cmapPy.set_io.grp as grp logger = logging.getLogger(setup_logger.LOGGER_NAME) -FUNCTIONAL_TESTS_DIR = "functional_tests" +FUNCTIONAL_TESTS_DIR = "cmapPy/set_io/tests/functional_tests/" class TestGRP(unittest.TestCase):