From efc36937a363e1c8405caf70d3e7e1d5799e765c Mon Sep 17 00:00:00 2001 From: orisenbazuru Date: Sun, 12 Jan 2020 18:22:24 +0100 Subject: [PATCH] added neural workflow for DDI model --- .gitignore | 52 + cluster/data/medinfmk/ddi/raw/drug_names.tsv | 1430 +++++++++++++++++ ddi/__init__.py | 0 ddi/dataset.py | 213 +++ ddi/model.py | 52 + ddi/run_workflow.py | 353 ++++ ddi/utilities.py | 330 ++++ .../02_AA_Skorch_DDI-checkpoint.ipynb | 581 ------- notebooks/02_AA_Skorch_DDI.ipynb | 581 ------- req.txt | 2 +- req_conda.txt | 197 +++ req_pip.txt | 25 + setup.py | 16 + 13 files changed, 2669 insertions(+), 1163 deletions(-) create mode 100644 cluster/data/medinfmk/ddi/raw/drug_names.tsv create mode 100644 ddi/__init__.py create mode 100644 ddi/dataset.py create mode 100644 ddi/model.py create mode 100644 ddi/run_workflow.py create mode 100644 ddi/utilities.py delete mode 100644 notebooks/.ipynb_checkpoints/02_AA_Skorch_DDI-checkpoint.ipynb delete mode 100644 notebooks/02_AA_Skorch_DDI.ipynb create mode 100644 req_conda.txt create mode 100644 req_pip.txt create mode 100644 setup.py diff --git a/.gitignore b/.gitignore index 4f5f323..913d836 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,54 @@ *.p *.pickle + +.DS_Store + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Vs code +.vscode + +# orisenbazuru +explore.py +notebooks/orisenbazuru/* +cluster/data/medinfmk/ddi/processed/* \ No newline at end of file diff --git a/cluster/data/medinfmk/ddi/raw/drug_names.tsv b/cluster/data/medinfmk/ddi/raw/drug_names.tsv new file mode 100644 index 0000000..9f3cca2 --- /dev/null +++ b/cluster/data/medinfmk/ddi/raw/drug_names.tsv @@ -0,0 +1,1430 @@ +CID100000085 carnitine +CID100000119 gamma-aminobutyric +CID100000137 5-aminolevulinic +CID100000143 leucovorin +CID100000146 5-methyltetrahydrofolate +CID100000158 PGE2 +CID100000159 prostacyclin +CID100000160 prostaglandin +CID100000175 acetate +CID100000187 acetylcholine +CID100000191 adenosine +CID100000206 glucose +CID100000214 PGE1 +CID100000222 ammonia +CID100000232 arginine +CID100000244 benzyl +CID100000247 betaine +CID100000271 calcium +CID100000297 graphene +CID100000298 chloramphenicol +CID100000303 bile +CID100000305 choline +CID100000311 citric +CID100000312 chloride +CID100000338 salicylate +CID100000401 D-cycloserine +CID100000444 bupropion +CID100000450 estradiol +CID100000453 mannitol +CID100000564 EACA +CID100000581 N-acetylcysteine +CID100000596 cytarabine +CID100000598 mesna +CID100000612 lactate +CID100000679 DMSO +CID100000681 dopamine +CID100000698 estrone +CID100000699 Oestrogen +CID100000700 monoethanolamine +CID100000727 lindane +CID100000738 glutamine +CID100000750 glycine +CID100000753 glycerol +CID100000767 bicarbonate +CID100000772 LMWH +CID100000774 histamine +CID100000785 quinol +CID100000813 potassium +CID100000815 kanamycin +CID100000838 epinephrine +CID100000853 thyroxine +CID100000861 triiodothyronine +CID100000888 magnesium +CID100000896 melatonin +CID100000923 sodium +CID100000937 niacin +CID100000942 nicotine +CID100000946 nitrite +CID100000948 nitrous +CID100000951 norepinephrine +CID100000961 hydroxyl +CID100001003 phosphate +CID100001046 
pyrazinamide +CID100001054 pyridoxine +CID100001065 quinidine +CID100001071 retinol +CID100001084 thiosulfate +CID100001125 tetrahydrobiopterin +CID100001130 thiamine +CID100001134 thymidine +CID100001206 methamphetamine +CID100001301 naproxen +CID100001546 cladribine +CID100001690 doxorubicin +CID100001727 4-AP +CID100001775 phenytoin +CID100001798 methazolamide +CID100001805 5-azacytidine +CID100001875 methylprednisolone +CID100001971 abacavir +CID100001972 amphotericin +CID100001978 acebutolol +CID100001983 acetaminophen +CID100001986 acetazolamide +CID100001990 acetohydroxamic +CID100001993 methacholine +CID100002019 actinomycin +CID100002021 spectinomycin +CID100002022 acyclovir +CID100002082 albendazole +CID100002083 salbutamol +CID100002085 Almeta +CID100002088 alendronate +CID100002092 alfuzosin +CID100002094 allopurinol +CID100002099 alosetron +CID100002118 alprazolam +CID100002123 hexamethylmelamine +CID100002130 amantadine +CID100002133 amcinonide +CID100002140 diatrizoate +CID100002141 amifostine +CID100002142 amikacin +CID100002145 aminoglutethimide +CID100002148 p-aminohippurate +CID100002153 theophylline +CID100002156 amiodarone +CID100002159 amisulpride +CID100002160 amitriptyline +CID100002161 amlexanox +CID100002162 amlodipine +CID100002163 amobarbital +CID100002168 amorolfine +CID100002170 amoxapine +CID100002171 amoxicillin +CID100002173 ampicillin +CID100002177 amprenavir +CID100002179 amsacrine +CID100002182 anagrelide +CID100002187 anastrozole +CID100002215 apomorphine +CID100002216 apraclonidine +CID100002232 argatroban +CID100002244 aspirin +CID100002249 atenolol +CID100002250 atorvastatin +CID100002265 azathioprine +CID100002266 azelaic +CID100002267 azelastine +CID100002269 azithromycin +CID100002274 aztreonam +CID100002284 baclofen +CID100002307 beclomethasone +CID100002308 beclomethasone +CID100002311 benazepril +CID100002315 bendrofluazide +CID100002337 benzocaine +CID100002341 benzphetamine +CID100002344 benztropine +CID100002345 benzyl +CID100002349 penicillin +CID100002350 bepotastine +CID100002351 bepridil +CID100002366 betahistine +CID100002367 dexamethasone +CID100002369 betaxolol +CID100002370 bethanechol +CID100002375 bicalutamide +CID100002391 bisacodyl +CID100002405 bisoprolol +CID100002431 bretylium +CID100002435 brimonidine +CID100002441 bromazepam +CID100002442 bromhexine +CID100002443 bromocriptine +CID100002462 budesonide +CID100002471 bumetanide +CID100002474 bupivacaine +CID100002476 buprenorphine +CID100002477 buspirone +CID100002478 busulfan +CID100002479 butabarbital +CID100002484 butenafine +CID100002487 butorphanol +CID100002512 cabergoline +CID100002519 caffeine +CID100002520 verapamil +CID100002522 calcipotriol +CID100002524 1,25(OH)2D3 +CID100002540 candesartan +CID100002541 candesartan +CID100002547 capreomycin +CID100002548 capsaicin +CID100002550 captopril +CID100002551 carbachol +CID100002554 carbamazepine +CID100002559 carbenicillin +CID100002563 carbidopa +CID100002564 carbinoxamine +CID100002576 carisoprodol +CID100002578 BCNU +CID100002583 carteolol +CID100002585 carvedilol +CID100002609 cefaclor +CID100002610 cefadroxil +CID100002617 cefazolin +CID100002622 cefepime +CID100002631 cefotaxime +CID100002637 cefoxitin +CID100002646 cefprozil +CID100002650 ceftazidime +CID100002654 cephem +CID100002655 ceftizoxime +CID100002656 ceftriaxone +CID100002658 cefuroxime +CID100002662 celecoxib +CID100002663 celiprolol +CID100002666 cephalexin +CID100002675 cefixime +CID100002676 cerivastatin +CID100002678 cetirizine +CID100002684 
cevimeline +CID100002708 chlorambucil +CID100002712 chlordiazepoxide +CID100002713 chlorhexidine +CID100002719 chloroquine +CID100002720 thiazide +CID100002725 chlorpheniramine +CID100002726 chlorpromazine +CID100002727 chlorpropamide +CID100002732 chlorthalidone +CID100002733 chlorzoxazone +CID100002749 ciclopirox +CID100002751 cilazapril +CID100002754 cilostazol +CID100002756 cimetidine +CID100002762 cinoxacin +CID100002764 ciprofloxacin +CID100002767 cisplatin +CID100002771 citalopram +CID100002781 clemastine +CID100002786 clindamycin +CID100002789 clobazam +CID100002791 clobetasol +CID100002792 clobetasone +CID100002794 clofazimine +CID100002800 clomiphene +CID100002801 clomipramine +CID100002802 clonazepam +CID100002803 clonidine +CID100002806 clopidogrel +CID100002809 clorazepate +CID100002812 clotrimazole +CID100002818 clozapine +CID100002826 cocaine +CID100002828 codeine +CID100002833 colchicine +CID100002881 sodium +CID100002883 crotamiton +CID100002891 cyanocobalamin +CID100002895 cyclobenzaprine +CID100002905 cyclopentolate +CID100002907 cyclophosphamide +CID100002909 v +CID100002913 cyproheptadine +CID100002914 cyproterone +CID100002949 danazol +CID100002951 dantrolene +CID100002955 dapsone +CID100002958 anthracycline +CID100002972 deferiprone +CID100002973 deferoxamine +CID100002978 delta +CID100002995 desipramine +CID100003000 desoximetasone +CID100003003 dexamethasone +CID100003007 amphetamine +CID100003008 dextromethorphan +CID100003009 DFMO +CID100003015 cyproterone +CID100003016 diazepam +CID100003019 diazoxide +CID100003032 diclofenac +CID100003038 dichlorphenamide +CID100003040 dicloxacillin +CID100003042 dicyclomine +CID100003043 didanosine +CID100003053 diethylenetriaminepentaacetic +CID100003056 diflorasone +CID100003059 diflunisal +CID100003060 epitopic +CID100003062 digoxin +CID100003063 dihydrocodeine +CID100003066 dihydroergotamine +CID100003075 diltiazem +CID100003080 dimercaprol +CID100003100 diphenhydramine +CID100003105 dipivefrin +CID100003108 dipyridamole +CID100003114 disopyramide +CID100003117 disulfiram +CID100003121 valproate +CID100003125 alpha-methyl-p-tyrosine +CID100003143 docetaxel +CID100003148 dolasetron +CID100003151 domperidone +CID100003152 donepezil +CID100003154 dorzolamide +CID100003155 dothiepin +CID100003156 doxapram +CID100003157 doxazosin +CID100003158 doxepin +CID100003168 droperidol +CID100003171 L-threo-DOPS +CID100003182 dyphylline +CID100003198 econazole +CID100003199 trabectedin +CID100003202 edrophonium +CID100003203 efavirenz +CID100003209 eicosapentaenoic +CID100003219 emedastine +CID100003222 CAS +CID100003226 enflurane +CID100003241 epinastine +CID100003249 vitamin +CID100003250 ergonovine +CID100003251 ergotamine +CID100003255 erythromycin +CID100003256 monomycin +CID100003261 estazolam +CID100003263 estradiol +CID100003267 estradiol +CID100003268 estramustine +CID100003269 estriol +CID100003278 ethacrynic +CID100003279 ethambutol +CID100003291 ethosuximide +CID100003292 ethotoin +CID100003305 etidronate +CID100003308 etodolac +CID100003310 etoposide +CID100003324 famciclovir +CID100003325 famotidine +CID100003331 felbamate +CID100003333 felodipine +CID100003339 fenofibrate +CID100003340 fenoldopam +CID100003342 fenoprofen +CID100003345 fentanyl +CID100003348 fexofenadine +CID100003350 finasteride +CID100003354 flavoxate +CID100003355 flecainide +CID100003363 FdUrd +CID100003364 flucloxacillin +CID100003365 fluconazole +CID100003366 5-fluorocytosine +CID100003367 fludarabine +CID100003368 FAMP +CID100003370 fludrocortisone 
+CID100003372 fluphenazine +CID100003373 flumazenil +CID100003375 flumethasone +CID100003379 flunisolide +CID100003380 flunitrazepam +CID100003381 fluocinolone +CID100003382 fluocinonide +CID100003384 fluorometholone +CID100003385 5-FU +CID100003386 fluoxetine +CID100003387 fluoxymesterone +CID100003388 fluphenazine +CID100003392 flurandrenolone +CID100003393 flurazepam +CID100003394 flurbiprofen +CID100003397 flutamide +CID100003399 fluticasone +CID100003403 fluvastatin +CID100003404 fluvoxamine +CID100003405 folate +CID100003406 4-methylpyrazole +CID100003410 formoterol +CID100003414 foscarnet +CID100003417 fosfomycin +CID100003419 fosinopril +CID100003425 Romidepsin +CID100003440 furosemide +CID100003443 fusidic +CID100003446 gabapentin +CID100003449 galantamine +CID100003454 ganciclovir +CID100003461 gemcitabine +CID100003462 gemeprost +CID100003463 gemfibrozil +CID100003467 gentamicin +CID100003475 gliclazide +CID100003476 glimepiride +CID100003478 glipizide +CID100003488 glibenclamide +CID100003494 glycopyrrolate +CID100003510 granisetron +CID100003512 griseofulvin +CID100003516 guaifenesin +CID100003517 icodextrin +CID100003519 guanfacine +CID100003520 guanidinium +CID100003553 halcinonide +CID100003559 haloperidol +CID100003598 hexachlorophene +CID100003623 homatropine +CID100003636 Buscopan +CID100003637 hydralazine +CID100003639 hydrochlorothiazide +CID100003640 cortisol +CID100003642 hydrocortisone +CID100003647 hydroflumethiazide +CID100003648 hydromorphone +CID100003652 hydroxychloroquine +CID100003657 hydroxyurea +CID100003658 hydroxyzine +CID100003661 atropine +CID100003672 ibuprofen +CID100003675 phenelzine +CID100003676 lidocaine +CID100003685 idarubicin +CID100003687 IdUrd +CID100003690 ifosfamide +CID100003696 imipramine +CID100003702 indapamide +CID100003706 indinavir +CID100003715 indomethacin +CID100003724 iodixanol +CID100003730 iohexol +CID100003734 iopamidol +CID100003736 iopromide +CID100003737 sodium +CID100003738 iotrolan +CID100003739 iodipamide +CID100003741 ioversol +CID100003742 ioxaglate +CID100003743 ioxilan +CID100003746 ipratropium +CID100003749 irbesartan +CID100003750 irinotecan +CID100003759 isocarboxazid +CID100003763 isoflurane +CID100003767 isoniazid +CID100003777 isopropyl +CID100003779 isoproterenol +CID100003780 isosorbide +CID100003783 isoxsuprine +CID100003784 isradipine +CID100003793 itraconazole +CID100003821 ketamine +CID100003823 ketoconazole +CID100003825 ketoprofen +CID100003826 ketorolac +CID100003827 ketotifen +CID100003830 cytokinin +CID100003848 phenyllactate +CID100003869 labetalol +CID100003872 lactulose +CID100003877 lamivudine +CID100003878 lamotrigine +CID100003883 lansoprazole +CID100003890 latanoprost +CID100003899 leflunomide +CID100003902 letrozole +CID100003911 Leuprorelin +CID100003914 levobunolol +CID100003915 levocabastine +CID100003916 levomepromazine +CID100003918 dextrorphan +CID100003928 lincomycin +CID100003929 linezolid +CID100003937 Lisinopril +CID100003938 lisuride +CID100003939 LiOH +CID100003948 lomefloxacin +CID100003950 lomustine +CID100003954 loperamide +CID100003957 loratadine +CID100003958 lorazepam +CID100003961 losartan +CID100003962 lovastatin +CID100003964 loxapine +CID100003998 mafenide +CID100004004 malathion +CID100004011 maprotiline +CID100004030 mebendazole +CID100004031 mebeverine +CID100004032 mecamylamine +CID100004033 nitrogen +CID100004034 monamine +CID100004036 meclofenamate +CID100004042 medroxyprogesterone +CID100004043 medrysone +CID100004044 mefenamic +CID100004046 mefloquine 
+CID100004048 megestrol +CID100004051 meloxicam +CID100004053 melphalan +CID100004054 memantine +CID100004057 Cantril +CID100004058 meperidine +CID100004060 mephenytoin +CID100004062 mepivacaine +CID100004064 meprobamate +CID100004075 5-ASA +CID100004086 metaproterenol +CID100004091 metformin +CID100004095 methadone +CID100004101 methenamine +CID100004107 methocarbamol +CID100004112 methotrexate +CID100004114 8-MOP +CID100004120 N-methylscopolamine +CID100004121 methyclothiazide +CID100004138 methyldopa +CID100004139 methylene +CID100004140 methylergometrine +CID100004158 methylphenidate +CID100004159 methylprednisolone +CID100004160 methyltestosterone +CID100004163 methysergide +CID100004168 metoclopramide +CID100004170 metolazone +CID100004171 metoprolol +CID100004173 metronidazole +CID100004174 metyrapone +CID100004178 mexiletine +CID100004184 mianserin +CID100004189 miconazole +CID100004192 midazolam +CID100004195 midodrine +CID100004196 mifepristone +CID100004197 milrinone +CID100004201 minoxidil +CID100004205 mirtazapine +CID100004211 mitotane +CID100004212 mitoxantrone +CID100004235 moclobemide +CID100004236 modafinil +CID100004240 mometasone +CID100004248 montelukast +CID100004253 morphine +CID100004259 moxifloxacin +CID100004264 mupirocin +CID100004271 mycophenolate +CID100004272 mycophenolic +CID100004274 tetrofosmin +CID100004409 nabumetone +CID100004411 nadolol +CID100004419 nalbuphine +CID100004421 nalidixic +CID100004422 nalmefene +CID100004425 naloxone +CID100004428 naltrexone +CID100004432 nandrolone +CID100004436 naphazoline +CID100004440 naratriptan +CID100004443 nateglinide +CID100004449 nefazodone +CID100004450 nefopam +CID100004451 nelfinavir +CID100004454 neomycin +CID100004456 neostigmine +CID100004463 nevirapine +CID100004473 nicardipine +CID100004485 nifedipine +CID100004493 nilutamide +CID100004497 nimodipine +CID100004499 nisoldipine +CID100004506 nitrazepam +CID100004509 nitrofurantoin +CID100004510 nitroglycerin +CID100004513 nizatidine +CID100004536 norethisterone +CID100004539 norfloxacin +CID100004542 levonorgestrel +CID100004543 nortriptyline +CID100004547 repaglinide +CID100004568 nystatin +CID100004583 ofloxacin +CID100004585 olanzapine +CID100004594 omeprazole +CID100004595 ondansetron +CID100004599 orlistat +CID100004601 orphenadrine +CID100004603 oseltamivir +CID100004607 oxacillin +CID100004609 oxaliplatin +CID100004614 oxaprozin +CID100004616 oxazepam +CID100004623 oxiconazole +CID100004631 oxprenolol +CID100004634 oxybutynin +CID100004635 oxycodone +CID100004638 oxymetholone +CID100004639 oxymorphone +CID100004649 APAs +CID100004666 paclitaxel +CID100004673 pamidronate +CID100004675 pancuronium +CID100004678 dexpanthenol +CID100004679 pantoprazole +CID100004680 papaverine +CID100004689 paromomycin +CID100004691 paroxetine +CID100004695 isosulfan +CID100004724 penbutolol +CID100004725 penciclovir +CID100004727 D-penicillamine +CID100004730 penicillin +CID100004735 pentamidine +CID100004736 pentazocine +CID100004737 pentobarbital +CID100004739 pentostatin +CID100004740 pentoxifylline +CID100004745 pergolide +CID100004746 perhexiline +CID100004747 propericiazine +CID100004748 perphenazine +CID100004756 phenylazo +CID100004763 phenobarbital +CID100004768 phenoxybenzamine +CID100004771 phentermine +CID100004775 4-PBA +CID100004782 phenylephrine +CID100004786 phenylpropanolamine +CID100004810 moxonidine +CID100004811 physostigmine +CID100004812 vitamin +CID100004819 pilocarpine +CID100004828 pindolol +CID100004829 pioglitazone +CID100004834 piperacillin 
+CID100004845 pirbuterol +CID100004865 podophyllotoxin +CID100004868 polymyxin +CID100004870 polythiazide +CID100004885 pramipexole +CID100004889 pravastatin +CID100004891 praziquantel +CID100004893 prazosin +CID100004894 prednisolone +CID100004895 prednisolone +CID100004896 prednisolone +CID100004900 prednisone +CID100004906 prilocaine +CID100004908 primaquine +CID100004909 primidone +CID100004911 probenecid +CID100004913 procainamide +CID100004914 procaine +CID100004915 procarbazine +CID100004917 prochlorperazine +CID100004920 progesterone +CID100004923 proguanil +CID100004927 promethazine +CID100004932 propafenone +CID100004934 propantheline +CID100004935 proparacaine +CID100004943 propofol +CID100004946 propranolol +CID100004976 protriptyline +CID100004989 pyrantel +CID100004991 pyridostigmine +CID100004992 mepyramine +CID100004993 pyrimethamine +CID100004999 quazepam +CID100005002 quetiapine +CID100005005 quinapril +CID100005029 rabeprazole +CID100005032 ephedrine +CID100005035 raloxifene +CID100005038 ramipril +CID100005039 ranitidine +CID100005040 rapamycin +CID100005051 rescinnamine +CID100005052 reserpine +CID100005064 ribavirin +CID100005070 riluzole +CID100005071 rimantadine +CID100005073 risperidone +CID100005076 ritonavir +CID100005077 rivastigmine +CID100005078 rizatriptan +CID100005090 rofecoxib +CID100005095 ropinirole +CID100005106 roxithromycin +CID100005152 salmeterol +CID100005155 stavudine +CID100005161 salsalate +CID100005184 scopolamine +CID100005193 secobarbital +CID100005195 deprenyl +CID100005203 sertraline +CID100005206 sevoflurane +CID100005210 sibutramine +CID100005212 sildenafil +CID100005214 silver +CID100005215 sulfadiazine +CID100005238 sodium +CID100005245 risedronate +CID100005248 sodium +CID100005253 sotalol +CID100005257 sparfloxacin +CID100005267 spironolactone +CID100005291 imatinib +CID100005297 streptomycin +CID100005300 streptozotocin +CID100005311 vorinostat +CID100005314 succinylcholine +CID100005318 sulconazole +CID100005320 sulfacetamide +CID100005329 sulfamethoxazole +CID100005333 sulfonamide +CID100005352 sulindac +CID100005358 sumatriptan +CID100005359 suprofen +CID100005372 tacrolimus +CID100005376 tamoxifen +CID100005379 gatifloxacin +CID100005381 tazarotene +CID100005391 temazepam +CID100005394 temozolomide +CID100005396 teniposide +CID100005401 terazosin +CID100005402 terbinafine +CID100005403 terbutaline +CID100005404 terconazole +CID100005407 testolactone +CID100005408 testosterone +CID100005409 testosterone +CID100005410 testosterone +CID100005411 tetracaine +CID100005419 tetrahydrozoline +CID100005426 thalidomide +CID100005430 thiabendazole +CID100005452 thioridazine +CID100005453 thiotepa +CID100005454 thiothixene +CID100005466 tiagabine +CID100005468 tiaprofenic +CID100005470 tibolone +CID100005472 ticlopidine +CID100005478 timolol +CID100005479 tinidazole +CID100005483 tiopronin +CID100005486 tirofiban +CID100005487 tizanidine +CID100005496 tobramycin +CID100005503 tolazamide +CID100005505 tolbutamide +CID100005508 tolmetin +CID100005512 tolterodine +CID100005514 topiramate +CID100005515 topotecan +CID100005516 toremifene +CID100005523 tramadol +CID100005524 tramazoline +CID100005525 trandolapril +CID100005526 tranexamic +CID100005530 tranylcypromine +CID100005533 trazodone +CID100005538 retinoic +CID100005544 triamcinolone +CID100005546 triamterene +CID100005556 triazolam +CID100005564 triclosan +CID100005565 trien +CID100005566 trifluoperazine +CID100005572 trihexyphenidyl +CID100005576 trimethadione +CID100005577 
trimethobenzamide +CID100005578 trimethoprim +CID100005582 trimetrexate +CID100005584 trimipramine +CID100005591 troglitazone +CID100005593 tropicamide +CID100005595 tropisetron +CID100005596 trospium +CID100005625 delavirdine +CID100005636 unoprostone +CID100005645 UDCA +CID100005647 VACV +CID100005650 valsartan +CID100005651 Vancocine +CID100005656 venlafaxine +CID100005665 vigabatrin +CID100005672 vinorelbine +CID100005717 zafirlukast +CID100005718 zalcitabine +CID100005719 zaleplon +CID100005721 zanamivir +CID100005726 zidovudine +CID100005727 ZnCl2 +CID100005731 zolmitriptan +CID100005732 zolpidem +CID100005734 zonisamide +CID100005735 zopiclone +CID100005746 mitomycin +CID100005771 oxytocin +CID100005775 phentolamine +CID100005877 demethyl +CID100005878 oxandrolone +CID100005939 biguanide +CID100005978 vincristine +CID100006018 tetrabenazine +CID100006049 EDTA +CID100006058 cysteamine +CID100006116 calcium +CID100006238 17-hydroxyprogesterone +CID100006256 trifluorothymidine +CID100006432 Optison +CID100006436 triamcinolone +CID100006451 bromcresol +CID100006468 phencyclidine +CID100006476 methsuximide +CID100006503 tris +CID100006726 cyclizine +CID100007012 phenylbutyric +CID100007029 diethylpropion +CID100007187 benzoyl +CID100007638 monobenzone +CID100007699 benzonatate +CID100008197 tetraen +CID100008230 argenine +CID100008612 chloroprocaine +CID100008982 nafcillin +CID100009034 methohexital +CID100009354 dimercaptosuccinic +CID100009433 aminophylline +CID100009904 nandrolone +CID100010100 propoxyphene +CID100010340 sodium +CID100010413 hydroxybutyrate +CID100010547 echothiophate +CID100010631 medroxyprogesterone +CID100010660 dimenhydrinate +CID100011125 lithium +CID100011973 Brolene +CID100012453 zuclopenthixol +CID100012460 phendimetrazine +CID100012536 desonide +CID100012555 benzydamine +CID100012559 erythromycin +CID100012597 isomannide +CID100012620 Tadenan +CID100013314 lormetazepam +CID100013342 vinblastine +CID100014789 ferumoxytol +CID100014888 arsenic +CID100014917 fluoride +CID100015232 benzathine +CID100015459 metaxalone +CID100016124 methylene +CID100016230 amiloride +CID100016362 pimozide +CID100016533 betamethasone +CID100016850 fluorescein +CID100016886 5-aza-2'-deoxycytidine +CID100017358 SonoVue +CID100018140 estramustine +CID100019090 megestrol +CID100020585 choline +CID100020969 clocortolone +CID100021800 betamethasone +CID100021945 methenamine +CID100022258 hydrocodone +CID100022318 gold +CID100022502 procaine +CID100023703 ogen +CID100023897 molindone +CID100023925 Fe(III +CID100023926 lanthanum +CID100023951 samarium +CID100023954 silver +CID100023957 technetium-99m +CID100023976 chromium +CID100023978 copper +CID100023982 gadolinium +CID100023993 yttrium +CID100023994 zinc +CID100024087 selenium +CID100024414 barium +CID100024424 zinc +CID100024450 potassium +CID100024642 TlCl +CID100024706 Triphasil +CID100024748 glucose +CID100024841 iodide +CID100025419 clodronate +CID100025959 Prussian +CID100027304 HMDP +CID100027400 pizotifen +CID100027661 isosorbide-5-mononitrate +CID100027991 desmopressin +CID100027993 conjugated +CID100028332 propoxyphene +CID100028486 lithium +CID100029089 chlorhexidine +CID100030623 dexrazoxane +CID100031072 carbimazole +CID100031264 paraldehyde +CID100031378 fludrocortisone +CID100031477 metipranolol +CID100032169 articaine +CID100032281 Protirelin +CID100032603 clindamycin +CID100032797 clobetasol +CID100032800 Asp-Tyr(SO3H)-Met-Gly-Trp-Met-Asp-Phe-NH2 +CID100034312 oxcarbazepine +CID100036339 etomidate +CID100036523 
gonadorelin +CID100036811 dobutamine +CID100037392 halofantrine +CID100037720 pentosan +CID100038904 carboplatin +CID100039042 bezafibrate +CID100039507 rimexolone +CID100039524 stiripentol +CID100039764 vecuronium +CID100039860 nabilone +CID100040159 permethrin +CID100040632 pirfenidone +CID100040703 pinaverium +CID100040973 desogestrel +CID100040976 Implanon +CID100041317 acitretin +CID100041684 nitazoxanide +CID100041693 sufentanil +CID100041744 valrubicin +CID100041774 acarbose +CID100041781 torasemide +CID100042113 desflurane +CID100042395 Westcort +CID100042615 Supremon +CID100042955 misoprostol +CID100044563 lodoxamide +CID100044564 lodoxamide +CID100045469 sodium +CID100047319 atracurium +CID100047320 cisatracurium +CID100047419 cefuroxime +CID100047471 butoconazole +CID100047528 nicorandil +CID100047640 naftifine +CID100047725 Goserelin +CID100050294 nedocromil +CID100050614 cefotetan +CID100051263 alfentanil +CID100051577 miglitol +CID100051634 miglustat +CID100052421 prednicarbate +CID100054158 mebrofenin +CID100054313 iloprost +CID100054331 fosfomycin +CID100054373 Octreotide +CID100054454 simvastatin +CID100054547 cefpodoxime +CID100054688 clarithromycin +CID100054786 treprostinil +CID100054808 loteprednol +CID100054840 atomoxetine +CID100054949 NSC +CID100055331 moexiprilat +CID100055466 Gd-DTPA +CID100055480 milnacipran +CID100056338 fosphenytoin +CID100056959 ranolazine +CID100057166 Photofrin +CID100057469 imiquimod +CID100057537 rotigotine +CID100057697 pemirolast +CID100059708 levetiracetam +CID100059768 esmolol +CID100060146 tamsulosin +CID100060164 adapalene +CID100060172 adefovir +CID100060183 perindopril +CID100060184 perindopril +CID100060198 exemestane +CID100060490 zileuton +CID100060496 amlodipine +CID100060612 dexmedetomidine +CID100060613 cidofovir +CID100060668 etoposide +CID100060695 rocuronium +CID100060706 meropenem +CID100060714 gadoteridol +CID100060726 bromfenac +CID100060751 mTHPC +CID100060752 ibutilide +CID100060754 gadodiamide +CID100060787 saquinavir +CID100060795 aripiprazole +CID100060814 remifentanil +CID100060830 tiotropium +CID100060834 duloxetine +CID100060843 pemetrexed +CID100060852 ibandronate +CID100060853 ziprasidone +CID100060860 metaiodobenzylguanidine +CID100060864 olopatadine +CID100060867 levosimendan +CID100060871 adefovir +CID100060877 emtricitabine +CID100060878 eprosartan +CID100060936 tiludronate +CID100060953 capecitabine +CID100061475 sodium +CID100061799 monomethylfumarate +CID100062305 doxycycline +CID100062358 ammonium +CID100062816 colestipol +CID100062819 Metrodin +CID100062924 fluticasone +CID100062956 fosinoprilat +CID100062959 trovafloxacin +CID100062965 beclomethasone +CID100063001 benazeprilat +CID100064147 valganciclovir +CID100064778 Meropenem +CID100064929 fenofibric +CID100064987 tenofovir +CID100065014 AMD3100 +CID100065157 testosterone +CID100065281 CAS +CID100065370 Glat +CID100065628 bendamustine +CID100065840 colestimide +CID100065856 reboxetine +CID100065863 sertaconazole +CID100065866 lercanidipine +CID100065999 telmisartan +CID100068613 atosiban +CID100068740 zoledronic +CID100068844 brinzolamide +CID100069512 malvidin +CID100071158 acamprosate +CID100071273 ropivacaine +CID100071301 nebivolol +CID100071316 mivacurium +CID100071329 dofetilide +CID100071348 lanreotide +CID100071360 iloperidone +CID100071362 Hoe +CID100071406 TPGS +CID100071436 Lutalyse +CID100071469 erythromycin +CID100071616 voriconazole +CID100072022 perindoprilat +CID100072054 darifenacin +CID100072081 Terlipressin +CID100072111 
rifaximin +CID100072466 bleomycin +CID100072938 gemifloxacin +CID100073303 doripenem +CID100073658 telithromycin +CID100074989 atovaquone +CID100077992 frovatriptan +CID100077993 eletriptan +CID100077996 paricalcitol +CID100077997 MK-462 +CID100077998 rosiglitazone +CID100078032 Vallergan +CID100082146 bexarotene +CID100082148 agomelatine +CID100083030 Ge-132 +CID100083513 barium +CID100083606 strontium +CID100084003 ketorolac +CID100091488 fluocinolone +CID100093860 bortezomib +CID100096312 nelarabine +CID100102258 rasburicase +CID100102399 dextran +CID100104741 ICI +CID100104758 raltitrexed +CID100104778 levodopa/carbidopa +CID100104799 fotemustine +CID100104849 rimonabant +CID100104865 bosentan +CID100105145 68Ga +CID100107694 Diane-35 +CID100107969 FTY720 +CID100107994 quinaprilat +CID100110634 vardenafil +CID100110635 tadalafil +CID100114709 eslicarbazepine +CID100115237 paliperidone +CID100115355 NTBC +CID100119182 clofarabine +CID100119212 benzylpenicilloyl +CID100119607 valdecoxib +CID100119828 parecoxib +CID100119830 tenofovir +CID100121396 N-carbamylglutamate +CID100121749 ustekinumab +CID100121892 retigabine +CID100122197 FP-CIT +CID100122316 rasagiline +CID100123015 SU5416 +CID100123597 travoprost +CID100123606 almotriptan +CID100123610 Eptifibatide +CID100123611 fondaparinux +CID100123619 etoricoxib +CID100123620 mometasone +CID100123623 Cancidas +CID100123631 gefitinib +CID100123634 Trisequens +CID100123809 Madopar +CID100124087 desloratadine +CID100125017 desvenlafaxine +CID100125889 pregabalin +CID100127909 bimatoprost +CID100128549 trandolaprilat +CID100129228 rufinamide +CID100129806 rosuvastatin +CID100130564 esomeprazole +CID100130881 olmesartan +CID100131535 fosamprenavir +CID100132804 pitavastatin +CID100132971 abiraterone +CID100132999 ivabradine +CID100134018 febuxostat +CID100134019 pixantrone +CID100134780 pomalidomide +CID100135113 sacrosidase +CID100145068 nitric +CID100147740 drospirenone +CID100147912 posaconazole +CID100148121 pralatrexate +CID100148127 ospemifene +CID100148191 temsirolimus +CID100148192 atazanavir +CID100148211 palonosetron +CID100150310 eplerenone +CID100150311 ezetimibe +CID100150610 ertapenem +CID100151075 nepafenac +CID100151165 aprepitant +CID100151171 conivaptan +CID100152945 dutasteride +CID100153941 entecavir +CID100153994 clevidipine +CID100154058 solifenacin +CID100154256 bazedoxifene +CID100156326 asenapine +CID100156418 cinacalcet +CID100157429 gadopentetate +CID100157688 vinflunine +CID100157920 lubiprostone +CID100157921 methyl +CID100158781 olmesartan +CID100158786 pegaptanib +CID100159247 sevelamer +CID100160036 MnDPDP +CID100160051 colesevelam +CID100160352 zidovudine/lamivudine +CID100163296 tipranavir +CID100166548 Anidulafungin +CID100168625 poly(styrene +CID100168924 lanthanum +CID100170361 varenicline +CID100171558 Kaluril +CID100176870 erlotinib +CID100179344 eslicarbazepine +CID100185457 S-benzoylmercaptoacetyltriglycine +CID100192155 Prednefrin +CID100193962 etravirine +CID100197281 gadobenate +CID100197712 ambrisentan +CID100206527 L-Dmp +CID100208898 dronedarone +CID100208902 ramelteon +CID100208908 lapatinib +CID100208920 NuvaRing +CID100213023 dabigatran +CID100213039 darunavir +CID100213046 lurasidone +CID100214339 luliconazole +CID100216209 aliskiren +CID100216210 dabigatran +CID100216235 sitaxsentan +CID100216237 tolvaptan +CID100216239 sorafenib +CID100216258 Colimycin +CID100216326 lenalidomide +CID100216416 lasofoxifene +CID100219024 regadenoson +CID100219078 lacosamide +CID100219084 Gd-EOB-DTPA 
+CID100219090 fosaprepitant +CID100222786 cortisone +CID100315411 18F-FDG +CID100358641 trimethoprim-sulfamethoxazole +CID100441332 carboprost +CID100441382 natamycin +CID100443379 CAS +CID100444006 cefditoren +CID100444013 gadoversetamide +CID100444033 ciclesonide +CID100449193 roflumilast +CID100455658 hemin +CID100477468 FK463 +CID100483407 maraviroc +CID100489129 efinaconazole +CID100517045 sodium +CID100644241 nilotinib +CID100656628 silodosin +CID100656892 TMC207 +CID100657298 propylthiouracil +CID100667490 6-mercaptopurine +CID101349907 methimazole +CID102723601 6-thioguanine +CID102761171 ethionamide +CID103006171 rilpivirine +CID103010818 d-telaprevir +CID103038497 fospropofol +CID103052762 prucalopride +CID103055172 pimecrolimus +CID103062316 dasatinib +CID103080904 Estrofem +CID103081276 fluticasone/salmeterol +CID103081361 vandetanib +CID103081362 telavancin +CID103081884 copolymer +CID103085017 sevelamer +CID103086257 Photofrin +CID103086685 axitinib +CID103086686 sunitinib +CID103325225 ALX +CID104369359 sitagliptin +CID104474778 2-hydroxysuccinaldehyde +CID104479094 doxercalciferol +CID104479097 hydroxocobalamin +CID104517618 sodium +CID104630253 alclometasone +CID104659568 entacapone +CID104659569 tolcapone +CID105001396 warfarin +CID105251896 vildagliptin +CID105273759 abacavir-lamivudine +CID105277135 elvitegravir +CID105281007 dacarbazine +CID105310993 acipimox +CID105311167 halobetasol +CID105327147 polyoxyethylene +CID105328940 bosutinib +CID105353894 pralidoxime +CID105353980 sulfasalazine +CID105359596 arsenic +CID105360237 Paroven +CID105361912 rifabutin +CID105361917 methylnaltrexone +CID105362070 balsalazide +CID105362420 verteporfin +CID105381226 rifampicin +CID105462337 olsalazine +CID105464096 ramiprilat +CID105479141 CGP +CID105487068 Mersyndol +CID105487301 tegaserod +CID105488383 ofatumumab +CID105488547 alvimopan +CID105493381 deferasirox +CID106102852 gadobutrol +CID106323497 rifapentine +CID106326970 selenium +CID106328144 radium +CID106328526 AC1O3HA7 +CID106331630 eribulin +CID106333887 auranofin +CID106398970 cefdinir +CID106433082 hexaminolevulinate +CID106433091 tapentadol +CID106433101 tafluprost +CID106433117 indacaterol +CID106433119 rivaroxaban +CID106435110 ivermectin +CID106437075 Timentin +CID106440191 fidaxomicin +CID106442177 everolimus +CID106445540 ixabepilone +CID106452749 Locorten +CID106453361 Sativex +CID106477186 delamanid +CID106850789 iron +CID106850791 leuprorelin +CID106918182 strontium +CID106918313 vilazodone +CID106918366 fosaprepitant +CID106918430 ceftobiprole +CID106918456 prasugrel +CID106918462 retapamulin +CID106918558 fesoterodine +CID106918584 sugammadex +CID106918638 belinostat +CID106918670 PEP005 +CID109794842 tasimelteon +CID109800339 olodaterol +CID109810131 ceftaroline +CID109811221 cabazitaxel +CID109812414 dalteparin +CID109815559 trans +CID109818231 tofacitinib +CID109825285 azilsartan +CID109831414 Lovaza +CID109831761 irbesartan-hydrochlorothiazide +CID109831783 Stalevo +CID109846180 eltrombopag +CID109853053 lomitapide +CID109854489 fluticasone +CID109865528 mirabegron +CID109869929 avanafil +CID109871419 ticagrelor +CID109887712 dapagliflozin +CID109898619 Depreotide +CID109912092 apremilast +CID109924495 perampanel +CID109930048 vernakalant +CID109940864 Benicar-HCT +CID109941444 SOM230 +CID109966051 Lu +CID110028615 vorapaxar +CID110096344 linagliptin +CID110107393 18F-flutemetamol +CID110113978 pazopanib +CID110163178 afatinib +CID110178705 besifloxacin +CID110182969 apixaban +CID110324367 boceprevir 
+CID110465263 lorcaserin +CID110482134 glycerol +CID111001318 tafamidis +CID111020241 ascorbate +CID111163584 alogliptin +CID111167602 regorafenib +CID111228026 aclidinium +CID111234049 TR-700 +CID111235728 Saxagliptin +CID111238823 azilsartan +CID111254352 sodium +CID111304743 riociguat +CID111499245 AN2690 +CID111501341 florbetaben +CID111505907 Zyprexa +CID111519069 umeclidinium +CID111531537 eliglustat +CID111556711 Carfilzomib +CID111593706 sevelamer +CID111597571 crizotinib +CID111597697 lisdexamfetamine +CID111672461 gadofosveset +CID111707110 trametinib +CID111722286 spinosad +CID111947681 nitroprusside +CID111949646 empagliflozin +CID111979316 vasopressin +CID113559279 ulipristal +CID115951529 MDV3100 +CID116004692 macitentan +CID116065945 TMC435 +CID116126651 PCI-32765 +CID116129616 salmon +CID116129617 cosyntropin +CID116129629 LY146032 +CID116129632 Nuvocid +CID116129665 Vitrum +CID116129672 Insulin +CID116129682 Forteo +CID116129690 ziconotide +CID116129701 T-A2-3 +CID116129703 Revasc +CID116129704 Bivalirudin +CID116130199 Enfuvirtide +CID116130295 BPTI +CID116130957 Org +CID116131215 Abarelix +CID116131310 hepatitis +CID116132265 ACTH(1-39 +CID116132283 glucagon +CID116132344 oCRH +CID116132418 NovoLog +CID116132438 Humalog +CID116132441 Refludan +CID116132446 Symlin +CID116134956 liraglutide +CID116136245 degarelix +CID116137271 insulin +CID116139342 Lyxumia +CID116139605 teduglutide +CID116156130 exenatide +CID116158207 linaclotide +CID116158473 o291 +CID116213095 pentastarch +CID116220172 ivacaftor +CID117754772 ruxolitinib +CID119371515 alcaftadine +CID122834577 nesiritide +CID123668479 raltegravir +CID123689036 pertechnetate +CID123690938 piroxicam +CID124762228 Azarga +CID124776445 vismodegib +CID124812758 canagliflozin +CID124822371 florbetapir +CID124826799 ponatinib +CID124838347 glucagon +CID124846132 hetastarch +CID124950485 cobicistat +CID124965990 suvorexant +CID125074470 Triptorelin +CID125074886 cetrorelix +CID125077405 Nafarelin +CID125077993 Histrelin +CID125094462 sofosbuvir +CID125102847 cabozantinib +CID125880656 doxycycline +CID125880664 tetracycline +CID126275995 minocycline +CID140468184 acenocoumarol +CID142611257 vemurafenib +CID144134877 oxytetracycline +CID144146714 Lantus +CID144201342 tesamorelin +CID144201343 ecallantide +CID144462760 dabrafenib +CID144564107 mipomersen +CID144564722 Promacta +CID144567678 dalbavancin +CID146181941 Signifor +CID146216142 dolutegravir +CID151508717 tenoxicam +CID151601240 demeclocycline +CID153477714 heparin +CID153627505 hydroxypropyl +CID154677977 A77 +CID154681041 tigecycline +CID154682541 doxycycline +CID154687131 lymecycline +CID156603655 pegaptanib +CID156842239 n-3 +CID170683024 x +CID170695640 colestyramine +CID171306834 K779 diff --git a/ddi/__init__.py b/ddi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ddi/dataset.py b/ddi/dataset.py new file mode 100644 index 0000000..561b30e --- /dev/null +++ b/ddi/dataset.py @@ -0,0 +1,213 @@ +import os +import numpy as np +import torch +from .utilities import ModelScore, ReaderWriter +from torch.utils.data import Dataset, DataLoader +from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit +from sklearn.utils.class_weight import compute_class_weight + + +class DDIDataTensor(Dataset): + + def __init__(self, X_feat, y): + self.X_feat = X_feat # tensor.float32, (drug pairs, features) + # drug interactions + self.y = y # tensor.float32, (drug pairs,) + self.num_samples = self.y.size(0) # int, number of drug pairs + + def 
__getitem__(self, indx):
+        return(self.X_feat[indx], self.y[indx], indx)
+
+    def __len__(self):
+        return(self.num_samples)
+
+
+class PartitionDataTensor(Dataset):
+
+    def __init__(self, ddi_datatensor, partition_ids, dsettype, fold_num):
+        self.ddi_datatensor = ddi_datatensor  # instance of :class:`DDIDataTensor`
+        self.partition_ids = partition_ids  # list of indices for drug pairs
+        self.dsettype = dsettype  # string, dataset type (i.e. train, validation, test)
+        self.fold_num = fold_num  # int, fold number
+        self.num_samples = len(self.partition_ids)  # int, number of drug pairs in the partition
+
+    def __getitem__(self, indx):
+        target_id = self.partition_ids[indx]
+        return self.ddi_datatensor[target_id]
+
+    def __len__(self):
+        return(self.num_samples)
+
+
+def construct_load_dataloaders(dataset_fold, dsettypes, config, wrk_dir):
+    """construct dataloaders for the dataset for one fold
+
+       Args:
+            dataset_fold: dictionary,
+                          example: {'train': <instance of PartitionDataTensor>,
+                                    'validation': <instance of PartitionDataTensor>,
+                                    'test': <instance of PartitionDataTensor>,
+                                    'class_weights': tensor([0.6957, 1.7778])
+                                   }
+            dsettypes: list, ['train', 'validation', 'test']
+            config: dict, {'batch_size': int, 'num_workers': int}
+            wrk_dir: string, folder path
+    """
+
+    # setup data loaders
+    data_loaders = {}
+    epoch_loss_avgbatch = {}
+    epoch_loss_avgsamples = {}
+    flog_out = {}
+    score_dict = {}
+    class_weights = {}
+    for dsettype in dsettypes:
+        if(dsettype == 'train'):
+            shuffle = True
+            class_weights[dsettype] = dataset_fold['class_weights']
+        else:
+            shuffle = False
+            class_weights[dsettype] = None
+        data_loaders[dsettype] = DataLoader(dataset_fold[dsettype],
+                                            batch_size=config['batch_size'],
+                                            shuffle=shuffle,
+                                            num_workers=config['num_workers'])
+
+        epoch_loss_avgbatch[dsettype] = []
+        epoch_loss_avgsamples[dsettype] = []
+        score_dict[dsettype] = ModelScore(0, 0.0, 0.0, 0.0, 0.0, 0.0)  # (best_epoch, auc, aupr, f1, precision, recall)
+        if(wrk_dir):
+            flog_out[dsettype] = os.path.join(wrk_dir, dsettype + ".log")
+        else:
+            flog_out[dsettype] = None
+
+    return (data_loaders, epoch_loss_avgbatch, epoch_loss_avgsamples, score_dict, class_weights, flog_out)
+
+
+def preprocess_features(feat_fpath):
+    X_fea = np.loadtxt(feat_fpath, dtype=float, delimiter=",")
+    r, c = np.triu_indices(len(X_fea), 1)  # take indices above the diagonal (offset by 1)
+    return np.concatenate((X_fea[r], X_fea[c]), axis=1)
+
+
+def preprocess_labels(interaction_fpath):
+    interaction_matrix = np.loadtxt(interaction_fpath, dtype=float, delimiter=",")
+    r, c = np.triu_indices(len(interaction_matrix), 1)  # take indices above the diagonal (offset by 1)
+    return interaction_matrix[r, c]
+
+
+def get_stratified_partitions(ddi_datatensor, num_folds=5, valid_set_portion=0.1, random_state=42):
+    """Generate stratified k-fold splits of drug-pair ids based on the interaction label
+
+       Args:
+            ddi_datatensor: instance of :class:`DDIDataTensor`
+    """
+    skf_trte = StratifiedKFold(n_splits=num_folds, random_state=random_state, shuffle=True)  # split train and test
+    data_partitions = {}
+    X = ddi_datatensor.X_feat
+    y = ddi_datatensor.y
+    fold_num = 0
+    for train_index, test_index in skf_trte.split(X, y):
+
+        data_partitions[fold_num] = {'train': train_index,
+                                     'test': test_index}
+        print("fold_num:", fold_num)
+        print('train data')
+        report_label_distrib(y[train_index])
+        print('test data')
+        report_label_distrib(y[test_index])
+        print()
+        fold_num += 1
+        print("-"*25)
+    return(data_partitions)
+
+
+def report_label_distrib(labels):
+    classes, counts = np.unique(labels, return_counts=True)
+    norm_counts = counts/counts.sum()
+    for i, label in enumerate(classes):
+        print("class:", label, "norm count:", norm_counts[i])
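+
+# A minimal usage sketch of how the helpers above compose (illustrative only;
+# the file names below are hypothetical placeholders, not files shipped with
+# this patch):
+#
+#     X_feat = torch.from_numpy(preprocess_features('similarity_matrix.csv')).float()
+#     y = torch.from_numpy(preprocess_labels('interaction_matrix.csv')).float()
+#     ddi_datatensor = DDIDataTensor(X_feat, y)
+#     data_partitions = get_stratified_partitions(ddi_datatensor, num_folds=5)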
print("class:", label, "norm count:", norm_counts[i]) + + +def validate_partitions(data_partitions, drugpairs_ids, valid_set_portion=0.1, test_set_portion=0.2): + if(not isinstance(drugpairs_ids, set)): + drugpairs_ids = set(drugpairs_ids) + num_pairs = len(drugpairs_ids) + test_set_accum = set([]) + for fold_num in data_partitions: + print('fold_num', fold_num) + tr_ids = data_partitions[fold_num]['train'] + te_ids = data_partitions[fold_num]['test'] + + tr_te = set(tr_ids).intersection(te_ids) + # assert there is no overlap among train and test partition within a fold + assert len(tr_te) == 0 + print('expected test set size:', test_set_portion*num_pairs, '; actual test set size:', len(te_ids)) + print() + assert np.abs(test_set_portion*num_pairs - len(te_ids)) <= 2 + test_set_accum = test_set_accum.union(te_ids) + # verify that assembling test sets from each of the five folds would be equivalent to all drugpair ids + assert len(test_set_accum) == num_pairs + assert test_set_accum == drugpairs_ids + print("passed intersection and overlap test (i.e. train, validation and test sets are not", + "intersecting in each fold and the concatenation of test sets from each fold is", + "equivalent to the whole dataset)") + + +def generate_partition_datatensor(ddi_datatensor, data_partitions): + datatensor_partitions = {} + for fold_num in data_partitions: + datatensor_partitions[fold_num] = {} + for dsettype in data_partitions[fold_num]: + target_ids = data_partitions[fold_num][dsettype] + datatensor_partition = PartitionDataTensor(ddi_datatensor, target_ids, dsettype, fold_num) + datatensor_partitions[fold_num][dsettype] = datatensor_partition + return(datatensor_partitions) + +def build_datatensor_partitions(data_partitions, ddi_datatensor): + datatensor_partitions = generate_partition_datatensor(ddi_datatensor, data_partitions) + compute_class_weights_per_fold_(datatensor_partitions) + return datatensor_partitions + +def compute_class_weights(labels_tensor): + classes, counts = np.unique(labels_tensor, return_counts=True) + # print("classes", classes) + # print("counts", counts) + class_weights = compute_class_weight('balanced', classes, labels_tensor.numpy()) + return class_weights + + +def compute_class_weights_per_fold_(datatensor_partitions): + """computes inverse class weights and updates the passed dictionary + + Args: + datatensor_partitions: dictionary, {fold_num, int: {datasettype, string:{datapartition, instance of + :class:`PartitionDataTensor`}}}} + + Example: + datatensor_partitions + {0: {'train': , + 'validation': , + 'test': + }, .. + } + is updated after computation of class weights to + {0: {'train': , + 'validation': , + 'test': , + 'class_weights': tensor([0.6957, 1.7778]), + }, .. 
+ } + """ + + for fold_num in datatensor_partitions: # looping over the numbered folds + dpartition = datatensor_partitions[fold_num]['train'] + partition_ids = dpartition.partition_ids + labels = dpartition.ddi_datatensor.y[partition_ids] + datatensor_partitions[fold_num]['class_weights'] = torch.from_numpy(compute_class_weights(labels)).float() + +def read_pickles(data_dir, device): + + # Read stored data structures + data_partitions = ReaderWriter.read_data(os.path.join(data_dir, 'data_partitions.pkl')) + # instance of :class:`DDIDataTensor` + ddi_datatensor = ReaderWriter.read_tensor(os.path.join(data_dir, 'ddi_datatensor.torch'), device) + + return data_partitions, ddi_datatensor diff --git a/ddi/model.py b/ddi/model.py new file mode 100644 index 0000000..ed6719b --- /dev/null +++ b/ddi/model.py @@ -0,0 +1,52 @@ +import torch +from torch import nn +import torch.nn.functional as F + +class NDD_Paper(nn.Module): + def __init__(self, D_in=1096, H1=300, H2=400, D_out=1, drop=0.5): + super(NDD_Paper, self).__init__() + # an affine operation: y = Wx + b + self.fc1 = nn.Linear(D_in, H1) # Fully Connected + self.fc2 = nn.Linear(H1, H2) + self.fc3 = nn.Linear(H2, D_out) + self.drop = nn.Dropout(drop) + self._init_weights() + + def forward(self, x): + x = F.relu(self.fc1(x)) + x = self.drop(x) + x = F.relu(self.fc2(x)) + x = self.drop(x) + x = self.fc3(x) + return x + + def _init_weights(self): + for m in self.modules(): + if(isinstance(m, nn.Linear)): + m.weight.data.normal_(0, 0.05) + m.bias.data.uniform_(-1,0) + + +class NDD_Code(nn.Module): + def __init__(self, D_in=1096, H1=400, H2=300, D_out=1, drop=0.5): + super(NDD_Code, self).__init__() + # an affine operation: y = Wx + b + self.fc1 = nn.Linear(D_in, H1) # Fully Connected + self.fc2 = nn.Linear(H1, H2) + self.fc3 = nn.Linear(H2, D_out) + self.drop = nn.Dropout(drop) + self._init_weights() + + def forward(self, x): + x = F.relu(self.fc1(x)) + x = self.drop(x) + x = F.relu(self.fc2(x)) + x = self.drop(x) + x = self.fc3(x) + return x + + def _init_weights(self): + for m in self.modules(): + if(isinstance(m, nn.Linear)): + nn.init.xavier_normal_(m.weight.data) + m.bias.data.uniform_(-1,0) \ No newline at end of file diff --git a/ddi/run_workflow.py b/ddi/run_workflow.py new file mode 100644 index 0000000..67bdbbf --- /dev/null +++ b/ddi/run_workflow.py @@ -0,0 +1,353 @@ + +import os +import itertools +from .utilities import get_device, create_directory, ReaderWriter, perfmetric_report, plot_loss +from .model import NDD_Code +from .dataset import construct_load_dataloaders +import numpy as np +import pandas as pd +import torch +from torch import nn +import torch.multiprocessing as mp + + +class NDDHyperparamConfig: + def __init__(self, fc1_dim, fc2_dim, p_dropout, l2_reg, batch_size, num_epochs): + self.fc1_dim = fc1_dim + self.fc2_dim = fc2_dim + self.p_dropout = p_dropout + self.l2_reg = l2_reg + self.batch_size = batch_size + self.num_epochs = num_epochs + + def __repr__(self): + desc = " fc1_dim:{}\n fc2_dim:{}\n p_dropout:{} \n " \ + "l2_reg:{} \n batch_size:{} \n num_epochs: {}".format(self.fc1_dim, + self.fc2_dim, + self.p_dropout, + self.l2_reg, + self.batch_size, + self.num_epochs) + return desc + + +def generate_models_config(hyperparam_config, similarity_type, fold_num, fdtype): + + + # currently generic_config is shared across all models + # leaving it as placeholder such that custom generic configs could be passed :) + + + ndd_config = {'input_dim':1096, + 'fc1_dim':hyperparam_config.fc1_dim, + 
'fc2_dim':hyperparam_config.fc2_dim,
+                  'pdropout':hyperparam_config.p_dropout,
+                  'to_gpu':True,
+                  }
+    generic_config = {'fdtype':fdtype}
+    dataloader_config = {'batch_size': hyperparam_config.batch_size,
+                         'num_workers': 0}
+    config = {'dataloader_config': dataloader_config,
+              'ndd_config': ndd_config,
+              'generic_config': generic_config
+              }
+
+    options = {'similarity_type': similarity_type,
+               'fold_num': fold_num,
+               'num_epochs': hyperparam_config.num_epochs,
+               'weight_decay': hyperparam_config.l2_reg}
+
+    return config, options
+
+
+def build_config_map(similarity_type):
+    hyperparam_config = NDDHyperparamConfig(400, 300, 0.5, 0, 200, 20)
+    fold_num = -1
+    mconfig, options = generate_models_config(hyperparam_config, similarity_type, fold_num, torch.float32)
+    return mconfig, options
+
+
+def dump_dict_content(dsettype_content_map, dsettypes, desc, wrk_dir):
+    for dsettype in dsettypes:
+        path = os.path.join(wrk_dir, '{}_{}.pkl'.format(desc, dsettype))
+        ReaderWriter.dump_data(dsettype_content_map[dsettype], path)
+
+
+def run_ddi(data_partition, dsettypes, config, options, wrk_dir,
+            state_dict_dir=None, to_gpu=True, gpu_index=0):
+    pid = "{}".format(os.getpid())  # process id description
+    # get data loader config
+    dataloader_config = config['dataloader_config']
+    cld = construct_load_dataloaders(data_partition, dsettypes, dataloader_config, wrk_dir)
+    # dictionaries by dsettypes
+    data_loaders, epoch_loss_avgbatch, epoch_loss_avgsamples, score_dict, class_weights, flog_out = cld
+    # print(class_weights)
+    device = get_device(to_gpu, gpu_index)  # gpu device
+    generic_config = config['generic_config']
+    fdtype = generic_config['fdtype']
+    if('train' in class_weights):
+        class_weights = class_weights['train'][1].type(fdtype).to(device)  # weight of the positive class as fdtype tensor
+    else:
+        class_weights = torch.tensor([1]).type(fdtype).to(device)  # weighting all cases equally
+
+    print("class weights", class_weights)
+    # loss_func = torch.nn.NLLLoss(weight=class_weights, reduction='mean')  # negative log likelihood loss
+    # binary cross entropy
+    loss_func = torch.nn.BCEWithLogitsLoss(pos_weight=class_weights, reduction='mean')
+
+    num_epochs = options.get('num_epochs', 50)
+    fold_num = options.get('fold_num')
+
+    # parse config dict
+    ndd_config = config['ndd_config']
+
+    # ddi model
+    ndd_model = NDD_Code(D_in=ndd_config['input_dim'],
+                         H1=ndd_config['fc1_dim'],
+                         H2=ndd_config['fc2_dim'],
+                         D_out=1,
+                         drop=ndd_config['pdropout'])
+
+    # define optimizer and group parameters
+    models_param = list(ndd_model.parameters())
+    models = [(ndd_model, 'ndd_code')]
+
+    if(state_dict_dir):  # load state dictionary of saved models
+        num_train_epochs = 20
+        for m, m_name in models:  # TODO: update this as it should read the best model achieved on the validation set
+            m.load_state_dict(torch.load(os.path.join(state_dict_dir, '{}_{}.pkl'.format(m_name, num_train_epochs)), map_location=device))
+
+    # update models fdtype and move to device
+    for m, m_name in models:
+        m.type(fdtype).to(device)
+
+    if('train' in data_loaders):
+        weight_decay = options.get('weight_decay', 1e-3)
+        optimizer = torch.optim.Adam(models_param, weight_decay=weight_decay, lr=1e-3)
+        # see the paper "Cyclical Learning Rates for Training Neural Networks" for the parameters' choice
+        # `https://arxiv.org/pdf/1506.01186.pdf`
+        # pytorch version >1.1, scheduler should be called after optimizer
+        # for cyclical lr scheduler, it should be called after each batch update
+        num_iter = len(data_loaders['train'])  # num_train_samples/batch_size
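+        # Sizing note: with step_size_up = 5*num_iter (i.e. 5 epochs of batches)
+        # and mode='triangular', one full LR cycle spans 10 epochs, so the
+        # default 20-epoch runs cover roughly two complete cycles.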
+        c_step_size = int(np.ceil(5*num_iter))  # this should be 2-10 times num_iter
+        base_lr = 3e-4
+        max_lr = 5*base_lr  # 3-5 times base_lr
+        cyc_scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr, max_lr, step_size_up=c_step_size,
+                                                          mode='triangular', cycle_momentum=False)
+
+    # if ('validation' in data_loaders):
+    m_state_dict_dir = create_directory(os.path.join(wrk_dir, 'model_statedict'))
+
+    if(num_epochs > 1):
+        fig_dir = create_directory(os.path.join(wrk_dir, 'figures'))
+
+    # dump config dictionaries on disk
+    config_dir = create_directory(os.path.join(wrk_dir, 'config'))
+    ReaderWriter.dump_data(config, os.path.join(config_dir, 'mconfig.pkl'))
+    ReaderWriter.dump_data(options, os.path.join(config_dir, 'exp_options.pkl'))
+    sigmoid = torch.nn.Sigmoid()
+    for epoch in range(num_epochs):
+        # print("-"*35)
+        for dsettype in dsettypes:
+            print("device: {} | similarity_type: {} | fold_num: {} | epoch: {} | dsettype: {} | pid: {}"
+                  "".format(device, options.get('similarity_type'), fold_num, epoch, dsettype, pid))
+            pred_class = []
+            ref_class = []
+            prob_scores = []
+            ddi_ids = []
+            data_loader = data_loaders[dsettype]
+            # total_num_samples = len(data_loader.dataset)
+            epoch_loss = 0.
+            epoch_loss_deavrg = 0.
+
+            if(dsettype == 'train'):  # should be only for train
+                for m, m_name in models:
+                    m.train()
+            else:
+                for m, m_name in models:
+                    m.eval()
+
+            for i_batch, samples_batch in enumerate(data_loader):
+                # print('batch num:', i_batch)
+
+                # zero model grad
+                if(dsettype == 'train'):
+                    optimizer.zero_grad()
+
+                X_batch, y_batch, ids = samples_batch
+
+                X_batch = X_batch.to(device)
+                y_batch = y_batch.reshape(-1, 1)  # TODO: reshape when preprocessing features
+                y_batch = y_batch.to(device)
+                # print('ids', ids.shape, ids.dtype)
+
+                with torch.set_grad_enabled(dsettype == 'train'):
+                    num_samples_perbatch = X_batch.size(0)
+                    # print("number_samples_per_batch", num_samples_perbatch)
+                    y_pred_logit = ndd_model(X_batch)
+                    y_pred_prob = sigmoid(y_pred_logit)
+                    y_pred_clss = torch.zeros(y_pred_prob.shape, device=device, dtype=torch.int32)
+                    y_pred_clss[y_pred_prob > 0.5] = 1
+
+                    # print('y_pred_logit', y_pred_logit.shape, y_pred_logit.dtype)
+                    # print('y_pred_prob', y_pred_prob.shape, y_pred_prob.dtype)
+                    # print('y_pred_class', y_pred_clss.shape, y_pred_clss.dtype)
+                    # print('y_batch', y_batch.shape, y_batch.dtype)
+
+                    if(dsettype == 'test'):
+                        pred_class.extend(y_pred_clss.view(-1).tolist())
+                        ref_class.extend(y_batch.view(-1).tolist())
+                        prob_scores.extend(y_pred_prob.view(-1).tolist())
+                        ddi_ids.extend(ids.tolist())
+
+                    loss = loss_func(y_pred_logit, y_batch)
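+                    # Note: BCEWithLogitsLoss fuses the sigmoid with the binary
+                    # cross entropy for numerical stability, so the raw logits go
+                    # into the loss; `y_pred_prob` above is only used for
+                    # thresholding and the reported probability scores.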
+                    if(dsettype == 'train'):
+                        # print("computing loss")
+                        # backward step (i.e. compute gradients)
+                        loss.backward()
+                        # optimizer step -- update weights
+                        optimizer.step()
+                        # step the scheduler after each batch
+                        cyc_scheduler.step()
+                    epoch_loss += loss.item()
+                    # deaverage the loss to deal with the last batch having unequal size
+                    epoch_loss_deavrg += loss.item() * num_samples_perbatch
+
+                    # torch.cuda.ipc_collect()
+                    # torch.cuda.empty_cache()
+            # end of epoch
+            # print("+"*35)
+            epoch_loss_avgbatch[dsettype].append(epoch_loss/len(data_loader))
+            epoch_loss_avgsamples[dsettype].append(epoch_loss_deavrg/len(data_loader.dataset))
+
+            modelscore = perfmetric_report(pred_class, ref_class, prob_scores, epoch+1, flog_out[dsettype])
+            perf = modelscore.s_auc
+            if(perf > score_dict[dsettype].s_auc):
+                score_dict[dsettype] = modelscore
+                for m, m_name in models:
+                    torch.save(m.state_dict(), os.path.join(m_state_dict_dir, '{}_{}.pkl'.format(m_name, (epoch+1))))
+
+    if(num_epochs > 1):
+        plot_loss(epoch_loss_avgbatch, epoch_loss_avgsamples, fig_dir)
+
+    # dump_scores
+    dump_dict_content(score_dict, list(score_dict.keys()), 'score', wrk_dir)
+    # this will run once
+    if(dsettype == 'test'):
+        # save predictions
+        predictions_df = build_predictions_df(ddi_ids, ref_class, pred_class, prob_scores)
+        predictions_path = os.path.join(wrk_dir, 'predictions.csv')
+        predictions_df.to_csv(predictions_path)
+
+    # return ref_class, pred_class, prob_scores
+
+
+def build_predictions_df(ids, true_class, pred_class, prob_scores):
+    df_dict = {
+        'id': ids,
+        'true_class': true_class,
+        'pred_class': pred_class,
+        'prob_score_class1': prob_scores,
+    }
+    predictions_df = pd.DataFrame(df_dict)
+    predictions_df.set_index('id', inplace=True)
+    return predictions_df
+
+
+def generate_hyperparam_space():
+    fc1_dim = [400]
+    fc2_dim = [300]
+    l2_reg_vals = [0.0]
+    batch_size_vals = [200]
+    dropout_vals = [0.5]
+    num_epochs_vals = [20]
+    hyperparam_space = list(itertools.product(*[fc1_dim, fc2_dim,
+                                                dropout_vals,
+                                                l2_reg_vals,
+                                                batch_size_vals,
+                                                num_epochs_vals]))
+    return hyperparam_space
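+
+# Rough sanity check for the random-search helpers below (illustrative):
+# with prob_interval_truemax=0.05 and prob_estim=0.95, compute_numtrials gives
+# ceil(log(1-0.95)/log(1-0.05)) + 1 = 60 trials, which get_hyperparam_options
+# then caps at the size of the enumerated space (a single configuration above).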
+
+
+def get_hyperparam_options(prob_interval_truemax, prob_estim, random_seed=42):
+    np.random.seed(random_seed)
+    num_trials = compute_numtrials(prob_interval_truemax, prob_estim)
+    hyperparam_space = generate_hyperparam_space()
+    if(num_trials > len(hyperparam_space)):
+        num_trials = len(hyperparam_space)
+    indxs = np.random.choice(len(hyperparam_space), size=num_trials, replace=False)
+    # fc1_dim, fc2_dim, p_dropout, l2_reg, batch_size, num_epochs
+    return [NDDHyperparamConfig(*hyperparam_space[indx]) for indx in indxs]
+
+
+def get_random_simtype_fold_per_hyperparam_exp(similarity_types, random_seed=42):
+    """Get for each similarity type the fold number to use for identifying optimal hyperparams
+    """
+    np.random.seed(random_seed)
+    simtype_fold = {}
+    for sim_type in similarity_types:
+        simtype_fold[sim_type] = np.random.randint(5)
+    return simtype_fold
+
+
+def get_saved_config(config_dir):
+    options = ReaderWriter.read_data(os.path.join(config_dir, 'exp_options.pkl'))
+    mconfig = ReaderWriter.read_data(os.path.join(config_dir, 'mconfig.pkl'))
+    return mconfig, options
+
+
+def get_index_argmax(score_matrix, target_indx):
+    argmax_indx = np.argmax(score_matrix, axis=0)[target_indx]
+    return argmax_indx
+
+
+def train_val_run(datatensor_partitions, config_map, train_val_dir, fold_gpu_map, num_epochs=20):
+    dsettypes = ['train']
+    mconfig, options = config_map
+    options['num_epochs'] = num_epochs  # override number of epochs with the user-specified value
+    similarity_type = options['similarity_type']
+    for fold_num in datatensor_partitions:
+        # update the fold number in options to the current fold
+        options['fold_num'] = fold_num
+        data_partition = datatensor_partitions[fold_num]
+        path = os.path.join(train_val_dir, 'train_val_{}'.format(similarity_type), 'fold_{}'.format(fold_num))
+        wrk_dir = create_directory(path)
+        run_ddi(data_partition, dsettypes, mconfig, options, wrk_dir,
+                state_dict_dir=None, to_gpu=True, gpu_index=fold_gpu_map[fold_num])
+
+
+def test_run(datatensor_partitions, config_map, train_val_dir, test_dir, fold_gpu_map, num_epochs=1):
+    dsettypes = ['test']
+    mconfig, options = config_map
+    options['num_epochs'] = num_epochs  # override number of epochs with the user-specified value
+    similarity_type = options['similarity_type']
+    for fold_num in datatensor_partitions:
+        # update the fold number in options to the current fold
+        options['fold_num'] = fold_num
+        data_partition = datatensor_partitions[fold_num]
+        path = os.path.join(train_val_dir, 'train_val_{}'.format(similarity_type), 'fold_{}'.format(fold_num))
+        if os.path.exists(path):
+            train_dir = create_directory(path)
+            # path of the state_dict saved while training this fold
+            state_dict_pth = os.path.join(train_dir, 'model_statedict')
+            path = os.path.join(test_dir, 'test_{}'.format(similarity_type), 'fold_{}'.format(fold_num))
+            test_wrk_dir = create_directory(path)
+            run_ddi(data_partition, dsettypes, mconfig, options, test_wrk_dir,
+                    state_dict_dir=state_dict_pth, to_gpu=True,
+                    gpu_index=fold_gpu_map[fold_num])
+        else:
+            print('WARNING: train directory not found: {}'.format(path))
+
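For orientation, a hypothetical driver for these two entry points. The partition dict, directory names and similarity type below are illustrative assumptions, not part of this patch (building the actual data tensors is handled in ddi/dataset.py):

```python
from ddi.run_workflow import train_val_run, test_run, get_saved_config

# assumed: fold_num -> data partition with 'train'/'test' loaders (see ddi/dataset.py)
datatensor_partitions = build_datatensor_partitions()  # placeholder, not part of the patch
mconfig, options = get_saved_config('config')          # or construct the dicts directly
options['similarity_type'] = 'sideeffect'
fold_gpu_map = {fold: 0 for fold in datatensor_partitions}  # run every fold on cuda:0

train_val_run(datatensor_partitions, (mconfig, options), 'experiments', fold_gpu_map, num_epochs=20)
test_run(datatensor_partitions, (mconfig, options), 'experiments', 'experiments', fold_gpu_map, num_epochs=1)
```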
diff --git a/ddi/utilities.py b/ddi/utilities.py
new file mode 100644
index 0000000..f7be57a
--- /dev/null
+++ b/ddi/utilities.py
@@ -0,0 +1,330 @@
+import os
+import shutil
+import pickle
+import torch
+import numpy as np
+import pandas as pd
+from sklearn.metrics import classification_report, f1_score, roc_curve, precision_recall_curve, accuracy_score, \
+    recall_score, precision_score, roc_auc_score, auc
+from matplotlib import pyplot as plt
+
+
+class ModelScore:
+    def __init__(self, best_epoch_indx, s_auc, s_aupr, s_f1, s_precision, s_recall):
+        self.best_epoch_indx = best_epoch_indx
+        self.s_auc = s_auc
+        self.s_aupr = s_aupr
+        self.s_f1 = s_f1
+        self.s_precision = s_precision
+        self.s_recall = s_recall
+
+    def __repr__(self):
+        desc = " best_epoch_indx:{}\n auc:{} \n aupr:{} \n f1:{} \n precision:{} \n recall:{} \n" \
+               "".format(self.best_epoch_indx, self.s_auc, self.s_aupr, self.s_f1, self.s_precision, self.s_recall)
+        return desc
+
+
+def get_performance_results(similarity_type, target_dir, num_folds, dsettype):
+    num_metrics = 3  # number of metrics to focus on
+    perf_dict = [{} for i in range(num_metrics)]  # track auc, aupr and f1 measure
+    if dsettype == 'train':
+        prefix = 'train_val'
+    else:
+        prefix = dsettype
+    for fold_num in range(num_folds):
+        fold_dir = os.path.join(target_dir,
+                                '{}_{}'.format(prefix, similarity_type),
+                                'fold_{}'.format(fold_num))
+        score_file = os.path.join(fold_dir, 'score_{}.pkl'.format(dsettype))
+        if os.path.isfile(score_file):
+            mscore = ReaderWriter.read_data(score_file)
+            perf_dict[0]['fold{}'.format(fold_num)] = mscore.s_auc
+            perf_dict[1]['fold{}'.format(fold_num)] = mscore.s_aupr
+            perf_dict[2]['fold{}'.format(fold_num)] = mscore.s_f1
+    perf_df = []
+    for i in range(num_metrics):
+        all_perf = perf_dict[i]
+        all_perf_df = pd.DataFrame(all_perf, index=[similarity_type])
+        median = all_perf_df.median(axis=1)
+        mean = all_perf_df.mean(axis=1)
+        stddev = all_perf_df.std(axis=1)
+        all_perf_df['mean'] = mean
+        all_perf_df['median'] = median
+        all_perf_df['stddev'] = stddev
+        perf_df.append(all_perf_df.sort_values('mean', ascending=False))
+    return perf_df
+
+
+def build_performance_dfs(similarity_types, target_dir, num_folds, dsettype):
+    auc_df = pd.DataFrame()
+    aupr_df = pd.DataFrame()
+    f1_df = pd.DataFrame()
+    for sim_type in similarity_types:
+        s_auc, s_aupr, s_f1 = get_performance_results(sim_type, target_dir, num_folds, dsettype)
+        auc_df = pd.concat([auc_df, s_auc], sort=True)
+        aupr_df = pd.concat([aupr_df, s_aupr], sort=True)
+        f1_df = pd.concat([f1_df, s_f1], sort=True)
+    return auc_df, aupr_df, f1_df
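Downstream, the per-fold score pickles written by `run_ddi` can be aggregated per similarity type; a sketch, assuming a `test_dir` laid out by `test_run` above and two example similarity-type names:

```python
from ddi.utilities import build_performance_dfs

auc_df, aupr_df, f1_df = build_performance_dfs(['sideeffect', 'offsideeffect'],
                                               'test_dir', num_folds=5, dsettype='test')
print(auc_df[['mean', 'median', 'stddev']])  # one row per similarity type
```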
+
+
+class ReaderWriter(object):
+    """class for dumping, reading and logging data"""
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def dump_data(data, file_name, mode="wb"):
+        """dump data by pickling
+           Args:
+               data: data to be pickled
+               file_name: file path where data will be dumped
+               mode: specify writing options i.e. binary or unicode
+        """
+        with open(file_name, mode) as f:
+            pickle.dump(data, f)
+
+    @staticmethod
+    def read_data(file_name, mode="rb"):
+        """read dumped/pickled data
+           Args:
+               file_name: file path where data was dumped
+               mode: specify reading options i.e. binary or unicode
+        """
+        with open(file_name, mode) as f:
+            data = pickle.load(f)
+        return(data)
+
+    @staticmethod
+    def dump_tensor(data, file_name):
+        """dump a tensor using PyTorch's custom serialization. Enables re-loading the tensor on a specific gpu later.
+           Args:
+               data: Tensor
+               file_name: file path where data will be dumped
+        """
+        torch.save(data, file_name)
+
+    @staticmethod
+    def read_tensor(file_name, device):
+        """read a dumped tensor
+           Args:
+               file_name: file path where data was dumped
+               device: the gpu to load the tensor on to
+        """
+        data = torch.load(file_name, map_location=device)
+        return data
+
+    @staticmethod
+    def write_log(line, outfile, mode="a"):
+        """write data to a file
+           Args:
+               line: string representing data to be written out
+               outfile: file path where data will be written/logged
+               mode: specify writing options i.e. append, write
+        """
+        with open(outfile, mode) as f:
+            f.write(line)
+
+    @staticmethod
+    def read_log(file_name, mode="r"):
+        """read logged data from a file, line by line
+           Args:
+               file_name: file path where data was written/logged
+               mode: specify reading options i.e. read
+        """
+        with open(file_name, mode) as f:
+            for line in f:
+                yield line
+
+
+def create_directory(folder_name, directory="current"):
+    """create directory/folder (if it does not exist) and returns the path of the directory
+       Args:
+           folder_name: string representing the name of the folder to be created
+       Keyword Arguments:
+           directory: string representing the directory where to create the folder
+                      if `current` then the folder will be created in the current directory
+    """
+    if directory == "current":
+        path_current_dir = os.path.dirname(__file__)  # __file__ refers to utilities.py
+    else:
+        path_current_dir = directory
+    path_new_dir = os.path.join(path_current_dir, folder_name)
+    if not os.path.exists(path_new_dir):
+        os.makedirs(path_new_dir)
+    return(path_new_dir)
+
+
+def get_device(to_gpu, index=0):
+    is_cuda = torch.cuda.is_available()
+    if(is_cuda and to_gpu):
+        target_device = 'cuda:{}'.format(index)
+    else:
+        target_device = 'cpu'
+    return torch.device(target_device)
+
+
+def report_available_cuda_devices():
+    if(torch.cuda.is_available()):
+        n_gpu = torch.cuda.device_count()
+        print('number of GPUs available:', n_gpu)
+        for i in range(n_gpu):
+            print("cuda:{}, name:{}".format(i, torch.cuda.get_device_name(i)))
+            device = torch.device('cuda', i)
+            get_cuda_device_stats(device)
+            print()
+    else:
+        print("no GPU devices available!!")
+
+
+def get_cuda_device_stats(device):
+    print('total memory available:', torch.cuda.get_device_properties(device).total_memory/(1024**3), 'GB')
+    print('total memory allocated on device:', torch.cuda.memory_allocated(device)/(1024**3), 'GB')
+    print('max memory allocated on device:', torch.cuda.max_memory_allocated(device)/(1024**3), 'GB')
+    print('total memory cached on device:', torch.cuda.memory_cached(device)/(1024**3), 'GB')
+    print('max memory cached on device:', torch.cuda.max_memory_cached(device)/(1024**3), 'GB')
+
+
+def get_interaction_stat(matrix):
+    w, h = matrix.shape
+    totalnum_elements = w*h
+    nonzero_elem = np.count_nonzero(matrix)
+    zero_elem = totalnum_elements - nonzero_elem
+    print('number of rows: {}, cols: {}'.format(w, h))
+    print('total number of elements', totalnum_elements)
+    print('number of nonzero elements', nonzero_elem)
+    print('number of zero elements', zero_elem)
+    print('diagonal elements ', np.diag(matrix))
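A short round-trip through `ReaderWriter` and the device helper; the file and directory names here are arbitrary examples:

```python
import os
import torch
from ddi.utilities import ReaderWriter, create_directory, get_device

device = get_device(to_gpu=True)   # falls back to CPU when CUDA is unavailable
out_dir = create_directory('scratch', directory='.')

# pickle round-trip of an arbitrary python object
ReaderWriter.dump_data({'fold': 0, 'auc': 0.91}, os.path.join(out_dir, 'demo.pkl'))
print(ReaderWriter.read_data(os.path.join(out_dir, 'demo.pkl')))

# tensors go through torch.save/torch.load so they can be re-mapped onto a device on load
ReaderWriter.dump_tensor(torch.zeros(3), os.path.join(out_dir, 'demo.pt'))
print(ReaderWriter.read_tensor(os.path.join(out_dir, 'demo.pt'), device))
```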
"Classification report on all events:" + lsep + report += str(classification_report(ref_target, pred_target)) + lsep + report += "macro f1:" + lsep + macro_f1 = f1_score(ref_target, pred_target, average='macro') + report += str(macro_f1) + lsep + report += "micro f1:" + lsep + micro_f1 = f1_score(ref_target, pred_target, average='micro') + report += str(micro_f1) + lsep + report += "accuracy:" + lsep + accuracy = accuracy_score(ref_target, pred_target) + report += str(accuracy) + lsep + + s_auc = roc_auc_score(ref_target, probscore) + report += "AUC:\n" + str(s_auc) + lsep + precision_scores, recall_scores, __ = precision_recall_curve(ref_target, probscore) + s_aupr = auc(recall_scores, precision_scores) + report += "AUPR:\n" + str(s_aupr) + lsep + s_f1 = f1_score(ref_target, pred_target) + report += "binary f1:\n" + str(s_f1) + lsep + s_recall = recall_score(ref_target, pred_target) + s_precision = precision_score(ref_target, pred_target) + report += "-"*30 + lsep + + modelscore = ModelScore(epoch, s_auc, s_aupr, s_f1, s_precision, s_recall) + ReaderWriter.write_log(report, outlog) + return modelscore + + +def plot_precision_recall_curve(ref_target, prob_poslabel, figname, outdir): + pr, rec, thresholds = precision_recall_curve(ref_target, prob_poslabel) + thresholds[0] = 1 + plt.figure(figsize=(9, 6)) + plt.plot(pr, rec, 'bo', label='Precision vs Recall') + # plt.plot(np.arange(0,len(thresholds)), thresholds, 'r-', label='thresholds') + plt.xlabel('Precision') + plt.ylabel('Recall') + plt.title('Precision vs. recall curve') + plt.legend(loc='best') + plt.savefig(os.path.join(outdir, os.path.join('precisionrecall_curve_{}'.format(figname) + ".pdf"))) + plt.close() + + +def plot_roc_curve(ref_target, prob_poslabel, figname, outdir): + fpr, tpr, thresholds = roc_curve(ref_target, prob_poslabel) + thresholds[0] = 1 + plt.figure(figsize=(9, 6)) + plt.plot(fpr, tpr, 'bo', label='TPR vs FPR') + plt.plot(fpr, thresholds, 'r-', label='thresholds') + plt.xlabel('False positive rate') + plt.ylabel('True positive rate') + plt.title('ROC curve') + plt.legend(loc='best') + plt.savefig(os.path.join(outdir, os.path.join('roc_curve_{}'.format(figname) + ".pdf"))) + plt.close() + + +def plot_loss(epoch_loss_avgbatch, epoch_loss_avgsamples, wrk_dir): + dsettypes = epoch_loss_avgbatch.keys() + for dsettype in dsettypes: + plt.figure(figsize=(9, 6)) + plt.plot(epoch_loss_avgbatch[dsettype], 'r', epoch_loss_avgsamples[dsettype], 'b') + plt.xlabel("number of epochs") + plt.ylabel("negative loglikelihood cost") + plt.legend(['epoch batch average loss', 'epoch training samples average loss']) + plt.savefig(os.path.join(wrk_dir, os.path.join(dsettype + ".pdf"))) + plt.close() + + +def delete_directory(directory): + if(os.path.isdir(directory)): + shutil.rmtree(directory) + + +# code from keras https://github.com/keras-team/keras/blob/master/keras/utils/np_utils.py +def to_categorical(y, num_classes=None, dtype='float32'): + """Converts a class vector (integers) to binary class matrix. + E.g. for use with categorical_crossentropy. + # Arguments + y: class vector to be converted into a matrix + (integers from 0 to num_classes). + num_classes: total number of classes. + dtype: The data type expected by the input, as a string + (`float32`, `float64`, `int32`...) + # Returns + A binary matrix representation of the input. The classes axis + is placed last. 
+ # Example + ```python + # Consider an array of 5 labels out of a set of 3 classes {0, 1, 2}: + > labels + array([0, 2, 1, 2, 0]) + # `to_categorical` converts this into a matrix with as many + # columns as there are classes. The number of rows + # stays the same. + > to_categorical(labels) + array([[ 1., 0., 0.], + [ 0., 0., 1.], + [ 0., 1., 0.], + [ 0., 0., 1.], + [ 1., 0., 0.]], dtype=float32) + ``` + """ + + y = np.array(y, dtype='int') + input_shape = y.shape + if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: + input_shape = tuple(input_shape[:-1]) + y = y.ravel() + if not num_classes: + num_classes = np.max(y) + 1 + n = y.shape[0] + categorical = np.zeros((n, num_classes), dtype=dtype) + categorical[np.arange(n), y] = 1 + output_shape = input_shape + (num_classes,) + categorical = np.reshape(categorical, output_shape) + return categorical + diff --git a/notebooks/.ipynb_checkpoints/02_AA_Skorch_DDI-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/02_AA_Skorch_DDI-checkpoint.ipynb deleted file mode 100644 index 111f7ab..0000000 --- a/notebooks/.ipynb_checkpoints/02_AA_Skorch_DDI-checkpoint.ipynb +++ /dev/null @@ -1,581 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![](https://scikit-learn.org/stable/_images/grid_search_workflow.png)" - ] - }, - { - "cell_type": "code", - "execution_count": 1230, - "metadata": {}, - "outputs": [], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore')" - ] - }, - { - "cell_type": "code", - "execution_count": 1231, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "import pickle\n", - "\n", - "from sklearn.datasets import make_classification\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import LabelEncoder\n", - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.model_selection import StratifiedKFold\n", - "from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, matthews_corrcoef, precision_recall_curve, auc\n", - "\n", - "from keras.utils import np_utils\n", - "\n", - "import torch\n", - "from torch import nn\n", - "import torch.nn.functional as F\n", - "from torch.utils.data import TensorDataset\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n", - "from torch.utils.tensorboard import SummaryWriter\n", - "from torch.optim import SGD\n", - "\n", - "import skorch\n", - "from skorch import NeuralNetClassifier\n", - "from skorch.callbacks import EpochScoring\n", - "from skorch.callbacks import TensorBoard\n", - "from skorch.helper import predefined_split" - ] - }, - { - "cell_type": "code", - "execution_count": 1232, - "metadata": {}, - "outputs": [], - "source": [ - "# import configurations (file paths, etc.)\n", - "import yaml\n", - "try:\n", - " from yaml import CLoader as Loader, CDumper as Dumper\n", - "except ImportError:\n", - " from yaml import Loader, Dumper\n", - " \n", - "configFile = '../cluster/data/medinfmk/ddi/config/config.yml'\n", - "\n", - "with open(configFile, 'r') as ymlfile:\n", - " cfg = yaml.load(ymlfile, Loader=Loader)" - ] - }, - { - "cell_type": "code", - "execution_count": 1233, - "metadata": {}, - "outputs": [], - "source": [ - "pathInput = cfg['filePaths']['dirRaw']\n", - "pathOutput = cfg['filePaths']['dirProcessed']\n", - "# path to store python binary files (pickles)\n", - "# in order not 
to recalculate them every time\n", - "pathPickles = cfg['filePaths']['dirProcessedFiles']['dirPickles']\n", - "pathRuns = cfg['filePaths']['dirProcessedFiles']['dirRuns']\n", - "pathPaperScores = cfg['filePaths']['dirRawFiles']['paper-individual-metrics-scores']\n", - "datasetDirs = cfg['filePaths']['dirRawDatasets']\n", - "DS1_path = str(datasetDirs[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helper Functions" - ] - }, - { - "cell_type": "code", - "execution_count": 1234, - "metadata": {}, - "outputs": [], - "source": [ - "def prepare_data(input_fea, input_lab, seperate=False):\n", - " offside_sim_path = input_fea\n", - " drug_interaction_matrix_path = input_lab\n", - " drug_fea = np.loadtxt(offside_sim_path,dtype=float,delimiter=\",\")\n", - " interaction = np.loadtxt(drug_interaction_matrix_path,dtype=int,delimiter=\",\")\n", - " \n", - " train = []\n", - " label = []\n", - " tmp_fea=[]\n", - " drug_fea_tmp = []\n", - " \n", - " for i in range(0, (interaction.shape[0]-1)):\n", - " for j in range((i+1), interaction.shape[1]):\n", - " label.append(interaction[i,j])\n", - " drug_fea_tmp_1 = list(drug_fea[i])\n", - " drug_fea_tmp_2 = list(drug_fea[j])\n", - " if seperate:\n", - " tmp_fea = (drug_fea_tmp_1,drug_fea_tmp_2)\n", - " else:\n", - " tmp_fea = drug_fea_tmp_1 + drug_fea_tmp_2\n", - " train.append(tmp_fea)\n", - "\n", - " return np.array(train), np.array(label)" - ] - }, - { - "cell_type": "code", - "execution_count": 1235, - "metadata": {}, - "outputs": [], - "source": [ - "def transfer_array_format(data):\n", - " formated_matrix1 = []\n", - " formated_matrix2 = []\n", - " for val in data:\n", - " formated_matrix1.append(val[0])\n", - " formated_matrix2.append(val[1])\n", - " return np.array(formated_matrix1), np.array(formated_matrix2)" - ] - }, - { - "cell_type": "code", - "execution_count": 1236, - "metadata": {}, - "outputs": [], - "source": [ - "def preprocess_labels(labels, encoder=None, categorical=True):\n", - " if not encoder:\n", - " encoder = LabelEncoder()\n", - " encoder.fit(labels)\n", - " y = encoder.transform(labels).astype(np.int32)\n", - " if categorical:\n", - " y = np_utils.to_categorical(y)\n", - "# print(y)\n", - " return y, encoder" - ] - }, - { - "cell_type": "code", - "execution_count": 1237, - "metadata": {}, - "outputs": [], - "source": [ - "def preprocess_names(labels, encoder=None, categorical=True):\n", - " if not encoder:\n", - " encoder = LabelEncoder()\n", - " encoder.fit(labels)\n", - " if categorical:\n", - " labels = np_utils.to_categorical(labels)\n", - " return labels, encoder" - ] - }, - { - "cell_type": "code", - "execution_count": 1238, - "metadata": {}, - "outputs": [], - "source": [ - "def getStratifiedKFoldSplit(X,y,n_splits):\n", - " skf = StratifiedKFold(n_splits=n_splits)\n", - " return skf.split(X,y)" - ] - }, - { - "cell_type": "code", - "execution_count": 1239, - "metadata": {}, - "outputs": [], - "source": [ - "class NDD(nn.Module):\n", - " def __init__(self, D_in=1096, H1=300, H2=400, D_out=2, drop=0.5):\n", - " super(NDD, self).__init__()\n", - " # an affine operation: y = Wx + b\n", - " self.fc1 = nn.Linear(D_in, H1) # Fully Connected\n", - " self.fc2 = nn.Linear(H1, H2)\n", - " self.fc3 = nn.Linear(H2, D_out)\n", - " self.drop = nn.Dropout(drop)\n", - " self._init_weights()\n", - "\n", - " def forward(self, x):\n", - " x = F.relu(self.fc1(x))\n", - " x = self.drop(x)\n", - " x = F.relu(self.fc2(x))\n", - " x = self.drop(x)\n", - " x = self.fc3(x)\n", - " return x\n", - " \n", - " def 
_init_weights(self):\n", - " for m in self.modules():\n", - " if(isinstance(m, nn.Linear)):\n", - " m.weight.data.normal_(0, 0.05)\n", - " m.bias.data.uniform_(-1,0)" - ] - }, - { - "cell_type": "code", - "execution_count": 1240, - "metadata": {}, - "outputs": [], - "source": [ - "def updateSimilarityDFSingleMetric(df, sim_type, metric, value):\n", - " df.loc[df['Similarity'] == sim_type, metric ] = round(value,3)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 1241, - "metadata": {}, - "outputs": [], - "source": [ - "def updateSimilarityDF(df, sim_type, AUROC, AUPR, F1, Rec, Prec):\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'AUC', AUROC)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'AUPR', AUPR)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'F-measure', F1)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'Recall', Rec)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'Precision', Prec)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 1242, - "metadata": {}, - "outputs": [], - "source": [ - "def getNetParamsStr(net, str_hidden_layers_params, net_params_to_print=[\"max_epochs\", \"batch_size\"]):\n", - " net_params = [val for sublist in [[x,net.get_params()[x]] for x in net_params_to_print] for val in sublist]\n", - " net_params_str = '-'.join(map(str, flattened))\n", - " return(net_params_str+str_hidden_layers_params)" - ] - }, - { - "cell_type": "code", - "execution_count": 1243, - "metadata": {}, - "outputs": [], - "source": [ - "def writeReplicatedIndividualScoresCSV(net, df, destination, str_hidden_layers_params):\n", - " filePath = destination + \"replicatedIndividualScores_\" + getNetParamsStr(net, str_hidden_layers_params) + \".csv\"\n", - " df.to_csv(path_or_buf = filePath, index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 1244, - "metadata": {}, - "outputs": [], - "source": [ - "def getNDDClassifier(D_in, H1, H2, D_out, drop, Xy_test):\n", - " model = NDD(D_in, H1, H2, D_out, drop)\n", - " \n", - " net = NeuralNetClassifier(\n", - " model,\n", - "# criterion=nn.CrossEntropyLoss,\n", - " criterion=nn.BCEWithLogitsLoss,\n", - " max_epochs=20,\n", - " optimizer=SGD,\n", - " optimizer__lr=0.01,\n", - " optimizer__momentum=0.9, \n", - " optimizer__weight_decay=1e-6, \n", - " optimizer__nesterov=True, \n", - " batch_size=200,\n", - " callbacks=callbacks,\n", - " # Shuffle training data on each epoch\n", - " iterator_train__shuffle=True,\n", - " device=device,\n", - " train_split=predefined_split(Xy_test),\n", - " )\n", - " return net" - ] - }, - { - "cell_type": "code", - "execution_count": 1245, - "metadata": {}, - "outputs": [], - "source": [ - "def avgMetrics(AUROC, AUPR, F1, Rec, Prec, kfold_nsplits):\n", - " AUROC /= kfold_nsplits\n", - " AUPR /= kfold_nsplits\n", - " F1 /= kfold_nsplits\n", - " Rec /= kfold_nsplits\n", - " Prec /= kfold_nsplits\n", - " return AUROC, AUPR, F1, Rec, Prec" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Run" - ] - }, - { - "cell_type": "code", - "execution_count": 1246, - "metadata": {}, - "outputs": [], - "source": [ - "df_paperIndividualScores = pd.read_csv(pathPaperScores)\n", - "\n", - "df_replicatedIndividualScores = df_paperIndividualScores.copy()\n", - "\n", - "for col in df_replicatedIndividualScores.columns:\n", - " if col != 'Similarity':\n", - " df_replicatedIndividualScores[col].values[:] = 0" - ] - }, - { - "cell_type": "code", - "execution_count": 1247, - "metadata": {}, - 
"outputs": [], - "source": [ - "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", - "soft = nn.Softmax(dim=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 1248, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Preparing sideeffect data...\n", - "Running fold0 for sideeffect...\n" - ] - }, - { - "ename": "ValueError", - "evalue": "Classification metrics can't handle a mix of multilabel-indicator and binary targets", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 50\u001b[0m \u001b[0mmodelPicklePath\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpathPickles\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"model_params/model_params_fold\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\"_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr_hidden_layers_params\u001b[0m\u001b[0;34m+\u001b[0m \u001b[0;34m\"_\"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0msimilarity\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m\".p\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdo_train_model\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 52\u001b[0;31m \u001b[0mnet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 53\u001b[0m \u001b[0mnet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf_params\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmodelPicklePath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/classifier.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 147\u001b[0m \u001b[0;31m# this is actually a pylint bug:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[0;31m# https://github.com/PyCQA/pylint/issues/1085\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 149\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNeuralNetClassifier\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 150\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict_proba\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/net.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minitialize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 847\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpartial_fit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/net.py\u001b[0m in \u001b[0;36mpartial_fit\u001b[0;34m(self, X, y, classes, **fit_params)\u001b[0m\n\u001b[1;32m 805\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnotify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'on_train_begin'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 806\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 807\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 808\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mKeyboardInterrupt\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 809\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/net.py\u001b[0m in \u001b[0;36mfit_loop\u001b[0;34m(self, X, y, epochs, **fit_params)\u001b[0m\n\u001b[1;32m 760\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecord\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"valid_batch_count\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalid_batch_count\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 761\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 762\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnotify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'on_epoch_end'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mon_epoch_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 763\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 764\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/net.py\u001b[0m in \u001b[0;36mnotify\u001b[0;34m(self, method_name, **cb_kwargs)\u001b[0m\n\u001b[1;32m 281\u001b[0m 
\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcb_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcb\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcallbacks_\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcb\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod_name\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mcb_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;31m# pylint: disable=unused-argument\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/callbacks/scoring.py\u001b[0m in \u001b[0;36mon_epoch_end\u001b[0;34m(self, net, dataset_train, dataset_valid, **kwargs)\u001b[0m\n\u001b[1;32m 410\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 411\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mcache_net_infer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0muse_caching\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcached_net\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 412\u001b[0;31m \u001b[0mcurrent_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_scoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcached_net\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 413\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_record_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhistory\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_score\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/skorch/callbacks/scoring.py\u001b[0m in \u001b[0;36m_scoring\u001b[0;34m(self, net, X_test, y_test)\u001b[0m\n\u001b[1;32m 119\u001b[0m instead of running inference again, if available.\"\"\"\n\u001b[1;32m 120\u001b[0m \u001b[0mscorer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_scoring\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscoring_\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 121\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnet\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 122\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_is_best_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcurrent_score\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_scorer.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, estimator, X, y_true, sample_weight)\u001b[0m\n\u001b[1;32m 167\u001b[0m stacklevel=2)\n\u001b[1;32m 168\u001b[0m return self._score(partial(_cached_call, None), estimator, X, y_true,\n\u001b[0;32m--> 169\u001b[0;31m sample_weight=sample_weight)\n\u001b[0m\u001b[1;32m 170\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_factory_args\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_scorer.py\u001b[0m in \u001b[0;36m_score\u001b[0;34m(self, method_caller, estimator, X, y_true, sample_weight)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 211\u001b[0m return self._sign * self._score_func(y_true, y_pred,\n\u001b[0;32m--> 212\u001b[0;31m **self._kwargs)\n\u001b[0m\u001b[1;32m 213\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36maccuracy_score\u001b[0;34m(y_true, y_pred, normalize, sample_weight)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;31m# Compute accuracy for each possible representation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0my_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_check_targets\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_true\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0my_type\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'multilabel'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py\u001b[0m in \u001b[0;36m_check_targets\u001b[0;34m(y_true, y_pred)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_type\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m raise ValueError(\"Classification metrics can't handle a mix of {0} \"\n\u001b[0;32m---> 90\u001b[0;31m \"and {1} targets\".format(type_true, 
type_pred))\n\u001b[0m\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0;31m# We can't have more than one value on y_type => The set is no more needed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mValueError\u001b[0m: Classification metrics can't handle a mix of multilabel-indicator and binary targets" - ] - } - ], - "source": [ - "do_prepare_data = True\n", - "do_train_model = True\n", - "kfold_nsplits = 5\n", - "# similaritiesToRun = df_paperIndividualScores['Similarity']\n", - "similaritiesToRun = [\"sideeffect\"]\n", - "\n", - "for similarity in similaritiesToRun:\n", - " input_fea = pathInput+DS1_path+\"/\" + similarity + \"_Jacarrd_sim.csv\"\n", - " input_lab = pathInput+DS1_path+\"/drug_drug_matrix.csv\"\n", - " dataPicklePath = pathPickles+\"data_X_y_\" + similarity + \"_Jaccard.p\"\n", - "\n", - " # Define model\n", - " D_in, H1, H2, D_out, drop = X.shape[1], 300, 400, 2, 0.5\n", - " str_hidden_layers_params = \"-H1-\" + str(H1) + \"-H2-\" + str(H2)\n", - " callbacks = []\n", - " \n", - " # Prepare data if not available\n", - " if do_prepare_data:\n", - " print(\"Preparing \" + similarity + \" data...\")\n", - " X,y = prepare_data(input_fea, input_lab, seperate = False)\n", - "\n", - " with open(dataPicklePath, 'wb') as f:\n", - " pickle.dump([X, y], f)\n", - "\n", - " # Load X,y and split in to train, test\n", - " with open(dataPicklePath, 'rb') as f:\n", - " X, y = pickle.load(f)\n", - " \n", - " X = X.astype(np.float32)\n", - " y = y.astype(np.int64) \n", - " \n", - " y_cat = np_utils.to_categorical(y)\n", - " \n", - " AUROC, AUPR, F1, Rec, Prec = 0,0,0,0,0\n", - " kFoldSplit = getStratifiedKFoldSplit(X,y,n_splits=kfold_nsplits)\n", - " for i, indices in enumerate(kFoldSplit):\n", - " print(\"Running fold\" + str(i) + \" for \" + similarity +\"...\")\n", - " \n", - " train_index = indices[0]\n", - " test_index = indices[1]\n", - " X_train, X_test = X[train_index], X[test_index]\n", - "# y_train, y_test = y[train_index], y[test_index]\n", - " y_train, y_test = y_cat[train_index], y_cat[test_index]\n", - " \n", - " # Create Network Classifier\n", - " Xy_test = skorch.dataset.Dataset(X_test, y_test)\n", - " net = getNDDClassifier(D_in, H1, H2, D_out, drop, Xy_test)\n", - " \n", - " # Fit and save OR load model\n", - " modelPicklePath = pathPickles+\"model_params/model_params_fold\" + str(i) + \"_\" + str_hidden_layers_params+ \"_\" + similarity + \".p\"\n", - " if do_train_model:\n", - " net.fit(X_train, y_train)\n", - " net.save_params(f_params=modelPicklePath)\n", - " else:\n", - " net.initialize() # This is important!\n", - " net.load_params(f_params=modelPicklePath)\n", - "\n", - " # Make predictions\n", - " y_pred = net.predict(X_test)\n", - " lr_probs = soft(net.forward(X_test))[:,1]\n", - " lr_precision, lr_recall, _ = precision_recall_curve(y_test, lr_probs)\n", - "\n", - " AUROC += roc_auc_score(y_test, y_pred)\n", - " AUPR += auc(lr_recall, lr_precision)\n", - " F1 += f1_score(y_test, y_pred)\n", - " Rec += recall_score(y_test, y_pred)\n", - " Prec += precision_score(y_test, y_pred)\n", - " \n", - " print(i, similarity, AUROC, AUPR, F1, Rec, Prec)\n", - " \n", - " \n", - " AUROC, AUPR, F1, Rec, Prec = avgMetrics(AUROC, AUPR, F1, Rec, Prec, kfold_nsplits)\n", - " print(similarity, AUROC, AUPR, F1, Rec, Prec)\n", - " \n", - " # Fill replicated metrics\n", - " updateSimilarityDF(df_replicatedIndividualScores, similarity, AUROC, AUPR, F1, Rec, Prec)\n", - " \n", - "# Write CSV\n", - 
"writeReplicatedIndividualScoresCSV(net, df_replicatedIndividualScores, pathRuns, str_hidden_layers_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compare to Paper" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "print(df_paperIndividualScores)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(df_replicatedIndividualScores)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "diff_metrics = ['AUC', 'AUPR', 'F-measure', 'Recall', 'Precision']\n", - "df_diff = df_paperIndividualScores[diff_metrics] - df_replicatedIndividualScores[diff_metrics]\n", - "df_diff_abs = df_diff.abs()\n", - "df_diff_percent = (df_diff_abs / df_paperIndividualScores[diff_metrics]) * 100" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df_diff" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from seaborn import heatmap\n", - "heatmap(df_diff, yticklabels=df_paperIndividualScores[\"Similarity\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "heatmap(df_diff_abs, yticklabels=df_paperIndividualScores[\"Similarity\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "heatmap(df_diff_percent, yticklabels=df_paperIndividualScores[\"Similarity\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.metrics import mean_squared_error\n", - "mean_squared_error(df_paperIndividualScores[diff_metrics],\n", - " df_replicatedIndividualScores[diff_metrics])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/02_AA_Skorch_DDI.ipynb b/notebooks/02_AA_Skorch_DDI.ipynb deleted file mode 100644 index 111f7ab..0000000 --- a/notebooks/02_AA_Skorch_DDI.ipynb +++ /dev/null @@ -1,581 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![](https://scikit-learn.org/stable/_images/grid_search_workflow.png)" - ] - }, - { - "cell_type": "code", - "execution_count": 1230, - "metadata": {}, - "outputs": [], - "source": [ - "import warnings\n", - "warnings.filterwarnings('ignore')" - ] - }, - { - "cell_type": "code", - "execution_count": 1231, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "import pickle\n", - "\n", - "from sklearn.datasets import make_classification\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import LabelEncoder\n", - "from sklearn.model_selection import GridSearchCV\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.model_selection import 
StratifiedKFold\n", - "from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, matthews_corrcoef, precision_recall_curve, auc\n", - "\n", - "from keras.utils import np_utils\n", - "\n", - "import torch\n", - "from torch import nn\n", - "import torch.nn.functional as F\n", - "from torch.utils.data import TensorDataset\n", - "from torch.utils.data import Dataset\n", - "from torch.utils.data import DataLoader\n", - "from torch.utils.tensorboard import SummaryWriter\n", - "from torch.optim import SGD\n", - "\n", - "import skorch\n", - "from skorch import NeuralNetClassifier\n", - "from skorch.callbacks import EpochScoring\n", - "from skorch.callbacks import TensorBoard\n", - "from skorch.helper import predefined_split" - ] - }, - { - "cell_type": "code", - "execution_count": 1232, - "metadata": {}, - "outputs": [], - "source": [ - "# import configurations (file paths, etc.)\n", - "import yaml\n", - "try:\n", - " from yaml import CLoader as Loader, CDumper as Dumper\n", - "except ImportError:\n", - " from yaml import Loader, Dumper\n", - " \n", - "configFile = '../cluster/data/medinfmk/ddi/config/config.yml'\n", - "\n", - "with open(configFile, 'r') as ymlfile:\n", - " cfg = yaml.load(ymlfile, Loader=Loader)" - ] - }, - { - "cell_type": "code", - "execution_count": 1233, - "metadata": {}, - "outputs": [], - "source": [ - "pathInput = cfg['filePaths']['dirRaw']\n", - "pathOutput = cfg['filePaths']['dirProcessed']\n", - "# path to store python binary files (pickles)\n", - "# in order not to recalculate them every time\n", - "pathPickles = cfg['filePaths']['dirProcessedFiles']['dirPickles']\n", - "pathRuns = cfg['filePaths']['dirProcessedFiles']['dirRuns']\n", - "pathPaperScores = cfg['filePaths']['dirRawFiles']['paper-individual-metrics-scores']\n", - "datasetDirs = cfg['filePaths']['dirRawDatasets']\n", - "DS1_path = str(datasetDirs[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Helper Functions" - ] - }, - { - "cell_type": "code", - "execution_count": 1234, - "metadata": {}, - "outputs": [], - "source": [ - "def prepare_data(input_fea, input_lab, seperate=False):\n", - " offside_sim_path = input_fea\n", - " drug_interaction_matrix_path = input_lab\n", - " drug_fea = np.loadtxt(offside_sim_path,dtype=float,delimiter=\",\")\n", - " interaction = np.loadtxt(drug_interaction_matrix_path,dtype=int,delimiter=\",\")\n", - " \n", - " train = []\n", - " label = []\n", - " tmp_fea=[]\n", - " drug_fea_tmp = []\n", - " \n", - " for i in range(0, (interaction.shape[0]-1)):\n", - " for j in range((i+1), interaction.shape[1]):\n", - " label.append(interaction[i,j])\n", - " drug_fea_tmp_1 = list(drug_fea[i])\n", - " drug_fea_tmp_2 = list(drug_fea[j])\n", - " if seperate:\n", - " tmp_fea = (drug_fea_tmp_1,drug_fea_tmp_2)\n", - " else:\n", - " tmp_fea = drug_fea_tmp_1 + drug_fea_tmp_2\n", - " train.append(tmp_fea)\n", - "\n", - " return np.array(train), np.array(label)" - ] - }, - { - "cell_type": "code", - "execution_count": 1235, - "metadata": {}, - "outputs": [], - "source": [ - "def transfer_array_format(data):\n", - " formated_matrix1 = []\n", - " formated_matrix2 = []\n", - " for val in data:\n", - " formated_matrix1.append(val[0])\n", - " formated_matrix2.append(val[1])\n", - " return np.array(formated_matrix1), np.array(formated_matrix2)" - ] - }, - { - "cell_type": "code", - "execution_count": 1236, - "metadata": {}, - "outputs": [], - "source": [ - "def preprocess_labels(labels, encoder=None, categorical=True):\n", - " if 
not encoder:\n", - " encoder = LabelEncoder()\n", - " encoder.fit(labels)\n", - " y = encoder.transform(labels).astype(np.int32)\n", - " if categorical:\n", - " y = np_utils.to_categorical(y)\n", - "# print(y)\n", - " return y, encoder" - ] - }, - { - "cell_type": "code", - "execution_count": 1237, - "metadata": {}, - "outputs": [], - "source": [ - "def preprocess_names(labels, encoder=None, categorical=True):\n", - " if not encoder:\n", - " encoder = LabelEncoder()\n", - " encoder.fit(labels)\n", - " if categorical:\n", - " labels = np_utils.to_categorical(labels)\n", - " return labels, encoder" - ] - }, - { - "cell_type": "code", - "execution_count": 1238, - "metadata": {}, - "outputs": [], - "source": [ - "def getStratifiedKFoldSplit(X,y,n_splits):\n", - " skf = StratifiedKFold(n_splits=n_splits)\n", - " return skf.split(X,y)" - ] - }, - { - "cell_type": "code", - "execution_count": 1239, - "metadata": {}, - "outputs": [], - "source": [ - "class NDD(nn.Module):\n", - " def __init__(self, D_in=1096, H1=300, H2=400, D_out=2, drop=0.5):\n", - " super(NDD, self).__init__()\n", - " # an affine operation: y = Wx + b\n", - " self.fc1 = nn.Linear(D_in, H1) # Fully Connected\n", - " self.fc2 = nn.Linear(H1, H2)\n", - " self.fc3 = nn.Linear(H2, D_out)\n", - " self.drop = nn.Dropout(drop)\n", - " self._init_weights()\n", - "\n", - " def forward(self, x):\n", - " x = F.relu(self.fc1(x))\n", - " x = self.drop(x)\n", - " x = F.relu(self.fc2(x))\n", - " x = self.drop(x)\n", - " x = self.fc3(x)\n", - " return x\n", - " \n", - " def _init_weights(self):\n", - " for m in self.modules():\n", - " if(isinstance(m, nn.Linear)):\n", - " m.weight.data.normal_(0, 0.05)\n", - " m.bias.data.uniform_(-1,0)" - ] - }, - { - "cell_type": "code", - "execution_count": 1240, - "metadata": {}, - "outputs": [], - "source": [ - "def updateSimilarityDFSingleMetric(df, sim_type, metric, value):\n", - " df.loc[df['Similarity'] == sim_type, metric ] = round(value,3)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 1241, - "metadata": {}, - "outputs": [], - "source": [ - "def updateSimilarityDF(df, sim_type, AUROC, AUPR, F1, Rec, Prec):\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'AUC', AUROC)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'AUPR', AUPR)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'F-measure', F1)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'Recall', Rec)\n", - " df = updateSimilarityDFSingleMetric(df, sim_type, 'Precision', Prec)\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 1242, - "metadata": {}, - "outputs": [], - "source": [ - "def getNetParamsStr(net, str_hidden_layers_params, net_params_to_print=[\"max_epochs\", \"batch_size\"]):\n", - " net_params = [val for sublist in [[x,net.get_params()[x]] for x in net_params_to_print] for val in sublist]\n", - " net_params_str = '-'.join(map(str, flattened))\n", - " return(net_params_str+str_hidden_layers_params)" - ] - }, - { - "cell_type": "code", - "execution_count": 1243, - "metadata": {}, - "outputs": [], - "source": [ - "def writeReplicatedIndividualScoresCSV(net, df, destination, str_hidden_layers_params):\n", - " filePath = destination + \"replicatedIndividualScores_\" + getNetParamsStr(net, str_hidden_layers_params) + \".csv\"\n", - " df.to_csv(path_or_buf = filePath, index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 1244, - "metadata": {}, - "outputs": [], - "source": [ - "def getNDDClassifier(D_in, H1, H2, D_out, drop, 
Xy_test):\n",
-    "    model = NDD(D_in, H1, H2, D_out, drop)\n",
-    "    \n",
-    "    net = NeuralNetClassifier(\n",
-    "        model,\n",
-    "#         criterion=nn.CrossEntropyLoss,\n",
-    "        criterion=nn.BCEWithLogitsLoss,\n",
-    "        max_epochs=20,\n",
-    "        optimizer=SGD,\n",
-    "        optimizer__lr=0.01,\n",
-    "        optimizer__momentum=0.9,\n",
-    "        optimizer__weight_decay=1e-6,\n",
-    "        optimizer__nesterov=True,\n",
-    "        batch_size=200,\n",
-    "        callbacks=callbacks,\n",
-    "        # Shuffle training data on each epoch\n",
-    "        iterator_train__shuffle=True,\n",
-    "        device=device,\n",
-    "        train_split=predefined_split(Xy_test),\n",
-    "    )\n",
-    "    return net"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1245,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def avgMetrics(AUROC, AUPR, F1, Rec, Prec, kfold_nsplits):\n",
-    "    AUROC /= kfold_nsplits\n",
-    "    AUPR /= kfold_nsplits\n",
-    "    F1 /= kfold_nsplits\n",
-    "    Rec /= kfold_nsplits\n",
-    "    Prec /= kfold_nsplits\n",
-    "    return AUROC, AUPR, F1, Rec, Prec"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Run"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1246,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_paperIndividualScores = pd.read_csv(pathPaperScores)\n",
-    "\n",
-    "df_replicatedIndividualScores = df_paperIndividualScores.copy()\n",
-    "\n",
-    "for col in df_replicatedIndividualScores.columns:\n",
-    "    if col != 'Similarity':\n",
-    "        df_replicatedIndividualScores[col].values[:] = 0"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1247,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
-    "soft = nn.Softmax(dim=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1248,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Preparing sideeffect data...\n",
-      "Running fold0 for sideeffect...\n"
-     ]
-    },
-    {
-     "ename": "ValueError",
-     "evalue": "Classification metrics can't handle a mix of multilabel-indicator and binary targets",
-     "output_type": "error",
-     "traceback": [
-      "ValueError                                Traceback (most recent call last)",
-      "<ipython-input> in <module>",
-      "     51     if do_train_model:",
-      "---> 52         net.fit(X_train, y_train)",
-      "     53         net.save_params(f_params=modelPicklePath)",
-      "~/anaconda3/lib/python3.7/site-packages/skorch/classifier.py in fit(self, X, y, **fit_params)",
-      "--> 149         return super(NeuralNetClassifier, self).fit(X, y, **fit_params)",
-      "~/anaconda3/lib/python3.7/site-packages/skorch/net.py in fit_loop(self, X, y, epochs, **fit_params)",
-      "--> 762         self.notify('on_epoch_end', **on_epoch_kwargs)",
-      "~/anaconda3/lib/python3.7/site-packages/skorch/callbacks/scoring.py in on_epoch_end(self, net, dataset_train, dataset_valid, **kwargs)",
-      "--> 412             current_score = self._scoring(cached_net, X_test, y_test)",
-      "~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in accuracy_score(y_true, y_pred, normalize, sample_weight)",
-      "--> 185     y_type, y_true, y_pred = _check_targets(y_true, y_pred)",
-      "ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets"
-     ]
-    }
-   ],
-   "source": [
-    "do_prepare_data = True\n",
-    "do_train_model = True\n",
-    "kfold_nsplits = 5\n",
-    "# similaritiesToRun = df_paperIndividualScores['Similarity']\n",
-    "similaritiesToRun = [\"sideeffect\"]\n",
-    "\n",
-    "for similarity in similaritiesToRun:\n",
-    "    input_fea = pathInput+DS1_path+\"/\" + similarity + \"_Jacarrd_sim.csv\"\n",
-    "    input_lab = pathInput+DS1_path+\"/drug_drug_matrix.csv\"\n",
-    "    dataPicklePath = pathPickles+\"data_X_y_\" + similarity + \"_Jaccard.p\"\n",
-    "\n",
-    "    # Define model\n",
-    "    D_in, H1, H2, D_out, drop = X.shape[1], 300, 400, 2, 0.5\n",
-    "    str_hidden_layers_params = \"-H1-\" + str(H1) + \"-H2-\" + str(H2)\n",
-    "    callbacks = []\n",
-    "    \n",
-    "    # Prepare data if not available\n",
-    "    if do_prepare_data:\n",
-    "        print(\"Preparing \" + similarity + \" data...\")\n",
-    "        X,y = prepare_data(input_fea, input_lab, seperate = False)\n",
-    "\n",
-    "        with open(dataPicklePath, 'wb') as f:\n",
-    "            pickle.dump([X, y], f)\n",
-    "\n",
-    "    # Load X,y and split in to train, test\n",
-    "    with open(dataPicklePath, 'rb') as f:\n",
-    "        X, y = pickle.load(f)\n",
-    "    \n",
-    "    X = X.astype(np.float32)\n",
-    "    y = y.astype(np.int64)\n",
-    "    \n",
-    "    y_cat = np_utils.to_categorical(y)\n",
-    "    \n",
-    "    AUROC, AUPR, F1, Rec, Prec = 0,0,0,0,0\n",
-    "    kFoldSplit = getStratifiedKFoldSplit(X,y,n_splits=kfold_nsplits)\n",
-    "    for i, indices in enumerate(kFoldSplit):\n",
-    "        print(\"Running fold\" + str(i) + \" for \" + similarity +\"...\")\n",
-    "        \n",
-    "        train_index = indices[0]\n",
-    "        test_index = indices[1]\n",
-    "        X_train, X_test = X[train_index], X[test_index]\n",
-    "#         y_train, y_test = y[train_index], y[test_index]\n",
-    "        y_train, y_test = y_cat[train_index], y_cat[test_index]\n",
-    "        \n",
-    "        # Create Network Classifier\n",
-    "        Xy_test = skorch.dataset.Dataset(X_test, y_test)\n",
-    "        net = getNDDClassifier(D_in, H1, H2, D_out, drop, Xy_test)\n",
-    "        \n",
-    "        # Fit and save OR load model\n",
-    "        modelPicklePath = pathPickles+\"model_params/model_params_fold\" + str(i) + \"_\" + str_hidden_layers_params+ \"_\" + similarity + \".p\"\n",
-    "        if do_train_model:\n",
-    "            net.fit(X_train, y_train)\n",
-    "            net.save_params(f_params=modelPicklePath)\n",
-    "        else:\n",
-    "            net.initialize()  # This is important!\n",
-    "            net.load_params(f_params=modelPicklePath)\n",
-    "\n",
-    "        # Make predictions\n",
-    "        y_pred = net.predict(X_test)\n",
-    "        lr_probs = soft(net.forward(X_test))[:,1]\n",
-    "        lr_precision, lr_recall, _ = precision_recall_curve(y_test, lr_probs)\n",
-    "\n",
-    "        AUROC += roc_auc_score(y_test, y_pred)\n",
-    "        AUPR += auc(lr_recall, lr_precision)\n",
-    "        F1 += f1_score(y_test, y_pred)\n",
-    "        Rec += recall_score(y_test, y_pred)\n",
-    "        Prec += precision_score(y_test, y_pred)\n",
-    "        \n",
-    "        print(i, similarity, AUROC, AUPR, F1, Rec, Prec)\n",
-    "    \n",
-    "    \n",
-    "    AUROC, AUPR, F1, Rec, Prec = avgMetrics(AUROC, AUPR, F1, Rec, Prec, kfold_nsplits)\n",
-    "    print(similarity, AUROC, AUPR, F1, Rec, Prec)\n",
-    "    \n",
-    "    # Fill replicated metrics\n",
-    "    updateSimilarityDF(df_replicatedIndividualScores, similarity, AUROC, AUPR, F1, Rec, Prec)\n",
-    "    \n",
-    "# Write CSV\n",
-    "writeReplicatedIndividualScoresCSV(net, df_replicatedIndividualScores, pathRuns, str_hidden_layers_params)"
-   ]
-  },
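The error recorded above comes from skorch's epoch-end scoring, not from the loss itself: np_utils.to_categorical turns y into a one-hot (multilabel-indicator) matrix, so the predefined_split(Xy_test) validation set stores one-hot targets, while net.predict returns a 1-D vector of class indices (binary). The default valid_acc accuracy callback of NeuralNetClassifier therefore raises at the end of the first epoch, and the roc_auc_score/f1_score/recall_score/precision_score calls further down would hit the same mismatch with one-hot y_test. A minimal sketch of one workaround (my assumption, not part of this patch): keep the one-hot targets that BCEWithLogitsLoss expects, but score accuracy through a scorer that collapses them first. EpochScoring and make_scorer are existing skorch/sklearn APIs, and skorch should use a user callback in place of a default one that shares its name; the simpler alternative is to drop to_categorical and train on integer labels with the commented-out nn.CrossEntropyLoss.

    import numpy as np
    from sklearn.metrics import accuracy_score, make_scorer
    from skorch.callbacks import EpochScoring

    def onehot_accuracy(y_true, y_pred):
        # y_true arrives one-hot from the predefined validation split;
        # collapse it to class indices to match net.predict()'s 1-D output
        return accuracy_score(np.argmax(y_true, axis=1), y_pred)

    # reuse the default callback's name so this scorer takes its place
    callbacks = [EpochScoring(make_scorer(onehot_accuracy),
                              lower_is_better=False, name='valid_acc')]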
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Compare to Paper"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "print(df_paperIndividualScores)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(df_replicatedIndividualScores)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "scrolled": false
-   },
-   "outputs": [],
-   "source": [
-    "diff_metrics = ['AUC', 'AUPR', 'F-measure', 'Recall', 'Precision']\n",
-    "df_diff = df_paperIndividualScores[diff_metrics] - df_replicatedIndividualScores[diff_metrics]\n",
-    "df_diff_abs = df_diff.abs()\n",
-    "df_diff_percent = (df_diff_abs / df_paperIndividualScores[diff_metrics]) * 100"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df_diff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from seaborn import heatmap\n",
-    "heatmap(df_diff, yticklabels=df_paperIndividualScores[\"Similarity\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "heatmap(df_diff_abs, yticklabels=df_paperIndividualScores[\"Similarity\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "heatmap(df_diff_percent, yticklabels=df_paperIndividualScores[\"Similarity\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.metrics import mean_squared_error\n",
-    "mean_squared_error(df_paperIndividualScores[diff_metrics],\n",
-    "                   df_replicatedIndividualScores[diff_metrics])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
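The notebook's closing comparison collapses the paper-vs-replication gap into a single scalar. sklearn's mean_squared_error also accepts multioutput='raw_values', which keeps one error per metric column and makes it easier to see which metric drifts most. A minimal sketch with hypothetical score values (only the column names are taken from the notebook):

    import pandas as pd
    from sklearn.metrics import mean_squared_error

    metrics = ['AUC', 'AUPR', 'F-measure', 'Recall', 'Precision']
    paper = pd.DataFrame([[0.95, 0.92, 0.80, 0.78, 0.83]], columns=metrics)
    replicated = pd.DataFrame([[0.93, 0.90, 0.77, 0.75, 0.81]], columns=metrics)

    # one MSE per metric instead of a single aggregate
    per_metric_mse = mean_squared_error(paper, replicated, multioutput='raw_values')
    print(dict(zip(metrics, per_metric_mse)))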
"mean_squared_error(df_paperIndividualScores[diff_metrics],\n", - " df_replicatedIndividualScores[diff_metrics])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/req.txt b/req.txt index b3eed48..475bd1a 100644 --- a/req.txt +++ b/req.txt @@ -173,7 +173,7 @@ scikit-image==0.14.2 scikit-learn==0.20.3 scipy==1.2.1 seaborn==0.9.0 -SecretStorage==3.1.1 +secretstorage==3.1.1 Send2Trash==1.5.0 simplegeneric==0.8.1 singledispatch==3.4.0.3 diff --git a/req_conda.txt b/req_conda.txt new file mode 100644 index 0000000..ee6aa79 --- /dev/null +++ b/req_conda.txt @@ -0,0 +1,197 @@ +absl-py==0.8.1 +alabaster==0.7.12 +anaconda-client==1.7.2 +anaconda-navigator==1.9.7 +anaconda-project==0.8.2 +asn1crypto==0.24.0 +astor==0.8.0 +astroid==2.2.5 +astropy==3.1.2 +atomicwrites==1.3.0 +attrs==19.1.0 +Babel==2.6.0 +backcall==0.1.0 +backports.os==0.1.1 +beautifulsoup4==4.7.1 +biopython==1.73 +bitarray==0.8.3 +bkcharts==0.2 +bleach==3.1.0 +bokeh==1.0.4 +boto==2.49.0 +Bottleneck==1.2.1 +certifi==2019.3.9 +cffi==1.12.2 +chardet==3.0.4 +Click==7.0 +cloudpickle==0.8.0 +clyent==1.2.2 +colorama==0.4.1 +conda==4.6.14 +conda-build==3.17.8 +conda-verify==3.1.1 +contextlib2==0.5.5 +cryptography==2.6.1 +cycler==0.10.0 +Cython==0.29.6 +cytoolz==0.9.0.1 +dask==1.1.4 +deap==1.3.0 +decorator==4.4.0 +defusedxml==0.5.0 +distributed==1.26.0 +docutils==0.14 +entrypoints==0.3 +et-xmlfile==1.0.1 +fastcache==1.0.2 +filelock==3.0.10 +Flask==1.0.2 +future==0.17.1 +gast==0.2.2 +gevent==1.4.0 +glob2==0.6 +gmpy2==2.0.8 +google-pasta==0.1.7 +greenlet==0.4.15 +h5py==2.9.0 +heapdict==1.0.0 +html5lib==1.0.1 +idna==2.8 +imageio==2.5.0 +imagesize==1.1.0 +ipykernel==5.1.0 +ipython==7.4.0 +ipywidgets==7.4.2 +isort==4.3.16 +itsdangerous==1.1.0 +jdcal==1.4 +jedi==0.13.3 +jeepney==0.4 +Jinja2==2.10 +jsonschema==3.0.1 +jupyter==1.0.0 +jupyterlab==0.35.4 +Keras==2.3.1 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.0 +keyring==18.0.0 +kiwisolver==1.0.1 +lazy-object-proxy==1.3.1 +llvmlite==0.28.0 +locket==0.2.0 +lxml==4.3.2 +Markdown==3.1.1 +MarkupSafe==1.1.1 +matplotlib==3.0.3 +mccabe==0.6.1 +mistune==0.8.4 +more-itertools==6.0.0 +mpmath==1.1.0 +multipledispatch==0.6.0 +navigator-updater==0.2.1 +nbconvert==5.4.1 +nbformat==4.4.0 +networkx==2.2 +nltk==3.4 +nose==1.3.7 +notebook==5.7.8 +numba==0.43.1 +numexpr==2.6.9 +numpy==1.16.2 +numpydoc==0.8.0 +olefile==0.46 +openpyxl==2.6.1 +packaging==19.0 +pandas==0.24.2 +pandocfilters==1.4.2 +parso==0.3.4 +partd==0.3.10 +path.py==11.5.0 +pathlib2==2.3.3 +patsy==0.5.1 +pep8==1.7.1 +pexpect==4.6.0 +pickleshare==0.7.5 +Pillow==5.4.1 +pkginfo==1.5.0.1 +pluggy==0.9.0 +ply==3.11 +protobuf==3.10.0 +psutil==5.6.1 +ptyprocess==0.6.0 +py==1.8.0 +pycodestyle==2.5.0 +pycosat==0.6.3 +pycparser==2.19 +pycrypto==2.6.1 +pycurl==7.43.0.2 +pyflakes==2.1.1 +Pygments==2.3.1 +pylint==2.3.1 +pyodbc==4.0.26 +pyOpenSSL==19.0.0 +pyparsing==2.3.1 +pyrsistent==0.14.11 +PySocks==1.6.8 +pytest==4.3.1 +pytest-arraydiff==0.3 +pytest-astropy==0.5.0 +pytest-doctestplus==0.3.0 +pytest-openfiles==0.3.2 +pytest-remotedata==0.3.1 +python-dateutil==2.8.0 
diff --git a/req_pip.txt b/req_pip.txt
new file mode 100644
index 0000000..60471d8
--- /dev/null
+++ b/req_pip.txt
@@ -0,0 +1,25 @@
+secretstorage==3.1.1
+jupyter-console==6.0.0
+mkl-random==1.0.2
+backports.shutil-get-terminal-size==1.0.0
+ipython-genutils==0.2.0
+grpcio==1.24.1
+jupyterlab-server==0.2.0
+mkl-fft==1.0.10
+ruamel-yaml==0.15.46
+umi-tools==1.0.0
+importlib-metadata==0.0.0
+libarchive-c==2.8
+jupyter-core==4.4.0
+tables==3.5.1
+torch==1.3.0+cpu
+torchvision==0.4.1+cpu
+pytorch-ignite==0.2.1
+msgpack==0.6.1
+tensorflow-estimator==2.0.1
+jupyter-client==5.2.4
+opt-einsum==3.1.0
+pysam==0.15.2
+lief==0.9.0
+prometheus-client==0.6.0
+prompt-toolkit==2.0.9
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..2d7303f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,16 @@
+from setuptools import setup
+
+setup(name='ddi',
+      version='0.0.1',
+      description='',
+      url='https://github.com/CMI-UZH/side-effects',
+      packages=['ddi'],
+      python_requires='>=3.6.0',
+      install_requires=[
+          'numpy',
+          'pandas',
+          'scipy',
+          'scikit-learn',
+          'torch'
+      ],
+      zip_safe=False)
\ No newline at end of file
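With setup.py in place, the new ddi package can be installed in editable mode from the repository root (pip install -e .), after which the workflow modules resolve as ordinary imports instead of notebook-relative paths. A minimal sketch, assuming only the package layout this patch introduces (the submodule names match the files added above; no function signatures are assumed):

    import ddi
    from ddi import dataset, model, utilities, run_workflow

    # with an editable install, __file__ points back into the source tree
    print(ddi.__file__)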