Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: sync main to dev #108

Merged
merged 3 commits into from
Oct 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## [1.4.6](https://github.com/RIVM-bioinformatics/ViroConstrictor/compare/v1.4.5...v1.4.6) (2024-10-08)


### Bug Fixes

* properly solve DAG workflow for nonsegmented matched-ref samples ([02a821a](https://github.com/RIVM-bioinformatics/ViroConstrictor/commit/02a821a44c3ed3741c65825789ef25ad3e2093c1))

## [1.4.5](https://github.com/RIVM-bioinformatics/ViroConstrictor/compare/v1.4.4...v1.4.5) (2024-09-25)


Expand Down
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ authors:
National Institute for Public Health and the
Environment (RIVM)
- name: "The RIVM-IDS Bioinformatics team"
version: 1.4.5 #x-release-please-version
version: 1.4.6 #x-release-please-version
doi: 10.5281/zenodo.7688035
identifiers:
- type: doi
Expand Down
2 changes: 1 addition & 1 deletion ViroConstrictor/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "1.4.5"
__version__ = "1.4.6"
__prog__ = "ViroConstrictor"
72 changes: 41 additions & 31 deletions ViroConstrictor/workflow/scripts/amplicon_covs.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,37 +147,39 @@ def remove_alt_primer_r(df):


def Find_NonOverlap(df):
    """Add ``unique_start`` / ``unique_end`` columns to a primer table.

    Rows are processed in the order given. For every row:

    * ``unique_start`` is the ``rightstart`` of the previous row, or the
      row's own ``leftend`` for the first row.
    * ``unique_end`` is the ``leftend`` of the next row when that value lies
      in ``[unique_start, rightstart)``; otherwise it is the row's own
      ``rightstart``. The last row always uses its own ``rightstart``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the columns ``leftend`` and ``rightstart``.

    Returns
    -------
    pandas.DataFrame
        ``df`` (with a fresh 0..n-1 index) plus the two computed columns.
        For an empty input an empty frame is returned with the columns
        ``name, leftstart, leftend, rightstart, rightend, unique_start,
        unique_end``.
    """
    if df.empty:
        # Nothing to compute; hand back an empty frame that still carries
        # the columns callers select afterwards.
        return pd.DataFrame(
            columns=[
                "name",
                "leftstart",
                "leftend",
                "rightstart",
                "rightend",
                "unique_start",
                "unique_end",
            ]
        )

    left_ends = df["leftend"].tolist()
    right_starts = df["rightstart"].tolist()
    last_row = len(right_starts) - 1

    unique_starts = []
    unique_ends = []
    for row, own_right_start in enumerate(right_starts):
        start = left_ends[0] if row == 0 else right_starts[row - 1]
        end = own_right_start
        if row != last_row:
            next_left_end = left_ends[row + 1]
            # Same test as ``next_left_end in range(start, own_right_start)``
            # for integer positions, without building a range object.
            if start <= next_left_end < own_right_start:
                end = next_left_end
        unique_starts.append(start)
        unique_ends.append(end)

    # Attach the values row by row. Collecting them in position-keyed dicts
    # and merging back on "name" loses a row whenever two rows compute the
    # same start or end, because the later dict entry overwrites the earlier.
    return df.reset_index(drop=True).assign(
        unique_start=unique_starts,
        unique_end=unique_ends,
    )


def avg(lst):
Expand Down Expand Up @@ -251,6 +253,14 @@ def pad_name(name):
lf = remove_alt_primer_l(remove_alt_keyword(lf))
rf = remove_alt_primer_r(remove_alt_keyword(rf))

# if either lf or rf is empty, write empty csv and exit
# csv will have one row with index "flags.key" and an empty value, no column name
if len(lf) == 0 or len(rf) == 0:
df = pd.DataFrame({flags.key: [None]})
print(df)
df.to_csv(flags.output, sep=",", index=False, header=False)
sys.exit(0)

non_overlapping_points = Find_NonOverlap(
pd.merge(lf, rf, on="name", how="inner")
.rename(
Expand Down
19 changes: 14 additions & 5 deletions ViroConstrictor/workflow/workflow.smk
Original file line number Diff line number Diff line change
Expand Up @@ -655,19 +655,28 @@ def group_aminoacids_inputs(wildcards):
select_samples = list(
samples_df.loc[samples_df["Virus"] == i]["sample"].unique()
)
select_refIDs = list(samples_df.loc[samples_df["Virus"] == i]["RefID"].unique())
# for x in select_samples:
# y = samples_df.loc[(samples_df["Virus"] == i) & (samples_df["sample"] == x)]["RefID"].unique()
# print(y)
# select_refIDs = list(samples_df.loc[samples_df["Virus"] == i]["RefID"].unique())
# print(select_refIDs)

# create a dictionary of dictionaries for each virus, with 'i' as the primary key and sample as the secondary key having a list of refIDs as the value
struct[i] = {sample: select_refIDs for sample in select_samples}

struct[i] = {
sample: list(
samples_df.loc[
(samples_df["Virus"] == i) & (samples_df["sample"] == sample)
]["RefID"].unique()
)
for sample in select_samples
}
file_list = []
for virus, sample in struct.items():
for sample, refid in sample.items():
for ref in refid:
file_list.append(
f"{datadir}Virus~{virus}/RefID~{ref}/{amino}{sample}/aa.faa"
)

return file_list


Expand Down Expand Up @@ -772,7 +781,7 @@ rule concat_boc:

rule calculate_amplicon_cov:
input:
pr=f"{datadir}{wc_folder}{prim}" "{sample}_removedprimers.bed",
pr=f"{datadir}{wc_folder}{prim}" "{sample}_primers.bed",
cov=rules.trueconsense.output.cov,
output:
f"{datadir}{wc_folder}{prim}" "{sample}_ampliconcoverage.csv",
Expand Down
Loading