Skip to content

Commit

Permalink
allow both gff and txt output at the same time
Browse files Browse the repository at this point in the history
  • Loading branch information
ctSkennerton committed Mar 28, 2019
1 parent 42ec900 commit bcd7427
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 28 deletions.
77 changes: 59 additions & 18 deletions CRISPRFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ public class CRISPRFinder
{
private String inputFileName;
private String outputFileName;
private String outputGffFileName;

private int screenDisplay;
private int minNumRepeats;
Expand All @@ -25,6 +26,7 @@ public class CRISPRFinder

public CRISPRFinder(String _inputFileName,
String _outputFileName,
String _outputGffFileName,
int _screenDisplay,
int _minNumRepeats,
int _minRepeatLength,
Expand All @@ -37,6 +39,7 @@ public CRISPRFinder(String _inputFileName,
{
inputFileName = _inputFileName;
outputFileName = _outputFileName;
outputGffFileName = _outputGffFileName;

screenDisplay = _screenDisplay;
minNumRepeats = _minNumRepeats;
Expand All @@ -63,7 +66,7 @@ public CRISPRFinder(String _inputFileName,
}

try {
outputFileStream = new FileOutputStream(outputFile, false);
outputFileStream = new FileOutputStream(outputFile, false);
spacers = new PrintStream(outputFileStream);
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
Expand Down Expand Up @@ -254,15 +257,16 @@ private boolean findRepeats( DNASequence sequence, int readNum )
FileOutputStream outputFileStream;
PrintStream out;

FileOutputStream outputGffFileStream;
PrintStream gffOut = null;

if (screenDisplay == 1)
out = System.out;
else
{
if ( outputFileName.equals("") )
if ( outputFileName == "" )
outputFileName = "a.out";

//System.out.println("Writing results in file '" + outputFileName + "'");
//System.out.println("");

File outputFile = new File(outputFileName);
if ( readNum == 1 && outputFile.exists() )
Expand All @@ -274,6 +278,23 @@ private boolean findRepeats( DNASequence sequence, int readNum )

outputFileStream = new FileOutputStream(outputFile, true);
out = new PrintStream(outputFileStream);


if (! outputGffFileName.equals(""))
{
File outputGffFile = new File(outputGffFileName);
if ( readNum == 1 && outputGffFile.exists() )
{
boolean success = outputFile.delete();
if (!success)
throw new IllegalArgumentException("Error: Could not delete file '" + outputFile + "'");
}

outputGffFileStream = new FileOutputStream(outputGffFile, true);
gffOut = new PrintStream(outputGffFileStream);
gffOut.println("##gff-version 3");
printGffHeader = false;
}
}

if (repeatsFound)
Expand All @@ -297,20 +318,15 @@ private boolean findRepeats( DNASequence sequence, int readNum )
for (int k = 0; k < CRISPRVector.size(); k++)
{
currCRISPR = (CRISPR)CRISPRVector.elementAt(k);
if(outputformat > 0) {
String crispr_id = "CRISPR" + (++totalCrisprCount);
out.print(sequence.getName() + "\tminced:" + minced.VERSION + "\trepeat_region\t");
out.print((currCRISPR.start() + 1) + "\t" + (currCRISPR.end() + 1) + "\t");
out.print(currCRISPR.numRepeats() + "\t.\t.\tID="+ crispr_id + ";rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq="+ currCRISPR.repeatStringAt(1));
out.print("\n");
if(outputformat == 2) {
out.print(currCRISPR.toGff(sequence.getName(), crispr_id));
}
totalCrisprCount++;
if(outputformat > 0 && gffOut == null ) {
printGff(out, sequence, currCRISPR);
} else {
out.print("CRISPR " + (++totalCrisprCount) + " Range: " + (currCRISPR.start() + 1) + " - " + (currCRISPR.end() + 1) + "\n");
out.print(currCRISPR.toString());
out.print("Repeats: " + currCRISPR.numRepeats() + "\t" + "Average Length: " + currCRISPR.averageRepeatLength() + "\t\t");
out.print("Average Length: " + currCRISPR.averageSpacerLength() + "\n\n");
printTable(out, currCRISPR);
}

if (gffOut != null) {
printGff(gffOut, sequence, currCRISPR);
}
if(printSpacers) {
for (int i = 0; i < currCRISPR.numSpacers(); ++i) {
Expand All @@ -333,8 +349,33 @@ private boolean findRepeats( DNASequence sequence, int readNum )
out.close();

}
catch (Exception e) { System.err.println ("--Error writing to file-- \n"); }
catch (Exception e) {
System.err.println ("--Error writing to file-- \n");
e.printStackTrace(System.err);
}

return true;
}

/**
 * Writes one CRISPR as a tab-separated GFF record to the given stream.
 *
 * The record consists of the sequence name, a "minced:VERSION" source tag,
 * the literal feature type "repeat_region", the CRISPR start and end (each
 * incremented by one -- presumably converting to 1-based coordinates; confirm
 * against CRISPR), the repeat count, two "." placeholders and an attribute
 * list whose ID is "CRISPR" followed by the current value of totalCrisprCount.
 * When outputformat equals 2, the text returned by CRISPR.toGff for the same
 * sequence name and ID is written after the record.
 *
 * @param out        stream the record is printed to
 * @param sequence   sequence whose name is used as the first column
 * @param currCRISPR the CRISPR being reported
 * @return always true
 */
private boolean printGff(PrintStream out, DNASequence sequence, CRISPR currCRISPR) {
    // The ID is derived from the running counter; this method does not modify it.
    final String featureId = "CRISPR" + totalCrisprCount;

    final String leadingColumns = sequence.getName() + "\tminced:" + minced.VERSION + "\trepeat_region\t";
    out.print(leadingColumns);

    final String coordinates = (currCRISPR.start() + 1) + "\t" + (currCRISPR.end() + 1) + "\t";
    out.print(coordinates);

    final String countAndAttributes = currCRISPR.numRepeats() + "\t.\t.\tID=" + featureId
            + ";rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=" + currCRISPR.repeatStringAt(1);
    out.print(countAndAttributes);
    out.print("\n");

    // Only output format 2 appends the extra lines produced by CRISPR.toGff.
    if (outputformat != 2) {
        return true;
    }
    out.print(currCRISPR.toGff(sequence.getName(), featureId));
    return true;
}

/**
 * Writes the human-readable report for one CRISPR to the given stream.
 *
 * Output is, in order: a heading with the current value of totalCrisprCount
 * and the start/end range (each incremented by one), the text returned by
 * CRISPR.toString(), and a summary line with the repeat count, the average
 * repeat length and the average spacer length. Both averages carry the same
 * "Average Length: " label in the output.
 *
 * @param out        stream the report is printed to
 * @param currCRISPR the CRISPR being reported
 * @return always true
 */
private boolean printTable(PrintStream out, CRISPR currCRISPR) {
    final String heading = "CRISPR " + totalCrisprCount + " Range: " + (currCRISPR.start() + 1) + " - " + (currCRISPR.end() + 1) + "\n";
    out.print(heading);

    final String body = currCRISPR.toString();
    out.print(body);

    final String repeatSummary = "Repeats: " + currCRISPR.numRepeats() + "\t" + "Average Length: " + currCRISPR.averageRepeatLength() + "\t\t";
    out.print(repeatSummary);

    final String spacerSummary = "Average Length: " + currCRISPR.averageSpacerLength() + "\n\n";
    out.print(spacerSummary);

    return true;
}

}

5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ The output can be large, so save it in a file:

minced -minNR 2 metagenome.fna metagenome.crisprs

You can also save the table output and the GFF output at the same
time by giving two output file names:

minced ecoli.fna out.txt out.gff

## COPYRIGHT AND LICENSE

```
Expand Down
22 changes: 18 additions & 4 deletions minced.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

public class minced
{
public static final String VERSION = "0.3.3";
public static final String VERSION = "0.4.0";
public static void main(String[] args)
{
//default values
Expand Down Expand Up @@ -178,9 +178,12 @@ else if (args[i].endsWith("screen"))
// Last options should be an input file and optional output file
String inputFileName = "";
String outputFileName = "";
String outputGffFileName = "";
boolean outputFileSpecified = false;
boolean outputGffFileSpecified = false;
int numArgsRemaining = args.length - numOptions;


if (numArgsRemaining == 1)
inputFileName = args[i];
else if (numArgsRemaining == 2)
Expand All @@ -190,6 +193,15 @@ else if (numArgsRemaining == 2)
outputFileName = args[i + 1];
screenDisplay = 0;
}
else if (numArgsRemaining == 3)
{
inputFileName = args[i];
outputFileSpecified = true;
outputGffFileSpecified = true;
outputFileName = args[i + 1];
outputGffFileName = args[i + 2];
screenDisplay = 0;
}
else
{
System.out.println("Improper usage.");
Expand Down Expand Up @@ -234,6 +246,7 @@ else if (numArgsRemaining == 2)

CRISPRFinder client = new CRISPRFinder(inputFileName,
outputFileName,
outputGffFileName,
screenDisplay,
minNumRepeats,
minRepeatLength,
Expand All @@ -251,7 +264,7 @@ public static void printUsage()
{
System.out.println("MinCED, a program to find CRISPRs in shotgun DNA sequences or full genomes");
System.out.println();
System.out.println("Usage: minced [options] file.fa [outputFile]");
System.out.println("Usage: minced [options] file.fa [outputFile.txt] [outputFile.gff]");
System.out.println();
System.out.println("Options: -searchWL Length of search window used to discover CRISPRs (range: 6-9). Default: 8");
System.out.println(" -minNR Minimum number of repeats a CRISPR must contain. Default: 3");
Expand All @@ -268,8 +281,9 @@ public static void printUsage()
System.out.println(" --version Output version information");
System.out.println();
System.out.println("Examples: minced ecoli.fna");
System.out.println(" minced -minNR 2 metagenome.fna");
System.out.println(" minced -minNR 2 metagenome.fna metagenome.crisprs");
System.out.println(" minced metagenome.fna");
System.out.println(" minced metagenome.fna metagenome.crisprs");
System.out.println(" minced metagenome.fna metagenome.crisprs metagenome.gff");
System.out.println();
}

Expand Down
12 changes: 6 additions & 6 deletions t/Aquifex_aeolicus_VF5.expected
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##gff-version 3
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 156460 156767 5 . . ID=CRISPR1;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAACC
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 244561 244791 4 . . ID=CRISPR2;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACACGGTACATTAGGAAC
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 279264 279555 5 . . ID=CRISPR3;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTTAACTCCACACGGTACATTAGAAAC
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 1226626 1226861 4 . . ID=CRISPR4;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=CGTTTCTAATGTACCGTAGAGGAGTTGAAAC
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 1379598 1379894 5 . . ID=CRISPR5;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACTACGGTACATTAGGAAC
gi|15282445|ref|NC_000918.1| minced:0.3.3 repeat_region 1418900 1419060 3 . . ID=CRISPR6;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAAC
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 156460 156767 5 . . ID=CRISPR1;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAACC
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 244561 244791 4 . . ID=CRISPR2;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACACGGTACATTAGGAAC
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 279264 279555 5 . . ID=CRISPR3;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTTAACTCCACACGGTACATTAGAAAC
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 1226626 1226861 4 . . ID=CRISPR4;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=CGTTTCTAATGTACCGTAGAGGAGTTGAAAC
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 1379598 1379894 5 . . ID=CRISPR5;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTTCAACTCCACTACGGTACATTAGGAAC
gi|15282445|ref|NC_000918.1| minced:0.4.0 repeat_region 1418900 1419060 3 . . ID=CRISPR6;rpt_type=direct;rpt_family=CRISPR;rpt_unit_seq=GTTCCTAATGTACCGTGTGGAGTTGAAAC

0 comments on commit bcd7427

Please sign in to comment.