-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfilter_fusion_out.py
105 lines (90 loc) · 5.34 KB
/
filter_fusion_out.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python
import argparse
from modules.collective_script.methods import open_file
from modules.fusioncatcher.methods import create_fc_output, process_fusion_catcher
from modules.starfusion.methods import create_sf_output, process_star_fusion
from modules.jaffa.methods import create_jaffa_output, process_jaffa
from modules.arriba.methods import create_arriba_output, process_arriba
"""
##################################################
Center for Molecular and Biomolecular Informatics (CMBI) / RTC Bioinformatics
Author: Joshua Koopmans
Version: 1.0
Email: [email protected]
##################################################
This script controls the logic of the CLI program. Arguments are created and parsed,
and depending on the arguments, specific methods are executed.
This script makes use of the project modules "fusioncatcher", "starfusion", "jaffa" and "arriba".
"""
def main():
    """
    Entry point of the CLI: parse the arguments, load the input file and run
    the filter that belongs to the selected fusion detection tool.

    The input is read through open_file; its second argument is True only
    when the selected tool is "jaffa" (NOTE(review): the meaning of that flag
    is defined in modules.collective_script.methods - confirm there).
    Each tool branch calls that tool's process_* function and hands the
    result(s) to the matching create_*_output function.
    """
    cli = parse_arguments()
    tool = cli.tool
    destination = cli.output

    # Only jaffa input is opened with the flag set.
    records = open_file(cli.input, tool == "jaffa")

    # "--fusion-inspector yes" requests the additional FusionInspector output.
    want_fusion_inspector = cli.fusion_inspector == "yes"

    # The tool choices are mutually exclusive, so at most one branch runs.
    if tool == "starfusion":
        filtered = process_star_fusion(
            records, cli.threshold_spanning, cli.threshold_junction)
        create_sf_output(destination, filtered)
    elif tool == "fusioncatcher":
        filtered, filtered_fi = process_fusion_catcher(
            records, want_fusion_inspector)
        create_fc_output(
            destination, filtered, want_fusion_inspector, filtered_fi)
    elif tool == "jaffa":
        filtered, filtered_fi = process_jaffa(
            records, want_fusion_inspector,
            cli.threshold_confidence, cli.threshold_spanning)
        create_jaffa_output(
            destination, filtered, want_fusion_inspector, filtered_fi)
    elif tool == "arriba":
        # process_arriba does not receive the FusionInspector flag; only the
        # output writer does.
        filtered, filtered_fi = process_arriba(
            records, cli.threshold_spanning, cli.threshold_junction)
        create_arriba_output(
            destination, filtered, want_fusion_inspector, filtered_fi)
def parse_arguments(argv=None):
    """
    Build the argparse CLI, parse the arguments and validate tool-specific options.

    Input types, default values, help messages and choices are declared while
    adding each argument. After parsing, options that do not apply to the
    selected tool are rejected through parser.error, which prints the usage
    message and terminates the program with exit status 2.

    :param argv: Optional list of argument strings to parse. When None (the
                 default), argparse reads the arguments from sys.argv[1:].
    :return: Object (argparse.Namespace) with parsed arguments.
    """
    # Shared default of both read-count thresholds. It is also used below to
    # detect whether a threshold was changed from its default.
    default_read_threshold = 8

    parser = argparse.ArgumentParser(
        description="Filter output of the STAR-Fusion, FusionCatcher, JAFFA or Arriba "
                    "fusion gene detection tool.")
    parser.add_argument("-i", "--input", type=str, required=True, help="Input file")
    parser.add_argument("-o", "--output", type=str, required=True, help="Desired output file name")
    parser.add_argument("-t", "--tool", type=str, required=True, help="Select tool that generated output file",
                        choices=["starfusion", "fusioncatcher", "jaffa", "arriba"])
    parser.add_argument("--threshold-junction", type=int,
                        help="Amount of junction reads to filter by (only starfusion & arriba)",
                        default=default_read_threshold)
    parser.add_argument("--threshold-spanning", type=int,
                        help="Amount of spanning frag reads to filter by (only starfusion, jaffa & arriba)",
                        default=default_read_threshold)
    parser.add_argument("--threshold-confidence", type=str, help="Confidence level to filter by (only jaffa)",
                        default="All", choices=["HighConfidence", "MediumConfidence", "LowConfidence", "All"])
    parser.add_argument("--fusion-inspector", type=str,
                        help="Additional filtered file with the first column formatted for FusionInspector",
                        choices=("yes", "no"), default="no")
    args = parser.parse_args(argv)

    # Only if starfusion, jaffa or arriba is the selected tool can a threshold for spanning reads be specified.
    # Only if starfusion or arriba is the selected tool can a threshold for junction reads be specified.
    # Only if jaffa is the selected tool can a threshold confidence be specified.
    # FusionCatcher output is filtered on terms.
    #
    # A value equal to the default is indistinguishable from "not given", so
    # explicitly passing the default for a non-applicable tool is accepted.
    # parser.error() never returns (it exits with status 2), so no explicit
    # exit call is needed after it.
    if args.tool not in ("starfusion", "arriba") and args.threshold_junction != default_read_threshold:
        parser.error('--threshold-junction can only be set when --tool=starfusion or --tool=arriba.')
    if args.tool not in ("starfusion", "jaffa", "arriba") and args.threshold_spanning != default_read_threshold:
        parser.error('--threshold-spanning can only be set when --tool=starfusion or --tool=jaffa or --tool=arriba.')
    if args.tool != "jaffa" and args.threshold_confidence != "All":
        parser.error('--threshold-confidence can only be set when --tool=jaffa.')
    return args
# Run the CLI only when this file is executed as a script, so that importing
# the module (e.g. for testing) does not parse sys.argv or process any files.
if __name__ == "__main__":
    main()