Skip to content

Commit 512e39d

Browse files
committed
init commit
1 parent 846a347 commit 512e39d

8 files changed

Lines changed: 1632 additions & 0 deletions
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Scripts used for E. intestinalis genome assemblies.
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
#!/usr/bin/env python3
2+
3+
name = "assign_chromosome_number.py"
4+
version = "0.2.0"
5+
updated = "2025-08-12"
6+
7+
from sys import argv
8+
from argparse import ArgumentParser
9+
from os import makedirs
10+
from os.path import isdir,basename
11+
from textwrap import wrap
12+
13+
############################################################
14+
## Readme
15+
############################################################
16+
17+
usage = f"""
18+
NAME {name}
19+
VERSION {version}
20+
UPDATED {updated}
21+
SYNOPSIS Utilizes chromosome map generated by orient_fastas_to_reference.py to homogenize naming between the reference
22+
and the new assembly.
23+
24+
USAGE {name} \\
25+
-m all.map \\
26+
-f 50507.oriented.fasta \\
27+
-o 50507
28+
29+
OPTIONS
30+
-m (--map) Chromosome map file
31+
-f (--fasta) Assembly FASTA file
32+
-o (--outdir) Output directory [Default = CHROMOSOME_ASSIGNMENT]
33+
"""
34+
35+
## Show the readme and stop when the script is launched without arguments
if len(argv) < 2:
    print(usage)
    ## exit() is a helper injected by the site module for interactive
    ## sessions and is not guaranteed to exist (e.g. python -S); raising
    ## SystemExit directly yields the same zero exit status
    raise SystemExit(0)
38+
39+
############################################################
40+
## Command line arguments
41+
############################################################
42+
43+
## Declare the three supported options: the map and the FASTA are
## mandatory, the output directory falls back to CHROMOSOME_ASSIGNMENT
parser = ArgumentParser()
parser.add_argument("-m", "--map", required=True)
parser.add_argument("-f", "--fasta", required=True)
parser.add_argument("-o", "--outdir", default="CHROMOSOME_ASSIGNMENT")

args = parser.parse_args()
map_file, fasta_file, outdir = args.map, args.fasta, args.outdir

## Create the output directory (including missing parents) if absent
if not isdir(outdir):
    makedirs(outdir, mode=0o755)

## Prefix for the output files: FASTA base name up to its first dot
filename = basename(fasta_file).partition(".")[0]
59+
60+
############################################################
61+
## Read fasta file
62+
############################################################
63+
64+
## Load the whole assembly into memory as {header: sequence}.
## The key is the full header line minus the leading ">", so it has to
## match the contig names used in the chromosome map exactly.
## A header seen twice restarts its record (last occurrence wins).
locus = ""
records = {}

## The context manager closes the file even if parsing fails midway
with open(fasta_file, 'r') as FASTA:

    for line in FASTA:

        line = line.strip()

        if line == "":
            continue

        ## Read fasta header
        if line[0] == ">":
            locus = line[1:]
            records[locus] = []
            continue

        ## Sequence data is only valid once a header has been read
        if locus not in records:
            raise ValueError(
                f"{fasta_file}: sequence data found before the first FASTA header"
            )

        ## Collect the pieces and join once per record below; repeated
        ## string += may copy the growing sequence on every line
        records[locus].append(line)

contigs = {header: "".join(pieces) for header, pieces in records.items()}
86+
87+
############################################################
88+
## Read chromosome mapping file
89+
############################################################
90+
91+
## Build {chromosome: [contig, ...]} from the map file.
##   ">>" lines start a new group; the first tab-separated field is the
##        name the following contigs are assigned to
##   ">"  lines carry a contig name and a mapping type in their first two
##        tab-separated fields; only "Primary" entries are kept, in file
##        order
## Any other line is ignored. A ">>" name seen twice restarts its list.
mappings = {}
mapped_to = ""

## The context manager closes the file even if parsing fails midway
with open(map_file, 'r') as MAP:

    for line in MAP:

        line = line.strip()

        if line == "":
            continue

        if line.startswith(">>"):
            mapped_to = line[2:].split("\t")[0]
            mappings[mapped_to] = []
            continue

        if line.startswith(">"):

            fields = line[1:].split("\t")

            if len(fields) < 2:
                raise ValueError(
                    f"{map_file}: expected contig name and mapping type, got: {line}"
                )

            ## Named mapping_type so the builtin type() is not rebound
            mapped, mapping_type = fields[0], fields[1]

            if mapping_type != "Primary":
                continue

            ## A contig line is only meaningful below a ">>" line
            if mapped_to not in mappings:
                raise ValueError(
                    f"{map_file}: contig {mapped} is listed before any '>>' line"
                )

            mappings[mapped_to].append(mapped)
121+
122+
############################################################
## Assign chromosome to arbitrary contigs
############################################################

## Writes two files in the output directory:
##   <filename>.assigned.fasta          contigs renamed <chromosome>s<N>
##   <filename>.chromosome_assignments  one "<contig> => <new name>" per line
## N counts the primary contigs of a chromosome in map order, from 1.

## Both handles are closed by the context manager, even on error
with open(f"{outdir}/{filename}.assigned.fasta",'w') as ASSIGNED_FASTA, \
     open(f"{outdir}/{filename}.chromosome_assignments",'w') as ASSIGNMENTS:

    ## Iterate over chromosome:contig mappings
    for chromosome in sorted(mappings):

        ## Iterate over all contigs that mapped to a chromosome
        for index,contig in enumerate(mappings[chromosome],start=1):

            if contig not in contigs:
                raise KeyError(
                    f"contig {contig} (mapped to {chromosome}) is not in {fasta_file}"
                )

            residues = contigs[contig]
            new_name = f"{chromosome}s{index}"

            ASSIGNED_FASTA.write(f">{new_name}\n")

            ## Wrap contig sequence at 60 character length. Plain slicing
            ## is linear in the sequence length; textwrap.wrap() is meant
            ## for prose, re-copies the remaining text for every output
            ## line and may break unevenly at hyphens or whitespace
            sequence = "\n".join(
                residues[start:start + 60]
                for start in range(0,len(residues),60)
            )

            ASSIGNED_FASTA.write(f"{sequence}\n")
            ASSIGNMENTS.write(f"{contig} => {new_name}\n")

0 commit comments

Comments
 (0)