#!/usr/bin/env python3
"""Rename assembly contigs after the reference chromosomes they map to.

Reads an assembly FASTA file and a chromosome map, then writes two files
into the output directory:

* ``<prefix>.assigned.fasta`` -- every contig with a "Primary" mapping,
  renamed to ``<chromosome>s<N>`` (N counts the primary contigs of that
  chromosome in map order, starting at 1), sequence wrapped at 60 columns;
* ``<prefix>.chromosome_assignments`` -- one ``<contig> => <new name>``
  line per renamed contig.

``<prefix>`` is the FASTA file name up to its first dot.
"""

from sys import argv
from argparse import ArgumentParser
from os import makedirs
from os.path import isdir, basename, join
from textwrap import wrap  # imported by the original script; no longer used below
from typing import Dict, List, Optional, Sequence

name = "assign_chromosome_number.py"
version = "0.2.0"
updated = "2025-08-12"

############################################################
## Readme
############################################################

usage = f"""
NAME {name}
VERSION {version}
UPDATED {updated}
SYNOPSIS Utilizes chromosome map generated by orient_fastas_to_reference.py to homogenize naming between the reference
and the new assembly.

USAGE {name} \\
-m all.map \\
-f 50507.oriented.fasta \\
-o 50507

OPTIONS
-m (--map) Chromosome map file
-f (--fasta) Assembly FASTA file
-o (--outdir) Output directory [Default = CHROMOSOME_ASSIGNMENT]
"""

############################################################
## Read fasta file
############################################################


def read_fasta(fasta_path: str) -> Dict[str, str]:
    """Load a FASTA file into a ``{header: sequence}`` dictionary.

    The key is the full header line without its leading ">". Blank lines
    are ignored. When a header occurs more than once, the last record wins.

    Raises:
        ValueError: if sequence data appears before the first header.
    """
    pieces: Dict[str, List[str]] = {}
    locus = None

    with open(fasta_path, "r") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line:
                continue

            ## Fasta header: start a new record
            if line.startswith(">"):
                locus = line[1:]
                pieces[locus] = []
                continue

            if locus is None:
                raise ValueError(
                    f"{fasta_path}: line {line_number}: sequence data found before any '>' header"
                )

            pieces[locus].append(line)

    ## Join once per record; repeated string += re-copies the sequence on every line
    return {header: "".join(parts) for header, parts in pieces.items()}


############################################################
## Read chromosome mapping file
############################################################


def read_map(map_path: str) -> Dict[str, List[str]]:
    """Parse the chromosome map into ``{chromosome: [contig, ...]}``.

    Lines starting with ">>" open a chromosome section; the chromosome name
    is the first tab-separated field after the marker. Lines starting with
    a single ">" describe a contig: the first field is the contig name and
    the second its mapping type. Only contigs whose type is "Primary" are
    kept, in file order. Any other line is ignored. A repeated ">>" line
    for the same chromosome restarts that chromosome's list.

    Raises:
        ValueError: if a contig line has fewer than two tab-separated
            fields, or a Primary contig appears before any ">>" line.
    """
    mappings: Dict[str, List[str]] = {}
    mapped_to = None

    with open(map_path, "r") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line:
                continue

            ## Test ">>" first: such a line also starts with ">"
            if line.startswith(">>"):
                mapped_to = line[2:].split("\t")[0]
                mappings[mapped_to] = []
                continue

            if line.startswith(">"):
                fields = line[1:].split("\t")
                if len(fields) < 2:
                    raise ValueError(
                        f"{map_path}: line {line_number}: expected '>contig<TAB>type', got {line!r}"
                    )

                mapped, mapping_type = fields[:2]
                if mapping_type != "Primary":
                    continue

                if mapped_to is None:
                    raise ValueError(
                        f"{map_path}: line {line_number}: contig {mapped!r} listed before any '>>' chromosome line"
                    )

                mappings[mapped_to].append(mapped)

    return mappings


############################################################
## Assign chromosome to arbitrary contigs
############################################################


def write_assignments(
    mappings: Dict[str, List[str]],
    contigs: Dict[str, str],
    outdir: str,
    filename: str,
    line_width: int = 60,
) -> None:
    """Write the renamed FASTA and the contig-to-chromosome table.

    Chromosomes are processed in sorted order; the contigs of a chromosome
    keep their order from ``mappings`` and are numbered from 1.

    Args:
        mappings: chromosome name -> contig names assigned to it.
        contigs: contig name -> sequence.
        outdir: existing directory that receives both output files.
        filename: prefix shared by both output file names.
        line_width: number of sequence characters per FASTA line.

    Raises:
        KeyError: if a mapped contig has no sequence in ``contigs``. This
            is checked up front so no partial output is left behind.
    """
    missing = sorted(
        {contig for assigned in mappings.values() for contig in assigned} - set(contigs)
    )
    if missing:
        raise KeyError(f"contigs named in the map but absent from the FASTA: {', '.join(missing)}")

    fasta_out = join(outdir, f"{filename}.assigned.fasta")
    table_out = join(outdir, f"{filename}.chromosome_assignments")

    with open(fasta_out, "w") as assigned_fasta, open(table_out, "w") as assignments:

        ## Iterate over chromosome:contig mappings
        for chromosome in sorted(mappings):

            ## Iterate over all contigs that mapped to a chromosome
            for index, contig in enumerate(mappings[chromosome], start=1):
                new_name = f"{chromosome}s{index}"
                sequence = contigs[contig]

                ## Fixed-width slicing instead of textwrap.wrap: textwrap is a prose
                ## wrapper whose defaults can break at hyphens rather than at the width
                wrapped = "\n".join(
                    sequence[start:start + line_width]
                    for start in range(0, len(sequence), line_width)
                )

                assigned_fasta.write(f">{new_name}\n")
                assigned_fasta.write(f"{wrapped}\n")
                assignments.write(f"{contig} => {new_name}\n")


############################################################
## Command-line arguments
############################################################


def main(cli_args: Optional[Sequence[str]] = None) -> None:
    """Run the script.

    Args:
        cli_args: command-line arguments without the program name;
            defaults to ``sys.argv[1:]``. With no arguments at all the
            usage text is printed and nothing else happens.
    """
    if cli_args is None:
        cli_args = argv[1:]

    if not cli_args:
        print(usage)
        return

    parser = ArgumentParser()
    parser.add_argument("-m", "--map", required=True)
    parser.add_argument("-f", "--fasta", required=True)
    parser.add_argument("-o", "--outdir", default="CHROMOSOME_ASSIGNMENT")
    args = parser.parse_args(cli_args)

    outdir = args.outdir
    if not isdir(outdir):
        makedirs(outdir, mode=0o755)

    ## Output prefix: FASTA file name up to its first dot
    filename = basename(args.fasta).split(".")[0]

    contigs = read_fasta(args.fasta)
    mappings = read_map(args.map)
    write_assignments(mappings, contigs, outdir, filename)


if __name__ == "__main__":
    main()