#
# SPDX-License-Identifier: LGPL-3.0-or-later

7- '''SigMF Validator'''
8-
7+ """SigMF Validator"""
98import argparse
9+ import glob
1010import json
1111import logging
12+ import os
13+ import sys
14+
15+ # required for Python 3.7
16+ from typing import Optional , Tuple
17+
18+ # multi-threading library - should work well as I/O will be the primary
19+ # cost for small SigMF files. Swap to ProcessPool if files are large.
20+ from concurrent .futures import ThreadPoolExecutor , as_completed
1221
1322import jsonschema
1423
1726
1827
def extend_with_default(validator_class):
    """
    Boilerplate code from [1] to retrieve jsonschema default dict.

    References
    ----------
    [1] https://python-jsonschema.readthedocs.io/en/stable/faq/
    """
    validate_properties = validator_class.VALIDATORS["properties"]

    def set_defaults(validator, properties, instance, topschema):
        # fill in schema-declared defaults before running normal validation
        # (renamed loop var: `property` shadowed the builtin)
        for prop_name, subschema in properties.items():
            if "default" in subschema:
                instance.setdefault(prop_name, subschema["default"])

        # delegate to the original "properties" validator and re-emit
        # any validation errors it yields
        yield from validate_properties(validator, properties, instance, topschema)

    return jsonschema.validators.extend(
        validator_class,
        {"properties": set_defaults},
    )
4255
4356
def get_default_metadata(ref_schema=None):
    """
    Retrieve defaults from schema.

    FIXME: not working yet

    Parameters
    ----------
    ref_schema : dict, optional
        Schema to pull defaults from; defaults to the bundled SigMF schema.

    Returns
    -------
    dict
        Metadata dict populated with schema defaults.
    """
    # resolve the default lazily: a `schema.get_schema()` default argument
    # would be evaluated once at import time and shared across calls
    if ref_schema is None:
        ref_schema = schema.get_schema()
    default = {}
    validator = extend_with_default(jsonschema.Draft7Validator)
    validator(ref_schema).validate(default)
    return default
5366
5467
def validate(metadata, ref_schema=None):
    """
    Check that the provided `metadata` dict is valid according to the `ref_schema` dict.
    Walk entire schema and check all keys.

    Parameters
    ----------
    metadata : dict
        The SigMF metadata to validate.
    ref_schema : dict, optional
        Schema to validate against; defaults to the bundled SigMF schema.

    Returns
    -------
    None, will raise error if invalid.
    """
    # resolve the default lazily: a `schema.get_schema()` default argument
    # would be evaluated once at import time and shared across calls
    if ref_schema is None:
        ref_schema = schema.get_schema()
    jsonschema.validators.validate(instance=metadata, schema=ref_schema)

    # assure capture and annotation order
    # TODO: There is a way to do this with just the schema apparently.
    for key in ("captures", "annotations"):
        sample_starts = [item["core:sample_start"] for item in metadata[key]]
        # list must already be sorted by sample_start (non-decreasing)
        if sample_starts != sorted(sample_starts):
            raise jsonschema.exceptions.ValidationError(f"{key} has bad order")
8598
8699
def _validate_single_file(filename, skip_checksum: bool, logger: logging.Logger) -> int:
    """Validates a single SigMF file.

    To be called as part of a multithreading / multiprocess application.

    Parameters
    ----------
    filename : str
        Path and name to sigmf.data or sigmf.meta file.
    skip_checksum : bool
        Whether to skip checksum computation while reading the dataset.
    logger : logging.Logger
        Logging object to log errors to.

    Returns
    -------
    rc : int
        0 if OK, 1 if err
    """
    try:
        # load signal
        signal = sigmffile.fromfile(filename, skip_checksum=skip_checksum)
        # validate
        signal.validate()

    # handle any of 4 exceptions at once...
    except (jsonschema.exceptions.ValidationError, error.SigMFFileError, json.decoder.JSONDecodeError, IOError) as err:
        # catch the error, log (lazy %-args avoid formatting when suppressed), and continue
        logger.error("file `%s`: %s", filename, err)
        return 1
    else:
        return 0
def main(arg_tuple: Optional[Tuple[str, ...]] = None) -> None:
    """Entry-point for command-line validator.

    Parameters
    ----------
    arg_tuple : tuple of str, optional
        Arguments to parse instead of ``sys.argv``; used for testing.
    """
    parser = argparse.ArgumentParser(
        description="Validate SigMF Archive or file pair against JSON schema.", prog="sigmf_validate"
    )
    parser.add_argument("path", nargs="*", help="SigMF path(s). Accepts * wildcards and extensions are optional.")
    parser.add_argument("--skip-checksum", action="store_true", help="Skip reading dataset to validate checksum.")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    parser.add_argument("--version", action="version", version=f"%(prog)s {toolversion}")

    # allow pass-in arg_tuple for testing purposes
    args = parser.parse_args(arg_tuple)

    # map repeated -v flags to a log level, capped at the most verbose
    level_lut = {
        0: logging.WARNING,
        1: logging.INFO,
        2: logging.DEBUG,
    }
    log = logging.getLogger()
    logging.basicConfig(level=level_lut[min(args.verbose, 2)])

    # resolve possible wildcards
    paths = []
    for path in args.path:
        paths += glob.glob(path)

    n_completed = 0
    n_total = len(paths)
    # estimate usable CPU cores; sched_getaffinity() respects CPU pinning
    # but exists only on Linux, so fall back to cpu_count() elsewhere
    # https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python
    if hasattr(os, "sched_getaffinity"):
        est_cpu_cores = len(os.sched_getaffinity(0))
    else:
        est_cpu_cores = os.cpu_count() or 1
    # thread pool should work well as I/O is the primary cost for small
    # SigMF files; swap to ProcessPoolExecutor if files are large.
    # max(1, ...) guards against a zero-worker pool on single-core hosts.
    # https://docs.python.org/3.7/library/concurrent.futures.html#threadpoolexecutor
    with ThreadPoolExecutor(max_workers=max(1, est_cpu_cores - 1)) as executor:
        # submit jobs
        future_validations = {executor.submit(_validate_single_file, path, args.skip_checksum, log) for path in paths}
        # await jobs and tally successful validations
        for future in as_completed(future_validations):
            if future.result() == 0:
                n_completed += 1

    if n_total == 0:
        log.error("No paths to validate.")
        sys.exit(1)
    elif n_completed != n_total:
        log.info(f"Validated {n_completed} of {n_total} files OK")
        sys.exit(1)
    else:
        log.info(f"Validated all {n_total} files OK!")
121184
122185
123186if __name__ == "__main__" :
0 commit comments