Skip to content

Commit e40429b

Browse files
gregparkes, Gregory Parkes, Teque5
authored
Globstar Support; Fixes #51 (PR#59)
* Added multi-threading code / plus black formatting
* allow glob with relative and absolute paths; +tests
* fix broken README links & isort config

---------

Co-authored-by: Gregory Parkes <gregorymparkes@gmail.com>
Co-authored-by: Teque5 <teque5@gmail.com>
1 parent ccc1cbf commit e40429b

4 files changed

Lines changed: 155 additions & 41 deletions

File tree

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@ This python module makes it easy to interact with Signal Metadata Format
44
(SigMF) recordings. This module works with Python 3.7+ and is distributed
55
freely under the terms of the GNU Lesser GPL v3 License.
66

7-
The [SigMF specification document](https://github.com/sigmf/SigMF/blob/HEAD/sigmf-spec.md)
8-
is located in the [SigMF](https://github.com/sigmf/SigMF) repository.
7+
This module follows the SigMF specification [html](https://sigmf.org/)/[pdf](https://sigmf.github.io/SigMF/sigmf-spec.pdf) from the [spec repository](https://github.com/sigmf/SigMF).
98

109
# Installation
1110

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ inputs = ['sigmf', 'tests']
9191
[tool.black]
9292
line-length = 120
9393

94+
[tool.isort]
95+
profile = "black"
96+
9497
[tool.tox]
9598
legacy_tox_ini = '''
9699
[tox]

sigmf/validate.py

Lines changed: 102 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,20 @@
44
#
55
# SPDX-License-Identifier: LGPL-3.0-or-later
66

7-
'''SigMF Validator'''
8-
7+
"""SigMF Validator"""
98
import argparse
9+
import glob
1010
import json
1111
import logging
12+
import os
13+
import sys
14+
15+
# required for Python 3.7
16+
from typing import Optional, Tuple
17+
18+
# multi-threading library - should work well as I/O will be the primary
19+
# cost for small SigMF files. Swap to ProcessPool if files are large.
20+
from concurrent.futures import ThreadPoolExecutor, as_completed
1221

1322
import jsonschema
1423

@@ -17,43 +26,47 @@
1726

1827

1928
def extend_with_default(validator_class):
20-
'''
29+
"""
2130
Boilerplate code from [1] to retrieve jsonschema default dict.
2231
2332
References
2433
----------
2534
[1] https://python-jsonschema.readthedocs.io/en/stable/faq/
26-
'''
35+
"""
2736
validate_properties = validator_class.VALIDATORS["properties"]
2837

2938
def set_defaults(validator, properties, instance, topschema):
3039
for property, subschema in properties.items():
3140
if "default" in subschema:
3241
instance.setdefault(property, subschema["default"])
3342

34-
for error in validate_properties(
35-
validator, properties, instance, topschema,
43+
for err in validate_properties(
44+
validator,
45+
properties,
46+
instance,
47+
topschema,
3648
):
37-
yield error
49+
yield err
3850

3951
return jsonschema.validators.extend(
40-
validator_class, {"properties": set_defaults},
52+
validator_class,
53+
{"properties": set_defaults},
4154
)
4255

4356

4457
def get_default_metadata(ref_schema=schema.get_schema()):
45-
'''
58+
"""
4659
retrieve defaults from schema
4760
FIXME: not working yet
48-
'''
61+
"""
4962
default = {}
5063
validator = extend_with_default(jsonschema.Draft7Validator)
5164
validator(ref_schema).validate(default)
5265
return default
5366

5467

5568
def validate(metadata, ref_schema=schema.get_schema()):
56-
'''
69+
"""
5770
Check that the provided `metadata` dict is valid according to the `ref_schema` dict.
5871
Walk entire schema and check all keys.
5972
@@ -69,30 +82,67 @@ def validate(metadata, ref_schema=schema.get_schema()):
6982
Returns
7083
-------
7184
None, will raise error if invalid.
72-
'''
85+
"""
7386
jsonschema.validators.validate(instance=metadata, schema=ref_schema)
7487

7588
# ensure captures and annotations are each ordered by non-decreasing core:sample_start
7689
# TODO: There is a way to do this with just the schema apparently.
77-
for key in ['captures', 'annotations']:
90+
for key in ["captures", "annotations"]:
7891
count = -1
7992
for item in metadata[key]:
80-
new_count = item['core:sample_start']
93+
new_count = item["core:sample_start"]
8194
if new_count < count:
82-
raise jsonschema.exceptions.ValidationError(f'{key} has bad order')
95+
raise jsonschema.exceptions.ValidationError(f"{key} has bad order")
8396
else:
8497
count = new_count
8598

8699

87-
def main():
88-
parser = argparse.ArgumentParser(description='Validate SigMF Archive or file pair against JSON schema.',
89-
prog='sigmf_validate')
90-
parser.add_argument('filename', help='SigMF path (extension optional).')
91-
parser.add_argument('--skip-checksum', action='store_true', help='Skip reading dataset to validate checksum.')
92-
parser.add_argument('-v', '--verbose', action='count', default=0)
93-
parser.add_argument('--version', action='version', version=f'%(prog)s {toolversion}')
100+
def _validate_single_file(filename, skip_checksum: bool, logger: logging.Logger) -> int:
101+
"""Validates a single SigMF file.
94102
95-
args = parser.parse_args()
103+
To be called as part of a multithreading / multiprocess application.
104+
105+
Parameters
106+
----------
107+
filename : str
108+
Path to a SigMF recording: a `.sigmf-meta` / `.sigmf-data` file or a `.sigmf` archive.
109+
skip_checksum : bool
110+
Whether to skip checksum computation (True skips reading the dataset).
111+
logger : logging.Logger
112+
Logging object to log errors to.
113+
114+
Returns
115+
-------
116+
rc : int
117+
0 if OK, 1 if err
118+
"""
119+
try:
120+
# load signal
121+
signal = sigmffile.fromfile(filename, skip_checksum=skip_checksum)
122+
# validate
123+
signal.validate()
124+
125+
# handle any of 4 exceptions at once...
126+
except (jsonschema.exceptions.ValidationError, error.SigMFFileError, json.decoder.JSONDecodeError, IOError) as err:
127+
# catch the error, log, and continue
128+
logger.error("file `{}`: {}".format(filename, err))
129+
return 1
130+
else:
131+
return 0
132+
133+
134+
def main(arg_tuple: Optional[Tuple[str, ...]] = None) -> None:
135+
"""entry-point for command-line validator"""
136+
parser = argparse.ArgumentParser(
137+
description="Validate SigMF Archive or file pair against JSON schema.", prog="sigmf_validate"
138+
)
139+
parser.add_argument("path", nargs="*", help="SigMF path(s). Accepts * wildcards and extensions are optional.")
140+
parser.add_argument("--skip-checksum", action="store_true", help="Skip reading dataset to validate checksum.")
141+
parser.add_argument("-v", "--verbose", action="count", default=0)
142+
parser.add_argument("--version", action="version", version=f"%(prog)s {toolversion}")
143+
144+
# allow pass-in arg_tuple for testing purposes
145+
args = parser.parse_args(arg_tuple)
96146

97147
level_lut = {
98148
0: logging.WARNING,
@@ -102,22 +152,35 @@ def main():
102152
log = logging.getLogger()
103153
logging.basicConfig(level=level_lut[min(args.verbose, 2)])
104154

105-
try:
106-
signal = sigmffile.fromfile(args.filename, skip_checksum=args.skip_checksum)
107-
except error.SigMFFileError as err:
108-
# this happens if checksum fails
109-
log.error(err)
110-
exit(1)
111-
except IOError as err:
112-
log.error(err)
113-
log.error('Unable to read SigMF, bad path?')
114-
exit(1)
115-
except json.decoder.JSONDecodeError as err:
116-
log.error(err)
117-
log.error('Unable to decode malformed JSON.')
118-
exit(1)
119-
signal.validate()
120-
log.info('Validation OK!')
155+
paths = []
156+
# resolve possible wildcards
157+
for path in args.path:
158+
paths += glob.glob(path)
159+
160+
# multi-processing / threading pathway.
161+
n_completed = 0
162+
n_total = len(paths)
163+
# estimate number of usable CPU cores (NOTE: os.sched_getaffinity is only available on some Unix platforms)
164+
# https://stackoverflow.com/questions/1006289/how-to-find-out-the-number-of-cpus-using-python
165+
est_cpu_cores = len(os.sched_getaffinity(0))
166+
# create a thread pool
167+
# https://docs.python.org/3.7/library/concurrent.futures.html#threadpoolexecutor
168+
with ThreadPoolExecutor(max_workers=est_cpu_cores - 1) as executor:
169+
# submit jobs
170+
future_validations = {executor.submit(_validate_single_file, path, args.skip_checksum, log) for path in paths}
171+
# wait for jobs to finish and count those that validated OK (return code 0)
172+
for future in as_completed(future_validations):
173+
if future.result() == 0:
174+
n_completed += 1
175+
176+
if n_total == 0:
177+
log.error("No paths to validate.")
178+
sys.exit(1)
179+
elif n_completed != n_total:
180+
log.info(f"Validated {n_completed} of {n_total} files OK")
181+
sys.exit(1)
182+
else:
183+
log.info(f"Validated all {n_total} files OK!")
121184

122185

123186
if __name__ == "__main__":

tests/test_validation.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import tempfile
1010
import unittest
11+
from pathlib import Path
1112

1213
from jsonschema.exceptions import ValidationError
1314

@@ -23,6 +24,54 @@ def test_valid_data():
2324
SigMFFile(TEST_METADATA).validate()
2425

2526

27+
class CommandLineValidator(unittest.TestCase):
28+
"""Check behavior of command-line parser"""
29+
30+
def setUp(self):
31+
"""Create a directory with some valid files"""
32+
self.tmp_dir = tempfile.TemporaryDirectory()
33+
self.tmp_path = tmp_path = Path(self.tmp_dir.name)
34+
junk_path = tmp_path / "junk"
35+
TEST_FLOAT32_DATA.tofile(junk_path)
36+
some_meta = SigMFFile(TEST_METADATA, data_file=junk_path)
37+
some_meta.tofile(tmp_path / "a")
38+
some_meta.tofile(tmp_path / "b")
39+
some_meta.tofile(tmp_path / "c", toarchive=True)
40+
41+
def tearDown(self):
42+
"""cleanup"""
43+
self.tmp_dir.cleanup()
44+
45+
def test_normal(self):
46+
"""able to parse archives and non-archives"""
47+
args = (str(self.tmp_path / "*.sigmf*"),)
48+
sigmf.validate.main(args)
49+
50+
def test_normal_skip(self):
51+
"""able to skip checksum"""
52+
args = (str(self.tmp_path / "*.sigmf*"), "--skip-checksum")
53+
sigmf.validate.main(args)
54+
55+
def test_partial(self):
56+
"""checks some but not all files"""
57+
args = (str(self.tmp_path / "*"),)
58+
with self.assertRaises(SystemExit):
59+
sigmf.validate.main(args)
60+
61+
def test_missing(self):
62+
"""exit with rc=1 when run on empty"""
63+
with self.assertRaises(SystemExit) as cm:
64+
sigmf.validate.main(tuple())
65+
self.assertEqual((1,), cm.exception.args)
66+
67+
def test_version(self):
68+
"""exit with rc=0 after printing version"""
69+
args = ("--version",)
70+
with self.assertRaises(SystemExit) as cm:
71+
sigmf.validate.main(args)
72+
self.assertEqual((0,), cm.exception.args)
73+
74+
2675
class FailingCases(unittest.TestCase):
2776
"""Cases where the validator should throw an exception."""
2877

0 commit comments

Comments
 (0)