Skip to content

Commit df8b7ce

Browse files
committed
[wip] Did various adjustments to 'check' and some cleaning
1 parent e15616b commit df8b7ce

8 files changed

Lines changed: 206 additions & 192 deletions

File tree

check.py

Lines changed: 116 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,127 @@
11
import gpt
22

3-
def check_layer_names(pkg):
3+
4+
planmap = {
5+
'layers': {
6+
'geologic_units': {
7+
'columns': ['name','rgb','geo_type','geo_code'],
8+
'geometry': 'Polygon'
9+
},
10+
'geologic_contacts': {
11+
'columns': ['geo_type'],
12+
'geometry': 'Linestring'
13+
},
14+
'surface_features': {
15+
'columns': ['geo_type'],
16+
'geometry': 'Polygon'
17+
},
18+
'linear_features': {
19+
'columns': ['geo_type'],
20+
'geometry': 'Linestring'
21+
},
22+
'layer_styles': {
23+
'columns': ['styleQML','styleSLD']
24+
}
25+
}
26+
}
27+
28+
def check_layer_names(pkg, layers):
429
"""
5-
Some layers are mandatory, others are commonly used, but not necessarily.
30+
Return True if all expected 'layers' were found in 'pkg', False otherwise
31+
32+
Args:
33+
pkg: Geopkg
34+
layers: List[str]
35+
List of layer names expected to be found
636
"""
737
out = []
8-
_mandatory = ['geologic_units',
9-
'layer_styles']
10-
_common = ['geologic_contacts',
11-
'linear_features',
12-
'surface_features']
13-
14-
# check for the mandatory one
15-
if not all(l in pkg for l in _mandatory):
16-
out.append("Mandatory layers {!s} not found.".format(_mandatory))
17-
18-
# check for the commonly used.
19-
# Extra layers -- not in _mandatory neither in _common -- can be used,
20-
# which makes the checking for optional layers a bit ill-defined.
21-
# What we can do here is to check whether there are many layers and *warn*
22-
# the user if these _common ones are not there.
23-
if len(pkg) >= len(_mandatory + _common):
24-
if not all(l in pkg for l in _common):
25-
# The cool thing to do here is to have their geometry inspected
26-
# to have a clue on layer/content match
27-
msg = "Expected to see layers {!s}".format(_common)
28-
out.append(msg)
29-
30-
return out
31-
32-
LAYER_DEFS = dict(
33-
geologic_units = {'columns': ['name','rgb','geo_type','geo_code'],
34-
'geometry': 'Polygon'},
35-
geologic_contacts = {'columns': ['geo_type'],
36-
'geometry': 'Linestring'},
37-
surface_features = {'columns': ['geo_type'],
38-
'geometry': 'Polygon'},
39-
linear_features = {'columns': ['geo_type'],
40-
'geometry': 'Linestring'},
41-
layer_styles = {'columns': ['styleQML','styleSLD']}
42-
)
43-
44-
def check_field_names(pkg, case_insensitive=False):
45-
out = []
46-
for _layer,_defs in LAYER_DEFS.items():
47-
if _layer not in pkg:
48-
msg = "Layer {} not found in pkg".format(_layer)
49-
out.append(msg)
50-
continue
5138

52-
if case_insensitive:
53-
_check_columns = lambda c: c not in pkg[_layer].lower()
54-
else:
55-
_check_columns = lambda c: c not in pkg[_layer]
56-
not_found = list(filter(_check_columns, _defs['columns']))
57-
if len(not_found):
58-
msg = "Columns {} not found in layer {}".format(not_found,_layer)
59-
out.append(msg)
39+
pl = set(pkg.keys())
40+
el = set(layers)
41+
42+
layers_found = el.intersection(pl)
43+
layers_extra = pl.difference(el)
44+
layers_notfound = el.difference(layers_found)
45+
46+
print("Expected layers:", list(layers))
47+
print("Layers found:", list(layers_found))
48+
print("Layers not found:", list(layers_notfound))
49+
print("Extra layers:", list(layers_extra))
50+
51+
# Return True if all required layers were found
52+
return len(layers_notfound) == 0
53+
54+
55+
def _check_if_sets_match(values_have, values_expected):
56+
"""
57+
Return (notfound,extra) sets of "not-found" and "extra" values
58+
59+
Args:
60+
values_have: List[str]
61+
Set/list of values we have at hand
62+
values_expected: List[str]
63+
Set/list of values we expect to find
64+
"""
65+
have = set(values_have)
66+
expect = set(values_expected)
67+
68+
found = expect.intersection(have)
69+
notfound = expect.difference(found)
70+
extra = have.difference(expect)
71+
72+
return (notfound, extra)
73+
74+
75+
def check_field_names(pkg, layer_columns, case_insensitive=False):
76+
"""
77+
Return True/False if all columns were found/not in respective layer(s)
78+
79+
Args:
80+
pkg: Geopkg
81+
layer_columns: Dict[str, List[str]]
82+
Dictionary providing column names (values) for each layer (key)
83+
"""
84+
for layer,columns in layer_columns.items():
85+
assert layer in pkg, "Layer '{}' not found in pkg".format(layer)
86+
87+
notfound, extra = _check_if_sets_match(pkg[layer].columns, columns)
88+
89+
if len(notfound):
90+
print("Columns {} not found in layer {}".format(notfound,layer))
91+
92+
return len(notfound) == 0
6093

61-
return out
6294

6395
def check_geometry(pkg):
6496
"""
65-
Check if geometry has nulls
97+
Return False if any layer's geometry column has some (but not all) Null values, True otherwise
98+
99+
If _all_ values of a geometry column are Null, it's ok (case of 'layer_styles').
100+
But if _some_ values are Null, it's not ok, return False.
101+
102+
Args:
103+
pkg: Geopkg
66104
"""
67-
out = []
68-
for lname, df in pkg.layers():
105+
ok = True
106+
for layer, df in pkg.layers:
69107
geometry_bool = df['geometry'].isnull()
70108
if all(geometry_bool):
109+
print("All values of 'geometry' from layer '{}' are Null.".format(layer))
71110
continue
72111
if any(geometry_bool):
73-
msg = ("Found null geometries in layer {}\n{}"
74-
.format(lname,df[geometry_bool]))
75-
out.append(msg)
112+
print("Found Null geometries in layer {}:".format(layer))
113+
print(df[geometry_bool])
114+
ok = False
115+
116+
return ok
76117

77-
return out
78118

79119
def check_crs(pkg):
80-
out = []
120+
"""
121+
Return True if exactly one CRS is found, False otherwise (none or multiple found)
122+
"""
81123
CRSs = dict()
82-
for lname, df in pkg.layers():
124+
for lname, df in pkg.layers:
83125
crs = df.crs
84126
# - 'layer_styles' (by QGIS) has no 'geometry'
85127
# - accumulate crs(s) in a hash to check later for heterogeneity (multiple crs's)
@@ -90,13 +132,13 @@ def check_crs(pkg):
90132

91133
# - if more than one crs was found, say it
92134
if len(CRSs) > 1:
93-
out.append('Multiple CRSs found')
94-
135+
print('Multiple CRSs found')
95136
# For each crs (hopefully, one), print it (WKT)
96137
for crs,lrs in CRSs.items():
97-
out.append(crs.to_string())
138+
print(crs.to_string())
139+
140+
return len(CRSs) == 1
98141

99-
return out
100142

101143
def geopackage(gpkg):
102144
"""
@@ -114,9 +156,10 @@ def geopackage(gpkg):
114156
# del pkg[lname]
115157

116158
# Run all checks: layer names, field names, geometry and CRS
117-
res = check_layer_names(pkg)
118-
res += check_field_names(pkg)
119-
res += check_geometry(pkg)
120-
res += check_crs(pkg)
121-
for r in res:
122-
print(r)
159+
layers_defs = planmap['layers']
160+
layer_columns = {l:defs['columns'] for l,defs in layers_defs.items()}
161+
162+
ok = check_layer_names(pkg, layers_defs.keys())
163+
ok *= check_field_names(pkg, layer_columns)
164+
ok *= check_geometry(pkg)
165+
ok *= check_crs(pkg)

conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def pytest_addoption(parser):
2+
parser.addoption("--pkgpath", action="store", default="path/to/PMID")

tasks/__init__.py

Lines changed: 0 additions & 50 deletions
This file was deleted.

test_geopackage.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,37 @@ def gpkg():
3535
def test_00(geopkg_path):
3636
assert os.path.exists(geopkg_path), "Geopackage file/path NOT found."
3737
gpkg = gpt.read_file(geopkg_path)
38-
assert gpkg
38+
assert gpkg, "Geopackage is apparently empty/null."
3939
_data['gpkg'] = gpkg
40+
print("\nGeopackage loaded ({})".format(geopkg_path))
4041

4142
# def test_geopackage():
4243
# out = check.geopackage(_data['gpkg'])
4344

4445
def test_layer_names(gpkg):
46+
print("\n* Layer names")
4547
res = check.check_layer_names(gpkg)
46-
assert res
48+
if res:
49+
print("\n".join(res))
50+
assert len(res) == 0
4751

4852
def test_field_names(gpkg):
53+
print("\n* Field names")
4954
res = check.check_field_names(gpkg)
50-
assert res
55+
if res:
56+
print("\n".join(res))
57+
assert len(res) == 0
5158

5259
def test_geometry(gpkg):
5360
res = check.check_geometry(gpkg)
54-
assert res
61+
if res:
62+
print("\n* Geometry")
63+
print("\n".join(res))
64+
assert len(res) == 0
5565

5666
def test_crs(gpkg):
5767
res = check.check_crs(gpkg)
58-
assert res
68+
if res:
69+
print("\n* CRS")
70+
print("\n".join(res))
71+
assert len(res) == 0

validate.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import gpt
2+
import check
3+
4+
if __name__ == '__main__':
5+
import sys
6+
7+
if len(sys.argv) < 2:
8+
print("\nUsage: {} <path/to/geopackage>".format(sys.argv[0]))
9+
sys.exit(1)
10+
11+
filename = sys.argv[1]
12+
check.geopackage(filename)

validator/document/validator.py

Whitespace-only changes.

validator/vector/columns_units.json

Lines changed: 0 additions & 5 deletions
This file was deleted.

0 commit comments

Comments
 (0)