Skip to content

Commit c964dc6

Browse files
committed
solve conflicts
2 parents 0929539 + 2e8acb9 commit c964dc6

19 files changed

Lines changed: 331 additions & 408 deletions

README.md

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,19 @@ run a series of checks following a package/data model.
77

88
## To run the validator
99

10-
```bash
11-
$ python validate.py <path>
12-
```
10+
Since I'm trying different methods to get the validator's steps running together, there are different ways to run _a_ validator.
11+
12+
* Pytest:
13+
14+
```bash
15+
$ pytest --pkgpath 'path/to/geopackage.gpkg'
16+
```
17+
18+
* Custom (python pipeline):
19+
20+
```bash
21+
$ python validate.py <path>
22+
```
1323

1424
## Where are the models
1525

check.py

Lines changed: 96 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,103 @@
11
import gpt
22

3-
def check_layer_names(pkg):
3+
4+
def check_layer_names(pkg, layers):
45
"""
5-
Some layers are mandatory, others are commonly used, but not necessarily.
6+
Return True/False if all 'layers' were found/not
7+
8+
Args:
9+
pkg: Geopkg
10+
layers: List[str]
11+
List of layer names expected to be found
612
"""
713
out = []
8-
_mandatory = ['geologic_units',
9-
'layer_styles']
10-
_common = ['geologic_contacts',
11-
'linear_features',
12-
'surface_features']
13-
14-
# check for the mandatory one
15-
if not all(l in pkg for l in _mandatory):
16-
out.append("Mandatory layers {!s} not found.".format(_mandatory))
17-
18-
# check for the commonly used.
19-
# Extra layers -- not in _mandatory neither in _common -- can be used,
20-
# which makes the checking for optional layers a bit will defined.
21-
# What we can do here is to check for if there are many layers and *warn*
22-
# the user if these _common ones are not there.
23-
if len(pkg) >= len(_mandatory + _common):
24-
if not all(l in pkg for l in _common):
25-
# The cool thing to do here is to have their geometry inspected
26-
# to have a clue on layer/content match
27-
msg = "Expected to see layers {!s}".format(_common)
28-
out.append(msg)
29-
30-
return out
31-
32-
LAYER_DEFS = dict(
33-
geologic_units = {'columns': ['name','rgb','geo_type','geo_code'],
34-
'geometry': 'Polygon'},
35-
geologic_contacts = {'columns': ['geo_type'],
36-
'geometry': 'Linestring'},
37-
surface_features = {'columns': ['geo_type'],
38-
'geometry': 'Polygon'},
39-
linear_features = {'columns': ['geo_type'],
40-
'geometry': 'Linestring'},
41-
layer_styles = {'columns': ['styleQML','styleSLD']}
42-
)
43-
44-
def check_field_names(pkg, case_insensitive=False):
45-
out = []
46-
for _layer,_defs in LAYER_DEFS.items():
47-
if _layer not in pkg:
48-
msg = "Layer {} not found in pkg".format(_layer)
49-
out.append(msg)
50-
continue
5114

52-
if case_insensitive:
53-
_check_columns = lambda c: c not in pkg[_layer].lower()
54-
else:
55-
_check_columns = lambda c: c not in pkg[_layer]
56-
not_found = list(filter(_check_columns, _defs['columns']))
57-
if len(not_found):
58-
msg = "Columns {} not found in layer {}".format(not_found,_layer)
59-
out.append(msg)
15+
pl = set(pkg.keys())
16+
el = set(layers)
17+
18+
layers_found = el.intersection(pl)
19+
layers_extra = pl.difference(el)
20+
layers_notfound = el.difference(layers_found)
21+
22+
print("Expected layers:", list(layers))
23+
print("Layers found:", list(layers_found))
24+
print("Layers not found:", list(layers_notfound))
25+
print("Extra layers:", list(layers_extra))
26+
27+
# Return True if all required layers were found
28+
return len(layers_notfound) == 0
29+
30+
31+
def _check_if_sets_match(values_have, values_expected):
32+
"""
33+
Return (notfound,extra) sets of "not-found" and "extra" values
34+
35+
Args:
36+
values_have: List[str]
37+
Set/list of values we have at hand
38+
values_expected: List[str]
39+
Set/list of values we expect to find
40+
"""
41+
have = set(values_have)
42+
expect = set(values_expected)
43+
44+
found = expect.intersection(have)
45+
notfound = expect.difference(found)
46+
extra = have.difference(expect)
47+
48+
return (notfound, extra)
49+
50+
51+
def check_field_names(pkg, layer_columns, case_insensitive=False):
52+
"""
53+
Return True/False if all columns were found/not in respective layer(s)
54+
55+
Args:
56+
pkg: Geopkg
57+
layer_columns: Dict[str, List[str]]
58+
Dictionary providing columns names (values) for each layer (key)
59+
"""
60+
for layer,columns in layer_columns.items():
61+
assert layer in pkg, "Layer '{}' not found in pkg".format(layer)
62+
63+
notfound, extra = _check_if_sets_match(pkg[layer].columns, columns)
64+
65+
if len(notfound):
66+
print("Columns {} not found in layer {}".format(notfound,layer))
67+
68+
return len(notfound) == 0
6069

61-
return out
6270

6371
def check_geometry(pkg):
6472
"""
65-
Check if geometry has nulls
73+
Return True/False if geometry columns has some nulls/not
74+
75+
If _all_ values of a geometry column are Null, it's ok (case of 'layer_styles').
76+
But if _some_ values are Null, it's not ok, return False.
77+
78+
Args:
79+
pkg: Geopkg
6680
"""
67-
out = []
68-
for lname, df in pkg.layers():
81+
ok = True
82+
for layer, df in pkg.layers:
6983
geometry_bool = df['geometry'].isnull()
7084
if all(geometry_bool):
85+
print("All values of 'geometry' from layer '{}' are Null.".format(layer))
7186
continue
7287
if any(geometry_bool):
73-
msg = ("Found null geometries in layer {}\n{}"
74-
.format(lname,df[geometry_bool]))
75-
out.append(msg)
88+
print("Found Null geometries in layer {}:".format(layer))
89+
print(df[geometry_bool])
90+
ok = False
91+
92+
return ok
7693

77-
return out
7894

7995
def check_crs(pkg):
80-
out = []
96+
"""
97+
Return True if only one CRS is found, False if multiple were found
98+
"""
8199
CRSs = dict()
82-
for lname, df in pkg.layers():
100+
for lname, df in pkg.layers:
83101
crs = df.crs
84102
# - 'layer_styles' (by QGIS) has no 'geometry'
85103
# - accumulate crs(s) in a hash to check later for heterogeneity (multiple crs's)
@@ -90,15 +108,15 @@ def check_crs(pkg):
90108

91109
# - if more than one crs was found, say it
92110
if len(CRSs) > 1:
93-
out.append('Multiple CRSs found')
94-
111+
print('Multiple CRSs found')
95112
# For each crs (hopefully, one), print it (WKT)
96113
for crs,lrs in CRSs.items():
97-
out.append(crs.to_string())
114+
print(crs.to_string())
115+
116+
return len(CRSs) == 1
98117

99-
return out
100118

101-
def geopackage(gpkg):
119+
def geopackage(gpkg, schema_name='planmap'):
102120
"""
103121
Checks to be done:
104122
- check layer names
@@ -108,15 +126,15 @@ def geopackage(gpkg):
108126
- check if there are multiple "shape" and "id" columns
109127
"""
110128
pkg = gpt.read_file(gpkg)
111-
# # make it "case insensitive" (lower the keys)
112-
# for lname in list(pkg.keys()):
113-
# pkg[lname.lower()] = pkg[lname]
114-
# del pkg[lname]
129+
130+
import schema
131+
gpkg_schema = getattr(schema,schema_name)
115132

116133
# Check layer names
117-
res = check_layer_names(pkg)
118-
res += check_field_names(pkg)
119-
res += check_geometry(pkg)
120-
res += check_crs(pkg)
121-
for r in res:
122-
print(r)
134+
layers_defs = gpkg_schema['layers']
135+
layer_columns = {l:defs['columns'] for l,defs in layers_defs.items()}
136+
137+
ok = check_layer_names(pkg, layers_defs.keys())
138+
ok *= check_field_names(pkg, layer_columns)
139+
ok *= check_geometry(pkg)
140+
ok *= check_crs(pkg)

conftest.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
def pytest_addoption(parser):
2+
parser.addoption("--pkgpath", action="store", default="path/to/PMID")
3+
parser.addoption("--schema", action="store", default="planmap")

json_schema/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .geopackage_layers import validate as geopackage
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
4+
"title": "Match Geopackage layers list",
5+
6+
"$comment": "Works for, e.g., ['units','contacts','bla'], but not for ['bla','units','contacts']",
7+
"$example": ["units","contacts","another-column"],
8+
9+
"definitions": {
10+
"layers": {
11+
"type": "string",
12+
"enum": ["units", "contacts"]
13+
}
14+
},
15+
16+
"type": "array",
17+
"items": [
18+
{
19+
"$ref": "#/definitions/layers"
20+
}
21+
],
22+
"minItems": 2,
23+
24+
"uniqueItems": true,
25+
"additionalItems": { "type": "string" }
26+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
4+
"title": "Match Geopackage's 'Units' layer columns list",
5+
6+
"$comment": "Works for, e.g., ['name','geometry','code','bla'], but not for ['bla','name','geometry','code']",
7+
"$example": ["name","geometry","code","another-column"],
8+
9+
"definitions": {
10+
11+
"point_coordinates": {
12+
"type": "array",
13+
"items": [
14+
{
15+
"type": "number"
16+
}
17+
],
18+
"minItems": 2,
19+
"maxItems": 3
20+
},
21+
22+
"line_coordinates": {
23+
"type": "array",
24+
"items": [
25+
{
26+
"$ref": "#/definitions/point_coordinates"
27+
}
28+
],
29+
"minItems": 2
30+
},
31+
32+
"polygon_coordinates": {
33+
"type": "array",
34+
"items": [
35+
{
36+
"$ref": "#/definitions/line_coordinates"
37+
},
38+
{ "minItems": 4 }
39+
],
40+
"minItems": 1,
41+
"maxItems": 1
42+
},
43+
44+
"polygon_geometry": {
45+
"type": "object",
46+
"properties": {
47+
"type": {
48+
"const": "Polygon"
49+
},
50+
"coordinates": {
51+
"$ref": "#/definitions/polygon_coordinates"
52+
}
53+
}
54+
}
55+
},
56+
57+
58+
"type": "object",
59+
"properties": {
60+
61+
"Unit": {
62+
"type": "string"
63+
},
64+
65+
"RGB": {
66+
"type": "string"
67+
},
68+
69+
"geometry": {
70+
"$ref": "#/definitions/polygon_geometry"
71+
}
72+
}
73+
}

schema.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
planmap = {
2+
'layers': {
3+
'geologic_units': {
4+
'columns': ['name','rgb','geo_type','geo_code'],
5+
'geometry': 'Polygon'
6+
},
7+
'geologic_contacts': {
8+
'columns': ['geo_type'],
9+
'geometry': 'Linestring'
10+
},
11+
'surface_features': {
12+
'columns': ['geo_type'],
13+
'geometry': 'Polygon'
14+
},
15+
'linear_features': {
16+
'columns': ['geo_type'],
17+
'geometry': 'Linestring'
18+
},
19+
'layer_styles': {
20+
'columns': ['styleQML','styleSLD']
21+
}
22+
}
23+
}

0 commit comments

Comments
 (0)