Skip to content

Commit 45ebefe

Browse files
committed
Have checks output printed
1 parent 76d2e35 commit 45ebefe

3 files changed

Lines changed: 251 additions & 7 deletions

File tree

check.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import gpt
2+
3+
def check_layer_names(pkg):
    """
    Check that the expected layers are present in the package.

    Some layers are mandatory, others are commonly used, but not necessarily.

    `pkg` is any container of layer names supporting `in` and `len()`
    (e.g. a dict keyed by layer name).

    Returns a list of messages, one per problem found (empty if none).
    """
    out = []
    _mandatory = ['geologic_units',
                  'layer_styles']
    _common = ['geologic_contacts',
               'linear_features',
               'surface_features']

    # check for the mandatory ones; report only those actually missing,
    # so the message does not blame layers that are present
    missing = [name for name in _mandatory if name not in pkg]
    if missing:
        out.append("Mandatory layers {!s} not found.".format(missing))

    # check for the commonly used.
    # Extra layers -- neither in _mandatory nor in _common -- can be used,
    # which makes the checking for optional layers a bit ill-defined.
    # What we can do here is to check whether there are many layers and
    # *warn* the user if these _common ones are not there.
    if len(pkg) >= len(_mandatory) + len(_common):
        absent = [name for name in _common if name not in pkg]
        if absent:
            # The cool thing to do here is to have their geometry inspected
            # to have a clue on layer/content match
            out.append("Expected to see layers {!s}".format(absent))

    return out
31+
32+
# Expected schema per layer: the required column names and, for layers that
# carry one, the expected geometry type ('layer_styles' has no geometry entry).
LAYER_DEFS = dict(
    geologic_units = {'columns': ['name','rgb','geo_type','geo_code'],
                      'geometry': 'Polygon'},
    geologic_contacts = {'columns': ['geo_type'],
                         'geometry': 'Linestring'},
    surface_features = {'columns': ['geo_type'],
                        'geometry': 'Polygon'},
    linear_features = {'columns': ['geo_type'],
                       'geometry': 'Linestring'},
    layer_styles = {'columns': ['styleQML','styleSLD']}
)


def check_field_names(pkg, case_insensitive=False):
    """
    Check that every layer in LAYER_DEFS has its required columns.

    `pkg` maps layer names to tables; a table must support `in` on column
    names and, for the case-insensitive mode, iteration over its column
    names (assumed to hold for the tables `gpt.read_file` returns -- confirm).

    If `case_insensitive` is true, column names are compared lower-cased on
    both sides.

    Returns a list of messages, one per missing layer or per layer with
    missing columns (empty if none).
    """
    out = []
    for _layer, _defs in LAYER_DEFS.items():
        if _layer not in pkg:
            msg = "Layer {} not found in pkg".format(_layer)
            out.append(msg)
            continue

        table = pkg[_layer]
        if case_insensitive:
            # Lower-case the available names AND the required ones: the
            # table itself has no `.lower()`, and required names such as
            # 'styleQML' are mixed-case.
            available = {str(col).lower() for col in table}
            not_found = [c for c in _defs['columns']
                         if c.lower() not in available]
        else:
            not_found = [c for c in _defs['columns'] if c not in table]

        if not_found:
            msg = "Columns {} not found in layer {}".format(not_found, _layer)
            out.append(msg)

    return out
62+
63+
def check_geometry(pkg):
    """
    Check if geometry has nulls.

    Iterates over `pkg.layers()`, expected to yield (layer name, table)
    pairs. A layer is skipped when it has no 'geometry' column or when its
    geometry is entirely null; a layer with only some null geometries is
    reported, together with the offending rows.

    Returns a list of messages (empty if nothing to report).
    """
    out = []
    for lname, df in pkg.layers():
        # Non-spatial tables (e.g. 'layer_styles') may carry no geometry
        # column at all; there is nothing to check for them.
        if 'geometry' not in df:
            continue
        geometry_bool = df['geometry'].isnull()
        if geometry_bool.all():
            continue
        if geometry_bool.any():
            msg = ("Found null geometries in layer {}\n{}"
                   .format(lname, df[geometry_bool]))
            out.append(msg)

    return out
78+
79+
def check_crs(pkg):
    """
    Check that all layers with geometry share the same CRS.

    Iterates over `pkg.layers()`, expected to yield (layer name, table)
    pairs where each table exposes a `.crs` attribute. The 'layer_styles'
    layer is ignored.

    Returns a list of messages: a 'Multiple CRSs found' warning when more
    than one CRS is in use, followed by one entry per CRS (its
    `to_string()` form, or a note naming the layers that have no CRS).
    """
    out = []
    CRSs = dict()
    for lname, df in pkg.layers():
        # - 'layer_styles' (by QGIS) has no 'geometry', so it has no CRS
        #   to compare; its `.crs` is not even looked at.
        if lname == 'layer_styles':
            continue
        # - accumulate layer names per crs to check later for
        #   heterogeneity (multiple crs's)
        CRSs.setdefault(df.crs, []).append(lname)

    # - if more than one crs was found, say it
    if len(CRSs) > 1:
        out.append('Multiple CRSs found')

    # For each crs (hopefully, one), report it
    for crs, lrs in CRSs.items():
        if crs is None:
            # A layer without CRS has nothing to stringify; name it instead
            out.append("No CRS defined for layers {}".format(lrs))
        else:
            out.append(crs.to_string())

    return out
100+
101+
def geopackage(gpkg):
    """
    Run the checks on the GeoPackage file `gpkg`; print and return the results.

    Checks to be done:
    - check layer names
    - check column names
    - check if any geometry entry is null, unless all of them are null
    - check if all CRS are equal (for table layers with geometry)
    - check if there are multiple "shape" and "id" columns
      (listed as intended; no such check is run below)

    Returns the list of messages produced by the checks, in the order the
    checks are run. Each message is also printed.
    """
    pkg = gpt.read_file(gpkg)
    # # make it "case insensitive" (lower the keys)
    # for lname in list(pkg.keys()):
    #     pkg[lname.lower()] = pkg[lname]
    #     del pkg[lname]

    # Run every check and gather their messages
    res = check_layer_names(pkg)
    res += check_field_names(pkg)
    res += check_geometry(pkg)
    res += check_crs(pkg)
    for r in res:
        print(r)
    # Hand the messages back so callers can use them, not only read stdout
    return res

lib.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import geopandas
2+
import fiona
3+
import numpy
4+
import pandas
5+
import shapely
6+
from pprint import pprint as pp
7+
from fiona import listlayers
8+
9+
10+
def read(fname, lname):
    """
    Read layer `lname` of the GeoPackage file `fname`.

    Returns whatever `geopandas.read_file` returns for that layer.
    """
    # 'GPKG' is the short name of the GeoPackage driver
    return geopandas.read_file(fname, layer=lname, driver='GPKG')
13+
14+
def check_crs():
    """
    Print the CRS(s) used by the layers of the file `FiNAME`.

    Every layer listed for `FiNAME` is read; layers are grouped by CRS
    ('layer_styles' excluded, its CRS is asserted to be None). A warning
    banner is printed when more than one CRS is found, then each CRS is
    printed along with the layers using it.

    NOTE(review): `FiNAME` is a module-level name that is not defined in
    the visible part of this file -- it must be set before calling.
    """
    CRSs = dict()
    for ln in listlayers(FiNAME):
        ld = read(FiNAME, ln)
        crs = ld.crs
        # CRS check:
        # - 'layer_styles' (by QGIS) has no 'geometry'
        # - accumulate crs(s) in a hash to check later for heterogeneity (multiple crs's)
        if ln != 'layer_styles':
            CRSs.setdefault(crs, []).append(ln)
        else:
            assert crs is None, "Expecting 'layer_styles' CRS to be `None`, instead got {}".format(crs)
    # Check CRS:
    # - if more than one crs was found, say it
    if len(CRSs) > 1:
        msg = 'WARNING: multiple CRS found in this dataset!'
        fnc = '*' * (len(msg)+2)
        print('{fnc}\n {msg} \n{fnc}'.format(fnc=fnc,msg=msg))
    # For each crs (hopefully, one), print it
    for crs, lrs in CRSs.items():
        msg = '\nCRS: {}'.format(lrs)
        print(msg)
        print('---')
        # A geometry layer may have no CRS at all; print None for it
        # instead of failing on `None.to_string()`
        print(crs.to_string() if crs is not None else None)
40+
41+
# Fix geometry (3D -> 2D)
42+
def drop_Z(gdf):
    """
    Return a GeoDataFrame built from `gdf` with every geometry re-created
    from its WKT form.

    NOTE(review): the 3D -> 2D effect relies on `feature.to_wkt()` writing
    2D WKT -- confirm against the installed shapely version.
    """
    # Make sure the `wkt` submodule is loaded; `import shapely` alone is
    # not guaranteed to expose it.
    import shapely.wkt

    def _drop_Z(feature):
        return shapely.wkt.loads(feature.to_wkt())

    # Use the `gdf` argument (the original referenced an undefined `L`)
    return geopandas.GeoDataFrame(gdf, geometry=gdf.geometry.apply(_drop_Z))
47+
48+
# Print rows with NULLs
49+
def print_nulls(L):
    """
    Print the rows of table `L` that contain at least one null value.

    Nothing is printed when the table has no nulls. The table is only
    inspected, never modified (see `clean_nulls` for the removal).
    """
    # Compute the null mask once and reuse it for the test and the selection
    null_mask = L.isnull()
    if null_mask.sum().sum():
        print("There are some nulls in table:")
        # Widen the display so that rows are less likely to be wrapped
        with pandas.option_context('display.width',160):
            _nils = L.loc[null_mask.any(axis=1)]
            print("\n{}\n-----".format(_nils))
58+
59+
# Check for all-null columns...
60+
def clean_nulls(L):
    """
    Remove all-null columns and null-geometry rows from table `L`, in place.

    - every column holding only nulls is deleted;
    - if a 'geometry' column remains and has null entries, those rows are
      dropped.

    Each removal is announced on stdout. Returns `L` itself.
    """
    # Build the list up front: columns are deleted while looping over it
    nil_cols = [c for c in L.columns if L[c].isnull().all()]
    for c in nil_cols:
        print('Removing all-none column:',c)
        del L[c]
    # 'geometry' may be missing, or may just have been deleted above when
    # it was entirely null; only look at it if it is still there
    if 'geometry' in L.columns and L['geometry'].isnull().any():
        print("Null 'geometry' value(s) found, removing them.")
        L.dropna(subset=['geometry'], inplace=True)
    return L
69+
70+
# Set OBJECTID Index
71+
def set_objectid(L):
    """
    Make 'OBJECTID' the index of table `L`, in place, and return `L`.

    When the table has an 'OBJECTID' column it becomes the index;
    otherwise the existing index is simply named 'OBJECTID'.
    """
    has_objectid = 'OBJECTID' in L.columns
    if not has_objectid:
        L.index.name = 'OBJECTID'
        return L
    L.set_index('OBJECTID', inplace=True)
    return L
77+
78+
def summary():
    """
    Print a short summary of every layer of the file `FiNAME`.

    For each layer: its name, its content (a random sample of `n_samples`
    rows when the layer has more rows than that) and its CRS.

    NOTE(review): `FiNAME`, `_tw` (rule width) and `n_samples` are
    module-level names that are not defined in the visible part of this
    file -- they must be set before calling.
    """
    for ln in listlayers(FiNAME):
        # Label
        print('='*_tw)
        print("'{}'".format(ln))
        print('-'*_tw)
        ld = read(FiNAME, ln)
        if len(ld) > n_samples:
            print(ld.sample(n_samples))
        else:
            print(ld)
        # Layers without geometry (e.g. 'layer_styles') have no CRS;
        # print None for them instead of failing on `None.to_string()`
        crs = ld.crs
        print("CRS:", crs.to_string() if crs is not None else None)
        print('-'*_tw)
        print()

validate.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
"""
99
import os
1010
import sys
11+
from itertools import zip_longest
12+
1113

1214
# _here = os.path.abspath(__file__)
1315
# sys.path.insert(0, _here)
@@ -18,9 +20,17 @@
1820
# 'geo_path': 'tmp/PM-MAR-MS-Arsinoes_01/vector/PM-MAR-MS-Arsinoes_01.gpkg',
1921
# 'geopackage_tables': ['units', 'contacts']
2022
# }
23+
import check
2124

25+
import logging
2226

23-
from itertools import zip_longest
27+
# Logger shared by the validator; everything from DEBUG up is emitted.
logger = logging.getLogger('planmap')
logger.setLevel('DEBUG')

# Where the log goes: None sends it to the console (the behaviour so far);
# set a file path (e.g. 'validator.log') to write it to that file instead.
LOGPATH = None
if LOGPATH is None:
    logger.addHandler(logging.StreamHandler())
else:
    logger.addHandler(logging.FileHandler(LOGPATH))
2434

2535
def parse_package_name(pkgname):
2636
tokens = pkgname.split('-')
@@ -41,25 +51,46 @@ def parse_package_name(pkgname):
4151
dtype = lambda t:','.join(types[_] for _ in t)
4252
)
4353

54+
print(list(zip_longest(fields,tokens)))
4455
pkg_specs = {field: lookup.get(field, lambda t:t)(token)
4556
for field,token in zip_longest(fields,tokens)}
4657
return pkg_specs
4758

4859

49-
def run(pkg_path, log_path):
60+
def check_vector(pkg_specs, pkg_path):
    """
    Run the vector (GeoPackage) checks of a package.

    The file checked is "<pkg_path>/vector/<pkg_specs['name']>.gpkg"; the
    checks themselves are delegated to `check.geopackage`.

    Intended checks:
    - "pkg_path/vector/pkg_name.gpkg" exist? (not tested explicitly here)
    - has "geologic_units,geologic_contacts,surface_features,linear_features" ?
    - do tables have mandatory columns?
    - is 'geometry' column full?
    - are all layers at the same projection?

    Returns whatever `check.geopackage` returns.
    """
    gpkg = os.path.join(pkg_path, 'vector', pkg_specs['name']+'.gpkg')
    # Pass the result on to the caller instead of dropping it
    return check.geopackage(gpkg)
71+
72+
def check_raster(pkg_specs, pkg_path):
    """Placeholder for the raster checks: does nothing and returns None."""
    return None
74+
75+
def check_model(pkg_specs, pkg_path):
    """Placeholder for the model checks: does nothing and returns None."""
    return None
77+
78+
def run(pkg_path):
    """
    Validate the package located at `pkg_path`.

    The package name is the last component of the (absolute) path; it is
    parsed with `parse_package_name`, stored in the specs under 'name',
    and the vector checks are then run.

    Returns whatever `check_vector` returns.
    """
    #parse package name
    pkg_name = os.path.basename(os.path.abspath(pkg_path))
    logger.debug("Package name: %s", pkg_name)
    pkg_specs = parse_package_name(pkg_name)
    pkg_specs['name'] = pkg_name
    logger.info(pkg_specs)

    # Check geopackage; hand the outcome back to the caller, which
    # stores the value returned by `run`
    return check_vector(pkg_specs, pkg_path)
5588

5689

5790
if __name__ == '__main__':
5891
from argparse import ArgumentParser
5992

6093
parser = ArgumentParser()
61-
parser.add_argument('--log', default=None, type=str)
62-
# parser.add_argument('--cfg', default=None, type=str)
6394
parser.add_argument('path', type=str,
6495
help="Path of PlanMap package")
6596

@@ -68,4 +99,4 @@ def run(pkg_path, log_path):
6899

69100
# res = run(args.cfg, args.log)
70101

71-
res = run(pkg_path, 'validator.log')
102+
res = run(pkg_path)

0 commit comments

Comments
 (0)