Skip to content

Commit ddf7566

Browse files
authored
Add Python 3.11+ type hints to core modules (#51)
* Add Python 3.11+ type hints to core modules
  - Add type annotations to dumptool.py, jsontool.py, main.py, and utils.py
  - Use modern Python 3.11+ syntax (e.g., set[str] instead of Set[str])
  - Add io and typing imports for proper type hints
  - Import JsonType from utils for consistent JSON type annotations
  - Add return type annotations to all functions
* Update CI to Python 3.11+ and modernize type hints
  - CI workflow: Remove Python 3.8/3.9/3.10, keep 3.11/3.12/3.13/3.14
  - pyproject.toml: Add Python 3.14 classifier, update ruff target-version to py311, mypy to 3.11
  - Add pytest-cov to dependency-groups for test coverage
  - Modernize all type hints to Python 3.11+ syntax:
    - Union[X, Y] → X | Y
    - Optional[X] → X | None
    - List[X] → list[X]
    - Dict[X, Y] → dict[X, Y]
    - Tuple[X, Y] → tuple[X, Y]
  - Import from collections.abc instead of typing for Callable, Iterable, Iterator
  - Add strict=True to zip() call
  - Use ternary operator for fout assignment in main.py
1 parent 7cd29b8 commit ddf7566

7 files changed

Lines changed: 174 additions & 556 deletions

File tree

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ jobs:
4141
runs-on: ubuntu-latest
4242
strategy:
4343
matrix:
44-
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
44+
python-version: ["3.11", "3.12", "3.13", "3.14"]
4545
steps:
4646
- uses: actions/checkout@v4
4747
- uses: actions/setup-python@v5

jsoncsv/dumptool.py

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,112 +2,123 @@
22
# 2015.10.09
33

44
import csv
5+
import io
56
import json
7+
from typing import Any
68

79
import xlwt
810

11+
from jsoncsv.utils import JsonType
12+
913

1014
class Dump:
11-
def __init__(self, fin, fout, **kwargs):
15+
def __init__(self, fin: io.TextIOBase, fout: io.TextIOBase | io.BytesIO, **kwargs: Any) -> None:
1216
self.fin = fin
1317
self.fout = fout
1418
self.initialize(**kwargs)
1519

16-
def initialize(self, **kwargs):
20+
def initialize(self, **kwargs: Any) -> None:
1721
pass
1822

19-
def prepare(self):
23+
def prepare(self) -> None:
2024
pass
2125

22-
def dump_file(self, obj):
26+
def dump_file(self) -> None:
2327
raise NotImplementedError
2428

25-
def on_finish(self):
29+
def on_finish(self) -> None:
2630
pass
2731

28-
def dump(self):
32+
def dump(self) -> None:
2933
self.prepare()
3034
self.dump_file()
3135
self.on_finish()
3236

3337

3438
class ReadHeadersMixin:
3539
@staticmethod
36-
def load_headers(fin, read_row=None, sort_type=None): # noqa: ARG004 - reserved for future use
37-
headers = set()
38-
datas = []
40+
def load_headers(
41+
fin: io.TextIOBase,
42+
read_row: int | None = None,
43+
sort_type: bool | None = None, # noqa: ARG004 - reserved for future use
44+
) -> tuple[list[str], list[dict[str, JsonType]]]:
45+
headers: set[str] = set()
46+
datas: list[dict[str, JsonType]] = []
3947

4048
# read
4149
if not read_row or read_row < 1:
4250
read_row = -1
4351

4452
for line in fin:
4553
obj = json.loads(line)
54+
assert isinstance(obj, dict)
4655
headers.update(obj.keys())
4756
datas.append(obj)
4857

4958
read_row -= 1
5059
if not read_row:
5160
break
5261
# TODO: add some sort_type here
53-
headers = sorted(headers)
62+
headers_list = sorted(headers)
5463

55-
return (list(headers), datas)
64+
return (headers_list, datas)
5665

5766

5867
class DumpExcel(Dump, ReadHeadersMixin):
59-
def initialize(self, **kwargs):
68+
def initialize(self, **kwargs: Any) -> None:
6069
super().initialize(**kwargs)
6170
self._read_row = kwargs.get('read_row')
6271
self._sort_type = kwargs.get('sort_type')
6372

64-
def prepare(self):
65-
headers, datas = self.load_headers(self.fin, self._read_row,
66-
self._sort_type)
73+
def prepare(self) -> None:
74+
headers, datas = self.load_headers(self.fin, self._read_row, self._sort_type)
6775
self._headers = headers
6876
self._datas = datas
6977

70-
def write_headers(self):
78+
def write_headers(self) -> None:
7179
raise NotImplementedError
7280

73-
def write_obj(self):
81+
def write_obj(self, obj: dict[str, JsonType]) -> None:
7482
raise NotImplementedError
7583

76-
def dump_file(self):
84+
def dump_file(self) -> None:
7785
self.write_headers()
7886

7987
for obj in self._datas:
8088
self.write_obj(obj)
8189

8290
for line in self.fin:
8391
obj = json.loads(line)
92+
assert isinstance(obj, dict)
8493
self.write_obj(obj)
8594

8695

8796
class DumpCSV(DumpExcel):
88-
def initialize(self, **kwargs):
97+
def initialize(self, **kwargs: Any) -> None:
8998
super().initialize(**kwargs)
90-
self.csv_writer = None
99+
self.csv_writer: csv.DictWriter[str] | None = None
91100

92-
def write_headers(self):
101+
def write_headers(self) -> None:
102+
assert isinstance(self.fout, io.TextIOBase)
93103
self.csv_writer = csv.DictWriter(self.fout, self._headers)
94104
self.csv_writer.writeheader()
95105

96-
def write_obj(self, obj):
97-
patched_obj = {
106+
def write_obj(self, obj: dict[str, JsonType]) -> None:
107+
patched_obj: dict[str, str] = {
98108
key: self.patch_value(value)
99109
for key, value in obj.items()
100110
}
111+
assert self.csv_writer is not None
101112
self.csv_writer.writerow(patched_obj)
102113

103-
def patch_value(self, value):
114+
def patch_value(self, value: JsonType) -> str:
104115
if value in (None, {}, []):
105116
return ""
106-
return value
117+
return str(value)
107118

108119

109120
class DumpXLS(DumpExcel):
110-
def initialize(self, **kwargs):
121+
def initialize(self, **kwargs: Any) -> None:
111122
super().initialize(**kwargs)
112123

113124
self.sheet = kwargs.get('sheet', 'Sheet1')
@@ -116,30 +127,36 @@ def initialize(self, **kwargs):
116127
self.row = 0
117128
self.cloumn = 0
118129

119-
def write_headers(self):
130+
def write_headers(self) -> None:
120131
for head in self._headers:
121132
self.ws.write(self.row, self.cloumn, head)
122133
self.cloumn += 1
123134
self.row += 1
124135

125-
def write_obj(self, obj):
136+
def write_obj(self, obj: dict[str, JsonType]) -> None:
126137
self.cloumn = 0
127138

128139
for head in self._headers:
129140
value = obj.get(head)
130141
# patch
131-
if value in ({},):
142+
if value == {}:
132143
value = "{}"
133144
self.ws.write(self.row, self.cloumn, value)
134145
self.cloumn += 1
135146

136147
self.row += 1
137148

138-
def on_finish(self):
149+
def on_finish(self) -> None:
150+
assert isinstance(self.fout, io.BufferedIOBase)
139151
self.wb.save(self.fout)
140152

141153

142-
def dump_excel(fin, fout, klass, **kwargs):
154+
def dump_excel(
155+
fin: io.TextIOBase,
156+
fout: io.TextIOBase | io.BytesIO,
157+
klass: type[DumpExcel],
158+
**kwargs: Any,
159+
) -> None:
143160
if not isinstance(klass, type) or not issubclass(klass, DumpExcel):
144161
raise ValueError("unknow dumpexcel type")
145162

jsoncsv/jsontool.py

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,40 @@
11
# author@alingse
22
# 2016.05.27
33

4+
import io
45
import json
6+
from collections.abc import Callable, Iterable, Iterator
57
from copy import deepcopy
68
from itertools import groupby
79
from operator import itemgetter
810

9-
from jsoncsv.utils import decode_safe_key, encode_safe_key
11+
from jsoncsv.utils import (
12+
DecodedPathType,
13+
JsonType,
14+
LeafInputType,
15+
LeafType,
16+
PathType,
17+
decode_safe_key,
18+
encode_safe_key,
19+
)
1020

1121
__all__ = [
1222
'convert_json',
1323
'expand',
1424
'restore',
1525
]
1626

27+
# Type alias for the func parameter in convert_json
28+
# `...` (Ellipsis) leaves the parameter list unspecified, so any call signature is accepted, including extra keyword arguments
29+
ConvertFunc = Callable[..., dict[str, JsonType]] | Callable[..., JsonType]
1730

18-
def gen_leaf(root, path=None):
31+
32+
def gen_leaf(root: JsonType, path: PathType | None = None) -> Iterator[LeafType]:
1933
if path is None:
2034
path = []
2135

2236
if not isinstance(root, (dict, list)) or not root:
23-
leaf = (path, root)
37+
leaf: LeafType = (path, root)
2438
yield leaf
2539
else:
2640
items = root.items() if isinstance(root, dict) else enumerate(root)
@@ -32,7 +46,7 @@ def gen_leaf(root, path=None):
3246
yield leaf
3347

3448

35-
def is_array_index(keys, enable_str=True):
49+
def is_array_index(keys: Iterable[int | str], enable_str: bool = True) -> bool:
3650
keys = list(deepcopy(keys))
3751
# 不强调有序
3852
key_map = dict.fromkeys(keys, True)
@@ -44,7 +58,7 @@ def is_array_index(keys, enable_str=True):
4458
return bool(enable_str and all(str(key) in key_map for key in int_keys))
4559

4660

47-
def from_leaf(leafs):
61+
def from_leaf(leafs: Iterable[LeafInputType]) -> JsonType:
4862
# [(path, value), (path, value)]
4963
leafs = list(leafs)
5064

@@ -58,10 +72,10 @@ def from_leaf(leafs):
5872
_get_head = itemgetter(0)
5973
_get_leaf = itemgetter(1)
6074

61-
zlist = list(zip(heads, leafs))
75+
zlist = list(zip(heads, leafs, strict=True))
6276
glist = groupby(sorted(zlist, key=_get_head), key=_get_head)
6377

64-
child = []
78+
child: list[tuple[int | str, JsonType]] = []
6579
for g in glist:
6680
head, _zlist = g
6781
_leafs = map(_get_leaf, _zlist)
@@ -73,30 +87,31 @@ def from_leaf(leafs):
7387
child.sort(key=lambda x: int(x[0]))
7488
return list(map(_get_leaf, child))
7589

76-
return dict(child)
90+
return dict(child) # type: ignore[arg-type]
7791

7892

79-
def expand(origin, separator='.', safe=False):
93+
def expand(origin: JsonType, separator: str = '.', safe: bool = False) -> dict[str, JsonType]:
8094
root = origin
8195
leafs = gen_leaf(root)
8296

83-
expobj = {}
97+
expobj: dict[str, JsonType] = {}
8498
for path, value in leafs:
85-
path = map(str, path)
99+
# Convert path elements to strings
100+
str_path: list[str] = [str(p) for p in path]
86101

87-
key = encode_safe_key(path, separator) if safe else separator.join(path)
102+
key = encode_safe_key(str_path, separator) if safe else separator.join(str_path)
88103
expobj[key] = value
89104

90105
return expobj
91106

92107

93-
def restore(expobj, separator='.', safe=False):
94-
leafs = []
108+
def restore(expobj: dict[str, JsonType], separator: str = '.', safe: bool = False) -> JsonType:
109+
leafs: list[tuple[DecodedPathType, JsonType]] = []
95110

96111
items = expobj.items()
97112

98113
for key, value in items:
99-
path = decode_safe_key(key, separator) if safe else key.split(separator)
114+
path: DecodedPathType = decode_safe_key(key, separator) if safe else key.split(separator)
100115

101116
if key == '':
102117
path = []
@@ -107,7 +122,14 @@ def restore(expobj, separator='.', safe=False):
107122
return origin
108123

109124

110-
def convert_json(fin, fout, func, separator=".", safe=False, json_array=False):
125+
def convert_json(
126+
fin: io.TextIOBase,
127+
fout: io.TextIOBase,
128+
func: ConvertFunc,
129+
separator: str = '.',
130+
safe: bool = False,
131+
json_array: bool = False,
132+
) -> None:
111133
'''
112134
ensure fin/fout is TextIO
113135
'''
@@ -116,7 +138,7 @@ def convert_json(fin, fout, func, separator=".", safe=False, json_array=False):
116138
raise ValueError("unknow convert_json type")
117139

118140
# default: read json objects from each line
119-
def gen_objs():
141+
def gen_objs() -> Iterator[JsonType]:
120142
for line in fin:
121143
obj = json.loads(line)
122144
yield obj
@@ -125,7 +147,7 @@ def gen_objs():
125147

126148
if json_array:
127149
# read all input as json array
128-
def gen_objs_from_array():
150+
def gen_objs_from_array() -> Iterator[JsonType]:
129151
objs = json.load(fin)
130152
assert isinstance(objs, list)
131153
yield from objs

0 commit comments

Comments
 (0)