Skip to content

Commit c41cc2d

Browse files
committed
Added own implementation
1 parent 13eeebe commit c41cc2d

7 files changed

Lines changed: 372 additions & 110 deletions

File tree

src/edgeml/Dataset.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
import tempfile
2+
import h5py
3+
import numpy as np
4+
from edgeml.TimeSeries import TimeSeries
5+
from edgeml.Labeling import Labeling, Label
6+
from functools import reduce
7+
import pandas as pd
8+
9+
class Dataset():
    """One dataset of a project: its time series plus the labelings applied to it.

    An instance starts empty. `parse` fills in the metadata from the backend's
    JSON description and `loadData` fetches the samples of every time series.
    """

    def __init__(self, backendURL, readKey=None, writeKey=None):
        """Store the connection settings; all dataset fields stay None until `parse`.

        Args:
            backendURL: Base URL of the backend, handed on to each TimeSeries.
            readKey: Read key, handed on to each TimeSeries.
            writeKey: Write key, handed on to each TimeSeries.
        """
        self._backendURL = backendURL
        self._readKey = readKey
        self._writeKey = writeKey

        self._id = None
        self.name = None
        self.metaData = None
        self.timeSeries = None
        self.labelings = None

    def parse(self, data, labelings):
        """Populate this dataset from its JSON description.

        Args:
            data: Dataset dict with the keys "_id", "name", "metaData",
                "timeSeries" and "labelings". Each entry of
                data["labelings"] has "labelingId" and "labels"; each of
                those labels has a "type" holding a label-definition id.
            labelings: Project-level labeling definitions. Each entry has
                "_id" and "name" and optionally "labels", a list of
                {"_id", "name"} label definitions.

        Raises:
            KeyError: If a required key is missing, or a labeling/label id in
                `data` has no definition in `labelings`.
        """
        self._id = data["_id"]
        self.name = data["name"]
        self.metaData = data["metaData"]

        self.timeSeries = []
        for ts in data["timeSeries"]:
            tmp_timeSeries = TimeSeries(self._backendURL, self._id, self._readKey, self._writeKey)
            tmp_timeSeries.parse(ts)
            self.timeSeries.append(tmp_timeSeries)

        # Lookup tables built once instead of scanning `labelings` per entry.
        label_name_map = {
            label['_id']: label['name']
            for entry in labelings
            for label in entry.get('labels', [])
        }
        # setdefault keeps the first definition when an id appears twice.
        labeling_name_map = {}
        for entry in labelings:
            labeling_name_map.setdefault(entry["_id"], entry["name"])

        self.labelings = []
        for labeling in data["labelings"]:
            # Build enriched copies so the caller's dicts are not modified.
            named_labels = [
                {**label, "name": label_name_map[label["type"]]}
                for label in labeling["labels"]
            ]
            resolved = {
                **labeling,
                "name": labeling_name_map[labeling["labelingId"]],
                "labels": named_labels,
            }
            temp_labeling = Labeling()
            temp_labeling.parse(resolved)
            self.labelings.append(temp_labeling)

    @property
    def data(self):
        """Return all time series outer-joined on "time", plus one column per labeling.

        For every labeling that has at least one label, a column named after
        the labeling is added (initialised to ""). Rows whose "time" lies
        within [label.start, label.end] receive that label's name.

        Returns:
            A new pandas DataFrame; the frames held by the individual time
            series are left untouched. With no time series the frame has only
            an empty "time" column.
        """
        frames = [x.data for x in self.timeSeries]
        if not frames:
            # reduce() without an initial value raises on an empty sequence.
            df = pd.DataFrame({"time": []})
        elif len(frames) == 1:
            # reduce() would hand back the series' own frame; copy it so the
            # label columns added below do not leak into the time series.
            df = frames[0].copy()
        else:
            df = reduce(lambda x, y: pd.merge(x, y, on='time', how='outer'), frames)

        for labeling in self.labelings:
            for label in labeling.labels:
                if labeling.name not in df.columns:
                    df[labeling.name] = ""
                in_range = (df['time'] >= label.start) & (df['time'] <= label.end)
                df.loc[in_range, labeling.name] = label.name
        return df

    def loadData(self):
        """Call `loadData` on every time series of this dataset."""
        for ts in self.timeSeries:
            ts.loadData()

src/edgeml/Labeling.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
class Labeling:
    """A named group of labels attached to a dataset."""

    def __init__(self):
        """Create an empty labeling; every field is None until `parse` runs."""
        self._id = None
        self.labels = None
        self.name = None

    def parse(self, data):
        """Fill this labeling from a dict.

        Args:
            data: Dict with the keys "labelingId", "name" and "labels";
                each entry of "labels" is passed to `Label.parse`.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self._id = data["labelingId"]
        self.name = data["name"]
        self.labels = []
        for label in data["labels"]:
            tmp_label = Label()
            tmp_label.parse(label)
            self.labels.append(tmp_label)

    def __str__(self):
        return f"Labeling(_id={self._id}, labels={self.labels}, name={self.name})"

    def __repr__(self):
        # Same text as str() so lists of labelings print readably.
        return str(self)
21+
22+
23+
class Label:
    """A single labeled interval: a start, an end, a type id and a display name."""

    def __init__(self):
        """Create an empty label; every field is None until `parse` runs."""
        self._id = None
        self.start = None
        self.end = None
        self.type = None
        self.name = None

    def parse(self, data):
        """Fill this label from a dict.

        Args:
            data: Dict with the keys "_id", "start", "end", "type" and "name".

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self._id = data["_id"]
        self.start = data["start"]
        self.end = data["end"]
        self.type = data["type"]
        self.name = data["name"]

    def __str__(self):
        return f"Label(_id={self._id}, start={self.start}, end={self.end}, type={self.type}, name={self.name})"

    def __repr__(self):
        # Same text as str() so lists of labels print readably.
        return str(self)

src/edgeml/Project.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import requests as req
2+
from edgeml.consts import getProjectEndpoint
3+
from edgeml.Dataset import Dataset
4+
5+
6+
class Project():
    """Client-side view of one project: its datasets and labeling definitions.

    Constructing an instance immediately requests the project description
    from the backend and parses every dataset in it.
    """

    def __init__(self, backendURL, readKey=None, writeKey=None):
        """Fetch the project and build its Dataset objects.

        Args:
            backendURL: Base URL of the backend; the project endpoint and the
                read key are appended to it to form the request URL.
            readKey: Key placed in the request URL. Required despite the
                None default.
            writeKey: Stored and handed on to every Dataset.

        Raises:
            TypeError: If `readKey` is not a string.
            RuntimeError: "Invalid key" on HTTP 403, otherwise the response's
                reason for any status code >= 300.
        """
        # Fail before any network traffic: concatenating None into the URL
        # would otherwise surface as an unrelated-looking TypeError.
        if not isinstance(readKey, str):
            raise TypeError(
                f"readKey must be a string, got {type(readKey).__name__}"
            )
        self.backendURL = backendURL
        self._readKey = readKey
        self._writeKey = writeKey

        res = req.get(backendURL + getProjectEndpoint + readKey)
        if res.status_code == 403:
            raise RuntimeError("Invalid key")
        elif res.status_code >= 300:
            raise RuntimeError(res.reason)

        res_data = res.json()
        self.labelings = res_data["labelings"]
        # Misspelled attribute kept as an alias for existing callers.
        self.labeligns = self.labelings

        self.datasets = []
        for d in res_data["datasets"]:
            tmp_dataset = Dataset(backendURL, self._readKey, self._writeKey)
            tmp_dataset.parse(d, self.labelings)
            self.datasets.append(tmp_dataset)

    def loadData(self):
        """Call `loadData` on every dataset of the project."""
        for d in self.datasets:
            d.loadData()

src/edgeml/TimeSeries.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import tempfile
2+
import h5py
3+
import numpy as np
4+
from edgeml.consts import getProjectEndpoint
5+
import requests as req
6+
import pandas as pd
7+
8+
class SamplingRate:
    """Mean and variance of a time series' sampling rate, as reported by the backend."""

    def __init__(self, mean, var):
        """Store the two statistics unchanged.

        Args:
            mean: Value of the "mean" field of the sampling-rate record.
            var: Value of the "var" field of the sampling-rate record.
        """
        self.mean = mean
        self.var = var

    def __repr__(self):
        # Readable representation, in line with Labeling and Label.
        return f"SamplingRate(mean={self.mean}, var={self.var})"
12+
13+
14+
class TimeSeries:
    """One named channel of a dataset; its samples are fetched on demand."""

    def __init__(self, backendURL, datasetId, readKey=None, writeKey=None):
        """Store connection settings; metadata stays None until `parse`.

        Args:
            backendURL: Base URL of the backend.
            datasetId: Id of the dataset this series belongs to; part of the
                download URL.
            readKey: Key placed in the download URL by `loadData`.
            writeKey: Stored only; not used by the code in this class.
        """
        self._backendURL = backendURL
        self._datasetId = datasetId
        self._readKey = readKey
        self._writeKey = writeKey
        self._id = None
        self.name = None
        self.start = None
        self.end = None
        self.unit = None
        self._data = None
        self.samplingRate = None
        self.length = None

    def parse(self, data):
        """Fill the metadata from a dict.

        Args:
            data: Dict with the keys "_id", "name", "start", "end", "unit",
                "length" and "samplingRate" (itself a dict with "mean" and
                "var").

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self._id = data["_id"]
        self.name = data["name"]
        self.start = data["start"]
        self.end = data["end"]
        self.unit = data["unit"]
        rate = data["samplingRate"]
        self.samplingRate = SamplingRate(rate["mean"], rate["var"])
        self.length = data["length"]

    @property
    def data(self):
        """The loaded samples.

        Raises:
            Exception: If no data has been loaded or assigned yet.
        """
        if self._data is None:
            raise Exception("You need to load the data first. Call loadData on the project, dataset, or time-series level.")
        return self._data

    @data.setter
    def data(self, value):
        self._data = value

    def loadData(self) -> pd.DataFrame:
        """Download this series and store it as a DataFrame in `data`.

        The response body is written to a temporary ".h5" file, from which
        the HDF5 datasets "time" and "data" are read.

        Returns:
            The DataFrame now held in `data`, with the columns "time" and
            `self.name`.

        Raises:
            RuntimeError: "Invalid key" on HTTP 403, otherwise the response's
                reason for any status code >= 300.
        """
        import os  # local import: only needed to remove the temp file

        res = req.get(self._backendURL + getProjectEndpoint + self._readKey + "/" + self._datasetId + "/" + self._id)
        # Same status handling as the project request, so an error page is
        # not handed to h5py as if it were an HDF5 file.
        if res.status_code == 403:
            raise RuntimeError("Invalid key")
        elif res.status_code >= 300:
            raise RuntimeError(res.reason)

        # delete=False so the file can be reopened by name after closing;
        # leaving the `with` block flushes and closes it before h5py reads it.
        with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as temp_file:
            temp_file.write(res.content)
        try:
            with h5py.File(temp_file.name, "r") as hf:
                time_array = np.array(hf["time"])
                data_array = np.array(hf["data"])
        finally:
            # delete=False means nobody else cleans this file up.
            os.remove(temp_file.name)

        self.data = pd.DataFrame({"time": time_array, self.name: data_array})
        return self.data

src/edgeml/consts.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
# Backend endpoint paths. Each must be a plain string: a trailing comma after
# the literal would turn the value into a 1-tuple and break URL concatenation.
uploadDataset = "/api/deviceapi/uploadDataset"
initDatasetIncrement = "/ds/api/dataset/init/"
addDatasetIncrement = "/ds/api/dataset/append/"
getProjectEndpoint = "/ds/api/project/"

src/edgeml/edgeml.py

Lines changed: 28 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1,114 +1,32 @@
1-
from typing import List
21
import requests as req
3-
import pandas as pd
4-
import time as timelib
5-
from functools import reduce
6-
7-
# TODO add typing
8-
9-
# Device-API endpoint paths, appended to the backend URL by the functions below.
uploadDataset = "/api/deviceApi/uploadDataset"
initDatasetIncrement = "/api/deviceApi/initDatasetIncrement"
addDatasetIncrement = "/api/deviceApi/addDatasetIncrement"
addDatasetIncrementBatch = "/api/deviceApi/addDatasetIncrementBatch"
getProjectEndpoint = "/api/deviceApi/getProject"
14-
15-
def sendDataset(url: str, key: str, dataset: dict):
    """Upload a whole dataset to a specific project.

    Args:
        url: The url of the backend server.
        key: The Device-Api-Key.
        dataset: The dataset to upload.

    Returns:
        None. The HTTP response is not inspected.

    Raises:
        RuntimeError: If the request could not be performed; the underlying
            requests exception is attached as the cause.
    """
    try:
        req.post(url + uploadDataset, json={"key": key, "payload": dataset})
    except req.exceptions.RequestException as err:
        # Raising a bare string is itself a TypeError in Python 3 and hides
        # the real failure; raise a proper exception and keep the cause.
        raise RuntimeError(f"Uploading the dataset failed: {err}") from err
28-
29-
def getProject(url: str, key: str):
    """Return all datasets and labels belonging to a project.

    The result can be used for further processing.

    Args:
        url: The url of the backend server.
        key: The Device-Api-Key.

    Returns:
        The decoded JSON body of the backend's response.

    Raises:
        RuntimeError: "Invalid key" on HTTP 403, otherwise the response's
            reason for any other unsuccessful status.
    """
    print('fetching project...')
    res = req.post(url + getProjectEndpoint, json={"key": key})
    if res.ok:
        return res.json()
    if res.status_code == 403:
        raise RuntimeError("Invalid key")
    raise RuntimeError(res.reason)
44-
45-
def __extractLabels(dataset, labeling: str=None):
    """Pick one labeling of a dataset and group its intervals by label name.

    The first non-empty entry of dataset['labels'] whose first label has the
    requested 'labelingName' is used; with `labeling` None the first
    non-empty entry is used.

    Args:
        dataset: Dict whose 'labels' value is a list of labeling sets, each a
            list of label dicts with 'labelingName', 'name', 'start', 'end'.
        labeling: Name of the labeling to extract, or None for "first found".

    Returns:
        A tuple (labelingName, labelSet, labelIds). labelSet maps each label
        name to its list of (start, end) intervals; labelIds maps each label
        name to a distinct integer id in order of first appearance.
        (None, None, None) is returned when no labeling set matches.
    """
    matchedSet = None
    for labelingSet in dataset['labels']:
        # Skip empty sets and sets whose first label is empty.
        if not labelingSet or not labelingSet[0]:
            continue
        if labeling is None or labelingSet[0]['labelingName'] == labeling:
            labeling = labelingSet[0]['labelingName']
            matchedSet = labelingSet
            break
    if matchedSet is None:
        return (None, None, None)

    labelSet = {}  # label name -> list of (start, end) intervals
    labelIds = {}  # label name -> distinct id, required for training with data
    for label in matchedSet:
        name = label['name']
        if name not in labelSet:
            labelSet[name] = []
            labelIds[name] = len(labelIds)  # next unused id
        labelSet[name].append((label['start'], label['end']))
    return (labeling, labelSet, labelIds)
68-
69-
def getDataFrames(url: str, key: str, labeling: str=None) -> List[pd.DataFrame]:
    """Return one pandas DataFrame per labeled dataset of the project.

    For each dataset only the parts covered by a label of the chosen labeling
    are included. If no labeling is provided, the first labeling with a valid
    label is chosen separately for each dataset, so different datasets may
    end up with different labelings.

    Args:
        url: The url of the backend server.
        key: The Device-Api-Key.
        labeling: Labeling used to generate the dataframes.

    Returns:
        A list of DataFrames with the columns 'timestamp', 'label' and one
        column per sensor, sorted by 'timestamp'. Datasets without a matching
        labeling or without sensors are skipped.
    """
    datasets = getProject(url, key)['datasets']
    df_project: List[pd.DataFrame] = []
    for dataset in datasets:
        # Do not rebind `labeling` here: overwriting it would carry the first
        # dataset's result (a name, or None) into every later dataset.
        (_, labelSet, _) = __extractLabels(dataset, labeling)
        if labelSet is None:  # dataset is not labeled
            continue
        df_dataset = []
        for sensor in dataset['sensors']:
            sensorName = sensor['name']
            df_sensor = {'timestamp': [], 'label': [], sensorName: []}
            for dataPoint in sensor['data']:
                timestamp = dataPoint['timestamp']
                value = dataPoint['datapoint']
                for label, intervals in labelSet.items():
                    for start, end in intervals:
                        if start <= timestamp <= end:
                            df_sensor['timestamp'].append(timestamp)
                            df_sensor[sensorName].append(value)
                            df_sensor['label'].append(label)
                            # can break here if it is ensured that labels are not overlapping
            df_dataset.append(pd.DataFrame(df_sensor))
        if not df_dataset:
            continue
        merged = reduce(
            lambda left, right: pd.merge(
                left, right, on=['timestamp', 'label'], how='outer'),
            df_dataset,
        ).sort_values('timestamp').reset_index(drop=True)
        df_project.append(merged)
    return df_project
2+
from edgeml.consts import getProjectEndpoint
3+
from edgeml.Dataset import Dataset
4+
import timelib
5+
6+
7+
class edgeml:
    """Client-side view of one project: its datasets and labeling definitions.

    Constructing an instance immediately requests the project description
    from the backend and parses every dataset in it.
    """

    def __init__(self, backendURL, readKey=None, writeKey=None):
        """Fetch the project and build its Dataset objects.

        Args:
            backendURL: Base URL of the backend; the project endpoint and the
                read key are appended to it to form the request URL.
            readKey: Key placed in the request URL. Required despite the
                None default.
            writeKey: Stored and handed on to every Dataset.

        Raises:
            TypeError: If `readKey` is not a string.
            RuntimeError: "Invalid key" on HTTP 403, otherwise the response's
                reason for any status code >= 300.
        """
        # Fail before any network traffic: concatenating None into the URL
        # would otherwise surface as an unrelated-looking TypeError.
        if not isinstance(readKey, str):
            raise TypeError(
                f"readKey must be a string, got {type(readKey).__name__}"
            )
        self.backendURL = backendURL
        self._readKey = readKey
        self._writeKey = writeKey

        res = req.get(backendURL + getProjectEndpoint + readKey)
        if res.status_code == 403:
            raise RuntimeError("Invalid key")
        elif res.status_code >= 300:
            raise RuntimeError(res.reason)

        res_data = res.json()
        self.labelings = res_data["labelings"]
        # Misspelled attribute kept as an alias for existing callers.
        self.labeligns = self.labelings

        self.datasets = []
        for d in res_data["datasets"]:
            tmp_dataset = Dataset(backendURL, self._readKey, self._writeKey)
            tmp_dataset.parse(d, self.labelings)
            self.datasets.append(tmp_dataset)

    def loadData(self):
        """Call `loadData` on every dataset of the project."""
        for d in self.datasets:
            d.loadData()
11230

11331
#
11432
# @param {string} url - The url of the backend server

0 commit comments

Comments
 (0)