Skip to content

Commit 56fd03f

Browse files
committed
revise getDataFrames logic
1 parent 1f7019b commit 56fd03f

1 file changed

Lines changed: 59 additions & 64 deletions

File tree

src/edgeml/edgeml.py

Lines changed: 59 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from typing import List
22
import requests as req
3-
from pandas import DataFrame
3+
import pandas as pd
44
import time as timelib
5+
from functools import reduce
56

67
# TODO add typing
78

@@ -24,7 +25,7 @@ def sendDataset(url: str, key: str, dataset: dict):
2425
res = req.post(url + uploadDataset, json = {"key": key, "payload": dataset})
2526
except req.exceptions.RequestException:
2627
raise "error" #TODO
27-
28+
2829
#
2930
# Returns all datasets and labels belonging to a project
3031
# Can be used for further processing
@@ -35,81 +36,75 @@ def sendDataset(url: str, key: str, dataset: dict):
3536
def getProject(url: str, key: str):
3637
print('fetching project...')
3738
res = req.post(url + getProjectEndpoint, json = {"key": key})
38-
if res.ok:
39+
if res.ok:
3940
return res.json()
4041
if res.status_code == 403:
4142
raise RuntimeError("Invalid key")
4243
raise RuntimeError(res.reason)
4344

44-
def __extractLabels(dataset):
45-
labels = dataset['labels']
46-
labelType = "No labeling, no dataframe will be generated"
47-
if labels:
48-
labelType = labels[0][0]['labelingName']
49-
labelset = {} # stores different start and end times (intervals) belonging to a label
45+
def __extractLabels(dataset, labeling: str=None):
46+
labelingSets = dataset['labels']
47+
matchedSet = None
48+
for labelingSet in labelingSets:
49+
if labelingSet and labelingSet[0] and (labelingSet[0]['labelingName'] == labeling or labeling == None):
50+
labeling = labelingSet[0]['labelingName']
51+
matchedSet = labelingSet
52+
break
53+
if matchedSet == None:
54+
return (None, None, None)
55+
labelSet = {} # stores different start and end times (intervals) belonging to a label
5056
labelIds = {} # assign distinct ids to labels, required for training with data
5157
labelId = 0
52-
for labelData in labels:
53-
for label in labelData:
54-
name = label['name']
55-
start = label['start']
56-
end = label['end']
57-
if not name in labelset:
58-
labelset[name] = []
59-
labelIds[name] = labelId # assign id to the label
60-
labelId = labelId + 1
61-
labelset[name].append((start, end)) # add interval to the label
62-
return (labelType, labelset, labelIds)
63-
64-
def __processDataset(dataset):
65-
dataTimeValueSensor = {} # sensor values fused into single timestamps
66-
sensors = dataset['sensors']
67-
for sensor in sensors:
68-
sensorName = sensor['name']
69-
data = sensor['data']
70-
for dataPoint in data:
71-
timestamp = dataPoint['timestamp']
72-
dataPointValue = dataPoint['datapoint']
73-
if timestamp not in dataTimeValueSensor:
74-
dataTimeValueSensor[timestamp] = []
75-
dataTimeValueSensor[timestamp].append({'value': dataPointValue, 'sensor': sensorName})
76-
return dataTimeValueSensor
58+
for label in labelingSet:
59+
name = label['name']
60+
start = label['start']
61+
end = label['end']
62+
if not name in labelSet:
63+
labelSet[name] = []
64+
labelIds[name] = labelId # assign id to the label
65+
labelId = labelId + 1
66+
labelSet[name].append((start, end)) # add interval to the label
67+
return (labeling, labelSet, labelIds)
7768

7869
#
7970
# Returns a list of pandas DataFrames generated from the project's datasets
8071
# @param {string} url - The url of the backend server
8172
# @param {string} key - The Device-Api-Key
82-
#
73+
# @param {string} labeling - Labeling used to generate the dataframes
8374

84-
def getDataFrames(url: str, key: str) -> List[DataFrame]:
75+
def getDataFrames(url: str, key: str, labeling: str=None) -> List[pd.DataFrame]:
8576
datasets = getProject(url, key)['datasets']
86-
dataFrames: List[DataFrame] = []
77+
df_project: List[pd.DataFrame] = []
8778
for dataset in datasets:
88-
(labelType, labelset, labelIds) = __extractLabels(dataset)
89-
dataTimeValueSensor = __processDataset(dataset)
90-
dataFrame = {'id': [], labelType: []}
91-
id = 0
92-
for timestamp, timestampData in dataTimeValueSensor.items():
93-
for data in timestampData:
94-
value = data['value']
95-
sensor = data['sensor']
96-
if not sensor in dataFrame:
97-
dataFrame[sensor] = []
98-
labelFound = False
99-
for label, intervals in labelset.items():
100-
for interval in intervals:
101-
start = interval[0]
102-
end = interval[1]
79+
(labeling, labelSet, labelIds) = __extractLabels(dataset, labeling)
80+
if labelSet == None: # dataset is not labeled
81+
continue
82+
sensors = dataset['sensors']
83+
df_dataset = []
84+
for sensor in sensors:
85+
sensorName = sensor['name']
86+
data = sensor['data']
87+
df_sensor = {'timestamp': [], 'label': [], sensorName: []}
88+
for dataPoint in data:
89+
timestamp = dataPoint['timestamp']
90+
value = dataPoint['datapoint']
91+
for label, intervals in labelSet.items():
92+
for start, end in intervals:
10393
if timestamp >= start and timestamp <= end:
104-
if data == timestampData[0]:
105-
dataFrame[labelType].append(label)
106-
dataFrame['id'].append(id)
107-
id = id + 1
108-
dataFrame[sensor].append(value)
109-
break
110-
dataFrame = DataFrame(dataFrame)
111-
dataFrames.append(dataFrame)
112-
return dataFrames
94+
df_sensor['timestamp'].append(timestamp)
95+
df_sensor[sensorName].append(value)
96+
df_sensor['label'].append(label)
97+
# can break here if it is ensured that labels are not overlapping
98+
df_sensor = pd.DataFrame(df_sensor)
99+
df_dataset.append(df_sensor)
100+
if not df_dataset:
101+
continue
102+
df_dataset = reduce(
103+
lambda left, right: pd.merge(
104+
left, right, on=['timestamp', 'label'], how='outer'), df_dataset
105+
).sort_values('timestamp').reset_index(drop=True)
106+
df_project.append(df_dataset)
107+
return df_project
113108

114109
#
115110
# @param {string} url - The url of the backend server
@@ -130,7 +125,7 @@ def __init__(self, url: str, key: str, name: str, useDeviceTime: bool) -> None:
130125
self.dataStore = {'datasetKey': self.datasetKey, 'data': []}
131126
self.counter = 0
132127
self.error = None
133-
128+
134129

135130
def addDataPoint(self, sensorName: str, value: float, time: int = None):
136131
if (self.error):
@@ -139,7 +134,7 @@ def addDataPoint(self, sensorName: str, value: float, time: int = None):
139134
raise ValueError("Datapoint is not a number")
140135
if (not self.useDeviceTime and type(time) is not int and type(time) is not float):
141136
raise ValueError("Provide a valid timestamp")
142-
137+
143138
if (self.useDeviceTime):
144139
time = timelib.time()
145140

@@ -157,7 +152,7 @@ def addDataPoint(self, sensorName: str, value: float, time: int = None):
157152
dataPoint['start'] = min(dataPoint['start'], time)
158153
dataPoint['end'] = max(dataPoint['end'], time)
159154
break
160-
155+
161156
self.counter = self.counter + 1
162157
if self.counter > 1000:
163158
self.upload()

0 commit comments

Comments
 (0)