11from typing import List
22import requests as req
3- from pandas import DataFrame
3+ import pandas as pd
44import time as timelib
5+ from functools import reduce
56
67# TODO add typing
78
@@ -24,7 +25,7 @@ def sendDataset(url: str, key: str, dataset: dict):
2425 res = req .post (url + uploadDataset , json = {"key" : key , "payload" : dataset })
2526 except req .exceptions .RequestException :
2627 raise "error" #TODO
27-
28+
2829#
2930# Returns all datasets and labels belonging to a project
3031# Can be used for further processing
@@ -35,81 +36,75 @@ def sendDataset(url: str, key: str, dataset: dict):
def getProject(url: str, key: str):
    """Fetch the project that belongs to the given key from the backend.

    Posts the key as JSON to ``url + getProjectEndpoint`` and returns the
    decoded JSON body of the response.

    Raises:
        RuntimeError: "Invalid key" when the server answers with status 403,
            otherwise the response's reason phrase for any other
            non-successful response.
    """
    print('fetching project...')
    endpoint = url + getProjectEndpoint
    response = req.post(endpoint, json={"key": key})
    if not response.ok:
        # 403 is reported with a fixed message; everything else forwards the
        # reason phrase supplied by the server.
        message = "Invalid key" if response.status_code == 403 else response.reason
        raise RuntimeError(message)
    return response.json()
4344
def __extractLabels(dataset, labeling: str = None):
    """Collect the label intervals of one labeling of a dataset.

    ``dataset['labels']`` is read as a list of labeling sets, each being a
    list of label dicts with the keys 'labelingName', 'name', 'start' and
    'end'. The labeling name of a set is taken from its first label.

    Args:
        dataset: Dataset dict as returned by the backend.
        labeling: Name of the labeling to extract. When None, the first
            non-empty labeling set is used.

    Returns:
        A tuple ``(labeling, labelSet, labelIds)`` where ``labeling`` is the
        name of the labeling that was used, ``labelSet`` maps each label name
        to its list of ``(start, end)`` intervals and ``labelIds`` maps each
        label name to a distinct integer id (assigned in order of first
        appearance). ``(None, None, None)`` is returned when the dataset has
        no matching labeling.
    """
    matchedSet = None
    # A missing or null 'labels' entry is treated like an unlabeled dataset.
    for labelingSet in dataset.get('labels') or []:
        if not labelingSet or not labelingSet[0]:
            continue  # empty labeling set, nothing to read a name from
        labelingName = labelingSet[0]['labelingName']
        if labeling is None or labelingName == labeling:
            labeling = labelingName
            matchedSet = labelingSet
            break
    if matchedSet is None:
        return (None, None, None)

    labelSet = {}  # stores the (start, end) intervals belonging to each label
    labelIds = {}  # assigns distinct ids to labels, required for training with data
    # Iterate the matched set explicitly rather than relying on the loop
    # variable that happens to survive the search loop above.
    for label in matchedSet:
        name = label['name']
        if name not in labelSet:
            labelSet[name] = []
            labelIds[name] = len(labelIds)  # next free id
        labelSet[name].append((label['start'], label['end']))
    return (labeling, labelSet, labelIds)
7768
7869#
7970# Returns a list of Pandas.DataFrames generated from the dataset
8071# @param {string} url - The url of the backend server
8172# @param {string} key - The Device-Api-Key
82- #
73+ # @param {string} labeling - Labeling used to generate the dataframes
8374
def _labelSensorData(sensor, labelSet) -> pd.DataFrame:
    """Build the labeled DataFrame of one sensor.

    Every data point whose timestamp lies inside a label interval (bounds
    inclusive) yields one row with the columns 'timestamp', 'label' and a
    value column named after the sensor. Data points outside all intervals
    are dropped.
    """
    sensorName = sensor['name']
    rows = {'timestamp': [], 'label': [], sensorName: []}
    for dataPoint in sensor['data']:
        timestamp = dataPoint['timestamp']
        value = dataPoint['datapoint']
        for label, intervals in labelSet.items():
            for start, end in intervals:
                if start <= timestamp <= end:
                    rows['timestamp'].append(timestamp)
                    rows[sensorName].append(value)
                    rows['label'].append(label)
                    # No break: a data point covered by several intervals
                    # yields one row per interval. A break would be possible
                    # if labels were guaranteed not to overlap.
    return pd.DataFrame(rows)


def getDataFrames(url: str, key: str, labeling: str = None) -> List[pd.DataFrame]:
    """Return one labeled DataFrame per labeled dataset of the project.

    Args:
        url: The url of the backend server.
        key: The Device-Api-Key.
        labeling: Labeling used to generate the dataframes. When None, the
            first labeling found in the project is used for all following
            datasets.

    Returns:
        A list of DataFrames, one per dataset that carries the labeling and
        has at least one sensor. The per-sensor frames of a dataset are
        outer-merged on 'timestamp' and 'label' and sorted by timestamp.
    """
    datasets = getProject(url, key)['datasets']
    df_project: List[pd.DataFrame] = []
    for dataset in datasets:
        # Unpack into separate names: overwriting `labeling` with the result
        # would reset a requested labeling to None after the first dataset
        # that does not carry it.
        (resolved, labelSet, _labelIds) = __extractLabels(dataset, labeling)
        if labelSet is None:  # dataset is not labeled
            continue
        if labeling is None:
            labeling = resolved  # stick to the first labeling that was found
        df_dataset = [_labelSensorData(sensor, labelSet) for sensor in dataset['sensors']]
        if not df_dataset:
            continue
        merged = reduce(
            lambda left, right: pd.merge(
                left, right, on=['timestamp', 'label'], how='outer'),
            df_dataset,
        )
        df_project.append(merged.sort_values('timestamp').reset_index(drop=True))
    return df_project
113108
114109#
115110# @param {string} url - The url of the backend server
@@ -130,7 +125,7 @@ def __init__(self, url: str, key: str, name: str, useDeviceTime: bool) -> None:
130125 self .dataStore = {'datasetKey' : self .datasetKey , 'data' : []}
131126 self .counter = 0
132127 self .error = None
133-
128+
134129
135130 def addDataPoint (self , sensorName : str , value : float , time : int = None ):
136131 if (self .error ):
@@ -139,7 +134,7 @@ def addDataPoint(self, sensorName: str, value: float, time: int = None):
139134 raise ValueError ("Datapoint is not a number" )
140135 if (not self .useDeviceTime and type (time ) is not int and type (time ) is not float ):
141136 raise ValueError ("Provide a valid timestamp" )
142-
137+
143138 if (self .useDeviceTime ):
144139 time = timelib .time ()
145140
@@ -157,7 +152,7 @@ def addDataPoint(self, sensorName: str, value: float, time: int = None):
157152 dataPoint ['start' ] = min (dataPoint ['start' ], time )
158153 dataPoint ['end' ] = max (dataPoint ['end' ], time )
159154 break
160-
155+
161156 self .counter = self .counter + 1
162157 if self .counter > 1000 :
163158 self .upload ()
0 commit comments