'Python SQLAlchemy structure and averaged data retrieval
I have a database using Flask-sqlalchemy and I want to store and retrieve (averaged) temperature and humidity data of a sensor. I've written the class below that stores both together, since I figured it was one sensor and the values are valid for the same timestep they were measured.
My question is, how can I make this performant?
Specifically I found that the retrieval of data takes a long time ~5s for some hundered points. I know that it's programmed very inefficiently - how do I improve this mindfully? The only thing I really need is a function to retrieve measurements in a time-range or in the order that I put them in, which is equivalent and a moving average or blockwise averages. The bulk of the time is consumed by: TempHum.query.filter(TempHum.timestamp>starttime)
The main focus is: how do I efficiently structure the database and how to retrieve (and filter) the data efficiently.
#!/usr/bin/python3
from flask_sqlalchemy import SQLAlchemy
from datetime import datetime, timedelta
from math import floor
db = SQLAlchemy()
# Define a Table in the database
class TempHum(db.Model):
__tablename__ = 'tempHumMeasurements'
# Here we define columns for the table
# Each column is also a Python attribute.
id = db.Column(db.Integer, primary_key = True)
timestamp = db.Column(db.DateTime(timezone=True))
temperature = db.Column('temperature', db.Float)
humidity = db.Column('humidity', db.Float)
location = db.Column('location', db.String(32))
def __init__(self, **kwargs):
super(TempHum, self).__init__(**kwargs)
# do custom initialization here
#now.isoformat(timespec="seconds")
self.timestamp = datetime.now().replace(microsecond=0)
self.location = 'room1'
def __repr__(self):
return "<TempHum {}>".format(self.timestamp.strftime("%d %m %Y, %H:%M"))
def getNSamples(n):
return TempHum.query.\
order_by(TempHum.timestamp.desc()).\
limit(n).\
from_self().order_by(TempHum.timestamp.asc()).all()
def getSampledNDays(n):
starttime = datetime.now() - timedelta(days=n)
meas = TempHum.query.\
filter(TempHum.timestamp>starttime).\
order_by(TempHum.timestamp.asc()).all()
return meas1
# ___________
# helper functions
def mean(lst):
return sum(lst)/len(lst)
def averageData(measures):
mid = floor(len(measures)/2)
meantemp = TempHum.mean([tmp.temperature for tmp in measures])
meanhum = TempHum.mean([tmp.humidity for tmp in measures])
means = TempHum(temperature=meantemp,humidity=meanhum)
means.timestamp = measures[mid].timestamp
return means
def downsample(arr,average_mins):
# if values have the correct sampling time they are averaged
# over average_count values, otherwise all data is taken into account
# arr is an array of TempHum objects that should be downsampled
average_time = timedelta(minutes=average_mins)
buffer = []
output_array = []
for k,date in enumerate(arr):
if not buffer: # buffer first one if buffer is empty
buffer.append(arr[k])
continue
timestep = arr[k].timestamp-buffer[0].timestamp
if timestep<average_time:
# values within the window
buffer.append(arr[k])
else:
averaged = TempHum.averageData(buffer)
output_array.append(averaged)
buffer.clear()
buffer.append(arr[k])
return output_array;
class HeaterState(db.Model):
id = db.Column(db.Integer, primary_key=True)
timestamp = db.Column(db.DateTime(timezone=True))
heating = db.Column('heating', db.Boolean)
def __init__(self, **kwargs):
super(HeaterState, self).__init__(**kwargs)
self.timestamp = datetime.now().replace(microsecond=0)
def __repr__(self):
return "<HeaterState {}>".format(self.timestamp.strftime("%d %m %Y, %H:%M"))
def getSampledNDays(n):
starttime = datetime.now() - timedelta(days=n)
meas = HeaterState.query.\
filter(HeaterState.timestamp>starttime).\
order_by(HeaterState.timestamp.asc()).all()
return meas
Solution 1:[1]
Have you tried adding an index on the timestamp column?
Something like this:
class TempHum(db.Model):
...
timestamp = db.Column(db.DateTime(timezone=True), db_index=True)
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | Chris Sears |
