'A SQL only runs slow in a psycopg2 / PostgreSQLcode
I had a PostgreSQL for IoT applications. There are many IoT timeseries data in one table, called events. Since it contains too many rows, about 800M rows. The SQL query of this table returns very slow. I delete most of the rows, at this moment it contains only 9K rows. UPDATE: its size expands to 500K in the next day. I can test the SQL response time from a cloud console from AliCloud/RDS, most of them can return within 2ms.
However, I found one of my psycopg2 code of querying this table still return very slow, longer than 60 seconds.
#!/usr/bin/env python3
#coding: utf-8
import psycopg2
import time
import sys
# Take care the connection params
def main():
conn = None
try:
host = "pgm-myaddress.rds.aliyuncs.com"
port = 1921
#'''
account = "mypgserver"
password = "mypasswd"
database = "mytestdb"
connect_timeout = 10
options = "-c DateStyle="
conn = psycopg2.connect(host = host,
port = port,
user = account,
password = password,
database = database,
connect_timeout = connect_timeout,
options = options)
conn.autocommit = True
cursor = conn.cursor()
_start = time.time()
# Take care the quotation marks
eui64 = "EC1BBDFFFE29011F"
cursor.execute("SELECT * FROM controllers WHERE guid = %s LIMIT 1", [eui64])
if result is not None:
controllerId = result[0]
print(result)
print("Controller_ID: {}".format(controllerId))
_elapsed = time.time() - _start
print("SQL_1 Elapsed:{}\r\n".format(_elapsed))
else:
print("SQL_1:None\r\n")
sys.exit(0)
_start = time.time()
cursor.execute("SELECT * FROM devices WHERE fk_controller = %s LIMIT 1", [controllerId])
result = cursor.fetchone()
deviceId = result[0]
print(result)
print("Device_ID: {}".format(deviceId))
_elapsed = time.time() - _start
print("SQL_2 Elapsed:{}\r\n".format(_elapsed))
_start = time.time()
cursor.execute("SELECT * FROM events ORDER by id DESC LIMIT 100")
result = cursor.fetchall()
print("Count = {}".format(len(result)))
_elapsed = time.time() - _start
print("Elapsed = {}\r\n".format(_elapsed))
_start = time.time()
controllerId = 1108
cursor.execute("SELECT * FROM events WHERE fk_controller = %s ORDER by id DESC LIMIT 20", [controllerId])
result = cursor.fetchall()
print("Count = {}".format(len(result)))
for res in result:
print("Event : {}".format(res))
_elapsed = time.time() - _start
print("Elapsed = {}\r\n".format(_elapsed))
_start = time.time()
deviceId = 1012
cursor.execute("SELECT * FROM events WHERE fk_device = %s ORDER by id DESC LIMIT 20", [deviceId])
result = cursor.fetchall()
print("Count = {}".format(len(result)))
for res in result:
print("Event : {}".format(res))
_elapsed = time.time() - _start
print("Elapsed = {}\r\n".format(_elapsed))
_start = time.time()
deviceId = 1012
cursor.execute("EXPLAIN ANALYZE SELECT * FROM events WHERE fk_device = %s ORDER by id DESC LIMIT 20", [deviceId])
result = cursor.fetchall()
print("Explain: {}".format(result))
_elapsed = time.time() - _start
print("Elapsed = {}\r\n".format(_elapsed))
_start = time.time()
deviceId = 1012
cursor.execute("EXPLAIN ANALYZE SELECT * FROM events WHERE fk_device = %s LIMIT 20", [deviceId])
result = cursor.fetchall()
print("Explain: {}".format(result))
_elapsed = time.time() - _start
print("Elapsed = {}\r\n".format(_elapsed))
except Exception as e:
print(e)
finally:
if conn:
conn.close()
if __name__ == "__main__":
main()
Then I got the following result:
SQL_1:0.0015239715576171875
(1212, 1, 1, None, 5, 'EC1BBDFFFE29075A', 'EC1BBDFFFE29075A', '68e1437b-f0da-4615-a934-1790012e6dba', '2.2.0', 21, 42842, 42942, '0', 'C01_9884e3072fbd', '6241902e', datetime.datetime(2022, 4, 8, 23, 12, 56, 363467, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), datetime.datetime(2021, 8, 23, 13, 40, 17, 947000, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), datetime.datetime(2021, 8, 23, 13, 40, 18, 884370, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), datetime.datetime(2022, 4, 8, 23, 12, 56, 806480, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), None)
Controller_ID: 1212
SQL_2:0.0009047985076904297
(1108, 1, 1, None, 1212, 8, 'sensors', 'EC1BBDFFFE29075A', 'EC1BBDFFFE29075A', Decimal('0'), Decimal('0'), '', {}, {}, datetime.datetime(2021, 8, 23, 13, 40, 19, 628069, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), datetime.datetime(2021, 8, 23, 13, 40, 19, 628069, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), datetime.datetime(2021, 8, 23, 13, 40, 19, 628069, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), datetime.datetime(2021, 8, 23, 13, 40, 19, 628069, tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=480, name=None)), None)
Device_ID: 1108
Count = 5
Elapsed = 31.72335386276245
Explain: [('Limit (cost=0.42..432.80 rows=20 width=136) (actual time=0.013..0.050 rows=20 loops=1)',), (' -> Index Scan Backward using events_pkey on events (cost=0.42..1885205.89 rows=87202 width=136) (actual time=0.012..0.046 rows=20 loops=1)',), (' Filter: (fk_device = 1012)',), (' Rows Removed by Filter: 111',), ('Planning Time: 0.062 ms',), ('Execution Time: 0.065 ms',)]
Elapsed = 0.001512765884399414
Explain: [('Limit (cost=0.00..427.41 rows=20 width=136) (actual time=72971.667..72971.730 rows=20 loops=1)',), (' -> Seq Scan on events (cost=0.00..1863543.06 rows=87202 width=136) (actual time=72971.666..72971.724 rows=20 loops=1)',), (' Filter: (fk_device = 1012)',), (' Rows Removed by Filter: 224',), ('Planning Time: 0.035 ms',), ('Execution Time: 72971.750 ms',)]
Elapsed = 72.97247743606567
The first two SQL return within 2ms, but the latest one returns longer than 31 seconds. I tested the latest SQL in Alicloud's console, only 50ms.
UPDATE
I find that the performance is related to the "ORDER BY id DESC" in the query. We can find out that query without "ORDER BY id" will start Seq Scan, aka one by one sequential scan. The query orderd by id will start Index Scan over events_pkey primary key, which is much faster than sequential scan.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|
