'Postgres: optimize performance for URN JOIN matching vs INT

In our database we are thinking using URNs everywhere, instead of IDs.

e.g. urn:ny:MyTypeOfObject:123

Testing the performance on

CREATE table addresses AS 
SELECT 
i as id_int,
CONCAT('urn:ny:address:', i::text) as id_urn
FROM 
generate_series(1, 100000000) s(i);  -- 100M


CREATE table addresses_attrib AS 
SELECT 
i as id_int,
CONCAT('urn:ny:address:', i::text) as id_urn,
i%79 as random_filter
FROM 
generate_series(1, 10000000) s(i); -- 10M

-- CREATE index both on id_int and id_urn
CREATE index int_a1 on addresses(id_int);
CREATE index int_aa1 on addresses_attrib(id_int);
CREATE index urn_a1 on addresses(id_urn);
CREATE index urn_aa1 on addresses_attrib(id_urn);

SELECT * FROM addresses_attrib aa
JOIN addresses a on (aa.id_int=a.id_int)
WHERE aa.random_filter=4
LIMIT 100000

VS

SELECT * FROM addresses_attrib aa
JOIN addresses a on (aa.id_urn=a.id_urn)
WHERE aa.random_filter=4
LIMIT 100000

Using pg_bench, the int based one is about twice as fast (0.5 avg the int) compared to the urn one (1.2s average) according to pg_bench.

Is there any way to speed up this kind of joins on URNs?



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source