'Generate rows from string of numbers
by I have an Oracle 18c table that has strings like this:
select
'((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' as multipart_lines
--There are more rows in the actual table.
from
dual
MULTIPART_LINES
-------------------------------------------------------------
((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))
-- v1 v2 v3 v4 v5
-- | part 1 | | part 2 |
- Individual coordinates are separated by spaces.
- Vertices (X Y Z coordinates) are separated by commas.
- Line parts are wrapped in brackets and separated by commas.
In a query, I want to generate rows for each vertex:
PART_NUM VERTEX_NUM X Y Z
---------- ---------- ---------- ---------- ----------
1 1 0 5 0
1 2 10 10 11.18
1 3 30 0 33.54
2 1 50 10 33.54
2 2 60 10 43.54
- I want to do this in a query. I don't want to insert rows into a table.
- Unfortunately, I don't have CREATE TYPE privileges in the database. But I can create functions (and of course, inline functions are an option too).
How can I generate rows from the numbers (vertices) in the string?
Solution 1:[1]
As an alternative - here is how you can process the input strings to convert them to proper JSON strings; then the task becomes trivial. Showing just the JSON-ization first, separately, as it really is the meaningful part of this solution; then after I show the query and result, I will complete the solution by adding the JSON manipulation.
with
inputs (id, multipart_lines) as (
select 2810,
'((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))'
from dual union all
select 7284, '((-2.3 0.2 3))' from dual
)
, j (id, ml) as (
select id,
regexp_replace(
regexp_replace(
regexp_replace(
regexp_replace(
regexp_replace(multipart_lines
, '\(\s*\(\s*', '[[[')
, '\s*\)\s*\)', ']]]')
, '\s*\)\s*,\s*\(\s*', '],[')
, '\s*,\s*', '],[')
, '\s+', ',')
from inputs
)
select * from j;
ID ML
----- --------------------------------------------------------------------
2810 [[[0,5,0],[10,10,11.18],[30,0,33.54]],[[50,10,33.54],[60,10,43.54]]]
7284 [[[-2.3,0.2,3]]]
Your inputs should really look like the strings in column ml in my subquery j - then you could process them like this:
with
inputs (id, multipart_lines) as (
........
)
, j (id, ml) as (
........
)
select id, part_num, vertex_num, x, y, z
from j,
json_table(ml, '$[*]'
columns (
part_num for ordinality,
nested path '$[*]'
columns (
vertex_num for ordinality,
x number path '$[0]',
y number path '$[1]',
z number path '$[2]'
)
)
)
order by id, part_num, vertex_num -- if needed
;
The output is the same as in my other answer.
Solution 2:[2]
It would be so much simpler if the input was in some standard format - for example JSON. Then the task would be trivial. Do you have any power over that?
If not, you could either transform the input into proper JSON (or similar), or you could attack the problem directly. I illustrate the latter below, assuming Oracle version 12.1 or higher.
with
inputs (id, multipart_lines) as (
select 2810,
'((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))'
from dual union all
select 7284, '((-2.3 0.2 3))' from dual
)
select id, part_num, vertex_num, x, y, z
from inputs
cross join lateral
( select level as part_num,
regexp_substr(multipart_lines,
'\(([^()]+)\)', 1, level, null, 1) as part
from dual
connect by level <= regexp_count(multipart_lines, '\(') - 1
)
cross join lateral
(
select level as vertex_num,
regexp_substr(part, '[^,]+', 1, level) as vertex
from dual
connect by level <= regexp_count(part, ',') + 1
)
cross join lateral
(
select to_number(regexp_substr(vertex, '[^ ]+', 1, 1)) as x,
to_number(regexp_substr(vertex, '[^ ]+', 1, 2)) as y,
to_number(regexp_substr(vertex, '[^ ]+', 1, 3)) as z
from dual
)
order by id, part_num, vertex_num -- if needed
;
Output (from the sample inputs I included in the query):
ID PART_NUM VERTEX_NUM X Y Z
---------- ---------- ---------- ---------- ---------- ----------
2810 1 1 0 5 0
2810 1 2 10 10 11.18
2810 1 3 30 0 33.54
2810 2 1 50 10 33.54
2810 2 2 60 10 43.54
7284 1 1 -2.3 .2 3
Solution 3:[3]
I see that mathguy has got there. I was working with from json_table but I can't unnest 2 rows at a time otherwise I'd be basically there with a second use of row_number() over (partition by Paru_num).
create table sample(value varchar(100));?
insert into sample values ('((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))')
1 rows affected
with a as ( select '["'|| regexp_replace(value,'\(|\)','') ||'"]' a from sample ), b as ( select regexp_replace(a,', ?','","') b from a ), c as ( SELECT value c FROM json_table( (select b from b) , '$[*]' COLUMNS (value PATH '$') ) ), d as ( SELECT c d, instr(c,' ') s1, instr(c,' ',instr(c,' ')+1) s2 from c) select substr(d,0,s1) x, substr(d,s1+1,s2-s1) y, substr(d,s2+1) z from d
X | Y | Z :-- | :-- | :---- 0 | 5 | 0 10 | 10 | 11.18 30 | 0 | 33.54 50 | 10 | 33.54 60 | 10 | 43.54
with a as ( select '["'|| regexp_replace(value,'\(+|\)+','"') ||'"]' a from sample ), b as( select replace(a,'""','"')b from a ), c as ( SELECT row_number() over (order by 'zero') pn, value c FROM json_table( (select b from b) , '$[*]' COLUMNS (value PATH '$') ) ), d as ( select '["'|| pn ||' '|| regexp_replace(c,', ?','","'||pn||' ')||'"]' d from c ) select * from d| D | | :----------------------------------------- | | ["1 0 5 0","1 10 10 11.18","1 30 0 33.54"] | | ["2 50 10 33.54","2 60 10 43.54"] |
with a as ( select '["'|| regexp_replace(value,'\(+|\)+','"') ||'"]' a from sample ), b as( select replace(a,'""','"')b from a ), c as ( SELECT row_number() over (order by 'zero') pn, value c FROM json_table( (select b from b) , '$[*]' COLUMNS (value PATH '$') ) ), d as ( select '["'|| pn ||' '|| regexp_replace(c,', ?','","'||pn||' ')||'"]' d from c ), e as ( SELECT row_number() over (order by 'zero') pn, value c FROM json_table( (select d from d) , '$[*]' COLUMNS (value PATH '$') ) ) select * from eORA-01427: single-row subquery returns more than one row
db<>fiddle here
Solution 4:[4]
You can do it with only simple string functions (which are much faster than regular expressions) and recursive sub-queries:
WITH line_bounds (id, multipart_lines, line_no, spos, epos) AS (
SELECT id,
multipart_lines,
1,
2,
INSTR(multipart_lines, ')', 2)
FROM table_name
UNION ALL
SELECT id,
multipart_lines,
line_no + 1,
epos + 2,
INSTR(multipart_lines, ')', epos + 2)
FROM line_bounds
WHERE epos > 0
)
SEARCH DEPTH FIRST BY id SET line_order,
row_bounds (id, line, line_no, row_no, spos, epos) AS (
SELECT id,
SUBSTR(multipart_lines, spos + 1, epos - spos - 1),
line_no,
1,
1,
INSTR(
SUBSTR(multipart_lines, spos + 1, epos - spos - 1),
',',
1
)
FROM line_bounds
WHERE epos > 0
UNION ALL
SELECT id,
line,
line_no,
row_no + 1,
epos + 2,
INSTR(line, ',', epos + 2)
FROM row_bounds
WHERE epos > 0
)
SEARCH DEPTH FIRST BY id, line_no SET row_order,
column_bounds (id, line, line_no, row_no, xpos, ypos, zpos, epos) AS (
SELECT id,
line,
line_no,
row_no,
spos,
INSTR(line, ' ', spos, 1),
INSTR(line, ' ', spos, 2),
epos
FROM row_bounds
)
SELECT id,
line_no,
row_no,
SUBSTR(line, xpos + 0, ypos - xpos) AS x,
SUBSTR(line, ypos + 1, zpos - ypos - 1) AS y,
CASE epos
WHEN 0
THEN SUBSTR(line, zpos + 1)
ELSE SUBSTR(line, zpos + 1, epos - zpos - 1)
END AS z
FROM column_bounds
Which, for the sample data:
CREATE TABLE table_name (id, multipart_lines) AS
SELECT 1, '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' FROM DUAL UNION ALL
SELECT 2, '((0 1 0, 0 2 0, 0 3 0),(0 4 0, 0 5 0))' FROM DUAL;
Outputs:
ID LINE_NO ROW_NO X Y Z 1 1 1 0 5 0 1 1 2 10 10 11.18 1 1 3 30 0 33.54 1 2 1 50 10 33.54 1 2 2 60 10 43.54 2 1 1 0 1 0 2 1 2 0 2 0 2 1 3 0 3 0 2 2 1 0 4 0 2 2 2 0 5 0
db<>fiddle here
Solution 5:[5]
Use SDO_GEOMETRY to parse the string and then use SDO_UTIL.EXTRACT to get each element and then SDO_UTIL.GETVERTICES to get the vertices:
SELECT t.id,
e.elem_no,
v.id AS coord_id,
x,
y,
z
FROM ( SELECT id,
SDO_GEOMETRY('MULTILINESTRING '||multipart_lines) AS shape
FROM table_name
) t
CROSS JOIN LATERAL (
SELECT LEVEL AS elem_no,
SDO_UTIL.EXTRACT(t.shape, LEVEL) AS elem
FROM DUAL
CONNECT BY LEVEL <= SDO_UTIL.GETNUMELEM(t.shape)
) e
CROSS APPLY TABLE(SDO_UTIL.GETVERTICES(e.elem)) v
Which, for the sample data:
CREATE TABLE table_name (id, multipart_lines) AS
SELECT 1, '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' FROM DUAL UNION ALL
SELECT 2, '((0 1 0, 0 2 0, 0 3 0),(0 4 0, 0 5 0))' FROM DUAL;
Outputs:
ID ELEM_NO COORD_ID X Y Z 1 1 1 0 5 0 1 1 2 10 10 11.18 1 1 3 30 0 33.54 1 2 1 50 10 33.54 1 2 2 60 10 43.54 2 1 1 0 1 0 2 1 2 0 2 0 2 1 3 0 3 0 2 2 1 0 4 0 2 2 2 0 5 0
db<>fiddle here
Solution 6:[6]
@SolomonYakobson provided this answer in an Oracle Community post.
with sample as (
select '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' as multipart_lines
--There are more rows in the actual table.
from dual
)
select part_num,
vertex_num,
to_number(regexp_substr(vertex,'[^ ]+')) x,
to_number(regexp_substr(vertex,'[^ ]+',1,2)) y,
to_number(regexp_substr(vertex,'[^ ]+',1,3)) z
from sample,
lateral(
select level part_num,
regexp_substr(multipart_lines,'\(([^()]+)',1,level,null,1) part
from dual
connect by level < regexp_count(multipart_lines,'\(')
),
lateral(
select level vertex_num,
regexp_substr(part,'[^,]+',1,level) vertex
from dual
connect by level <= regexp_count(part,',') + 1
)
/
PART_NUM VERTEX_NUM X Y Z
---------- ---------- ---------- ---------- ----------
1 1 0 5 0
1 2 10 10 11.18
1 3 30 0 33.54
2 1 50 10 33.54
2 2 60 10 43.54
As well as an Oracle 19c solution:
create or replace
function split_multipart_line(
p_line varchar2
)
return varchar2
sql_macro
is
begin
return q'[
select part_num,
vertex_num,
to_number(regexp_substr(vertex,'[^ ]+')) x,
to_number(regexp_substr(vertex,'[^ ]+',1,2)) y,
to_number(regexp_substr(vertex,'[^ ]+',1,3)) z
from dual,
lateral(
select level part_num,
regexp_substr(p_line,'\(([^()]+)',1,level,null,1) part
from dual
connect by level < regexp_count(p_line,'\(')
),
lateral(
select level vertex_num,
regexp_substr(part,'[^,]+',1,level) vertex
from dual
connect by level <= regexp_count(part,',') + 1
)]';
end;
/
Function created.
SQL> with sample as (
2 select '((0 5 0, 10 10 11.18, 30 0 33.54),(50 10 33.54, 60 10 43.54))' multipart_lines from dual union all
3 select '((1 2 3, 4 5 6, 7 8 9, 10 11 12),(22 33 44, 55 66 77))' multipart_lines from dual
4 )
5 select l.*
6 from sample,
7 lateral(
8 select *
9 from split_multipart_line(multipart_lines)
10 ) l
11 /
PART_NUM VERTEX_NUM X Y Z
---------- ---------- ---------- ---------- ----------
1 1 0 5 0
1 2 10 10 11.18
1 3 30 0 33.54
2 1 50 10 33.54
2 2 60 10 43.54
1 1 1 2 3
1 2 4 5 6
1 3 7 8 9
1 4 10 11 12
2 1 22 33 44
2 2 55 66 77
11 rows selected.
SQL>
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | |
| Solution 2 | mathguy |
| Solution 3 | |
| Solution 4 | MT0 |
| Solution 5 | MT0 |
| Solution 6 | User1974 |
