'terraform database creation in athena
I am trying to create a database using terraform and this seems very complicated for a poor query...
Could you help me, please?
I have tried null_resource with local-exec and data "external" Python... I think I am looking the wrong way
ex which doesn't works in terraform 0.12
resource "null_resource" "create-endpoint" {
provisioner "local-exec" {
query = <<EOF
{
CREATE EXTERNAL TABLE `dashboard_loading_time`(
`timestamp_iso` string,
`app_identification` struct<service:string,app_name:string,app_type:string,stage:string>,
`user` struct<api_gateway_key:struct<id:string,name:string>,mashery_key:struct<id:string,name:string>,employee:struct<id:string,name:string>>,
`action` struct<action_type:string,path:string>,
`result` struct<status:string,http_status:string,response:struct<response:string>>)
PARTITIONED BY (
`year` int)
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
's3://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/dev'
}
EOF
command = "aws athena start-query-execution --query-string "query""
}
}
I would like to find the simplest way to do this using terraform.
Solution 1:[1]
If you wanna make it for athena, need to make glue resources.
try below code with terraform.
variable "service_name" {
default = "demo-service"
}
variable "workspace" {
default = "dev"
}
variable "columns" {
default = {
id = "int"
type = "string"
status = "int"
created_at = "timestamp"
}
}
resource "aws_glue_catalog_database" "athena" {
name = "${var.service_name}_db"
}
resource "aws_glue_catalog_table" "athena" {
name = "${var.service_name}_logs"
database_name = "${aws_glue_catalog_database.athena.name}"
table_type = "EXTERNAL_TABLE"
parameters = {
EXTERNAL = "TRUE"
}
storage_descriptor {
location = "s3://${var.service_name}-${var.workspace}-data-pipeline/log/"
input_format = "org.apache.hadoop.mapred.TextInputFormat"
output_format = "org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"
ser_de_info {
name = "jsonserde"
serialization_library = "org.openx.data.jsonserde.JsonSerDe"
parameters = {
"serialization.format" = "1"
}
}
dynamic "columns" {
for_each = "${var.columns}"
content {
name = "${columns.key}"
type = "${columns.value}"
}
}
}
partition_keys {
name = "year"
type = "string"
}
partition_keys {
name = "month"
type = "string"
}
partition_keys {
name = "day"
type = "string"
}
partition_keys {
name = "hour"
type = "string"
}
}
refer to this repository : aws-serverless-data-pipeline-by-terraform
Solution 2:[2]
resource "aws_glue_catalog_table" "aws_glue_catalog_table" {
name = "mytable"
database_name = aws_glue_catalog_database.aws_glue_catalog_database.name
table_type = "EXTERNAL_TABLE"
parameters = {
"classification" = "json"
}
storage_descriptor {
location = "s3://mybucket/myprefix"
input_format = "org.apache.hadoop.mapred.TextInputFormat"
output_format = "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
ser_de_info {
name = "myserdeinfo"
serialization_library = "org.openx.data.jsonserde.JsonSerDe"
parameters = {
"paths" = "jsonrootname"
}
}
columns {
name = "column1"
type = "array<struct<resourcearn:string,tags:array<struct<key:string,value:string>>>>"
}
}
partition_keys {
name = "part1"
type = "string"
}
partition_keys {
name = "part2"
type = "string"
}
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | GNOKOHEAT |
| Solution 2 | SimonB |
