'Hitting ArrowTypeError: Input object was not a NumPy array when trying to upload a geodataframe to BigQuery in a Python notebook

I'm using a function in a Python notebook to try to upload a geodataframe into a BigQuery Table:

def geopandas_to_gbq(geodataframe, dataset_name, table_name, replace_table=True):
    """
    This function loads a GeoPandas GeoDataFrame into a BigQuery table. The
    geometry field on the data frame will be configured correctly within BQ.
    Note that there is a function called to_gbq that exists on Pandas already.
    But that function won't create geography fields in the appropriate places.
    Instead it will upload geographic data as a string.
    """
    import warnings; warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

    geog_field_name = geodataframe._geometry_column_name
    print(f'Loading data into `{dataset_name}.{table_name}` with geography field `{geog_field_name}`...')
    job_config = bigquery.LoadJobConfig(
        schema=[
            bigquery.SchemaField(geog_field_name, bigquery.enums.SqlTypeNames.GEOGRAPHY)
        ],
        write_disposition="WRITE_TRUNCATE" if replace_table else "WRITE_APPEND",
    )
    client = bigquery.Client()
    job = client.load_table_from_dataframe(
        geodataframe.to_crs(4326), f'{dataset_name}.{table_name}', job_config=job_config
    )
    return job.result()

I'm creating the geodataframe like this :

import geopandas as gpd
from shapely import wkt

df = pd.read_csv('/content/mycsvfile.csv')
df['geometry'] = df.geowkt.apply(wkt.loads)
df.drop('geowkt', axis=1, inplace=True) #Drop WKT column

# Geopandas GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry',crs='epsg:4326')
gdf.head()

geopandas_to_gbq(gdf, 'TEST', 'upload_table', replace_table=True)

It seems to work fine but when I run the function I hit what seems to be an arrow error.

7 frames
<ipython-input-8-02fd56f750de> in geopandas_to_gbq(geodataframe, dataset_name, table_name, replace_table)
     98     client = bigquery.Client()
     99     job = client.load_table_from_dataframe(
--> 100         geodataframe.to_crs(4326), f'{dataset_name}.{table_name}', job_config=job_config
    101     )
    102     return job.result()

/usr/local/lib/python3.7/dist-packages/google/cloud/bigquery/client.py in load_table_from_dataframe(self, dataframe, destination, num_retries, job_id, job_id_prefix, location, project, job_config, parquet_compression)
   1608                     job_config.schema,
   1609                     tmppath,
-> 1610                     parquet_compression=parquet_compression,
   1611                 )
   1612             else:

/usr/local/lib/python3.7/dist-packages/google/cloud/bigquery/_pandas_helpers.py in dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression)
    366         raise ValueError("pyarrow is required for BigQuery schema conversion.")
    367 
--> 368     arrow_table = dataframe_to_arrow(dataframe, bq_schema)
    369     pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression)
    370 

/usr/local/lib/python3.7/dist-packages/google/cloud/bigquery/_pandas_helpers.py in dataframe_to_arrow(dataframe, bq_schema)
    333         arrow_names.append(bq_field.name)
    334         arrow_arrays.append(
--> 335             bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field)
    336         )
    337 

/usr/local/lib/python3.7/dist-packages/google/cloud/bigquery/_pandas_helpers.py in bq_to_arrow_array(series, bq_field)
    185     if bq_field.field_type.upper() in schema._STRUCT_TYPES:
    186         return pyarrow.StructArray.from_pandas(series, type=arrow_type)
--> 187     return pyarrow.array(series, type=arrow_type)
    188 
    189 

/usr/local/lib/python3.7/dist-packages/pyarrow/array.pxi in pyarrow.lib.array()

/usr/local/lib/python3.7/dist-packages/pyarrow/array.pxi in pyarrow.lib._ndarray_to_array()

/usr/local/lib/python3.7/dist-packages/pyarrow/error.pxi in pyarrow.lib.check_status()

ArrowTypeError: Input object was not a NumPy array

I can upload a df fine with the geometry as a string and then just create a copy of the table and convert the string to a geography. But I'd like to figure out what's wrong with this upload function as is.



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source