Convert a pandas DataFrame column whose values are vectors into tensors

My question is how to convert vectors stored in a pandas DataFrame into tensors. The DataFrame has a Resume column that holds a vector representation of each resume document, and I need to convert this column of the dataset into tensors. The code is below. [image: enter image description here]

The Resume column holds a list of numbers (that is, a vector) in each row, and the Category column of the DataFrame holds scalar values. I tried to convert the column into tensors this way:

tf.convert_to_tensor(output[["Resume"]])

Other approaches I have tried are `numeric_dict_ds = tf.data.Dataset.from_tensor_slices((dict(output[["Resume"]]), output[["Category"]]))`, and the last approach was `numeric_dataset = tf.data.Dataset.from_tensor_slices((numeric_features, target))`.

But none of them works.

import os
import re
import shutil
import string
import warnings

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from nltk.corpus import stopwords
from official.nlp import optimization  # to create AdamW optimizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import LabelEncoder

tf.get_logger().setLevel('ERROR')




# Silence Python warnings for the rest of the script.
warnings.filterwarnings('ignore')
# Download the NLTK stop-word corpus that the next statement reads.
nltk.download('stopwords')

# English stop words plus the two quote tokens `` and ''.
stopwords_set = {*stopwords.words('english'), '``', "''"}
def clean_resume_text(resume):
    """Normalise raw resume text before it is embedded.

    Steps, in order: lowercase; drop URLs; drop digits; drop the standalone
    tokens ``rt`` and ``cc``; drop hashtags and mentions; drop ASCII
    punctuation; turn remaining non-word and non-ASCII characters into
    spaces; collapse whitespace runs and trim both ends.

    Args:
        resume: The resume text as a ``str``.

    Returns:
        The cleaned, lowercase, ASCII-only text with single spaces between
        tokens and no leading or trailing whitespace.
    """
    resume = resume.lower()
    resume = re.sub(r'http\S+\s*', ' ', resume)  # URLs
    resume = ''.join(ch for ch in resume if not ch.isdigit())  # digits
    # The text is already lowercase, so match "rt"/"cc" rather than "RT", and
    # only as whole tokens: a bare 'cc' pattern also removed the "cc" inside
    # ordinary words such as "account" or "success".
    resume = re.sub(r'\b(?:rt|cc)\b', ' ', resume)
    resume = re.sub(r'#\S+', '', resume)  # hashtags
    resume = re.sub(r'@\S+', ' ', resume)  # mentions
    # Delete (not space out) ASCII punctuation in a single pass.
    resume = resume.translate(str.maketrans('', '', string.punctuation))
    resume = re.sub(r'\W', ' ', resume)
    resume = re.sub(r'[^\x00-\x7f]', ' ', resume)  # non-ASCII characters
    # Collapse whitespace runs; strip so removed tokens at either end do not
    # leave a stray space behind.
    return re.sub(r'\s+', ' ', resume).strip()

# Clean every resume in place before it is embedded.
resume_df['Resume'] = resume_df['Resume'].apply(clean_resume_text)

# TF Hub layers: the text preprocessor and the BERT encoder it feeds.
bert_preprocess = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)
bert_encoder = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
    trainable=True,
)

def get_sentence_embeding(sentences):
    """Embed *sentences* with the module-level BERT layers.

    Passes the input through ``bert_preprocess``, feeds the result to
    ``bert_encoder`` and returns the encoder's ``'pooled_output'`` entry.
    """
    encoder_outputs = bert_encoder(bert_preprocess(sentences))
    return encoder_outputs['pooled_output']

# Replace each cleaned resume with its embedding. Every document is wrapped
# in a one-element list because the embedding helper is called on a list.
resume_df["Resume"] = resume_df["Resume"].apply(
    lambda document: get_sentence_embeding([document])
)

# Persist the embedded frame, then load it back as `output`.
resume_df.to_pickle("resume_Embedding.pkl")
output = pd.read_pickle("resume_Embedding.pkl")

# Map the category labels to integer codes.
encoder = LabelEncoder().fit(output["Category"])
output["Category"] = encoder.transform(output["Category"])

# Each cell of output["Resume"] holds one embedding vector, so the column has
# object dtype and tf.convert_to_tensor cannot convert the frame directly.
# Flatten every embedding to a single row (this also absorbs a leading batch
# axis of 1, if the stored value has one) and concatenate the rows into one
# float32 matrix with one row per resume.
features = tf.concat(
    [
        tf.reshape(tf.convert_to_tensor(vector, dtype=tf.float32), (1, -1))
        for vector in output["Resume"]
    ],
    axis=0,
)
# Integer class ids produced by the label encoder; kept separate from the
# features instead of being packed into the same tensor.
labels = tf.convert_to_tensor(output["Category"].to_numpy(), dtype=tf.int32)
# Label codes run from 0 to n-1, so the number of distinct codes is the number
# of output units the classifier needs.
num_classes = int(output["Category"].nunique())

model = tf.keras.Sequential([
    # Input shape describes ONE sample (the embedding width), not the dataset.
    tf.keras.Input(shape=(features.shape[1],)),
    # Fixed hidden width; the layer size must not depend on the row count.
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])
# Integer labels with a softmax output call for sparse categorical
# cross-entropy; mean squared error is not a classification loss here.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


Solution 1:[1]

import tensorflow as tf
# Three equal-length rows standing in for the per-document vectors.
resume = [
    [1.0, 2.0],
    [3.0, 4.0],
    [5.0, 6.0],
]
# A rectangular nested list converts directly to a 2-D float32 tensor.
tf.convert_to_tensor(resume, dtype=tf.float32)

Output

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[1., 2.],
       [3., 4.],
       [5., 6.]], dtype=float32)>

Take a look at this link

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 TFer