Word2Vec embedding to LSTM layers?

I am currently working on a neural network that should predict the next activity and the outcome (both or just one, depending on the self.net_out parameter) of a trace, i.e. a sequence of events taken from an event log. The inputs of the net are windows (prefixes) of a trace of a specific size; a small example follows.
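For clarity, this is the kind of window (prefix) I mean; the activity names, window size and labels below are made up purely to illustrate the shape of the data:

# one trace (case) from the event log
trace = ["register", "check", "approve", "notify", "close"]
# with win_size = 3, build_windows produces prefixes such as
#   ["register", "check", "approve"]  ->  next activity "notify", plus the outcome of the case
#   ["check", "approve", "notify"]    ->  next activity "close",  plus the outcome of the case

Right now the code that builds and trains the network looks like this: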

def nn(self, params):
            # done inside this function so that win_size can easily be turned into a parameter later, if needed
            X_train, Y_train, Z_train = self.build_windows(self.traces_train, self.win_size)

            if(self.net_embedding==0):
                if(self.net_out!=2):
                    Y_train = self.leA.fit_transform(Y_train)
                    Y_train = to_categorical(Y_train)
                    label=Y_train
                if(self.net_out!=1):
                    Z_train = self.leO.fit_transform(Z_train)
                    Z_train = to_categorical(Z_train)
                    label=Z_train

            unique_events = len(self.act_dictionary) 

            input_act = Input(shape=(self.win_size,), dtype='int32', name='input_act')
            if(self.net_embedding==0):
                x_act = Embedding(output_dim=params["output_dim_embedding"], input_dim=unique_events + 1,
                                  input_length=self.win_size)(input_act)
            else:
                print("WIP")

            n_layers = int(params["n_layers"]["n_layers"])

            l1 = LSTM(params["shared_lstm_size"], return_sequences=True, kernel_initializer='glorot_uniform',dropout=params['dropout'])(x_act)
            l1 = BatchNormalization()(l1)
            if(self.net_out!=2):
                l_a = LSTM(params["lstmA_size_1"], return_sequences=(n_layers != 1), kernel_initializer='glorot_uniform',dropout=params['dropout'])(l1)
                l_a = BatchNormalization()(l_a)
            if(self.net_out!=1):
                l_o = LSTM(params["lstmO_size_1"], return_sequences=(n_layers != 1), kernel_initializer='glorot_uniform',dropout=params['dropout'])(l1)
                l_o = BatchNormalization()(l_o)

            for i in range(2,n_layers+1):
                if(self.net_out!=2):
                    l_a = LSTM(params["n_layers"]["lstmA_size_%s_%s" % (i, n_layers)], return_sequences=(n_layers != i), kernel_initializer='glorot_uniform',dropout=params['dropout'])(l_a)
                    l_a = BatchNormalization()(l_a)

                if(self.net_out!=1):
                    l_o = LSTM(params["n_layers"]["lstmO_size_%s_%s" % (i, n_layers)], return_sequences=(n_layers != i), kernel_initializer='glorot_uniform',dropout=params['dropout'])(l_o)
                    l_o = BatchNormalization()(l_o)

            outputs=[]
            if(self.net_out!=2):
                output_l = Dense(self.outsize_act, activation='softmax', name='act_output')(l_a)
                outputs.append(output_l)
            if(self.net_out!=1):
                output_o = Dense(self.outsize_out, activation='softmax', name='outcome_output')(l_o)
                outputs.append(output_o)

            model = Model(inputs=input_act, outputs=outputs)
            model.summary()

            opt = Adam(lr=params["learning_rate"])

            if(self.net_out==0):
                loss = {'act_output':'categorical_crossentropy', 'outcome_output':'categorical_crossentropy'}
                loss_weights= [params['gamma'], 1-params['gamma']]
            if(self.net_out==1):
                loss = {'act_output':'categorical_crossentropy'}
                loss_weights = [1]
            if(self.net_out==2):
                loss = {'outcome_output':'categorical_crossentropy'}
                loss_weights = [1]

            model.compile(loss=loss, optimizer=opt, loss_weights=loss_weights ,metrics=['accuracy'])
            early_stopping = EarlyStopping(monitor='val_loss',
                                           patience=20)
            lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=0, mode='auto',
                                           min_delta=0.0001, cooldown=0, min_lr=0)

            if(self.net_out==0):
                history = model.fit(X_train, [Y_train,Z_train], epochs=3, batch_size=2**params['batch_size'], verbose=2, callbacks=[early_stopping, lr_reducer], validation_split =0.2 )
            else:
                history = model.fit(X_train, label, epochs=300, batch_size=2**params['batch_size'], verbose=2, callbacks=[early_stopping, lr_reducer], validation_split =0.2 )

            # lowest validation loss reached during training
            score = min(history.history['val_loss'])
            #global best_score, best_model
            if self.best_score > score:
                self.best_score = score
                self.best_model = model

            return {'loss': score, 'status': STATUS_OK}

As can be seen, I need to consider two types of embedding. For the one I have already implemented and tested (self.net_embedding=0), each activity/event in each trace (and consequently in each window) is mapped to an integer; I then apply fit_transform and to_categorical. The second type of embedding I have to try uses word2vec. To that end I have already changed the format of the input: instead of converting each activity to an integer, I keep it as a string (the actual name of the activity, standardized to just numbers and letters). I don't know how to proceed from there, though: I guess I should do something like

w2vModel = Word2Vec(X_train, size=params['word2vec_size'], min_count=1)

to get the embedded windows through w2vModel.wv, but how do I then pass these to the LSTM layers? What should the embedding layer right after the input one (where I put print("WIP") for now) be changed into?
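To make the question more concrete, here is a rough, untested sketch of the two options I can imagine, so you can tell me whether either is the right direction. tokens_train is a placeholder for the windows of activity-name strings coming out of build_windows, Input/Embedding/LSTM are the same Keras layers used above, and I'm using gensim 3.x names (size, index2word; in gensim 4 these would be vector_size and index_to_key):

from gensim.models import Word2Vec
import numpy as np

# tokens_train: list of windows, each one a list of activity-name strings (placeholder for my preprocessing)
w2vModel = Word2Vec(tokens_train, size=params['word2vec_size'], min_count=1)

# Option A: keep integer-encoded windows as input, but initialise the Embedding
# layer with the pre-trained Word2Vec vectors instead of learning them from scratch.
vocab = {word: i + 1 for i, word in enumerate(w2vModel.wv.index2word)}  # index 0 reserved for padding
embedding_matrix = np.zeros((len(vocab) + 1, params['word2vec_size']))
for word, i in vocab.items():
    embedding_matrix[i] = w2vModel.wv[word]

X_train_idx = np.array([[vocab[act] for act in window] for window in tokens_train])

input_act = Input(shape=(self.win_size,), dtype='int32', name='input_act')
x_act = Embedding(input_dim=len(vocab) + 1,
                  output_dim=params['word2vec_size'],
                  weights=[embedding_matrix],     # pre-trained Word2Vec vectors
                  input_length=self.win_size,
                  trainable=False)(input_act)     # trainable=True would fine-tune them

# Option B: drop the Embedding layer entirely and feed the already-embedded
# windows, shaped (win_size, word2vec_size), straight into the first LSTM.
X_train_emb = np.array([[w2vModel.wv[act] for act in window] for window in tokens_train])
input_act = Input(shape=(self.win_size, params['word2vec_size']), name='input_act')
l1 = LSTM(params["shared_lstm_size"], return_sequences=True,
          kernel_initializer='glorot_uniform', dropout=params['dropout'])(input_act)

With option A the rest of nn() would stay exactly as it is, since only the Embedding layer changes; with option B the x_act variable disappears and l1 takes input_act directly. Is one of these the correct way to pass Word2Vec embeddings to the LSTM layers, or is there a better approach?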



Sources

Source: Stack Overflow, licensed under CC BY-SA 3.0 per its attribution requirements.