TensorFlow Lite Conversion Fails

Goal:

I am trying to convert a TensorFlow model with custom functions to Tflite that is capable of on-node-training.

System information:

  • OS Platform and Distribution (e.g., Linux Ubuntu 16.04): Ubuntu 18.04
  • TensorFlow installation (pip package or built from source): Pip
  • TensorFlow version (pip package): tensorflow==2.8.0rc1

Problem:

Conversion fails when adding the "update_control_variate" function.

Code:

Link to the Google Colab: https://colab.research.google.com/drive/1crWPg__nYgt5IUFOA_OzgU0qgvgzmsZc?usp=sharing

Part 1:

!pip install tensorflow==2.8.0rc1

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from tensorflow.keras.regularizers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.preprocessing.sequence import pad_sequences
import keras
from tqdm import tqdm
print("TensorFlow version:", tf.__version__)

class TestModel:
    """Builds a two-input Keras model: a trainable casing embedding plus a
    per-timestep char-CNN (TimeDistributed Conv1D + global max pooling),
    concatenated and fed to a bidirectional LSTM binary classifier.
    """

    def __init__(self):
        case2Idx = {'A': 0, 'B': 1, 'C': 2}

        # Identity matrices serve as one-hot initial embedding weights.
        self.caseEmbeddings = np.identity(len(case2Idx), dtype='float32')
        long_chars = " 0123456789"
        # NOTE(review): PADDING maps to 1 and UNKNOWN to 0 — unusual order,
        # kept as-is; confirm it matches the data pipeline.
        self.char2Idx = {"PADDING": 1, "UNKNOWN": 0}
        for c in long_chars:
            self.char2Idx[c] = len(self.char2Idx)
        self.charsEmbeddings = np.identity(len(self.char2Idx), dtype='float32')

    def get_model(self):
        """Return an uncompiled tf.keras Model.

        Inputs: casing_input [batch, 12] and char_input [batch, 12, 6],
        both float32. Output: a single sigmoid unit, shape [batch, 1].
        """
        convs = []
        casing_input = Input(shape=(12,), name='casing_input', dtype=tf.float32)
        casing = Embedding(output_dim=self.caseEmbeddings.shape[1],
                           input_dim=self.caseEmbeddings.shape[0], weights=[self.caseEmbeddings],
                           trainable=True, name='casing_embedding')(casing_input)

        character_input = Input(shape=(12, 6,), name='char_input', dtype=tf.float32)
        embed_char_out = TimeDistributed(Embedding(len(self.char2Idx),
                                 156,
                                 embeddings_initializer=tf.keras.initializers.RandomUniform(minval=-0.5, maxval=0.5),
                                                 trainable=True), name='char_embedding')(character_input)

        # One char-CNN branch per kernel size, each max-pooled over chars.
        for fsz in [2, 5]:
            l_conv = TimeDistributed(Conv1D(filters=64, kernel_size=fsz, activation='relu',
                                            strides=1))(embed_char_out)
            l_pool = TimeDistributed(GlobalMaxPooling1D())(l_conv)
            convs.append(l_pool)

        # Fix: concatenate ALL conv branches; the original appended to
        # `convs` but only used the last `l_pool`, leaving the fsz=2
        # branch dead. (Also removed the unused `largest_word` local.)
        output = concatenate([casing] + convs)
        output = Bidirectional(LSTM(74, return_sequences=False, dropout=0.5, recurrent_dropout=0.50))(output)
        output = Dense(1, activation='sigmoid')(output)

        model = Model(inputs=[casing_input, character_input], outputs=[output])
        return model

import numpy as np
import sys
# Module-level builder instance; the Keras model itself is created later
# via m_model.get_model().
m_model = TestModel()

class NodeTrainableModel(tf.Module): 
    """Wraps a compiled Keras model with tf.function signatures for
    on-device (TFLite) training, inference, checkpointing, and a
    control-variate update pass.

    NOTE(review): model_1 and model_2 are bound to the SAME object, so
    update_control_variate's "gradient at the server model" is in fact
    computed on the locally-updated weights — presumably a copy/clone was
    intended; confirm against the algorithm.
    """
    
    def __init__(self, trainedmodel,
                 c_i: list = None, 
                 c_g:list = None):
        
        super(NodeTrainableModel, self).__init__()
        # Both attributes alias the same Keras model instance (see class note).
        self.model_1 = trainedmodel
        self.model_2 = trainedmodel
        # Optional per-weight control-variate tensor lists (SCAFFOLD-style).
        self.c_i = c_i
        self.c_g = c_g

        self.model_1.compile(
            optimizer=tf.keras.optimizers.Adam(),
            loss=tf.keras.losses.BinaryCrossentropy()
        )
        
        
    @tf.function(input_signature=[
        tf.TensorSpec([None, 12], tf.float32),
        tf.TensorSpec([None, 12, 6], tf.float32),
        tf.TensorSpec([None], tf.float32),
    ])

    def train(self, x_case, x_chars, labels):
        """One optimizer step on a batch; returns {"loss": scalar tensor}."""
       
        def process_gradient(g_j:tf.Tensor, c_g_j:tf.Tensor, c_i_j:tf.Tensor) -> tf.Tensor:
            # NOTE(review): inside a tf.function this try/except only runs at
            # trace time — it will not catch NaNs produced during graph
            # execution, so the assert is likely dead; confirm intent.
            try:
                tf.debugging.check_numerics(g_j, message='Checking gradient')
            except Exception as e:
                assert "Checking g : Tensor had NaN values" in e.message

            return g_j + c_g_j + c_i_j

        with tf.GradientTape() as tape:
            predictions = self.model_1([x_case, x_chars], training=True)
            loss_value = self.model_1.loss(labels, predictions)

        grads = tape.gradient(loss_value, self.model_1.trainable_weights)


        # Python-level branch: resolved once at trace time from the c_i
        # passed to __init__, not re-evaluated per call.
        if self.c_i is None:
            self.model_1.optimizer.apply_gradients(
                zip(grads, self.model_1.trainable_weights)
            )
            

        else:
            
            # NOTE(review): both branches of this conditional yield the same
            # g_c_i, so the isinstance check is a no-op as written.
            if self.c_g is None:
                self.c_g = [g_c_i if \
                                  not isinstance(g_c_i, tf.IndexedSlices) else g_c_i for g_c_i in self.c_i]

            # IndexedSlices gradients (e.g. from embeddings) are passed
            # through unmodified; dense gradients get the control-variate
            # correction g + c_g + c_i.
            processed_grads = [process_gradient(g ,i, j) if \
                                not isinstance(i, tf.IndexedSlices) else i for g, i, j \
                                in zip(grads, self.c_g, self.c_i)]
            
            self.model_1.optimizer.apply_gradients(
                zip(processed_grads, self.model_1.trainable_weights)
            )
            
        results = {"loss": loss_value}
            
        return results
      
    
    @tf.function(input_signature=[
        tf.TensorSpec([None, 12], tf.float32),
        tf.TensorSpec([None, 12, 6], tf.float32)
    ])    
    def infer(self, x_case, x_chars):           
        """Forward pass only; returns {"outputs": logits of shape [batch, 1]}."""
        logits = self.model_1([x_case, x_chars], training=False)
        return {
                "outputs": logits
                }
        

    @tf.function(input_signature=[
        tf.TensorSpec([None, 12], tf.float32),
        tf.TensorSpec([None, 12, 6], tf.float32),
        tf.TensorSpec([None], tf.float32),
    ]) 

    def update_control_variate(self, x_case, x_chars, labels):
        """making an additional pass over the local
            data to compute the gradient at the server model x"""

        # NOTE(review): conditionally creating self.c_i_old inside a
        # tf.function is fragile — tf.Module attributes created during
        # tracing can break SavedModel export/TFLite conversion; this is a
        # likely culprit for the conversion failure. Returning a plain
        # Python list of per-weight gradients (variable count/shapes) is
        # also hard to express as a TFLite signature — confirm.
        if not self.c_i is None:
            self.c_i_old = self.c_i.copy()
          
        with tf.GradientTape() as tape:
            predictions = self.model_2([x_case, x_chars])
            loss_ = self.model_1.loss(labels, predictions)

        c_i = tape.gradient(loss_, self.model_2.trainable_weights)

        return c_i
    
    @tf.function(input_signature=[])    
    def get_the_update(self):
      
        # NOTE(review): despite the "delta" names, both computations below
        # use `+`, not `-`; and since model_1 is model_2 (same object) the
        # weight "delta" is just 2 * weights — confirm intended math.
        if not self.c_g is None:
            delta_c = [g_c_i + g_c_i_old if \
                                    not isinstance(g_c_i, tf.IndexedSlices) else g_c_i for g_c_i, g_c_i_old\
                                    in zip(self.c_i, self.c_i_old)]
        else:
            delta_c = self.c_i

        delta_weights = [local_w + global_w \
                                for local_w, global_w\
                                in zip(self.model_1.trainable_weights, 
                                      self.model_2.trainable_weights)]

        return delta_weights
        
    
    @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])
    def save(self, checkpoint_path):
        """Write all model weights to checkpoint_path via tf.raw_ops.Save."""
        tensor_names = [weight.name for weight in self.model_1.weights]
        tensors_to_save = [weight.read_value() for weight in self.model_1.weights]
        tf.raw_ops.Save(
            filename=checkpoint_path, tensor_names=tensor_names,
            data=tensors_to_save, name='save')
        return {
                  "checkpoint_path": checkpoint_path
              }

    @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])
    def restore(self, checkpoint_path):
        """Load weights saved by `save` back into the model, by tensor name."""
        restored_tensors = {}
        for var in self.model_1.weights:
            restored = tf.raw_ops.Restore(
              file_pattern=checkpoint_path, tensor_name=var.name, dt=var.dtype,
              name='restore')
            var.assign(restored)
            restored_tensors[var.name] = restored
        return restored_tensors

Part 2:

Create an instance of the model and train it:

# Build the Keras model and wrap it in the trainable tf.Module.
model = m_model.get_model()
m = NodeTrainableModel(model)

more_losses = {}
for epoch in tqdm(range(10)):
    # Synthetic batch: char indices [32, 12, 6], casing indices [32, 12],
    # binary labels [32] — all converted to float32 tensors.
    chars_batch = np.random.randint(11, size=[32, 12, 6])
    case_batch = np.random.randint(3, size=[32, 12])
    casing = tf.convert_to_tensor(case_batch, np.float32)
    char = tf.convert_to_tensor(chars_batch, np.float32)
    labels = tf.convert_to_tensor(np.array(np.random.randint(0, 2, size=32)), np.float32)

    result = m.train(x_case=casing, x_chars=char, labels=labels)
    more_losses[epoch] = result['loss']
    if (epoch + 1) % 10 == 0:
        print(f"Finished {epoch+1} epochs")
        print(f"  loss: {more_losses[epoch]:.3f}")

# Save the trained weights to a checkpoint.
m.save('/tmp/model.ckpt')

Part 3:

Convert the model to Tflite version

# Collect concrete functions for every signature the TFLite model exposes.
signatures = [
  m.train.get_concrete_function(),
  m.infer.get_concrete_function(),
  m.save.get_concrete_function(),
  m.restore.get_concrete_function(),
  m.get_the_update.get_concrete_function(),
  m.update_control_variate.get_concrete_function()
]

converter = tf.lite.TFLiteConverter.from_concrete_functions(signatures, m)
# Fix: target_spec.supported_ops must contain only tf.lite.OpsSet members;
# the original mistakenly included tf.lite.Optimize.DEFAULT here. The
# optimization flag belongs on converter.optimizations instead.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,  # enable TensorFlow Lite ops.
    tf.lite.OpsSet.SELECT_TF_OPS,  # enable TensorFlow ops.
]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float32]
converter.experimental_enable_resource_variables = True
converter.experimental_new_converter = True
converter.allow_custom_ops = True
tflite_model = converter.convert()

The conversion fails with the following error:


2022-01-27 11:53:08.600343: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:357] Ignored output_format.
2022-01-27 11:53:08.600385: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:360] Ignored drop_control_dependency.
2022-01-27 11:53:08.600602: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: saved_model
2022-01-27 11:53:08.647834: I tensorflow/cc/saved_model/reader.cc:78] Reading meta graph with tags { serve }
2022-01-27 11:53:08.647877: I tensorflow/cc/saved_model/reader.cc:119] Reading SavedModel debug info (if present) from: saved_model
2022-01-27 11:53:08.787707: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-01-27 11:53:09.230312: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: saved_model
2022-01-27 11:53:09.631676: I tensorflow/cc/saved_model/loader.cc:301] SavedModel load for tags { serve }; Status: success: OK. Took 1031073 microseconds.
2022-01-27 11:53:10.288551: E tensorflow/compiler/mlir/tensorflow/translate/tf_mlir_translate.cc:207] SavedModel V1 import failed: FAILED_PRECONDITION: Graph does not contain node:

ConverterError Traceback (most recent call last)
/tmp/ipykernel_5933/617434638.py in
10 converter.experimental_new_converter = True
11 converter.allow_custom_ops = True
---> 12 tflite_model = converter.convert()

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/lite.py in convert(self)
1712 Invalid quantization parameters.
1713 """
-> 1714 return super(TFLiteConverterV2, self).convert()
1715
1716

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/lite.py in wrapper(self, *args, **kwargs)
801 def wrapper(self, *args, **kwargs):
802 # pylint: disable=protected-access
--> 803 return self._convert_and_export_metrics(convert_func, *args, **kwargs)
804 # pylint: enable=protected-access
805

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/lite.py in _convert_and_export_metrics(self, convert_func, *args, **kwargs)
787 self._save_conversion_params_metric()
788 start_time = time.process_time()
--> 789 result = convert_func(self, *args, **kwargs)
790 elapsed_time_ms = (time.process_time() - start_time) * 1000
791 if result:

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/lite.py in convert(self)
1371 """
1372 if self.experimental_lower_to_saved_model:
-> 1373 saved_model_convert_result = self._convert_as_saved_model()
1374 if saved_model_convert_result:
1375 return saved_model_convert_result

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/lite.py in _convert_as_saved_model(self)
1351 if self.saved_model_dir:
1352 self._validate_inputs(graph_def, input_tensors)
-> 1353 return self._convert_from_saved_model(graph_def)
1354 finally:
1355 shutil.rmtree(temp_dir, True)

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/lite.py in _convert_from_saved_model(self, graph_def)
965 converter_kwargs.update(quant_mode.converter_flags())
966
--> 967 result = _convert_saved_model(**converter_kwargs)
968 return self._optimize_tflite_model(
969 result, quant_mode, quant_io=self.experimental_new_quantizer)

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
211 else:
212 report_error_message(str(converter_error))
--> 213 raise converter_error from None # Re-throws the exception.
214 except Exception as error:
215 report_error_message(str(error))

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/convert_phase.py in wrapper(*args, **kwargs)
204 def wrapper(*args, **kwargs):
205 try:
--> 206 return func(*args, **kwargs)
207 except ConverterError as converter_error:
208 if converter_error.errors:

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/convert.py in convert_saved_model(**kwargs)
787 model_flags = build_model_flags(**kwargs)
788 conversion_flags = build_conversion_flags(**kwargs)
--> 789 data = convert(
790 model_flags.SerializeToString(),
791 conversion_flags.SerializeToString(),

~/anaconda3/envs/tf_2.8/lib/python3.9/site-packages/tensorflow/lite/python/convert.py in convert(model_flags_str, conversion_flags_str, input_data_str, debug_info_str, enable_mlir_converter)
304 for error_data in _metrics_wrapper.retrieve_collected_errors():
305 converter_error.append_error(error_data)
--> 306 raise converter_error
307
308 return _run_deprecated_conversion_binary(

ConverterError: Graph does not contain node:



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source