预处理输入特征

Note

Keras 提供了一组标准预处理层；当标准层不足以完成任务时，也可以自定义预处理层。

标准预处理层

import numpy as np
import tensorflow as tf

# Per-column mean of adapt_data is [1, 8, 5]; per-column standard deviation is [1, 1, 1].
adapt_data = np.array([[0., 7., 4.],
                       [2., 9., 6.],
                       [0., 7., 4.],
                       [2., 9., 6.]], dtype='float32')
input_data = np.array([[0., 7., 4.]], dtype='float32')
# Normalization layer, configured with axis=-1.
layer = tf.keras.layers.Normalization(axis=-1)
# Fit the layer's statistics to adapt_data.
layer.adapt(adapt_data)
# As expected: (0 - 1) / 1, (7 - 8) / 1, (4 - 5) / 1 -> [-1, -1, -1].
layer(input_data)
<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-1., -1., -1.]], dtype=float32)>
x = np.array([[-1.5, 1.0, 3.4, .5], 
              [0.0, 3.0, 1.3, 0.0]])
# Give the bin boundaries explicitly; alternatively, set the number of bins
# with num_bins and then call adapt().
layer = tf.keras.layers.Discretization(bin_boundaries=[0., 1., 2.])
# As expected. Note that each bin is left-closed, right-open, i.e. [low, high):
# in the echoed result 0.0 lands in bin 1 and 1.0 lands in bin 2.
layer(x)
<tf.Tensor: shape=(2, 4), dtype=int64, numpy=
array([[0, 2, 3, 1],
       [1, 3, 2, 1]])>

自定义预处理层

from tensorflow import keras


class Standardization(keras.layers.Layer):
    """Custom preprocessing layer that standardizes its inputs.

    Call ``adapt()`` on a data sample first; ``call()`` reads the statistics
    that ``adapt()`` stores on the instance.
    """

    def adapt(self, data_sample):
        """Store the mean and standard deviation of ``data_sample``.

        Both statistics are reduced over axis 0 with ``keepdims=True``, so the
        reduced axis is kept with length one.
        """
        reduce_opts = {"axis": 0, "keepdims": True}
        self.means_ = np.mean(data_sample, **reduce_opts)
        self.stds_ = np.std(data_sample, **reduce_opts)

    def call(self, inputs):
        """Return ``inputs`` centered on the stored means and scaled by the stored stds."""
        centered = inputs - self.means_
        # The small constant keeps the denominator non-zero when a stored
        # standard deviation is zero.
        scale = self.stds_ + keras.backend.epsilon()
        return centered / scale