预处理输入特征
Note
Keras 提供了一系列标准预处理层;当标准层不足以完成任务时,也可以自定义预处理层。
标准预处理层
import numpy as np
import tensorflow as tf
# Reference data used to fit the layer. Per column the mean is [1, 8, 5]
# and the standard deviation is [1, 1, 1].
adapt_data = np.array(
    [
        [0.0, 7.0, 4.0],
        [2.0, 9.0, 6.0],
        [0.0, 7.0, 4.0],
        [2.0, 9.0, 6.0],
    ],
    dtype='float32',
)

# Standardization layer operating on the last axis.
layer = tf.keras.layers.Normalization(axis=-1)
# Fit the layer's statistics to adapt_data.
layer.adapt(adapt_data)

# A single sample to transform with the fitted layer.
input_data = np.array([[0.0, 7.0, 4.0]], dtype='float32')
# Expected result: (0 - 1) / 1, (7 - 8) / 1, (4 - 5) / 1 -> [[-1., -1., -1.]]
layer(input_data)
<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[-1., -1., -1.]], dtype=float32)>
# Bucket edges are given explicitly here; the alternative is to pass
# num_bins (the number of buckets) and then call adapt().
layer = tf.keras.layers.Discretization(bin_boundaries=[0.0, 1.0, 2.0])

x = np.array(
    [
        [-1.5, 1.0, 3.4, 0.5],
        [0.0, 3.0, 1.3, 0.0],
    ]
)
# Buckets are half-open, [lower, upper): a value equal to a boundary falls
# into the bucket above it. Expected result: [[0, 2, 3, 1], [1, 3, 2, 1]].
layer(x)
<tf.Tensor: shape=(2, 4), dtype=int64, numpy=
array([[0, 2, 3, 1],
[1, 3, 2, 1]])>
自定义预处理层
from tensorflow import keras
class Standardization(keras.layers.Layer):
    """Custom preprocessing layer that standardizes its inputs column-wise.

    The statistics are only set by adapt(), so adapt() must be called on a
    data sample before the layer is applied to inputs.
    """

    def adapt(self, data_sample):
        """Store the per-column mean and standard deviation of data_sample.

        Both statistics are reduced over axis 0 with keepdims=True and saved
        as self.means_ and self.stds_.
        """
        reduce_kwargs = {"axis": 0, "keepdims": True}
        self.means_ = np.mean(data_sample, **reduce_kwargs)
        self.stds_ = np.std(data_sample, **reduce_kwargs)

    def call(self, inputs):
        """Return inputs centered on the stored means and scaled by the stds."""
        centered = inputs - self.means_
        # epsilon is added to the stored stds before dividing, which keeps
        # the denominator away from zero when a column's std is 0.
        scale = self.stds_ + keras.backend.epsilon()
        return centered / scale