A quick summary: model building in TensorFlow mainly comes in three styles, the Sequential API, model subclassing, and the functional API. This post also covers model nesting, models with multiple inputs and outputs, and custom components.

  1. A simple model

    import tensorflow as tf

    # Load the dataset
    mnist = tf.keras.datasets.mnist

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Sequential model
    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10)
    ])

    # Loss function
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    # Compile the model
    model.compile(optimizer='adam',
                  loss=loss_fn,
                  metrics=['accuracy'])

    # Train the model
    model.fit(x_train, y_train, epochs=5)

    # Evaluate on the test set
    model.evaluate(x_test, y_test, verbose=2)
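
    The same network can also be assembled incrementally with add(); a minimal sketch of the equivalent construction (model_alt is just an illustrative name, layers copied from the list above):

    # Equivalent Sequential model built layer by layer with add()
    model_alt = tf.keras.models.Sequential()
    model_alt.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
    model_alt.add(tf.keras.layers.Dense(128, activation='relu'))
    model_alt.add(tf.keras.layers.Dropout(0.2))
    model_alt.add(tf.keras.layers.Dense(10))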
  2. Getting predictions

    # Raw model output (logits)
    predictions = model(x_train[:1]).numpy()

    # Convert logits to probabilities with softmax
    tf.nn.softmax(predictions).numpy()

    # Wrap the model so it returns probabilities directly
    probability_model = tf.keras.Sequential([
        model,
        tf.keras.layers.Softmax()
    ])
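
    To go from probabilities to a class label, one option is tf.argmax; a small sketch assuming the probability_model defined above:

    # Predicted class indices for the first five test images (illustrative)
    probs = probability_model(x_test[:5])
    print(tf.argmax(probs, axis=1).numpy())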
  3. A more complex model: subclassing with a custom training loop

    import tensorflow as tf
    print("TensorFlow version:", tf.__version__)

    from tensorflow.keras.layers import Dense, Flatten, Conv2D
    from tensorflow.keras import Model

    # Load the data
    mnist = tf.keras.datasets.mnist

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Add a channels dimension
    x_train = x_train[..., tf.newaxis].astype("float32")
    x_test = x_test[..., tf.newaxis].astype("float32")

    # Shuffle and batch the data
    train_ds = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(10000).batch(32)

    test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

    # Subclassed model
    class MyModel(Model):
        def __init__(self):
            super(MyModel, self).__init__()
            self.conv1 = Conv2D(32, 3, activation='relu')
            self.flatten = Flatten()
            self.d1 = Dense(128, activation='relu')
            self.d2 = Dense(10)

        def call(self, x):
            x = self.conv1(x)
            x = self.flatten(x)
            x = self.d1(x)
            return self.d2(x)

    # Create an instance of the model
    model = MyModel()

    # Loss function and optimizer
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    optimizer = tf.keras.optimizers.Adam()

    # Metrics to track loss and accuracy
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    # Training step
    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).
            predictions = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)


    # Test step
    @tf.function
    def test_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)

    # Training loop
    EPOCHS = 5

    for epoch in range(EPOCHS):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()

        for images, labels in train_ds:
            train_step(images, labels)

        for test_images, test_labels in test_ds:
            test_step(test_images, test_labels)

        print(
            f'Epoch {epoch + 1}, '
            f'Loss: {train_loss.result()}, '
            f'Accuracy: {train_accuracy.result() * 100}, '
            f'Test Loss: {test_loss.result()}, '
            f'Test Accuracy: {test_accuracy.result() * 100}'
        )
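
    A custom loop is not mandatory for subclassed models; they also work with the built-in compile()/fit(). A minimal sketch reusing loss_object and train_ds from above (model2 is just an illustrative name):

    # Built-in training loop on a fresh instance of the subclassed model
    model2 = MyModel()
    model2.compile(optimizer='adam', loss=loss_object, metrics=['accuracy'])
    model2.fit(train_ds, epochs=1)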
  4. The functional API

    import numpy as np
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers

    inputs = keras.Input(shape=(784,))

    dense = layers.Dense(64, activation="relu")
    x = dense(inputs)
    x = layers.Dense(64, activation="relu")(x)
    outputs = layers.Dense(10)(x)

    model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")

    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    x_train = x_train.reshape(60000, 784).astype("float32") / 255
    x_test = x_test.reshape(10000, 784).astype("float32") / 255

    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.RMSprop(),
        metrics=["accuracy"],
    )

    history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2)

    test_scores = model.evaluate(x_test, y_test, verbose=2)
    print("Test loss:", test_scores[0])
    print("Test accuracy:", test_scores[1])
  5. Inspecting the model

    model.summary()

    # Plot the model graph
    keras.utils.plot_model(model, "my_first_model.png")
    keras.utils.plot_model(model, "my_first_model_with_shape_info.png", show_shapes=True)  # also show input/output shapes
  6. Saving and loading models

    model.save("path_to_my_model")
    del model
    # Recreate the exact same model purely from the file:
    model = keras.models.load_model("path_to_my_model")
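
    Besides whole-model saving, weights-only checkpoints are available through save_weights()/load_weights(); a minimal sketch (the path is just an example):

    # Save and restore weights only; the architecture must be rebuilt in code
    model.save_weights("my_checkpoint")
    model.load_weights("my_checkpoint")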
  7. Nesting models

    def get_model():
        inputs = keras.Input(shape=(128,))
        outputs = layers.Dense(1)(inputs)
        return keras.Model(inputs, outputs)


    model1 = get_model()
    model2 = get_model()
    model3 = get_model()

    inputs = keras.Input(shape=(128,))
    y1 = model1(inputs)
    y2 = model2(inputs)
    y3 = model3(inputs)
    outputs = layers.average([y1, y2, y3])
    ensemble_model = keras.Model(inputs=inputs, outputs=outputs)
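
    A quick smoke test of the ensemble on random inputs (batch size and values are arbitrary):

    # The three sub-models share the same input and their outputs are averaged
    x = tf.random.normal((4, 128))
    print(ensemble_model(x).shape)  # expected: (4, 1)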
  8. Models with multiple inputs and outputs

    num_tags = 12  # Number of unique issue tags
    num_words = 10000  # Size of vocabulary obtained when preprocessing text data
    num_departments = 4  # Number of departments for predictions

    title_input = keras.Input(
        shape=(None,), name="title"
    )  # Variable-length sequence of ints
    body_input = keras.Input(shape=(None,), name="body")  # Variable-length sequence of ints
    tags_input = keras.Input(
        shape=(num_tags,), name="tags"
    )  # Binary vectors of size `num_tags`

    # Embed each word in the title into a 64-dimensional vector
    title_features = layers.Embedding(num_words, 64)(title_input)
    # Embed each word in the text into a 64-dimensional vector
    body_features = layers.Embedding(num_words, 64)(body_input)

    # Reduce sequence of embedded words in the title into a single 128-dimensional vector
    title_features = layers.LSTM(128)(title_features)
    # Reduce sequence of embedded words in the body into a single 32-dimensional vector
    body_features = layers.LSTM(32)(body_features)

    # Merge all available features into a single large vector via concatenation
    x = layers.concatenate([title_features, body_features, tags_input])

    # Stick a logistic regression for priority prediction on top of the features
    priority_pred = layers.Dense(1, name="priority")(x)
    # Stick a department classifier on top of the features
    department_pred = layers.Dense(num_departments, name="department")(x)

    # Instantiate an end-to-end model predicting both priority and department
    model = keras.Model(
        inputs=[title_input, body_input, tags_input],
        outputs=[priority_pred, department_pred],
    )

    model.compile(
        optimizer=keras.optimizers.RMSprop(1e-3),
        loss=[
            keras.losses.BinaryCrossentropy(from_logits=True),
            keras.losses.CategoricalCrossentropy(from_logits=True),
        ],
        loss_weights=[1.0, 0.2],
    )

    # Or assign losses and loss weights by output name
    model.compile(
        optimizer=keras.optimizers.RMSprop(1e-3),
        loss={
            "priority": keras.losses.BinaryCrossentropy(from_logits=True),
            "department": keras.losses.CategoricalCrossentropy(from_logits=True),
        },
        loss_weights={"priority": 1.0, "department": 0.2},
    )

    # Dummy input data
    title_data = np.random.randint(num_words, size=(1280, 10))
    body_data = np.random.randint(num_words, size=(1280, 100))
    tags_data = np.random.randint(2, size=(1280, num_tags)).astype("float32")

    # Dummy target data
    priority_targets = np.random.random(size=(1280, 1))
    dept_targets = np.random.randint(2, size=(1280, num_departments))

    model.fit(
        {"title": title_data, "body": body_data, "tags": tags_data},
        {"priority": priority_targets, "department": dept_targets},
        epochs=2,
        batch_size=32,
    )
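
    Since the outputs were named, fit() accepts dicts as above; plain lists in declaration order work too. A sketch of the equivalent list-style call:

    # Equivalent call with inputs/targets passed as lists in declaration order
    model.fit(
        [title_data, body_data, tags_data],
        [priority_targets, dept_targets],
        epochs=2,
        batch_size=32,
    )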
  9. Custom loss functions

    def custom_mean_squared_error(y_true, y_pred):
        return tf.math.reduce_mean(tf.square(y_true - y_pred))


    # get_uncompiled_model() is assumed to build and return an uncompiled
    # Keras model, as in the Keras "train and evaluate" guide.
    model = get_uncompiled_model()
    model.compile(optimizer=keras.optimizers.Adam(), loss=custom_mean_squared_error)

    # We need to one-hot encode the labels to use MSE
    y_train_one_hot = tf.one_hot(y_train, depth=10)
    model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)

    # A loss class that takes extra constructor arguments
    class CustomMSE(keras.losses.Loss):
        def __init__(self, regularization_factor=0.1, name="custom_mse"):
            super().__init__(name=name)
            self.regularization_factor = regularization_factor

        def call(self, y_true, y_pred):
            mse = tf.math.reduce_mean(tf.square(y_true - y_pred))
            reg = tf.math.reduce_mean(tf.square(0.5 - y_pred))
            return mse + reg * self.regularization_factor

    model = get_uncompiled_model()
    model.compile(optimizer=keras.optimizers.Adam(), loss=CustomMSE())

    y_train_one_hot = tf.one_hot(y_train, depth=10)
    model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)
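
    A quick sanity check of the function-style loss on hand-made tensors (values are arbitrary):

    # Should print a small scalar MSE for these dummy predictions
    y_true = tf.constant([[0.0, 1.0], [1.0, 0.0]])
    y_pred = tf.constant([[0.1, 0.9], [0.8, 0.2]])
    print(custom_mean_squared_error(y_true, y_pred).numpy())  # 0.025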