
How do I save this RNN model and use it to build an API for translation?

Posted: 2024-07-21 02:53:52
Tags: python, tensorflow, keras, ml, h5

I used a Sinhala-English dataset and trained a model to translate Sinhala-English Java-related questions. It reaches good training accuracy, but the problem is that after saving I cannot load the model and use it to build a translation API. I saved the model in H5 format, and trying to load it produces the error shown after the code below.

import os
import shutil
import subprocess
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from keras import layers
from colorama import Fore, Style  # run `pip install colorama` first if it is missing
from IPython.core.display import HTML

warnings.filterwarnings("ignore")

# Styling constants used by the plots and the verbose callback below.
# (The original notebook defines these elsewhere; the exact colours here are placeholders.)
FONT_COLOR = "#141B4D"
BACKGROUND_COLOR = "#F6F5F5"
CLR = Style.RESET_ALL
RED = Fore.RED
BLUE = Fore.BLUE

easy_dataset_user = "test"
easy_dataset = "language-translation-englishfrench"
data_dir = Path("data")

from google.colab import drive

drive.mount('/content/drive')

# The vocabulary CSV lives on Google Drive; adjust the path to your environment.
easy_dataset_path = Path("/content/drive/MyDrive/java_sinhala_vocabulary.csv")

easy_dataset = pd.read_csv(easy_dataset_path, encoding="utf-8", engine="pyarrow")
easy_dataset = easy_dataset.sample(len(easy_dataset), random_state=42)
easy_dataset.head()

easy_dataset.info()

easy_dataset["English Words in Sentence"] = (
    easy_dataset["english"].str.split().apply(len)
)
easy_dataset["French Words in Sentence"] = (
    easy_dataset["sinhala"].str.split().apply(len)
)

fig = px.histogram(
    easy_dataset,
    x=["English Words in Sentence", "French Words in Sentence"],
    color_discrete_sequence=["#3f384a", "#e04c5f"],
    labels={"variable": "Variable", "value": "Words in Sentence"},
    marginal="box",
    barmode="group",
    height=540,
    width=840,
    title="Easy Dataset - Words in Sentence",
)
fig.update_layout(
    font_color=FONT_COLOR,
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
    bargap=0.2,
    bargroupgap=0.1,
    legend=dict(orientation="h", yanchor="bottom", xanchor="right", y=1.02, x=1),
    yaxis_title="Count",
)
fig.show()

sentences_en = easy_dataset["english"].to_numpy()
sentences_fr = easy_dataset["sinhala"].to_numpy()

valid_fraction = 0.1
valid_len = int(valid_fraction * len(easy_dataset))

sentences_en_train = sentences_en[:-valid_len]
sentences_fr_train = sentences_fr[:-valid_len]

sentences_en_valid = sentences_en[-valid_len:]
sentences_fr_valid = sentences_fr[-valid_len:]

def prepare_input_and_target(sentences_en, sentences_fr):
    """Return data in the format: `((encoder_input, decoder_input), target)`"""
    return (sentences_en, b"startofseq " + sentences_fr), sentences_fr + b" endofseq"


def from_sentences_dataset(
    sentences_en,
    sentences_fr,
    batch_size=32,
    cache=True,
    shuffle=False,
    shuffle_buffer_size=10_000,
    seed=None,
):
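    """Build a batched tf.data pipeline of ((encoder_input, decoder_input), target) examples."""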
    dataset = tf.data.Dataset.from_tensor_slices((sentences_en, sentences_fr))
    dataset = dataset.map(prepare_input_and_target, num_parallel_calls=tf.data.AUTOTUNE)
    if cache:
        dataset = dataset.cache()
    if shuffle:
        dataset = dataset.shuffle(shuffle_buffer_size, seed=seed)
    return dataset.batch(batch_size)

benchmark_ds = from_sentences_dataset(sentences_en_train, sentences_fr_train)
benchmark_ds = benchmark_ds.prefetch(tf.data.AUTOTUNE)
bench_results = tfds.benchmark(benchmark_ds, batch_size=32)

example_ds = from_sentences_dataset(
    sentences_en_train, sentences_fr_train, batch_size=4
)
list(example_ds.take(1))[0]

example_ds.cardinality()  # Number of batches per epoch.

class ColoramaVerbose(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print(
            f"{CLR}Epoch: {RED}{epoch + 1:02d}{CLR} -",
            f"{CLR}loss: {RED}{logs['loss']:.5f}{CLR} -",
            f"{CLR}accuracy: {RED}{logs['accuracy']:.5f}{CLR} -",
            f"{CLR}val_loss: {RED}{logs['val_loss']:.5f}{CLR} -",
            f"{CLR}val_accuracy: {RED}{logs['val_accuracy']:.5f}",
        )


def adapt_compile_and_fit(
    model,
    train_dataset,
    valid_dataset,
    n_epochs=50,
    n_patience=5,
    init_lr=0.001,
    lr_decay_rate=0.1,
    colorama_verbose=False,
):
    """Adapt the TextVectorization layers on the training text, then compile and fit the model."""
    model.vectorization_en.adapt(
        train_dataset.map(
            lambda sentences, target: sentences[0],  # English sentences.
            num_parallel_calls=tf.data.AUTOTUNE,
        )
    )
    model.vectorization_fr.adapt(
        train_dataset.map(
            lambda sentences, target: sentences[1] + b" endofseq",  # Sinhala (target) sentences.
            num_parallel_calls=tf.data.AUTOTUNE,
        )
    )

    train_dataset_prepared = train_dataset.map(
        lambda sentences, target: (sentences, model.vectorization_fr(target)),
        num_parallel_calls=tf.data.AUTOTUNE,
    ).prefetch(tf.data.AUTOTUNE)

    valid_dataset_prepared = valid_dataset.map(
        lambda sentences, target: (sentences, model.vectorization_fr(target)),
        num_parallel_calls=tf.data.AUTOTUNE,
    ).prefetch(tf.data.AUTOTUNE)

    early_stopping_cb = keras.callbacks.EarlyStopping(
        monitor="val_accuracy", patience=n_patience, restore_best_weights=True
    )

    # The line below doesn't work with multi-file interleaving.
    # n_decay_steps = n_epochs * train_dataset_prepared.cardinality().numpy()
    # Less elegant solution.
    n_decay_steps = n_epochs * len(list(train_dataset_prepared))
    scheduled_lr = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=init_lr,
        decay_steps=n_decay_steps,
        decay_rate=lr_decay_rate,
    )

    model_callbacks = [early_stopping_cb]
    verbose_level = 1
    if colorama_verbose:
        model_callbacks.append(ColoramaVerbose())
        verbose_level = 0

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=keras.optimizers.RMSprop(learning_rate=scheduled_lr),
        metrics=["accuracy"],
    )

    return model.fit(
        train_dataset_prepared,
        epochs=n_epochs,
        validation_data=valid_dataset_prepared,
        callbacks=model_callbacks,
        verbose=verbose_level,
    )


def translate(model, sentence_en):
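    """Greedy decoding: repeatedly feed the partial translation back into the
    decoder and keep the most probable next word until "endofseq" is produced."""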
    translation = ""
    for word_idx in range(model.max_sentence_len):
        X_encoder = np.array([sentence_en])
        X_decoder = np.array(["startofseq " + translation])
        # Last token's probas.
        y_proba = model.predict((X_encoder, X_decoder), verbose=0)[0, word_idx]
        predicted_word_id = np.argmax(y_proba)
        predicted_word = model.vectorization_fr.get_vocabulary()[predicted_word_id]
        if predicted_word == "endofseq":
            break
        translation += " " + predicted_word
    return translation.strip()

class BidirectionalEncoderDecoderWithAttention(keras.Model):
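    """Bidirectional LSTM encoder + LSTM decoder with dot-product attention,
    with the TextVectorization layers kept inside the model."""
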
    def __init__(
        self,
        vocabulary_size=5000,
        max_sentence_len=50,
        embedding_size=256,
        n_units_lstm=512,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.max_sentence_len = max_sentence_len

        self.vectorization_en = layers.TextVectorization(
            vocabulary_size, output_sequence_length=max_sentence_len
        )
        self.vectorization_fr = layers.TextVectorization(
            vocabulary_size, output_sequence_length=max_sentence_len
        )

        self.encoder_embedding = layers.Embedding(
            vocabulary_size, embedding_size, mask_zero=True
        )
        self.decoder_embedding = layers.Embedding(
            vocabulary_size, embedding_size, mask_zero=True
        )

        self.encoder = layers.Bidirectional(
            layers.LSTM(n_units_lstm // 2, return_sequences=True, return_state=True)
        )
        self.decoder = layers.LSTM(n_units_lstm, return_sequences=True)
        self.attention = layers.Attention()
        self.output_layer = layers.Dense(vocabulary_size, activation="softmax")

    def call(self, inputs):
        encoder_inputs, decoder_inputs = inputs

        encoder_input_ids = self.vectorization_en(encoder_inputs)
        decoder_input_ids = self.vectorization_fr(decoder_inputs)

        encoder_embeddings = self.encoder_embedding(encoder_input_ids)
        decoder_embeddings = self.decoder_embedding(decoder_input_ids)

        # The final hidden state of the encoder, representing the entire
        # input sequence, is used to initialize the decoder.
        encoder_output, *encoder_state = self.encoder(encoder_embeddings)
        encoder_state = [
            tf.concat(encoder_state[0::2], axis=-1),  # Short-term state (0 & 2).
            tf.concat(encoder_state[1::2], axis=-1),  # Long-term state (1 & 3).
        ]
        decoder_output = self.decoder(decoder_embeddings, initial_state=encoder_state)
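        # Dot-product attention: each decoder step attends over all encoder outputs.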
        attention_output = self.attention([decoder_output, encoder_output])

        return self.output_layer(attention_output)

keras.backend.clear_session()  # Resets all state generated by Keras.
tf.random.set_seed(42)  # Ensure reproducibility on CPU.

easy_train_ds = from_sentences_dataset(
    sentences_en_train, sentences_fr_train, shuffle=True, seed=42
)
easy_valid_ds = from_sentences_dataset(sentences_en_valid, sentences_fr_valid)

bidirect_encoder_decoder = BidirectionalEncoderDecoderWithAttention(max_sentence_len=15)
bidirect_history = adapt_compile_and_fit(
    bidirect_encoder_decoder,
    easy_train_ds,
    easy_valid_ds,
    init_lr=0.01,
    lr_decay_rate=0.01,
    colorama_verbose=True,
)
fig = px.line(
    bidirect_history.history,
    markers=True,
    height=540,
    width=840,
    symbol="variable",
    labels={"variable": "Variable", "value": "Value", "index": "Epoch"},
    title="Easy Dataset - Encoder-Decoder RNN Training Process",
    color_discrete_sequence=px.colors.diverging.balance_r,
)
fig.update_layout(
    font_color=FONT_COLOR,
    title_font_size=18,
    plot_bgcolor=BACKGROUND_COLOR,
    paper_bgcolor=BACKGROUND_COLOR,
)
fig.show()
translation1 = translate(bidirect_encoder_decoder, "Hello, how are you?")
translation2 = translate(bidirect_encoder_decoder, "This is a test sentence")
translation3 = translate(bidirect_encoder_decoder, "you will receive a confirmation code after completing the registration.")

print(CLR + "Actual Possible Translations:")
print(BLUE + "Hello, how are you?".ljust(25), RED + "-> ", BLUE + "හෙලෝ, ඔයාට කෙසේද?")
print(
    BLUE + "This is a test sentence".ljust(25),
    RED + "-> ",
    BLUE + "මෙය පරීක්ෂණ වාක්යයකි.",
)
print(
    BLUE + "you will receive a confirmation code after completing the registration".ljust(25),
    RED + "-> ",
    BLUE + "ලියාපදිංචිය සම්පූර්ණ කලාට පස්සෙ ඔයාට තහවුරු කිරීමේ කේතයක්  හම්බවේවි",
)
print()
print(CLR + "Model Translations:")
print(BLUE + "Hello, how are you?".ljust(25), RED + "-> ", BLUE + translation1)
print(BLUE + "This is a test sentence".ljust(25), RED + "-> ", BLUE + translation2)
print(BLUE + "you will receive a confirmation code after completing the registration".ljust(25), RED + "-> ", BLUE + translation3)
PS D:app> python model.py
2024-07-21 00:08:39.758670: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-21 00:08:40.515736: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
Traceback (most recent call last):
  File "D:app\model.py", line 15, in <module>       
    model = tf.keras.models.load_model(model_path)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Please help me build an API after saving this model. I want to use the saved model to build an API that returns the meaning of a given English sentence. In what format do we need to save this model?

This is how I tried to develop the API:

import numpy as np
import tensorflow as tf
from fastapi import FastAPI, HTTPException

app = FastAPI()

# Load the model
model_path = "english_sinhala_transformer_model.h5"
model = tf.keras.models.load_model(model_path)

def translate(sentence_en):
    translation = ""
    for word_idx in range(model.max_sentence_len):
        X_encoder = np.array([sentence_en])
        X_decoder = np.array(["startofseq " + translation])
        y_proba = model.predict((X_encoder, X_decoder), verbose=0)[0, word_idx]
        predicted_word_id = np.argmax(y_proba)
        predicted_word = model.vectorization_fr.get_vocabulary()[predicted_word_id]
        if predicted_word == "endofseq":
            break
        translation += " " + predicted_word
    return translation.strip()

@app.get("/translate")
def get_translation(sentence: str):
    try:
        translation = translate(sentence)
        return {"translation": translation}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

Here is a complete guide to saving the RNN model, loading it, and using it to translate through an API.

1. Save the model:

  • Use the SavedModel format: SavedModel is the recommended way to save a TensorFlow model. It stores the model's architecture, weights, and computation graph, so the model can be loaded and used easily.

```python
import tensorflow as tf

# ... (your model training code) ...

# Directory in which to save the model
model_dir = "my_sinhala_english_translator"

# Save the model
tf.saved_model.save(bidirect_encoder_decoder, model_dir)
```

2. Load the model:

  • Use tf.saved_model.load:

```python
import tensorflow as tf

# Load the saved model
loaded_model = tf.saved_model.load(model_dir)
```

3. Create the translation API (using FastAPI):

```python
import tensorflow as tf
from fastapi import FastAPI, HTTPException

app = FastAPI()

# Load the saved model
model_dir = "my_sinhala_english_translator"
loaded_model = tf.saved_model.load(model_dir)


def translate(sentence_en):
    # Your preprocessing and translation logic
    # ... (e.g. tokenization, padding, passing the inputs to loaded_model) ...
    # Return the translated sentence
    return translation.strip()


@app.get("/translate")
def get_translation(sentence: str):
    try:
        translation = translate(sentence)
        return {"translation": translation}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
```

Complete code (with comments):

import os
import shutil
import subprocess
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from keras import layers
from fastapi import FastAPI, HTTPException
# ... (other imports) ...

# ... (your data loading and preprocessing code) ...

# ... (your BidirectionalEncoderDecoderWithAttention model definition) ...

# ... (your training code) ...

# Save the model in the SavedModel format
model_dir = "my_sinhala_english_translator"
tf.saved_model.save(bidirect_encoder_decoder, model_dir)

# Create the FastAPI application
app = FastAPI()

# Load the saved model
loaded_model = tf.saved_model.load(model_dir)

def translate(sentence_en):
    """Translate a given English sentence into Sinhala.

    Args:
        sentence_en: The English sentence to translate.

    Returns:
        The translated Sinhala sentence.

    Greedy decoding, mirroring the `translate` helper used during training.
    Note: this assumes the restored object still exposes `max_sentence_len`,
    `predict` and `vectorization_fr`; if any of these are not available in
    your setup, hard-code the values that were used at training time.
    """
    translation = ""
    # Token-id -> word lookup table for the target (Sinhala) vocabulary.
    vocabulary = loaded_model.vectorization_fr.get_vocabulary()
    for word_idx in range(loaded_model.max_sentence_len):
        # Encoder sees the English sentence; the decoder sees the partial
        # translation generated so far, prefixed with the start token.
        X_encoder = np.array([sentence_en])
        X_decoder = np.array(["startofseq " + translation])
        # Probabilities for the token at position `word_idx`.
        y_proba = loaded_model.predict((X_encoder, X_decoder), verbose=0)[0, word_idx]
        predicted_word_id = np.argmax(y_proba)
        predicted_word = vocabulary[predicted_word_id]
        if predicted_word == "endofseq":
            break
        translation += " " + predicted_word
    return translation.strip()

@app.get("/translate")
def get_translation(sentence: str):
    """翻译给定的英语句子并返回结果。

    Args:
        sentence: 要翻译的英语句子(作为查询参数)。

    Returns:
        包含翻译后的句子的 JSON 响应。
    """
    try:
        translation = translate(sentence)
        return {"translation": translation}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

Run the API:

  1. Save the model: after training, run your Python script (the one containing tf.saved_model.save).
  2. Run the API server: from the directory where the model was saved, start the FastAPI application with the following command:

```bash
uvicorn main:app --reload
```

(Replace "main" with the name of your Python file, without the ".py" suffix.)

  3. Access the API: your API should now be reachable at http://127.0.0.1:8000/translate?sentence=Hello, how are you?. You can send requests to this endpoint from a browser or a tool such as Postman and get the translation back; a minimal Python client is sketched below.
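
For example, a small client using the requests library (a sketch; it assumes the server is running locally on the default port 8000):

```python
import requests

# Call the /translate endpoint defined above; assumes `uvicorn main:app`
# is running locally on the default http://127.0.0.1:8000.
response = requests.get(
    "http://127.0.0.1:8000/translate",
    params={"sentence": "Hello, how are you?"},
)
response.raise_for_status()
print(response.json())  # e.g. {"translation": "..."}
```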

This gives you a complete pipeline, from training through deployment, for building a Sinhala-English translation API with FastAPI. Remember to adapt the preprocessing, training, and API endpoint to your specific requirements and dataset.

From: 78773642
