# Code (Python):
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import spacy
from spacy.util import minibatch
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.layers import Input, Dense, LSTM, Embedding
class PoetryGenerator:
    """Greedy word-level poem generator backed by a small Keras LSTM.

    ``train`` learns a vocabulary and a next-word model from a list of poems;
    ``generate_poem`` then extends a prompt one word at a time, always picking
    the most probable next word.

    Attributes:
        word2vec: Mapping from token to integer index.  Despite the name it
            holds plain vocabulary indices, not embedding vectors.
        index2word: Inverse of ``word2vec`` as a list; slot 0 is the padding
            placeholder and holds ``None``.
        model: The fitted Keras model, or ``None`` until ``train`` succeeds.
    """

    # Index 0 is reserved for left-padding contexts shorter than the window,
    # so real vocabulary indices start at 1.
    PAD_INDEX = 0

    def __init__(self, window_size=5, embedding_dim=32, lstm_units=64,
                 epochs=100):
        """Configure the generator; no model is built until ``train``.

        Args:
            window_size: Number of preceding words fed to the model.
            embedding_dim: Size of the learned word embeddings.
            lstm_units: Number of units in the LSTM layer.
            epochs: Training epochs used by ``train``.

        Raises:
            ValueError: If ``window_size`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError('window_size must be at least 1')
        self.window_size = window_size
        self.embedding_dim = embedding_dim
        self.lstm_units = lstm_units
        self.epochs = epochs
        self.word2vec = {}
        self.index2word = []
        self.model = None

    def _fit_window(self, indices):
        """Return the last ``window_size`` indices, left-padded with PAD_INDEX."""
        recent = list(indices)[-self.window_size:]
        padding = [self.PAD_INDEX] * (self.window_size - len(recent))
        return padding + recent

    def train(self, poems):
        """Learn a vocabulary and fit the next-word model on ``poems``.

        Every word after the first in each poem becomes one training sample:
        the words before it (cut to ``window_size`` and left-padded) are the
        input, the word itself is the target.

        Args:
            poems: Iterable of poem strings.

        Raises:
            ValueError: If the poems yield no (context, next word) pair, i.e.
                ``poems`` is empty or every poem has fewer than two tokens.
        """
        # Preprocessing: split each poem into tokens.
        tokenized_poems = [word_tokenize(poem) for poem in poems]

        # Vocabulary: assign each distinct token the next free index (from 1).
        word2vec = {}
        for tokens in tokenized_poems:
            for word in tokens:
                word2vec.setdefault(word, len(word2vec) + 1)

        # Numerical representation: fixed-width contexts and integer targets.
        contexts = []
        targets = []
        for tokens in tokenized_poems:
            indices = [word2vec[word] for word in tokens]
            for position in range(1, len(indices)):
                contexts.append(self._fit_window(indices[:position]))
                targets.append(indices[position])

        if not targets:
            raise ValueError(
                'poems must contain at least one poem with two or more words'
            )

        # Model: Embedding -> LSTM -> softmax over the vocabulary.  Integer
        # targets with the sparse loss avoid building a one-hot matrix.
        vocab_size = len(word2vec) + 1  # +1 for the padding slot
        inputs = Input(shape=(self.window_size,), dtype='int32')
        hidden = Embedding(vocab_size, self.embedding_dim)(inputs)
        hidden = LSTM(self.lstm_units)(hidden)
        outputs = Dense(vocab_size, activation='softmax')(hidden)
        model = Model(inputs, outputs)
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
        model.fit(
            np.array(contexts, dtype='int32'),
            np.array(targets, dtype='int32'),
            epochs=self.epochs,
            verbose=0,
        )

        # Publish state only after training succeeded, so a failed call
        # leaves any previously trained generator usable.
        self.word2vec = word2vec
        # Dicts keep insertion order, so the i-th key has index i (after pad).
        self.index2word = [None] + list(word2vec)
        self.model = model

    def generate_poem(self, prompt, length=100):
        """Generate ``length`` words that continue ``prompt``.

        Prompt words that were not seen during training are ignored.  If no
        prompt word is known, generation starts from an all-padding context.

        Args:
            prompt: Seed text for the poem.
            length: Number of words to generate.

        Returns:
            The generated words joined by single spaces (the prompt itself is
            not included); an empty string when ``length`` is not positive.

        Raises:
            RuntimeError: If called before a successful ``train``.
        """
        if self.model is None:
            raise RuntimeError('train() must be called before generate_poem()')
        if length <= 0:
            return ''

        # Numerical representation of the prompt, restricted to known words.
        known = [
            self.word2vec[word]
            for word in word_tokenize(prompt)
            if word in self.word2vec
        ]
        window = self._fit_window(known)

        generated = []
        for _ in range(length):
            input_seq = np.array([window], dtype='int32')
            probabilities = self.model.predict(input_seq, verbose=0)
            # Skip slot 0 so the padding placeholder is never emitted.
            word_idx = int(np.argmax(probabilities[0][1:])) + 1
            generated.append(word_idx)
            # Slide the context: drop the oldest word, append the newest.
            window = window[1:] + [word_idx]

        # Convert indices back to text.
        return ' '.join(self.index2word[idx] for idx in generated)
# Example usage: train on three short poems, then continue a prompt.
# Guarded so that importing this module does not train a model.
if __name__ == "__main__":
    poems = [
        'The sun sets slow and paints the sky',
        'The stars come out and twinkle bright',
        'The world is full of beauty and wonder',
    ]
    generator = PoetryGenerator()
    generator.train(poems)
    prompt = 'The moon glows'
    generated_poem = generator.generate_poem(prompt)
    print(generated_poem)