practice
import random
import numpy as np
import tflearn
def string_to_semi_redundant_sequences(string, seq_maxlen=25, redun_step=3):
    """ string_to_semi_redundant_sequences.

    Vectorize a string and return parsed sequences and targets, along with
    the associated dictionary.

    Arguments:
        string: `str`. Lower-case text from input text file.
        seq_maxlen: `int`. Maximum length of a sequence. Default: 25.
        redun_step: `int`. Redundancy step. Default: 3.

    Returns:
        `tuple`: (inputs, targets, dictionary)
    """
    print("Vectorizing text...")
    chars = set(string)
    char_idx = {c: i for i, c in enumerate(chars)}

    # Slice the text into overlapping windows of seq_maxlen characters;
    # the character right after each window is its prediction target.
    sequences = []
    next_chars = []
    for i in range(0, len(string) - seq_maxlen, redun_step):
        sequences.append(string[i: i + seq_maxlen])
        next_chars.append(string[i + seq_maxlen])

    # One-hot encode inputs and targets (np.bool is removed in recent NumPy
    # releases, so the plain built-in `bool` is used as the dtype).
    X = np.zeros((len(sequences), seq_maxlen, len(chars)), dtype=bool)
    Y = np.zeros((len(sequences), len(chars)), dtype=bool)
    for i, seq in enumerate(sequences):
        for t, char in enumerate(seq):
            X[i, t, char_idx[char]] = 1
        Y[i, char_idx[next_chars[i]]] = 1

    print("Text total length: " + str(len(string)))
    print("Distinct chars: " + str(len(chars)))
    print("Total sequences: " + str(len(sequences)))
    return X, Y, char_idx
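# A quick sanity check of the vectorization above (the short sample string is
# made up purely for illustration): with seq_maxlen=5 and redun_step=1, every
# input row is a (5, n_chars) one-hot matrix and every target row one-hot
# encodes the character that follows that window.
demo_X, demo_Y, demo_idx = string_to_semi_redundant_sequences(
    "hello world, hello again", seq_maxlen=5, redun_step=1)
print(demo_X.shape)  # (n_sequences, 5, number of distinct characters)
print(demo_Y.shape)  # (n_sequences, number of distinct characters)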
def textfile_to_semi_redundant_sequences(path, seq_maxlen=25, redun_step=3):
    """ Vectorize Text file """
    with open(path) as f:
        text = f.read()
    # if to_lower_case:
    #     text = text.lower()
    return string_to_semi_redundant_sequences(text, seq_maxlen, redun_step)
def random_sequence_from_textfile(path, seq_maxlen):
    with open(path) as f:
        text = f.read()
    return random_sequence_from_string(text, seq_maxlen)

def random_sequence_from_string(string, seq_maxlen):
    # Pick a random window of exactly seq_maxlen characters from the corpus.
    rand_index = random.randint(0, len(string) - seq_maxlen - 1)
    return string[rand_index: rand_index + seq_maxlen]
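# Such a window is what the SequenceGenerator below uses as the seed it keeps
# extending. The literal string here is only an example:
print(random_sequence_from_string("the quick brown fox jumps over the lazy dog", 10))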
path = "xajh.txt"
maxlen = 25
X, Y, char_idx = \
textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3)
# Character-level language model: two dropout-regularized LSTM layers followed
# by a softmax over the character vocabulary.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
# g = tflearn.lstm(g, 512, return_seq=True)
# g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_shakespeare')
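# Once trained, the generator can be persisted and restored with the standard
# TFLearn model save/load methods (the file name below is only an example):
# m.save("xajh_generator.tflearn")
# m.load("xajh_generator.tflearn")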
# Train for one epoch at a time, then sample from the model using a random
# seed sequence taken from the corpus.
for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=1, run_id='shakespeare')
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    # print("-- Test with temperature of 0.5 --")
    # print(m.generate(600, temperature=0.5, seq_seed=seed))
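# What the `temperature` argument does, as a standalone sketch (an illustration
# of the idea, not TFLearn's internal implementation): the output probabilities
# are rescaled in log space before sampling, so temperatures below 1.0 sharpen
# the distribution (more conservative text) and temperatures above 1.0 flatten
# it (more surprising text).
def sample_with_temperature(probs, temperature=1.0):
    probs = np.asarray(probs, dtype=np.float64)
    logits = np.log(probs + 1e-12) / temperature
    scaled = np.exp(logits)
    scaled /= np.sum(scaled)
    return np.random.choice(len(scaled), p=scaled)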