forked from x4nth055/pythoncode-tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
100,691 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 43 additions & 0 deletions
43
machine-learning/nlp/text-classification/20_news_group_classification.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
"""Train a text classifier on the 20 Newsgroups dataset and save it.

The hyperparameters (and ``get_model_name``) come from ``parameters.py``;
the model builder and the data loader come from ``utils.py``.
"""

from tensorflow.keras.callbacks import TensorBoard

import os

# The wildcard import supplies every UPPER_CASE hyperparameter used below as
# well as get_model_name(), which is not imported from anywhere else.
from parameters import *
from utils import create_model, load_20_newsgroup_data

# Create the output folders if they do not exist yet. exist_ok=True avoids
# the check-then-create race of a separate os.path.isdir() test.
for folder in ("results", "logs", "data"):
    os.makedirs(folder, exist_ok=True)

# dataset name: the 20 Newsgroups dataset
dataset_name = "20_news_group"
# get the unique model name based on the hyperparameters in parameters.py
model_name = get_model_name(dataset_name)

# Load the data. The returned mapping is indexed below with the keys
# "tokenizer", "X_train", "y_train", "X_test" and "y_test".
data = load_20_newsgroup_data(N_WORDS, SEQUENCE_LENGTH, TEST_SIZE, oov_token=OOV_TOKEN)

# The output size is taken from the first training label's leading dimension
# (presumably the number of one-hot encoded classes -- confirm in utils.py).
model = create_model(data["tokenizer"].word_index, units=UNITS, n_layers=N_LAYERS,
                     cell=RNN_CELL, bidirectional=IS_BIDIRECTIONAL, embedding_size=EMBEDDING_SIZE,
                     sequence_length=SEQUENCE_LENGTH, dropout=DROPOUT,
                     loss=LOSS, optimizer=OPTIMIZER, output_length=data["y_train"][0].shape[0])

model.summary()

# Write TensorBoard logs to logs/<model_name>.
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))

# The test split is passed as the validation data during training.
history = model.fit(data["X_train"], data["y_train"],
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(data["X_test"], data["y_test"]),
                    callbacks=[tensorboard],
                    verbose=1)

# Save the trained model to results/<model_name>.h5.
model.save(os.path.join("results", model_name) + ".h5")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# [How to Perform Text Classification in Python using Tensorflow 2 and Keras](https://www.thepythoncode.com/article/text-classification-using-tensorflow-2-and-keras-in-python) | ||
To use this: | ||
- `pip3 install -r requirements.txt` | ||
- Please read [this tutorial](https://www.thepythoncode.com/article/text-classification-using-tensorflow-2-and-keras-in-python) before using this. | ||
- Tweak the hyperparameters in `parameters.py` and train the model. | ||
- For testing, consider using `test.py`. |
Oops, something went wrong.