yamle.data.text module#

class yamle.data.text.TorchtextClassificationDataModule(dataset, *args, **kwargs)[source]#

Bases: BaseDataModule

Data module for the torchtext datasets.

Parameters:

dataset (str) – Name of the torchtext dataset. Currently supported are wiki_text_2, wiki_text_103, imdb.
validation_portion (float) – Portion of the training data to use for validation.
seed (int) – Seed for the random number generator.
data_dir (str) – Path to the data directory.

mean = None#

std = None#

task = 'text_classification'#

inputs_dim = None#

inputs_dtype = torch.int64#

outputs_dim = None#

outputs_dtype = torch.int64#

prepare_data()[source]#

Download and prepare the data. The data is stored in self._train_dataset, self._validation_dataset and self._test_dataset.

Return type: None

plot(tester, save_path, specific_name='')[source]#

Sample random text sequences from the test set and plot them.

Return type: None

available_transforms: List[str]#

available_test_augmentations: List[str]#

test_augmentations: List[str]#

class yamle.data.text.TorchtextClassificationModelWikiText2(*args, **kwargs)[source]#

Bases: TorchtextClassificationDataModule

Data module for the WikiText2 dataset.

inputs_dim = (20,)#

outputs_dim = 28782#

targets_dim = 20#

available_transforms: List[str]#

available_test_augmentations: List[str]#

test_augmentations: List[str]#

class yamle.data.text.TorchtextClassificationModelWikiText103(*args, **kwargs)[source]#

Bases: TorchtextClassificationDataModule

Data module for the WikiText103 dataset.

inputs_dim = (20,)#

outputs_dim = 28782#

targets_dim = 20#

available_transforms: List[str]#

available_test_augmentations: List[str]#

test_augmentations: List[str]#

class yamle.data.text.TorchtextClassificationModelIMDB(*args, **kwargs)[source]#

Bases: TorchtextClassificationDataModule

Data module for the IMDB dataset.

inputs_dim = (20,)#

outputs_dim = 28782#

targets_dim = 20#

available_transforms: List[str]#

available_test_augmentations: List[str]#

test_augmentations: List[str]#

class yamle.data.text.Shakespeare(*args, **kwargs)[source]#

Bases: TorchtextClassificationDataModule

Data module for the Shakespeare dataset.

inputs_dim = (20,)#

outputs_dim = 28782#

targets_dim = 20#

available_transforms: List[str]#

available_test_augmentations: List[str]#

test_augmentations: List[str]#