import evaluate
import torch
from datasets import load_dataset
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer, get_linear_schedule_with_warmup

from accelerate import Accelerator, DistributedType
from accelerate.utils import set_seed


def get_dataloaders(accelerator: Accelerator, batch_size: int = 16):
    """
    Creates a set of `DataLoader`s for the `glue` dataset,
    using "bert-base-cased" as the tokenizer.

    Args:
        accelerator (`Accelerator`):
            An `Accelerator` object
        batch_size (`int`, *optional*):
            The batch size for the train and validation DataLoaders.
    """
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    datasets = load_dataset("glue", "mrpc")

    def tokenize_function(examples):
        # max_length=None defers to the model's maximum sequence length
        outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None)
        return outputs

    # Tokenize all splits, letting the main process run (and cache) the map first so the
    # other processes can reuse the cached result instead of re-tokenizing
    with accelerator.main_process_first():
        tokenized_datasets = datasets.map(
            tokenize_function,
            batched=True,
            remove_columns=["idx", "sentence1", "sentence2"],
        )

    # Rename "label" to "labels", the argument name expected by transformers models
    tokenized_datasets = tokenized_datasets.rename_column("label", "labels")

    def collate_fn(examples):
        # On TPU, pad everything to a fixed length to avoid recompilation on varying shapes
        max_length = 128 if accelerator.distributed_type == DistributedType.TPU else None
        # With mixed precision, padding to a multiple of 8 makes better use of tensor cores
        if accelerator.mixed_precision != "no":
            pad_to_multiple_of = 8
        else:
            pad_to_multiple_of = None

        return tokenizer.pad(
            examples,
            padding="longest",
            max_length=max_length,
            pad_to_multiple_of=pad_to_multiple_of,
            return_tensors="pt",
        )

    # Instantiate the dataloaders
    train_dataloader = DataLoader(
        tokenized_datasets["train"], shuffle=True, collate_fn=collate_fn, batch_size=batch_size, drop_last=True
    )
    eval_dataloader = DataLoader(
        tokenized_datasets["validation"],
        shuffle=False,
        collate_fn=collate_fn,
        batch_size=32,
        drop_last=(accelerator.mixed_precision == "fp8"),
    )

    return train_dataloader, eval_dataloader


def training_function(config):
    # Initialize the accelerator with fp16 mixed precision and the Aim tracker
    accelerator = Accelerator(
        mixed_precision="fp16",
        log_with="aim",
        project_dir="aim_logs",
    )
    # Training hyperparameters
    lr = config["lr"]
    num_epochs = int(config["num_epochs"])
    seed = int(config["seed"])
    batch_size = 16 if accelerator.num_processes > 1 else 32
    config["batch_size"] = batch_size
    metric = evaluate.load("glue", "mrpc")

    set_seed(seed, device_specific=True)
    train_dataloader, eval_dataloader = get_dataloaders(accelerator, batch_size)
    model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", return_dict=True)
    # Scale the learning rate linearly with the number of processes
    lr = lr * accelerator.num_processes

    optimizer = AdamW(params=model.parameters(), lr=lr)
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer=optimizer,
        num_warmup_steps=0,
        num_training_steps=(len(train_dataloader) * num_epochs),
    )

    # Prepare everything for distributed training; objects come back in the order they were passed in
    model, optimizer, train_dataloader, eval_dataloader, lr_scheduler = accelerator.prepare(
        model, optimizer, train_dataloader, eval_dataloader, lr_scheduler
    )

    # Start a tracker run named after the number of processes and log the hyperparameter config
    accelerator.init_trackers(f"{accelerator.num_processes}_gpus", config)

    current_step = 0
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for _, batch in enumerate(train_dataloader):
            lr = lr_scheduler.get_last_lr()[0]
            outputs = model(**batch)
            loss = outputs.loss
            # Average the loss across all processes for logging
            batch_loss = accelerator.gather(loss).detach().mean().cpu().float()
            total_loss += batch_loss
            current_step += 1
            accelerator.log(
                {
                    "batch_loss": batch_loss,
                    "learning_rate": lr,
                },
                step=current_step,
                log_kwargs={"aim": {"epoch": epoch}},
            )
            accelerator.backward(loss)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()

        model.eval()
        for step, batch in enumerate(eval_dataloader):
            # Not strictly needed: prepared dataloaders already place batches on the right device
            batch.to(accelerator.device)
            with torch.no_grad():
                outputs = model(**batch)
            predictions = outputs.logits.argmax(dim=-1)
            # Gather predictions and labels across processes, dropping any samples duplicated for padding
            predictions, references = accelerator.gather_for_metrics((predictions, batch["labels"]))
            metric.add_batch(
                predictions=predictions,
                references=references,
            )

        eval_metric = metric.compute()
        # Print the metrics only on the main process
        accelerator.print(f"epoch {epoch}:", eval_metric)

        accelerator.log(
            {
                "accuracy": eval_metric["accuracy"],
                "f1": eval_metric["f1"],
                "train_loss": total_loss.item() / len(train_dataloader),
            },
            log_kwargs={"aim": {"epoch": epoch}},
        )

    # Close the tracker run once all epochs are done
    accelerator.end_training()


def main():
    config = {"lr": 2e-5, "num_epochs": 3, "seed": 42}
    training_function(config)


if __name__ == "__main__":
    main()
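
# A minimal usage sketch (assumptions: the script is saved under the hypothetical name
# `train_glue_mrpc.py`, and `accelerate config` has been run or the default single-node
# settings are acceptable). With N processes/GPUs available it could be launched as:
#
#   accelerate launch --num_processes N train_glue_mrpc.py
#
# Runs are then recorded under the `aim_logs` directory configured in `training_function`.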