| ``` |
| model: single_linear |
| config: Float8DynamicActivationFloat8WeightConfig |
| config version: 2 |
| torchao version: 0.13.dev |
| ``` |
|
|
| ``` |
| import torch |
| import io |
| |
| model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")) |
| |
| from torchao.quantization import quantize_, Float8DynamicActivationFloat8WeightConfig, PerRow |
| quant_config = Float8DynamicActivationFloat8WeightConfig(granularity=PerRow()) |
| quantize_(model, quant_config) |
| example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),) |
| output = model(*example_inputs) |
| |
| # Push to hub |
| USER_ID = "torchao-testing" |
| MODEL_NAME = "single-linear" |
| save_to = f"{USER_ID}/{MODEL_NAME}-Float8DynamicActivationFloat8WeightConfig-v2-0.13.dev" |
| |
| from huggingface_hub import HfApi |
| api = HfApi() |
| |
| buf = io.BytesIO() |
| torch.save(model.state_dict(), buf) |
| api.create_repo(save_to, repo_type="model", exist_ok=True) |
| api.upload_file( |
| path_or_fileobj=buf, |
| path_in_repo="model.pt", |
| repo_id=save_to, |
| ) |
| |
| buf = io.BytesIO() |
| torch.save(example_inputs, buf) |
| api.upload_file( |
| path_or_fileobj=buf, |
| path_in_repo="model_inputs.pt", |
| repo_id=save_to, |
| ) |
| |
| buf = io.BytesIO() |
| torch.save(output, buf) |
| api.upload_file( |
| path_or_fileobj=buf, |
| path_in_repo="model_output.pt", |
| repo_id=save_to, |
| ) |
| ``` |