| ``` |
| model: single_linear |
| config: Int8DynamicActivationIntxWeightConfig |
| config version: 2 |
| torchao version: 0.14.dev |
| ``` |
|
|
| ```python |
| import torch |
| import io |
| |
| model = torch.nn.Sequential(torch.nn.Linear(32, 256, dtype=torch.bfloat16, device="cuda")) |
| |
| from torchao.quantization import Int8DynamicActivationIntxWeightConfig, quantize_ |
| from torchao.quantization.granularity import PerGroup |
| |
| version=2 |
| |
| quant_config = Int8DynamicActivationIntxWeightConfig( |
| weight_dtype=torch.int4, |
| weight_granularity=PerGroup(32), |
| version=version |
| ) |
| quantize_(model, quant_config) |
| example_inputs = (torch.randn(2, 32, dtype=torch.bfloat16, device="cuda"),) |
| output = model(*example_inputs) |
| |
| # Push to hub |
| USER_ID = "torchao-testing" |
| MODEL_NAME = "single-linear" |
| save_to = f"{USER_ID}/{MODEL_NAME}-Int8DynamicActivationIntxWeightConfig-v{version}-0.14.dev" |
| |
| from huggingface_hub import HfApi |
| api = HfApi() |
| |
| buf = io.BytesIO() |
| torch.save(model.state_dict(), buf) |
| api.create_repo(save_to, repo_type="model", exist_ok=False) |
| api.upload_file( |
| path_or_fileobj=buf, |
| path_in_repo="model.pt", |
| repo_id=save_to, |
| ) |
| |
| buf = io.BytesIO() |
| torch.save(example_inputs, buf) |
| api.upload_file( |
| path_or_fileobj=buf, |
| path_in_repo="model_inputs.pt", |
| repo_id=save_to, |
| ) |
| |
| buf = io.BytesIO() |
| torch.save(output, buf) |
| api.upload_file( |
| path_or_fileobj=buf, |
| path_in_repo="model_output.pt", |
| repo_id=save_to, |
| ) |
| ``` |
|
|