yujiepan committed on
Commit
3a207ad
·
verified ·
1 Parent(s): 5a1b2b2

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. .meta.json +1 -1
  2. README.md +5 -5
  3. model.safetensors +1 -1
.meta.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "torch": "2.12.0",
3
  "transformers": "5.9.0"
4
  }
 
1
  {
2
+ "torch": "2.10.0+cu130",
3
  "transformers": "5.9.0"
4
  }
README.md CHANGED
@@ -28,7 +28,7 @@ messages = [
28
  "content": [
29
  {
30
  "type": "audio",
31
- "audio": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/journal1.wav",
32
  },
33
  {"type": "text", "text": "Transcribe the following speech segment."},
34
  ],
@@ -42,7 +42,7 @@ messages = [
42
  "content": [
43
  {
44
  "type": "image",
45
- "url": "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/Demos/sample-data/GoldenGate.png",
46
  },
47
  {"type": "text", "text": "What is shown in this image?"},
48
  ],
@@ -345,12 +345,12 @@ Gemma4ForConditionalGeneration(
345
  (subsample_conv_projection): Gemma4AudioSubSampleConvProjection(
346
  (layer0): Gemma4AudioSubSampleConvProjectionLayer(
347
  (conv): Conv2d(1, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
348
- (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True, bias=False)
349
  (act): ReLU()
350
  )
351
  (layer1): Gemma4AudioSubSampleConvProjectionLayer(
352
  (conv): Conv2d(128, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
353
- (norm): LayerNorm((32,), eps=1e-06, elementwise_affine=True, bias=False)
354
  (act): ReLU()
355
  )
356
  (input_proj_linear): Linear(in_features=1024, out_features=64, bias=False)
@@ -431,5 +431,5 @@ Gemma4ForConditionalGeneration(
431
 
432
  ### Test environment:
433
 
434
- - torch: 2.12.0
435
  - transformers: 5.9.0
 
28
  "content": [
29
  {
30
  "type": "audio",
31
+ "audio": "https://github.com/google-gemma/cookbook/raw/refs/heads/main/apps/sample-data/journal1.wav",
32
  },
33
  {"type": "text", "text": "Transcribe the following speech segment."},
34
  ],
 
42
  "content": [
43
  {
44
  "type": "image",
45
+ "url": "https://raw.githubusercontent.com/google-gemma/cookbook/4a352192744f73fba5b80aeea3a8ba9b543edd29/apps/sample-data/surprise.png",
46
  },
47
  {"type": "text", "text": "What is shown in this image?"},
48
  ],
 
345
  (subsample_conv_projection): Gemma4AudioSubSampleConvProjection(
346
  (layer0): Gemma4AudioSubSampleConvProjectionLayer(
347
  (conv): Conv2d(1, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
348
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
349
  (act): ReLU()
350
  )
351
  (layer1): Gemma4AudioSubSampleConvProjectionLayer(
352
  (conv): Conv2d(128, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
353
+ (norm): LayerNorm((32,), eps=1e-06, elementwise_affine=True)
354
  (act): ReLU()
355
  )
356
  (input_proj_linear): Linear(in_features=1024, out_features=64, bias=False)
 
431
 
432
  ### Test environment:
433
 
434
+ - torch: 2.10.0+cu130
435
  - transformers: 5.9.0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8582ee51fce3dac0a80d9a90008785011e2e24d9d26234822df7361d2092877b
3
  size 9470740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:032cdee368a3cf233b3ec293cff59bdbcf7d39a836d66b34f44337151da85fe0
3
  size 9470740