Embed dataset-explorer via iframe (JS runs in its own context)

#4
by Reubencf - opened
Files changed (2) hide show
  1. README.md +5 -7
  2. index.html +59 -435
README.md CHANGED
@@ -9,10 +9,8 @@ pinned: false
9
 
10
  # Reuben Data Lab
11
 
12
- An interactive landing for the [ReubenDataLab](https://huggingface.co/ReubenDataLab)
13
- dataset organization. The raw open-source corpus is visualized as a donut
14
- chart alongside the
15
- [Adaption-remastered](https://huggingface.co/collections/Reubencf/proper-adaption)
16
- versions, with a Voronoi treemap showing every language that appears
17
- across the corpus. Click any slice or cell to drill into the dataset /
18
- language details.
 
9
 
10
  # Reuben Data Lab
11
 
12
+ Interactive landing page for the [ReubenDataLab](https://huggingface.co/ReubenDataLab)
13
+ collections. The full dashboard (donut charts, Voronoi language treemap,
14
+ GSAP animations) is hosted at
15
+ [`Reubencf/dataset-explorer`](https://huggingface.co/spaces/Reubencf/dataset-explorer)
16
+ and embedded here via iframe so it runs in its own browsing context.
 
 
index.html CHANGED
@@ -1,435 +1,59 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>ReubenDataLab · Dataset Explorer</title>
7
-
8
- <link rel="preconnect" href="https://fonts.googleapis.com">
9
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
- <link href="https://fonts.googleapis.com/css2?family=Geist:wght@100..900&family=Google+Sans:ital,opsz,wght@0,17..18,400..700;1,17..18,400..700&display=swap" rel="stylesheet">
11
-
12
- <script src="vendor/d3.min.js" defer></script>
13
- <script src="vendor/d3-weighted-voronoi.js" defer></script>
14
- <script src="vendor/d3-voronoi-map.js" defer></script>
15
- <script src="vendor/d3-voronoi-treemap.js" defer></script>
16
- <script src="vendor/gsap.min.js" defer></script>
17
-
18
- <style>
19
- :root {
20
- --bg: #000000;
21
- --fg: #ffffff;
22
- --muted: #8a8a94;
23
- --card: #141414;
24
- --card-alt: #1c1c1e;
25
- --border: #262626;
26
- --divider: #2e2e2e;
27
- --tooltip-bg: rgba(20, 20, 20, 0.96);
28
-
29
- --palette-1: #3b82f6;
30
- --palette-2: #10b981;
31
- --palette-3: #ef4444;
32
- --palette-4: #f59e0b;
33
- --palette-5: #8b5cf6;
34
- --palette-6: #ec4899;
35
- --palette-7: #06b6d4;
36
- --palette-8: #84cc16;
37
- --palette-9: #f97316;
38
- --palette-10: #14b8a6;
39
- --palette-11: #a855f7;
40
- --palette-12: #eab308;
41
- }
42
-
43
- * { box-sizing: border-box; }
44
- html, body {
45
- margin: 0; padding: 0;
46
- background: var(--bg);
47
- color: var(--fg);
48
- font-family: "Geist", "Google Sans", -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
49
- font-weight: 400;
50
- min-height: 100vh;
51
- -webkit-font-smoothing: antialiased;
52
- letter-spacing: 0.005em;
53
- }
54
-
55
- a { color: var(--fg); text-decoration: none; }
56
- a:hover { opacity: 0.7; }
57
-
58
- /* Header / hero image */
59
- header {
60
- max-width: 1440px;
61
- margin: 0 auto;
62
- padding: 32px 24px 8px 24px;
63
- text-align: center;
64
- }
65
- .hero-img {
66
- display: block;
67
- max-width: 900px;
68
- width: 100%;
69
- height: auto;
70
- margin: 0 auto;
71
- border-radius: 14px;
72
- }
73
-
74
- /* Hero stats banner */
75
- .hero-stats {
76
- max-width: 1440px;
77
- margin: 24px auto 0 auto;
78
- padding: 0 24px;
79
- display: grid;
80
- grid-template-columns: repeat(5, 1fr);
81
- gap: 14px;
82
- }
83
- .stat {
84
- background: var(--card);
85
- border: 1px solid var(--border);
86
- border-radius: 16px;
87
- padding: 18px 14px;
88
- text-align: center;
89
- }
90
- .stat .num {
91
- display: block;
92
- font-size: 1.75rem;
93
- font-weight: 700;
94
- color: var(--fg);
95
- letter-spacing: -0.015em;
96
- line-height: 1.05;
97
- }
98
- .stat .num .decimal { font-size: 0.55em; font-weight: 500; opacity: 0.75; margin-left: 1px; }
99
- .stat .lbl {
100
- display: block;
101
- font-size: 0.68rem;
102
- color: var(--muted);
103
- text-transform: uppercase;
104
- letter-spacing: 0.13em;
105
- margin-top: 8px;
106
- font-weight: 500;
107
- }
108
- .stat .sub {
109
- display: block;
110
- font-size: 0.6rem;
111
- color: var(--muted);
112
- font-weight: 400;
113
- letter-spacing: 0.04em;
114
- margin-top: 4px;
115
- opacity: 0.65;
116
- text-transform: none;
117
- }
118
-
119
- /* Chart sections */
120
- .charts {
121
- max-width: 1440px;
122
- margin: 0 auto;
123
- display: grid;
124
- grid-template-columns: 1fr 1fr;
125
- gap: 24px;
126
- padding: 24px;
127
- }
128
- .chart-card {
129
- background: var(--card);
130
- border: 1px solid var(--border);
131
- border-radius: 20px;
132
- padding: 24px 20px 16px 20px;
133
- }
134
- .chart-card h2 {
135
- text-align: center;
136
- margin: 0 0 4px 0;
137
- font-size: 1.1rem;
138
- font-weight: 600;
139
- color: var(--fg);
140
- letter-spacing: -0.005em;
141
- }
142
- .chart-card .subtitle {
143
- text-align: center;
144
- margin: 0 0 14px 0;
145
- font-size: 0.82rem;
146
- color: var(--muted);
147
- font-weight: 400;
148
- }
149
-
150
- /* Donut */
151
- .donut-wrap {
152
- position: relative;
153
- width: 100%;
154
- max-width: 560px;
155
- aspect-ratio: 1;
156
- margin: 0 auto;
157
- }
158
- .donut-wrap.small { max-width: 400px; }
159
- .donut-svg {
160
- width: 100%;
161
- height: 100%;
162
- display: block;
163
- overflow: visible;
164
- }
165
- .donut-slice { cursor: pointer; transition: filter 0.2s ease; }
166
- .donut-slice:hover { filter: brightness(1.25) drop-shadow(0 0 10px rgba(255,255,255,0.15)); }
167
-
168
- .donut-center {
169
- position: absolute;
170
- inset: 0;
171
- display: flex;
172
- flex-direction: column;
173
- align-items: center;
174
- justify-content: center;
175
- pointer-events: none;
176
- padding: 18%;
177
- text-align: center;
178
- }
179
- .donut-center.small { padding: 22%; }
180
- .center-item { width: 100%; }
181
- .center-label {
182
- font-size: 0.65rem;
183
- font-weight: 500;
184
- color: var(--muted);
185
- letter-spacing: 0.18em;
186
- text-transform: uppercase;
187
- display: flex;
188
- align-items: center;
189
- justify-content: center;
190
- gap: 6px;
191
- }
192
- .center-label .icon { font-size: 0.85rem; opacity: 0.9; }
193
- .center-number {
194
- font-size: clamp(1.8rem, 4.5vw, 2.75rem);
195
- font-weight: 700;
196
- color: var(--fg);
197
- line-height: 1;
198
- letter-spacing: -0.03em;
199
- margin: 4px 0;
200
- }
201
- .center-number .decimal {
202
- font-size: 0.55em;
203
- font-weight: 500;
204
- color: var(--fg);
205
- opacity: 0.72;
206
- margin-left: 1px;
207
- }
208
- .center-divider {
209
- width: 42%;
210
- border: none;
211
- border-top: 1px solid rgba(255, 255, 255, 0.08);
212
- margin: 10px auto;
213
- }
214
-
215
- /* Details card */
216
- .details {
217
- max-width: 1440px;
218
- margin: 0 auto 32px auto;
219
- padding: 0 24px;
220
- }
221
- .details-card {
222
- background: var(--card);
223
- border: 1px solid var(--border);
224
- border-radius: 20px;
225
- padding: 26px 28px;
226
- min-height: 140px;
227
- }
228
- .details-card h3 {
229
- margin: 0 0 8px 0;
230
- font-size: 1.35rem;
231
- color: var(--fg);
232
- display: flex;
233
- align-items: center;
234
- gap: 12px;
235
- font-weight: 600;
236
- letter-spacing: -0.01em;
237
- }
238
- .details-card h3 .swatch { display: inline-block; width: 14px; height: 14px; border-radius: 50%; }
239
- .details-card h3 a { color: var(--fg); font-size: 1.05rem; opacity: 0.85; }
240
- .details-card h3 a:hover { opacity: 1; text-decoration: underline; }
241
- .details-card .tagline { color: var(--muted); font-size: 0.95rem; margin: 0 0 18px 0; }
242
- .kv-grid {
243
- display: grid;
244
- grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
245
- gap: 12px 24px;
246
- }
247
- .kv .k { color: var(--muted); font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.1em; margin-bottom: 4px; font-weight: 500; }
248
- .kv .v { color: var(--fg); font-size: 0.9rem; }
249
- .kv .v a { border-bottom: 1px dashed var(--muted); }
250
- .kv .v strong { font-weight: 600; }
251
- .schema-list { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 6px; }
252
- .schema-list code {
253
- background: var(--card-alt);
254
- color: var(--fg);
255
- padding: 3px 8px;
256
- border-radius: 6px;
257
- font-size: 0.78rem;
258
- font-family: "SF Mono", Consolas, monospace;
259
- border: 1px solid var(--border);
260
- }
261
-
262
- /* Extras (modality + treemap) */
263
- .extras {
264
- max-width: 1440px;
265
- margin: 8px auto 0 auto;
266
- padding: 0 24px 24px 24px;
267
- display: grid;
268
- grid-template-columns: 1fr 2fr;
269
- gap: 24px;
270
- }
271
- .plot-treemap { width: 100%; height: 900px; position: relative; }
272
- .plot-treemap svg { width: 100%; height: 100%; display: block; }
273
-
274
- /* Voronoi */
275
- .voronoi-cell {
276
- cursor: pointer;
277
- transition: filter 0.18s ease, opacity 0.18s ease;
278
- }
279
- .voronoi-cell:hover { filter: brightness(1.35) drop-shadow(0 0 8px rgba(255,255,255,0.35)); }
280
- .voronoi-label {
281
- font-family: "Geist", "Google Sans", sans-serif;
282
- font-weight: 600;
283
- fill: #ffffff;
284
- pointer-events: none;
285
- text-anchor: middle;
286
- user-select: none;
287
- }
288
- .voronoi-label .code { font-weight: 400; opacity: 0.8; fill: #ffffff; }
289
- .voronoi-tooltip {
290
- position: absolute;
291
- pointer-events: none;
292
- background: var(--tooltip-bg);
293
- border: 1px solid var(--border);
294
- border-radius: 10px;
295
- padding: 10px 14px;
296
- font-size: 0.85rem;
297
- color: var(--fg);
298
- box-shadow: 0 12px 32px rgba(0,0,0,0.7);
299
- opacity: 0;
300
- transition: opacity 0.12s ease;
301
- white-space: nowrap;
302
- z-index: 20;
303
- font-family: "Geist", sans-serif;
304
- }
305
- .voronoi-tooltip .t-name { font-weight: 700; color: var(--fg); font-size: 0.95rem; }
306
- .voronoi-tooltip .t-code { color: var(--muted); font-size: 0.72rem; margin-left: 4px; }
307
- .voronoi-tooltip .t-rows { color: var(--fg); font-weight: 600; margin-top: 4px; opacity: 0.9; }
308
-
309
- /* Donut tooltip (shared style) */
310
- .donut-tooltip {
311
- position: fixed;
312
- pointer-events: none;
313
- background: var(--tooltip-bg);
314
- border: 1px solid var(--border);
315
- border-radius: 10px;
316
- padding: 10px 14px;
317
- font-size: 0.85rem;
318
- color: var(--fg);
319
- box-shadow: 0 12px 32px rgba(0,0,0,0.7);
320
- opacity: 0;
321
- transition: opacity 0.12s ease;
322
- white-space: nowrap;
323
- z-index: 50;
324
- font-family: "Geist", sans-serif;
325
- }
326
- .donut-tooltip .t-name { font-weight: 700; font-size: 0.95rem; }
327
- .donut-tooltip .t-meta { color: var(--muted); font-size: 0.78rem; margin-top: 4px; }
328
-
329
- footer {
330
- max-width: 1440px;
331
- margin: 0 auto 32px auto;
332
- padding: 0 24px;
333
- text-align: center;
334
- color: var(--muted);
335
- font-size: 0.8rem;
336
- font-weight: 400;
337
- }
338
- footer a { border-bottom: 1px dashed var(--muted); }
339
-
340
- @media (max-width: 900px) {
341
- .hero-stats { grid-template-columns: repeat(2, 1fr); }
342
- .extras { grid-template-columns: 1fr; }
343
- }
344
- @media (max-width: 780px) {
345
- .charts { grid-template-columns: 1fr; }
346
- }
347
- </style>
348
- </head>
349
- <body>
350
-
351
- <header>
352
- <img src="Reubensdataset.png" alt="Reuben's Data Lab" class="hero-img" />
353
- </header>
354
-
355
- <section class="hero-stats">
356
- <div class="stat">
357
- <span class="num" data-value="12"></span>
358
- <span class="lbl">Raw datasets</span>
359
- <span class="sub">in four HF collections</span>
360
- </div>
361
- <div class="stat">
362
- <span class="num" data-value="14.8M"></span>
363
- <span class="lbl">Total rows</span>
364
- <span class="sub">every row, every dataset</span>
365
- </div>
366
- <div class="stat">
367
- <span class="num" data-value="130+"></span>
368
- <span class="lbl">Languages</span>
369
- <span class="sub">many rarely seen online</span>
370
- </div>
371
- <div class="stat">
372
- <span class="num" data-value="4"></span>
373
- <span class="lbl">Modalities</span>
374
- <span class="sub">audio, text, images, code</span>
375
- </div>
376
- <div class="stat">
377
- <span class="num" data-value="17"></span>
378
- <span class="lbl">Days to build</span>
379
- <span class="sub">April 8 to April 24, 2026</span>
380
- </div>
381
- </section>
382
-
383
- <section class="charts">
384
- <div class="chart-card">
385
- <h2>Raw corpus</h2>
386
- <div class="subtitle">Every dataset I've created in the <a href="https://huggingface.co/ReubenDataLab/collections" target="_blank" rel="noopener">ReubenDataLab collections</a></div>
387
- <div class="donut-wrap">
388
- <svg id="chart-raw" class="donut-svg"></svg>
389
- <div class="donut-center" id="center-raw"></div>
390
- </div>
391
- </div>
392
- <div class="chart-card">
393
- <h2>Adaption-remastered</h2>
394
- <div class="subtitle">Improved datasets after running them through <a href="https://adaptionlabs.ai" target="_blank" rel="noopener">adaptionlabs.ai</a></div>
395
- <div class="donut-wrap">
396
- <svg id="chart-adaption" class="donut-svg"></svg>
397
- <div class="donut-center" id="center-adaption"></div>
398
- </div>
399
- </div>
400
- </section>
401
-
402
- <div class="details">
403
- <div id="details-card" class="details-card" style="display: none;"></div>
404
- </div>
405
-
406
- <section class="extras">
407
- <div class="chart-card">
408
- <h2>Modality split</h2>
409
- <div class="subtitle">Share of the corpus by data type</div>
410
- <div class="donut-wrap small">
411
- <svg id="chart-modality" class="donut-svg"></svg>
412
- <div class="donut-center small" id="center-modality"></div>
413
- </div>
414
- </div>
415
- <div class="chart-card">
416
- <h2>Languages across the corpus</h2>
417
- <div class="subtitle">Every language that appears in any raw dataset, sized (log-scale) by total row count. Hover for exact numbers.</div>
418
- <div id="chart-treemap" class="plot-treemap">
419
- <div id="voronoi-tooltip" class="voronoi-tooltip"></div>
420
- </div>
421
- </div>
422
- </section>
423
-
424
- <div id="donut-tooltip" class="donut-tooltip"></div>
425
-
426
- <footer>
427
- Data self-reported from HF dataset pages · Built for the
428
- <a href="https://www.adaptionlabs.ai/blog/the-uncharted-data-challenge" target="_blank">Uncharted Data Challenge</a>
429
- · Author <a href="https://huggingface.co/Reubencf" target="_blank">@Reubencf</a>
430
- </footer>
431
-
432
- <script src="app.js" defer></script>
433
-
434
- </body>
435
- </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>Reuben Data Lab</title>
7
+ <style>
8
+ html, body {
9
+ margin: 0;
10
+ padding: 0;
11
+ background: #000;
12
+ color: #fff;
13
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
14
+ overflow-x: hidden; /* only clip sideways: the frame is taller than the viewport, so vertical scrolling must stay available */
15
+ }
16
+ .frame-wrap {
17
+ width: 100%;
18
+ height: 100vh;
19
+ min-height: 2800px;
20
+ background: #000;
21
+ }
22
+ iframe {
23
+ width: 100%;
24
+ height: 100%;
25
+ border: 0;
26
+ display: block;
27
+ background: #000;
28
+ }
29
+ .fallback {
30
+ padding: 32px 24px;
31
+ text-align: center;
32
+ color: #8a8a94;
33
+ font-size: 0.9rem;
34
+ }
35
+ .fallback a { color: #fff; border-bottom: 1px dashed #8a8a94; text-decoration: none; }
36
+ </style>
37
+ </head>
38
+ <body>
39
+
40
+ <div class="frame-wrap"> <!-- Wrapper that sizes the embedded dataset-explorer; the iframe is the page's primary content, so it loads eagerly. -->
41
+ <iframe
42
+ src="https://reubencf-dataset-explorer.static.hf.space/"
43
+ title="ReubenDataLab Dataset Explorer"
44
+ allow="fullscreen"
45
+ loading="eager"
46
+ referrerpolicy="no-referrer-when-downgrade"></iframe>
47
+ </div>
48
+
49
+ <noscript> <!-- Rendered only when scripting is disabled: plain link to the hosted explorer. -->
50
+ <div class="fallback">
51
+ Interactive dataset explorer at
52
+ <a href="https://huggingface.co/spaces/Reubencf/dataset-explorer" target="_blank" rel="noopener noreferrer">
53
+ huggingface.co/spaces/Reubencf/dataset-explorer
54
+ </a>.
55
+ </div>
56
+ </noscript>
57
+
58
+ </body>
59
+ </html>