Upload TCUPY.py
Browse files
TCUPY.py
CHANGED
|
@@ -35,7 +35,7 @@ r'''############################################################################
|
|
| 35 |
#
|
| 36 |
# Critical dependencies
|
| 37 |
#
|
| 38 |
-
# !pip install cupy-
|
| 39 |
# !pip install numpy==1.26.4
|
| 40 |
#
|
| 41 |
################################################################################
|
|
@@ -1233,6 +1233,115 @@ def find_matches_fast(src_array, trg_array, seed: int = 0) -> int:
|
|
| 1233 |
|
| 1234 |
###################################################################################
|
| 1235 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1236 |
# Import-time banner: three lines written to stdout when the module loads.
print('Module is loaded!', 'Enjoy! :)', '=' * 70, sep='\n')
|
|
|
|
| 35 |
#
|
| 36 |
# Critical dependencies
|
| 37 |
#
|
| 38 |
+
# !pip install cupy-cuda13x
|
| 39 |
# !pip install numpy==1.26.4
|
| 40 |
#
|
| 41 |
################################################################################
|
|
|
|
| 1233 |
|
| 1234 |
###################################################################################
|
| 1235 |
|
| 1236 |
+
def _poly_hash_tables(values, max_len, base):
    """Build (prefix, powers) polynomial-hash tables modulo 2**64 as int64 arrays."""
    mask = (1 << 64) - 1

    # prefix[i] is the hash of values[:i]; Python ints keep the wrap-around
    # explicit instead of relying on silent fixed-width overflow.
    prefix = np.zeros(len(values) + 1, dtype=np.uint64)
    acc = 0
    for pos, value in enumerate(values, start=1):
        acc = (acc * base + value) & mask
        prefix[pos] = acc

    # powers[k] == base ** k (mod 2**64)
    powers = np.ones(max_len + 1, dtype=np.uint64)
    acc = 1
    for exp in range(1, max_len + 1):
        acc = (acc * base) & mask
        powers[exp] = acc

    # Reinterpret the bits as signed 64-bit so device arithmetic stays in int64.
    return prefix.view(np.int64), powers.view(np.int64)


def find_repeating_non_overlapping_patterns(arr, min_len: int) -> dict:
    """
    Find repeating, non-overlapping patterns of length ``min_len`` and longer.

    Lengths are scanned from ``len(arr) // 2`` down to ``min_len``. For each
    length, windows are bucketed by a rolling polynomial hash computed with
    CuPy, then regrouped on the host by their exact content, so a hash
    collision can never merge two different patterns. Within each group,
    occurrences are picked greedily from left to right so that picked
    occurrences of the same pattern never overlap.

    Args:
        arr: Sequence of integers; it is converted to a NumPy ``int64`` array.
        min_len: Smallest pattern length to report. Values below 1 are
            treated as 1, because a zero-length pattern is meaningless.

    Returns:
        A dict mapping each pattern (a tuple of Python ints) to the number of
        non-overlapping occurrences that were picked (always >= 2). An empty
        dict is returned when ``arr`` is shorter than ``2 * min_len``.

    Notes:
        * Positions covered by a reported pattern are marked as consumed.
          Later candidates are rejected only when their START position is
          consumed, so a later pattern may still extend into consumed
          positions.
        * A pattern that does not reach two picked occurrences consumes
          nothing.
    """
    # A non-positive length would make the loop below reach length 0 (or
    # negative lengths), indexing past the end of ``consumed``.
    min_len = max(min_len, 1)

    n = len(arr)
    if n < min_len * 2:
        return {}

    arr_cpu = np.asarray(arr, dtype=np.int64)
    max_len = n // 2

    # Large prime base; all hash arithmetic is carried out modulo 2**64.
    base = 1000000007

    # Build both tables once on the host and upload them in a single transfer
    # each, rather than issuing one device element assignment per input item.
    prefix_cpu, powers_cpu = _poly_hash_tables(arr_cpu.tolist(), max_len, base)
    pref = cp.asarray(prefix_cpu)
    powers_gpu = cp.asarray(powers_cpu)

    consumed = [False] * n
    result = {}

    for length in range(max_len, min_len - 1, -1):
        # length <= n // 2, so there are always at least two windows.
        n_hashes = n - length + 1

        # 1. Hash of every window arr[i:i + length]. The subtraction is done
        #    modulo 2**64, so wrap-around in the prefix values cancels out.
        raw = pref[length:length + n_hashes] - (pref[:n_hashes] * powers_gpu[length])

        # Fold the high 32 bits into the low ones to mix the hash. The value
        # is a signed int64 and may be negative; only equality matters here.
        hash_l = raw ^ (raw >> 32)

        # 2. Sort so that equal hashes become adjacent.
        sort_idx = cp.argsort(hash_l)
        sorted_hash = hash_l[sort_idx]

        # A sorted position belongs to a run of two or more equal hashes
        # exactly when it equals its left or its right neighbour.
        same_as_prev = sorted_hash[1:] == sorted_hash[:-1]
        in_run = cp.zeros(n_hashes, dtype=bool)
        in_run[1:] |= same_as_prev
        in_run[:-1] |= same_as_prev

        # 3. Bring only the candidate start positions back to the host.
        candidates = cp.asnumpy(sort_idx[in_run]).tolist()
        if not candidates:
            continue

        # 4. Regroup by exact window content so collisions cannot cause
        #    false matches.
        groups = {}
        for start in candidates:
            key = arr_cpu[start:start + length].tobytes()
            groups.setdefault(key, []).append(start)

        # 5. Greedy left-to-right selection of non-overlapping occurrences.
        for starts in groups.values():
            # The greedy scan needs ascending positions; sort explicitly
            # instead of depending on how argsort orders equal hashes.
            starts.sort()

            chosen = []
            last_end = -1
            for start in starts:
                if consumed[start] or start < last_end:
                    continue
                chosen.append(start)
                last_end = start + length

            if len(chosen) < 2:
                continue

            first = chosen[0]
            # tolist() yields native Python ints for the result key.
            result[tuple(arr_cpu[first:first + length].tolist())] = len(chosen)

            # Mark every position covered by the picked occurrences.
            for start in chosen:
                consumed[start:start + length] = [True] * length

    return result
|
| 1342 |
+
|
| 1343 |
+
###################################################################################
|
| 1344 |
+
|
| 1345 |
# Import-time banner: three lines written to stdout when the module loads.
print('Module is loaded!', 'Enjoy! :)', '=' * 70, sep='\n')
|