Upload TMIDIX.py
Browse files
TMIDIX.py
CHANGED
|
@@ -48,7 +48,7 @@ r'''
|
|
| 48 |
|
| 49 |
###################################################################################
|
| 50 |
|
| 51 |
-
__version__ = "26.
|
| 52 |
|
| 53 |
###################################################################################
|
| 54 |
|
|
@@ -1474,6 +1474,12 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
|
|
| 1474 |
|
| 1475 |
import os
|
| 1476 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1477 |
import datetime
|
| 1478 |
|
| 1479 |
from datetime import datetime
|
|
@@ -1522,8 +1528,6 @@ import heapq
|
|
| 1522 |
|
| 1523 |
import matplotlib.pyplot as plt
|
| 1524 |
|
| 1525 |
-
import psutil
|
| 1526 |
-
|
| 1527 |
import json
|
| 1528 |
|
| 1529 |
from pathlib import Path
|
|
@@ -1539,6 +1543,8 @@ from fnmatch import fnmatch
|
|
| 1539 |
|
| 1540 |
from typing import List, Optional, Tuple, Dict, Any, Optional, Iterable, Set
|
| 1541 |
|
|
|
|
|
|
|
| 1542 |
###################################################################################
|
| 1543 |
#
|
| 1544 |
# Original TMIDI Tegridy helper functions
|
|
@@ -11795,26 +11801,130 @@ def is_mostly_wide_peaks_and_valleys(values,
|
|
| 11795 |
###################################################################################
|
| 11796 |
|
| 11797 |
def system_memory_utilization(return_dict=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11798 |
|
| 11799 |
-
|
| 11800 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11801 |
|
| 11802 |
else:
|
| 11803 |
-
|
| 11804 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11805 |
|
| 11806 |
###################################################################################
|
| 11807 |
|
| 11808 |
def system_cpus_utilization(return_dict=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11809 |
|
| 11810 |
-
|
| 11811 |
-
|
| 11812 |
-
|
| 11813 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11814 |
|
| 11815 |
else:
|
| 11816 |
-
|
| 11817 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11818 |
|
| 11819 |
###################################################################################
|
| 11820 |
|
|
@@ -18662,6 +18772,151 @@ def escore_notes_run_time(escore_notes, time_idx=1, dur_idx=2):
|
|
| 18662 |
|
| 18663 |
###################################################################################
|
| 18664 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18665 |
print('Module loaded!')
|
| 18666 |
print('=' * 70)
|
| 18667 |
print('Enjoy! :)')
|
|
|
|
| 48 |
|
| 49 |
###################################################################################
|
| 50 |
|
| 51 |
+
__version__ = "26.5.13" # TMIDIX version
|
| 52 |
|
| 53 |
###################################################################################
|
| 54 |
|
|
|
|
| 1474 |
|
| 1475 |
import os
|
| 1476 |
|
| 1477 |
+
import platform
|
| 1478 |
+
|
| 1479 |
+
import ctypes
|
| 1480 |
+
|
| 1481 |
+
import time
|
| 1482 |
+
|
| 1483 |
import datetime
|
| 1484 |
|
| 1485 |
from datetime import datetime
|
|
|
|
| 1528 |
|
| 1529 |
import matplotlib.pyplot as plt
|
| 1530 |
|
|
|
|
|
|
|
| 1531 |
import json
|
| 1532 |
|
| 1533 |
from pathlib import Path
|
|
|
|
| 1543 |
|
| 1544 |
from typing import List, Optional, Tuple, Dict, Any, Optional, Iterable, Set
|
| 1545 |
|
| 1546 |
+
import subprocess
|
| 1547 |
+
|
| 1548 |
###################################################################################
|
| 1549 |
#
|
| 1550 |
# Original TMIDI Tegridy helper functions
|
|
|
|
| 11801 |
###################################################################################
|
| 11802 |
|
| 11803 |
def system_memory_utilization(return_dict=False):
    """
    Report physical RAM utilization without psutil.

    Data sources per platform:
        Linux   - /proc/meminfo
        macOS   - the `vm_stat` and `sysctl` command line tools
        Windows - GlobalMemoryStatusEx (kernel32) through ctypes

    Args:
        return_dict: When True, return the figures instead of printing them.

    Returns:
        None when return_dict is False (two summary lines are printed).
        Otherwise a dict with the keys "total_bytes", "used_bytes",
        "available_bytes" and "percent_used".

    Raises:
        NotImplementedError: If platform.system() is not Linux, Darwin or Windows.
        OSError: On Windows, if GlobalMemoryStatusEx reports failure.
    """

    system = platform.system()

    if system == "Linux":
        meminfo = {}
        with open("/proc/meminfo") as f:
            for line in f:
                # partition() never raises, unlike a two-target split(":")
                key, sep, val = line.partition(":")
                fields = val.split()
                if not sep or not fields:
                    continue
                try:
                    # Every value is scaled as kB -> bytes; only the kB-valued
                    # MemTotal / MemAvailable / MemFree entries are read below.
                    meminfo[key.strip()] = int(fields[0]) * 1024
                except ValueError:
                    continue

        total = meminfo["MemTotal"]
        # MemAvailable is absent on old kernels; fall back to MemFree
        available = meminfo.get("MemAvailable", meminfo["MemFree"])
        used = total - available
        percent = (used / total) * 100

    elif system == "Darwin":  # macOS
        import re
        import subprocess

        vm = subprocess.check_output(["vm_stat"]).decode()
        pages = {}
        for line in vm.split("\n"):
            m = re.match(r"(.+):\s+(\d+)\.", line)
            if m:
                pages[m.group(1)] = int(m.group(2))

        page_size = int(subprocess.check_output(["sysctl", "-n", "hw.pagesize"]).decode())
        total = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]).decode())
        # Free plus inactive pages are treated as available memory. The name
        # `available` must be bound here because the dict below reports it.
        available = (pages["Pages free"] + pages["Pages inactive"]) * page_size
        used = total - available
        percent = (used / total) * 100

    elif system == "Windows":
        class MEMORYSTATUSEX(ctypes.Structure):
            # Full structure layout: GlobalMemoryStatusEx validates dwLength
            # against the size of the complete structure, so every member
            # has to be declared even though only three are read.
            _fields_ = [
                ("dwLength", ctypes.c_ulong),
                ("dwMemoryLoad", ctypes.c_ulong),
                ("ullTotalPhys", ctypes.c_ulonglong),
                ("ullAvailPhys", ctypes.c_ulonglong),
                ("ullTotalPageFile", ctypes.c_ulonglong),
                ("ullAvailPageFile", ctypes.c_ulonglong),
                ("ullTotalVirtual", ctypes.c_ulonglong),
                ("ullAvailVirtual", ctypes.c_ulonglong),
                ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
            ]

        mem = MEMORYSTATUSEX()
        mem.dwLength = ctypes.sizeof(MEMORYSTATUSEX)

        kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
        # A zero return value means failure; do not report zeroed fields
        if not kernel32.GlobalMemoryStatusEx(ctypes.byref(mem)):
            raise ctypes.WinError(ctypes.get_last_error())

        total = mem.ullTotalPhys
        available = mem.ullAvailPhys
        used = total - available
        percent = mem.dwMemoryLoad

    else:
        raise NotImplementedError(f"Unsupported OS: {system}")

    if return_dict:
        return {
            "total_bytes": total,
            "used_bytes": used,
            "available_bytes": available,
            "percent_used": percent,
        }

    print(f"RAM memory % used: {percent:.2f}")
    print(f"RAM Used (GB): {used / (1024**3):.2f}")
|
| 11870 |
|
| 11871 |
###################################################################################
|
| 11872 |
|
| 11873 |
def system_cpus_utilization(return_dict=False, interval=1.0):
    """
    Report overall CPU utilization (percent) without psutil.

    Data sources per platform:
        Linux   - two reads of the aggregate "cpu" line of /proc/stat taken
                  `interval` seconds apart
        macOS   - sum of the per-process %cpu column of `ps -A`, divided by
                  the number of CPUs (an estimate; no sampling sleep)
        Windows - one sample of the "% Processor Time" counter via `typeperf`
                  (`interval` is not used on this path)

    Args:
        return_dict: When True, return the figures instead of printing them.
        interval: Seconds to wait between the two /proc/stat reads on Linux.

    Returns:
        None when return_dict is False (two summary lines are printed).
        Otherwise a dict with the keys "num_cpus" and "cpus_util".

    Raises:
        NotImplementedError: If platform.system() is not Linux, Darwin or Windows.
        ValueError: On Windows, if no numeric sample is found in the typeperf output.
    """

    system = platform.system()
    num_cpus = os.cpu_count()

    if system == "Linux":
        def read_cpu():
            # Field order after the "cpu" label:
            # user nice system idle iowait irq softirq steal guest guest_nice
            with open("/proc/stat") as f:
                fields = [int(x) for x in f.readline().split()[1:]]
            # guest and guest_nice are already accounted in user and nice,
            # so only the first eight fields make up the total.
            total = sum(fields[:8])
            # Time spent waiting for I/O is not CPU work: count it as idle
            idle = sum(fields[3:5])
            return total, idle

        total1, idle1 = read_cpu()
        time.sleep(interval)
        total2, idle2 = read_cpu()

        total_delta = total2 - total1
        idle_delta = idle2 - idle1

        if total_delta <= 0:
            # No ticks elapsed between the two reads (very short interval)
            cpu_percent = 0.0
        else:
            cpu_percent = 100.0 * (1.0 - idle_delta / total_delta)
            # Counters may be adjusted between reads; keep the result in range
            cpu_percent = min(max(cpu_percent, 0.0), 100.0)

    elif system == "Darwin":  # macOS
        out = subprocess.check_output(["ps", "-A", "-o", "%cpu"]).decode().split()
        # out[0] is the column header; tolerate a decimal comma in the values
        per_process_total = sum(float(x.replace(",", ".")) for x in out[1:])
        # Each process figure is relative to one CPU, so normalise by CPU count
        cpu_percent = min(max(per_process_total / (num_cpus or 1), 0.0), 100.0)

    elif system == "Windows":
        import csv

        # Arguments are passed as a list (no shell), so the counter path must
        # not carry its own quote characters.
        cmd = ["typeperf", r"\Processor(_Total)\% Processor Time", "-sc", "1"]
        out = subprocess.check_output(cmd).decode(errors="replace")

        # The output is CSV (header row + sample rows) followed by status
        # text, so keep the last row whose final cell parses as a number.
        cpu_percent = None
        for row in csv.reader(out.splitlines()):
            if len(row) < 2:
                continue
            try:
                cpu_percent = float(row[-1].strip().replace(",", "."))
            except ValueError:
                continue

        if cpu_percent is None:
            raise ValueError("Could not find a CPU sample in typeperf output")

    else:
        raise NotImplementedError(f"Unsupported OS: {system}")

    if return_dict:
        return {
            "num_cpus": num_cpus,
            "cpus_util": cpu_percent,
        }

    print("Number of CPUs:", num_cpus)
    print("CPUs utilization:", cpu_percent)
|
| 11928 |
|
| 11929 |
###################################################################################
|
| 11930 |
|
|
|
|
| 18772 |
|
| 18773 |
###################################################################################
|
| 18774 |
|
| 18775 |
+
def find_best_ngram_match(
    src_counter: Counter,
    counter_pool: List[Counter],
    ngram_weights: Optional[Dict[int, float]] = None,
    min_count: int = 2  # Filter out ngrams appearing less than this (removes noise)
) -> Optional[Tuple[int, float, float, float]]:
    """
    Finds the best matching Counter using Log-Scaled Weighted Cosine Similarity,
    with noise filtering to prevent long-tail vocabulary mismatch penalties.

    Each counter is reduced to a vector: ngrams with count < min_count are
    dropped, the remaining counts become (1 + log(count)) * weight, where the
    weight is looked up by the LENGTH of the ngram key.

    Ties on similarity are broken first by the smallest absolute difference
    between the summed raw counts of the retained ngrams, then by the smallest
    sorted list of (ngram, count) items.

    Args:
        src_counter: The Counter object to match against.
        counter_pool: A list of Counter objects to search through.
        ngram_weights: A dict mapping ngram LENGTHS to importance weights.
                       Example: {1: 1.0, 2: 2.5, 3: 4.0}.
                       If None, all lengths are weighted equally (1.0).
        min_count: Minimum count required for an ngram to be included in the comparison.

    Returns:
        A tuple of (best_matching_index, best_similarity, pool_mean, pool_std),
        or None if the pool is empty, the source has nothing left after
        filtering, or every candidate is empty after filtering.
        pool_mean / pool_std cover ALL pool entries; entries that are empty
        after filtering contribute a similarity of 0.0. pool_std is the sample
        standard deviation (0.0 for a single-entry pool).
    """
    if not counter_pool:
        return None

    # Unary plus drops zero/negative counts
    src = +src_counter
    if not src:
        return None

    weights = ngram_weights if ngram_weights is not None else {}

    def scale_counter(counter: Counter) -> Dict[tuple, float]:
        # Filter by min_count, log-compress the count, weight by key length
        return {
            ngram_key: (1.0 + math.log(count)) * weights.get(len(ngram_key), 1.0)
            for ngram_key, count in counter.items()
            if count >= min_count
        }

    src_scaled = scale_counter(src)

    # If source becomes empty after filtering, we can't match
    if not src_scaled:
        return None

    src_magnitude = math.sqrt(sum(v ** 2 for v in src_scaled.values()))
    # Raw count mass of the retained source ngrams (first tiebreaker)
    src_raw_total = sum(src[k] for k in src_scaled)

    best_index = -1
    best_similarity = -1.0
    best_cand_raw_total = -1.0
    best_cand = None
    # Sorted items of the current best; built only when a full tie needs it
    best_cand_items = None

    all_similarities = []

    for idx, candidate in enumerate(counter_pool):
        cand = +candidate
        cand_scaled = scale_counter(cand)

        # Covers both an empty candidate and one emptied by filtering
        if not cand_scaled:
            all_similarities.append(0.0)
            continue

        # --- COSINE SIMILARITY ---
        dot_product = 0.0
        cand_magnitude_sq = 0.0

        for ngram_key, c_val in cand_scaled.items():
            cand_magnitude_sq += c_val ** 2

            s_val = src_scaled.get(ngram_key)
            if s_val is not None:
                dot_product += s_val * c_val

        cand_magnitude = math.sqrt(cand_magnitude_sq)

        # A zero magnitude is possible when all applicable weights are 0
        if cand_magnitude == 0 or src_magnitude == 0:
            similarity = 0.0
        else:
            # Rounding can push the ratio a hair above 1.0; cap it
            similarity = min(1.0, dot_product / (src_magnitude * cand_magnitude))

        all_similarities.append(similarity)

        # --- Tiebreaker: Raw Magnitude Distance ---
        cand_raw_total = sum(cand[k] for k in cand_scaled)
        magnitude_distance = abs(src_raw_total - cand_raw_total)

        # --- Comparison Logic ---
        is_better = False
        cand_items = None

        if similarity > best_similarity:
            is_better = True
        elif similarity == best_similarity:
            best_mag_distance = abs(src_raw_total - best_cand_raw_total)

            if magnitude_distance < best_mag_distance:
                is_better = True
            elif magnitude_distance == best_mag_distance:
                # Full tie: fall back to a deterministic ordering of contents
                cand_items = sorted(cand.items())
                if best_cand_items is None:
                    best_cand_items = sorted(best_cand.items())
                is_better = cand_items < best_cand_items

        if is_better:
            best_index = idx
            best_similarity = similarity
            best_cand_raw_total = cand_raw_total
            best_cand = cand
            # None unless the sort was already needed for this candidate
            best_cand_items = cand_items

    if best_index == -1:
        return None

    # --- Calculate Overall Pool Statistics ---
    # Every pool entry appended exactly one similarity, so the list is non-empty
    sim_count = len(all_similarities)
    pool_mean = sum(all_similarities) / sim_count

    if sim_count > 1:
        variance = sum((s - pool_mean) ** 2 for s in all_similarities) / (sim_count - 1)
        pool_std = math.sqrt(variance)
    else:
        pool_std = 0.0

    return best_index, best_similarity, pool_mean, pool_std
|
| 18917 |
+
|
| 18918 |
+
###################################################################################
|
| 18919 |
+
|
| 18920 |
print('Module loaded!')
|
| 18921 |
print('=' * 70)
|
| 18922 |
print('Enjoy! :)')
|