asigalov61 committed on
Commit
ea57424
·
verified ·
1 Parent(s): a590766

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +268 -13
TMIDIX.py CHANGED
@@ -48,7 +48,7 @@ r'''
48
 
49
  ###################################################################################
50
 
51
- __version__ = "26.4.27" # TMIDIX version
52
 
53
  ###################################################################################
54
 
@@ -1474,6 +1474,12 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
1474
 
1475
  import os
1476
 
 
 
 
 
 
 
1477
  import datetime
1478
 
1479
  from datetime import datetime
@@ -1522,8 +1528,6 @@ import heapq
1522
 
1523
  import matplotlib.pyplot as plt
1524
 
1525
- import psutil
1526
-
1527
  import json
1528
 
1529
  from pathlib import Path
@@ -1539,6 +1543,8 @@ from fnmatch import fnmatch
1539
 
1540
  from typing import List, Optional, Tuple, Dict, Any, Optional, Iterable, Set
1541
 
 
 
1542
  ###################################################################################
1543
  #
1544
  # Original TMIDI Tegridy helper functions
@@ -11795,26 +11801,130 @@ def is_mostly_wide_peaks_and_valleys(values,
11795
  ###################################################################################
11796
 
11797
  def system_memory_utilization(return_dict=False):
 
 
 
 
11798
 
11799
- if return_dict:
11800
- return dict(psutil.virtual_memory()._asdict())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11801
 
11802
  else:
11803
- print('RAM memory % used:', psutil.virtual_memory()[2])
11804
- print('RAM Used (GB):', psutil.virtual_memory()[3]/(1024**3))
 
 
 
 
 
 
 
 
 
 
11805
 
11806
  ###################################################################################
11807
 
11808
  def system_cpus_utilization(return_dict=False):
 
 
 
 
11809
 
11810
- if return_dict:
11811
- return {'num_cpus': psutil.cpu_count(),
11812
- 'cpus_util': psutil.cpu_percent()
11813
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11814
 
11815
  else:
11816
- print('Number of CPUs:', psutil.cpu_count())
11817
- print('CPUs utilization:', psutil.cpu_percent())
 
 
 
 
 
 
 
 
11818
 
11819
  ###################################################################################
11820
 
@@ -18662,6 +18772,151 @@ def escore_notes_run_time(escore_notes, time_idx=1, dur_idx=2):
18662
 
18663
  ###################################################################################
18664
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18665
  print('Module loaded!')
18666
  print('=' * 70)
18667
  print('Enjoy! :)')
 
48
 
49
  ###################################################################################
50
 
51
+ __version__ = "26.5.13" # TMIDIX version
52
 
53
  ###################################################################################
54
 
 
1474
 
1475
  import os
1476
 
1477
+ import platform
1478
+
1479
+ import ctypes
1480
+
1481
+ import time
1482
+
1483
  import datetime
1484
 
1485
  from datetime import datetime
 
1528
 
1529
  import matplotlib.pyplot as plt
1530
 
 
 
1531
  import json
1532
 
1533
  from pathlib import Path
 
1543
 
1544
  from typing import List, Optional, Tuple, Dict, Any, Optional, Iterable, Set
1545
 
1546
+ import subprocess
1547
+
1548
  ###################################################################################
1549
  #
1550
  # Original TMIDI Tegridy helper functions
 
11801
  ###################################################################################
11802
 
def _linux_memory_stats():
    """Return (total_bytes, available_bytes, percent_used) parsed from /proc/meminfo."""

    meminfo = {}

    with open("/proc/meminfo") as f:
        for line in f:
            key, sep, val = line.partition(":")
            fields = val.split()

            # Ignore anything that is not a "Key:   <integer> [kB]" entry
            if not sep or not fields or not fields[0].isdigit():
                continue

            meminfo[key.strip()] = int(fields[0]) * 1024  # kB -> bytes

    total = meminfo["MemTotal"]

    # MemAvailable is absent on old kernels; MemFree is the closest substitute
    available = meminfo.get("MemAvailable", meminfo["MemFree"])

    return total, available, ((total - available) / total) * 100


def _macos_memory_stats():
    """Return (total_bytes, available_bytes, percent_used) from vm_stat and sysctl."""

    import re
    import subprocess

    vm = subprocess.check_output(["vm_stat"]).decode()

    # vm_stat prints lines such as "Pages free:      12345."
    pages = {}
    for line in vm.split("\n"):
        m = re.match(r"(.+):\s+(\d+)\.", line)
        if m:
            pages[m.group(1)] = int(m.group(2))

    page_size = int(subprocess.check_output(["sysctl", "-n", "hw.pagesize"]).decode())
    total = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]).decode())

    # Free and inactive pages are both counted as available memory
    available = (pages["Pages free"] + pages["Pages inactive"]) * page_size

    return total, available, ((total - available) / total) * 100


def _windows_memory_stats():
    """Return (total_bytes, available_bytes, percent_used) from GlobalMemoryStatusEx."""

    # The structure must be declared in full: GlobalMemoryStatusEx validates
    # dwLength against the real structure size and fails on a truncated layout.
    class MEMORYSTATUSEX(ctypes.Structure):
        _fields_ = [
            ("dwLength", ctypes.c_ulong),
            ("dwMemoryLoad", ctypes.c_ulong),
            ("ullTotalPhys", ctypes.c_ulonglong),
            ("ullAvailPhys", ctypes.c_ulonglong),
            ("ullTotalPageFile", ctypes.c_ulonglong),
            ("ullAvailPageFile", ctypes.c_ulonglong),
            ("ullTotalVirtual", ctypes.c_ulonglong),
            ("ullAvailVirtual", ctypes.c_ulonglong),
            ("ullAvailExtendedVirtual", ctypes.c_ulonglong),
        ]

    mem = MEMORYSTATUSEX()
    mem.dwLength = ctypes.sizeof(MEMORYSTATUSEX)

    # A zero return value means the call failed; do not report zeros silently
    if not ctypes.windll.kernel32.GlobalMemoryStatusEx(ctypes.byref(mem)):
        raise ctypes.WinError()

    return mem.ullTotalPhys, mem.ullAvailPhys, mem.dwMemoryLoad


def system_memory_utilization(return_dict=False):
    """
    Cross-platform memory utilization without psutil.
    Works on Linux, macOS, Windows.

    Args:
        return_dict: When True, return the figures as a dict instead of
                     printing them.

    Returns:
        None when return_dict is False (the figures are printed).
        Otherwise a dict with the keys "total_bytes", "used_bytes",
        "available_bytes" and "percent_used".

    Raises:
        NotImplementedError: If platform.system() is not Linux, Darwin or Windows.
        OSError: On Windows, if GlobalMemoryStatusEx reports a failure.
    """

    system = platform.system()

    if system == "Linux":
        total, available, percent = _linux_memory_stats()

    elif system == "Darwin":  # macOS
        total, available, percent = _macos_memory_stats()

    elif system == "Windows":
        total, available, percent = _windows_memory_stats()

    else:
        raise NotImplementedError(f"Unsupported OS: {system}")

    # Every branch defines "available", so "used" is derived in one place
    used = total - available

    if return_dict:
        return {
            "total_bytes": total,
            "used_bytes": used,
            "available_bytes": available,
            "percent_used": percent,
        }

    print(f"RAM memory % used: {percent:.2f}")
    print(f"RAM Used (GB): {used / (1024**3):.2f}")
11870
 
11871
  ###################################################################################
11872
 
def _linux_cpu_times():
    """Return (total_jiffies, idle_jiffies) from the aggregate "cpu" line of /proc/stat."""

    with open("/proc/stat") as f:
        fields = [int(x) for x in f.readline().split()[1:]]

    # Field order: user nice system idle iowait irq softirq steal guest guest_nice.
    # Guest time is already included in user/nice, so only the first eight
    # fields are summed to avoid counting it twice.
    core = fields[:8]

    # Time spent waiting for I/O is not CPU work, so it is counted as idle
    idle = core[3] + (core[4] if len(core) > 4 else 0)

    return sum(core), idle


def _windows_cpu_percent():
    """Return the "% Processor Time" of all processors from one typeperf sample."""

    import csv

    # The counter path is passed as-is: subprocess quotes list arguments
    # itself, and embedded quote characters would become part of the path.
    cmd = ["typeperf", r"\Processor(_Total)\% Processor Time", "-sc", "1"]
    out = subprocess.check_output(cmd).decode(errors="replace")

    # The output is a CSV header row, one sample row and trailing status
    # messages, so keep the last row whose final column is a number.
    value = None
    for row in csv.reader(out.splitlines()):
        if len(row) < 2:
            continue
        try:
            value = float(row[-1].replace(",", "."))
        except ValueError:
            continue

    if value is None:
        raise RuntimeError("Could not parse typeperf output")

    return value


def system_cpus_utilization(return_dict=False, interval=1.0):
    """
    Cross-platform CPU utilization without psutil.

    Args:
        return_dict: When True, return the figures as a dict instead of
                     printing them.
        interval: Seconds to wait between the two /proc/stat samples on Linux.
                  Not used on macOS (a single ps snapshot is taken) or on
                  Windows (typeperf collects one sample itself).

    Returns:
        None when return_dict is False (the figures are printed).
        Otherwise a dict with the keys "num_cpus" (the os.cpu_count() value)
        and "cpus_util" (a percentage clamped to the 0..100 range).

    Raises:
        NotImplementedError: If platform.system() is not Linux, Darwin or Windows.
        RuntimeError: On Windows, if no sample can be parsed from typeperf.
    """

    system = platform.system()
    num_cpus = os.cpu_count()

    if system == "Linux":
        total1, idle1 = _linux_cpu_times()
        if interval > 0:
            time.sleep(interval)
        total2, idle2 = _linux_cpu_times()

        idle_delta = idle2 - idle1
        total_delta = total2 - total1

        # The counters may not have advanced during a very short interval
        if total_delta <= 0:
            cpu_percent = 0.0
        else:
            cpu_percent = 100 * (1 - idle_delta / total_delta)

    elif system == "Darwin":  # macOS
        out = subprocess.check_output(["ps", "-A", "-o", "%cpu"]).decode().split()

        # Skip the "%CPU" header; some locales print a decimal comma
        busy = sum(float(x.replace(",", ".")) for x in out[1:])

        # Per-process values are summed and then normalized by the core
        # count to obtain a system-wide percentage.
        cpu_percent = busy / (num_cpus or 1)

    elif system == "Windows":
        cpu_percent = _windows_cpu_percent()

    else:
        raise NotImplementedError(f"Unsupported OS: {system}")

    cpu_percent = max(0.0, min(float(cpu_percent), 100.0))

    if return_dict:
        return {
            "num_cpus": num_cpus,
            "cpus_util": cpu_percent,
        }

    print("Number of CPUs:", num_cpus)
    print("CPUs utilization:", cpu_percent)
11928
 
11929
  ###################################################################################
11930
 
 
18772
 
18773
  ###################################################################################
18774
 
def find_best_ngram_match(
    src_counter: Counter,
    counter_pool: List[Counter],
    ngram_weights: Optional[Dict[int, float]] = None,
    min_count: int = 2  # Filter out ngrams appearing less than this (removes noise)
) -> Optional[Tuple[int, float, float, float]]:
    """
    Finds the best matching Counter using Log-Scaled Weighted Cosine Similarity,
    with noise filtering to prevent long-tail vocabulary mismatch penalties.

    Ties on similarity are broken first by the smallest difference between the
    raw (unscaled) totals of the retained ngrams, then by the smallest sorted
    item list of the candidate.

    Args:
        src_counter: The Counter object to match against.
        counter_pool: A list of Counter objects to search through.
        ngram_weights: A dict mapping ngram LENGTHS to importance weights.
                       Example: {1: 1.0, 2: 2.5, 3: 4.0}.
                       If None, all lengths are weighted equally (1.0).
                       Keys that have no length (e.g. a bare int) are treated
                       as ngrams of length 1.
        min_count: Minimum count required for an ngram to be included in the
                   comparison. Setting to 2 removes "hapax legomena"
                   (count=1 noise).

    Returns:
        A tuple of (best_matching_index, best_similarity, pool_mean, pool_std)
        or None if no viable match is found. The mean and the sample standard
        deviation cover every pool entry; entries that are empty after
        filtering contribute a similarity of 0.0.
    """

    if not counter_pool:
        return None

    # Unary plus drops zero and negative counts
    src = +src_counter
    if not src:
        return None

    if ngram_weights is None:
        ngram_weights = {}

    # --- FILTERING & LOG SCALING ---
    # 1. Drop ngrams below min_count (removes the noisy long-tail)
    # 2. Apply 1 + log(count) to compress the dynamic range
    # 3. Weight by the LENGTH of the ngram key (e.g., len((1, 6)) == 2)
    def scale_counter(counter: Counter) -> Dict[tuple, float]:
        scaled = {}
        for ngram_key, count in counter.items():
            if count >= min_count:
                try:
                    ngram_len = len(ngram_key)
                except TypeError:
                    ngram_len = 1  # unsized key: treat as a single-item ngram
                weight = ngram_weights.get(ngram_len, 1.0)
                scaled[ngram_key] = (1.0 + math.log(count)) * weight
        return scaled

    src_scaled = scale_counter(src)

    # If source becomes empty after filtering, we can't match
    if not src_scaled:
        return None

    src_magnitude = math.sqrt(sum(v ** 2 for v in src_scaled.values()))
    src_raw_total = sum(v for k, v in src.items() if k in src_scaled)  # Tiebreaker magnitude

    best_index = -1
    best_similarity = -1.0
    best_cand_raw_total = -1.0
    best_cand = {}  # sorted only when an exact double tie has to be resolved

    all_similarities = []

    for idx, candidate in enumerate(counter_pool):
        cand = +candidate
        cand_scaled = scale_counter(cand) if cand else {}

        # Nothing left to compare: counts toward the statistics, never wins
        if not cand_scaled:
            all_similarities.append(0.0)
            continue

        # --- COSINE SIMILARITY ---
        dot_product = 0.0
        cand_magnitude_sq = 0.0

        for ngram_key, c_val in cand_scaled.items():
            cand_magnitude_sq += c_val ** 2

            s_val = src_scaled.get(ngram_key)
            if s_val is not None:
                dot_product += s_val * c_val

        cand_magnitude = math.sqrt(cand_magnitude_sq)

        if cand_magnitude == 0 or src_magnitude == 0:
            similarity = 0.0
        else:
            similarity = dot_product / (src_magnitude * cand_magnitude)

        all_similarities.append(similarity)

        # --- Tiebreaker: Raw Magnitude Distance ---
        cand_raw_total = sum(v for k, v in cand.items() if k in cand_scaled)
        magnitude_distance = abs(src_raw_total - cand_raw_total)

        # --- Comparison Logic ---
        is_better = False

        if similarity > best_similarity:
            is_better = True

        elif similarity == best_similarity:
            best_mag_distance = abs(src_raw_total - best_cand_raw_total)

            if magnitude_distance < best_mag_distance:
                is_better = True

            elif magnitude_distance == best_mag_distance:
                # Last resort: deterministic ordering by sorted contents
                is_better = sorted(cand.items()) < sorted(best_cand.items())

        if is_better:
            best_index = idx
            best_similarity = similarity
            best_cand_raw_total = cand_raw_total
            best_cand = cand

    if best_index == -1:
        return None

    # --- Calculate Overall Pool Statistics ---
    # One similarity is recorded per pool entry and the pool is non-empty here
    sim_count = len(all_similarities)
    pool_mean = sum(all_similarities) / sim_count

    if sim_count > 1:
        variance = sum((s - pool_mean) ** 2 for s in all_similarities) / (sim_count - 1)
        pool_std = math.sqrt(variance)
    else:
        pool_std = 0.0

    return best_index, best_similarity, pool_mean, pool_std
18917
+
18918
+ ###################################################################################
18919
+
18920
  print('Module loaded!')
18921
  print('=' * 70)
18922
  print('Enjoy! :)')