| import json |
|
|
| |
# Ordered classification table: each entry is (solution, substrings).
# A path is assigned the `solution` of the first entry, top to bottom, that
# has any of its substrings contained in the path.
# NOTE(review): judging only by the directory names, labels 1-4 appear to
# stand for correct / overlap-or-pause / silence-or-laugh / text-error
# samples -- confirm against whoever consumes the 'solution' field.
RULES = [
    (1, [
        'data/output_GRPO_correctdata',
        # Already covered by 'output_correct' below (same label); kept so the
        # directory name stays visible. It was previously listed twice.
        'output_correct_pause_500_1000',
        'output_correct',
    ]),
    (2, [
        'output_GRPO_overlap_800',
        'output_2000_3000_wrongpause',
    ]),
    (3, [
        'output_GRPO_silence_500',
        'output_wrong_laugh',
    ]),
    (4, [
        'output_GRPO-texterror_800',
        'output_text_error_dialog_10002000',
    ]),
]
|
|
def classify(audio_url, rules=None):
    """Return the solution label of the first rule matching ``audio_url``.

    Args:
        audio_url: Path or URL string to inspect. Any non-string value (for
            example ``None`` decoded from a JSON ``null``) is treated as
            unmatched instead of raising ``TypeError``.
        rules: Optional iterable of ``(solution, substrings)`` pairs to use
            in place of the module-level ``RULES`` table.

    Returns:
        The ``solution`` of the first pair, in iteration order, that has at
        least one substring contained in ``audio_url``; ``None`` when nothing
        matches.
    """
    # Substring tests against a non-string would raise, so bail out early.
    if not isinstance(audio_url, str):
        return None
    if rules is None:
        rules = RULES
    for solution, substrings in rules:
        if any(sub in audio_url for sub in substrings):
            return solution
    return None
|
|
def main(json_path='merged_test_list.json',
         output_path='merged_test_list_classified.json'):
    """Label every entry of a JSON file and write the result to a new file.

    The input is parsed as JSON and iterated; ``.get`` is called on each
    element, so the top-level value is expected to be an array of objects.
    Each element whose ``'audio_url'`` is matched by ``classify`` gets a
    ``'solution'`` key holding the returned label. Unmatched elements are
    written back unchanged.

    Args:
        json_path: File to read. Defaults to the original hard-coded name.
        output_path: File to write (UTF-8, non-ASCII kept as-is, 2-space
            indent). Defaults to the original hard-coded name.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for item in data:
        audio_url = item.get('audio_url')
        # A missing key yields None, and so does an explicit JSON null
        # (a .get() default would not help in that case). Neither can match
        # a rule, so leave such entries untouched rather than crash.
        if not isinstance(audio_url, str):
            continue
        solution = classify(audio_url)
        if solution is not None:
            item['solution'] = solution

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
|
|
# Run the classification only when this file is executed as a script, so
# importing the module (e.g. to reuse classify) has no side effects.
if __name__ == '__main__':
    main()
|
|