import ast
import json
import os
import pathlib
import re
import urllib.request
from typing import Optional


def read_json_url_or_path(src: str) -> dict:
    """Load and return JSON from an HTTP(S) URL or a local file path.

    Args:
        src: Either a URL starting with ``http://`` or ``https://``, or a
            filesystem path (``~`` is expanded and the path is resolved).

    Returns:
        The decoded JSON document.

    Raises:
        urllib.error.URLError: If the URL cannot be fetched (60 s timeout).
        OSError: If the local file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    if src.startswith(("http://", "https://")):
        with urllib.request.urlopen(src, timeout=60) as response:
            return json.load(response)

    path = pathlib.Path(src).expanduser().resolve()
    with path.open("r", encoding="utf-8") as f:
        return json.load(f)


def find_failed_trajectory_in_issues_directory(
    instance_id: str,
    experiments_dir: str,
    *,
    max_generators: int = 5,
) -> Optional[str]:
    """Locate the notice.log of a generator run whose debug.log has a patch.

    The experiments ("issues") directory is expected to be laid out as::

        {experiments_dir}/{numerical_id}-{instance_id}/logs/{timestamp}/
            run/{instance_id}/generator/{generator_id}/

    where each generator directory holds ``debug.log`` and ``notice.log`` and
    ``generator_id`` is a zero-padded three-digit number (``000``, ``001``...).

    Despite the function name, the selection criterion is that ``debug.log``
    ends with a ``result_data`` record containing a non-empty golden patch
    (see ``_contains_successful_patch``).

    Search order: matching instance directories by ascending numerical id,
    then timestamp directories in descending lexicographic order (newest
    first for sortable timestamps), then generator ids ascending.

    Args:
        instance_id: Instance identifier to look for.
        experiments_dir: Root directory containing the per-instance folders.
        max_generators: Number of generator ids (starting at 0) to probe in
            each run directory.

    Returns:
        Absolute path of the first matching ``notice.log``, or ``None`` if
        the directory is missing or nothing matches.
    """
    if not os.path.isdir(experiments_dir):
        return None

    pattern = re.compile(r"(\d+)-" + re.escape(instance_id))

    # Sort by numerical id so the result does not depend on the arbitrary
    # order in which os.listdir() returns entries.
    candidates = []
    for entry in os.listdir(experiments_dir):
        matched = pattern.fullmatch(entry)
        if matched:
            candidates.append((int(matched.group(1)), entry))
    candidates.sort()

    for _, entry in candidates:
        logs_dir = os.path.join(experiments_dir, entry, "logs")
        if not os.path.isdir(logs_dir):
            continue

        timestamps = sorted(
            (
                name
                for name in os.listdir(logs_dir)
                if os.path.isdir(os.path.join(logs_dir, name))
            ),
            reverse=True,
        )

        for timestamp in timestamps:
            generator_root = os.path.join(
                logs_dir, timestamp, "run", instance_id, "generator"
            )
            if not os.path.isdir(generator_root):
                continue

            for generator_id in range(max_generators):
                generator_dir = os.path.join(generator_root, f"{generator_id:03d}")
                debug_log_path = os.path.join(generator_dir, "debug.log")
                notice_log_path = os.path.join(generator_dir, "notice.log")

                if not os.path.exists(debug_log_path):
                    continue
                # A qualifying debug.log without a notice.log is skipped and
                # the search moves on to the next generator.
                if _contains_successful_patch(debug_log_path) and os.path.exists(
                    notice_log_path
                ):
                    return os.path.abspath(notice_log_path)

    return None


def _contains_successful_patch(debug_log_path: str) -> bool:
    """Return True if the debug.log ends with a non-empty golden patch.

    The last non-empty line must contain ``result_data: `` followed by a
    Python dict literal. That dict must have a ``golden_patch`` list whose
    first element is a dict with a non-blank string ``patch_content``.

    Any read or parse failure is treated as "no successful patch".

    Args:
        debug_log_path: Path to the ``debug.log`` file to inspect.

    Returns:
        True when a non-empty patch is found, otherwise False.
    """
    marker = "result_data: "

    # Stream the file so a large log is not held in memory just to find
    # its last non-empty line.
    last_line = None
    try:
        with open(debug_log_path, "r", encoding="utf-8") as f:
            for raw_line in f:
                stripped = raw_line.strip()
                if stripped:
                    last_line = stripped
    except (OSError, UnicodeError):
        return False

    if not last_line or marker not in last_line:
        return False

    literal_text = last_line.partition(marker)[2]

    # SECURITY: this used to call eval() on log content, which executes
    # arbitrary code embedded in a log file. ast.literal_eval only accepts
    # Python literals (dict/list/str/numbers/...), which is all the
    # result_data record needs.
    try:
        result_data = ast.literal_eval(literal_text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return False

    if not isinstance(result_data, dict):
        return False

    golden_patch = result_data.get("golden_patch")
    if not isinstance(golden_patch, list) or not golden_patch:
        return False

    first_patch = golden_patch[0]
    if not isinstance(first_patch, dict):
        return False

    patch_content = first_patch.get("patch_content")
    return isinstance(patch_content, str) and bool(patch_content.strip())