From 9cd89a6965cbaee16d7f8c66793dac99b629c76a Mon Sep 17 00:00:00 2001 From: Tina Xia Date: Fri, 14 Mar 2025 14:02:02 -0400 Subject: [PATCH 1/4] adding all JDK API changes report scripts --- report-generation/README.md | 55 +++++++ report-generation/generate-csv.py | 62 ++++++++ report-generation/generate-json.py | 154 ++++++++++++++++++ report-generation/generate-report.py | 184 ++++++++++++++++++++++ report-generation/perform-minimization.py | 82 ++++++++++ 5 files changed, 537 insertions(+) create mode 100644 report-generation/README.md create mode 100644 report-generation/generate-csv.py create mode 100644 report-generation/generate-json.py create mode 100644 report-generation/generate-report.py create mode 100644 report-generation/perform-minimization.py diff --git a/report-generation/README.md b/report-generation/README.md new file mode 100644 index 00000000000..f326e0197b1 --- /dev/null +++ b/report-generation/README.md @@ -0,0 +1,55 @@ +JDK API Changes Report Generation + +Tina Xia, tzxia@uwaterloo.ca for issues or questions +Friday, March 14, 2025 + +This project automates the review process for newly introduced methods across subsequent JDK versions. + +----------- + +Configuration + +PERFORMING MINIMIZATION AND STORING RESULTS +1. Create a sub-directory called "results" in the "report-generation" directory + +2. Update the absolute paths in perform-minimization.py to match your environment: + +JDK_REPO_PATH: path to your local JDK repository. +CHECKER_FRAMEWORK_REPO_PATH: path to your local Checker Framework repository. +RESULTS_BASE_DIR: path to the "results" sub-directory you just created + +3. run minimization script: +- python3 perform-minimization.py +- This will generate and store the minimized files for each jdk version in "results" + +GENERATE DIFF JSONS +4. 
run JSON generation script: +- python3 generate-json.py +- This will compare and store differences between files in a sub-directory called json_files in a directory called "report-files" +- This script can be modified to only generate JSONs for a defined sub-set of minimized files by altering the versions array in the processing portion + +GENERATE CSVs +5. run CSV generation script: +- python3 generate-csv.py +- This will generate a csv tracker for the review status of classes by iterating over the JSON files generated in the previous step +- NOTE: when introducing a new jdk version, this file must be modified to generate a CSV for ONLY the newly introduced jdk version, or past CSVs will be completely reset. + +GENERATE HTML Report +6. run HTML report generation script: +Finally, run the HTML report generator to create a browsable report using Bootstrap styling. This report generates an index and a seperate html page for each jdk version, and opens the report in your browser. + +- python3 generate-report.py +- this creates and opens a report in your browser, loading in information from the CSVs and JSONs generated in the previous step + +----------- + +Use + +Once a method has been reviewed, update the final column ("checked") of its row by changing the entry from "False" to "True". The next time the report generation script is run, a tag will appear on the report that indicates its updated status. You must re-run the report generation script "python3 generate-report.py" to see the changes. + +----------- + +Future Updates + +New JDK Versions: +When a new JDK version is released, update the branch list in the scripts to only include the newly introduced versions. This ensures that processing a new branch does not overwrite progress in previous versions. 
\ No newline at end of file diff --git a/report-generation/generate-csv.py b/report-generation/generate-csv.py new file mode 100644 index 00000000000..de9c4df266d --- /dev/null +++ b/report-generation/generate-csv.py @@ -0,0 +1,62 @@ +import json +import os +import csv + +csv_folder = "../report-files/csv_reports" +os.makedirs(csv_folder, exist_ok=True) + +data = {} +json_folder = "../report-files/json_files" +os.makedirs(csv_folder, exist_ok=True) + +json_files = [f for f in os.listdir(json_folder) if f.endswith(".json")] + +#this list must be modified for newly added jdk versions, so that past versions are not overwitten with every method review status set to False +versions_to_process = ['jdk-18', 'jdk-19', 'jdk-20', 'jdk-21', 'jdk-22', 'jdk-23', 'jdk-24'] + +for filename in os.listdir(json_folder): + if filename.endswith(".json"): + version = os.path.splitext(filename)[0] + #processes only the versions in the versions_to_process list + if version not in versions_to_process: + continue + file_path = os.path.join(json_folder, filename) + with open(file_path, "r", encoding="utf-8") as f: + file_data = json.load(f) + data[version] = file_data[version] + +for version in data: + #gets current csv path + current_csv = os.path.join(csv_folder, f"{version}_methods.csv") + with open(current_csv, "w", newline='', encoding="utf-8") as csvfile: + + fieldnames = ["version", "file_url", "method", "checked"] + + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + #since methods are grouped by file_url in the methods json + for file_url, file_data in data[version].items(): + new_changes = file_data.get("new_changes", {}) + new_classes = new_changes.get("new_classes", []) + new_methods = new_changes.get("new_methods", []) + + writer.writerow({ + "version": version, + "file_url": file_url, + "checked": "False" + }) + + # Write new class methods + for cls in new_classes: + declaration = cls.get("declaration") + + #store checked status for newly 
introduced classes + writer.writerow({ + "version": version, + "file_url": file_url, + "method": cls.get("declaration"), + "checked": "False" + }) + + print(f"Created csv for {version}") diff --git a/report-generation/generate-json.py b/report-generation/generate-json.py new file mode 100644 index 00000000000..a5af73932f2 --- /dev/null +++ b/report-generation/generate-json.py @@ -0,0 +1,154 @@ +import os +import json +import difflib +import re + +#matches for empty lines, "import", any comment "/*", and closing braces (}) +pattern = re.compile(r'^(?:\s*$|import\b|.*\/\*.*|\s*\*|\s*\})') + +def extract_new_changes(old_lines, new_lines): + + #get list of differences + diff = list(difflib.ndiff(old_lines, new_lines)) + new_changes = [] #list of tuples (new_line_index, content) + new_index = 0 #get index to check whether method belongs to a new class + + for line in diff: + if line.startswith('+ '): + content = line[2:].strip() + if not pattern.match(content): + new_changes.append((new_index, content)) + + #increase line counter + if line.startswith(' ') or line.startswith('+ '): + new_index += 1 + + #creates a list to get the position of every class + class_positions = [] + for i, line in enumerate(new_lines): + + if re.search(r'\bclass\b', line): + class_positions.append((i, line.strip())) + + new_classes = {} + #for new methods that have been added to an existing class + new_methods = [] + + for idx, content in new_changes: + if re.search(r'\bclass\b', content): + new_classes[idx] = {"declaration": content, "methods": []} + + for idx, content in new_changes: + if re.search(r'\bclass\b', content): + continue #already handled + + parent_class_idx = None + for pos, decl in class_positions: + if pos <= idx: + parent_class_idx = pos + else: + break + + if parent_class_idx is not None and parent_class_idx in new_classes: + new_classes[parent_class_idx]["methods"].append(content) + else: + new_methods.append(content) + + return { + "new_classes": 
list(new_classes.values()), + "new_methods": new_methods + } + +#https://stackoverflow.com/questions/8625991/use-python-os-walk-to-identify-a-list-of-files +#uses os.walk to get all java files in every subdirectory of results (after minimization is performed) +def list_java_files(directory): + java_files = [] + for root, _, files in os.walk(directory): + for file in files: + full_path = os.path.join(root, file) + relative_path = os.path.relpath(full_path, directory) + java_files.append(relative_path) + + #list of relative path of every file + return java_files + +def read_file_lines(filepath): + + with open(filepath, 'r', encoding='utf-8') as f: + return f.readlines() + +#https://docs.python.org/3/library/difflib.html +#uses python's difflib to compare two lines at a time +def extract_new_lines(old_lines, new_lines): + + diff = difflib.ndiff(old_lines, new_lines) + newly_added = [] + + for line in diff: + #new lines will start with + + if line.startswith('+ '): + content = line[2:].strip() + #makes sure a new line passes regex check before appending + if not pattern.match(content): + newly_added.append(content) + return newly_added + +#returns a dictionary of new changes between subsequent JDK versions +def compare_version_pair(old_version_dir, new_version_dir): + new_files = list_java_files(new_version_dir) + file_diff = {} + + for rel_path in new_files: + #generates github URL + branch = os.path.basename(new_version_dir) + url = f"https://github.com/eisop/jdk/tree/{branch}" + parts = rel_path.split(os.path.sep) + url = url + '/' + '/'.join(parts[1:]) + + new_file_path = os.path.join(new_version_dir, rel_path) + old_file_path = os.path.join(old_version_dir, rel_path) + + if os.path.exists(old_file_path): + old_lines = read_file_lines(old_file_path) + else: + old_lines = [] + + new_lines = read_file_lines(new_file_path) + + changes = extract_new_changes(old_lines, new_lines) + if changes["new_classes"] or changes["new_methods"]: + file_diff[url] = { + 
"new_changes": changes + } + return file_diff + +#this list must be modified for newly added jdk versions +versions = ['master', 'jdk-18', 'jdk-19', 'jdk-20', 'jdk-21', 'jdk-22', 'jdk-23', 'jdk-24'] + +base_path = './results/' + +json_folder = "../report-files/json_files" +os.makedirs(json_folder, exist_ok=True) + +#compares all subsequent versions, creates a seperate json file for each version pair +for i in range(1, len(versions)): + + results = {} + + old_version = versions[i - 1] + new_version = versions[i] + + version_pair_key = new_version + + old_dir = os.path.join(base_path, old_version) + new_dir = os.path.join(base_path, new_version) + + current_diff = compare_version_pair(old_dir, new_dir) + results[version_pair_key] = current_diff + + current_json = os.path.join(json_folder, f"{versions[i]}.json") + + with open(current_json, 'w', encoding='utf-8') as out_f: + json.dump(results, out_f, indent=2) + + print(f"Created json for {old_version} -> {new_version}") \ No newline at end of file diff --git a/report-generation/generate-report.py b/report-generation/generate-report.py new file mode 100644 index 00000000000..3e934e030f4 --- /dev/null +++ b/report-generation/generate-report.py @@ -0,0 +1,184 @@ +import json +import os +import csv +import webbrowser +from html import escape + +data = {} +json_folder = "../report-files/json_files" + +#loads data from all present json files +for filename in os.listdir(json_folder): + if filename.endswith(".json"): + version = os.path.splitext(filename)[0] + file_path = os.path.join(json_folder, filename) + with open(file_path, "r", encoding="utf-8") as f: + file_data = json.load(f) + data[version] = file_data[version] + +checked_methods = {} +# If your CSV files are now directly in the report-files directory, +# update the csv_folder accordingly (remove the subfolder name). 
+csv_folder = "../report-files/csv_reports" + +#loads the review status from the csv files +for version in data: + csv_file = os.path.join(csv_folder, f"{version}_methods.csv") + if os.path.exists(csv_file): + with open(csv_file, "r", encoding="utf-8") as f_csv: + reader = csv.DictReader(f_csv) + for row in reader: + key = (row["version"], row["file_url"], row["method"]) + checked_methods[key] = (row["checked"].strip().lower() == "true") + +# Set the output directory to be a subfolder inside report-files +output_dir = os.path.join("..", "report-files", "reports") +os.makedirs(output_dir, exist_ok=True) + +custom_css = """ + +""" + +# HTML for the index page +index_content = f""" + + + + + JDK API Changes + + {custom_css} + + +
+

JDK API Changes

+ +
+ + + +""" +#index page into the output_dir (report-files/reports) +index_path = os.path.join(output_dir, "index.html") +with open(index_path, "w", encoding="utf-8") as f_index: + f_index.write(index_content) + +webbrowser.open("file://" + os.path.abspath(index_path)) diff --git a/report-generation/perform-minimization.py b/report-generation/perform-minimization.py new file mode 100644 index 00000000000..adc013dab12 --- /dev/null +++ b/report-generation/perform-minimization.py @@ -0,0 +1,82 @@ +import os +import subprocess +import shutil +import platform + +#update these absolute paths for your environment +JDK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/jdk" +CHECKER_FRAMEWORK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/checker-framework" +RESULTS_BASE_DIR = "/Users/tinaxia/Desktop/eisop/jdk/report-generation/results" + +if platform.system() == "Windows": + gradle_task_cmd = ["cmd", "/c", "gradlew.bat", "copyAndMinimizeAnnotatedJdkFiles"] +else: + gradle_task_cmd = ["sh", "./gradlew", "copyAndMinimizeAnnotatedJdkFiles"] + +#move into JDK repository +try: + os.chdir(JDK_REPO_PATH) +except FileNotFoundError: + print(f"could not find {JDK_REPO_PATH}") + exit(1) + +#fetch remote branches +subprocess.run(["git", "fetch", "--all", "--prune"], check=True) + +#get list of remote branches using ls-remote +result = subprocess.run(["git", "ls-remote", "--heads", "origin"], + capture_output=True, text=True) +branches = [line.split("\t")[1].replace("refs/heads/", "") + for line in result.stdout.splitlines() if line.strip()] + +#processing +for jdk_version in branches: + print(f"\nprocessing version: {jdk_version}") + + # Checkout the branch and reset to the remote version + try: + subprocess.run(["git", "checkout", jdk_version], check=True) + subprocess.run(["git", "reset", "--hard", f"origin/{jdk_version}"], check=True) + except subprocess.CalledProcessError as e: + print(f"Error checking out branch {jdk_version}: {e.stderr}") + continue + + print(f"Checked out JDK version: 
{jdk_version}") + + #move into checker-framework to run minimization + try: + os.chdir(CHECKER_FRAMEWORK_REPO_PATH) + except FileNotFoundError: + print(f"Could not find {CHECKER_FRAMEWORK_REPO_PATH}") + exit(1) + + # Run the minimization Gradle task + print(f"Running Gradle task for {jdk_version}...") + try: + subprocess.run(gradle_task_cmd, check=True) + print(f"Gradle task done for {jdk_version}!") + except subprocess.CalledProcessError as e: + print(f"Error running for {jdk_version}: {e.stderr}") + exit(1) + + # get output directory of the gradle task + OUTPUT_DIR = os.path.join(CHECKER_FRAMEWORK_REPO_PATH, "framework", "build", "generated", "resources", "annotated-jdk") + if not os.path.exists(OUTPUT_DIR): + print("error finding otuput directory") + os.chdir(JDK_REPO_PATH) + continue + + # Define the final results directory for this branch + result_dir = os.path.join(RESULTS_BASE_DIR, jdk_version) + if os.path.exists(result_dir): + shutil.rmtree(result_dir) + os.makedirs(result_dir, exist_ok=True) + + #move the output directory into results directory + shutil.move(OUTPUT_DIR, result_dir) + print(f"Result stored in: {result_dir}") + + #switch back to JDK repo to process next version + os.chdir(JDK_REPO_PATH) + +print("\nAll minimizations completed!") From 20575d64e3cf3696e733c44a6a3f6ee4c17b7022 Mon Sep 17 00:00:00 2001 From: Tina Xia Date: Sun, 16 Mar 2025 23:42:57 -0400 Subject: [PATCH 2/4] comment clarity edit --- report-generation/README.md | 1 - report-generation/generate-json.py | 2 +- report-generation/generate-report.py | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/report-generation/README.md b/report-generation/README.md index f326e0197b1..ad7ae670f95 100644 --- a/report-generation/README.md +++ b/report-generation/README.md @@ -36,7 +36,6 @@ GENERATE CSVs GENERATE HTML Report 6. run HTML report generation script: -Finally, run the HTML report generator to create a browsable report using Bootstrap styling. 
This report generates an index and a seperate html page for each jdk version, and opens the report in your browser. - python3 generate-report.py - this creates and opens a report in your browser, loading in information from the CSVs and JSONs generated in the previous step diff --git a/report-generation/generate-json.py b/report-generation/generate-json.py index a5af73932f2..93643e286e4 100644 --- a/report-generation/generate-json.py +++ b/report-generation/generate-json.py @@ -60,7 +60,7 @@ def extract_new_changes(old_lines, new_lines): } #https://stackoverflow.com/questions/8625991/use-python-os-walk-to-identify-a-list-of-files -#uses os.walk to get all java files in every subdirectory of results (after minimization is performed) +#get all java files in every subdirectory of results (after minimization is performed) def list_java_files(directory): java_files = [] for root, _, files in os.walk(directory): diff --git a/report-generation/generate-report.py b/report-generation/generate-report.py index 3e934e030f4..3efefbf6280 100644 --- a/report-generation/generate-report.py +++ b/report-generation/generate-report.py @@ -17,8 +17,6 @@ data[version] = file_data[version] checked_methods = {} -# If your CSV files are now directly in the report-files directory, -# update the csv_folder accordingly (remove the subfolder name). 
csv_folder = "../report-files/csv_reports" #loads the review status from the csv files @@ -31,7 +29,6 @@ key = (row["version"], row["file_url"], row["method"]) checked_methods[key] = (row["checked"].strip().lower() == "true") -# Set the output directory to be a subfolder inside report-files output_dir = os.path.join("..", "report-files", "reports") os.makedirs(output_dir, exist_ok=True) From 9f8a7bf088214ed1260130efca7eacd76c243927 Mon Sep 17 00:00:00 2001 From: Tina Xia Date: Wed, 2 Apr 2025 00:03:05 -0400 Subject: [PATCH 3/4] branch restoration changes --- report-generation/perform-minimization.py | 32 +++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/report-generation/perform-minimization.py b/report-generation/perform-minimization.py index adc013dab12..c6a9311691c 100644 --- a/report-generation/perform-minimization.py +++ b/report-generation/perform-minimization.py @@ -13,12 +13,27 @@ else: gradle_task_cmd = ["sh", "./gradlew", "copyAndMinimizeAnnotatedJdkFiles"] +if not os.path.isdir(JDK_REPO_PATH): + print(f"Error: '{JDK_REPO_PATH}' does not exist or is not a directory.") + sys.exit(1) + +if not os.path.isdir(CHECKER_FRAMEWORK_REPO_PATH): + print(f"Error: '{CHECKER_FRAMEWORK_REPO_PATH}' does not exist or is not a directory.") + sys.exit(1) + +if not os.path.isdir(RESULTS_BASE_DIR): + print(f"Error: '{RESULTS_BASE_DIR}' does not exist or is not a directory.") + sys.exit(1) + + #move into JDK repository -try: - os.chdir(JDK_REPO_PATH) -except FileNotFoundError: - print(f"could not find {JDK_REPO_PATH}") - exit(1) +os.chdir(JDK_REPO_PATH) + +initial_branch_result = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, text=True, check=True +) +initial_branch = initial_branch_result.stdout.strip() #fetch remote branches subprocess.run(["git", "fetch", "--all", "--prune"], check=True) @@ -80,3 +95,10 @@ os.chdir(JDK_REPO_PATH) print("\nAll minimizations completed!") + +try: + 
os.chdir(JDK_REPO_PATH) + subprocess.run(["git", "checkout", initial_branch], check=True) + print(f"\nChecked out the original branch: {initial_branch}") +except Exception as e: + print(f"Warning: could not switch back to the initial branch '{initial_branch}'. Error: {e}") From c8040520db7c09e6da77507263096a463fa75c4e Mon Sep 17 00:00:00 2001 From: Tina Xia Date: Wed, 2 Apr 2025 00:14:08 -0400 Subject: [PATCH 4/4] configuration file creation, updates to branch handling in minimization --- report-generation/README.md | 4 ++-- report-generation/config.py | 4 ++++ report-generation/perform-minimization.py | 27 ++++++++++++++--------- 3 files changed, 22 insertions(+), 13 deletions(-) create mode 100644 report-generation/config.py diff --git a/report-generation/README.md b/report-generation/README.md index ad7ae670f95..61c1b411cfd 100644 --- a/report-generation/README.md +++ b/report-generation/README.md @@ -1,7 +1,7 @@ JDK API Changes Report Generation Tina Xia, tzxia@uwaterloo.ca for issues or questions -Friday, March 14, 2025 +Wednesday, April 2, 2025 This project automates the review process for newly introduced methods across subsequent JDK versions. @@ -12,7 +12,7 @@ Configuration PERFORMING MINIMIZATION AND STORING RESULTS 1. Create a sub-directory called "results" in the "report-generation" directory -2. Update the absolute paths in perform-minimization.py to match your environment: +2. Update the absolute paths in config.py to match your environment: JDK_REPO_PATH: path to your local JDK repository. CHECKER_FRAMEWORK_REPO_PATH: path to your local Checker Framework repository. 
diff --git a/report-generation/config.py b/report-generation/config.py new file mode 100644 index 00000000000..58986208afe --- /dev/null +++ b/report-generation/config.py @@ -0,0 +1,4 @@ +# Update these absolute paths for your environment +JDK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/jdk" +CHECKER_FRAMEWORK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/checker-framework" +RESULTS_BASE_DIR = "/Users/tinaxia/Desktop/eisop/jdk/report-generation/results" diff --git a/report-generation/perform-minimization.py b/report-generation/perform-minimization.py index c6a9311691c..9bb3568dae5 100644 --- a/report-generation/perform-minimization.py +++ b/report-generation/perform-minimization.py @@ -2,28 +2,30 @@ import subprocess import shutil import platform +import config #update these absolute paths for your environment -JDK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/jdk" -CHECKER_FRAMEWORK_REPO_PATH = "/Users/tinaxia/Desktop/eisop/checker-framework" -RESULTS_BASE_DIR = "/Users/tinaxia/Desktop/eisop/jdk/report-generation/results" +JDK_REPO_PATH = config.JDK_REPO_PATH +CHECKER_FRAMEWORK_REPO_PATH = config.CHECKER_FRAMEWORK_REPO_PATH +RESULTS_BASE_DIR = config.RESULTS_BASE_DIR if platform.system() == "Windows": gradle_task_cmd = ["cmd", "/c", "gradlew.bat", "copyAndMinimizeAnnotatedJdkFiles"] else: gradle_task_cmd = ["sh", "./gradlew", "copyAndMinimizeAnnotatedJdkFiles"] +#error messages for missing directories if not os.path.isdir(JDK_REPO_PATH): - print(f"Error: '{JDK_REPO_PATH}' does not exist or is not a directory.") - sys.exit(1) + print(f"'{JDK_REPO_PATH}' not found. Please ensure you have the JDK repo on your local machine") + exit(1) if not os.path.isdir(CHECKER_FRAMEWORK_REPO_PATH): - print(f"Error: '{CHECKER_FRAMEWORK_REPO_PATH}' does not exist or is not a directory.") - sys.exit(1) + print(f"'{CHECKER_FRAMEWORK_REPO_PATH}' not found. 
Please ensure you have the Checker Framework repo on your local machine") + exit(1) if not os.path.isdir(RESULTS_BASE_DIR): - print(f"Error: '{RESULTS_BASE_DIR}' does not exist or is not a directory.") - sys.exit(1) + print(f"'{RESULTS_BASE_DIR}' not found. Please create this directory in the report-generation directory.") + exit(1) #move into JDK repository @@ -97,8 +99,11 @@ print("\nAll minimizations completed!") try: + #move into the jdk os.chdir(JDK_REPO_PATH) + #run git checkout on the initial branch subprocess.run(["git", "checkout", initial_branch], check=True) - print(f"\nChecked out the original branch: {initial_branch}") + print(f"\nMoved back into the initial branch {initial_branch}") except Exception as e: - print(f"Warning: could not switch back to the initial branch '{initial_branch}'. Error: {e}") + #output error in switching back to initial branch + print(f"Error switching back to the initial branch '{initial_branch}'. Error: {e}")