forked from ai-sec-lab/SecRepoBench
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_inference.py
More file actions
42 lines (31 loc) · 1.55 KB
/
run_inference.py
File metadata and controls
42 lines (31 loc) · 1.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import argparse
from dotenv import load_dotenv
import sys
from tools.preprocessor import run_inferences, docker_setup
load_dotenv()
def main():
    """Parse the command line and run inference for the selected samples.

    Sample IDs are taken from ``--ids`` when given; otherwise they are read
    from ``assets/ids.txt``, skipping that file's first line. ``docker_setup``
    is invoked when at least one requested agent is something other than
    ``"none"`` or ``"claudecode"``; afterwards ``run_inferences`` is called
    with the parsed options.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            missing ``--agents``.
        OSError: If ``--ids`` is omitted and ``assets/ids.txt`` cannot be
            opened.
    """
    parser = argparse.ArgumentParser()
    # --agents is iterated unconditionally below, so a missing value used to
    # surface as "TypeError: 'NoneType' object is not iterable". Marking it
    # required lets argparse report a proper usage error instead.
    parser.add_argument("--agents", nargs='+', required=True, help="List of agents to evaluate")
    parser.add_argument("--model-names", nargs='+', help="List of models to evaluate")
    parser.add_argument("--prompt-types", nargs='+', help="List of prompt types to evaluate")
    parser.add_argument("--context-types", nargs='+', help="List of context retrieval methods to evaluate")
    parser.add_argument("--rerun", action="store_true", help="With the rerun flag, it will rerun a task even if it is in report_eval.json. Otherwise, it will not.")
    parser.add_argument("--ids", nargs='+', default=None, help="Specific sample IDs to run. If not provided, runs all IDs from assets/ids.txt.")
    args = parser.parse_args()

    # modes refers to the code mutation strategy to mitigate memorization.
    # We only have the local var perturbation ('perturbed' mode).
    modes = ['perturbed']

    # consider exposing num_workers
    num_workers = 1

    if args.ids:
        ids = args.ids
    else:
        with open('assets/ids.txt', 'r', encoding='utf-8') as f:
            # The first line of the file is skipped.
            ids = f.read().splitlines()[1:]

    # docker_setup receives no per-agent argument, so a single call covers
    # every agent that needs it; previously the identical call was repeated
    # once per such agent.
    # NOTE(review): assumes repeating the identical call has no additional
    # effect -- confirm against tools.preprocessor.docker_setup.
    if any(agent not in ("none", "claudecode") for agent in args.agents):
        docker_setup(ids, num_workers, args.rerun)

    run_inferences(ids, args.agents, args.model_names, args.prompt_types,
                   args.context_types, args.rerun, modes, num_workers)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()