dptech-corp · hypnopump · Aug 1, 2022 · Aug 1, 2022 · Aug 1, 2022 · Aug 1, 2022
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -0,0 +1,33 @@
+name: Build and Publish Docker  # builds the image from ./docker/ and pushes it to DockerHub
+
+on:
+  push:
+    branches:
+    - main  # the workflow is triggered only by pushes to main
+
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v3
+      -
+        name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      -
+        name: Login to DockerHub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}  # credentials are read from GitHub Actions secrets
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      -
+        name: Build and push
+        uses: docker/build-push-action@v3
+        with:
+          context: ./docker/  # directory used as the Docker build context
+          push: true  # push the built image instead of only building it
+          tags: dptechnology/unifold:latest-pytorch1.11.0-cuda11.3  # fixed tag: every run pushes to this same tag
diff --git a/.gitignore b/.gitignore
@@ -120,3 +120,4 @@ test/
 *.tfevents.*
 *.sto
 *.a3m
+nogit/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,11 @@
+# How to Contribute
+
+Uni-Fold is an ongoing project. Our target is to develop better protein folding models and to apply them in real scenarios together with the entire community. We welcome all contributions to this repository, including but not limited to 1) reports and fixes of bugs, 2) new features and 3) accuracy and efficiency improvements.
+
+## Developer Certificate of Origin
+
+Contributions to this project must be accompanied by a [Developer Certificate of Origin](DCO.txt). You (or your employer) retain the copyright to your contribution. The certificate only restricts you to using the same license for your contribution.
+
+## Code review
+
+All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.
diff --git a/DCO.txt b/DCO.txt
@@ -0,0 +1,21 @@
+                      Developer Certificate of Origin
+
+By making a contribution to this project, the contributor (“I”) certifies that:
+
+  (1) The contribution was created in whole or in part by me, and I have the 
+  right to submit it under the open source license indicated in the file; or
+
+  (2) The contribution is based upon previous work which is covered under an 
+  appropriate open source license and I have the right under that license to 
+  submit that work with modifications, whether created in whole or in part by 
+  me, which are under the same open source license (unless I am permitted to 
+  submit under a different license); or
+
+  (3) The contribution was provided directly to me by some other person who 
+  certified (1), (2) or (3), and I have not modified it.
+
+  (4) I understand and agree that this project and the contribution are 
+  public, and that a record of the contribution (including all personal 
+  information I submit with it, including my sign-off) is maintained 
+  indefinitely and may be redistributed consistent with this project or the 
+  open source license(s) involved.
diff --git a/README.md b/README.md
diff --git a/benchmark/perf_others.py b/benchmark/perf_others.py
@@ -0,0 +1,179 @@
+# modified from https://github.com/hpcaitech/FastFold/blob/main/benchmark/perf.py
+import argparse
+import os
+
+import torch
+import torch.nn as nn
+
+from fastfold.distributed import init_dap
+from fastfold.model.fastnn import Evoformer
+
+
+def main():
+    """Benchmark Evoformer forward/backward time per layer and peak CUDA memory.
+
+    Parses the command line, stacks ``--layers`` FastFold (default) or OpenFold
+    (``--openfold``) Evoformer layers, runs warmup plus timed trials on random
+    inputs, and prints the averaged times (ms) and peak memory (GB).
+
+    Raises:
+        NotImplementedError: if CUDA is not available.
+    """
+    parser = argparse.ArgumentParser(description='Evoformer Standalone Perf Benchmark')
+    parser.add_argument('--dap-size', default=1, type=int,
+                        help='DAP size passed to init_dap; input MSA/residue axes are divided by it')
+    parser.add_argument('--batch-size', default=1, type=int, help='batch size')
+    parser.add_argument('--msa-length', default=128, type=int, help='Sequence Length of MSA')
+    parser.add_argument('--res-length', default=256, type=int,
+                        help='Sequence Length of Residues')
+    parser.add_argument('--trials', default=50, type=int, help='Number of Trials to Execute')
+    parser.add_argument('--warmup-trials', default=5, type=int, help='Warmup Trials to discard')
+    parser.add_argument('--layers', default=4, type=int, help='Evoformer Layers to Execute')
+    parser.add_argument('--cm', default=256, type=int, help='MSA hidden dimension')
+    parser.add_argument('--cz', default=128, type=int, help='Pair hidden dimension')
+    # NOTE(review): --heads is parsed but never read below; confirm whether it should be wired in.
+    parser.add_argument('--heads', default=8, type=int, help='Number of Multihead Attention heads')
+    parser.add_argument('--openfold', action='store_true',
+                        help='Benchmark with Evoformer Implementation from OpenFold.')
+    parser.add_argument('--openfold-lma', action='store_true',
+                        help='set use_lma to True in openfold.')
+    parser.add_argument('--fwd', action='store_true', help='Only execute Fwd Pass.')
+
+    args = parser.parse_args()
+    if args.trials < 1 or args.layers < 1:
+        # Both are divisors of the reported averages; reject them before any GPU work.
+        parser.error('--trials and --layers must be at least 1')
+
+    init_dap(args.dap_size)
+
+    precision = torch.bfloat16
+    if args.dap_size > 1:
+        # (PyTorch issue) Currently All2All communication does not support the Bfloat16 datatype in PyTorch
+        precision = torch.float16
+
+    if not torch.cuda.is_available():
+        raise NotImplementedError('Running on CPU is not supported')
+
+    torch.manual_seed(42)
+    # CUDA is guaranteed past the check above, so no second availability test is needed.
+    torch.cuda.manual_seed_all(42)
+
+    if args.openfold:
+        from openfold.model.evoformer import EvoformerBlock
+
+        class OpenFoldEvoformer(nn.Module):
+            """Wraps one OpenFold EvoformerBlock, called as (node, pair, node_mask, pair_mask)."""
+
+            def __init__(self, d_node, d_pair):
+                super(OpenFoldEvoformer, self).__init__()
+                self.d_node = d_node
+                self.d_pair = d_pair
+
+                self.c_hidden_msa_att = int(d_node / 8)
+                self.c_hidden_pair_att = int(d_pair / 4)
+
+                self.EvoformerBlock = EvoformerBlock(c_m=d_node,
+                                                     c_z=d_pair,
+                                                     c_hidden_msa_att=self.c_hidden_msa_att,
+                                                     c_hidden_opm=self.c_hidden_msa_att,
+                                                     c_hidden_mul=self.d_pair,
+                                                     c_hidden_pair_att=self.c_hidden_pair_att,
+                                                     no_heads_msa=8,
+                                                     no_heads_pair=4,
+                                                     transition_n=4,
+                                                     msa_dropout=0.15,
+                                                     pair_dropout=0.25,
+                                                     inf=1e9,
+                                                     eps=1e-10)
+
+            def forward(self, node, pair, node_mask, pair_mask):
+                node, pair = self.EvoformerBlock(node, pair, node_mask, pair_mask, use_lma=args.openfold_lma)
+                return node, pair
+
+    attn_layers = []
+    for _ in range(args.layers):
+        if args.openfold:
+            layer = OpenFoldEvoformer(d_node=args.cm, d_pair=args.cz)
+        else:
+            layer = Evoformer(d_node=args.cm, d_pair=args.cz)
+        layer.cuda()
+        layer.to(dtype=precision)
+        attn_layers.append(layer)
+
+    # One CUDA event triple per timed trial: fwd start, fwd end / bwd start, bwd end.
+    start_evt_fwd = [torch.cuda.Event(enable_timing=True) for _ in range(args.trials)]
+    start_evt_bwd = [torch.cuda.Event(enable_timing=True) for _ in range(args.trials)]
+    stop_evt_bwd = [torch.cuda.Event(enable_timing=True) for _ in range(args.trials)]
+
+    inputs_node = torch.randn(args.batch_size,
+                              args.msa_length // args.dap_size,
+                              args.res_length,
+                              args.cm,
+                              dtype=precision,
+                              device=torch.device("cuda")).requires_grad_(True)
+    inputs_pair = torch.randn(args.batch_size,
+                              args.res_length // args.dap_size,
+                              args.res_length,
+                              args.cz,
+                              dtype=precision,
+                              device=torch.device("cuda")).requires_grad_(True)
+    # Masks are all ones and keep the full (undivided) MSA / residue lengths.
+    node_mask = torch.ones((args.batch_size, args.msa_length, args.res_length),
+                           dtype=precision,
+                           device=torch.device("cuda")).requires_grad_(False)
+    pair_mask = torch.ones((args.batch_size, args.res_length, args.res_length),
+                           dtype=precision,
+                           device=torch.device("cuda")).requires_grad_(False)
+
+    total_used_mem_gb = 0
+    for trial in range(args.trials + args.warmup_trials):
+        layer_inputs = inputs_node, inputs_pair
+        # Negative during warmup, so warmup trials record no timing events.
+        evt_idx = trial - args.warmup_trials
+
+        torch.distributed.barrier()
+        torch.cuda.synchronize()
+        torch.cuda.reset_peak_memory_stats()
+        if evt_idx >= 0:
+            start_evt_fwd[evt_idx].record()
+        with torch.set_grad_enabled(not args.fwd):
+            for layer in attn_layers:
+                layer_inputs = layer.forward(*layer_inputs, node_mask, pair_mask)
+
+        torch.cuda.synchronize()
+        if evt_idx >= 0:
+            start_evt_bwd[evt_idx].record()
+
+        if not args.fwd:
+            s = layer_inputs[0].mean() + layer_inputs[1].mean()
+            s.backward()
+
+        torch.cuda.synchronize()
+        # Peak since the reset above; warmup trials are included in the memory average.
+        total_used_mem_gb += torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024
+        if evt_idx >= 0:
+            stop_evt_bwd[evt_idx].record()
+
+    torch.cuda.synchronize()
+    elapsed_time_fwd = 0.0
+    elapsed_time_bwd = 0.0
+    for evt_idx in range(args.trials):
+        elapsed_time_fwd += start_evt_fwd[evt_idx].elapsed_time(start_evt_bwd[evt_idx])
+        elapsed_time_bwd += start_evt_bwd[evt_idx].elapsed_time(stop_evt_bwd[evt_idx])
+
+    print(
+        "Input: {:4d}, {:4d}, {:4d}, ({:4d} {:4d}), Fwd Time / Layer: {:.3f} ms, Bwd Time / Layer: {:.3f} ms, Memory cost {:.3f} GB".format(
+            args.batch_size,
+            args.msa_length,
+            args.res_length,
+            args.cm,
+            args.cz,
+            elapsed_time_fwd  / (args.trials * args.layers),
+            elapsed_time_bwd  / (args.trials * args.layers),
+            total_used_mem_gb / (args.trials + args.warmup_trials),
+        )
+    )
+
+
+if __name__ == '__main__':
+    main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -120,3 +120,4 @@ test/ @@
     *.tfevents.*
     *.sto
     *.a3m
+    nogit/