Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
92 commits
Select commit Hold shift + click to select a range
87a05d2
Refine documentation (#1)
ZiyaoLi Aug 1, 2022
c900ca8
add the convert script and alphafold original configs (#2)
BaozCWJ Aug 1, 2022
e2d555c
change scripts, example data and introduction (#3)
ZiyaoLi Aug 1, 2022
aad1250
fix typos (#4)
ZiyaoLi Aug 1, 2022
6d24472
add docker auto-build workflow (#5)
guolinke Aug 1, 2022
087f49c
add a simple install command (#6)
guolinke Aug 1, 2022
7553b29
code clean for imports
guolinke Aug 1, 2022
bb2d556
fix violation loss weight in monomer fine-tune
guolinke Aug 1, 2022
2e30eb9
add the convert script for AlphaFold and modify the README (#7)
BaozCWJ Aug 2, 2022
0153644
improve details (#8)
ZiyaoLi Aug 2, 2022
a2b00e8
add evaluation set (#9)
ZiyaoLi Aug 2, 2022
f5a4157
add the description about downloading Uni-Fold pretrained model param…
BaozCWJ Aug 3, 2022
7c7ea7b
add script for benchmarking & benchmark results (#10)
guolinke Aug 3, 2022
d167d60
dynamic length for extra_msa
guolinke Aug 5, 2022
f50639e
add config for model_1_ft
guolinke Aug 5, 2022
a97d7e9
fix feature stack of recycling
guolinke Aug 5, 2022
d0c4c13
Update README.md
guolinke Aug 6, 2022
32fbcf9
refine model parameters descriptions
guolinke Aug 6, 2022
c2c12c3
citation (#12)
ZiyaoLi Aug 6, 2022
b88286f
add evaluation results (#13)
ZiyaoLi Aug 6, 2022
2f633b6
link to nvidia-docker-2 (#16)
ZiyaoLi Aug 9, 2022
e05de86
use bf16 in demo by default
guolinke Aug 9, 2022
782592c
Hermite (#17)
ZiyaoLi Aug 10, 2022
7fe51ed
save extra msa mem (#18)
ZiyaoLi Aug 12, 2022
21754b8
add the colab version of Uni-Fold (#19)
BaozCWJ Aug 12, 2022
adb011d
fix the error of unexpected keyword argument in unifold.ipynb (#21)
BaozCWJ Aug 12, 2022
cf6cb27
hide the code blocks for default setting (#22)
BaozCWJ Aug 12, 2022
89f8efb
add citation
guolinke Aug 12, 2022
ffe132c
update the license in colab notebook
guolinke Aug 12, 2022
94fffd8
fix the error about pair.a3m (#24)
BaozCWJ Aug 12, 2022
58fedb1
add the section of download for colab notebook (#25)
BaozCWJ Aug 12, 2022
931703d
Update unifold.ipynb
guolinke Aug 12, 2022
dd4ad8f
Update unifold.ipynb
guolinke Aug 12, 2022
5daf108
refine notebook descriptions
guolinke Aug 12, 2022
b418f72
fix typo in notebook
guolinke Aug 12, 2022
0daa5e0
[colab] less verbosity & faster hhsuite installation
guolinke Aug 12, 2022
8f239d8
Update unifold.ipynb
guolinke Aug 13, 2022
42295f3
Update README.md
guolinke Aug 13, 2022
dbe9ed6
Update setup.py
guolinke Aug 14, 2022
572d450
[colab] fix wheel for p100
guolinke Aug 14, 2022
9126d38
refine unicore installation
guolinke Aug 14, 2022
ba152ad
fix typo in dockerfile
guolinke Aug 14, 2022
68787e2
add links in the header.
guolinke Aug 14, 2022
78352bd
Update README.md
guolinke Aug 14, 2022
768908b
optimize the consumption of memory in inference with chunked setting …
BaozCWJ Aug 14, 2022
c676727
correct the denominator of average memory in benchmark; fix the tri_m…
BaozCWJ Aug 16, 2022
52dc273
add TODO tag into template pair stack
guolinke Aug 18, 2022
8ad306f
Add BladeDISC Introduction to Readme. (#31)
yuchaoli Aug 18, 2022
d57a199
update bioRxiv url
guolinke Aug 21, 2022
8acf998
optimize the memory efficiency for Inference (#32)
BaozCWJ Aug 22, 2022
78f860f
reduce memory cost in multimer inference (#33)
guolinke Aug 24, 2022
ef5b88f
feat(docker): Add opencontainers image-spec to `Dockerfile` (#34)
SauravMaheshkar Aug 25, 2022
8b8fc0a
fix potential bug of is_monomer arg passing (#35)
ZiyaoLi Aug 25, 2022
b0d7ec5
Update Dockerfile
guolinke Aug 26, 2022
1d130ef
update mode option for inference (#36)
BaozCWJ Aug 26, 2022
74a63ef
swap case study (#39)
ZiyaoLi Aug 31, 2022
52adabe
add the automatic settings of block size for various lengths of predi…
BaozCWJ Sep 1, 2022
dfdebfb
change the definition of target name, which will directly depends on …
BaozCWJ Sep 7, 2022
f1f7baa
UF-Symmetry inference code (#44)
ZiyaoLi Sep 7, 2022
79b5811
minor fix of uf-symmetry (#45)
ZiyaoLi Sep 7, 2022
e6d076f
fix typo
ZiyaoLi Sep 7, 2022
02adce3
fix download link in README.md (#47)
avilella Sep 9, 2022
fb7e757
update paper link in readme
guolinke Sep 9, 2022
f06b1b0
Update download_pdb70.sh (#48)
avilella Sep 9, 2022
1c8238e
prevent the redundant processes when touching run all (#49)
BaozCWJ Sep 9, 2022
8dae0b7
change the download source of uf symmetry parameters to google drive …
BaozCWJ Sep 16, 2022
86ea44b
Refine colab notebook (#52)
ZiyaoLi Sep 22, 2022
abe7f88
add the default chains.txt for single-chain fasta (#54)
BaozCWJ Sep 27, 2022
4e65a4e
change default model name (#61)
ZiyaoLi Oct 10, 2022
029b078
move model parameters to github release (#64)
guolinke Oct 19, 2022
2ae159d
fix uf-symmetry wget url (#68)
ZiyaoLi Oct 29, 2022
5a93161
skip download af params (#70)
ZiyaoLi Nov 1, 2022
de21c73
refine colab notebook (#72)
ZiyaoLi Nov 8, 2022
6730e3b
Add documentation about the full dataset download (#75)
guolinke Nov 15, 2022
1fd074b
refine cmd line for model names (#76)
guolinke Nov 21, 2022
5c223a1
move colab url from google drive to github release (#78)
ZiyaoLi Dec 1, 2022
856e8ea
mv to py38 to satisfy colab (#79)
ZiyaoLi Dec 1, 2022
5b87b7e
add script for the evaluation (#82)
guolinke Dec 12, 2022
ab4b89a
add script for label extraction (#89)
teslacool Jan 6, 2023
413c78b
fix type after model.float() (#92)
teslacool Jan 12, 2023
b57e54a
update get assembly from mmcif (#96)
teslacool Feb 15, 2023
61edb37
add dataset download via Volcengine (#103)
guolinke Mar 3, 2023
cc2ede9
Update unifold.ipynb (#109)
ZiyaoLi Mar 24, 2023
cc7a511
add symmetry utils to process input structures (#110)
ZiyaoLi Apr 11, 2023
5596948
fix import libmsym (#117)
ZiyaoLi May 9, 2023
a94b03c
Update unifold.ipynb to cu118torch2.0.0-cp310 (#118)
ZiyaoLi May 11, 2023
a272252
tolerate inconsistencies between pdb_assembly and multi_label (#119)
ZiyaoLi May 11, 2023
af278f0
add alphafold v3 param parse & config (#122)
ZiyaoLi Jun 12, 2023
7e03aa2
correct load v3 (#125)
teslacool Jun 19, 2023
263d6dd
adding flash v2 torch for mask, custom for bias
hypnopump Oct 21, 2023
9fa677d
40% faster but grads are nan
hypnopump Dec 21, 2023
189443e
40% faster but grads are nan
hypnopump Dec 21, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Builds the Docker image from ./docker/ and pushes it to Docker Hub.
name: Build and Publish Docker

# Runs on every push to the main branch.
on:
  push:
    branches:
      - main

jobs:
  docker:
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v3
      -
        name: Set up QEMU
        uses: docker/setup-qemu-action@v2
      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      -
        # Credentials are read from the repository secrets
        # DOCKERHUB_USERNAME and DOCKERHUB_TOKEN.
        name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
      -
        # The build context is the ./docker/ directory; the image is pushed
        # under the single fixed tag below.
        name: Build and push
        uses: docker/build-push-action@v3
        with:
          context: ./docker/
          push: true
          tags: dptechnology/unifold:latest-pytorch1.11.0-cuda11.3
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,4 @@ test/
*.tfevents.*
*.sto
*.a3m
nogit/
11 changes: 11 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# How to Contribute

Uni-Fold is an ongoing project. Our target is to develop better protein folding models and to apply them in real scenarios together with the entire community. We welcome all contributions to this repository, including but not limited to 1) reports and fixes of bugs, 2) new features and 3) accuracy and efficiency improvements.

## Developer Certificate of Origin

Contributions to this project must be accompanied by a [Developer Certificate of Origin](DCO.txt). You (or your employer) retain the copyright to your contribution. The certificate only requires you to use the same license for your contribution.

## Code review

All submissions, including submissions by project members, require review. We use GitHub pull requests for this purpose. Consult [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more information on using pull requests.
21 changes: 21 additions & 0 deletions DCO.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Developer Certificate of Origin

By making a contribution to this project, the contributor(“I”) certify that:

(1) The contribution was created in whole or in part by me, and I have the
right to submit it under the open source license indicated in the file; or

(2) The contribution is based upon previous work which is covered under an
appropriate open source license and I have the right under that license to
submit that work with modifications, whether created in whole or in part by
me, which are under the same open source license (unless I am permitted to
submit under a different license); or

(3) The contribution was provided directly to me by some other person who
certified (1), (2) or (3), and I have not modified it.

(4) I understand and agree that this project and the contribution are
public, and that a record of the contribution (including all personal
information I submit with it, including my sign-off) is maintained
indefinitely and may be redistributed consistent with this project or the
open source license(s) involved.
320 changes: 284 additions & 36 deletions README.md

Large diffs are not rendered by default.

179 changes: 179 additions & 0 deletions benchmark/perf_others.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# modified from https://github.com/hpcaitech/FastFold/blob/main/benchmark/perf.py
import argparse
import os

import torch
import torch.nn as nn

from fastfold.distributed import init_dap
from fastfold.model.fastnn import Evoformer


def main():
    """Benchmark a stack of Evoformer layers and print timing and memory use.

    Parses the command line, builds ``--layers`` modules (FastFold's
    ``Evoformer`` by default, or a thin wrapper around OpenFold's
    ``EvoformerBlock`` when ``--openfold`` is given), feeds them random
    inputs for ``--warmup-trials`` untimed iterations followed by
    ``--trials`` timed ones, and prints one summary line containing:

    * the mean forward and backward time per layer (timed trials only), and
    * the mean peak allocated CUDA memory per iteration (warm-up included).

    With ``--fwd`` the backward pass is skipped and the forward pass runs
    with gradients disabled.

    Raises:
        NotImplementedError: If CUDA is not available.
        SystemExit: Raised by ``argparse`` when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(description='Evoformer Standalone Perf Benchmark')
    parser.add_argument("--dap-size", default=1, type=int,
                        help='Dynamic Axial Parallelism (DAP) size')
    parser.add_argument('--batch-size', default=1, type=int, help='batch size')
    parser.add_argument('--msa-length', default=128, type=int, help='Sequence Length of MSA')
    parser.add_argument('--res-length',
                        default=256,
                        type=int,
                        help='Sequence Length of Residues')
    parser.add_argument('--trials', default=50, type=int, help='Number of Trials to Execute')
    parser.add_argument('--warmup-trials', default=5, type=int, help='Warmup Trials to discard')
    parser.add_argument('--layers',
                        default=4,
                        type=int,
                        help='Evoformer Layers to Execute')
    parser.add_argument('--cm', default=256, type=int, help='MSA hidden dimension')
    parser.add_argument('--cz', default=128, type=int, help='Pair hidden dimension')
    # NOTE: --heads is accepted on the command line but is not read anywhere
    # below; the OpenFold wrapper hard-codes its head counts.
    parser.add_argument('--heads', default=8, type=int, help='Number of Multihead Attention heads')
    parser.add_argument('--openfold',
                        action='store_true',
                        help='Benchmark with Evoformer Implementation from OpenFold.')
    parser.add_argument('--openfold-lma',
                        action='store_true',
                        help='set use_lma to True in openfold.')
    parser.add_argument('--fwd', action='store_true', help='Only execute Fwd Pass.')

    args = parser.parse_args()

    # Reject values that would otherwise only surface as a ZeroDivisionError
    # (or as never-recorded CUDA events) after the whole benchmark has run.
    if args.trials < 1:
        parser.error('--trials must be at least 1')
    if args.layers < 1:
        parser.error('--layers must be at least 1')
    if args.warmup_trials < 0:
        parser.error('--warmup-trials must not be negative')

    init_dap(args.dap_size)

    precision = torch.bfloat16
    if args.dap_size > 1:
        # (PyTorch issue) Currently All2All communication does not support the Bfloat16 datatype in PyTorch
        precision = torch.float16

    if not torch.cuda.is_available():
        raise NotImplementedError('Running on CPU is not supported')

    # CUDA is guaranteed to be available from here on.
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)

    if args.openfold:
        # Imported lazily so OpenFold is only required when it is benchmarked.
        from openfold.model.evoformer import EvoformerBlock

        class OpenFoldEvoformer(nn.Module):
            """Wrap one OpenFold ``EvoformerBlock`` behind the call signature
            ``forward(node, pair, node_mask, pair_mask)`` used by the loop below."""

            def __init__(self, d_node, d_pair):
                super().__init__()
                self.d_node = d_node
                self.d_pair = d_pair

                self.c_hidden_msa_att = int(d_node / 8)
                self.c_hidden_pair_att = int(d_pair / 4)

                self.EvoformerBlock = EvoformerBlock(c_m=d_node,
                                                     c_z=d_pair,
                                                     c_hidden_msa_att=self.c_hidden_msa_att,
                                                     c_hidden_opm=self.c_hidden_msa_att,
                                                     c_hidden_mul=self.d_pair,
                                                     c_hidden_pair_att=self.c_hidden_pair_att,
                                                     no_heads_msa=8,
                                                     no_heads_pair=4,
                                                     transition_n=4,
                                                     msa_dropout=0.15,
                                                     pair_dropout=0.25,
                                                     inf=1e9,
                                                     eps=1e-10)

            def forward(self, node, pair, node_mask, pair_mask):
                node, pair = self.EvoformerBlock(node, pair, node_mask, pair_mask,
                                                 use_lma=args.openfold_lma)
                return node, pair

    attn_layers = []
    for _ in range(args.layers):
        if args.openfold:
            layer = OpenFoldEvoformer(d_node=args.cm, d_pair=args.cz)
        else:
            layer = Evoformer(d_node=args.cm, d_pair=args.cz)
        layer.cuda()
        layer.to(dtype=precision)
        attn_layers.append(layer)

    # One (forward start, backward start, backward stop) event triple per
    # timed trial; warm-up trials are not timed.
    start_evt_fwd = [torch.cuda.Event(enable_timing=True) for _ in range(args.trials)]
    start_evt_bwd = [torch.cuda.Event(enable_timing=True) for _ in range(args.trials)]
    stop_evt_bwd = [torch.cuda.Event(enable_timing=True) for _ in range(args.trials)]

    device = torch.device("cuda")
    # The second dimension of both inputs is divided by the DAP size.
    inputs_node = torch.randn(args.batch_size,
                              args.msa_length // args.dap_size,
                              args.res_length,
                              args.cm,
                              dtype=precision,
                              device=device).requires_grad_(True)
    inputs_pair = torch.randn(args.batch_size,
                              args.res_length // args.dap_size,
                              args.res_length,
                              args.cz,
                              dtype=precision,
                              device=device).requires_grad_(True)
    node_mask = torch.ones((args.batch_size, args.msa_length, args.res_length),
                           dtype=precision,
                           device=device).requires_grad_(False)
    pair_mask = torch.ones((args.batch_size, args.res_length, args.res_length),
                           dtype=precision,
                           device=device).requires_grad_(False)

    total_used_mem_gb = 0
    for trial in range(args.trials + args.warmup_trials):
        layer_inputs = inputs_node, inputs_pair
        # Negative during warm-up; timed trials are numbered from 0.
        evt_idx = trial - args.warmup_trials
        timed = evt_idx >= 0

        torch.distributed.barrier()
        torch.cuda.synchronize()
        # Reset so the peak read at the end of this iteration covers only
        # this iteration.
        torch.cuda.reset_peak_memory_stats()
        if timed:
            start_evt_fwd[evt_idx].record()
        with torch.set_grad_enabled(not args.fwd):
            for layer in attn_layers:
                layer_inputs = layer.forward(
                    *layer_inputs,
                    node_mask,
                    pair_mask,
                )

        torch.cuda.synchronize()

        if timed:
            start_evt_bwd[evt_idx].record()

        if not args.fwd:
            s = layer_inputs[0].mean() + layer_inputs[1].mean()
            s.backward()

        torch.cuda.synchronize()
        cur_cost_mem = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024
        total_used_mem_gb += cur_cost_mem
        if timed:
            stop_evt_bwd[evt_idx].record()

    torch.cuda.synchronize()
    elapsed_time_fwd = 0.0
    elapsed_time_bwd = 0.0
    for fwd_start, bwd_start, bwd_stop in zip(start_evt_fwd, start_evt_bwd, stop_evt_bwd):
        elapsed_time_fwd += fwd_start.elapsed_time(bwd_start)
        elapsed_time_bwd += bwd_start.elapsed_time(bwd_stop)

    print(
        "Input: {:4d}, {:4d}, {:4d}, ({:4d} {:4d}), Fwd Time / Layer: {:.3f} ms, Bwd Time / Layer: {:.3f} ms, Memory cost {:.3f} GB".format(
            args.batch_size,
            args.msa_length,
            args.res_length,
            args.cm,
            args.cz,
            elapsed_time_fwd / (args.trials * args.layers),
            elapsed_time_bwd / (args.trials * args.layers),
            total_used_mem_gb / (args.trials + args.warmup_trials),
        )
    )


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
Loading