diff --git a/plugins/nf-tower/build.gradle b/plugins/nf-tower/build.gradle
index 484374f212..5ffe85f27a 100644
--- a/plugins/nf-tower/build.gradle
+++ b/plugins/nf-tower/build.gradle
@@ -26,7 +26,8 @@ nextflowPlugin {
         'io.seqera.tower.plugin.TowerFactory',
         'io.seqera.tower.plugin.TowerFusionToken',
         'io.seqera.tower.plugin.auth.AuthCommandImpl',
-        'io.seqera.tower.plugin.launch.LaunchCommandImpl'
+        'io.seqera.tower.plugin.launch.LaunchCommandImpl',
+        'io.seqera.tower.plugin.dataset.DatasetPathFactory',
     ]
 }
diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystem.java b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystem.java
new file mode 100644
index 0000000000..9a8f6f4ceb
--- /dev/null
+++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystem.java
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.seqera.tower.plugin.dataset;
+
+import java.io.IOException;
+import java.nio.file.FileStore;
+import java.nio.file.FileSystem;
+import java.nio.file.Path;
+import java.nio.file.PathMatcher;
+import java.nio.file.WatchService;
+import java.nio.file.attribute.UserPrincipalLookupService;
+import java.nio.file.spi.FileSystemProvider;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Minimal read-only {@link FileSystem} for the {@code dataset://} scheme.
+ * <p>
+ * Datasets are single files resolved via the Seqera Platform API,
+ * so most filesystem operations (roots, stores, path matching)
+ * are either trivial or unsupported.
+ *
+ * @author Edmund Miller
+ */
+public class DatasetFileSystem extends FileSystem {
+
+    static final String PATH_SEPARATOR = "/";
+
+    private final DatasetFileSystemProvider provider;
+    // creation-time options; never null (defaults to an empty map)
+    private final Map<String, ?> env;
+    private volatile boolean open = true;
+
+    DatasetFileSystem(DatasetFileSystemProvider provider, Map<String, ?> env) {
+        this.provider = provider;
+        this.env = env != null ? env : Collections.<String, Object>emptyMap();
+    }
+
+    @Override
+    public FileSystemProvider provider() {
+        return provider;
+    }
+
+    @Override
+    public void close() throws IOException {
+        open = false;
+    }
+
+    @Override
+    public boolean isOpen() {
+        return open;
+    }
+
+    @Override
+    public boolean isReadOnly() {
+        // Phase 1 of the dataset integration is read-only
+        return true;
+    }
+
+    @Override
+    public String getSeparator() {
+        return PATH_SEPARATOR;
+    }
+
+    @Override
+    public Iterable<Path> getRootDirectories() {
+        // datasets are single files — there is no directory hierarchy
+        return Collections.emptyList();
+    }
+
+    @Override
+    public Iterable<FileStore> getFileStores() {
+        return Collections.emptyList();
+    }
+
+    @Override
+    public Set<String> supportedFileAttributeViews() {
+        return Set.of("basic");
+    }
+
+    @Override
+    public Path getPath(String first, String... more) {
+        // Build a dataset path from string components
+        StringBuilder sb = new StringBuilder(first);
+        for (String part : more) {
+            if (!part.isEmpty()) {
+                sb.append(PATH_SEPARATOR).append(part);
+            }
+        }
+        return new DatasetPath(this, sb.toString());
+    }
+
+    @Override
+    public PathMatcher getPathMatcher(String syntaxAndPattern) {
+        throw new UnsupportedOperationException("Dataset filesystem does not support path matching");
+    }
+
+    @Override
+    public UserPrincipalLookupService getUserPrincipalLookupService() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public WatchService newWatchService() throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    Map<String, ?> getEnv() {
+        return env;
+    }
+}
diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystemProvider.java b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystemProvider.java
new file mode 100644
index 0000000000..786a9c605d
--- /dev/null
+++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystemProvider.java
@@ -0,0 +1,345 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.seqera.tower.plugin.dataset;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UncheckedIOException;
+import java.net.URI;
+import java.net.URLConnection;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.file.AccessMode;
+import java.nio.file.CopyOption;
+import java.nio.file.DirectoryStream;
+import java.nio.file.FileStore;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystemAlreadyExistsException;
+import java.nio.file.LinkOption;
+import java.nio.file.OpenOption;
+import java.nio.file.Path;
+import java.nio.file.ReadOnlyFileSystemException;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.nio.file.attribute.FileAttribute;
+import java.nio.file.attribute.FileAttributeView;
+import java.nio.file.spi.FileSystemProvider;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import nextflow.file.http.XAuthProvider;
+import nextflow.file.http.XAuthRegistry;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * NIO FileSystemProvider for the {@code dataset://} scheme.
+ * <p>
+ * Resolves dataset URIs of the form {@code dataset://name} or
+ * {@code dataset://name?version=N} to their backing cloud storage
+ * path via the Seqera Platform API, then delegates all I/O
+ * operations to the resolved path's provider.
+ * <p>
+ * Phase 1: read-only — write operations throw {@link ReadOnlyFileSystemException}.
+ *
+ * @author Edmund Miller
+ */
+public class DatasetFileSystemProvider extends FileSystemProvider {
+
+    private static final Logger log = LoggerFactory.getLogger(DatasetFileSystemProvider.class);
+    // Matches the Platform dataset download endpoint: .../workspaces/{w}/datasets/{d}/v/{v}/n/{name}
+    private static final Pattern PLATFORM_DATASET_PATH = Pattern.compile(".*?/workspaces/[^/]+/datasets/[^/]+/v/[^/]+/n/.+");
+
+    // single filesystem instance per provider; guarded by double-checked locking
+    private volatile DatasetFileSystem fileSystem;
+
+    @Override
+    public String getScheme() {
+        return "dataset";
+    }
+
+    @Override
+    public FileSystem newFileSystem(URI uri, Map<String, ?> env) throws IOException {
+        if (fileSystem != null) {
+            throw new FileSystemAlreadyExistsException("Dataset filesystem already exists");
+        }
+        synchronized (this) {
+            if (fileSystem != null) {
+                throw new FileSystemAlreadyExistsException("Dataset filesystem already exists");
+            }
+            fileSystem = new DatasetFileSystem(this, env);
+            return fileSystem;
+        }
+    }
+
+    @Override
+    public FileSystem getFileSystem(URI uri) {
+        if (fileSystem == null) {
+            throw new java.nio.file.FileSystemNotFoundException("Dataset filesystem not yet created. Use newFileSystem() first");
+        }
+        return fileSystem;
+    }
+
+    @Override
+    public Path getPath(URI uri) {
+        if (!"dataset".equals(uri.getScheme())) {
+            throw new IllegalArgumentException("URI scheme must be 'dataset': " + uri);
+        }
+        return new DatasetPath(getOrCreateFileSystem(), uri);
+    }
+
+    // -- read operations: delegate to resolved cloud path --
+
+    @Override
+    public InputStream newInputStream(Path path, OpenOption... options) throws IOException {
+        final Path resolved = resolvedPath(path);
+        log.debug("newInputStream for dataset path {} -> {}", path, resolved);
+        return withPlatformDatasetAuth(resolved, () -> resolved.getFileSystem().provider().newInputStream(resolved, options));
+    }
+
+    @Override
+    public SeekableByteChannel newByteChannel(Path path, Set<? extends OpenOption> options, FileAttribute<?>... attrs) throws IOException {
+        final Path resolved = resolvedPath(path);
+        return withPlatformDatasetAuth(resolved, () -> resolved.getFileSystem().provider().newByteChannel(resolved, options, attrs));
+    }
+
+    @Override
+    public DirectoryStream<Path> newDirectoryStream(Path dir, DirectoryStream.Filter<? super Path> filter) throws IOException {
+        // a dataset resolves to a single file — there is nothing to list
+        throw new UnsupportedOperationException("Dataset paths do not support directory listing");
+    }
+
+    @Override
+    public <A extends BasicFileAttributes> A readAttributes(Path path, Class<A> type, LinkOption... options) throws IOException {
+        final Path resolved = resolvedPath(path);
+        return withPlatformDatasetAuth(resolved, () -> resolved.getFileSystem().provider().readAttributes(resolved, type, options));
+    }
+
+    @Override
+    public Map<String, Object> readAttributes(Path path, String attributes, LinkOption... options) throws IOException {
+        final Path resolved = resolvedPath(path);
+        return withPlatformDatasetAuth(resolved, () -> resolved.getFileSystem().provider().readAttributes(resolved, attributes, options));
+    }
+
+    @Override
+    public <V extends FileAttributeView> V getFileAttributeView(Path path, Class<V> type, LinkOption... options) {
+        try {
+            final Path resolved = resolvedPath(path);
+            return withPlatformDatasetAuth(resolved, () -> resolved.getFileSystem().provider().getFileAttributeView(resolved, type, options));
+        }
+        catch (IOException e) {
+            // this NIO method cannot declare IOException — rethrow unchecked, preserving the cause
+            throw new UncheckedIOException("Failed to resolve dataset path: " + path, e);
+        }
+    }
+
+    @Override
+    public void checkAccess(Path path, AccessMode... modes) throws IOException {
+        // reject write access up-front; the backing store may itself be writable
+        for (AccessMode mode : modes) {
+            if (mode == AccessMode.WRITE) {
+                throw new ReadOnlyFileSystemException();
+            }
+        }
+        final Path resolved = resolvedPath(path);
+        withPlatformDatasetAuth(resolved, () -> {
+            resolved.getFileSystem().provider().checkAccess(resolved, modes);
+            return null;
+        });
+    }
+
+    // -- write operations: read-only in phase 1 --
+
+    @Override
+    public void createDirectory(Path dir, FileAttribute<?>... attrs) throws IOException {
+        throw new ReadOnlyFileSystemException();
+    }
+
+    @Override
+    public void delete(Path path) throws IOException {
+        throw new ReadOnlyFileSystemException();
+    }
+
+    @Override
+    public void copy(Path source, Path target, CopyOption... options) throws IOException {
+        throw new ReadOnlyFileSystemException();
+    }
+
+    @Override
+    public void move(Path source, Path target, CopyOption... options) throws IOException {
+        throw new ReadOnlyFileSystemException();
+    }
+
+    @Override
+    public boolean isSameFile(Path path1, Path path2) throws IOException {
+        if (path1 instanceof DatasetPath && path2 instanceof DatasetPath) {
+            return path1.equals(path2);
+        }
+        return false;
+    }
+
+    @Override
+    public boolean isHidden(Path path) throws IOException {
+        return false;
+    }
+
+    @Override
+    public FileStore getFileStore(Path path) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void setAttribute(Path path, String attribute, Object value, LinkOption... options) throws IOException {
+        throw new ReadOnlyFileSystemException();
+    }
+
+    // -- internal helpers --
+
+    private DatasetFileSystem getOrCreateFileSystem() {
+        if (fileSystem == null) {
+            synchronized (this) {
+                if (fileSystem == null) {
+                    fileSystem = new DatasetFileSystem(this, null);
+                }
+            }
+        }
+        return fileSystem;
+    }
+
+    /**
+     * Run the given I/O action with a bearer-token auth provider registered
+     * when the resolved target is a Platform dataset download URL; the
+     * provider is always unregistered afterwards.
+     */
+    private <T> T withPlatformDatasetAuth(Path resolved, IoCallable<T> action) throws IOException {
+        final XAuthProvider authProvider = authProviderFor(resolved);
+        if (authProvider == null) {
+            return action.call();
+        }
+
+        final XAuthRegistry registry = XAuthRegistry.getInstance();
+        registry.register(authProvider);
+        try {
+            return action.call();
+        }
+        finally {
+            registry.unregister(authProvider);
+        }
+    }
+
+    /**
+     * Return an auth provider for the resolved path, or {@code null} when no
+     * authentication should be injected (non-HTTP target, non-dataset path,
+     * missing credentials, or cross-origin endpoint).
+     */
+    private XAuthProvider authProviderFor(Path resolved) {
+        final URI targetUri = resolved.toUri();
+        if (targetUri == null || !isHttpScheme(targetUri.getScheme())) {
+            return null;
+        }
+        if (!isPlatformDatasetDownloadPath(targetUri.getPath())) {
+            return null;
+        }
+
+        final String endpoint = DatasetResolver.towerEndpoint();
+        final String accessToken = DatasetResolver.towerAccessToken();
+        if (isBlank(endpoint) || isBlank(accessToken)) {
+            return null;
+        }
+
+        // never leak the token to a host other than the configured endpoint
+        final URI endpointUri = URI.create(endpoint);
+        if (!isHttpScheme(endpointUri.getScheme()) || !isSameOrigin(endpointUri, targetUri)) {
+            return null;
+        }
+
+        return new DatasetBearerAuthProvider(endpointUri, accessToken);
+    }
+
+    private boolean isHttpScheme(String scheme) {
+        return "http".equalsIgnoreCase(scheme) || "https".equalsIgnoreCase(scheme);
+    }
+
+    private boolean isPlatformDatasetDownloadPath(String path) {
+        return path != null && PLATFORM_DATASET_PATH.matcher(path).matches();
+    }
+
+    /** Same origin = same scheme, host, and effective port. */
+    private static boolean isSameOrigin(URI left, URI right) {
+        if (left.getScheme() == null || right.getScheme() == null) {
+            return false;
+        }
+        if (!left.getScheme().equalsIgnoreCase(right.getScheme())) {
+            return false;
+        }
+        if (left.getHost() == null || right.getHost() == null) {
+            return false;
+        }
+        if (!left.getHost().equalsIgnoreCase(right.getHost())) {
+            return false;
+        }
+        return defaultPort(left) == defaultPort(right);
+    }
+
+    /** Effective port of the URI, substituting the scheme default when absent. */
+    private static int defaultPort(URI uri) {
+        if (uri.getPort() != -1) {
+            return uri.getPort();
+        }
+        if ("https".equalsIgnoreCase(uri.getScheme())) {
+            return 443;
+        }
+        if ("http".equalsIgnoreCase(uri.getScheme())) {
+            return 80;
+        }
+        return -1;
+    }
+
+    private boolean isBlank(String value) {
+        return value == null || value.trim().isEmpty();
+    }
+
+    /**
+     * Resolve a dataset path to its backing cloud storage path.
+     * Caches the resolved path on the DatasetPath instance.
+     */
+    private Path resolvedPath(Path path) throws IOException {
+        if (!(path instanceof DatasetPath)) {
+            throw new IllegalArgumentException("Path must be a DatasetPath: " + path);
+        }
+        return ((DatasetPath) path).getResolvedPath();
+    }
+
+    /** IO-throwing analogue of {@link java.util.concurrent.Callable}. */
+    @FunctionalInterface
+    private interface IoCallable<T> {
+        T call() throws IOException;
+    }
+
+    /**
+     * Injects a {@code Authorization: Bearer ...} header for same-origin
+     * Platform dataset download requests that carry no auth header yet.
+     */
+    private static final class DatasetBearerAuthProvider implements XAuthProvider {
+        private final URI endpoint;
+        private final String accessToken;
+
+        private DatasetBearerAuthProvider(URI endpoint, String accessToken) {
+            this.endpoint = endpoint;
+            this.accessToken = accessToken;
+        }
+
+        @Override
+        public boolean authorize(URLConnection connection) {
+            final URI target = URI.create(connection.getURL().toString());
+            if (!isSameOrigin(endpoint, target)) {
+                return false;
+            }
+            if (!PLATFORM_DATASET_PATH.matcher(target.getPath()).matches()) {
+                return false;
+            }
+            if (connection.getRequestProperty("Authorization") != null) {
+                // never overwrite an existing Authorization header
                return false;
+            }
+
+            connection.setRequestProperty("Authorization", "Bearer " + accessToken);
+            return true;
+        }
+
+        @Override
+        public boolean refreshToken(URLConnection connection) {
+            return false;
+        }
+    }
+}
diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetPath.java b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetPath.java
new file mode 100644
index
0000000000..0162314db9
--- /dev/null
+++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetPath.java
@@ -0,0 +1,269 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.seqera.tower.plugin.dataset;
+
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.FileSystem;
+import java.nio.file.LinkOption;
+import java.nio.file.Path;
+import java.nio.file.WatchEvent;
+import java.nio.file.WatchKey;
+import java.nio.file.WatchService;
+import java.util.Iterator;
+import java.util.Objects;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A {@link Path} representing a Seqera Platform dataset reference.
+ * <p>
+ * URI format: {@code dataset://name} or {@code dataset://name?version=N}
+ * <p>
+ * The path lazily resolves to the backing cloud storage path
+ * (S3/GCS/Azure) via the Platform API on first I/O access.
+ *
+ * @author Edmund Miller
+ */
+public class DatasetPath implements Path {
+
+    private static final Logger log = LoggerFactory.getLogger(DatasetPath.class);
+
+    private final DatasetFileSystem fileSystem;
+    private final String datasetName;
+    private final String version; // null = latest
+    private final URI uri;
+
+    /** Cached resolved cloud path — populated lazily on first I/O */
+    private volatile Path resolvedPath;
+
+    /**
+     * Construct from a URI (e.g. from provider.getPath(URI))
+     */
+    DatasetPath(DatasetFileSystem fileSystem, URI uri) {
+        this.fileSystem = fileSystem;
+        this.uri = uri;
+        // dataset://my-samplesheet or dataset:///my-samplesheet
+        // host = dataset name, or if host is null, first path segment is the name
+        String name = uri.getHost();
+        if (name == null || name.isEmpty()) {
+            // handle dataset:///name form
+            String path = uri.getPath();
+            if (path != null && path.startsWith("/")) {
+                path = path.substring(1);
+            }
+            name = path;
+        }
+        this.datasetName = name;
+        // parse ?version=N from query string
+        this.version = parseVersion(uri.getQuery());
+    }
+
+    /**
+     * Construct from string path (e.g. from fileSystem.getPath())
+     */
+    DatasetPath(DatasetFileSystem fileSystem, String path) {
+        this.fileSystem = fileSystem;
+        // strip leading slash if present
+        if (path.startsWith("/")) {
+            path = path.substring(1);
+        }
+        // check for version suffix: name@version
+        int atIdx = path.indexOf('@');
+        if (atIdx > 0) {
+            this.datasetName = path.substring(0, atIdx);
+            this.version = path.substring(atIdx + 1);
+        }
+        else {
+            this.datasetName = path;
+            this.version = null;
+        }
+        this.uri = URI.create("dataset://" + datasetName + (version != null ? "?version=" + version : ""));
+    }
+
+    public String getDatasetName() {
+        return datasetName;
+    }
+
+    public String getVersion() {
+        return version;
+    }
+
+    /**
+     * Resolve this dataset reference to the backing cloud storage path.
+     * Lazily initialized and cached (double-checked locking on this instance).
+     */
+    Path getResolvedPath() throws IOException {
+        if (resolvedPath == null) {
+            synchronized (this) {
+                if (resolvedPath == null) {
+                    log.debug("Resolving dataset '{}' version={}", datasetName, version != null ? version : "latest");
+                    resolvedPath = DatasetResolver.resolve(datasetName, version);
+                    log.debug("Resolved dataset '{}' -> {}", datasetName, resolvedPath);
+                }
+            }
+        }
+        return resolvedPath;
+    }
+
+    // -- Path interface --
+
+    @Override
+    public FileSystem getFileSystem() {
+        return fileSystem;
+    }
+
+    @Override
+    public boolean isAbsolute() {
+        return true;
+    }
+
+    @Override
+    public Path getRoot() {
+        return null;
+    }
+
+    @Override
+    public Path getFileName() {
+        // The dataset name is the "file name"
+        return new DatasetPath(fileSystem, datasetName);
+    }
+
+    @Override
+    public Path getParent() {
+        return null;
+    }
+
+    @Override
+    public int getNameCount() {
+        return 1;
+    }
+
+    @Override
+    public Path getName(int index) {
+        if (index != 0) {
+            throw new IllegalArgumentException("Invalid name index: " + index);
+        }
+        return this;
+    }
+
+    @Override
+    public Path subpath(int beginIndex, int endIndex) {
+        if (beginIndex != 0 || endIndex != 1) {
+            throw new IllegalArgumentException("Invalid subpath range");
+        }
+        return this;
+    }
+
+    @Override
+    public boolean startsWith(Path other) {
+        return equals(other);
+    }
+
+    @Override
+    public boolean endsWith(Path other) {
+        return equals(other);
+    }
+
+    @Override
+    public Path normalize() {
+        return this;
+    }
+
+    @Override
+    public Path resolve(Path other) {
+        // dataset paths are leaf nodes, cannot resolve children
+        throw new UnsupportedOperationException("Cannot resolve against a dataset path");
+    }
+
+    @Override
+    public Path relativize(Path other) {
+        throw new UnsupportedOperationException("Cannot relativize dataset paths");
+    }
+
+    @Override
+    public URI toUri() {
+        return uri;
+    }
+
+    @Override
+    public Path toAbsolutePath() {
+        return this;
+    }
+
+    @Override
+    public Path toRealPath(LinkOption... options) throws IOException {
+        // Return the resolved cloud path as the "real" path
+        return getResolvedPath();
+    }
+
+    @Override
+    public WatchKey register(WatchService watcher, WatchEvent.Kind<?>[] events, WatchEvent.Modifier... modifiers) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public int compareTo(Path other) {
+        if (other instanceof DatasetPath) {
+            DatasetPath o = (DatasetPath) other;
+            int cmp = datasetName.compareTo(o.datasetName);
+            if (cmp != 0) return cmp;
+            return compareVersions(version, o.version);
+        }
+        return toString().compareTo(other.toString());
+    }
+
+    /**
+     * Compare two version strings; null (latest) sorts first, numeric versions
+     * compare numerically (so "10" > "2"), non-numeric fall back to string order.
+     */
+    private static int compareVersions(String a, String b) {
+        if (a == null && b == null) return 0;
+        if (a == null) return -1;
+        if (b == null) return 1;
+        try {
+            return Integer.compare(Integer.parseInt(a), Integer.parseInt(b));
+        }
+        catch (NumberFormatException e) {
+            return a.compareTo(b);
+        }
+    }
+
+    @Override
+    public Iterator<Path> iterator() {
+        return java.util.List.<Path>of(this).iterator();
+    }
+
+    @Override
+    public String toString() {
+        return "dataset://" + datasetName + (version != null ? "?version=" + version : "");
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof DatasetPath)) return false;
+        DatasetPath that = (DatasetPath) o;
+        return Objects.equals(datasetName, that.datasetName) && Objects.equals(version, that.version);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(datasetName, version);
+    }
+
+    // -- helpers --
+
+    private static String parseVersion(String query) {
+        if (query == null || query.isEmpty()) return null;
+        for (String param : query.split("&")) {
+            String[] kv = param.split("=", 2);
+            if (kv.length == 2 && "version".equals(kv[0])) {
+                return kv[1];
+            }
+        }
+        return null;
+    }
+}
diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetPathFactory.groovy b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetPathFactory.groovy
new file mode 100644
index 0000000000..0c5b0e2a51
--- /dev/null
+++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetPathFactory.groovy
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.seqera.tower.plugin.dataset
+
+import java.nio.file.Path
+
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.file.FileHelper
+import nextflow.file.FileSystemPathFactory
+
+/**
+ * Factory that intercepts {@code dataset://} URI strings and returns
+ * a {@link DatasetPath} via the NIO FileSystem provider.
+ * <p>
+ * Registered as a Nextflow {@code ExtensionPoint} so that
+ * {@code FileHelper.asPath("dataset://my-samplesheet")} works
+ * transparently — no pipeline code changes needed.
+ *
+ * @author Edmund Miller
+ */
+@Slf4j
+@CompileStatic
+class DatasetPathFactory extends FileSystemPathFactory {
+
+    @Override
+    protected Path parseUri(String str) {
+        if (!str.startsWith('dataset://'))
+            return null
+
+        log.debug "Parsing dataset URI: {}", str
+
+        // Normalise to triple-slash form for URI parsing:
+        // dataset://name → dataset:///name
+        final normalized = str.startsWith('dataset:///') ? str : 'dataset:///' + str.substring('dataset://'.length())
+
+        // Split the query off before building the URI: passing the whole
+        // string as the *path* component would percent-encode the '?' and
+        // silently drop any ?version=N selector
+        final qIdx = normalized.indexOf('?')
+        final pathPart = qIdx >= 0 ? normalized.substring(0, qIdx) : normalized
+        final queryPart = qIdx >= 0 ? normalized.substring(qIdx + 1) : null
+
+        final uri = new URI(null, null, pathPart, queryPart, null)
+        return FileHelper.getOrCreateFileSystemFor(uri).provider().getPath(uri)
+    }
+
+    @Override
+    protected String toUriString(Path path) {
+        if (path instanceof DatasetPath) {
+            return path.toString()
+        }
+        return null
+    }
+
+    @Override
+    protected String getBashLib(Path target) {
+        // dataset:// paths are resolved to cloud paths before execution,
+        // no special bash lib needed
+        return null
+    }
+
+    @Override
+    protected String getUploadCmd(String source, Path target) {
+        // read-only — no upload support
+        return null
+    }
+}
diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetResolver.groovy b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetResolver.groovy
new file mode 100644
index 0000000000..f77b8c9d9a
--- /dev/null
+++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetResolver.groovy
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.seqera.tower.plugin.dataset
+
+import java.net.http.HttpClient
+import java.net.http.HttpRequest
+import java.net.http.HttpResponse
+import java.nio.file.Path
+
+import groovy.json.JsonSlurper
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.Global
+import nextflow.Session
+import nextflow.exception.AbortOperationException
+import nextflow.file.FileHelper
+import nextflow.platform.PlatformHelper
+
+/**
+ * Resolves a Seqera Platform dataset reference to its backing cloud storage path.
+ * <p>
+ * Resolution chain:
+ * 1. Dataset name → GET /datasets?workspaceId=X → DatasetDto.id
+ * 2. Dataset id + version → GET /datasets/{id}/versions → DatasetVersionDto.url
+ * 3. Cloud URL string → FileHelper.asPath() → concrete cloud Path (S3/GCS/Azure)
+ *
+ * @author Edmund Miller
+ */
+@Slf4j
+@CompileStatic
+class DatasetResolver {
+
+    // HttpClient is immutable and thread-safe — share one instance instead of
+    // allocating a new client (and connection pool) per resolution
+    private static final HttpClient HTTP_CLIENT = HttpClient.newHttpClient()
+
+    static String towerEndpoint() {
+        return getEndpoint()
+    }
+
+    static String towerAccessToken() {
+        return getAccessToken()
+    }
+
+    /**
+     * Resolve a dataset name (and optional version) to the backing cloud storage Path.
+     *
+     * @param datasetName The dataset name as shown in Seqera Platform
+     * @param version The version number (null = latest)
+     * @return A concrete cloud storage Path (e.g. S3Path, GcsPath)
+     */
+    static Path resolve(String datasetName, String version) {
+        if (!datasetName)
+            throw new IllegalArgumentException("Dataset name cannot be null or empty")
+
+        final String endpoint = getEndpoint()
+        final String accessToken = getAccessToken()
+        final String workspaceId = getWorkspaceId()
+
+        if (!accessToken)
+            throw new AbortOperationException("Missing Seqera Platform access token -- set TOWER_ACCESS_TOKEN or tower.accessToken in config")
+
+        // Step 1: Resolve dataset name → dataset ID
+        final String datasetId = resolveDatasetId(HTTP_CLIENT, endpoint, accessToken, workspaceId, datasetName)
+
+        // Step 2: Resolve dataset ID + version → cloud storage URL
+        final String cloudUrl = resolveCloudUrl(HTTP_CLIENT, endpoint, accessToken, workspaceId, datasetId, version)
+
+        log.debug "Dataset '{}' resolved to cloud URL: {}", datasetName, cloudUrl
+
+        // Step 3: Convert cloud URL → Path via Nextflow's FileHelper
+        return FileHelper.asPath(cloudUrl)
+    }
+
+    /**
+     * Look up a dataset by name, return its ID.
+     */
+    static private String resolveDatasetId(HttpClient httpClient, String endpoint, String accessToken, String workspaceId, String datasetName) {
+        String url = "${endpoint}/datasets"
+        if (workspaceId) {
+            url += "?workspaceId=${workspaceId}"
+        }
+
+        log.debug "Listing datasets from: {}", url
+
+        final Map json = httpGet(httpClient, url, accessToken)
+        final List<Map> datasets = json.datasets as List<Map>
+
+        if (!datasets) {
+            throw new AbortOperationException("No datasets found in workspace")
+        }
+
+        // Find dataset by name (case-sensitive match)
+        final Map dataset = datasets.find { Map it -> it.name == datasetName }
+        if (!dataset) {
+            final String available = datasets.collect { Map it -> it.name }.join(', ')
+            throw new AbortOperationException("Dataset '${datasetName}' not found. Available: ${available}")
+        }
+
+        return dataset.id as String
+    }
+
+    /**
+     * Look up the dataset version's backing cloud URL.
+     * If version is null, uses the latest version.
+     */
+    static private String resolveCloudUrl(HttpClient httpClient, String endpoint, String accessToken, String workspaceId, String datasetId, String version) {
+        String url = "${endpoint}/datasets/${datasetId}/versions"
+        if (workspaceId) {
+            url += "?workspaceId=${workspaceId}"
+        }
+
+        log.debug "Listing dataset versions from: {}", url
+
+        final Map json = httpGet(httpClient, url, accessToken)
+        final List<Map> versions = json.versions as List<Map>
+
+        if (!versions) {
+            throw new AbortOperationException("No versions found for dataset ID: ${datasetId}")
+        }
+
+        Map targetVersion
+        if (version) {
+            // Find specific version
+            targetVersion = versions.find { Map it -> it.version?.toString() == version }
+            if (!targetVersion) {
+                throw new AbortOperationException("Version '${version}' not found for dataset ID: ${datasetId}")
+            }
+        }
+        else {
+            // Use the latest version (highest version number)
+            targetVersion = versions.max { Map it -> (it.version as Integer) ?: 0 } as Map
+        }
+
+        final String cloudUrl = targetVersion.url as String
+        if (!cloudUrl) {
+            throw new AbortOperationException("Dataset version has no backing storage URL -- dataset ID: ${datasetId}, version: ${targetVersion.version}")
+        }
+
+        return cloudUrl
+    }
+
+    /**
+     * Execute a GET request against the Platform API and parse the JSON response.
+     */
+    static private Map httpGet(HttpClient httpClient, String url, String accessToken) {
+        final request = HttpRequest.newBuilder()
+            .uri(URI.create(url))
+            .header("Authorization", "Bearer ${accessToken}")
+            .GET()
+            .build()
+
+        final HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString())
+
+        if (response.statusCode() < 200 || response.statusCode() >= 300) {
+            if (response.statusCode() == 401 || response.statusCode() == 403) {
+                throw new AbortOperationException("Access denied to Seqera Platform API -- check your access token")
+            }
+            throw new AbortOperationException("Seqera Platform API error -- HTTP ${response.statusCode()}: ${response.body()}")
+        }
+
+        return new JsonSlurper().parseText(response.body()) as Map
+    }
+
+    // -- config helpers --
+
+    static private String getEndpoint() {
+        final Map opts = getTowerOpts()
+        return PlatformHelper.getEndpoint(opts, System.getenv())
+    }
+
+    static private String getAccessToken() {
+        final Map opts = getTowerOpts()
+        return PlatformHelper.getAccessToken(opts, System.getenv())
+    }
+
+    static private String getWorkspaceId() {
+        final Map opts = getTowerOpts()
+        return PlatformHelper.getWorkspaceId(opts, System.getenv())
+    }
+
+    static private Map getTowerOpts() {
+        final session = Global.session as Session
+        if (!session)
+            throw new AbortOperationException("Nextflow session not initialized -- dataset:// URIs require an active session")
+
+        return session.config?.navigate('tower') as Map ?: Collections.emptyMap()
+    }
+}
diff --git a/plugins/nf-tower/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider b/plugins/nf-tower/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider
new file mode 100644 index 0000000000..b89f518e56 --- /dev/null +++ b/plugins/nf-tower/src/resources/META-INF/services/java.nio.file.spi.FileSystemProvider @@ -0,0 +1,17 @@ +# +# Copyright 2013-2024, Seqera Labs +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +io.seqera.tower.plugin.dataset.DatasetFileSystemProvider diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetFileSystemProviderTest.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetFileSystemProviderTest.groovy new file mode 100644 index 0000000000..22d398fdc1 --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetFileSystemProviderTest.groovy @@ -0,0 +1,132 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package io.seqera.tower.plugin.dataset + +import java.nio.file.AccessMode +import java.nio.file.FileSystemAlreadyExistsException +import java.nio.file.ReadOnlyFileSystemException + +import spock.lang.Unroll +import spock.lang.Specification + +/** + * @author Edmund Miller + */ +class DatasetFileSystemProviderTest extends Specification { + + def 'should return dataset scheme'() { + expect: + new DatasetFileSystemProvider().getScheme() == 'dataset' + } + + def 'should create filesystem'() { + given: + def provider = new DatasetFileSystemProvider() + + when: + def fs = provider.newFileSystem(new URI('dataset:///'), [:]) + + then: + fs instanceof DatasetFileSystem + fs.isOpen() + fs.isReadOnly() + } + + def 'should throw on duplicate filesystem creation'() { + given: + def provider = new DatasetFileSystemProvider() + provider.newFileSystem(new URI('dataset:///'), [:]) + + when: + provider.newFileSystem(new URI('dataset:///'), [:]) + + then: + thrown(FileSystemAlreadyExistsException) + } + + def 'should get path from URI'() { + given: + def provider = new DatasetFileSystemProvider() + + when: + def path = provider.getPath(new URI('dataset://my-data')) + + then: + path instanceof DatasetPath + (path as DatasetPath).datasetName == 'my-data' + } + + @Unroll + def 'should reject non-dataset URI #uriString'() { + given: + def provider = new DatasetFileSystemProvider() + + when: + provider.getPath(new URI(uriString)) + + then: + thrown(IllegalArgumentException) + + where: + uriString << ['s3://bucket/key', 'gs://bucket/key', 'file:///tmp/data.csv'] + } + + @Unroll + def 'should throw ReadOnlyFileSystemException on #operation'() { + given: + def provider = new DatasetFileSystemProvider() + + when: + invoke.call(provider) + + then: + thrown(ReadOnlyFileSystemException) + + where: + operation | invoke + 'createDirectory' | { DatasetFileSystemProvider p -> p.createDirectory(p.getPath(new URI('dataset://test'))) } + 'delete' | { DatasetFileSystemProvider p -> 
p.delete(p.getPath(new URI('dataset://test'))) } + 'copy' | { DatasetFileSystemProvider p -> p.copy(p.getPath(new URI('dataset://src')), p.getPath(new URI('dataset://dst'))) } + 'move' | { DatasetFileSystemProvider p -> p.move(p.getPath(new URI('dataset://src')), p.getPath(new URI('dataset://dst'))) } + 'write access check' | { DatasetFileSystemProvider p -> p.checkAccess(p.getPath(new URI('dataset://test')), AccessMode.WRITE) } + } + + def 'should not be hidden'() { + given: + def provider = new DatasetFileSystemProvider() + def path = provider.getPath(new URI('dataset://test')) + + expect: + !provider.isHidden(path) + } + + @Unroll + def 'should report isSameFile=#expected for #left vs #right'() { + given: + def provider = new DatasetFileSystemProvider() + def a = provider.getPath(new URI(left)) + def b = provider.getPath(new URI(right)) + + expect: + provider.isSameFile(a, b) == expected + + where: + left | right | expected + 'dataset://test' | 'dataset://test' | true + 'dataset://test1' | 'dataset://test2' | false + } +} diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetIntegrationTest.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetIntegrationTest.groovy new file mode 100644 index 0000000000..8c0434cbfc --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetIntegrationTest.groovy @@ -0,0 +1,212 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.seqera.tower.plugin.dataset

import java.nio.file.Files
import java.nio.file.LinkOption
import java.nio.file.attribute.BasicFileAttributes

import com.github.tomakehurst.wiremock.WireMockServer
import spock.lang.TempDir
import spock.lang.Unroll

import static com.github.tomakehurst.wiremock.client.WireMock.*

/**
 * Integration tests that exercise the full dataset:// flow:
 * URI string → DatasetPathFactory → DatasetPath → DatasetResolver → resolved Path → I/O
 *
 * Uses WireMock for Platform API and local file:// paths as the resolved "cloud" storage.
 * The WireMock server and the mocked Nextflow session come from {@code DatasetWireMockSpec}.
 *
 * @author Edmund Miller
 */
class DatasetIntegrationTest extends DatasetWireMockSpec {

    // Per-test scratch directory for the files that stand in for cloud storage
    @TempDir
    File tempDir

    def 'should resolve dataset path and read file contents'() {
        given: 'a local file simulating cloud storage'
        def csvFile = makeFile('samples.csv', 'sample,fastq_1,fastq_2\nSAMPLE1,s1_R1.fq.gz,s1_R2.fq.gz\n')

        and: 'Platform API mocks'
        mockSession(workspaceId: '100')
        stubDatasets([[id: '42', name: 'my-samplesheet', mediaType: 'text/csv']], '100')
        stubDatasetVersions('42', [[version: 1, url: csvFile.toURI().toString(), fileName: 'samples.csv']], '100')

        when: 'resolving the dataset path'
        def path = DatasetResolver.resolve('my-samplesheet', null)

        then: 'resolved to the local file and readable'
        path != null
        Files.exists(path)
        Files.readString(path).contains('SAMPLE1')

        and: 'correct API calls were made'
        // both the dataset listing and the version listing must carry auth + workspace scope
        wireMock.verify(getRequestedFor(urlPathEqualTo('/datasets'))
            .withHeader('Authorization', equalTo('Bearer test-token'))
            .withQueryParam('workspaceId', equalTo('100')))
        wireMock.verify(getRequestedFor(urlPathEqualTo('/datasets/42/versions'))
            .withQueryParam('workspaceId', equalTo('100')))
    }

    @Unroll
    def 'should resolve #scenario'() {
        given:
        mockSession(workspaceId: '100')
        stubDatasets([[id: '42', name: 'my-data']])

        and:
        // materialise each version row as a real file and stub its version entry
        def versions = versionRows.collect { row ->
            def file = makeFile(row.fileName as String, row.content as String)
            [version: row.version, url: file.toURI().toString()]
        }
        stubDatasetVersions('42', versions)

        when:
        def path = DatasetResolver.resolve('my-data', requestedVersion)

        then:
        Files.readString(path) == expectedContent

        where:
        scenario | versionRows | requestedVersion | expectedContent
        'specific dataset version' | [[version: 1, fileName: 'v1.csv', content: 'version1'], [version: 2, fileName: 'v2.csv', content: 'version2']] | '1' | 'version1'
        'latest dataset version' | [[version: 1, fileName: 'v1.csv', content: 'old'], [version: 3, fileName: 'v3.csv', content: 'latest']] | null | 'latest'
    }

    @Unroll
    def 'should delegate #operation through provider'() {
        given:
        def dataFile = makeFile('data.csv', fileContent)

        and:
        mockSession(workspaceId: '100')
        stubDatasets([[id: '7', name: 'test-ds']])
        stubDatasetVersions('7', [[version: 1, url: dataFile.toURI().toString()]])

        and:
        def provider = new DatasetFileSystemProvider()
        def path = provider.getPath(new URI('dataset://test-ds'))

        when:
        def result = operationFn.call(provider, path)

        then:
        assert assertFn.call(result)

        where:
        // readAttributes case: attrs.size() == 5 is the byte length of the 'hello' fixture
        operation | fileContent | operationFn | assertFn
        'newInputStream' | 'col1,col2\na,b\n' | { DatasetFileSystemProvider providerRef, dsPath -> providerRef.newInputStream(dsPath).text } | { value -> value == 'col1,col2\na,b\n' }
        'readAttributes' | 'hello' | { DatasetFileSystemProvider providerRef, dsPath -> providerRef.readAttributes(dsPath, BasicFileAttributes, new LinkOption[0]) } | { attrs -> attrs.size() == 5 && !attrs.isDirectory() && attrs.isRegularFile() }
    }

    def 'should forward bearer auth when reading platform dataset download URLs'() {
        given:
        mockSession(workspaceId: '100')
        stubDatasets([[id: '7', name: 'secure-ds']], '100')

        and:
        // the download URL points back at the same WireMock host as the Platform API,
        // so the provider is expected to attach the bearer token when fetching it
        def downloadPath = '/workspaces/100/datasets/7/v/1/n/data.csv'
        def downloadUrl = "http://localhost:${wireMock.port()}${downloadPath}"
        stubDatasetVersions('7', [[version: 1, url: downloadUrl, fileName: 'data.csv']], '100')
        wireMock.stubFor(get(urlPathEqualTo(downloadPath))
            .withHeader('Authorization', equalTo('Bearer test-token'))
            .willReturn(ok('secure,data\na,b\n')))

        and:
        def provider = new DatasetFileSystemProvider()
        def path = provider.getPath(new URI('dataset://secure-ds'))

        when:
        def content = provider.newInputStream(path).text

        then:
        content == 'secure,data\na,b\n'

        and:
        wireMock.verify(1, getRequestedFor(urlPathEqualTo(downloadPath))
            .withHeader('Authorization', equalTo('Bearer test-token')))
    }

    def 'should not forward bearer auth to non-platform hosts'() {
        given:
        // a second WireMock instance plays the role of an unrelated external host
        def externalHost = new WireMockServer(0)
        externalHost.start()

        and:
        mockSession(workspaceId: '100')
        stubDatasets([[id: '7', name: 'external-ds']], '100')

        and:
        // priority 1 stub fails the test with 401 if ANY Authorization header is sent;
        // priority 10 stub serves the content for a header-less request
        def downloadPath = '/workspaces/100/datasets/7/v/1/n/data.csv'
        def downloadUrl = "http://localhost:${externalHost.port()}${downloadPath}"
        stubDatasetVersions('7', [[version: 1, url: downloadUrl, fileName: 'data.csv']], '100')
        externalHost.stubFor(get(urlPathEqualTo(downloadPath))
            .withHeader('Authorization', matching('.+'))
            .atPriority(1)
            .willReturn(unauthorized()))
        externalHost.stubFor(get(urlPathEqualTo(downloadPath))
            .atPriority(10)
            .willReturn(ok('public,data\nx,y\n')))

        and:
        def provider = new DatasetFileSystemProvider()
        def path = provider.getPath(new URI('dataset://external-ds'))

        when:
        def content = provider.newInputStream(path).text

        then:
        content == 'public,data\nx,y\n'

        cleanup:
        externalHost.stop()
    }

    def 'should cache resolved path across multiple reads'() {
        given:
        def dataFile = makeFile('data.csv', 'cached')

        and:
        mockSession(workspaceId: '100')
        stubDatasets([[id: '7', name: 'test-ds']])
        stubDatasetVersions('7', [[version: 1, url: dataFile.toURI().toString()]])

        and:
        def provider = new DatasetFileSystemProvider()
        def path = provider.getPath(new URI('dataset://test-ds'))

        when: 'reading twice'
        def content1 = provider.newInputStream(path).text
        def content2 = provider.newInputStream(path).text

        then: 'both reads succeed'
        content1 == 'cached'
        content2 == 'cached'

        and: 'API was only called once (path cached on DatasetPath)'
        wireMock.verify(1, getRequestedFor(urlPathEqualTo('/datasets')))
        wireMock.verify(1, getRequestedFor(urlPathEqualTo('/datasets/7/versions')))
    }

    // Write a fixture file into the temp dir and return it
    private File makeFile(String name, String content) {
        def file = new File(tempDir, name)
        file.text = content
        return file
    }
}
diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetLiveAuthRegressionTest.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetLiveAuthRegressionTest.groovy new file mode 100644 index 0000000000..b2fef81d4d --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetLiveAuthRegressionTest.groovy @@ -0,0 +1,71 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.seqera.tower.plugin.dataset + +import nextflow.Global +import nextflow.Session +import spock.lang.Requires +import spock.lang.Specification +import spock.lang.Unroll + +/** + * Live regression for dataset file reads backed by Seqera API URLs.
+ * + * Requires env vars: + * - TOWER_ACCESS_TOKEN + * - TOWER_WORKSPACE_ID + * - DATASET_LIVE_NAMES (comma-separated) + */ +@Requires({ env['TOWER_ACCESS_TOKEN'] && env['TOWER_WORKSPACE_ID'] && env['DATASET_LIVE_NAMES'] }) +class DatasetLiveAuthRegressionTest extends Specification { + + def cleanup() { + Global.session = null + } + + @Unroll + def 'should read live dataset via provider using bearer auth - #datasetName'() { + given: + Global.session = Mock(Session) { + getConfig() >> [tower: [ + endpoint: System.getenv('TOWER_ENDPOINT') ?: 'https://api.cloud.seqera.io', + accessToken: System.getenv('TOWER_ACCESS_TOKEN'), + workspaceId: System.getenv('TOWER_WORKSPACE_ID') + ]] + } + + and: + def provider = new DatasetFileSystemProvider() + def path = provider.getPath(new URI("dataset:///${datasetName}")) + + when: + def bytes = provider.newInputStream(path).readNBytes(64) + + then: + bytes.length > 0 + + where: + datasetName << datasetNames() + } + + private static List datasetNames() { + System.getenv('DATASET_LIVE_NAMES') + .split(',') + .collect { it.trim() } + .findAll { it } + } +} diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetPathFactoryTest.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetPathFactoryTest.groovy new file mode 100644 index 0000000000..85842ccb65 --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetPathFactoryTest.groovy @@ -0,0 +1,72 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.seqera.tower.plugin.dataset + +import java.nio.file.Paths + +import spock.lang.Shared +import spock.lang.Specification +import spock.lang.Unroll + +/** + * @author Edmund Miller + */ +class DatasetPathFactoryTest extends Specification { + + @Shared + DatasetPathFactory factory = new DatasetPathFactory() + + @Shared + DatasetFileSystem datasetFs = new DatasetFileSystem(new DatasetFileSystemProvider(), null) + + @Unroll + def 'parseUri should return null for non-dataset URI #value'() { + expect: + factory.parseUri(value) == null + + where: + value << ['s3://bucket/key', '/local/path', 'gs://bucket/key'] + } + + @Unroll + def 'toUriString should return #expected for #scenario'() { + given: + def path = pathFactory.call() + + expect: + factory.toUriString(path) == expected + + where: + scenario | pathFactory | expected + 'DatasetPath input' | { new DatasetPath(datasetFs, 'my-data') } | 'dataset://my-data' + 'non-DatasetPath' | { Paths.get('/tmp/test') } | null + } + + @Unroll + def '#methodName should return null for dataset path'() { + given: + def path = new DatasetPath(datasetFs, 'my-data') + + expect: + invoke.call(factory, path) == null + + where: + methodName | invoke + 'getBashLib' | { DatasetPathFactory f, target -> f.getBashLib(target) } + 'getUploadCmd' | { DatasetPathFactory f, target -> f.getUploadCmd('/tmp/file', target) } + } +} diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetPathTest.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetPathTest.groovy new file mode 100644 index 0000000000..fd18706130 --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetPathTest.groovy @@ -0,0 +1,124 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with 
the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.seqera.tower.plugin.dataset

import spock.lang.Shared
import spock.lang.Specification
import spock.lang.Unroll

/**
 * Unit tests for {@link DatasetPath}: URI and string parsing,
 * basic Path semantics, equality and ordering.
 *
 * @author Edmund Miller
 */
class DatasetPathTest extends Specification {

    @Shared
    DatasetFileSystem fileSystem = new DatasetFileSystem(new DatasetFileSystemProvider(), null)

    @Unroll
    def 'should parse dataset URI #uriString'() {
        given:
        def path = new DatasetPath(fileSystem, new URI(uriString))

        expect:
        // note: the triple-slash form normalizes to the double-slash toString,
        // and the ?version query is carried through to toString
        path.datasetName == expectedName
        path.version == expectedVersion
        path.toString() == expectedToString

        where:
        uriString | expectedName | expectedVersion | expectedToString
        'dataset://my-samplesheet' | 'my-samplesheet' | null | 'dataset://my-samplesheet'
        'dataset:///my-samplesheet' | 'my-samplesheet' | null | 'dataset://my-samplesheet'
        'dataset://my-samplesheet?version=3' | 'my-samplesheet' | '3' | 'dataset://my-samplesheet?version=3'
    }

    @Unroll
    def 'should parse string path #rawPath'() {
        given:
        def path = new DatasetPath(fileSystem, rawPath)

        expect:
        // string form accepts a leading slash and a '@<version>' suffix
        path.datasetName == expectedName
        path.version == expectedVersion

        where:
        rawPath | expectedName | expectedVersion
        'my-samplesheet' | 'my-samplesheet' | null
        '/my-samplesheet' | 'my-samplesheet' | null
        'my-samplesheet@2' | 'my-samplesheet' | '2'
    }

    def 'should expose basic path semantics'() {
        given:
        def path = new DatasetPath(fileSystem, 'test')

        expect:
        // a dataset path is a single absolute name element that is its own absolute path
        path.isAbsolute()
        path.getNameCount() == 1
        path.getName(0) == path
        path.toUri() == new URI('dataset://test')
        path.toAbsolutePath().is(path)
    }

    @Unroll
    def 'should throw for getName(#index)'() {
        when:
        // only index 0 is valid for a single-element path
        new DatasetPath(fileSystem, 'test').getName(index)

        then:
        thrown(IllegalArgumentException)

        where:
        index << [-1, 1]
    }

    @Unroll
    def 'should compare equality for #left vs #right'() {
        given:
        def a = new DatasetPath(fileSystem, left)
        def b = new DatasetPath(fileSystem, right)

        expect:
        (a == b) == expectedEqual

        and:
        // hashCode must agree only when the paths are equal
        if (expectedEqual) {
            assert a.hashCode() == b.hashCode()
        }

        where:
        left | right | expectedEqual
        'data' | 'data' | true
        'data1' | 'data2' | false
        'data@1' | 'data@2' | false
    }

    @Unroll
    def 'should compare order for #left compared to #right'() {
        given:
        def a = new DatasetPath(fileSystem, left)
        def b = new DatasetPath(fileSystem, right)

        expect:
        Integer.signum(a.compareTo(b)) == expectedSign

        where:
        left | right | expectedSign
        'alpha' | 'beta' | -1
        'beta' | 'alpha' | 1
        'alpha' | 'alpha@2' | -1
    }
}
diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetResolverTest.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetResolverTest.groovy new file mode 100644 index 0000000000..f45e01a1d3 --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetResolverTest.groovy @@ -0,0 +1,124 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
 */

package io.seqera.tower.plugin.dataset

import nextflow.Global
import nextflow.exception.AbortOperationException
import spock.lang.Unroll

import static com.github.tomakehurst.wiremock.client.WireMock.*

/**
 * Unit tests for {@link DatasetResolver} error handling and Platform API
 * request shape, backed by the WireMock fixture in {@code DatasetWireMockSpec}.
 *
 * @author Edmund Miller
 */
class DatasetResolverTest extends DatasetWireMockSpec {

    def 'should throw when no session'() {
        given:
        Global.session = null

        when:
        DatasetResolver.resolve('my-data', null)

        then:
        thrown(AbortOperationException)
    }

    @Unroll
    def 'should reject invalid dataset name: #datasetName'() {
        when:
        DatasetResolver.resolve(datasetName, null)

        then:
        thrown(IllegalArgumentException)

        where:
        datasetName << ['', null]
    }

    @Unroll
    def 'should fail dataset lookup when #scenario'() {
        given:
        mockSession()
        // each row installs its own stub before the resolve call
        stubLookup.call()

        when:
        DatasetResolver.resolve('my-data', null)

        then:
        def e = thrown(AbortOperationException)
        // every expected fragment must appear in the error message
        messageParts.each { part -> assert e.message.contains(part) }

        where:
        scenario | stubLookup | messageParts
        'dataset does not exist' | { stubDatasets([[id: '1', name: 'other-data']]) } | ['not found', 'other-data']
        'workspace has no datasets' | { stubDatasets([]) } | ['No datasets found in workspace']
        'api returns unauthorized' | { wireMock.stubFor(get(urlPathEqualTo('/datasets')).willReturn(unauthorized())) } | ['Access denied']
    }

    @Unroll
    def 'should fail version lookup when #scenario'() {
        given:
        mockSession()
        stubDatasets([[id: '42', name: 'my-data']])
        stubDatasetVersions('42', versions)

        when:
        DatasetResolver.resolve('my-data', requestedVersion)

        then:
        def e = thrown(AbortOperationException)
        e.message.contains(expectedMessage)

        where:
        scenario | versions | requestedVersion | expectedMessage
        'selected version has no backing URL' | [[version: 1, url: null]] | null | 'no backing storage URL'
        'requested version does not exist' | [[version: 1, url: 's3://bucket/v1.csv']] | '99' | "Version '99' not found"
    }

    def 'should pass workspace ID as query param'() {
        given:
        mockSession(workspaceId: '12345')
        stubDatasets([], '12345')

        when:
        DatasetResolver.resolve('my-data', null)

        then:
        // empty dataset list aborts the resolve, but the request itself is what we verify
        thrown(AbortOperationException)

        and:
        wireMock.verify(getRequestedFor(urlPathEqualTo('/datasets'))
            .withQueryParam('workspaceId', equalTo('12345')))
    }

    def 'should send bearer token in Authorization header'() {
        given:
        mockSession()
        stubDatasets([])

        when:
        DatasetResolver.resolve('my-data', null)

        then:
        // empty dataset list aborts the resolve, but the request itself is what we verify
        thrown(AbortOperationException)

        and:
        wireMock.verify(getRequestedFor(urlPathEqualTo('/datasets'))
            .withHeader('Authorization', equalTo('Bearer test-token')))
    }
}
diff --git a/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetWireMockSpec.groovy b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetWireMockSpec.groovy new file mode 100644 index 0000000000..7555ddf321 --- /dev/null +++ b/plugins/nf-tower/src/test/io/seqera/tower/plugin/dataset/DatasetWireMockSpec.groovy @@ -0,0 +1,76 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package io.seqera.tower.plugin.dataset + +import groovy.json.JsonOutput +import com.github.tomakehurst.wiremock.WireMockServer +import com.github.tomakehurst.wiremock.client.WireMock +import nextflow.Global +import nextflow.Session +import spock.lang.AutoCleanup +import spock.lang.Shared +import spock.lang.Specification + +import static com.github.tomakehurst.wiremock.client.WireMock.* + +/** + * Shared WireMock/session fixture for dataset resolver specs. + */ +abstract class DatasetWireMockSpec extends Specification { + + @Shared + @AutoCleanup('stop') + WireMockServer wireMock = new WireMockServer(0) + + def setupSpec() { + wireMock.start() + WireMock.configureFor('localhost', wireMock.port()) + } + + def setup() { + wireMock.resetAll() + } + + def cleanup() { + Global.session = null + } + + protected void mockSession(Map towerOverrides = [:]) { + def endpoint = "http://localhost:${wireMock.port()}" + def towerConfig = [endpoint: endpoint, accessToken: 'test-token', workspaceId: '12345'] + towerOverrides + + Global.session = Mock(Session) { + getConfig() >> [tower: towerConfig] + } + } + + protected void stubDatasets(List datasets, String workspaceId = null) { + def request = get(urlPathEqualTo('/datasets')) + if (workspaceId) + request = request.withQueryParam('workspaceId', equalTo(workspaceId)) + + wireMock.stubFor(request.willReturn(okJson(JsonOutput.toJson([datasets: datasets])))) + } + + protected void stubDatasetVersions(String datasetId, List versions, String workspaceId = null) { + def request = get(urlPathEqualTo("/datasets/${datasetId}/versions")) + if (workspaceId) + request = request.withQueryParam('workspaceId', equalTo(workspaceId)) + + wireMock.stubFor(request.willReturn(okJson(JsonOutput.toJson([versions: versions])))) + } +}