diff --git a/plugins/nf-tower/build.gradle b/plugins/nf-tower/build.gradle index 484374f212..5ffe85f27a 100644 --- a/plugins/nf-tower/build.gradle +++ b/plugins/nf-tower/build.gradle @@ -26,7 +26,8 @@ nextflowPlugin { 'io.seqera.tower.plugin.TowerFactory', 'io.seqera.tower.plugin.TowerFusionToken', 'io.seqera.tower.plugin.auth.AuthCommandImpl', - 'io.seqera.tower.plugin.launch.LaunchCommandImpl' + 'io.seqera.tower.plugin.launch.LaunchCommandImpl', + 'io.seqera.tower.plugin.dataset.DatasetPathFactory', ] } diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystem.java b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystem.java new file mode 100644 index 0000000000..9a8f6f4ceb --- /dev/null +++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetFileSystem.java @@ -0,0 +1,123 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.seqera.tower.plugin.dataset; + +import java.io.IOException; +import java.nio.file.FileStore; +import java.nio.file.FileSystem; +import java.nio.file.Path; +import java.nio.file.PathMatcher; +import java.nio.file.WatchService; +import java.nio.file.attribute.UserPrincipalLookupService; +import java.nio.file.spi.FileSystemProvider; +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +/** + * Minimal read-only FileSystem for the {@code dataset://} scheme. + *
+ * Datasets are single files resolved via the Seqera Platform API,
+ * so most filesystem operations (roots, stores, path matching)
+ * are either trivial or unsupported.
+ *
+ * @author Edmund Miller
+ */
+public class DatasetFileSystem extends FileSystem {
+
+ static final String PATH_SEPARATOR = "/";
+
+ private final DatasetFileSystemProvider provider;
+ private final Map
+ * Resolves dataset URIs of the form {@code dataset://name} or
+ * {@code dataset://name?version=N} to their backing cloud storage
+ * path via the Seqera Platform API, then delegates all I/O
+ * operations to the resolved path's provider.
+ *
+ * Phase 1: read-only — write operations throw {@link ReadOnlyFileSystemException}.
+ *
+ * @author Edmund Miller
+ */
+public class DatasetFileSystemProvider extends FileSystemProvider {
+
+    private static final Logger log = LoggerFactory.getLogger(DatasetFileSystemProvider.class);
+    // Matches the Platform-internal dataset path layout:
+    // .../workspaces/{ws}/datasets/{id}/v/{version}/n/{name}
+    private static final Pattern PLATFORM_DATASET_PATH = Pattern.compile(".*?/workspaces/[^/]+/datasets/[^/]+/v/[^/]+/n/.+");
+
+    // Singleton filesystem instance; volatile for safe lazy publication
+    private volatile DatasetFileSystem fileSystem;
+
+    /** Returns the URI scheme handled by this provider: {@code dataset}. */
+    @Override
+    public String getScheme() {
+        return "dataset";
+    }
+
+ @Override
+ public FileSystem newFileSystem(URI uri, Map
+ * URI format: {@code dataset://name} or {@code dataset://name?version=N}
+ *
+ * The path lazily resolves to the backing cloud storage path
+ * (S3/GCS/Azure) via the Platform API on first I/O access.
+ *
+ * @author Edmund Miller
+ */
+public class DatasetPath implements Path {
+
+    private static final Logger log = LoggerFactory.getLogger(DatasetPath.class);
+
+    // Owning filesystem instance (returned by getFileSystem())
+    private final DatasetFileSystem fileSystem;
+    // Dataset name as shown in Seqera Platform
+    private final String datasetName;
+    private final String version; // null = latest
+    // Canonical dataset://name[?version=N] URI for this path
+    private final URI uri;
+
+    /** Cached resolved cloud path — populated lazily on first I/O */
+    private volatile Path resolvedPath;
+
+    /**
+     * Construct from a URI (e.g. from provider.getPath(URI)).
+     *
+     * Accepts both {@code dataset://name} (name carried in the host
+     * component) and {@code dataset:///name} (name as the first path
+     * segment). An optional {@code ?version=N} query selects a specific
+     * dataset version; absent means latest.
+     */
+    DatasetPath(DatasetFileSystem fileSystem, URI uri) {
+        this.fileSystem = fileSystem;
+        this.uri = uri;
+        // dataset://my-samplesheet or dataset:///my-samplesheet
+        // host = dataset name, or if host is null, first path segment is the name
+        String name = uri.getHost();
+        if (name == null || name.isEmpty()) {
+            // handle dataset:///name form
+            String path = uri.getPath();
+            if (path != null && path.startsWith("/")) {
+                // drop the leading slash so only the bare name remains
+                path = path.substring(1);
+            }
+            name = path;
+        }
+        this.datasetName = name;
+        // parse ?version=N from query string
+        this.version = parseVersion(uri.getQuery());
+    }
+
+    /**
+     * Construct from string path (e.g. from fileSystem.getPath()).
+     *
+     * A {@code name@version} suffix selects a specific version; otherwise
+     * the version is null (latest). Note: {@code indexOf('@') > 0} means a
+     * string that STARTS with '@' keeps the '@' as part of the name rather
+     * than being treated as an empty name with a version.
+     */
+    DatasetPath(DatasetFileSystem fileSystem, String path) {
+        this.fileSystem = fileSystem;
+        // strip leading slash if present
+        if (path.startsWith("/")) {
+            path = path.substring(1);
+        }
+        // check for version suffix: name@version
+        int atIdx = path.indexOf('@');
+        if (atIdx > 0) {
+            this.datasetName = path.substring(0, atIdx);
+            this.version = path.substring(atIdx + 1);
+        }
+        else {
+            this.datasetName = path;
+            this.version = null;
+        }
+        // rebuild the canonical dataset:// URI from the parsed parts
+        this.uri = URI.create("dataset://" + datasetName + (version != null ? "?version=" + version : ""));
+    }
+
+    /** The dataset name as shown in Seqera Platform. */
+    public String getDatasetName() {
+        return datasetName;
+    }
+
+    /** The requested dataset version, or {@code null} meaning latest. */
+    public String getVersion() {
+        return version;
+    }
+
+    /**
+     * Resolve this dataset reference to the backing cloud storage path.
+     * Lazily initialized and cached; uses double-checked locking over the
+     * volatile {@code resolvedPath} field so at most one thread performs
+     * the (remote) resolution.
+     *
+     * @throws IOException if resolution via the Platform API fails
+     */
+    Path getResolvedPath() throws IOException {
+        if (resolvedPath == null) {
+            synchronized (this) {
+                // re-check under the lock: another thread may have resolved
+                if (resolvedPath == null) {
+                    log.debug("Resolving dataset '{}' version={}", datasetName, version != null ? version : "latest");
+                    resolvedPath = DatasetResolver.resolve(datasetName, version);
+                    log.debug("Resolved dataset '{}' -> {}", datasetName, resolvedPath);
+                }
+            }
+        }
+        return resolvedPath;
+    }
+
+ // -- Path interface --
+
+    /** The owning dataset filesystem. */
+    @Override
+    public FileSystem getFileSystem() {
+        return fileSystem;
+    }
+
+    /** Dataset paths are always treated as absolute references. */
+    @Override
+    public boolean isAbsolute() {
+        return true;
+    }
+
+    /** Dataset paths have no root component. */
+    @Override
+    public Path getRoot() {
+        return null;
+    }
+
+    @Override
+    public Path getFileName() {
+        // The dataset name is the "file name"
+        // NOTE(review): the returned path is built from the name only, so
+        // any explicit version is dropped here -- confirm that is intended.
+        return new DatasetPath(fileSystem, datasetName);
+    }
+
+    /** Dataset paths are leaf references and have no parent. */
+    @Override
+    public Path getParent() {
+        return null;
+    }
+
+    /** A dataset path always has exactly one name element (the dataset name). */
+    @Override
+    public int getNameCount() {
+        return 1;
+    }
+
+    /**
+     * Return the single name element; only index 0 is valid.
+     *
+     * @throws IllegalArgumentException if {@code index != 0}
+     */
+    @Override
+    public Path getName(int index) {
+        if (index != 0) {
+            throw new IllegalArgumentException("Invalid name index: " + index);
+        }
+        return this;
+    }
+
+    /**
+     * With a single name element, the only valid subpath range is [0, 1).
+     *
+     * @throws IllegalArgumentException for any other range
+     */
+    @Override
+    public Path subpath(int beginIndex, int endIndex) {
+        if (beginIndex != 0 || endIndex != 1) {
+            throw new IllegalArgumentException("Invalid subpath range");
+        }
+        return this;
+    }
+
+    /** Single-element paths: prefix matching degenerates to equality. */
+    @Override
+    public boolean startsWith(Path other) {
+        return equals(other);
+    }
+
+    /** Single-element paths: suffix matching degenerates to equality. */
+    @Override
+    public boolean endsWith(Path other) {
+        return equals(other);
+    }
+
+    /** Dataset paths contain no '.' or '..' elements; already normalized. */
+    @Override
+    public Path normalize() {
+        return this;
+    }
+
+    /**
+     * @throws UnsupportedOperationException always — dataset paths are
+     *         leaf nodes and cannot have children resolved against them
+     */
+    @Override
+    public Path resolve(Path other) {
+        // dataset paths are leaf nodes, cannot resolve children
+        throw new UnsupportedOperationException("Cannot resolve against a dataset path");
+    }
+
+    /** @throws UnsupportedOperationException always — not meaningful for datasets */
+    @Override
+    public Path relativize(Path other) {
+        throw new UnsupportedOperationException("Cannot relativize dataset paths");
+    }
+
+    /** The canonical {@code dataset://name[?version=N]} URI built at construction. */
+    @Override
+    public URI toUri() {
+        return uri;
+    }
+
+    /** Already absolute — returns this instance. */
+    @Override
+    public Path toAbsolutePath() {
+        return this;
+    }
+
+    /**
+     * Return the resolved cloud path as the "real" path. This may trigger
+     * the lazy Platform API resolution on first call.
+     *
+     * @throws IOException if resolution fails
+     */
+    @Override
+    public Path toRealPath(LinkOption... options) throws IOException {
+        // Return the resolved cloud path as the "real" path
+        return getResolvedPath();
+    }
+
+    /**
+     * Watch registration is not supported for dataset paths.
+     * NOTE(review): the generic bound on {@code WatchEvent.Kind} appears
+     * garbled in this patch view (should read {@code WatchEvent.Kind<?>[]})
+     * -- verify against the original source.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public WatchKey register(WatchService watcher, WatchEvent.Kind>[] events, WatchEvent.Modifier... modifiers) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Order dataset paths by name, then by version; a {@code null} version
+     * ("latest") sorts before any explicit version.
+     */
+    @Override
+    public int compareTo(Path other) {
+        if (other instanceof DatasetPath) {
+            DatasetPath o = (DatasetPath) other;
+            int cmp = datasetName.compareTo(o.datasetName);
+            if (cmp != 0) return cmp;
+            if (version == null && o.version == null) return 0;
+            if (version == null) return -1;
+            if (o.version == null) return 1;
+            return version.compareTo(o.version);
+        }
+        // NOTE(review): Path#compareTo is specified to throw
+        // ClassCastException when comparing paths from different providers;
+        // falling back to string comparison deviates from that contract --
+        // confirm this lenient behaviour is intended.
+        return toString().compareTo(other.toString());
+    }
+
+ @Override
+ public Iterator
+ * Registered as a Nextflow {@code ExtensionPoint} so that
+ * {@code FileHelper.asPath("dataset://my-samplesheet")} works
+ * transparently — no pipeline code changes needed.
+ *
+ * @author Edmund Miller
+ */
+@Slf4j
+@CompileStatic
+class DatasetPathFactory extends FileSystemPathFactory {
+
+    /**
+     * Parse a {@code dataset://} URI string into a dataset Path; returns
+     * {@code null} for any other scheme so other factories can handle it.
+     */
+    @Override
+    protected Path parseUri(String str) {
+        if (!str.startsWith('dataset://'))
+            return null
+
+        log.debug "Parsing dataset URI: {}", str
+
+        // Normalise to triple-slash form for URI parsing:
+        // dataset://name → dataset:///name
+        final normalized = str.startsWith('dataset:///') ? str : 'dataset:///' + str.substring('dataset://'.length())
+
+        // NOTE(review): the multi-arg URI constructor is invoked with a null
+        // scheme and the full 'dataset:///...' string as the path component;
+        // this relies on subtle URI reconstruction/re-parsing behaviour --
+        // confirm it actually yields a URI with scheme 'dataset' rather than
+        // a relative URI whose first path segment contains a colon.
+        final uri = new URI(null, null, normalized, null, null)
+        return FileHelper.getOrCreateFileSystemFor(uri).provider().getPath(uri)
+    }
+
+    /**
+     * Render a dataset Path back to its URI string; returns {@code null}
+     * for paths owned by other providers.
+     */
+    @Override
+    protected String toUriString(Path path) {
+        if (path instanceof DatasetPath) {
+            // presumably DatasetPath#toString renders the dataset:// URI --
+            // TODO confirm; otherwise path.toUri().toString() may be intended
+            return path.toString()
+        }
+        return null
+    }
+
+    @Override
+    protected String getBashLib(Path target) {
+        // dataset:// paths are resolved to cloud paths before execution,
+        // no special bash lib needed
+        return null
+    }
+
+    @Override
+    protected String getUploadCmd(String source, Path target) {
+        // read-only — no upload support
+        return null
+    }
+}
diff --git a/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetResolver.groovy b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetResolver.groovy
new file mode 100644
index 0000000000..f77b8c9d9a
--- /dev/null
+++ b/plugins/nf-tower/src/main/io/seqera/tower/plugin/dataset/DatasetResolver.groovy
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2013-2024, Seqera Labs
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.seqera.tower.plugin.dataset
+
+import java.net.http.HttpClient
+import java.net.http.HttpRequest
+import java.net.http.HttpResponse
+import java.nio.file.Path
+
+import groovy.json.JsonSlurper
+import groovy.transform.CompileStatic
+import groovy.util.logging.Slf4j
+import nextflow.Global
+import nextflow.Session
+import nextflow.exception.AbortOperationException
+import nextflow.file.FileHelper
+import nextflow.platform.PlatformHelper
+
+/**
+ * Resolves a Seqera Platform dataset reference to its backing cloud storage path.
+ *
+ * Resolution chain:
+ * 1. Dataset name → GET /datasets?workspaceId=X → DatasetDto.id
+ * 2. Dataset id + version → GET /datasets/{id}/versions → DatasetVersionDto.url
+ * 3. Cloud URL string → FileHelper.asPath() → concrete cloud Path (S3/GCS/Azure)
+ *
+ * @author Edmund Miller
+ */
+@Slf4j
+@CompileStatic
+class DatasetResolver {
+
+    /** Seqera Platform API endpoint — delegates to {@code getEndpoint()} (defined elsewhere in this class). */
+    static String towerEndpoint() {
+        return getEndpoint()
+    }
+
+    /** Platform access token — delegates to {@code getAccessToken()} (defined elsewhere in this class). */
+    static String towerAccessToken() {
+        return getAccessToken()
+    }
+
+    /**
+     * Resolve a dataset name (and optional version) to the backing cloud storage Path.
+     *
+     * Resolution is a three-step chain: name → dataset id, id+version →
+     * cloud URL, cloud URL → concrete Path via Nextflow's FileHelper.
+     * A fresh HttpClient is created for each resolution call.
+     *
+     * @param datasetName The dataset name as shown in Seqera Platform; must not be null/empty
+     * @param version The version number (null = latest)
+     * @return A concrete cloud storage Path (e.g. S3Path, GcsPath)
+     * @throws IllegalArgumentException if the dataset name is null or empty
+     * @throws AbortOperationException if no Platform access token is configured
+     */
+    static Path resolve(String datasetName, String version) {
+        if (!datasetName)
+            throw new IllegalArgumentException("Dataset name cannot be null or empty")
+
+        final String endpoint = getEndpoint()
+        final String accessToken = getAccessToken()
+        final String workspaceId = getWorkspaceId()
+
+        if (!accessToken)
+            throw new AbortOperationException("Missing Seqera Platform access token -- set TOWER_ACCESS_TOKEN or tower.accessToken in config")
+
+        final HttpClient httpClient = HttpClient.newHttpClient()
+
+        // Step 1: Resolve dataset name → dataset ID
+        final String datasetId = resolveDatasetId(httpClient, endpoint, accessToken, workspaceId, datasetName)
+
+        // Step 2: Resolve dataset ID + version → cloud storage URL
+        final String cloudUrl = resolveCloudUrl(httpClient, endpoint, accessToken, workspaceId, datasetId, version)
+
+        log.debug "Dataset '{}' resolved to cloud URL: {}", datasetName, cloudUrl
+
+        // Step 3: Convert cloud URL → Path via Nextflow's FileHelper
+        return FileHelper.asPath(cloudUrl)
+    }
+
+ /**
+ * Look up a dataset by name, return its ID.
+ */
+ static private String resolveDatasetId(HttpClient httpClient, String endpoint, String accessToken, String workspaceId, String datasetName) {
+ String url = "${endpoint}/datasets"
+ if (workspaceId) {
+ url += "?workspaceId=${workspaceId}"
+ }
+
+ log.debug "Listing datasets from: {}", url
+
+ final Map json = httpGet(httpClient, url, accessToken)
+ final List