From 2f268fb1d7083db85bf48df84b4c4c5cf31ad799 Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Tue, 8 Nov 2022 10:19:09 +0100 Subject: [PATCH] pool: drop RBD based CEPH support Motivation: It turned out that CEPH support is not that popular. Moreover, sites that have tried to run dcache+ceph decided to go for CephFS (mounted as a regular FS), which provided better performance, scalability and erasure coding. Modification: Drop RBD based CEPH file channel, corresponding documentation and properties. Result: less code Acked-by: Marina Sahakyan Target: master Require-book: yes Require-notes: yes --- .../src/main/markdown/cookbook-pool.md | 47 --- .../markdown/cookbook-writing-hsm-plugins.md | 10 - docs/TheBook/src/main/markdown/cookbook.md | 1 - docs/TheBook/src/main/markdown/preface.md | 3 - modules/dcache/pom.xml | 4 - .../pool/repository/ceph/CephFileStore.java | 343 ------------------ .../ceph/CephRepositoryChannel.java | 192 ---------- .../org/dcache/pool/classic/pool.xml | 10 - pom.xml | 5 - skel/share/defaults/pool.properties | 14 +- 10 files changed, 5 insertions(+), 624 deletions(-) delete mode 100644 modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephFileStore.java delete mode 100644 modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephRepositoryChannel.java diff --git a/docs/TheBook/src/main/markdown/cookbook-pool.md b/docs/TheBook/src/main/markdown/cookbook-pool.md index c674133dd23..e6cc4027c79 100644 --- a/docs/TheBook/src/main/markdown/cookbook-pool.md +++ b/docs/TheBook/src/main/markdown/cookbook-pool.md @@ -457,53 +457,6 @@ This command does: 3. All new copies of the file will become `sticky`. -## Running pools with CEPH backends - -dCache pools can be configured to store files on locally mounted file systems or use CEPH as a back-end. The property `pool.backend` is used to control which back-end should be used: - -```ini -pool.backend = ceph -``` - -dCache uses CEPH's block devices interface, know as `RBD`, to store data. 
The dCache pools map one-to-one onto CEPH pools. The CEPH pool must be manually created and, if required, configured before dCache can use it. - -```console-root -rados mkpool -``` - -By default, the CEPH pool name is expected to match the dCache pool name. This can be changed by using - -```ini -pool.backend.ceph.pool-name = ceph-pool-to-use -``` - -dCache uses a locally configured ceph client to operate. The location to client configuration files is controlled by `pool.backend.ceph.config` property and defaults to _/etc/ceph/ceph.conf_. - -For authentication, the property `pool.backend.ceph.cluster` is used to set the cluser name to use, and for a cluster name of "CLNAME", the corresponding file `/etc/ceph/ceph.client.CLNAME.keyring` is used as the key ring. - -In order to support HSM with CEPH-backended pools, the HSM script interface provides URI-like syntax to pass file locations to the HSM script: - - rbd:/// - -for instance: - - rbd://dcache-pool-A/00000051ADCB3BA14799844556CD3AF0A9DF - -The HSM script is responsible to read, write and delete RBD image on GET, PUT and DELETE. - -In order to improve the performance of the backend, tests point to the RBD caching configuration (in `/etc/ceph/ceph.conf`) as the most promisting starting point: - - # Start out in write-through mode, and switch to write-back after the - # first flush request is received. Enabling this is a conservative but - # safe setting in case VMs running on rbd are too old to send flushes, - # like the virtio driver in Linux before 2.6.32. - # Type: Boolean - # Required: No - # (Default: true) - ;rbd cache writethrough until flush = true - rbd cache writethrough until flush = false - - ## Keeping metadata on MongoDB In order to speed up database operations for metadata, dCache pools (starting from version 3.2) can store their metadata on an external MongoDB instance. For production scenarios, a dedicated, performance-optimized and well-maintained MongoDB cluster is required. 
diff --git a/docs/TheBook/src/main/markdown/cookbook-writing-hsm-plugins.md b/docs/TheBook/src/main/markdown/cookbook-writing-hsm-plugins.md index e190b61402a..a5aa55b6e8b 100644 --- a/docs/TheBook/src/main/markdown/cookbook-writing-hsm-plugins.md +++ b/docs/TheBook/src/main/markdown/cookbook-writing-hsm-plugins.md @@ -44,16 +44,6 @@ contain replicas. It is the responsibility of the nearline storage driver to copy replicas from the pool to the nearline storage and to copy replicas back from the nearline storage into the pool. -Traditionally, dCache pools have stored replicas in the local file system, and a -flush or stage request would refer to the replica in the local file system by -path. Starting with dCache 3.0, *replica store plugins* may provide alternative -backends such as CEPH and thus replicas in a pool may not be accessible through -the local file system. For this reason, dCache 3.0 and newer identify the -replica in the pool by URI. Unless an alternative replica store is used, such a -URI will always use the `file:` scheme. If compatibility with versions earlier -than 2.17 is desired, a driver should refrain from using the `getReplicaUri` -method. - Once flushed to a nearline storage, the replica is identified by a URI generated by the driver. 
This URI should use the nearline storage type (typically `enstore` or `osm`) as the scheme and the nearline storage instance name as the diff --git a/docs/TheBook/src/main/markdown/cookbook.md b/docs/TheBook/src/main/markdown/cookbook.md index 332c9e42725..46f3c671ce9 100644 --- a/docs/TheBook/src/main/markdown/cookbook.md +++ b/docs/TheBook/src/main/markdown/cookbook.md @@ -44,7 +44,6 @@ This part contains guides for specific tasks a system administrator might want t - [Examples](cookbook-pool.md#examples) - [Renaming a Pool](cookbook-pool.md#renaming-a-pool) - [Pinning Files to a Pool](cookbook-pool.md#pinning-files-to-a-pool) - - [Running pool with CEPH backend](cookbook-pool.md#running-pools-with-ceph-backends) - [Keeping metadata on MongoDB](cookbook-pool.md#keeping-metadata-on-mongodb) - [Handling orphan movers](cookbook-pool.md#handling-orphan-movers) diff --git a/docs/TheBook/src/main/markdown/preface.md b/docs/TheBook/src/main/markdown/preface.md index 183b58d8256..3158d1a1a95 100644 --- a/docs/TheBook/src/main/markdown/preface.md +++ b/docs/TheBook/src/main/markdown/preface.md @@ -23,9 +23,6 @@ A dCache instance will generally consist of many storage (or "pool") nodes. On those nodes, normal Linux filesystems (btrfs, ext4, XFS, ZFS) are used to store data. -Alternatively, dCache pools can use storage space provided by a Ceph object -storage system. - In addition to those possibilities, dCache can use its hierarchical storage management capabilities to transparently use storage systems with different characteristics (like tape libraries for lower-cost, but higher-latency storage). 
Built-in mechanisms can be used to increase performance and balance loads, diff --git a/modules/dcache/pom.xml b/modules/dcache/pom.xml index 1c1fc4d4d7a..0446dcd7b7f 100644 --- a/modules/dcache/pom.xml +++ b/modules/dcache/pom.xml @@ -301,10 +301,6 @@ jimfs test - - org.dcache - rados4j - diff --git a/modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephFileStore.java b/modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephFileStore.java deleted file mode 100644 index 54414a94486..00000000000 --- a/modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephFileStore.java +++ /dev/null @@ -1,343 +0,0 @@ -/* - * dCache - http://www.dcache.org/ - * - * Copyright (C) 2016 - 2017 Deutsches Elektronen-Synchrotron - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ -package org.dcache.pool.repository.ceph; - -import static org.dcache.util.ByteUnit.KiB; - -import com.google.common.primitives.Longs; -import diskCacheV111.util.PnfsId; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.file.NoSuchFileException; -import java.nio.file.OpenOption; -import java.nio.file.attribute.BasicFileAttributeView; -import java.nio.file.attribute.BasicFileAttributes; -import java.nio.file.attribute.FileTime; -import java.util.Set; -import java.util.stream.Collectors; -import jnr.constants.platform.Errno; -import org.dcache.pool.repository.FileStore; -import org.dcache.pool.repository.RepositoryChannel; -import org.dcache.rados4j.IoCtx; -import org.dcache.rados4j.Rados; -import org.dcache.rados4j.RadosClusterInfo; -import org.dcache.rados4j.RadosException; -import org.dcache.rados4j.Rbd; -import org.dcache.rados4j.RbdImage; -import org.dcache.rados4j.RbdImageInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A CEPH based implementation of {@link FileStore}. - */ -public class CephFileStore implements FileStore { - - protected static final Logger LOGGER = LoggerFactory.getLogger(CephFileStore.class); - - /** - * RADOS objects extended attribute name to store creation time. - */ - private final static String CREATION_TIME_ATTR = "creation_time"; - - /** - * RADOS objects extended attribute name to store last access time. - */ - private final static String LAST_ACCESS_TIME_ATTR = "last_access_time"; - - /** - * RADOS objects extended attribute name to store last modification time. 
- */ - private final static String LAST_MODIFICATION_TIME_ATTR = "last_modification_time"; - - private final Rados rados; - private final IoCtx ctx; - private final Rbd rbd; - private final String poolName; - - public CephFileStore(String poolName, String cluster, String config) throws RadosException { - - rados = new Rados(cluster, config); - rados.connect(); - - ctx = rados.createIoContext(poolName); - rbd = ctx.createRbd(); - this.poolName = poolName; - } - - @Override - public URI get(PnfsId id) { - return toUri(toImageName(id)); - } - - @Override - public boolean contains(PnfsId id) { - try { - RbdImage image = rbd.openReadOnly(toImageName(id)); - image.close(); - return true; - } catch (RadosException e) { - return false; - } - } - - @Override - public BasicFileAttributeView getFileAttributeView(PnfsId id) throws IOException { - String imageName = toImageName(id); - try (RbdImage image = rbd.openReadOnly(imageName)) { - - final RbdImageInfo imageInfo = image.stat(); - - return new BasicFileAttributeView() { - @Override - public String name() { - return "basic"; - } - - @Override - public BasicFileAttributes readAttributes() throws IOException { - return new BasicFileAttributes() { - - private FileTime getTimeFromXattr(String image, String attr) { - long time; - try { - byte[] b = new byte[Long.BYTES]; - ctx.getXattr(toObjName(image), attr, b); - time = Longs.fromByteArray(b); - } catch (RadosException e) { - time = 0; - } - return FileTime.fromMillis(time); - } - - @Override - public FileTime lastModifiedTime() { - return getTimeFromXattr(imageName, LAST_MODIFICATION_TIME_ATTR); - } - - @Override - public FileTime lastAccessTime() { - return getTimeFromXattr(imageName, LAST_ACCESS_TIME_ATTR); - } - - @Override - public FileTime creationTime() { - return getTimeFromXattr(imageName, CREATION_TIME_ATTR); - } - - @Override - public boolean isRegularFile() { - return true; - } - - @Override - public boolean isDirectory() { - return false; - } - - @Override - 
public boolean isSymbolicLink() { - return false; - } - - @Override - public boolean isOther() { - return false; - } - - @Override - public long size() { - return imageInfo.obj_size.longValue(); - } - - @Override - public Object fileKey() { - return null; - } - }; - } - - private void setTimeToXattr(String image, String attr, FileTime time) - throws RadosException { - ctx.setXattr(toObjName(image), - attr, - Longs.toByteArray(time.toMillis())); - } - - @Override - public void setTimes(FileTime lastModifiedTime, FileTime lastAccessTime, - FileTime createTime) throws IOException { - - if (lastModifiedTime != null) { - setTimeToXattr(imageName, LAST_MODIFICATION_TIME_ATTR, lastModifiedTime); - } - - if (lastAccessTime != null) { - setTimeToXattr(imageName, LAST_ACCESS_TIME_ATTR, lastAccessTime); - } - - if (createTime != null) { - setTimeToXattr(imageName, CREATION_TIME_ATTR, createTime); - } - - } - }; - } catch (RadosException e) { - throwIfMappable(e, "Failed to get file's attribute: " + imageName); - throw e; - } - } - - @Override - public URI create(PnfsId id) throws IOException { - String imageName = toImageName(id); - try { - rbd.create(imageName, 0); - ctx.setXattr(toObjName(imageName), - CREATION_TIME_ATTR, - Longs.toByteArray(System.currentTimeMillis())); - } catch (RadosException e) { - throwIfMappable(e, "Failed to create file: " + imageName); - throw e; - } - return toUri(imageName); - } - - @Override - public void remove(PnfsId id) throws IOException { - String imageName = toImageName(id); - try { - rbd.remove(imageName); - } catch (RadosException e) { - - // ignore file-not-found error code (negative number). 
- if (Errno.valueOf(Math.abs(e.getErrorCode())) == Errno.ENOENT) { - return; - } - - throwIfMappable(e, "Failed to remove file: " + imageName); - throw e; - } - } - - @Override - public RepositoryChannel openDataChannel(PnfsId id, Set ioMode) - throws IOException { - String imageName = toImageName(id); - try { - return new CephRepositoryChannel(rbd, imageName, ioMode); - } catch (RadosException e) { - throwIfMappable(e, "Failed to open file: " + imageName); - throw e; - } - } - - @Override - public Set index() throws IOException { - try { - return rbd.list() - .stream() - .map(this::toPnfsId) - .collect(Collectors.toSet()); - } catch (RadosException e) { - throwIfMappable(e, "Failed to get list of images"); - throw e; - } - } - - @Override - public long getFreeSpace() throws IOException { - try { - RadosClusterInfo clusterInfo = rados.statCluster(); - return KiB.toBytes(clusterInfo.kb_avail.get()); - } catch (RadosException e) { - throwIfMappable(e, "Failed to get cluster info"); - throw e; - } - } - - @Override - public long getTotalSpace() throws IOException { - try { - RadosClusterInfo clusterInfo = rados.statCluster(); - return KiB.toBytes(clusterInfo.kb.get()); - } catch (RadosException e) { - throwIfMappable(e, "Failed to get cluster info"); - throw e; - } - } - - @Override - public boolean isOk() { - try { - rados.statPool(ctx); - } catch (RadosException e) { - LOGGER.error("Repository health check failed: {}", e.toString()); - return false; - } - return true; - } - - private String toImageName(PnfsId id) { - return id.toString(); - } - - private PnfsId toPnfsId(String s) { - return new PnfsId(s); - } - - /** - * Returns object name corresponding to specified RBD image. - * - * @param image name. 
- */ - private String toObjName(String img) { - return img + ".rbd"; - } - - private URI toUri(String imageName) { - try { - return new URI("rbd", poolName, "/" + imageName, null, null); - } catch (URISyntaxException e) { - // we should never get here - throw new RuntimeException("Failed to build URI", e); - } - } - - public void shutdown() throws RadosException { - try { - ctx.destroy(); - } finally { - rados.shutdown(); - } - } - - private void throwIfMappable(RadosException e, String msg) throws IOException { - // try to map CEPH errors to dCache/java.io alternatives - // the errcodes are negative numbers :) - - Errno err = Errno.valueOf(Math.abs(e.getErrorCode())); - - switch (err) { - case ENOENT: - throw new NoSuchFileException(msg + " : " + e.getMessage()); - } - } -} diff --git a/modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephRepositoryChannel.java b/modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephRepositoryChannel.java deleted file mode 100644 index 2afa7e79d42..00000000000 --- a/modules/dcache/src/main/java/org/dcache/pool/repository/ceph/CephRepositoryChannel.java +++ /dev/null @@ -1,192 +0,0 @@ -/* - * dCache - http://www.dcache.org/ - * - * Copyright (C) 2016 Deutsches Elektronen-Synchrotron - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ -package org.dcache.pool.repository.ceph; - -import java.io.IOException; -import java.io.SyncFailedException; -import java.nio.ByteBuffer; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.nio.file.OpenOption; -import java.nio.file.StandardOpenOption; -import java.util.Set; -import org.dcache.pool.repository.RepositoryChannel; -import org.dcache.rados4j.RadosException; -import org.dcache.rados4j.Rbd; -import org.dcache.rados4j.RbdImage; - -/** - * CEPH back-ended implementation of {@link RepositoryChannel}. - *

- * Uses CEPH's block device image interface to store the data. - */ -public class CephRepositoryChannel implements RepositoryChannel { - - private RbdImage rbdImage; - private final boolean rdOnly; - private long size; - private long offset = 0; - - public CephRepositoryChannel(Rbd rbd, String name, Set mode) - throws RadosException { - if (mode.contains(StandardOpenOption.WRITE)) { - // REVISIT: we do not create image here as it already created by CephFileStore. - //rbd.create(name, 0); - rbdImage = rbd.open(name); - rdOnly = false; - size = 0; - } else if (mode.contains(StandardOpenOption.READ)) { - rbdImage = rbd.openReadOnly(name); - rdOnly = true; - size = rbdImage.stat().obj_size.get(); - } else { - throw new IllegalArgumentException("Illegal mode: " + mode); - } - } - - @Override - public synchronized long position() throws IOException { - return offset; - } - - @Override - public synchronized RepositoryChannel position(long position) throws IOException { - - if (rdOnly) { - offset = Math.min(size, position); - } else { - offset = position; - if (offset > size) { - this.resize(size); - } - } - return this; - } - - @Override - public synchronized long size() throws IOException { - return size; - } - - @Override - public synchronized int write(ByteBuffer src, long position) throws IOException { - - if (position + src.remaining() > size) { - this.resize(position + src.remaining()); - } - - return rbdImage.write(src, position); - } - - @Override - public int read(ByteBuffer dst, long position) throws IOException { - - // CEPH can't read beyond image size - if (position >= size) { - return -1; - } - - return rbdImage.read(dst, position); - } - - private void resize(long size) throws RadosException { - rbdImage.resize(size); - this.size = size; - } - - @Override - public synchronized RepositoryChannel truncate(long size) throws IOException { - - if (this.size < size) { - this.resize(size); - } - return this; - } - - @Override - public void sync() throws 
SyncFailedException, IOException { - // NOP - } - - @Override - public long transferTo(long position, long count, WritableByteChannel target) - throws IOException { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public long transferFrom(ReadableByteChannel src, long position, long count) - throws IOException { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public long write(ByteBuffer[] srcs, int offset, int length) throws IOException { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public long write(ByteBuffer[] srcs) throws IOException { - throw new UnsupportedOperationException("Not supported yet."); - } - - @Override - public synchronized int write(ByteBuffer src) throws IOException { - int n = this.write(src, offset); - offset += n; - return n; - } - - @Override - public synchronized boolean isOpen() { - return rbdImage != null; - } - - @Override - public synchronized void close() throws IOException { - if (rbdImage != null) { - rbdImage.close(); - rbdImage = null; - } - } - - @Override - public long read(ByteBuffer[] dsts, int offset, int length) throws IOException { - throw new UnsupportedOperationException( - "Not supported yet."); //To change body of generated methods, choose Tools | Templates. - } - - @Override - public long read(ByteBuffer[] dsts) throws IOException { - throw new UnsupportedOperationException( - "Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
- } - - @Override - public synchronized int read(ByteBuffer dst) throws IOException { - - int n = this.read(dst, offset); - if (n < 0) { - return n; - } - offset += n; - return n; - } - -} diff --git a/modules/dcache/src/main/resources/org/dcache/pool/classic/pool.xml b/modules/dcache/src/main/resources/org/dcache/pool/classic/pool.xml index 43c8713f134..d4cc32daf11 100644 --- a/modules/dcache/src/main/resources/org/dcache/pool/classic/pool.xml +++ b/modules/dcache/src/main/resources/org/dcache/pool/classic/pool.xml @@ -642,16 +642,6 @@ - - - Ceph-based back-end store for pool - - - - - - Store for pool files diff --git a/pom.xml b/pom.xml index b35dda4a6b0..8168ec9c197 100644 --- a/pom.xml +++ b/pom.xml @@ -814,11 +814,6 @@ junit 4.13.1 - - org.dcache - rados4j - 0.0.4 - org.dcache ldap4testing diff --git a/skel/share/defaults/pool.properties b/skel/share/defaults/pool.properties index 1ade13288d0..f13b11dab45 100644 --- a/skel/share/defaults/pool.properties +++ b/skel/share/defaults/pool.properties @@ -762,19 +762,15 @@ pool.info-request-handler.threads=4 # Pool's file store backend type. # -# currently supported types: CEPH or a POSIX file system -(one-of?ceph|posix)pool.backend = posix +# currently supported types: POSIX file system +(immutable)pool.backend = posix # -------- CEPH backend for pools ---- # # CEPH cluster name -pool.backend.ceph.cluster = admin - -# path to ceph config file -pool.backend.ceph.config = /etc/ceph/ceph.conf - -# ceph pool name -pool.backend.ceph.pool-name = ${pool.name} +(obsolete)pool.backend.ceph.cluster = CEPH storage is not supported +(obsolete)pool.backend.ceph.config = CEPH storage is not supported +(obsolete)pool.backend.ceph.pool-name = CEPH storage is not supported # # Document which TCP ports are opened