Skip to content

Commit

Permalink
Implement offloaded timeline deletion
Browse files Browse the repository at this point in the history
  • Loading branch information
arpad-m committed Sep 30, 2024
1 parent 1b4212d commit 21139c8
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 43 deletions.
54 changes: 48 additions & 6 deletions pageserver/src/tenant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,15 +485,60 @@ impl WalRedoManager {
}

pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
pub ancestor_timeline_id: Option<TimelineId>,

// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,

/// Prevent two tasks from deleting the timeline at the same time. If held, the
/// timeline is being deleted. If 'true', the timeline has already been deleted.
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
}

impl OffloadedTimeline {
fn from_timeline(timeline: &Timeline) -> Self {
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),

remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
}
}
}

#[derive(Clone)]
pub enum TimelineOrOffloaded {
Timeline(Arc<Timeline>),
Offloaded(Arc<OffloadedTimeline>),
}

impl TimelineOrOffloaded {
pub fn tenant_shard_id(&self) -> TenantShardId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.tenant_shard_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.tenant_shard_id,
}
}
pub fn timeline_id(&self) -> TimelineId {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.timeline_id,
TimelineOrOffloaded::Offloaded(offloaded) => offloaded.timeline_id,
}
}
pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
}
}
pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
match self {
TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
}
}
}
Expand Down Expand Up @@ -1490,12 +1535,9 @@ impl Tenant {
timeline
} else {
let cancel = self.cancel.clone();
let timeline_preload = self.load_timeline_metadata(
timeline_id,
self.remote_storage.clone(),
cancel,
)
.await;
let timeline_preload = self
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
.await;

let index_part = match timeline_preload.index_part {
Ok(index_part) => {
Expand Down
4 changes: 2 additions & 2 deletions pageserver/src/tenant/gc_block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ impl GcBlock {
Ok(())
}

pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) {
let unblocked = {
let mut g = self.reasons.lock().unwrap();
if g.is_empty() {
return;
}

g.remove(&timeline.timeline_id);
g.remove(timeline_id);

BlockingReasons::clean_and_summarize(g).is_none()
};
Expand Down
98 changes: 64 additions & 34 deletions pageserver/src/tenant/timeline/delete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use crate::{
tenant::{
metadata::TimelineMetadata,
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
CreateTimelineCause, DeleteTimelineError, Tenant,
CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
},
};

Expand All @@ -24,12 +24,14 @@ use super::{Timeline, TimelineResources};
/// Mark timeline as deleted in S3 so we won't pick it up next time
/// during attach or pageserver restart.
/// See comment in persist_index_part_with_deleted_flag.
async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
match timeline
.remote_client
async fn set_deleted_in_remote_index(
timeline: &TimelineOrOffloaded,
) -> Result<(), DeleteTimelineError> {
let res = timeline
.remote_client()
.persist_index_part_with_deleted_flag()
.await
{
.await;
match res {
// If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
// Bail out otherwise
Expand Down Expand Up @@ -127,37 +129,51 @@ pub(super) async fn delete_local_timeline_directory(
}

/// Removes remote layers and an index file after them.
async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> {
timeline
.remote_client
.remote_client()
.delete_all()
.await
.context("delete_all")
}

/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn remove_timeline_from_tenant(
async fn remove_maybe_offloaded_timeline_from_tenant(
tenant: &Tenant,
timeline: &Timeline,
timeline: &TimelineOrOffloaded,
_: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
// Remove the timeline from the map.
// This observes the locking order between timelines and timelines_offloaded
let mut timelines = tenant.timelines.lock().unwrap();
let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
let offloaded_children_exist = timelines_offloaded
.iter()
.any(|(_, entry)| &entry.ancestor_timeline_id == &Some(timeline.timeline_id()));
let children_exist = timelines
.iter()
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
// XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
// We already deleted the layer files, so it's probably best to panic.
// (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
if children_exist {
.any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id()));
// XXX this can happen because of race conditions with branch creation.
// We already deleted the remote layer files, so it's probably best to panic.
if children_exist || offloaded_children_exist {
panic!("Timeline grew children while we removed layer files");
}

timelines
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines' map");
match timeline {
TimelineOrOffloaded::Timeline(timeline) => {
timelines.remove(&timeline.timeline_id).expect(
"timeline that we were deleting was concurrently removed from 'timelines' map",
);
}
TimelineOrOffloaded::Offloaded(timeline) => {
timelines_offloaded
.remove(&timeline.timeline_id)
.expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
}
}

drop(timelines_offloaded);
drop(timelines);

Ok(())
Expand Down Expand Up @@ -207,9 +223,11 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?;

// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
timeline.shutdown(super::ShutdownMode::Hard).await;
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.shutdown(super::ShutdownMode::Hard).await;
}

tenant.gc_block.before_delete(&timeline);
tenant.gc_block.before_delete(&timeline.timeline_id());

fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
Err(anyhow::anyhow!(
Expand Down Expand Up @@ -285,6 +303,7 @@ impl DeleteTimelineFlow {

guard.mark_in_progress()?;

let timeline = TimelineOrOffloaded::Timeline(timeline);
Self::schedule_background(guard, tenant.conf, tenant, timeline);

Ok(())
Expand All @@ -293,7 +312,7 @@ impl DeleteTimelineFlow {
pub(super) fn prepare(
tenant: &Tenant,
timeline_id: TimelineId,
) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
// Note the interaction between this guard and deletion guard.
// Here we attempt to lock deletion guard when we're holding a lock on timelines.
// This is important because when you take into account `remove_timeline_from_tenant`
Expand All @@ -307,10 +326,13 @@ impl DeleteTimelineFlow {
let timelines = tenant.timelines.lock().unwrap();

let timeline = match timelines.get(&timeline_id) {
Some(t) => t,
Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
None => {
let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
return Err(DeleteTimelineError::NotFound);
match offloaded_timelines.get(&timeline_id) {
Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
None => return Err(DeleteTimelineError::NotFound),
}
}
};

Expand All @@ -337,30 +359,32 @@ impl DeleteTimelineFlow {
// to remove the timeline from it.
// Always if you have two locks that are taken in different order this can result in a deadlock.

let delete_progress = Arc::clone(&timeline.delete_progress);
let delete_progress = Arc::clone(timeline.delete_progress());
let delete_lock_guard = match delete_progress.try_lock_owned() {
Ok(guard) => DeletionGuard(guard),
Err(_) => {
// Unfortunately if lock fails arc is consumed.
return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
&timeline.delete_progress,
timeline.delete_progress(),
)));
}
};

timeline.set_state(TimelineState::Stopping);
if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
timeline.set_state(TimelineState::Stopping);
}

Ok((Arc::clone(timeline), delete_lock_guard))
Ok((timeline, delete_lock_guard))
}

fn schedule_background(
guard: DeletionGuard,
conf: &'static PageServerConf,
tenant: Arc<Tenant>,
timeline: Arc<Timeline>,
timeline: TimelineOrOffloaded,
) {
let tenant_shard_id = timeline.tenant_shard_id;
let timeline_id = timeline.timeline_id;
let tenant_shard_id = timeline.tenant_shard_id();
let timeline_id = timeline.timeline_id();

task_mgr::spawn(
task_mgr::BACKGROUND_RUNTIME.handle(),
Expand All @@ -371,7 +395,9 @@ impl DeleteTimelineFlow {
async move {
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
error!("Error: {err:#}");
timeline.set_broken(format!("{err:#}"))
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
timeline.set_broken(format!("{err:#}"))
}
};
Ok(())
}
Expand All @@ -383,15 +409,19 @@ impl DeleteTimelineFlow {
mut guard: DeletionGuard,
conf: &PageServerConf,
tenant: &Tenant,
timeline: &Timeline,
timeline: &TimelineOrOffloaded,
) -> Result<(), DeleteTimelineError> {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
// Offloaded timelines have no local state
// TODO: once we persist offloaded information, delete the timeline from there, too
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
}

delete_remote_layers_and_index(timeline).await?;

pausable_failpoint!("in_progress_delete");

remove_timeline_from_tenant(tenant, timeline, &guard).await?;
remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;

*guard = Self::Finished;

Expand Down
7 changes: 6 additions & 1 deletion pageserver/src/tenant/timeline/offload.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::sync::Arc;

use crate::tenant::{OffloadedTimeline, Tenant};
use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};

use super::{
delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
Expand All @@ -14,6 +14,11 @@ pub(crate) async fn offload_timeline(
tracing::info!("offloading archived timeline");
let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;

let TimelineOrOffloaded::Timeline(timeline) = timeline else {
tracing::error!("timeline already offloaded, but given timeline object");
return Ok(());
};

// TODO extend guard mechanism above with method
// to make deletions possible while offloading is in progress

Expand Down

0 comments on commit 21139c8

Please sign in to comment.