Skip to content

Commit

Permalink
mailbox.c: properly reconstruct v19 -> v20
Browse files Browse the repository at this point in the history
   - use the internaldate of an existing message with same UID
   - pick internaldate.nsec in a deterministic manner
   - make sure internaldate doesn't conflict with an existing JMAPID
   - set the timestamps on the message data file to internaldate
   - update G record and add J record in conv.db
  • Loading branch information
ksmurchison committed Mar 5, 2025
1 parent b5c4874 commit 997d5c0
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 17 deletions.
67 changes: 67 additions & 0 deletions cassandane/Cassandane/Cyrus/Reconstruct.pm
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use Data::Dumper;
use File::Copy;
use IO::File;
use JSON;
use Cwd qw(abs_path);

use lib '.';
use base qw(Cassandane::Cyrus::TestCase);
Expand Down Expand Up @@ -631,4 +632,70 @@ sub test_downgrade_upgrade
}
}

# Install a set of version 19 mailboxes on both master and replica, upgrade
# each side to the newest index version in turn, and verify that the EMAILIDs
# produced by the upgrade are identical on both sides and identical for the
# copy of message 1 that lives in INBOX.foo.
sub test_upgrade_v19_to_v20
    :MailboxLegacyDirs :NoAltNameSpace :Replication :needs_component_replication
{
    my ($self) = @_;

    my $talk = $self->{store}->get_client();
    $talk->create('INBOX.foo');

    # replicate and check initial state
    $self->run_replication();
    $self->check_replication('cassandane');

    # abs_path() can return undef when part of the path is missing, so keep
    # the relative name around to be able to report something meaningful
    my $rel_path = "data/old-mailboxes/version19.tar.gz";
    my $data_file = abs_path($rel_path);
    if (!defined($data_file) || !-f $data_file) {
        my $shown = defined($data_file) ? $data_file : $rel_path;
        die "Old mailbox data does not exist: $shown";
    }

    xlog $self, "installing version 19 mailboxes";
    $self->{instance}->unpackfile($data_file, $self->{instance}->get_basedir());
    $self->{instance}->unpackfile($data_file, $self->{replica}->get_basedir());

    xlog $self, "reconstructing indexes at v19 to get predictable senddate";
    $self->{instance}->run_command({ cyrus => 1 }, 'reconstruct', '-G', '-q');
    $self->{replica}->run_command({ cyrus => 1 }, 'reconstruct', '-G', '-q');

    xlog $self, "Upgrade master to version 20";
    $self->{instance}->run_command({ cyrus => 1 }, 'reconstruct', '-V', 'max');

    # replicate new version to old version
    $self->run_replication();

    # check_replication() will fail here due to the internaldate.nsec annotation
    # being present on the replica but NOT on the master

    xlog $self, "Upgrade replica to version 20";
    $self->{replica}->run_command({ cyrus => 1 }, 'reconstruct', '-V', 'max');

    $self->run_replication();
    $self->check_replication('cassandane');

    xlog $self, "Fetching EMAILIDs";
    $talk = $self->{master_store}->get_client();
    $talk->examine('INBOX');
    my $res = $talk->fetch('1:*', '(UID EMAILID)');

    # remember the master's EMAILIDs for messages 1..4, keyed by message number
    my %master_id = map { $_ => $res->{$_}{emailid}[0] } (1 .. 4);

    # the copy in INBOX.foo must share the EMAILID of message 1 in INBOX
    $talk->examine('INBOX.foo');
    $res = $talk->fetch('1:*', '(UID EMAILID)');
    $self->assert_str_equals($master_id{1}, $res->{1}{emailid}[0]);

    # EMAILIDs on the replica should be identical to those on the master
    # since they are the encoded nanoseconds since epoch
    $talk = $self->{replica_store}->get_client();
    $talk->examine('INBOX');
    $res = $talk->fetch('1:*', '(UID EMAILID)');
    for my $msgno (1 .. 4) {
        $self->assert_str_equals($master_id{$msgno},
                                 $res->{$msgno}{emailid}[0]);
    }

    $talk->examine('INBOX.foo');
    $res = $talk->fetch('1:*', '(UID EMAILID)');
    $self->assert_str_equals($master_id{1}, $res->{1}{emailid}[0]);
}

1;
Binary file added cassandane/data/old-mailboxes/version19.tar.gz
Binary file not shown.
124 changes: 107 additions & 17 deletions imap/mailbox.c
Original file line number Diff line number Diff line change
Expand Up @@ -4861,25 +4861,10 @@ EXPORTED int mailbox_append_index_record(struct mailbox *mailbox,
}
}

int object_storage_enabled = 0 ;
#if defined ENABLE_OBJECTSTORE
object_storage_enabled = config_getswitch(IMAPOPT_OBJECT_STORAGE_ENABLED) ;
#endif

if (!(record->internal_flags & FLAG_INTERNAL_UNLINKED)) {
/* make the file timestamp correct */
if (!(object_storage_enabled && (record->internal_flags & FLAG_INTERNAL_ARCHIVED))) { // maybe there is no file in directory.
struct timespec settimes[] = {
{ record->internaldate.tv_sec, record->internaldate.tv_nsec },
{ record->internaldate.tv_sec, record->internaldate.tv_nsec }
};
const char *fname = mailbox_record_fname(mailbox, record);
r = utimensat(AT_FDCWD, fname, settimes, 0);
if (r == -1) {
syslog(LOG_ERR, "failed to set mtime on %s: %m", fname);
return IMAP_IOERROR;
}
}
r = mailbox_set_datafile_timestamps(mailbox, record);
if (r) return r;

/* write the cache record before buffering the message, it
* will set the cache_offset field. */
Expand Down Expand Up @@ -5379,10 +5364,36 @@ HIDDEN int mailbox_repack_commit(struct mailbox_repack **repackptr)
return r;
}

/*
 * Callback handed to conversations_guid_foreach().
 *
 * 'rock' points at a struct timespec.  When 'rec' is a version 4, non-part,
 * non-expunged GUID record whose mailbox is of type MBTYPE_EMAIL, the
 * record's internaldate is converted into that timespec and CYRUSDB_DONE is
 * returned.  Every other record (including one whose mailbox entry cannot
 * be looked up) yields 0 and leaves the timespec untouched.
 */
static int find_dup_msg(const conv_guidrec_t *rec, void *rock)
{
    struct timespec *internaldate = (struct timespec *) rock;
    mbentry_t *mbentry = NULL;
    int is_email;

    /* only whole-message records in the version 4 format are candidates */
    if (rec->version != 4 || rec->part) return 0;

    /* expunged copies don't count as duplicates */
    if (rec->internal_flags & FLAG_INTERNAL_EXPUNGED) return 0;

    /* a failed mailbox lookup just means "no match here" */
    if (conv_guidrec_mbentry(rec, &mbentry)) return 0;

    is_email = (mbtype_isa(mbentry->mbtype) == MBTYPE_EMAIL);
    if (is_email) {
        // found a non-expunged duplicate email; use its internaldate
        TIMESPEC_FROM_NANOSEC(internaldate, rec->internaldate);
    }

    mboxlist_entry_free(&mbentry);

    return is_email ? CYRUSDB_DONE : 0;
}

/* need a mailbox exclusive lock, we're rewriting files */
static int mailbox_index_repack(struct mailbox *mailbox, int version)
{
struct mailbox_repack *repack = NULL;
struct conversations_state *cstate = NULL;
uint32_t mbtype = mbtype_isa(mailbox_mbtype(mailbox));
const message_t *msg;
struct mailbox_iter *iter = NULL;
struct buf buf = BUF_INITIALIZER;
Expand All @@ -5393,6 +5404,13 @@ static int mailbox_index_repack(struct mailbox *mailbox, int version)
r = mailbox_repack_setup(mailbox, version, &repack);
if (r) goto done;

if (mailbox->i.minor_version < 20 &&
repack->newmailbox.i.minor_version >= 20 &&
!(cstate = mailbox_get_cstate(mailbox))) {
r = IMAP_IOERROR;
goto done;
}

iter = mailbox_iter_init(mailbox, 0, 0);
while ((msg = mailbox_iter_step(iter))) {
const struct index_record *record = msg_record(msg);
Expand Down Expand Up @@ -5534,6 +5552,51 @@ static int mailbox_index_repack(struct mailbox *mailbox, int version)
parsenum(p, &p, 0, &newval);
copyrecord.internaldate.tv_nsec = newval;
}
else {
// assign internaldate.nsec in a deterministic manner -
// use the first 29 bits of GUID
// (0x1FFFFFFF < 999999999 nanoseconds)
copyrecord.internaldate.tv_nsec =
*((uint32_t *) record->guid.value) >> 3;

if (mbtype == MBTYPE_EMAIL) {
// attempt to find an existing message with the same guid
// and use its internaldate instead
struct timespec existing_internaldate = { 0, UTIME_OMIT };
char guid[2*MESSAGE_GUID_SIZE+1];

strcpy(guid, message_guid_encode(&record->guid));

// ignore errors, it's OK for this to fail
conversations_guid_foreach(cstate, guid, find_dup_msg,
&existing_internaldate);

// if we found a matching message, use its internaldate
if (existing_internaldate.tv_nsec != UTIME_OMIT) {
copyrecord.internaldate = existing_internaldate;
}
else {
// make sure we don't have a JMAP ID (internaldate) clash
conversations_adjust_internaldate(cstate, guid,
&copyrecord.internaldate);
}
}

// make the file timestamp correct
r = mailbox_set_datafile_timestamps(mailbox, &copyrecord);
if (r) goto done;

// update G & J records
r = mailbox_update_conversations(mailbox, record, &copyrecord);
if (r) goto done;

// add virtual annotation to old crc
buf_reset(&buf);
buf_printf(&buf, UINT64_FMT, copyrecord.internaldate.tv_nsec);
repack->crcs.annot ^=
crc_annot(record->uid,
IMAP_ANNOT_NS "internaldate.nsec", "", &buf);
}
buf_reset(&buf);
r = annotate_state_writesilent(astate, IMAP_ANNOT_NS "internaldate.nsec", "", &buf);
if (r) goto done;
Expand Down Expand Up @@ -8761,3 +8824,30 @@ EXPORTED struct mboxlist_entry *mailbox_mbentry_from_path(const char *header_pat

return mbentry;
}

/*
 * Set both timestamps (atime and mtime) of the message data file belonging
 * to 'record' to the record's internaldate, with nanosecond resolution.
 *
 * When object storage is compiled in and enabled, an archived record is
 * skipped because there is no file in the directory for it.
 *
 * Returns 0 on success (or when the record was skipped) and IMAP_IOERROR
 * when the data file name is unavailable or utimensat() fails; the failure
 * is logged via syslog.
 */
EXPORTED int mailbox_set_datafile_timestamps(struct mailbox *mailbox,
                                             struct index_record *record)
{
    int object_storage_enabled = 0;
#if defined ENABLE_OBJECTSTORE
    object_storage_enabled = config_getswitch(IMAPOPT_OBJECT_STORAGE_ENABLED) ;
#endif

    if (object_storage_enabled && (record->internal_flags & FLAG_INTERNAL_ARCHIVED)) {
        // there is no file in directory
        return 0;
    }

    const char *fname = mailbox_record_fname(mailbox, record);
    if (!fname) {
        // defensive: never hand a NULL path to utimensat() or to "%s"
        // NOTE(review): assumes mailbox_record_fname() signals failure with
        // NULL - confirm against its implementation
        syslog(LOG_ERR, "failed to set mtime on UID %u: no data file name",
               record->uid);
        return IMAP_IOERROR;
    }

    // first entry is the access time, second the modification time
    struct timespec settimes[] = {
        { record->internaldate.tv_sec, record->internaldate.tv_nsec },
        { record->internaldate.tv_sec, record->internaldate.tv_nsec }
    };

    if (utimensat(AT_FDCWD, fname, settimes, 0) == -1) {
        syslog(LOG_ERR, "failed to set mtime on %s: %m", fname);
        return IMAP_IOERROR;
    }

    return 0;
}
3 changes: 3 additions & 0 deletions imap/mailbox.h
Original file line number Diff line number Diff line change
Expand Up @@ -801,4 +801,7 @@ extern int mailbox_parse_datafilename(const char *name, uint32_t *uidp);

extern struct mboxlist_entry *mailbox_mbentry_from_path(const char *header_path);

/* Set the timestamps of the message data file for 'record' to the record's
 * internaldate.  Returns 0 on success, or when object storage is enabled
 * and the record is archived (no file in the directory); returns
 * IMAP_IOERROR when the timestamps cannot be set. */
extern int mailbox_set_datafile_timestamps(struct mailbox *mailbox,
struct index_record *record);

#endif /* INCLUDED_MAILBOX_H */

0 comments on commit 997d5c0

Please sign in to comment.