Files
phishingclub/backend/seed/recipient_lowercase.go
Ronni Skansing d5192aa792 fix dub recipients migration script
Signed-off-by: Ronni Skansing <rskansing@gmail.com>
2026-06-22 23:14:02 +02:00

207 lines
6.6 KiB
Go

package seed
import (
"github.com/go-errors/errors"
"github.com/phishingclub/phishingclub/database"
"github.com/phishingclub/phishingclub/errs"
"go.uber.org/zap"
"gorm.io/gorm"
)
// RecipientLowercaseReport summarises a lowercase-recipient-emails run.
type RecipientLowercaseReport struct {
GroupsProcessed int
DuplicateGroups int
RecipientsMerged int
EmailsLowercased int
}
// LowercaseRecipientEmails normalises every recipient email to lowercase and
// merges recipients that are the same address apart from casing into one
// canonical lowercased recipient. for each group the already lowercased row is
// kept, otherwise the oldest row is kept and lowercased. every reference to a
// merged recipient (campaign membership, group membership, events, device
// codes) is repointed to the survivor, and any membership that would become a
// duplicate is collapsed to one. the whole run is one transaction and is safe
// to run more than once.
//
// when dryRun is true the work is performed in a transaction that is rolled
// back, so the report reflects exactly what would change without writing any
// changes.
func LowercaseRecipientEmails(db *gorm.DB, logger *zap.SugaredLogger, dryRun bool) (*RecipientLowercaseReport, error) {
report := &RecipientLowercaseReport{}
tx := db.Begin()
if tx.Error != nil {
return nil, errs.Wrap(tx.Error)
}
if err := lowercaseRecipientEmailsInTx(tx, logger, report); err != nil {
tx.Rollback()
return nil, errs.Wrap(errors.Errorf("lowercase recipient emails migration failed: %w", err))
}
if dryRun {
tx.Rollback()
logger.Infow("lowercase-recipient-emails dry run, no changes written",
"groupsProcessed", report.GroupsProcessed,
"duplicateGroups", report.DuplicateGroups,
"recipientsToMerge", report.RecipientsMerged,
"emailsToLowercase", report.EmailsLowercased,
)
return report, nil
}
if err := tx.Commit().Error; err != nil {
return nil, errs.Wrap(err)
}
logger.Infow("lowercase-recipient-emails migration completed",
"groupsProcessed", report.GroupsProcessed,
"duplicateGroups", report.DuplicateGroups,
"recipientsMerged", report.RecipientsMerged,
"emailsLowercased", report.EmailsLowercased,
)
return report, nil
}
// lowercaseRecipientEmailsInTx performs the normalisation and merge work on the
// given transaction, filling in the report as it goes.
func lowercaseRecipientEmailsInTx(tx *gorm.DB, logger *zap.SugaredLogger, report *RecipientLowercaseReport) error {
// find every lowercased email that needs work, either because more than
// one recipient shares it or because the single recipient is not stored
// lowercased
var keys []struct {
LowerEmail string `gorm:"column:lower_email"`
}
err := tx.Raw(
"SELECT LOWER(email) AS lower_email FROM " + database.RECIPIENT_TABLE +
" WHERE email IS NOT NULL AND email != ''" +
" GROUP BY LOWER(email)" +
" HAVING COUNT(*) > 1 OR SUM(CASE WHEN email = LOWER(email) THEN 1 ELSE 0 END) < COUNT(*)",
).Scan(&keys).Error
if err != nil {
return errs.Wrap(err)
}
for _, k := range keys {
report.GroupsProcessed++
// load the group, ordered so the survivor is first: an already
// lowercased row wins, then the oldest row
var rows []struct {
ID string `gorm:"column:id"`
Email string `gorm:"column:email"`
}
err := tx.Raw(
"SELECT id, email FROM "+database.RECIPIENT_TABLE+
" WHERE email IS NOT NULL AND email != '' AND LOWER(email) = ?"+
" ORDER BY CASE WHEN email = LOWER(email) THEN 0 ELSE 1 END ASC, created_at ASC, id ASC",
k.LowerEmail,
).Scan(&rows).Error
if err != nil {
return errs.Wrap(err)
}
if len(rows) == 0 {
continue
}
survivor := rows[0]
if len(rows) > 1 {
report.DuplicateGroups++
}
for _, nonSurvivor := range rows[1:] {
if err := mergeRecipient(tx, nonSurvivor.ID, survivor.ID); err != nil {
return errs.Wrap(err)
}
report.RecipientsMerged++
}
if survivor.Email != k.LowerEmail {
if err := tx.Exec(
"UPDATE "+database.RECIPIENT_TABLE+" SET email = ? WHERE id = ?",
k.LowerEmail, survivor.ID,
).Error; err != nil {
return errs.Wrap(err)
}
report.EmailsLowercased++
}
logger.Debugw("processed recipient email group",
"email", k.LowerEmail,
"survivor", survivor.ID,
"merged", len(rows)-1,
)
}
return nil
}
// mergeRecipient repoints every reference from fromID to toID, collapsing any
// membership that would otherwise duplicate, then deletes the fromID recipient.
// references are repointed before the delete so no foreign key is left dangling.
func mergeRecipient(tx *gorm.DB, fromID string, toID string) error {
// campaign membership is unique on (campaign_id, recipient_id), so first
// drop the rows that the survivor already has for the same campaign
if err := tx.Exec(
"DELETE FROM "+database.CAMPAIGN_RECIPIENT_TABLE_NAME+
" WHERE recipient_id = ? AND campaign_id IN"+
" (SELECT campaign_id FROM "+database.CAMPAIGN_RECIPIENT_TABLE_NAME+
" WHERE recipient_id = ?)",
fromID, toID,
).Error; err != nil {
return err
}
if err := tx.Exec(
"UPDATE "+database.CAMPAIGN_RECIPIENT_TABLE_NAME+
" SET recipient_id = ? WHERE recipient_id = ?",
toID, fromID,
).Error; err != nil {
return err
}
// group membership is unique on (recipient_id, recipient_group_id)
if err := tx.Exec(
"DELETE FROM "+database.RECIPIENT_GROUP_RECIPIENT_TABLE+
" WHERE recipient_id = ? AND recipient_group_id IN"+
" (SELECT recipient_group_id FROM "+database.RECIPIENT_GROUP_RECIPIENT_TABLE+
" WHERE recipient_id = ?)",
fromID, toID,
).Error; err != nil {
return err
}
if err := tx.Exec(
"UPDATE "+database.RECIPIENT_GROUP_RECIPIENT_TABLE+
" SET recipient_id = ? WHERE recipient_id = ?",
toID, fromID,
).Error; err != nil {
return err
}
// device codes are unique on (campaign_id, recipient_id)
if err := tx.Exec(
"DELETE FROM "+database.MICROSOFT_DEVICE_CODE_TABLE+
" WHERE recipient_id = ? AND campaign_id IN"+
" (SELECT campaign_id FROM "+database.MICROSOFT_DEVICE_CODE_TABLE+
" WHERE recipient_id = ?)",
fromID, toID,
).Error; err != nil {
return err
}
if err := tx.Exec(
"UPDATE "+database.MICROSOFT_DEVICE_CODE_TABLE+
" SET recipient_id = ? WHERE recipient_id = ?",
toID, fromID,
).Error; err != nil {
return err
}
// events have no uniqueness on the recipient, so keep them all and repoint
if err := tx.Exec(
"UPDATE "+database.CAMPAIGN_EVENT_TABLE+
" SET recipient_id = ? WHERE recipient_id = ?",
toID, fromID,
).Error; err != nil {
return err
}
// the merged recipient now has no references and can be removed
if err := tx.Exec(
"DELETE FROM "+database.RECIPIENT_TABLE+" WHERE id = ?",
fromID,
).Error; err != nil {
return err
}
return nil
}