mirror of
https://github.com/phishingclub/phishingclub.git
synced 2026-07-03 19:07:58 +02:00
d5192aa792
Signed-off-by: Ronni Skansing <rskansing@gmail.com>
207 lines
6.6 KiB
Go
207 lines
6.6 KiB
Go
package seed
|
|
|
|
import (
|
|
"github.com/go-errors/errors"
|
|
"github.com/phishingclub/phishingclub/database"
|
|
"github.com/phishingclub/phishingclub/errs"
|
|
"go.uber.org/zap"
|
|
"gorm.io/gorm"
|
|
)
|
|
|
|
// RecipientLowercaseReport summarises a lowercase-recipient-emails run.
|
|
type RecipientLowercaseReport struct {
|
|
GroupsProcessed int
|
|
DuplicateGroups int
|
|
RecipientsMerged int
|
|
EmailsLowercased int
|
|
}
|
|
|
|
// LowercaseRecipientEmails normalises every recipient email to lowercase and
|
|
// merges recipients that are the same address apart from casing into one
|
|
// canonical lowercased recipient. for each group the already lowercased row is
|
|
// kept, otherwise the oldest row is kept and lowercased. every reference to a
|
|
// merged recipient (campaign membership, group membership, events, device
|
|
// codes) is repointed to the survivor, and any membership that would become a
|
|
// duplicate is collapsed to one. the whole run is one transaction and is safe
|
|
// to run more than once.
|
|
//
|
|
// when dryRun is true the work is performed in a transaction that is rolled
|
|
// back, so the report reflects exactly what would change without writing any
|
|
// changes.
|
|
func LowercaseRecipientEmails(db *gorm.DB, logger *zap.SugaredLogger, dryRun bool) (*RecipientLowercaseReport, error) {
|
|
report := &RecipientLowercaseReport{}
|
|
tx := db.Begin()
|
|
if tx.Error != nil {
|
|
return nil, errs.Wrap(tx.Error)
|
|
}
|
|
if err := lowercaseRecipientEmailsInTx(tx, logger, report); err != nil {
|
|
tx.Rollback()
|
|
return nil, errs.Wrap(errors.Errorf("lowercase recipient emails migration failed: %w", err))
|
|
}
|
|
if dryRun {
|
|
tx.Rollback()
|
|
logger.Infow("lowercase-recipient-emails dry run, no changes written",
|
|
"groupsProcessed", report.GroupsProcessed,
|
|
"duplicateGroups", report.DuplicateGroups,
|
|
"recipientsToMerge", report.RecipientsMerged,
|
|
"emailsToLowercase", report.EmailsLowercased,
|
|
)
|
|
return report, nil
|
|
}
|
|
if err := tx.Commit().Error; err != nil {
|
|
return nil, errs.Wrap(err)
|
|
}
|
|
logger.Infow("lowercase-recipient-emails migration completed",
|
|
"groupsProcessed", report.GroupsProcessed,
|
|
"duplicateGroups", report.DuplicateGroups,
|
|
"recipientsMerged", report.RecipientsMerged,
|
|
"emailsLowercased", report.EmailsLowercased,
|
|
)
|
|
return report, nil
|
|
}
|
|
|
|
// lowercaseRecipientEmailsInTx performs the normalisation and merge work on the
|
|
// given transaction, filling in the report as it goes.
|
|
func lowercaseRecipientEmailsInTx(tx *gorm.DB, logger *zap.SugaredLogger, report *RecipientLowercaseReport) error {
|
|
// find every lowercased email that needs work, either because more than
|
|
// one recipient shares it or because the single recipient is not stored
|
|
// lowercased
|
|
var keys []struct {
|
|
LowerEmail string `gorm:"column:lower_email"`
|
|
}
|
|
err := tx.Raw(
|
|
"SELECT LOWER(email) AS lower_email FROM " + database.RECIPIENT_TABLE +
|
|
" WHERE email IS NOT NULL AND email != ''" +
|
|
" GROUP BY LOWER(email)" +
|
|
" HAVING COUNT(*) > 1 OR SUM(CASE WHEN email = LOWER(email) THEN 1 ELSE 0 END) < COUNT(*)",
|
|
).Scan(&keys).Error
|
|
if err != nil {
|
|
return errs.Wrap(err)
|
|
}
|
|
|
|
for _, k := range keys {
|
|
report.GroupsProcessed++
|
|
// load the group, ordered so the survivor is first: an already
|
|
// lowercased row wins, then the oldest row
|
|
var rows []struct {
|
|
ID string `gorm:"column:id"`
|
|
Email string `gorm:"column:email"`
|
|
}
|
|
err := tx.Raw(
|
|
"SELECT id, email FROM "+database.RECIPIENT_TABLE+
|
|
" WHERE email IS NOT NULL AND email != '' AND LOWER(email) = ?"+
|
|
" ORDER BY CASE WHEN email = LOWER(email) THEN 0 ELSE 1 END ASC, created_at ASC, id ASC",
|
|
k.LowerEmail,
|
|
).Scan(&rows).Error
|
|
if err != nil {
|
|
return errs.Wrap(err)
|
|
}
|
|
if len(rows) == 0 {
|
|
continue
|
|
}
|
|
survivor := rows[0]
|
|
if len(rows) > 1 {
|
|
report.DuplicateGroups++
|
|
}
|
|
for _, nonSurvivor := range rows[1:] {
|
|
if err := mergeRecipient(tx, nonSurvivor.ID, survivor.ID); err != nil {
|
|
return errs.Wrap(err)
|
|
}
|
|
report.RecipientsMerged++
|
|
}
|
|
if survivor.Email != k.LowerEmail {
|
|
if err := tx.Exec(
|
|
"UPDATE "+database.RECIPIENT_TABLE+" SET email = ? WHERE id = ?",
|
|
k.LowerEmail, survivor.ID,
|
|
).Error; err != nil {
|
|
return errs.Wrap(err)
|
|
}
|
|
report.EmailsLowercased++
|
|
}
|
|
logger.Debugw("processed recipient email group",
|
|
"email", k.LowerEmail,
|
|
"survivor", survivor.ID,
|
|
"merged", len(rows)-1,
|
|
)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// mergeRecipient repoints every reference from fromID to toID, collapsing any
|
|
// membership that would otherwise duplicate, then deletes the fromID recipient.
|
|
// references are repointed before the delete so no foreign key is left dangling.
|
|
func mergeRecipient(tx *gorm.DB, fromID string, toID string) error {
|
|
// campaign membership is unique on (campaign_id, recipient_id), so first
|
|
// drop the rows that the survivor already has for the same campaign
|
|
if err := tx.Exec(
|
|
"DELETE FROM "+database.CAMPAIGN_RECIPIENT_TABLE_NAME+
|
|
" WHERE recipient_id = ? AND campaign_id IN"+
|
|
" (SELECT campaign_id FROM "+database.CAMPAIGN_RECIPIENT_TABLE_NAME+
|
|
" WHERE recipient_id = ?)",
|
|
fromID, toID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
if err := tx.Exec(
|
|
"UPDATE "+database.CAMPAIGN_RECIPIENT_TABLE_NAME+
|
|
" SET recipient_id = ? WHERE recipient_id = ?",
|
|
toID, fromID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
|
|
// group membership is unique on (recipient_id, recipient_group_id)
|
|
if err := tx.Exec(
|
|
"DELETE FROM "+database.RECIPIENT_GROUP_RECIPIENT_TABLE+
|
|
" WHERE recipient_id = ? AND recipient_group_id IN"+
|
|
" (SELECT recipient_group_id FROM "+database.RECIPIENT_GROUP_RECIPIENT_TABLE+
|
|
" WHERE recipient_id = ?)",
|
|
fromID, toID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
if err := tx.Exec(
|
|
"UPDATE "+database.RECIPIENT_GROUP_RECIPIENT_TABLE+
|
|
" SET recipient_id = ? WHERE recipient_id = ?",
|
|
toID, fromID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
|
|
// device codes are unique on (campaign_id, recipient_id)
|
|
if err := tx.Exec(
|
|
"DELETE FROM "+database.MICROSOFT_DEVICE_CODE_TABLE+
|
|
" WHERE recipient_id = ? AND campaign_id IN"+
|
|
" (SELECT campaign_id FROM "+database.MICROSOFT_DEVICE_CODE_TABLE+
|
|
" WHERE recipient_id = ?)",
|
|
fromID, toID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
if err := tx.Exec(
|
|
"UPDATE "+database.MICROSOFT_DEVICE_CODE_TABLE+
|
|
" SET recipient_id = ? WHERE recipient_id = ?",
|
|
toID, fromID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
|
|
// events have no uniqueness on the recipient, so keep them all and repoint
|
|
if err := tx.Exec(
|
|
"UPDATE "+database.CAMPAIGN_EVENT_TABLE+
|
|
" SET recipient_id = ? WHERE recipient_id = ?",
|
|
toID, fromID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
|
|
// the merged recipient now has no references and can be removed
|
|
if err := tx.Exec(
|
|
"DELETE FROM "+database.RECIPIENT_TABLE+" WHERE id = ?",
|
|
fromID,
|
|
).Error; err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|