feat: duplicate detection on CSV import

Compute a SHA-256 fingerprint of date|description|amount|account_id
(truncated to the first 16 hex chars) for every CSV row and store it as
bank_ref. At preview time, existing fingerprints are fetched and
matching rows are shown greyed out with a "duplicate" label. At confirm
time, those rows are silently skipped — only truly new transactions are
inserted.

If every row is a duplicate, the user is redirected to
/transactions?notice=all_duplicates instead of inserting an empty batch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Gonçalo Rodrigues 2026-06-13 18:15:23 +01:00
parent 1c2bac1d5f
commit 2170457528
3 changed files with 62 additions and 3 deletions

View File

@ -2,7 +2,9 @@ package main
import (
"context"
"crypto/sha256"
"embed"
"encoding/hex"
"encoding/json"
"fmt"
"html/template"
@ -792,8 +794,24 @@ func (h *Handler) ImportPreview(w http.ResponseWriter, r *http.Request) {
}
}
// compute fingerprints and detect duplicates
var fingerprints []string
for i := range rows {
rows[i].Category = autoCategorize(rows[i].Description, catMap)
rows[i].Fingerprint = txnFingerprint(rows[i].Date, rows[i].Description, rows[i].AmountCents, accountID)
fingerprints = append(fingerprints, rows[i].Fingerprint)
}
existing, _ := h.store.getTransactions(ctx, a.UserID, bson.M{"bank_ref": bson.M{"$in": fingerprints}})
existingRefs := map[string]bool{}
for _, t := range existing {
existingRefs[t.BankRef] = true
}
duplicateCount := 0
for i := range rows {
if existingRefs[rows[i].Fingerprint] {
rows[i].Duplicate = true
duplicateCount++
}
}
importPreview := &CSVImportPreview{
@ -815,6 +833,7 @@ func (h *Handler) ImportPreview(w http.ResponseWriter, r *http.Request) {
"SelectedFormat": string(format),
"SelectedAccount": accountID,
"CategoryColors": catColors,
"DuplicateCount": duplicateCount,
})
}
@ -861,9 +880,24 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
userCats := r.Form["categories"]
// compute fingerprints and skip duplicates
var fingerprints []string
for _, row := range rows {
fingerprints = append(fingerprints, txnFingerprint(row.Date, row.Description, row.AmountCents, accountID))
}
existing, _ := h.store.getTransactions(ctx, a.UserID, bson.M{"bank_ref": bson.M{"$in": fingerprints}})
existingRefs := map[string]bool{}
for _, t := range existing {
existingRefs[t.BankRef] = true
}
now := time.Now()
var txns []Transaction
for i, row := range rows {
fp := fingerprints[i]
if existingRefs[fp] {
continue
}
date, _ := time.Parse("2006-01-02", row.Date)
cat := "Others"
if i < len(userCats) && userCats[i] != "" {
@ -878,10 +912,16 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
Description: row.Description,
AmountCents: row.AmountCents,
Category: cat,
BankRef: fp,
CreatedAt: now,
})
}
if len(txns) == 0 {
http.Redirect(w, r, "/transactions?notice=all_duplicates", http.StatusSeeOther)
return
}
if err := h.store.createTransactions(ctx, txns); err != nil {
slog.Error("create transactions", "err", err)
http.Error(w, "save error", http.StatusInternalServerError)
@ -891,6 +931,11 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
http.Redirect(w, r, "/transactions", http.StatusSeeOther)
}
// txnFingerprint returns a short, deterministic identifier for a transaction.
//
// The date, description, amount in cents and account ID are joined with "|"
// and hashed with SHA-256. The result is the lowercase hex encoding of the
// first 8 bytes of the digest, i.e. a 16-character string.
func txnFingerprint(date, description string, amountCents int64, accountID string) string {
	// 8 digest bytes encode to exactly 16 hex characters.
	const prefixBytes = 8

	key := fmt.Sprintf("%s|%s|%d|%s", date, description, amountCents, accountID)
	digest := sha256.Sum256([]byte(key))
	return hex.EncodeToString(digest[:prefixBytes])
}
func autoCategorize(desc string, catMap map[string]string) string {
desc = strings.ToLower(desc)
keywords := map[string]string{

View File

@ -96,6 +96,8 @@ type CSVImportRow struct {
Description string `json:"description"`
AmountCents int64 `json:"amount_cents"`
Category string `json:"category"`
Fingerprint string `json:"fingerprint"`
Duplicate bool `json:"duplicate"`
}
type CSVImportPreview struct {

View File

@ -14,7 +14,12 @@
<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:16px; flex-wrap:wrap; gap:8px;">
<div>
<h2 style="font-size:15px; font-weight:700; color:var(--text); text-transform:none; letter-spacing:0;">Preview</h2>
<p class="text-muted" style="margin-top:3px;">{{$d.Preview.Total}} rows — review categories before confirming.</p>
<p class="text-muted" style="margin-top:3px;">
{{$d.Preview.Total}} rows
{{if $d.DuplicateCount}}
<span style="color:var(--yellow, #f59e0b); font-weight:600;">{{$d.DuplicateCount}} already imported</span> (shown greyed out, will be skipped)
{{end}}
</p>
</div>
<a href="/import" class="btn btn-outline btn-sm">← Back</a>
</div>
@ -36,13 +41,19 @@
</thead>
<tbody>
{{range $i, $row := $d.Preview.Rows}}
<tr>
<tr {{if $row.Duplicate}}style="opacity:0.4;"{{end}}>
<td style="white-space:nowrap; color:var(--text2);">{{$row.Date}}</td>
<td style="max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap;">{{$row.Description}}</td>
<td style="max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap;">
{{$row.Description}}
{{if $row.Duplicate}}<span style="font-size:11px; font-weight:600; color:var(--muted); margin-left:6px;">duplicate</span>{{end}}
</td>
<td class="cents {{if lt $row.AmountCents 0}}negative{{else}}positive{{end}}" style="font-weight:600; white-space:nowrap;">
{{if lt $row.AmountCents 0}}{{else}}+{{end}}€{{cents (centsAbs $row.AmountCents)}}
</td>
<td>
{{if $row.Duplicate}}
<span style="font-size:12px; color:var(--muted);"></span>
{{else}}
<select name="categories" class="cat-select" data-selected="{{$row.Category}}"
style="font-size:12.5px; padding:5px 8px; border:1.5px solid var(--border2);
border-radius:6px; background:var(--bg2); color:var(--text);
@ -51,6 +62,7 @@
<option value="{{$cat}}" {{if eq $cat $row.Category}}selected{{end}}>{{$cat}}</option>
{{end}}
</select>
{{end}}
</td>
</tr>
{{end}}