feat: duplicate detection on CSV import
Compute a sha256 fingerprint (date|description|amount|account_id, first 16 hex chars) for every CSV row and store it as bank_ref. At preview time, existing fingerprints are fetched and matching rows are shown greyed out with a "duplicate" label. At confirm time, those rows are silently skipped — only truly new transactions are inserted. If every row is a duplicate the user is redirected with ?notice=all_duplicates instead of inserting an empty batch. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1c2bac1d5f
commit
2170457528
@ -2,7 +2,9 @@ package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"embed"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"html/template"
|
||||
@ -792,8 +794,24 @@ func (h *Handler) ImportPreview(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
}
|
||||
|
||||
// compute fingerprints and detect duplicates
|
||||
var fingerprints []string
|
||||
for i := range rows {
|
||||
rows[i].Category = autoCategorize(rows[i].Description, catMap)
|
||||
rows[i].Fingerprint = txnFingerprint(rows[i].Date, rows[i].Description, rows[i].AmountCents, accountID)
|
||||
fingerprints = append(fingerprints, rows[i].Fingerprint)
|
||||
}
|
||||
existing, _ := h.store.getTransactions(ctx, a.UserID, bson.M{"bank_ref": bson.M{"$in": fingerprints}})
|
||||
existingRefs := map[string]bool{}
|
||||
for _, t := range existing {
|
||||
existingRefs[t.BankRef] = true
|
||||
}
|
||||
duplicateCount := 0
|
||||
for i := range rows {
|
||||
if existingRefs[rows[i].Fingerprint] {
|
||||
rows[i].Duplicate = true
|
||||
duplicateCount++
|
||||
}
|
||||
}
|
||||
|
||||
importPreview := &CSVImportPreview{
|
||||
@ -815,6 +833,7 @@ func (h *Handler) ImportPreview(w http.ResponseWriter, r *http.Request) {
|
||||
"SelectedFormat": string(format),
|
||||
"SelectedAccount": accountID,
|
||||
"CategoryColors": catColors,
|
||||
"DuplicateCount": duplicateCount,
|
||||
})
|
||||
}
|
||||
|
||||
@ -861,9 +880,24 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
userCats := r.Form["categories"]
|
||||
|
||||
// compute fingerprints and skip duplicates
|
||||
var fingerprints []string
|
||||
for _, row := range rows {
|
||||
fingerprints = append(fingerprints, txnFingerprint(row.Date, row.Description, row.AmountCents, accountID))
|
||||
}
|
||||
existing, _ := h.store.getTransactions(ctx, a.UserID, bson.M{"bank_ref": bson.M{"$in": fingerprints}})
|
||||
existingRefs := map[string]bool{}
|
||||
for _, t := range existing {
|
||||
existingRefs[t.BankRef] = true
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
var txns []Transaction
|
||||
for i, row := range rows {
|
||||
fp := fingerprints[i]
|
||||
if existingRefs[fp] {
|
||||
continue
|
||||
}
|
||||
date, _ := time.Parse("2006-01-02", row.Date)
|
||||
cat := "Others"
|
||||
if i < len(userCats) && userCats[i] != "" {
|
||||
@ -878,10 +912,16 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
|
||||
Description: row.Description,
|
||||
AmountCents: row.AmountCents,
|
||||
Category: cat,
|
||||
BankRef: fp,
|
||||
CreatedAt: now,
|
||||
})
|
||||
}
|
||||
|
||||
if len(txns) == 0 {
|
||||
http.Redirect(w, r, "/transactions?notice=all_duplicates", http.StatusSeeOther)
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.store.createTransactions(ctx, txns); err != nil {
|
||||
slog.Error("create transactions", "err", err)
|
||||
http.Error(w, "save error", http.StatusInternalServerError)
|
||||
@ -891,6 +931,11 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
|
||||
http.Redirect(w, r, "/transactions", http.StatusSeeOther)
|
||||
}
|
||||
|
||||
// txnFingerprint returns a short, deterministic identifier for a transaction
// row. The four fields are joined as "date|description|amountCents|accountID",
// hashed with SHA-256, and the leading 8 digest bytes are returned as 16
// lowercase hex characters.
func txnFingerprint(date, description string, amountCents int64, accountID string) string {
	// 8 raw digest bytes encode to exactly 16 hex characters.
	const prefixBytes = 8

	hasher := sha256.New()
	// Writes to a hash.Hash never return an error, so the result is not checked.
	fmt.Fprintf(hasher, "%s|%s|%d|%s", date, description, amountCents, accountID)

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:prefixBytes])
}
|
||||
|
||||
func autoCategorize(desc string, catMap map[string]string) string {
|
||||
desc = strings.ToLower(desc)
|
||||
keywords := map[string]string{
|
||||
|
||||
@ -96,6 +96,8 @@ type CSVImportRow struct {
|
||||
Description string `json:"description"`
|
||||
AmountCents int64 `json:"amount_cents"`
|
||||
Category string `json:"category"`
|
||||
Fingerprint string `json:"fingerprint"`
|
||||
Duplicate bool `json:"duplicate"`
|
||||
}
|
||||
|
||||
type CSVImportPreview struct {
|
||||
|
||||
@ -14,7 +14,12 @@
|
||||
<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:16px; flex-wrap:wrap; gap:8px;">
|
||||
<div>
|
||||
<h2 style="font-size:15px; font-weight:700; color:var(--text); text-transform:none; letter-spacing:0;">Preview</h2>
|
||||
<p class="text-muted" style="margin-top:3px;">{{$d.Preview.Total}} rows — review categories before confirming.</p>
|
||||
<p class="text-muted" style="margin-top:3px;">
|
||||
{{$d.Preview.Total}} rows
|
||||
{{if $d.DuplicateCount}}
|
||||
— <span style="color:var(--yellow, #f59e0b); font-weight:600;">{{$d.DuplicateCount}} already imported</span> (shown greyed out, will be skipped)
|
||||
{{end}}
|
||||
</p>
|
||||
</div>
|
||||
<a href="/import" class="btn btn-outline btn-sm">← Back</a>
|
||||
</div>
|
||||
@ -36,13 +41,19 @@
|
||||
</thead>
|
||||
<tbody>
|
||||
{{range $i, $row := $d.Preview.Rows}}
|
||||
<tr>
|
||||
<tr {{if $row.Duplicate}}style="opacity:0.4;"{{end}}>
|
||||
<td style="white-space:nowrap; color:var(--text2);">{{$row.Date}}</td>
|
||||
<td style="max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap;">{{$row.Description}}</td>
|
||||
<td style="max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap;">
|
||||
{{$row.Description}}
|
||||
{{if $row.Duplicate}}<span style="font-size:11px; font-weight:600; color:var(--muted); margin-left:6px;">duplicate</span>{{end}}
|
||||
</td>
|
||||
<td class="cents {{if lt $row.AmountCents 0}}negative{{else}}positive{{end}}" style="font-weight:600; white-space:nowrap;">
|
||||
{{if lt $row.AmountCents 0}}−{{else}}+{{end}}€{{cents (centsAbs $row.AmountCents)}}
|
||||
</td>
|
||||
<td>
|
||||
{{if $row.Duplicate}}
|
||||
<span style="font-size:12px; color:var(--muted);">—</span>
|
||||
{{else}}
|
||||
<select name="categories" class="cat-select" data-selected="{{$row.Category}}"
|
||||
style="font-size:12.5px; padding:5px 8px; border:1.5px solid var(--border2);
|
||||
border-radius:6px; background:var(--bg2); color:var(--text);
|
||||
@ -51,6 +62,7 @@
|
||||
<option value="{{$cat}}" {{if eq $cat $row.Category}}selected{{end}}>{{$cat}}</option>
|
||||
{{end}}
|
||||
</select>
|
||||
{{end}}
|
||||
</td>
|
||||
</tr>
|
||||
{{end}}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user