feat: duplicate detection on CSV import
Compute a sha256 fingerprint (date|description|amount|account_id, first 16 hex chars) for every CSV row and store it as bank_ref. At preview time, existing fingerprints are fetched and matching rows are shown greyed out with a "duplicate" label. At confirm time, those rows are silently skipped — only truly new transactions are inserted. If every row is a duplicate, the user is redirected with ?notice=all_duplicates instead of inserting an empty batch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
1c2bac1d5f
commit
2170457528
@ -2,7 +2,9 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
"embed"
|
"embed"
|
||||||
|
"encoding/hex"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"html/template"
|
"html/template"
|
||||||
@ -792,8 +794,24 @@ func (h *Handler) ImportPreview(w http.ResponseWriter, r *http.Request) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// compute fingerprints and detect duplicates
|
||||||
|
var fingerprints []string
|
||||||
for i := range rows {
|
for i := range rows {
|
||||||
rows[i].Category = autoCategorize(rows[i].Description, catMap)
|
rows[i].Category = autoCategorize(rows[i].Description, catMap)
|
||||||
|
rows[i].Fingerprint = txnFingerprint(rows[i].Date, rows[i].Description, rows[i].AmountCents, accountID)
|
||||||
|
fingerprints = append(fingerprints, rows[i].Fingerprint)
|
||||||
|
}
|
||||||
|
existing, _ := h.store.getTransactions(ctx, a.UserID, bson.M{"bank_ref": bson.M{"$in": fingerprints}})
|
||||||
|
existingRefs := map[string]bool{}
|
||||||
|
for _, t := range existing {
|
||||||
|
existingRefs[t.BankRef] = true
|
||||||
|
}
|
||||||
|
duplicateCount := 0
|
||||||
|
for i := range rows {
|
||||||
|
if existingRefs[rows[i].Fingerprint] {
|
||||||
|
rows[i].Duplicate = true
|
||||||
|
duplicateCount++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
importPreview := &CSVImportPreview{
|
importPreview := &CSVImportPreview{
|
||||||
@ -815,6 +833,7 @@ func (h *Handler) ImportPreview(w http.ResponseWriter, r *http.Request) {
|
|||||||
"SelectedFormat": string(format),
|
"SelectedFormat": string(format),
|
||||||
"SelectedAccount": accountID,
|
"SelectedAccount": accountID,
|
||||||
"CategoryColors": catColors,
|
"CategoryColors": catColors,
|
||||||
|
"DuplicateCount": duplicateCount,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -861,9 +880,24 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
userCats := r.Form["categories"]
|
userCats := r.Form["categories"]
|
||||||
|
|
||||||
|
// compute fingerprints and skip duplicates
|
||||||
|
var fingerprints []string
|
||||||
|
for _, row := range rows {
|
||||||
|
fingerprints = append(fingerprints, txnFingerprint(row.Date, row.Description, row.AmountCents, accountID))
|
||||||
|
}
|
||||||
|
existing, _ := h.store.getTransactions(ctx, a.UserID, bson.M{"bank_ref": bson.M{"$in": fingerprints}})
|
||||||
|
existingRefs := map[string]bool{}
|
||||||
|
for _, t := range existing {
|
||||||
|
existingRefs[t.BankRef] = true
|
||||||
|
}
|
||||||
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
var txns []Transaction
|
var txns []Transaction
|
||||||
for i, row := range rows {
|
for i, row := range rows {
|
||||||
|
fp := fingerprints[i]
|
||||||
|
if existingRefs[fp] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
date, _ := time.Parse("2006-01-02", row.Date)
|
date, _ := time.Parse("2006-01-02", row.Date)
|
||||||
cat := "Others"
|
cat := "Others"
|
||||||
if i < len(userCats) && userCats[i] != "" {
|
if i < len(userCats) && userCats[i] != "" {
|
||||||
@ -878,10 +912,16 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
|
|||||||
Description: row.Description,
|
Description: row.Description,
|
||||||
AmountCents: row.AmountCents,
|
AmountCents: row.AmountCents,
|
||||||
Category: cat,
|
Category: cat,
|
||||||
|
BankRef: fp,
|
||||||
CreatedAt: now,
|
CreatedAt: now,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(txns) == 0 {
|
||||||
|
http.Redirect(w, r, "/transactions?notice=all_duplicates", http.StatusSeeOther)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
if err := h.store.createTransactions(ctx, txns); err != nil {
|
if err := h.store.createTransactions(ctx, txns); err != nil {
|
||||||
slog.Error("create transactions", "err", err)
|
slog.Error("create transactions", "err", err)
|
||||||
http.Error(w, "save error", http.StatusInternalServerError)
|
http.Error(w, "save error", http.StatusInternalServerError)
|
||||||
@ -891,6 +931,11 @@ func (h *Handler) ImportConfirm(w http.ResponseWriter, r *http.Request) {
|
|||||||
http.Redirect(w, r, "/transactions", http.StatusSeeOther)
|
http.Redirect(w, r, "/transactions", http.StatusSeeOther)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// txnFingerprint derives the duplicate-detection key for one imported row.
//
// The fields are rendered as "date|description|amountCents|accountID",
// hashed with SHA-256, and the first 16 hex characters (the leading 8 digest
// bytes) are returned. The same inputs always yield the same key.
func txnFingerprint(date, description string, amountCents int64, accountID string) string {
	// 8 digest bytes hex-encode to the 16-character prefix.
	const prefixBytes = 8

	hasher := sha256.New()
	// hash.Hash.Write is documented to never return an error, so the
	// Fprintf result is intentionally not checked.
	fmt.Fprintf(hasher, "%s|%s|%d|%s", date, description, amountCents, accountID)

	digest := hasher.Sum(nil)
	return hex.EncodeToString(digest[:prefixBytes])
}
|
||||||
|
|
||||||
func autoCategorize(desc string, catMap map[string]string) string {
|
func autoCategorize(desc string, catMap map[string]string) string {
|
||||||
desc = strings.ToLower(desc)
|
desc = strings.ToLower(desc)
|
||||||
keywords := map[string]string{
|
keywords := map[string]string{
|
||||||
|
|||||||
@ -96,6 +96,8 @@ type CSVImportRow struct {
|
|||||||
Description string `json:"description"`
|
Description string `json:"description"`
|
||||||
AmountCents int64 `json:"amount_cents"`
|
AmountCents int64 `json:"amount_cents"`
|
||||||
Category string `json:"category"`
|
Category string `json:"category"`
|
||||||
|
Fingerprint string `json:"fingerprint"`
|
||||||
|
Duplicate bool `json:"duplicate"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type CSVImportPreview struct {
|
type CSVImportPreview struct {
|
||||||
|
|||||||
@ -14,7 +14,12 @@
|
|||||||
<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:16px; flex-wrap:wrap; gap:8px;">
|
<div style="display:flex; justify-content:space-between; align-items:center; margin-bottom:16px; flex-wrap:wrap; gap:8px;">
|
||||||
<div>
|
<div>
|
||||||
<h2 style="font-size:15px; font-weight:700; color:var(--text); text-transform:none; letter-spacing:0;">Preview</h2>
|
<h2 style="font-size:15px; font-weight:700; color:var(--text); text-transform:none; letter-spacing:0;">Preview</h2>
|
||||||
<p class="text-muted" style="margin-top:3px;">{{$d.Preview.Total}} rows — review categories before confirming.</p>
|
<p class="text-muted" style="margin-top:3px;">
|
||||||
|
{{$d.Preview.Total}} rows
|
||||||
|
{{if $d.DuplicateCount}}
|
||||||
|
— <span style="color:var(--yellow, #f59e0b); font-weight:600;">{{$d.DuplicateCount}} already imported</span> (shown greyed out, will be skipped)
|
||||||
|
{{end}}
|
||||||
|
</p>
|
||||||
</div>
|
</div>
|
||||||
<a href="/import" class="btn btn-outline btn-sm">← Back</a>
|
<a href="/import" class="btn btn-outline btn-sm">← Back</a>
|
||||||
</div>
|
</div>
|
||||||
@ -36,13 +41,19 @@
|
|||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{{range $i, $row := $d.Preview.Rows}}
|
{{range $i, $row := $d.Preview.Rows}}
|
||||||
<tr>
|
<tr {{if $row.Duplicate}}style="opacity:0.4;"{{end}}>
|
||||||
<td style="white-space:nowrap; color:var(--text2);">{{$row.Date}}</td>
|
<td style="white-space:nowrap; color:var(--text2);">{{$row.Date}}</td>
|
||||||
<td style="max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap;">{{$row.Description}}</td>
|
<td style="max-width:280px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap;">
|
||||||
|
{{$row.Description}}
|
||||||
|
{{if $row.Duplicate}}<span style="font-size:11px; font-weight:600; color:var(--muted); margin-left:6px;">duplicate</span>{{end}}
|
||||||
|
</td>
|
||||||
<td class="cents {{if lt $row.AmountCents 0}}negative{{else}}positive{{end}}" style="font-weight:600; white-space:nowrap;">
|
<td class="cents {{if lt $row.AmountCents 0}}negative{{else}}positive{{end}}" style="font-weight:600; white-space:nowrap;">
|
||||||
{{if lt $row.AmountCents 0}}−{{else}}+{{end}}€{{cents (centsAbs $row.AmountCents)}}
|
{{if lt $row.AmountCents 0}}−{{else}}+{{end}}€{{cents (centsAbs $row.AmountCents)}}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
|
{{if $row.Duplicate}}
|
||||||
|
<span style="font-size:12px; color:var(--muted);">—</span>
|
||||||
|
{{else}}
|
||||||
<select name="categories" class="cat-select" data-selected="{{$row.Category}}"
|
<select name="categories" class="cat-select" data-selected="{{$row.Category}}"
|
||||||
style="font-size:12.5px; padding:5px 8px; border:1.5px solid var(--border2);
|
style="font-size:12.5px; padding:5px 8px; border:1.5px solid var(--border2);
|
||||||
border-radius:6px; background:var(--bg2); color:var(--text);
|
border-radius:6px; background:var(--bg2); color:var(--text);
|
||||||
@ -51,6 +62,7 @@
|
|||||||
<option value="{{$cat}}" {{if eq $cat $row.Category}}selected{{end}}>{{$cat}}</option>
|
<option value="{{$cat}}" {{if eq $cat $row.Category}}selected{{end}}>{{$cat}}</option>
|
||||||
{{end}}
|
{{end}}
|
||||||
</select>
|
</select>
|
||||||
|
{{end}}
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{{end}}
|
{{end}}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user