<!doctype html>

<html lang="ko">

<head>

  <meta charset="utf-8" />

  <meta name="viewport" content="width=device-width,initial-scale=1" />

  <title>Receipt → Table (OCR)</title>

  <style>

    :root { font-family: system-ui, -apple-system, Segoe UI, Roboto, Arial, sans-serif; }

    body { margin: 24px; line-height: 1.4; }

    .row { display: flex; gap: 16px; flex-wrap: wrap; }

    .card {

      border: 1px solid #ddd; border-radius: 12px; padding: 16px; background: #fff;

      box-shadow: 0 1px 2px rgba(0,0,0,0.04);

    }

    .col { flex: 1 1 340px; min-width: 320px; }

    h1 { font-size: 20px; margin: 0 0 12px; }

    label { display: block; font-weight: 600; margin: 10px 0 6px; }

    input[type="file"] { width: 100%; }

    button {

      padding: 10px 14px; border-radius: 10px; border: 1px solid #ccc; background: #f7f7f7;

      cursor: pointer;

    }

    button.primary { background: #111; color: #fff; border-color: #111; }

    button:disabled { opacity: 0.6; cursor: not-allowed; }

    .muted { color: #666; font-size: 13px; }

    .progress {

      height: 10px; background: #eee; border-radius: 999px; overflow: hidden;

      margin-top: 10px;

    }

    .bar { height: 100%; width: 0%; background: #111; transition: width 0.2s ease; }

    img.preview { max-width: 100%; border-radius: 10px; border: 1px solid #eee; }

    textarea { width: 100%; min-height: 220px; padding: 10px; border-radius: 10px; border: 1px solid #ddd; }

    table { width: 100%; border-collapse: collapse; margin-top: 10px; }

    th, td { border-bottom: 1px solid #eee; padding: 10px; text-align: left; vertical-align: top; }

    th { background: #fafafa; font-size: 13px; color: #444; position: sticky; top: 0; }

    td[contenteditable="true"] { outline: none; }

    .pill {

      display: inline-block; padding: 4px 10px; border-radius: 999px;

      background: #f1f1f1; font-size: 12px; margin-left: 8px;

    }

    .kv { display: grid; grid-template-columns: 110px 1fr; gap: 6px 12px; margin-top: 10px; }

    .kv div { padding: 6px 0; border-bottom: 1px dashed #eee; }

    .kv .k { color: #666; }

    .right { text-align: right; }

    .danger { color: #b00020; }

    .actions { display: flex; gap: 10px; flex-wrap: wrap; margin-top: 12px; }

  </style>

</head>

<body>

  <h1>영수증 사진 → OCR → 표 정리 <span class="pill">USD 중심</span></h1>

  <p class="muted">

    브라우저에서 영수증 이미지를 OCR로 읽고, 항목을 표로 자동 정리합니다. 결과는 표에서 직접 수정 후 CSV로 내보낼 수 있어요.

  </p>


  <div class="row">

    <div class="card col">

      <h2 style="margin:0 0 10px; font-size:16px;">1) 영수증 업로드</h2>


      <label for="file">영수증 이미지 선택</label>

      <input id="file" type="file" accept="image/*" capture="environment" />


      <div class="actions">

        <button id="btnProcess" class="primary" disabled>OCR + 표 만들기</button>

        <button id="btnClear" disabled>초기화</button>

      </div>


      <div class="progress" aria-label="progress">

        <div id="bar" class="bar"></div>

      </div>

      <div id="status" class="muted" style="margin-top:8px;">대기 중…</div>


      <div style="margin-top:14px;">

        <label>미리보기</label>

        <img id="preview" class="preview" alt="preview" />

      </div>


      <p class="muted" style="margin-top:12px;">

        팁: 글자가 선명하게 나오도록 밝은 곳에서, 흔들림 없이 찍어주세요. (가능하면 영수증만 꽉 차게)

      </p>

    </div>


    <div class="card col">

      <h2 style="margin:0 0 10px; font-size:16px;">2) 추출 결과</h2>


      <div class="kv">

        <div class="k">가맹점</div><div id="merchant">-</div>

        <div class="k">날짜</div><div id="date">-</div>

        <div class="k">소계(Subtotal)</div><div id="subtotal">-</div>

        <div class="k">세금(Tax)</div><div id="tax">-</div>

        <div class="k"><b>합계(Total)</b></div><div id="total"><b>-</b></div>

      </div>


      <label style="margin-top:16px;">항목 표 (클릭해서 수정 가능)</label>

      <div style="max-height: 260px; overflow: auto; border: 1px solid #eee; border-radius: 10px;">

        <table id="itemsTable">

          <thead>

            <tr>

              <th style="width: 60%;">Description</th>

              <th style="width: 10%;" class="right">Qty</th>

              <th style="width: 15%;" class="right">Unit</th>

              <th style="width: 15%;" class="right">Amount</th>

            </tr>

          </thead>

          <tbody id="itemsBody">

            <tr><td colspan="4" class="muted">아직 항목이 없습니다. OCR을 실행해 주세요.</td></tr>

          </tbody>

        </table>

      </div>


      <div class="actions">

        <button id="btnAddRow" disabled>+ 행 추가</button>

        <button id="btnRecalc" disabled>합계 다시 계산</button>

        <button id="btnExport" disabled>CSV 내보내기</button>

      </div>


      <label style="margin-top:16px;">OCR 원문 텍스트</label>

      <textarea id="raw" placeholder="OCR 결과 텍스트가 여기에 표시됩니다."></textarea>

      <div class="muted">

        원문이 이상하면 표도 이상할 수 있어요. 원문을 참고해서 표를 수정하세요.

      </div>

    </div>

  </div>


  <!--
    Tesseract.js (client-side OCR), loaded from the jsDelivr CDN.
    The inline script below calls Tesseract.recognize, so this tag must stay before it.
    NOTE(review): "@5" names only a major version and the tag has no integrity attribute;
    presumably the CDN serves whatever 5.x build is current. Consider pinning an exact
    version and adding integrity + crossorigin.
  -->

  <script src="https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js"></script>


  <script>

    const $ = (id) => document.getElementById(id);


    const fileEl = $("file");

    const previewEl = $("preview");

    const btnProcess = $("btnProcess");

    const btnClear = $("btnClear");

    const btnAddRow = $("btnAddRow");

    const btnRecalc = $("btnRecalc");

    const btnExport = $("btnExport");


    const bar = $("bar");

    const statusEl = $("status");


    const rawEl = $("raw");

    const itemsBody = $("itemsBody");


    const merchantEl = $("merchant");

    const dateEl = $("date");

    const subtotalEl = $("subtotal");

    const taxEl = $("tax");

    const totalEl = $("total");


    let currentImage = null;


    const usd = new Intl.NumberFormat("en-US", { style: "currency", currency: "USD" });


    function setProgress(pct, text) {

      bar.style.width = `${Math.max(0, Math.min(100, pct))}%`;

      statusEl.textContent = text || "";

    }


    function resetUI() {

      currentImage = null;

      previewEl.src = "";

      rawEl.value = "";

      merchantEl.textContent = "-";

      dateEl.textContent = "-";

      subtotalEl.textContent = "-";

      taxEl.textContent = "-";

      totalEl.innerHTML = "<b>-</b>";


      itemsBody.innerHTML = `<tr><td colspan="4" class="muted">아직 항목이 없습니다. OCR을 실행해 주세요.</td></tr>`;


      btnProcess.disabled = true;

      btnClear.disabled = true;

      btnAddRow.disabled = true;

      btnRecalc.disabled = true;

      btnExport.disabled = true;


      setProgress(0, "대기 중…");

    }


    function sanitizeText(t) {

      return (t || "")

        .replace(/\r/g, "")

        .replace(/[^\S\n]+/g, " ")

        .replace(/\n{3,}/g, "\n\n")

        .trim();

    }


    function extractMoneyCandidates(line) {

      // money patterns like $12.34 or 12.34 (common on US receipts)

      const re = /(?:\$?\s*)(\d{1,4}(?:,\d{3})*|\d+)\.(\d{2})\b/g;

      const out = [];

      let m;

      while ((m = re.exec(line)) !== null) {

        const whole = m[1].replace(/,/g, "");

        const val = Number(`${whole}.${m[2]}`);

        if (Number.isFinite(val)) out.push(val);

      }

      return out;

    }


    function findDate(text) {

      // common date patterns

      const patterns = [

        /\b(0?[1-9]|1[0-2])[\/\-\.](0?[1-9]|[12]\d|3[01])[\/\-\.]((?:19|20)?\d{2})\b/,

        /\b((?:19|20)\d{2})[\/\-\.](0?[1-9]|1[0-2])[\/\-\.](0?[1-9]|[12]\d|3[01])\b/

      ];

      for (const re of patterns) {

        const m = text.match(re);

        if (m) return m[0];

      }

      return null;

    }


    function likelyMerchant(lines) {

      // heuristic: first non-empty line with letters, not just numbers

      for (const ln of lines.slice(0, 8)) {

        const line = ln.trim();

        if (!line) continue;

        const hasLetters = /[A-Za-z]/.test(line);

        const tooNumeric = /^[\d\W]+$/.test(line);

        if (hasLetters && !tooNumeric) return line;

      }

      // fallback

      return lines.find(l => l.trim())?.trim() || null;

    }


    function parseReceiptText(text) {

      const clean = sanitizeText(text);

      const lines = clean.split("\n").map(s => s.trim()).filter(Boolean);


      const merchant = likelyMerchant(lines);


      const date = findDate(clean);


      // find total/tax/subtotal lines

      const keyMatchers = {

        total: /(grand\s*)?total|amount\s*due|balance\s*due|total\s*due/i,

        subtotal: /sub\s*total|subtotal/i,

        tax: /tax|sales\s*tax/i

      };


      function findKeyAmount(kind) {

        // search from bottom: totals are often near bottom

        for (let i = lines.length - 1; i >= 0; i--) {

          const line = lines[i];

          if (!keyMatchers[kind].test(line)) continue;

          const cands = extractMoneyCandidates(line);

          if (cands.length) return Math.max(...cands);

        }

        return null;

      }


      const total = findKeyAmount("total");

      const tax = findKeyAmount("tax");

      const subtotal = findKeyAmount("subtotal");


      // item lines: lines that end with a money value, excluding totals/payment lines

      const exclude = /(total|subtotal|tax|change|cash|visa|mastercard|amex|discover|debit|credit|tender|auth|approval|balance\s*due|amount\s*due|tip|gratuity)/i;


      const itemLines = [];

      for (const line of lines) {

        if (exclude.test(line)) continue;

        const cands = extractMoneyCandidates(line);

        if (!cands.length) continue;

        // prefer lines where last token looks like a price

        const last = cands[cands.length - 1];

        if (!Number.isFinite(last)) continue;

        itemLines.push({ line, amount: last });

      }


      // Build items with simple qty/unit detection

      const items = itemLines.map(({ line, amount }) => {

        let desc = line;

        let qty = 1;

        let unit = null;


        // remove last amount occurrence from description (best effort)

        // ex: "COKE 2.49" -> "COKE"

        desc = desc.replace(/(?:\$?\s*)\d{1,4}(?:,\d{3})*\.\d{2}\s*$/,"").trim();


        // qty/unit patterns: "2 x 1.99" or "2 @ 1.99"

        const qu = line.match(/\b(\d+)\s*(?:x|@)\s*(\d{1,4}(?:,\d{3})*|\d+)\.(\d{2})\b/i);

        if (qu) {

          qty = Number(qu[1]);

          unit = Number(`${qu[2].replace(/,/g,"")}.${qu[3]}`);

        }


        // fallback: if unit exists and amount is close to qty*unit, keep it

        if (unit && qty && Number.isFinite(amount)) {

          // keep as is

        } else {

          unit = null;

          qty = 1;

        }


        // If description became empty, fallback to original line without amount

        if (!desc) desc = line;


        return { desc, qty, unit, amount };

      });


      // de-duplicate very similar consecutive items (OCR sometimes duplicates)

      const compact = [];

      for (const it of items) {

        const prev = compact[compact.length - 1];

        if (prev && prev.desc === it.desc && prev.amount === it.amount) continue;

        compact.push(it);

      }


      return { merchant, date, subtotal, tax, total, items: compact, raw: clean };

    }


    function renderSummary(parsed) {

      merchantEl.textContent = parsed.merchant || "-";

      dateEl.textContent = parsed.date || "-";


      subtotalEl.textContent = (parsed.subtotal != null) ? usd.format(parsed.subtotal) : "-";

      taxEl.textContent = (parsed.tax != null) ? usd.format(parsed.tax) : "-";


      totalEl.innerHTML = (parsed.total != null)

        ? `<b>${usd.format(parsed.total)}</b>`

        : `<b class="danger">-</b>`;

    }


    function renderItems(items) {

      if (!items || !items.length) {

        itemsBody.innerHTML = `<tr><td colspan="4" class="muted">항목을 찾지 못했어요. 아래 OCR 원문을 확인하고 행을 추가해 주세요.</td></tr>`;

        return;

      }


      itemsBody.innerHTML = "";

      for (const it of items) {

        const tr = document.createElement("tr");


        const tdDesc = document.createElement("td");

        tdDesc.contentEditable = "true";

        tdDesc.textContent = it.desc ?? "";

        tr.appendChild(tdDesc);


        const tdQty = document.createElement("td");

        tdQty.className = "right";

        tdQty.contentEditable = "true";

        tdQty.textContent = (it.qty ?? 1);

        tr.appendChild(tdQty);


        const tdUnit = document.createElement("td");

        tdUnit.className = "right";

        tdUnit.contentEditable = "true";

        tdUnit.textContent = (it.unit != null) ? it.unit.toFixed(2) : "";

        tr.appendChild(tdUnit);


        const tdAmt = document.createElement("td");

        tdAmt.className = "right";

        tdAmt.contentEditable = "true";

        tdAmt.textContent = (it.amount != null) ? it.amount.toFixed(2) : "";

        tr.appendChild(tdAmt);


        itemsBody.appendChild(tr);

      }

    }


    function addEmptyRow() {

      // remove placeholder row if exists

      if (itemsBody.querySelectorAll("tr").length === 1) {

        const only = itemsBody.querySelector("tr");

        if (only && only.children.length === 1) itemsBody.innerHTML = "";

      }


      const tr = document.createElement("tr");

      tr.innerHTML = `

        <td contenteditable="true"></td>

        <td contenteditable="true" class="right">1</td>

        <td contenteditable="true" class="right"></td>

        <td contenteditable="true" class="right"></td>

      `;

      itemsBody.appendChild(tr);

    }


    function recalcTotalsFromTable() {

      const rows = [...itemsBody.querySelectorAll("tr")];

      let sum = 0;


      for (const r of rows) {

        const cells = r.querySelectorAll("td");

        if (cells.length !== 4) continue;


        const amtStr = (cells[3].textContent || "").trim().replace(/\$/g,"");

        const amt = Number(amtStr);

        if (Number.isFinite(amt)) sum += amt;

      }


      // Update subtotal if missing, otherwise show computed hint by overwriting subtotal.

      subtotalEl.textContent = usd.format(sum);

      // total = subtotal + tax (if tax exists)

      const taxText = (taxEl.textContent || "").replace(/[^0-9.]/g,"");

      const taxVal = Number(taxText);

      if (Number.isFinite(taxVal)) {

        totalEl.innerHTML = `<b>${usd.format(sum + taxVal)}</b>`;

      } else {

        totalEl.innerHTML = `<b>${usd.format(sum)}</b>`;

      }

    }


    function exportCSV() {

      const rows = [...itemsBody.querySelectorAll("tr")];

      const lines = [];

      lines.push(["Description","Qty","Unit","Amount"].join(","));


      for (const r of rows) {

        const cells = [...r.querySelectorAll("td")];

        if (cells.length !== 4) continue;


        const vals = cells.map((c) => (c.textContent || "").trim());

        // escape quotes/commas

        const esc = (v) => `"${String(v).replace(/"/g,'""')}"`;

        lines.push(vals.map(esc).join(","));

      }


      // add summary at bottom

      lines.push("");

      lines.push(`"Merchant","${(merchantEl.textContent || "").replace(/"/g,'""')}"`);

      lines.push(`"Date","${(dateEl.textContent || "").replace(/"/g,'""')}"`);

      lines.push(`"Subtotal","${(subtotalEl.textContent || "").replace(/"/g,'""')}"`);

      lines.push(`"Tax","${(taxEl.textContent || "").replace(/"/g,'""')}"`);

      lines.push(`"Total","${(totalEl.textContent || "").replace(/"/g,'""')}"`);


      const blob = new Blob([lines.join("\n")], { type: "text/csv;charset=utf-8" });

      const url = URL.createObjectURL(blob);

      const a = document.createElement("a");

      a.href = url;

      a.download = `receipt_${Date.now()}.csv`;

      document.body.appendChild(a);

      a.click();

      a.remove();

      URL.revokeObjectURL(url);

    }


    fileEl.addEventListener("change", async (e) => {

      const f = e.target.files && e.target.files[0];

      if (!f) {

        resetUI();

        return;

      }


      currentImage = f;

      btnProcess.disabled = false;

      btnClear.disabled = false;


      const url = URL.createObjectURL(f);

      previewEl.src = url;


      setProgress(0, "이미지 선택됨. OCR 실행 준비 완료.");

    });


    btnClear.addEventListener("click", () => {

      fileEl.value = "";

      resetUI();

    });


    btnAddRow.addEventListener("click", addEmptyRow);

    btnRecalc.addEventListener("click", recalcTotalsFromTable);

    btnExport.addEventListener("click", exportCSV);


    btnProcess.addEventListener("click", async () => {

      if (!currentImage) return;


      btnProcess.disabled = true;

      btnAddRow.disabled = true;

      btnRecalc.disabled = true;

      btnExport.disabled = true;


      setProgress(2, "OCR 초기화 중…");


      try {

        const { data } = await Tesseract.recognize(currentImage, "eng", {

          logger: (m) => {

            if (m.status === "recognizing text") {

              const pct = Math.round((m.progress || 0) * 100);

              setProgress(pct, `OCR 진행 중… ${pct}%`);

            } else {

              setProgress(Math.max(parseInt(bar.style.width) || 0, 5), `상태: ${m.status}`);

            }

          }

        });


        const text = sanitizeText(data.text || "");

        rawEl.value = text;


        setProgress(95, "텍스트 파싱 중…");

        const parsed = parseReceiptText(text);


        renderSummary(parsed);

        renderItems(parsed.items);


        setProgress(100, "완료! 표를 확인하고 필요하면 수정하세요.");


        btnAddRow.disabled = false;

        btnRecalc.disabled = false;

        btnExport.disabled = false;

      } catch (err) {

        console.error(err);

        setProgress(0, "오류 발생: OCR에 실패했습니다. 콘솔을 확인해 주세요.");

        btnProcess.disabled = false;

      } finally {

        btnClear.disabled = false;

        btnProcess.disabled = false;

      }

    });


    // init

    resetUI();

  </script>

</body>

</html>