diff --git a/app/projects/finrate/assets/fig-dataset-overview_01.png b/app/projects/finrate/assets/fig-dataset-overview_01.png new file mode 100644 index 0000000..3e94a7a Binary files /dev/null and b/app/projects/finrate/assets/fig-dataset-overview_01.png differ diff --git a/app/projects/finrate/assets/fig-eg-DR_01.png b/app/projects/finrate/assets/fig-eg-DR_01.png new file mode 100644 index 0000000..592271f Binary files /dev/null and b/app/projects/finrate/assets/fig-eg-DR_01.png differ diff --git a/app/projects/finrate/assets/fig-eg-EC_01.png b/app/projects/finrate/assets/fig-eg-EC_01.png new file mode 100644 index 0000000..f150da0 Binary files /dev/null and b/app/projects/finrate/assets/fig-eg-EC_01.png differ diff --git a/app/projects/finrate/assets/fig-eg-LT_01.png b/app/projects/finrate/assets/fig-eg-LT_01.png new file mode 100644 index 0000000..3b64f59 Binary files /dev/null and b/app/projects/finrate/assets/fig-eg-LT_01.png differ diff --git a/app/projects/finrate/assets/image-20260121002058463.png b/app/projects/finrate/assets/image-20260121002058463.png new file mode 100644 index 0000000..9618583 Binary files /dev/null and b/app/projects/finrate/assets/image-20260121002058463.png differ diff --git a/app/projects/finrate/page.mdx b/app/projects/finrate/page.mdx new file mode 100644 index 0000000..d2ae321 --- /dev/null +++ b/app/projects/finrate/page.mdx @@ -0,0 +1,46 @@ +# Fin-RATE: Financial Analytics and Tracking Evaluation Benchmark for LLMs on SEC Filings + +![Overview](./assets/image-20260121002058463.png) + +**Fin-RATE** is a real-world benchmark to evaluate large language models (LLMs) on professional-grade reasoning over **U.S. SEC filings**. +It targets financial analyst workflows that demand: + +- 📄 **Long-context understanding** +- ⏱️ **Cross-year tracking** +- 🏢 **Cross-company comparison** +- 📊 **Structured diagnosis of model failures** + +> 📘 [Paper](https://arxiv.org/abs/2602.07294) | 🤗 [Dataset](https://huggingface.co/datasets/JunrongChen2004/Fin-RATE) +> ⬇️ SEC-based QA benchmark with 7,500 instances + interpretable evaluation. + +--- + +## 🔍 Overview + +Fin-RATE includes **three core QA tasks**, modeling real-world financial reasoning: + +![Fin-RATE Tasks|scale=0.9](./assets/fig-dataset-overview_01.png) + +| | | +| --------- | ------------------------------------------------------------ | +| **DR-QA** | Detail & Reasoning: fine-grained reasoning within one SEC section | +| **EC-QA** | Enterprise Comparison: reasoning across peer firms in the same industry/year | +| **LT-QA** | Longitudinal Tracking: analyzing trends across years for the same firm | + +### DR-QA Example + +![DR-QA Example|scale=0.6](./assets/fig-eg-DR_01.png) + + +### EC-QA Example + +![EC-QA Example|scale=0.6](./assets/fig-eg-EC_01.png) + + +### LT-QA Example + +![LT-QA Example|scale=0.6](./assets/fig-eg-LT_01.png) + + +--- + diff --git a/config/publications.ts b/config/publications.ts index e6b34aa..7e4b497 100644 --- a/config/publications.ts +++ b/config/publications.ts @@ -20,6 +20,19 @@ export interface Publication { } export const publications: Publication[] = [ + { + title: "Fin-RATE: A Real-world Financial Analytics and Tracking Evaluation Benchmark for LLMs on SEC Filings", + authors: "Yidong Jiang, Junrong Chen, Eftychia Makri, Jialin Chen, Peiwen Li, Ali Maatouk, Leandros Tassiulas, Eliot Brenner, Bing Xiang, Rex Ying", + venue: "KDD 2026", + page: "finrate", + code: "https://github.com/jyd777/Fin-RATE", + paper: "https://arxiv.org/abs/2602.07294", + abstract: + "Fin-RATE is a benchmark for evaluating LLMs on U.S. Securities and Exchange Commission (SEC) filings, designed to mirror financial analyst workflows. It covers detail-oriented reasoning within individual disclosures, cross-entity comparison under shared financial topics, and longitudinal tracking of the same firm across reporting periods. Experiments on 17 leading LLMs show substantial performance degradation as tasks move beyond single-document reasoning, with accuracy dropping by 18.60% and 14.35% on longitudinal and cross-entity analysis, respectively. These results reveal comparison hallucinations, temporal/entity mismatches, and weaknesses in reasoning quality and factual consistency.", + impact: + "Fin-RATE provides a diagnostic framework for evaluating LLMs in realistic financial analysis workflows. It reveals that current models struggle with cross-document reasoning, long-context financial tracking, and distinguishing retrieval, generation, reasoning, and context-interpretation errors.", + tags: [Tag.Benchmark], + }, { title: "Cache What Lasts: Token Retention for Memory-Bounded KV Cache in LLMs", authors: "Ngoc Bui, Shubham Sharma, Simran Lamba, Saumitra Mishra, Rex Ying", diff --git a/mdx-components.tsx b/mdx-components.tsx index 1d7a317..f8f2350 100644 --- a/mdx-components.tsx +++ b/mdx-components.tsx @@ -7,13 +7,14 @@ interface ImageOption { } function MarkDownImage(props: any) { - const [title, optionPart] = props.alt.split('|') + const { alt = '', style: _style, ...rest } = props + const [title, optionPart] = alt.split('|') const option: ImageOption = optionPart ? optionPart.split(",").reduce((acc: any, cur: string) => { const [key, value] = cur.split("=") acc[key] = value return acc }, {}) : { scale: 1 } - const width_scale = 100 * option.scale + const width_scale = 100 * Number(option.scale) const style = { width: `${width_scale}%`, height: 'auto', @@ -25,8 +26,8 @@ function MarkDownImage(props: any) { width={0} height={0} sizes="100vw" + {...rest} style={style} - {...(props as ImageProps)} /> {title} diff --git a/package-lock.json b/package-lock.json index 900243e..1e9bf05 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1561,7 +1561,6 @@ "resolved": "https://registry.npmjs.org/@heroui/system/-/system-2.4.13.tgz", "integrity": "sha512-4kL9SSEt+ohEbAsFCsZC6hdjBmwghEjeYsVytkOwI0rkXfu57onlZhJqvpIMnausHFL9DJ/oxzBghvFaA7dUxw==", "license": "MIT", - "peer": true, "dependencies": { "@heroui/react-utils": "2.1.8", "@heroui/system-rsc": "2.3.11", @@ -1680,7 +1679,6 @@ "resolved": "https://registry.npmjs.org/@heroui/theme/-/theme-2.4.13.tgz", "integrity": "sha512-W1wm/x3dx9rZXNdyvjQRueK+9ieTpT2icyttGuVEn7NWgzMhpIbEKsAwtTLZGc5zMHJE2cxJQ/jpdCR8HBe0Dw==", "license": "MIT", - "peer": true, "dependencies": { "@heroui/shared-utils": "2.1.7", "clsx": "^1.2.1", @@ -2630,7 +2628,6 @@ "resolved": "https://registry.npmjs.org/@mdx-js/loader/-/loader-3.1.0.tgz", "integrity": "sha512-xU/lwKdOyfXtQGqn3VnJjlDrmKXEvMi1mgYxVmukEUtVycIz1nh7oQ40bKTd4cA7rLStqu0740pnhGYxGoqsCg==", "license": "MIT", - "peer": true, "dependencies": { "@mdx-js/mdx": "^3.0.0", "source-map": "^0.7.0" @@ -2689,7 +2686,6 @@ "resolved": "https://registry.npmjs.org/@mdx-js/react/-/react-3.1.0.tgz", "integrity": "sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==", "license": "MIT", - "peer": true, "dependencies": { "@types/mdx": "^2.0.0" }, @@ -4675,7 +4671,6 @@ "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.3.tgz", "integrity": "sha512-hti/R0pS0q1/xx+TsI73XIqk26eBsISZ2R0wUijXIngRK9R/e7Xw/cXVxQK7R5JjW+SV4zGcn5hXjudkN/pLIw==", "license": "MIT", - "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -4716,7 +4711,6 @@ "integrity": "sha512-mdekAHOqS9UjlmyF/LSs6AIEvfceV749GFxoBAjwAv0nkevfKHWQFDMcBZWUiIC5ft6ePWivXoS36aKQ0Cy3sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.5.1", "@typescript-eslint/scope-manager": "7.2.0", @@ -4753,7 +4747,6 @@ "integrity": "sha512-5FKsVcHTk6TafQKQbuIVkXq58Fnbkd2wDL4LB7AURN7RUOu1utVP+G8+6u3ZhEroW3DF6hyo3ZEXxgKgp4KeCg==", "dev": true, "license": "BSD-2-Clause", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "7.2.0", "@typescript-eslint/types": "7.2.0", @@ -5147,7 +5140,6 @@ "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.1.tgz", "integrity": "sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==", "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -5593,7 +5585,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001688", "electron-to-chromium": "^1.5.73", @@ -6493,7 +6484,6 @@ "deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -6577,7 +6567,6 @@ "integrity": "sha512-SM8AMJdeQqRYT9O9zguiruQZaN7+z+E4eAP9oiLNGKMtomwaB1E9dcgUD6ZAn/eQAb52USbvezbiljfZUhbJcg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "eslint-config-prettier": "bin/cli.js" }, @@ -6696,7 +6685,6 @@ "integrity": "sha512-ixmkI62Rbc2/w8Vfxyh1jQRTdRTF52VxwRVHl/ykPAmqG+Nb7/kNn+byLP0LxPgI7zWA16Jt82SybJInmMia3A==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@rtsao/scc": "^1.1.0", "array-includes": "^3.1.8", @@ -10469,6 +10457,7 @@ "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-12.6.3.tgz", "integrity": "sha512-gRY08RjcnzgFYLemUZ1lo/e9RkBxR+6d4BRvoeZDSeArG4XQXERSPapKl3LNQRu22Sndjf1h+iavgY0O4NrYqA==", "license": "MIT", + "peer": true, "dependencies": { "motion-utils": "^12.6.3" } @@ -10477,7 +10466,8 @@ "version": "12.6.3", "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-12.6.3.tgz", "integrity": "sha512-R/b3Ia2VxtTNZ4LTEO5pKYau1OUNHOuUfxuP0WFCTDYdHkeTBR9UtxR1cc8mDmKr8PEhmmfnTKGz3rSMjNRoRg==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/ms": { "version": "2.1.3", @@ -11154,7 +11144,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "nanoid": "^3.3.8", "picocolors": "^1.1.1", @@ -11376,7 +11365,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -11389,7 +11377,6 @@ "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -12227,7 +12214,6 @@ "resolved": "https://registry.npmjs.org/shiki/-/shiki-1.29.2.tgz", "integrity": "sha512-njXuliz/cP+67jU2hukkxCNuH1yUi4QfdZZY+sMr5PPrIyXSu5iTb/qYC4BiWWB0vZ+7TbdvYUCeL23zpwCfbg==", "license": "MIT", - "peer": true, "dependencies": { "@shikijs/core": "1.29.2", "@shikijs/engine-javascript": "1.29.2", @@ -12933,7 +12919,6 @@ "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -13131,7 +13116,6 @@ "integrity": "sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver"