% ACL 2025 long paper (conference paper entry; key is the exporter-generated identifier).
% Names are stored in "Last, First" form; acronyms in the title are brace-protected
% so that sentence-casing styles keep them upper-case.
% NOTE(review): address holds a country as exported, not a publisher city -- confirm the intended value.
@inproceedings{2074db27c02a4d16bf6eced8af756ef0,
  author    = {Liu, Qichuan and Zhang, Chentao and Zheng, Chenfeng and Hu, Guosheng and Li, Xiaodong and Zhang, Zhihong},
  title     = {Beyond the Answer: Advancing Multi-Hop {QA} with Fine-Grained Graph Reasoning and Evaluation},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2025},
  month     = jul,
  pages     = {23433--23456},
  doi       = {10.18653/v1/2025.acl-long.1142},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2025 Association for Computational Linguistics.; 63rd Annual Meeting of the Association for Computational Linguistics, ACL 2025 ; Conference date: 27-07-2025 Through 01-08-2025},
  abstract  = {Recent advancements in large language models (LLMs) have significantly improved the performance of multi-hop question answering (MHQA) systems. Despite the success of MHQA systems, the evaluation of MHQA is not deeply investigated. Existing evaluations mainly focus on comparing the final answers of the reasoning method and given ground-truths. We argue that the reasoning process should also be evaluated because wrong reasoning process can also lead to the correct final answers. Motivated by this, we propose a ``Planner-Executor-Reasoner'' (PER) architecture, which forms the core of the Plan-anchored Data Preprocessing (PER-DP) and the Plan-guided Multi-Hop QA (PER-QA). The former provides the ground-truth of intermediate reasoning steps and final answers, and the latter offers them of a reasoning method. Moreover, we design a fine-grained evaluation metric called Plan-aligned Stepwise Evaluation (PSE), which evaluates the intermediate reasoning steps from two aspects: planning and solving. Extensive experiments on ten types of questions demonstrate competitive reasoning performance, improved explainability of the MHQA system, and uncover issues such as ``fortuitous reasoning continuance'' and ``latent reasoning suspension'' in RAG-based MHQA systems. Besides, we also demonstrate the potential of our approach in data contamination scenarios. Our data and code have been released at https://github.com/GenIRAG/PER-PSE.},
}