-
Revelio: Interpretable Long-Form Question Answering
Gianluca Moro,
Luca Ragazzi,
Lorenzo Valgimigli,
Fabian Vincenzi,
Tiny Papers @ ICLR • 2024
Read
Cite
Bibtex:
@inproceedings{revelio,
  author    = {Gianluca Moro and Luca Ragazzi and Lorenzo Valgimigli and Fabian Vincenzi},
  title     = {Revelio: Interpretable Long-Form Question Answering},
  booktitle = {The Second Tiny Papers Track at {ICLR} 2024, Tiny Papers @ {ICLR} 2024, Vienna, Austria, May 11, 2024},
  publisher = {OpenReview.net},
  year      = {2024},
}
-
Retrieve-and-Rank End-to-End Summarization of Biomedical Studies
Gianluca Moro,
Luca Ragazzi,
Lorenzo Valgimigli,
Lorenzo Molfetta,
SISAP • 2023
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/sisap/MoroRVM23,
  author    = {Gianluca Moro and Luca Ragazzi and Lorenzo Valgimigli and Lorenzo Molfetta},
  editor    = {Oscar Pedreira and Vladimir Estivill{-}Castro},
  title     = {Retrieve-and-Rank End-to-End Summarization of Biomedical Studies},
  booktitle = {Similarity Search and Applications - 16th International Conference,
               {SISAP} 2023, {A} Coru{\~{n}}a, Spain, October 9-11, 2023, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {14289},
  pages     = {64--78},
  publisher = {Springer},
  year      = {2023},
  url       = {https://doi.org/10.1007/978-3-031-46994-7\_6},
  doi       = {10.1007/978-3-031-46994-7\_6},
  timestamp = {Thu, 09 Nov 2023 21:12:58 +0100},
  biburl    = {https://dblp.org/rec/conf/sisap/MoroRVM23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Cogito Ergo Summ: Abstractive Summarization of Biomedical Papers via Semantic Parsing Graphs and Consistency Rewards
Giacomo Frisoni,
Paolo Italiani,
Stefano Salvatori,
Gianluca Moro,
NSA @ IJCAI • 2023
Read
Cite
GitHub
Bibtex:
Non-archival publication. See:
@inproceedings{frisoni-etal-2023-cogitoergosumm,
  author    = {Frisoni, Giacomo and Italiani, Paolo and Salvatori, Stefano and Moro, Gianluca},
  title     = {{Cogito Ergo Summ}: Abstractive Summarization of Biomedical Papers via Semantic Parsing Graphs and Consistency Rewards},
  booktitle = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI}
               2023, Washington, DC, USA, February 7-14, 2023},
  month     = feb,
  year      = {2023},
  address   = {Washington, DC, USA},
  publisher = {{AAAI} Press},
  abstract  = {The automatic synthesis of biomedical publications catalyzes a profound research interest elicited by literature congestion. Current sequence-to-sequence models mainly rely on the lexical surface and seldom consider the deep semantic interconnections between the entities mentioned in the source document. Such superficiality translates into fabricated, poorly informative, redundant, and near-extractive summaries that severely restrict their real-world application in biomedicine, where the specialized jargon and the convoluted facts further emphasize task complexity. To fill this gap, we argue that the summarizer should acquire semantic interpretation over input, exploiting structured and unambiguous representations to capture and conserve the most relevant parts of the text content. This paper presents CogitoErgoSumm, the first framework for biomedical abstractive summarization equipping large pre-trained language models with rich semantic graphs. Precisely, we infuse graphs from two complementary semantic parsing techniques with different goals and granularities—Event Extraction and Abstract Meaning Representation, also designing a reward signal to maximize information content preservation through reinforcement learning. Extensive quantitative and qualitative evaluations on the CDSR dataset show that our solution achieves competitive performance according to multiple metrics, despite using 2.5x fewer parameters. Results and ablation studies indicate that our joint text-graph model generates more enlightening, readable, and consistent summaries.}
}
-
Graph-based Abstractive Summarization of Extracted Essential Knowledge for Low-Resource Scenarios
Gianluca Moro,
Luca Ragazzi,
Lorenzo Valgimigli,
ECAI • 2023
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/ecai/MoroRV23,
  author    = {Gianluca Moro and Luca Ragazzi and Lorenzo Valgimigli},
  editor    = {Kobi Gal and Ann Now{\'{e}} and Grzegorz J. Nalepa and
               Roy Fairstein and Roxana Radulescu},
  title     = {Graph-Based Abstractive Summarization of Extracted Essential Knowledge
               for Low-Resource Scenarios},
  booktitle = {{ECAI} 2023 - 26th European Conference on Artificial Intelligence,
               September 30 - October 4, 2023, Krak{\'{o}}w, Poland - Including
               12th Conference on Prestigious Applications of Intelligent Systems
               {(PAIS} 2023)},
  series    = {Frontiers in Artificial Intelligence and Applications},
  volume    = {372},
  pages     = {1747--1754},
  publisher = {{IOS} Press},
  year      = {2023},
  url       = {https://doi.org/10.3233/FAIA230460},
  doi       = {10.3233/FAIA230460},
  timestamp = {Fri, 27 Oct 2023 20:40:30 +0200},
  biburl    = {https://dblp.org/rec/conf/ecai/MoroRV23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Carburacy: Summarization Models Tuning and Comparison in Eco-Sustainable Regimes with a Novel Carbon-Aware Accuracy
Gianluca Moro,
Luca Ragazzi,
Lorenzo Valgimigli,
AAAI • 2023
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/aaai/MoroRV23,
  author    = {Gianluca Moro and Luca Ragazzi and Lorenzo Valgimigli},
  editor    = {Brian Williams and Yiling Chen and Jennifer Neville},
  title     = {Carburacy: Summarization Models Tuning and Comparison in Eco-Sustainable
               Regimes with a Novel Carbon-Aware Accuracy},
  booktitle = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI}
               2023, Thirty-Fifth Conference on Innovative Applications of Artificial
               Intelligence, {IAAI} 2023, Thirteenth Symposium on Educational Advances
               in Artificial Intelligence, {EAAI} 2023, Washington, DC, USA, February
               7-14, 2023},
  pages     = {14417--14425},
  publisher = {{AAAI} Press},
  year      = {2023},
  url       = {https://doi.org/10.1609/aaai.v37i12.26686},
  doi       = {10.1609/AAAI.V37I12.26686},
  timestamp = {Mon, 04 Sep 2023 16:50:26 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/MoroRV23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Cogito Ergo Summ: Abstractive Summarization of Biomedical Papers via Semantic Parsing Graphs and Consistency Rewards
Giacomo Frisoni,
Paolo Italiani,
Stefano Salvatori,
Gianluca Moro,
AAAI • 2023
Read
Cite
GitHub
Bibtex:
@inproceedings{frisoni-etal-2023-cogitoergosumm,
  author    = {Frisoni, Giacomo and Italiani, Paolo and Salvatori, Stefano and Moro, Gianluca},
  title     = {{Cogito Ergo Summ}: Abstractive Summarization of Biomedical Papers via Semantic Parsing Graphs and Consistency Rewards},
  booktitle = {Thirty-Seventh {AAAI} Conference on Artificial Intelligence, {AAAI}
               2023, Washington, DC, USA, February 7-14, 2023},
  month     = feb,
  year      = {2023},
  address   = {Washington, DC, USA},
  publisher = {{AAAI} Press},
  abstract  = {The automatic synthesis of biomedical publications catalyzes a profound research interest elicited by literature congestion. Current sequence-to-sequence models mainly rely on the lexical surface and seldom consider the deep semantic interconnections between the entities mentioned in the source document. Such superficiality translates into fabricated, poorly informative, redundant, and near-extractive summaries that severely restrict their real-world application in biomedicine, where the specialized jargon and the convoluted facts further emphasize task complexity. To fill this gap, we argue that the summarizer should acquire semantic interpretation over input, exploiting structured and unambiguous representations to capture and conserve the most relevant parts of the text content. This paper presents CogitoErgoSumm, the first framework for biomedical abstractive summarization equipping large pre-trained language models with rich semantic graphs. Precisely, we infuse graphs from two complementary semantic parsing techniques with different goals and granularities—Event Extraction and Abstract Meaning Representation, also designing a reward signal to maximize information content preservation through reinforcement learning. Extensive quantitative and qualitative evaluations on the CDSR dataset show that our solution achieves competitive performance according to multiple metrics, despite using 2.5x fewer parameters. Results and ablation studies indicate that our joint text-graph model generates more enlightening, readable, and consistent summaries.}
}
-
BioReader: a Retrieval-Enhanced Text-to-Text Transformer for Biomedical Literature
Giacomo Frisoni,
Miki Mizutani,
Gianluca Moro,
Lorenzo Valgimigli,
EMNLP • 2022
Read
Cite
GitHub
Bibtex:
@inproceedings{frisoni-etal-2022-bioreader,
  author    = {Frisoni, Giacomo and Mizutani, Miki and Moro, Gianluca and Valgimigli, Lorenzo},
  title     = {{BioReader}: a Retrieval-Enhanced Text-to-Text Transformer for Biomedical Literature},
  booktitle = {Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing},
  month     = dec,
  year      = {2022},
  address   = {Abu Dhabi, United Arab Emirates},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.emnlp-main.390},
  pages     = {5770--5793},
  abstract  = {The latest batch of research has equipped language models with the ability to attend over relevant and factual information from non-parametric external sources, drawing a complementary path to architectural scaling. Besides mastering language, exploiting and contextualizing the latent world knowledge is crucial in complex domains like biomedicine. However, most works in the field rely on general-purpose models supported by databases like Wikipedia and Books. We introduce BioReader, the first retrieval-enhanced text-to-text model for biomedical natural language processing. Our domain-specific T5-based solution augments the input prompt by fetching and assembling relevant scientific literature chunks from a neural database with ≈60 million tokens centered on PubMed. We fine-tune and evaluate BioReader on a broad array of downstream tasks, significantly outperforming several state-of-the-art methods despite using up to 3x fewer parameters. In tandem with extensive ablation studies, we show that domain knowledge can be easily altered or supplemented to make the model generate correct predictions bypassing the retraining step and thus addressing the literature overload issue.}
}
-
Self-supervised Information Retrieval Trained from Self-generated Sets of Queries and Relevant Documents
Gianluca Moro,
Lorenzo Valgimigli,
Alex Rossi,
Cristiano Casadei,
Andrea Montefiori,
SISAP • 2022
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/sisap/MoroVRCM22,
  author    = {Gianluca Moro and Lorenzo Valgimigli and Alex Rossi and
               Cristiano Casadei and Andrea Montefiori},
  title     = {Self-supervised Information Retrieval Trained from Self-generated
               Sets of Queries and Relevant Documents},
  booktitle = {{SISAP}},
  series    = {Lecture Notes in Computer Science},
  volume    = {13590},
  pages     = {283--290},
  publisher = {Springer},
  year      = {2022}
}
-
Deep Vision-Language Model for Efficient Multi-modal Similarity Search in Fashion Retrieval
Gianluca Moro,
Stefano Salvatori,
SISAP • 2022
Read
Cite
Web App
Bibtex:
@inproceedings{moro_salvatori_sisap22,
  author    = {Moro, Gianluca and Salvatori, Stefano},
  title     = {Deep Vision-Language Model for Efficient Multi-modal Similarity Search in Fashion Retrieval},
  booktitle = {Similarity Search and Applications - 15th International Conference,
               {SISAP} 2022, Bologna, Italy, October 5-7, 2022, Proceedings},
  series    = {Lecture Notes in Computer Science},
  volume    = {13590},
  pages     = {40--53},
  publisher = {Springer},
  month     = sep,
  year      = {2022},
  isbn      = {978-3-031-17848-1},
  doi       = {10.1007/978-3-031-17849-8_4}
}
-
NLG-Metricverse: An End-to-End Library for Evaluating Natural Language Generation
Giacomo Frisoni,
Antonella Carbonaro,
Gianluca Moro,
Andrea Zammarchi,
Marco Avagnano,
COLING • 2022
Read
Cite
GitHub
Bibtex:
@inproceedings{frisoni-etal-2022-nlg,
  author    = {Frisoni, Giacomo and Carbonaro, Antonella and Moro, Gianluca and Zammarchi, Andrea and Avagnano, Marco},
  title     = {{NLG}-Metricverse: An End-to-End Library for Evaluating Natural Language Generation},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2022.coling-1.306},
  pages     = {3465--3479},
  abstract  = {Driven by deep learning breakthroughs, natural language generation (NLG) models have been at the center of steady progress in the last few years, with a ubiquitous task influence. However, since our ability to generate human-indistinguishable artificial text lags behind our capacity to assess it, it is paramount to develop and apply even better automatic evaluation metrics. To facilitate researchers to judge the effectiveness of their models broadly, we introduce NLG-Metricverse{---}an end-to-end open-source library for NLG evaluation based on Python. Our framework provides a living collection of NLG metrics in a unified and easy-to-use environment, supplying tools to efficiently apply, analyze, compare, and visualize them. This includes (i) the extensive support to heterogeneous automatic metrics with n-arity management, (ii) the meta-evaluation upon individual performance, metric-metric and metric-human correlations, (iii) graphical interpretations for helping humans better gain score intuitions, (iv) formal categorization and convenient documentation to accelerate metrics understanding. NLG-Metricverse aims to increase the comparability and replicability of NLG research, hopefully stimulating new contributions in the area.},
}
-
Text-to-Text Extraction and Verbalization of Biomedical Event Graphs
Giacomo Frisoni,
Gianluca Moro,
Lorenzo Balzani,
COLING • 2022
Read
Cite
GitHub
Bibtex:
@inproceedings{frisoni-etal-2022-text,
  author    = {Frisoni, Giacomo and Moro, Gianluca and Balzani, Lorenzo},
  title     = {Text-to-Text Extraction and Verbalization of Biomedical Event Graphs},
  booktitle = {Proceedings of the 29th International Conference on Computational Linguistics},
  month     = oct,
  year      = {2022},
  address   = {Gyeongju, Republic of Korea},
  publisher = {International Committee on Computational Linguistics},
  url       = {https://aclanthology.org/2022.coling-1.238},
  pages     = {2692--2710},
  abstract  = {Biomedical events represent complex, graphical, and semantically rich interactions expressed in the scientific literature. Almost all contributions in the event realm orbit around semantic parsing, usually employing discriminative architectures and cumbersome multi-step pipelines limited to a small number of target interaction types. We present the first lightweight framework to solve both event extraction and event verbalization with a unified text-to-text approach, allowing us to fuse all the resources so far designed for different tasks. To this end, we present a new event graph linearization technique and release highly comprehensive event-text paired datasets, covering more than 150 event types from multiple biology subareas (English language). By streamlining parsing and generation to translations, we propose baseline transformer model results according to multiple biomedical text mining benchmarks and NLG metrics. Our extractive models achieve greater state-of-the-art performance than single-task competitors and show promising capabilities for the controlled generation of coherent natural language utterances from structured data.}
}
-
Enhancing Biomedical Scientific Reviews Summarization with Graph-based Factual Evidence Extracted from Papers
Giacomo Frisoni,
Paolo Italiani,
Francesco Boschi,
Gianluca Moro,
DATA • 2022
• Best Student Paper Award
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/data/FrisoniIBM22,
  author    = {Giacomo Frisoni and Paolo Italiani and Francesco Boschi and Gianluca Moro},
  editor    = {Alfredo Cuzzocrea and Oleg Gusikhin and
               Wil M. P. van der Aalst and Slimane Hammoudi},
  title     = {Enhancing Biomedical Scientific Reviews Summarization with Graph-based Factual Evidence Extracted from Papers},
  booktitle = {Proceedings of the 11th International Conference on Data Science,
               Technology and Applications, {DATA} 2022, Lisbon, Portugal, July 11-13,
               2022},
  pages     = {168--179},
  publisher = {{SCITEPRESS}},
  year      = {2022},
  url       = {https://doi.org/10.5220/0011354900003269},
  doi       = {10.5220/0011354900003269},
  timestamp = {Wed, 03 Aug 2022 15:53:22 +0200},
  biburl    = {https://dblp.org/rec/conf/data/FrisoniIBM22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Discriminative Marginalized Probabilistic Neural Method for Multi-Document Summarization of Medical Literature
Gianluca Moro,
Luca Ragazzi,
Lorenzo Valgimigli,
Davide Freddi,
ACL • 2022
Read
Cite
Bibtex:
@inproceedings{moro-etal-2022-discriminative,
  author    = {Moro, Gianluca and Ragazzi, Luca and Valgimigli, Lorenzo and Freddi, Davide},
  title     = {Discriminative Marginalized Probabilistic Neural Method for Multi-Document Summarization of Medical Literature},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2022.acl-long.15},
  doi       = {10.18653/v1/2022.acl-long.15},
  pages     = {180--189},
  abstract  = {Although current state-of-the-art Transformer-based solutions succeeded in a wide range for single-document NLP tasks, they still struggle to address multi-input tasks such as multi-document summarization. Many solutions truncate the inputs, thus ignoring potential summary-relevant contents, which is unacceptable in the medical domain where each information can be vital. Others leverage linear model approximations to apply multi-input concatenation, worsening the results because all information is considered, even if it is conflicting or noisy with respect to a shared background. Despite the importance and social impact of medicine, there are no ad-hoc solutions for multi-document summarization. For this reason, we propose a novel discriminative marginalized probabilistic method (DAMEN) trained to discriminate critical information from a cluster of topic-related medical documents and generate a multi-document summary via token probability marginalization. Results prove we outperform the previous state-of-the-art on a biomedical dataset for multi-document summarization of systematic literature reviews. Moreover, we perform extensive ablation studies to motivate the design choices and prove the importance of each module of our method.},
}
-
Semantic Self-Segmentation for Abstractive Summarization of Long Documents in Low-Resource Regimes
Gianluca Moro,
Luca Ragazzi,
AAAI • 2022
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/aaai/MoroR22,
  author    = {Gianluca Moro and Luca Ragazzi},
  title     = {Semantic Self-Segmentation for Abstractive Summarization of Long Documents
               in Low-Resource Regimes},
  booktitle = {Thirty-Sixth {AAAI} Conference on Artificial Intelligence, {AAAI}
               2022, Thirty-Fourth Conference on Innovative Applications of Artificial
               Intelligence, {IAAI} 2022, The Twelfth Symposium on Educational Advances
               in Artificial Intelligence, {EAAI} 2022, Virtual Event, February 22
               - March 1, 2022},
  pages     = {11085--11093},
  publisher = {{AAAI} Press},
  year      = {2022},
  url       = {https://ojs.aaai.org/index.php/AAAI/article/view/21357},
  timestamp = {Tue, 12 Jul 2022 14:14:21 +0200},
  biburl    = {https://dblp.org/rec/conf/aaai/MoroR22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Unsupervised Descriptive Text Mining for Knowledge Graph Learning
Giacomo Frisoni,
Gianluca Moro,
Antonella Carbonaro,
KDIR • 2020
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/ic3k/FrisoniMC20,
  author    = {Giacomo Frisoni and Gianluca Moro and Antonella Carbonaro},
  editor    = {Ana L. N. Fred and Joaquim Filipe},
  title     = {Unsupervised Descriptive Text Mining for Knowledge Graph Learning},
  booktitle = {Proceedings of the 12th International Joint Conference on Knowledge
               Discovery, Knowledge Engineering and Knowledge Management, {IC3K}
               2020, Volume 1: KDIR, Budapest, Hungary, November 2-4, 2020},
  pages     = {316--324},
  publisher = {{SCITEPRESS}},
  year      = {2020},
  url       = {https://doi.org/10.5220/0010153603160324},
  doi       = {10.5220/0010153603160324},
  timestamp = {Mon, 23 Nov 2020 16:27:47 +0100},
  biburl    = {https://dblp.org/rec/conf/ic3k/FrisoniMC20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Learning Interpretable and Statistically Significant Knowledge from Unlabeled Corpora of Social Text Messages: A Novel Methodology of Descriptive Text Mining
Giacomo Frisoni,
Gianluca Moro,
Antonella Carbonaro,
DATA • 2020
• Best Paper Award
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/data/FrisoniMC20,
  author    = {Giacomo Frisoni and Gianluca Moro and Antonella Carbonaro},
  editor    = {Slimane Hammoudi and Christoph Quix and Jorge Bernardino},
  title     = {Learning Interpretable and Statistically Significant Knowledge from
               Unlabeled Corpora of Social Text Messages: {A} Novel Methodology of
               Descriptive Text Mining},
  booktitle = {Proceedings of the 9th International Conference on Data Science, Technology
               and Applications, {DATA} 2020, Lieusaint, Paris, France, July 7-9,
               2020},
  pages     = {121--132},
  publisher = {SciTePress},
  year      = {2020},
  url       = {https://doi.org/10.5220/0009892001210132},
  doi       = {10.5220/0009892001210132},
  timestamp = {Wed, 29 Jul 2020 16:56:05 +0200},
  biburl    = {https://dblp.org/rec/conf/data/FrisoniMC20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Towards Rare Disease Knowledge Graph Learning from Social Posts of Patients
Giacomo Frisoni,
Gianluca Moro,
Antonella Carbonaro,
RIIFORUM • 2020
Read
Cite
Bibtex:
@inproceedings{DBLP:conf/riiforum/FrisoniMC20,
  author    = {Giacomo Frisoni and Gianluca Moro and Antonella Carbonaro},
  editor    = {Anna Visvizi and Miltiadis D. Lytras and Naif R. Aljohani},
  title     = {Towards Rare Disease Knowledge Graph Learning from Social Posts of
               Patients},
  booktitle = {Research and Innovation Forum 2020 - Disruptive Technologies in Times
               of Change, {RIIFORUM} 2020, Athens, Greece, 15-17 April 2020},
  pages     = {577--589},
  publisher = {Springer},
  year      = {2020},
  url       = {https://doi.org/10.1007/978-3-030-62066-0\_44},
  doi       = {10.1007/978-3-030-62066-0\_44},
  timestamp = {Sat, 09 Apr 2022 12:35:43 +0200},
  biburl    = {https://dblp.org/rec/conf/riiforum/FrisoniMC20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Evidence, my Dear Watson: Abstractive Dialogue Summarization on Learnable Relevant Utterances
Paolo Italiani,
Giacomo Frisoni,
Gianluca Moro,
Antonella Carbonaro,
Claudio Sartori,
Neurocomputing • 2023
Read
Cite
Bibtex:
@article{italiani_frisoni_moro_etal_2023,
  author   = {Paolo Italiani and Giacomo Frisoni and Gianluca Moro and Antonella Carbonaro and Claudio Sartori},
  title    = {Evidence, my {Dear Watson}: Abstractive Dialogue Summarization on Learnable Relevant Utterances},
  journal  = {Neurocomputing},
  year     = {2023},
  doi      = {10.1016/j.neucom.2023.127132},
  url      = {https://www.sciencedirect.com/science/article/pii/S0925231223012559},
  keywords = {Abstractive dialogue summarization, Input augmentation, Text classification, Gumbel-softmax trick, Interpretable natural language processing},
  abstract = {Abstractive dialogue summarization requires distilling and rephrasing key information from noisy multi-speaker documents. Combining pre-trained language models with input augmentation techniques has recently led to significant research progress. However, existing solutions still struggle to select relevant chat segments, primarily relying on open-domain and unsupervised annotators not tailored to the actual needs of the summarization task. In this paper, we propose DearWatson, a task-aware utterance-level annotation framework for improving the effectiveness and interpretability of pre-trained dialogue summarization models. Precisely, we learn relevant utterances in the source document and mark them with special tags, that then act as supporting evidence for the generated summary. Quantitative experiments are conducted on two datasets made up of real-life messenger conversations. The results show that DearWatson allows model attention to focus on salient tokens, achieving new state-of-the-art results in three evaluation metrics, including semantic and factuality measures. Human evaluation proves the superiority of our solution in semantic consistency and recall. Finally, extensive ablation studies confirm each module’s importance, also exploring different annotation strategies and parameter-efficient fine-tuning of large generative language models.}
}
-
Multi-Language Transfer Learning for Low-Resource Legal Case Summarization
Gianluca Moro,
Nicola Piscaglia,
Luca Ragazzi,
Paolo Italiani,
Artificial Intelligence and Law • 2023
Read
Cite
Bibtex:
@article{moro2023multi,
  author    = {Gianluca Moro and Nicola Piscaglia and Luca Ragazzi and Paolo Italiani},
  title     = {Multi-language transfer learning for low-resource legal case summarization},
  journal   = {Artificial Intelligence and Law},
  pages     = {1--29},
  year      = {2023},
  publisher = {Springer},
  url       = {https://doi.org/10.1007/s10506-023-09373-8},
  doi       = {10.1007/s10506-023-09373-8},
}
-
Align-Then-Abstract Representation Learning for Low-Resource Summarization
Gianluca Moro,
Luca Ragazzi,
Neurocomputing • 2023
Read
Cite
Bibtex:
@article{DBLP:journals/ijon/MoroR23,
  author    = {Gianluca Moro and Luca Ragazzi},
  title     = {Align-then-abstract representation learning for low-resource summarization},
  journal   = {Neurocomputing},
  volume    = {548},
  pages     = {126356},
  year      = {2023},
  url       = {https://doi.org/10.1016/j.neucom.2023.126356},
  doi       = {10.1016/J.NEUCOM.2023.126356},
  timestamp = {Wed, 01 Nov 2023 08:59:06 +0100},
  biburl    = {https://dblp.org/rec/journals/ijon/MoroR23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Efficient Text-Image Semantic Search: a Multi-modal Vision-Language Approach for Fashion Retrieval
Gianluca Moro,
Stefano Salvatori,
Giacomo Frisoni,
Neurocomputing • 2023
Read
Cite
Web App
Bibtex:
@article{moro_salvatori_frisoni_2023,
  author   = {Gianluca Moro and Stefano Salvatori and Giacomo Frisoni},
  title    = {Efficient text-image semantic search: A multi-modal vision-language approach for fashion retrieval},
  journal  = {Neurocomputing},
  volume   = {538},
  pages    = {126196},
  year     = {2023},
  issn     = {0925-2312},
  doi      = {10.1016/j.neucom.2023.03.057},
  url      = {https://www.sciencedirect.com/science/article/pii/S092523122300303X},
  keywords = {Multi-modal retrieval, Metric learning, Vision-and-language transformers, Deep learning, Fashion domain},
  abstract = {In this paper, we address the problem of multi-modal retrieval of fashion products. State-of-the-art (SOTA) works proposed in literature use vision-and-language transformers to assign similarity scores to joint text-image pairs, then used for sorting the results during a retrieval phase. However, this approach is inefficient since it requires coupling a query with every record in the dataset and computing a forward pass for each sample at runtime, precluding scalability to large-scale datasets. We thus propose a solution that overcomes the above limitation by combining transformers and deep metric learning to create a latent space where texts and images are separately embedded, and their spatial proximity translates into semantic similarity. Our architecture does not use convolutional neural networks to process images, allowing us to test different levels of image-processing details and metric learning losses. We vastly improve retrieval accuracy results on the FashionGen benchmark (+18.71% and +9.22% Rank@1 on Image-to-Text and Text-to-Image, respectively) while being up to 512x faster. Finally, we analyze the speed-up obtainable by different approximate nearest neighbor retrieval strategies—an optimization precluded to current SOTA contributions. We release our solution as a web application available at https://disi-unibo-nlp.github.io/projects/fashion_retrieval/.}
}
-
Efficient Memory-Enhanced Transformer for Long-Document Summarization in Low-Resource Regimes
Gianluca Moro,
Luca Ragazzi,
Lorenzo Valgimigli,
Giacomo Frisoni,
Claudio Sartori,
Gustavo Marfia,
Sensors • 2023
Read
Cite
Bibtex:
@article{moro_ragazzi_valgimigli_frisoni_sartori_marfia,
  author         = {Moro, Gianluca and Ragazzi, Luca and Valgimigli, Lorenzo and Frisoni, Giacomo and Sartori, Claudio and Marfia, Gustavo},
  title          = {Efficient Memory-Enhanced Transformer for Long-Document Summarization in Low-Resource Regimes},
  journal        = {Sensors},
  volume         = {23},
  number         = {7},
  pages          = {3542},
  article-number = {3542},
  year           = {2023},
  url            = {https://www.mdpi.com/1424-8220/23/7/3542},
  pubmedid       = {37050608},
  issn           = {1424-8220},
  abstract       = {Long document summarization poses obstacles to current generative transformer-based models because of the broad context to process and understand. Indeed, detecting long-range dependencies is still challenging for today’s state-of-the-art solutions, usually requiring model expansion at the cost of an unsustainable demand for computing and memory capacities. This paper introduces Emma, a novel efficient memory-enhanced transformer-based architecture. By segmenting a lengthy input into multiple text fragments, our model stores and compares the current chunk with previous ones, gaining the capability to read and comprehend the entire context over the whole document with a fixed amount of GPU memory. This method enables the model to deal with theoretically infinitely long documents, using less than 18 and 13 GB of memory for training and inference, respectively. We conducted extensive performance analyses and demonstrate that Emma achieved competitive results on two datasets of different domains while consuming significantly less GPU memory than competitors do, even in low-resource settings.},
  doi            = {10.3390/s23073542}
}
-
Graph-Enhanced Biomedical Abstractive Summarization via Factual Evidence Extraction
Giacomo Frisoni,
Paolo Italiani,
Gianluca Moro,
DATA (Revised Selected Papers) • 2023
Read
Cite
GitHub
Bibtex:
@article{frisoni-etal-2023-data-ext,
  author    = {Giacomo Frisoni and Paolo Italiani and Gianluca Moro},
  editor    = {Kemal Akkaya and Umapada Pai},
  title     = {Graph-Enhanced Biomedical Abstractive Summarization via Factual Evidence Extraction},
  journal   = {SN Computer Science},
  publisher = {Springer},
  year      = {2023},
  abstract  = {Infusing structured semantic representations into language models is a rising research trend underpinning many natural language processing tasks that require understanding and reasoning capabilities. Decoupling factual non-ambiguous concept units from the lexical surface holds great potential in abstractive summarization, especially in the biomedical domain, where fact selection and rephrasing are made more difficult by specialized jargon and hard factuality constraints. Nevertheless, current graph-augmented contributions rely on extractive binary relations, failing to model real-world n-ary and nested biomedical interactions mentioned in the text. To alleviate this issue, we present EASumm, the first framework for biomedical abstractive summarization empowered by event extraction, namely graph-based representations of relevant medical evidence derived from the source scientific document. By relying on dual text-graph encoders, we prove the promising role of explicit event structures, achieving better or comparable performance than previous state-of-the-art models on the CDSR dataset. We conduct extensive ablation studies, including a wide experimentation of graph representation learning techniques. Finally, we offer some hints to guide future research in the field.}
}
-
Comprehensive Analysis of Knowledge Graph Embedding Techniques Benchmarked on Link Prediction
Ilaria Ferrari,
Giacomo Frisoni,
Paolo Italiani,
Gianluca Moro,
Claudio Sartori,
Electronics • 2022
Read
Cite
GitHub
Bibtex:
@article{ferrari-etal-2022-kg-embedding,
  author         = {Ferrari, Ilaria and Frisoni, Giacomo and Italiani, Paolo and Moro, Gianluca and Sartori, Claudio},
  title          = {Comprehensive Analysis of Knowledge Graph Embedding Techniques Benchmarked on Link Prediction},
  journal        = {Electronics},
  volume         = {11},
  year           = {2022},
  number         = {23},
  pages          = {3866},
  article-number = {3866},
  url            = {https://www.mdpi.com/2079-9292/11/23/3866},
  issn           = {2079-9292},
  abstract       = {In knowledge graph representation learning, link prediction is among the most popular and influential tasks. Its surge in popularity has resulted in a panoply of orthogonal embedding-based methods projecting entities and relations into low-dimensional continuous vectors. To further enrich the research space, the community witnessed a prolific development of evaluation benchmarks with a variety of structures and domains. Therefore, researchers and practitioners face an unprecedented challenge in effectively identifying the best solution to their needs. To this end, we propose the most comprehensive and up-to-date study to systematically assess the effectiveness and efficiency of embedding models for knowledge graph completion. We compare 13 models on six datasets with different sizes, domains, and relational properties, covering translational, semantic matching, and neural network-based encoders. A fine-grained evaluation is conducted to compare each technique head-to-head in terms of standard metrics, training and evaluation times, memory consumption, carbon footprint, and space geometry. Our results demonstrate the high dependence between performance and graph types, identifying the best options for each scenario. Among all the encoding strategies, the new generation of translational models emerges as the most promising, bringing out the best and most consistent results across all the datasets and evaluation criteria.},
  doi            = {10.3390/electronics11233866}
}
-
Human Being Detection from UWB NLOS Signals: Accuracy and Generality of Advanced Machine Learning Models
Gianluca Moro,
Federico Di Luca,
Davide Dardari,
Giacomo Frisoni,
Sensors • 2022
Read
Cite
GitHub
Bibtex:
@article{DBLP:journals/sensors/MoroLDF22,
  author    = {Moro, Gianluca and
               Di Luca, Federico and
               Dardari, Davide and
               Frisoni, Giacomo},
  title     = {Human Being Detection from {UWB} {NLOS} Signals: Accuracy and Generality of Advanced Machine Learning Models},
  journal   = {Sensors},
  volume    = {22},
  number    = {4},
  pages     = {1656},
  year      = {2022},
  url       = {https://doi.org/10.3390/s22041656},
  doi       = {10.3390/s22041656},
  timestamp = {Fri, 01 Apr 2022 11:24:37 +0200},
  biburl    = {https://dblp.org/rec/journals/sensors/MoroLDF22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
Unsupervised Event Graph Representation and Similarity Learning on Biomedical Literature
Giacomo Frisoni,
Gianluca Moro,
Giulio Carlassare,
Antonella Carbonaro,
Sensors • 2021
Read
Cite
GitHub
Bibtex:
@article{DBLP:journals/sensors/FrisoniMCC22,
  author    = {Giacomo Frisoni and Gianluca Moro and Giulio Carlassare and Antonella Carbonaro},
  title     = {Unsupervised Event Graph Representation and Similarity Learning on Biomedical Literature},
  journal   = {Sensors},
  year      = {2022},
  volume    = {22},
  number    = {1},
  pages     = {3},
  doi       = {10.3390/s22010003},
  url       = {https://doi.org/10.3390/s22010003},
  timestamp = {Tue, 16 Aug 2022 23:05:49 +0200},
  biburl    = {https://dblp.org/rec/journals/sensors/FrisoniMCC22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
-
A Survey on Event Extraction for Natural Language Understanding: Riding the Biomedical Literature Wave
Giacomo Frisoni,
Gianluca Moro,
Antonella Carbonaro,
IEEE Access • 2021
Read
Cite
Bibtex:
@article{9627684,
  author  = {Frisoni, Giacomo and Moro, Gianluca and Carbonaro, Antonella},
  journal = {IEEE Access},
  title   = {A Survey on Event Extraction for Natural Language Understanding: Riding the Biomedical Literature Wave},
  year    = {2021},
  volume  = {9},
  pages   = {160721--160757},
  doi     = {10.1109/ACCESS.2021.3130956}
}
-
Phenomena Explanation from Text: Unsupervised Learning of Interpretable and Statistically Significant Knowledge
Giacomo Frisoni,
Gianluca Moro,
DATA (Revised Selected Papers) • 2020
Read
Cite
GitHub
Bibtex:
@inproceedings{DBLP:conf/data/FrisoniM20,
  author    = {Giacomo Frisoni and Gianluca Moro},
  title     = {Phenomena Explanation from Text: Unsupervised Learning of Interpretable and Statistically Significant Knowledge},
  editor    = {Slimane Hammoudi and Christoph Quix and Jorge Bernardino},
  booktitle = {Data Management Technologies and Applications - 9th International Conference, {DATA} 2020, Virtual Event, July 7-9, 2020, Revised Selected Papers},
  series    = {Communications in Computer and Information Science},
  volume    = {1446},
  pages     = {293--318},
  publisher = {Springer},
  year      = {2020},
  doi       = {10.1007/978-3-030-83014-4\_14},
  url       = {https://doi.org/10.1007/978-3-030-83014-4\_14},
  timestamp = {Thu, 29 Jul 2021 13:42:18 +0200},
  biburl    = {https://dblp.org/rec/conf/data/FrisoniM20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}