@inproceedings{10.1145/3654777.3676381,
author = {Duan, Peitong and Cheng, Chin-Yi and Li, Gang and Hartmann, Bjoern and Li, Yang},
title = {{UICrit}: Enhancing Automated Design Evaluation with a {UI} Critique Dataset},
year = {2024},
isbn = {9798400706288},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3654777.3676381},
doi = {10.1145/3654777.3676381},
abstract = {Automated UI evaluation can be beneficial for the design process; for example, to compare different UI designs, or conduct automated heuristic evaluation. LLM-based UI evaluation, in particular, holds the promise of generalizability to a wide variety of UI types and evaluation tasks. However, current LLM-based techniques do not yet match the performance of human evaluators. We hypothesize that automatic evaluation can be improved by collecting a targeted UI feedback dataset and then using this dataset to enhance the performance of general-purpose LLMs. We present a targeted dataset of 3,059 design critiques and quality ratings for 983 mobile UIs, collected from seven designers, each with at least a year of professional design experience. We carried out an in-depth analysis to characterize the dataset’s features. We then applied this dataset to achieve a 55\% performance gain in LLM-generated UI feedback via various few-shot and visual prompting techniques. We also discuss future applications of this dataset, including training a reward model for generative UI techniques, and fine-tuning a tool-agnostic multi-modal LLM that automates UI evaluation.},
booktitle = {Proceedings of the 37th Annual ACM Symposium on User Interface Software and Technology},
articleno = {46},
numpages = {17},
keywords = {Dataset, Large Language Models, UI Design Feedback},
location = {Pittsburgh, PA, USA},
series = {UIST '24}
}