@techreport{iza:izadps:dp17204,
  author      = {Si, Yafei and Yang, Yuyi and Wang, Xi and An, Ruopeng and Zu, Jiaqi and Chen, Xi and Fan, Xiaojing and Gong, Sen},
  title       = {Quality and Accountability of Large Language Models ({LLMs}) in Healthcare in Low- and Middle-Income Countries ({LMIC}): A Simulated Patient Study Using {ChatGPT}},
  year        = {2024},
  month       = aug,
  institution = {Institute of Labor Economics (IZA)},
  address     = {Bonn},
  type        = {IZA Discussion Paper},
  number      = {17204},
  url         = {https://www.iza.org/publications/dp17204},
  abstract    = {Using simulated patients to mimic nine established non-communicable and infectious diseases over 27 trials, we assess ChatGPT's effectiveness and reliability in diagnosing and treating common diseases in low- and middle-income countries. We find ChatGPT's performance varied within a single disease, despite a high level of accuracy in both correct diagnosis (74.1\%) and medication prescription (84.5\%). Additionally, ChatGPT recommended a concerning level of unnecessary or harmful medications (85.2\%) even with correct diagnoses. Finally, ChatGPT performed better in managing non-communicable diseases compared to infectious ones. These results highlight the need for cautious AI integration in healthcare systems to ensure quality and safety.},
  keywords    = {safety;quality;ChatGPT;Large Language Models;generative AI;simulated patient;healthcare;low- and middle-income countries},
}