<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Rehabil Assist Technol</journal-id><journal-id journal-id-type="publisher-id">rehab</journal-id><journal-id journal-id-type="index">17</journal-id><journal-title>JMIR Rehabilitation and Assistive Technologies</journal-title><abbrev-journal-title>JMIR Rehabil Assist Technol</abbrev-journal-title><issn pub-type="epub">2369-2529</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e79073</article-id><article-id pub-id-type="doi">10.2196/79073</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Assessing the Role of Medical Caption Technology to Support Physician-Patient Communication for Patients With Hearing Loss: Mixed Methods Pilot Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Hughes</surname><given-names>Sarah E</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Wu</surname><given-names>Liang-Yuan</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ma</surname><given-names>Lindsay J</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jain</surname><given-names>Dhruv</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>McKee</surname><given-names>Michael M</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>University of Michigan Medical School</institution><addr-line>Ann Arbor</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Computer Science and Engineering, University of Michigan</institution><addr-line>Ann Arbor</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Family Medicine, University of Michigan</institution><addr-line>1018 Fuller St.</addr-line><addr-line>Ann Arbor</addr-line><addr-line>MI</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Physical Medicine and Rehabilitation, University of Michigan Medical School</institution><addr-line>Ann Arbor</addr-line><addr-line>MI</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Munce</surname><given-names>Sarah</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Roundtree</surname><given-names>Aimee</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Ubur</surname><given-names>Sunday David</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Michael M McKee, MD, MPH, Department of Family Medicine, University of Michigan, 1018 Fuller St., Ann Arbor, MI, 48104-1213, United States, 17345395000, 17344264370; <email>mmmckee@med.umich.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>15</day><month>1</month><year>2026</year></pub-date><volume>13</volume><elocation-id>e79073</elocation-id><history><date date-type="received"><day>14</day><month>06</month><year>2025</year></date><date date-type="rev-recd"><day>21</day><month>11</month><year>2025</year></date><date date-type="accepted"><day>03</day><month>12</month><year>2025</year></date></history><copyright-statement>&#x00A9; Sarah E Hughes, Liang-Yuan Wu, Lindsay J Ma, Dhruv Jain, Michael M McKee. Originally published in JMIR Rehabilitation and Assistive Technology (<ext-link ext-link-type="uri" xlink:href="https://rehab.jmir.org">https://rehab.jmir.org</ext-link>), 15.1.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Rehabilitation and Assistive Technology, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://rehab.jmir.org/">https://rehab.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://rehab.jmir.org/2026/1/e79073"/><abstract><sec><title>Background</title><p>Speech recognition technology is widely used by individuals who are Deaf/deaf and hard-of-hearing (DHH) in everyday communication, but its clinical applications remain underexplored. Communication barriers in health care can compromise safety, understanding, and autonomy for individuals who are DHH.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate a real-time speech recognition system (SRS) tailored for clinical settings, examining its usability, perceived effectiveness, and transcription accuracy among users who are DHH.</p></sec><sec sec-type="methods"><title>Methods</title><p>We conducted a pilot study with 10 adults who are DHH participating in mock outpatient encounters using a custom SRS powered by Google&#x2019;s speech-to-text application programming interface. We used a convergent parallel mixed-methods design, collecting quantitative usability ratings and qualitative interview data during the same study session. These datasets were subsequently merged and jointly interpreted. Participants completed postscenario surveys and structured exit interviews assessing distraction, trust, ease of use, satisfaction, and emotional response. Caption accuracy was benchmarked against professional communication access real-time translation transcripts using word error rate (WER). Because WER assigns equal weight to all tokens, it does not differentiate between routine transcription errors and those involving safety-critical clinical terms (eg, medications or diagnoses). Therefore, WER may underestimate the potential impact of certain errors in medical contexts.</p></sec><sec sec-type="results"><title>Results</title><p>Across 29 clinical scenario simulations, 86% (25/29) of participants found captions nondistracting, 90% (26/29) reported them easy to follow and trustworthy, and 76% (22/29) were satisfied with the experience. Participants described the SRS as intuitive, emotionally grounding, and preferable to lip reading in masked settings. WER ranged from 12.7% to 22.8%, consistent with benchmarks for automated SRSs. Interviews revealed themes of increased confidence in following clinical conversations and staying engaged despite masked communication. Participants reported less anxiety about missing critical medical information and expressed a strong interest in expanding the tool to real-world settings, especially for older adults or those with cognitive impairments.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our findings support the potential of real-time captioning to enhance accessibility and reduce the cognitive and mental burden of communication for individuals who are DHH in clinical care. Participants described the SRS as both functionally effective and personally empowering. While accuracy for complex medical terminology remains a limitation, participants consistently expressed trust in the system and a desire for its integration into clinical care. Future research should explore real-world implementation, domain-specific optimization, and the development of user-centered evaluation metrics that extend beyond transcription fidelity to include trust, autonomy, and communication equity.</p></sec></abstract><kwd-group><kwd>health communication</kwd><kwd>hearing loss</kwd><kwd>deafness</kwd><kwd>speech recognition software</kwd><kwd>usability testing</kwd><kwd>health care accessibility</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Effective communication is foundational to safe, equitable, and high-quality health care [<xref ref-type="bibr" rid="ref1">1</xref>]. However, individuals who are Deaf/deaf and hard-of-hearing (DHH) often face communication barriers that compromise understanding and autonomy [<xref ref-type="bibr" rid="ref2">2</xref>]. These barriers contribute to poor health outcomes and reduced patient engagement in real-time clinical settings [<xref ref-type="bibr" rid="ref2">2</xref>]. The scale of this issue highlights the need to understand which communication support tools are available and provided, and to whom. In the United States, an estimated 48 million people live with some degree of hearing loss (HL), and 1 in 3 adults older than 65 years experiences disabling age-related hearing loss [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Despite this growing population, access to communication supports remains inconsistent [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Deaf individuals who use American Sign Language often receive interpreter services [<xref ref-type="bibr" rid="ref7">7</xref>]. In contrast, oral communicators with people with HL who normally rely on spoken English are less likely to receive accommodations such as captioning, assistive listening devices, or environmental modifications [<xref ref-type="bibr" rid="ref7">7</xref>]. Especially in clinical workflows, interpreter services are systematically implemented, whereas accommodations for oral communicators are likely not [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. This gap persists despite longstanding mandates under the Americans with Disabilities Act, which mandates effective communication in health care [<xref ref-type="bibr" rid="ref13">13</xref>]. As a result, many patients who are DHH still receive incomplete or delayed health information [<xref ref-type="bibr" rid="ref5">5</xref>]. These gaps undermine informed decision-making, autonomy, and overall care outcomes [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Far from logistical oversights, these structural inequities perpetuate persistent disparities in care for individuals who are DHH.</p><p>These long-standing disparities became even more visible during the COVID-19 pandemic [<xref ref-type="bibr" rid="ref14">14</xref>]. Universal masking eliminated lip reading and facial cues, which were essential supports for many individuals who are DHH and rely on oral communication [<xref ref-type="bibr" rid="ref16">16</xref>]. This shift underscored the need for scalable solutions to maintain accessible communication in high-stakes settings [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p><p>Real-time captioning is 1 solution for improving communication access for individuals who are DHH when traditional strategies (eg, lip reading or interpreters) are unavailable [<xref ref-type="bibr" rid="ref18">18</xref>]. Captioning tools can be deployed quickly and readily support both in-person and virtual communication [<xref ref-type="bibr" rid="ref19">19</xref>]. However, captioning accuracy of clinical conversations may be affected by terminology unique to the medical field or speaker attribution and is understudied [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. This has left a critical gap in the development of effective and equitable access tools.</p><p>By allowing both conversation partners to see each other&#x2019;s faces while reading the same captions, transparent or dual-visibility captioning preserves the natural flow of spoken interaction and is a promising solution for clinical communication. Prior work, such as See-Through Captions [<xref ref-type="bibr" rid="ref21">21</xref>], See-Through Captions in a Museum Guided Tour [<xref ref-type="bibr" rid="ref22">22</xref>], and Wearable Subtitles [<xref ref-type="bibr" rid="ref23">23</xref>], has primarily focused on general or educational settings. Our study extends this line of research into medical contexts, where communication accuracy can directly affect patient safety and outcomes. It also emphasizes the emotional and psychological impact of captioning during clinical interactions and addresses the unique technical challenges posed by medical vocabulary and workflow integration.</p><p>In summary, we developed and evaluated a real-time captioning tool using Google&#x2019;s speech-to-text engine to generate live captions during simulated clinical encounters. We tested this system in dynamic, medically relevant scenarios designed to simulate typical ambulatory care encounters. In this pilot study, we explored how individuals who are DHH experienced the captioning system in these simulated encounters, focusing on usability, accuracy, and communication access.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Background</title><p>The pilot took place in a patient room at one of the Department of Family Medicine clinics. The primary goal was to assess the feasibility and acceptability of a real-time captioning tool in a clinical setting. Secondary objectives included evaluating ease of use, distraction, trust, and satisfaction, factors critical to determining whether the tool supports communication access. Quantitative and qualitative data were collected concurrently within the same study session using a convergent parallel mixed-methods design. Participants completed postscenario surveys and a brief structured exit interview during the same visit, allowing us to analyze both datasets in parallel before merging findings during the interpretation phase.</p></sec><sec id="s2-2"><title>Recruitment</title><p>We recruited participants who self-identified as DHH through internal email lists compiled from prior studies, social media, and snowball sampling. Inclusion criteria included people who were DHH, preferred to communicate in spoken English, and were at least 18 years old. Recruitment materials explained that the study evaluated a real-time captioning system in simulated medical scenarios.</p></sec><sec id="s2-3"><title>Mock Clinical Scenarios</title><p>Participants completed 3 mock clinical scenarios using the automated speech recognition system (SRS) which was developed by us. The SRS used Google&#x2019;s speech-to-text application programming interface to transcribe speech to text with low latency and competitive accuracy [<xref ref-type="bibr" rid="ref24">24</xref>]. The setup included 2 iPads arranged in a tented position so that each device faced either the participant or the mock doctor. Both iPads displayed the generated captions simultaneously (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><p>Before each experiment, we used a random number generator to assign scenario order for each participant. Two team members (both medical students) alternated between serving as the mock doctor (administering scenarios) or facilitator (administering postscenario surveys and exit interviews).</p><p>The scenarios were based on commonly reported primary care concerns: (1) back pain, (2) headache, and (3) high blood pressure. Scenario scripts were designed by trained medical students and a clinical faculty member to closely replicate real clinical conversations. The mock doctors wore surgical masks to simulate real-life communication barriers, such as muffled sound and loss of visual cues.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>An example of a mock clinical scenario with the real-time speech recognition system set up on a table between the participant (left) and the mock doctor (right). A microphone on the iPad facing the mock doctor detects audio during interviews. Transcripts are displayed on both iPads in real time. SRS: speech recognition system.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v13i1e79073_fig01.png"/></fig></sec><sec id="s2-4"><title>Postscenario SRS Assessments</title><p>Following each scenario, participants provided feedback on the captioning system, rating it across 4 domains: distraction, ease of use, trust, and overall satisfaction (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Scenario-specific questions included: &#x201C;In your discussion with the mock doctor, how distracting were the captions?&#x201D; &#x201C;How easy or difficult was it to watch the caption while talking with the mock doctor?&#x201D; &#x201C;How much did you trust the accuracy of the generated captions?&#x201D; &#x201C;In this scenario, how satisfied were you with the captioning technology?&#x201D; To reduce response bias, we alternated the direction of the scales: ease of use and trust rated from 1 (strong agreement) to 5 (strong disagreement), and satisfaction rated from 1 (strong disagreement) to 5 (strong agreement). Distraction was scaled separately from 1 (strong disagreement) to 3 (strong agreement).</p></sec><sec id="s2-5"><title>Participant Survey Questions</title><p>To evaluate user experience with the SRS, participants completed a structured exit interview consisting of 9 questions (5 scalar and 4 open-ended items; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). To ensure accessibility, a study team member read all questions aloud while they were displayed on an iPad (Apple Inc). We audio-recorded and transcribed responses verbatim using a third-party service, then deidentified the transcripts. We reviewed audio files to clarify unclear segments. Given the brief interviews, we organized and analyzed responses in Microsoft Excel (version 16.77).</p><p>Open-ended responses were reviewed using a structured framework aligned with predefined domains: ease of use, comfort, satisfaction, trust, emotional response, and the captioning system&#x2019;s ability to support or replace lip reading. Overall, 3 team members (SEH, LJM, and LW) independently applied initial codes to a subset of transcripts. Coding discrepancies were resolved through discussion, and the codebook was refined iteratively. Consistency was maintained through regular team meetings, and reflexive discussions were used to address potential bias.</p><p>Themes were identified based on frequency, relevance to study aims, and salience across participants. Representative participant comments were selected to illustrate key insights. Thematic saturation was reached when no new concepts emerged from successive interviews.</p></sec><sec id="s2-6"><title>Mixed Methods Integration</title><p>To integrate quantitative and qualitative data, we used a convergent parallel approach in which both datasets were collected during the same phase, analyzed separately, and then merged during the interpretation phase. Integration occurred through (1) narrative weaving of findings across domains and (2) construction of a joint display that juxtaposed quantitative ratings with representative qualitative insights to generate meta-inferences. This approach allowed identification of areas of convergence and divergence between usability ratings and participants&#x2019; lived communication experiences.</p></sec><sec id="s2-7"><title>Closed Captioning Accuracy</title><p>In addition to participant feedback, we analyzed the accuracy of the system&#x2019;s transcriptions. We compiled all transcripts generated by the mock doctors and compared them to professional communication access real-time translation transcripts.</p><p>We used word error rate (WER), a standard metric in automatic speech recognition (ASR) that calculates errors as the ratio of insertions, deletions, and substitutions required to align the system output with the reference [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. We implemented WER calculations using the Python-based <italic>jiwer</italic> library, which provides standardized scoring for automated SRSs. This approach allowed us to assess how closely the SRS-generated captions matched professional-level transcription, validating the system&#x2019;s effectiveness in realistic use cases.</p></sec><sec id="s2-8"><title>Statistical Analysis</title><p>We performed univariate analyses on demographic data and postscenario survey responses. Because of the small sample size, the study was not powered to detect subgroup differences. For transcript analysis, we segmented transcripts from mock sessions into 3 distinct scenarios. To focus on the primary use case, captioning clinician speech, we excluded utterances from participants who are DHH and analyzed only the mock doctors&#x2019; speech.</p></sec><sec id="s2-9"><title>Ethical Considerations</title><p>This study was approved by the University of Michigan Institutional Review Board (IRB; HUM00240244). All participants provided informed consent prior to participation. Participants were informed of the study purpose, procedures, potential risks, and their right to withdraw at any time without penalty. All study data were deidentified prior to analysis, and transcripts were reviewed to remove personally identifiable information. Audio recordings and transcripts were stored on secure, password-protected institutional servers accessible only to the study team. Participants received a US $25 Amazon gift card for their participation. The individuals depicted in the figure provided explicit written consent for publication of their images. The individuals shown in <xref ref-type="fig" rid="figure1">Figure 1</xref> provided explicit written consent for their images to be published.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Participant Characteristics</title><p>Overall, 11 participants who are DHH enrolled and participated in the pilot study. Due to equipment failure resulting in complete data recording loss with Participant 5, this participant was excluded from the analysis. The 10 remaining participants had an even distribution of genders (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Study participant demographics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">ID</td><td align="left" valign="bottom">Age (years)</td><td align="left" valign="bottom">Sex</td><td align="left" valign="bottom">Identity</td><td align="left" valign="bottom">Hearing loss levels<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom">Wearable technology<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="bottom">Lip reader</td></tr></thead><tbody><tr><td align="left" valign="top">P01</td><td align="left" valign="top">61</td><td align="left" valign="top">Female</td><td align="left" valign="top">HoH<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">Severe</td><td align="left" valign="top">Yes</td><td align="left" valign="top">All of the time</td></tr><tr><td align="left" valign="top">P02</td><td align="left" valign="top">66</td><td align="left" valign="top">Male</td><td align="left" valign="top">HoH</td><td align="left" valign="top">Severe</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Sometimes</td></tr><tr><td align="left" valign="top">P03</td><td align="left" valign="top">66</td><td align="left" valign="top">Male</td><td align="left" valign="top">HoH</td><td align="left" valign="top">Severe</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">P04</td><td align="left" valign="top">66</td><td align="left" valign="top">Female</td><td align="left" valign="top">HoH</td><td align="left" valign="top">Moderately severe</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Sometimes</td></tr><tr><td align="left" valign="top">P06</td><td align="left" valign="top">43</td><td align="left" valign="top">Female</td><td align="left" valign="top">Deaf</td><td align="left" valign="top">Profound</td><td align="left" valign="top">Yes</td><td align="left" valign="top">All of the time</td></tr><tr><td align="left" valign="top">P07</td><td align="left" valign="top">21</td><td align="left" valign="top">Female</td><td align="left" valign="top">deaf</td><td align="left" valign="top">Moderately severe</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Sometimes</td></tr><tr><td align="left" valign="top">P08</td><td align="left" valign="top">39</td><td align="left" valign="top">Female</td><td align="left" valign="top">HoH</td><td align="left" valign="top">Severe</td><td align="left" valign="top">Yes</td><td align="left" valign="top">All of the time</td></tr><tr><td align="left" valign="top">P09</td><td align="left" valign="top">24</td><td align="left" valign="top">Male</td><td align="left" valign="top">Deaf</td><td align="left" valign="top">Profound</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">P10</td><td align="left" valign="top">56</td><td align="left" valign="top">Male</td><td align="left" valign="top">deaf</td><td align="left" valign="top">Profound</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Sometimes</td></tr><tr><td align="left" valign="top">P11</td><td align="left" valign="top">20</td><td align="left" valign="top">Male</td><td align="left" valign="top">HoH</td><td align="left" valign="top">Mild</td><td align="left" valign="top">Yes</td><td align="left" valign="top">Sometimes</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Hearing loss levels were self-identified, and all participants reported equal hearing loss levels bilaterally.</p></fn><fn id="table1fn2"><p><sup>b</sup>Wearable technology includes hearing aids and cochlear implants.</p></fn><fn id="table1fn3"><p><sup>c</sup>HoH: hard of hearing.</p></fn></table-wrap-foot></table-wrap><p>The mean age of the participants was 46.2 (SD 19.3) years. Six participants identified as &#x201C;hard of hearing,&#x201D; 22 as &#x201C;Deaf,&#x201D; and 2 as &#x201C;deaf.&#x201D; Seven participants self-reported severe to profound HL, and all participants had bilateral HL. Five participants self-reported congenital HL, 2 reported childhood onsets of HL (&#x003C;12 y old), and 2 reported HL as adults (&#x003E;18 y old). Hearing aids were used by 8 participants, and 2 participants used cochlear implants. Seven participants used captioning services in the past. Seven also incorporated smartphone-based hearing assistive technology. Three used &#x201C;other&#x201D; tools, including using cupped hands behind ears to assist in hearing. Eight participants reported varying degrees of dependence on lip reading, but 5 participants depended sometimes on lip reading and 5 depended fully on lip reading.</p></sec><sec id="s3-2"><title>Postscenario SRS Assessments</title><p>There were 29 postscenario SRS assessment surveys, 3 survey responses each from 9 participants and 2 survey responses from 1 participant. One survey response from participant P11 was not collected due to a technician error. Overall, participants found the captioning technology not distracting in 86% (25/29) of scenarios (<xref ref-type="table" rid="table2">Table 2</xref>). In 90% (26/29) of scenarios, participants trusted the accuracy of generated transcription and felt the captions were easy to watch while conversing with the mock doctor. In 76% (22/29) of scenarios, participants were satisfied with the captioning technology. The technology was least satisfying to participants in the back pain scenarios (70% satisfaction) compared to the high blood pressure (78% satisfaction) and headache (80% satisfaction) scenarios.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Summary of participant assessments regarding live captioning technology compiled from all 3 scenarios and dichotomized.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Questions<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">Assessments</td><td align="left" valign="bottom" colspan="2">Values, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">In your discussion with the mock doctor, how distracting were the captions?</td><td align="left" valign="top">Not distracting<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top" colspan="2">25 (86)</td></tr><tr><td align="left" valign="top">How easy or difficult was it to watch the caption while talking with the mock doctor?</td><td align="left" valign="top">Easy<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top" colspan="2">26 (90)</td></tr><tr><td align="left" valign="top">How much did you trust the accuracy of the generated captions?</td><td align="left" valign="top">Trusted<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top" colspan="2">26 (90)</td></tr><tr><td align="left" valign="top">In this scenario, how satisfied were you with the captioning technology?</td><td align="left" valign="top">Satisfied<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top" colspan="2">22 (76)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>For all 4 questions, n=29 since 1 of the 10 participants did not participate in 1 of the 3 scenarios.</p></fn><fn id="table2fn2"><p><sup>b</sup>Not distracting: not at all distracting.</p></fn><fn id="table2fn3"><p><sup>c</sup>Easy: very easy + somewhat easy.</p></fn><fn id="table2fn4"><p><sup>d</sup>Trusted: completely trusted + somewhat trusted.</p></fn><fn id="table2fn5"><p><sup>e</sup>Satisfied: very satisfied + somewhat satisfied.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Participant Experience Surveys</title><p>All 10 participants completed structured exit interviews following the captioning scenarios, providing reflections on their overall experience with the SRS (<xref ref-type="table" rid="table3">Table 3</xref>). Interview responses were analyzed using a predefined framework aligned with domains explored in the postscenario ratings (eg, ease of use, comfort, satisfaction, trust, emotional impact, and support for lip reading). This section summarizes participant perspectives and provides representative quotes to contextualize the quantitative results described above.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Representative participant reflections by theme.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Themes</td><td align="left" valign="bottom">Relevant quotes<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom">Interpretation</td></tr></thead><tbody><tr><td align="left" valign="top">Ease of use</td><td align="left" valign="top">&#x201C;At first I wasn&#x2019;t sure what to expect, but after a few lines of text I stopped even thinking about it&#x2014;it just worked. That made me feel more in control.&#x201D; (P04)</td><td align="left" valign="top">Participants found the system intuitive and accessible.</td></tr><tr><td align="left" valign="top">Comfort</td><td align="left" valign="top">&#x201C;I didn&#x2019;t have to strain or overthink. It just flowed naturally and I didn&#x2019;t even realize how relaxed I was until the end.&#x201D; (P08)</td><td align="left" valign="top">Technology reduced cognitive effort and fostered emotional ease.</td></tr><tr><td align="left" valign="top">Satisfaction</td><td align="left" valign="top">&#x201C;I was happy. I wish all the doctors would have something like this. It made me feel like my experience mattered.&#x201D; (P03)</td><td align="left" valign="top">Participant expresses satisfaction and a sense of being valued.</td></tr><tr><td align="left" valign="top">Safety and trust</td><td align="left" valign="top">&#x201C;Because it&#x2019;s live, it feels very safe. You&#x2019;re not left guessing, and I felt confident nothing important was missed.&#x201D; (P01)</td><td align="left" valign="top">Real-time functionality enhanced user confidence and perception of safety.</td></tr><tr><td align="left" valign="top">Emotional response</td><td align="left" valign="top">&#x201C;I didn&#x2019;t realize how much stress I usually carry during appointments. This made me feel heard and like I could finally breathe.&#x201D; (P09)</td><td align="left" valign="top">System reduced communication-related anxiety and supported emotional well-being.</td></tr><tr><td align="left" valign="top">Support or replace lip reading</td><td align="left" valign="top">&#x201C;With the mask on, it would have been extremely difficult to follow&#x2014;and with the captioning, it was just leaps better. I wasn&#x2019;t exhausted from trying to read lips the whole time.&#x201D; (P07)</td><td align="left" valign="top">The technology was viewed as a vital alternative to lip reading, especially in masked settings.</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Relevant quotes from individual participants illustrating each core theme, including insight into perceived usability, comfort, satisfaction, emotional impact, and the role of real-time captions in supporting communication.</p></fn></table-wrap-foot></table-wrap><p>Most participants (9/10) described the system as easy to use, frequently using phrases like &#x201C;very easy&#x201D; or &#x201C;easier than usual.&#x201D;</p><p>One participant remarked,</p><disp-quote><p>After a few lines of text I stopped even thinking about it&#x2014;it just worked. That made me feel more in control.</p></disp-quote><p>Another noted,</p><disp-quote><p>It was easier than usual because we don&#x2019;t have captioning. It&#x2019;s always nice to have it just in case you miss something.</p></disp-quote><p>Participants also reported high comfort with the system. Descriptions included &#x201C;very comfortable,&#x201D; &#x201C;easy to work with,&#x201D; and &#x201C;high comfortability.<italic>&#x201D;</italic></p><p>As 1 participant shared,</p><disp-quote><p>It just flowed naturally, and I didn&#x2019;t even realize how relaxed I was until the end.</p></disp-quote><p>Satisfaction was also high across interviews. While 76% of scenario ratings reflected satisfaction, all participants described themselves as satisfied or very satisfied in exit interviews. One stated,</p><disp-quote><p>I was happy. I wish all the doctors would have something like this.&#x201D;</p></disp-quote><p>Another shared,</p><disp-quote><p>I was pretty satisfied, and the captioning was spot-on.</p></disp-quote><p>When asked about trust in the system, participants frequently described the captions as reliable. One participant reflected,</p><disp-quote><p>Because it&#x2019;s live, it feels very safe. You&#x2019;re not left guessing.</p></disp-quote><p>A few raised questions about data privacy, with one noting,</p><disp-quote><p>I would also want to know what happens to the transcript and who has access to it.</p></disp-quote><p>Participants also described emotional benefits from the technology. In total, 9 of 10 participants used words like &#x201C;reassured,&#x201D; &#x201C;relaxed,&#x201D; and &#x201C;comfortable&#x201D; to describe how the SR<italic>S</italic> made them feel. One participant shared,</p><disp-quote><p>This made me feel heard and like I could finally breathe.</p></disp-quote><p>Perceptions of the captions&#x2019; ability to support or replace lip reading were more varied. Several participants described the system as a helpful supplement or improvement, particularly in masked settings. As one noted,</p><disp-quote><p>With the mask on, I definitely depended on it more.</p></disp-quote><p>Another stated,</p><disp-quote><p>I think it&#x2019;s better than lip reading.</p></disp-quote><p>Others expressed that lip reading remained important, with one participant saying,</p><disp-quote><p>Not going to replace lip reading... captions help, but I still rely on visual cues.</p></disp-quote><p>Beyond these predefined domains, participants spontaneously shared reflections on broader applications of the SRS. Several expressed enthusiasm for expanding its use in real-world clinical settings, with one stating,</p><disp-quote><p>I would like to see that in many doctor&#x2019;s offices tomorrow.</p></disp-quote><p>Others suggested the system may be particularly helpful for patients who are older, have cognitive impairments, or use interpreters. A few noted that having real-time captions reduced the pressure to maintain constant visual attention, allowing for more natural communication and less fatigue.</p></sec><sec id="s3-4"><title>Closed Captioning Accuracy</title><p>We collected and preprocessed transcripts from 10 mock clinical sessions. Due to varying levels of verbosity among the participants, the total transcript lengths varied substantially, ranging from 1144 to 4704 words.</p><p>Overall, participants found the SRS to be sufficiently accurate (<xref ref-type="table" rid="table4">Table 4</xref>). For instance, P04 noted that the system was <italic>&#x201C;more accurate than the phone captions&#x201D;</italic> she typically uses in daily conversations. Similarly, P06 commented on the system&#x2019;s effectiveness compared to human captioners, stating,</p><disp-quote><p>A lot of the captions I had were court reporters&#x2014;<italic>they caption fast, but sometimes they make mistakes. ... And this one [the SRS], it&#x2019;s more accurate and I see words better.</italic></p></disp-quote><p>Nonetheless, participants expressed concerns about the system&#x2019;s ability to handle more complex or specialized medical vocabulary. For example, P10 questioned <italic>&#x201C;how it would be with more complex medical terminologies,&#x201D;</italic> in real clinical settings where more technical jargon and medication names were frequently used.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>The word error rate for each scenario, along with the accumulated word error rate for each participant across all 3 scenarios. These word error rate scores specifically reflect the accuracy of the automated speech recognition system in transcribing the mock doctors&#x2019; speech.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">ID</td><td align="left" valign="bottom">Mock doctor</td><td align="left" valign="bottom">Scenario 1<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">Scenario 2<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">Scenario 3<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">Accumulated (range: 0.127-0.167)</td></tr></thead><tbody><tr><td align="left" valign="top">P01</td><td align="left" valign="top">M1</td><td align="left" valign="top">0.136</td><td align="left" valign="top">0.133</td><td align="left" valign="top">0.125</td><td align="left" valign="top">0.131</td></tr><tr><td align="left" valign="top">P02</td><td align="left" valign="top">M2</td><td align="left" valign="top">0.193</td><td align="left" valign="top">0.141</td><td align="left" valign="top">0.133</td><td align="left" valign="top">0.153</td></tr><tr><td align="left" valign="top">P03</td><td align="left" valign="top">M1</td><td align="left" valign="top">0.129</td><td align="left" valign="top">0.122</td><td align="left" valign="top">0.133</td><td align="left" valign="top">0.127</td></tr><tr><td align="left" valign="top">P04</td><td align="left" valign="top">M2</td><td align="left" valign="top">0.228</td><td align="left" valign="top">0.128</td><td align="left" valign="top">0.151</td><td align="left" valign="top">0.167</td></tr><tr><td align="left" valign="top">P06</td><td align="left" valign="top">M2</td><td align="left" valign="top">0.137</td><td align="left" valign="top">0.152</td><td align="left" valign="top">0.135</td><td align="left" valign="top">0.144</td></tr><tr><td align="left" valign="top">P07</td><td align="left" valign="top">M1</td><td align="left" valign="top">0.127</td><td align="left" valign="top">0.134</td><td align="left" valign="top">0.132</td><td align="left" valign="top">0.133</td></tr><tr><td align="left" valign="top">P08</td><td align="left" valign="top">M2</td><td align="left" valign="top">0.155</td><td align="left" valign="top">0.142</td><td align="left" valign="top">0.152</td><td align="left" valign="top">0.149</td></tr><tr><td align="left" valign="top">P09</td><td align="left" valign="top">M1</td><td align="left" valign="top">0.136</td><td align="left" valign="top">0.131</td><td align="left" valign="top">0.141</td><td align="left" valign="top">0.137</td></tr><tr><td align="left" valign="top">P10</td><td align="left" valign="top">M1</td><td align="left" valign="top">0.127</td><td align="left" valign="top">0.126</td><td align="left" valign="top">0.133</td><td align="left" valign="top">0.129</td></tr><tr><td align="left" valign="top">P11</td><td align="left" valign="top">M2</td><td align="left" valign="top">0.147</td><td align="left" valign="top">0.185</td><td align="left" valign="top">0.134</td><td align="left" valign="top">0.151</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>The scenario-level word error rates ranged between 0.122 and 0.228.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5"><title>Joint Display of Integrated Findings</title><p>To illustrate convergence between quantitative usability ratings and qualitative interview themes, we constructed a joint display summarizing merged findings and resulting meta-inferences across key domains (<xref ref-type="table" rid="table5">Table 5</xref>).</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Joint display of integrated quantitative and qualitative findings</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Quantitative result</td><td align="left" valign="bottom">Representative quote</td><td align="left" valign="bottom">Integrated meta-inference</td></tr></thead><tbody><tr><td align="left" valign="top">Ease of use</td><td align="left" valign="top">90% rated captions &#x201C;easy&#x201D;</td><td align="left" valign="top">&#x201C;After a few lines of text I stopped even thinking about it&#x2014;it just worked.&#x201D;</td><td align="left" valign="top">High usability with minimal cognitive load<break/>Captions supported natural conversational flow</td></tr><tr><td align="left" valign="top">Comfort</td><td align="left" valign="top">Not directly measured</td><td align="left" valign="top">&#x201C;It just flowed naturally, and I didn&#x2019;t realize how relaxed I was.&#x201D;</td><td align="left" valign="top">Technology reduced strain and fostered emotional ease during communication</td></tr><tr><td align="left" valign="top">Satisfaction</td><td align="left" valign="top">76% satisfied</td><td align="left" valign="top">&#x201C;I wish all the doctors would have something like this.&#x201D;</td><td align="left" valign="top">Satisfaction tied to both functional value and feeling understood and supported</td></tr><tr><td align="left" valign="top">Safety and trust</td><td align="left" valign="top">90% trusted accuracy</td><td align="left" valign="top">&#x201C;Because it&#x2019;s live, it feels very safe. You&#x2019;re not left guessing.&#x201D;</td><td align="left" valign="top">Real-time display strengthened perceived safety and reliability despite minor errors</td></tr><tr><td align="left" valign="top">Emotional response</td><td align="left" valign="top">Not directly measured</td><td align="left" valign="top">&#x201C;This made me feel heard and like I could finally breathe.&#x201D;</td><td align="left" valign="top">Captions enhanced psychological safety and reduced anxiety&#x2014;benefits not captured numerically</td></tr><tr><td align="left" valign="top">Support or replace lip reading</td><td align="left" valign="top">Not directly measured</td><td align="left" valign="top">&#x201C;With the mask on, I depended on it more&#x2026; it was leaps better.&#x201D;</td><td align="left" valign="top">Captions supplemented or replaced lip reading, reducing fatigue in masked settings</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>To successfully deploy SRS in clinical settings, it is essential that the system accurately captures and reflects clinicians&#x2019; speech. Our findings show that although the SRS output was not flawless, its WERs fell between 0.10 and 0.20, a range generally considered acceptable for real-world ASR use [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Furthermore, participants understood the captions with relative ease, suggesting that transcription quality was sufficient to support comprehension in simulated outpatient scenarios. However, stricter accuracy standards may be required in high-stakes contexts, such as discussions of medications or treatment options, where small errors can have serious consequences.</p><p>Although WER is widely used to evaluate ASR performance, it weighs all error types equally, regardless of their impact on comprehension [<xref ref-type="bibr" rid="ref27">27</xref>]. Prior work has proposed alternative evaluation approaches that aim to capture semantic accuracy or user-centric measures of intelligibility and usefulness [<xref ref-type="bibr" rid="ref20">20</xref>]. In clinical communication, we support developing evaluation metrics that align more closely with safety-critical requirements. Such metrics would be instrumental in determining when ASR systems are truly ready for deployment in health care environments. In clinical settings, misrecognition of medical terminology can have consequences far more serious than common transcription errors, especially when involving medication names, diagnoses, or treatment instructions. Because of this, future work should consider safety-critical evaluation frameworks that go beyond traditional WER. Approaches, such as semantic error analysis, comprehension-based scoring, or accuracy, weighting for medically significant terms could better capture the real-world implications of captioning errors in health care communication.</p><p>Our participants represented a variety of ages, genders, HL levels, and degrees of dependence on lip reading. However, most participants had previously used captioning technology as an accommodation, so our usability findings may be less generalizable to individuals who are DHH with no prior captioning experience. Also, only 2 participants preferred written communication with hearing people. Therefore, satisfaction with our captioning technology may be higher than our results suggest for people who are DHH and depend more on written communication. Regardless of the scenario, most participants were satisfied with the SRS, trusted its accuracy, found it easy to watch, and were not distracted.</p><p>Participants trusted the captioning system despite occasional transcription errors, which embodies the concept of trust-in-automation frameworks, where user reliance is shaped by perceived system reliability and predictability [<xref ref-type="bibr" rid="ref28">28</xref>]. Exit interviews revealed that beyond meeting technical expectations, the captioning system also meaningfully supported emotional connection, trust, and autonomy during clinical interactions. Participants described the captions as easy to use and grounding. They also noted reduced stress, lower cognitive fatigue, better understanding, and a stronger sense of being heard. Encouragingly, the observed reduction in stress and fatigue is consistent with prior work where assistive technology helped manage cognitive effort during information processing [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. These findings suggest that accessibility tools should be evaluated not only by their accuracy but by their ability to support psychological safety and communication equity [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>Additionally, although participants generally trusted the captioning system, a few raised concerns about transcript privacy and data handling. These concerns highlight the ethical need for transparency when implementing automated captioning in health care. This pilot used secure, locally stored recordings without identifiable data, but clinical deployment will require Health Insurance Portability and Accountability Act (HIPAA)-compliant encryption and explicit consent protocols. Adding user controls, such as options to delete transcripts or disable storage, could further strengthen trust among users who are DHH and other vulnerable populations. Nevertheless, participants recommended broader adoption of SRS, particularly for older adults and others facing progressive hearing-related communication barriers, underscoring the system&#x2019;s potential to improve care for a heterogeneous population of DHH patients.</p></sec><sec id="s4-2"><title>Limitations</title><p>While our findings are promising, this study has several limitations. Most participants were experienced caption users and had prior familiarity with assistive communication technologies, which may have positively influenced usability and satisfaction ratings. As a result, these findings may not fully represent the experiences of individuals who are DHH and are less familiar with captioning or other accessibility tools or primary American Sign Language users. Future research should include participants with varying levels of captioning experience and a broader demographic range to better assess generalizability and identify barriers for first-time users. This study was conducted in controlled, simulated settings, which may not fully reflect the complexity and spontaneity of real-world medical encounters. Because these mock scenarios involved medical students rather than practicing clinicians, the communication dynamics may differ from authentic physician&#x2013;patient interactions. Future work should therefore include real-world clinical deployments to evaluate how captioning systems perform in active care settings and adapt to diverse communication styles and environmental conditions.</p><p>Second, although our participant pool included individuals with diverse hearing identities and varying degrees of familiarity with assistive technologies, it does not capture the full range of experiences within the broader community who are DHH. Future work should include longitudinal application in various clinical settings and recruitment of a more diverse participant population to better assess long-term usability and impact.</p><p>In addition, our SRS was not specifically optimized for medical vocabulary. This limitation was evident in the system&#x2019;s tendency to misrecognize medical terminology, words that are infrequent in everyday speech yet crucial for accurate clinical communication. Furthermore, while we used WER as a standard quantitative evaluation metric, it does not fully capture how users who are DHH interpret and understand captions, particularly in high-stakes contexts. Future research should explore the development of domain-specific SRS trained on medical speech and adopt evaluation metrics that better reflect comprehension and user experience among individuals who are DHH. Finally, since WER assigns equal weight to all tokens, it does not differentiate between routine transcription errors and those involving safety-critical clinical terms (eg, medications or diagnoses). Therefore, WER may underestimate the potential impact of certain errors in medical contexts.</p></sec><sec id="s4-3"><title>Future Directions</title><p>Improving SRS accuracy for medical terminology remains a key technical priority for clinical use. Strategies may include (1) speech recognition models on deidentified clinical audio to capture the acoustic variability of real-world medical speech [<xref ref-type="bibr" rid="ref32">32</xref>], (2) embedding domain-specific medical dictionaries and medication name libraries into the language model of the SRS systems to reduce substitution errors [<xref ref-type="bibr" rid="ref33">33</xref>], (3) leveraging context-aware large language models that can infer meaning from partial or uncertain input [<xref ref-type="bibr" rid="ref34">34</xref>], and (4) integrating clinician feedback loops for rapid correction of recurring misinterpretations [<xref ref-type="bibr" rid="ref35">35</xref>]. These enhancements would not only improve accuracy for technical vocabulary but also strengthen user trust and perceived reliability in clinical environments.</p><p>Building on these preliminary findings, future work should also explore integration with medical-domain ASR models to enhance accuracy for specialized terminology and complex clinical dialog. Longitudinal studies will be valuable for assessing maintained usability, user trust, and performance over time. Additionally, testing captioning systems in broader clinical contexts, such as emergency care, geriatrics, and among patients with cognitive impairment, will help determine their adaptability and impact across diverse care settings.</p></sec><sec id="s4-4"><title>Implications for Clinical Workflow Integration</title><p>Our findings demonstrate that real-time captioning is usable and beneficial in clinical settings for patients who are DHH, aligning with prior evidence that captioning improved recall of anesthesia-related consent conversations [<xref ref-type="bibr" rid="ref36">36</xref>]. Given this demonstrated value, practical integration of captioning tools into clinical workflows will require thoughtful design to minimize disruption while enhancing accessibility. Participants envisioned use cases in which SRS displays could be embedded within existing electronic health record systems or mirrored on clinician tablets to preserve natural eye contact and conversational flow. Integration will also depend on clear institutional protocols for activating captioning on demand, ensuring confidentiality, and providing clinician training on how to engage with patients who are DHH using this technology. Establishing these processes could enable captioning to function as a routine accessibility feature rather than an exception, supporting both efficiency and equitable communication in care delivery.</p></sec><sec id="s4-5"><title>Conclusions</title><p>This pilot study demonstrates that artificial intelligence&#x2013;enhanced captioning can meaningfully improve communication experiences for individuals who are DHH in clinical settings. Participants found the system intuitive, emotionally supportive, and effective in bridging common communication barriers, especially those worsened by face masks and unfamiliar environments. While traditional captioning tools often fall short in medical contexts, integrating large language models into the speech recognition process offers a promising path toward more coherent, accurate, and human-centered accessibility. By centering on user perspectives, this study highlights the importance of evaluating assistive technologies not only for transcription quality, but for their impact on trust, inclusion, and psychological safety. Future research should build on these early insights to further refine captioning systems, examine their use in real-world clinical care, and ensure that patients who are DHH are active partners in the design of accessible digital health solutions.</p></sec></sec></body><back><ack><p>The authors thank the Deaf/deaf and hard-of-hearing community members who participated in this study for generously sharing their insights and experiences. We are grateful to the Dexter Family Medicine clinic at the University of Michigan for their support in facilitating pilot testing.</p></ack><notes><sec><title>Funding</title><p>This work was supported by funding from the Blue Cross Blue Shield of Michigan Foundation (2024010111) and by the University of Michigan&#x2019;s e-HAIL (e-Health and Artificial Intelligence Laboratory) program.</p></sec><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published article. Further inquiries can be directed to the corresponding author.</p></sec></notes><fn-group><fn fn-type="con"><p>Methodology (equal), participant recruitment (equal), investigation (equal), data curation (lead), formal analysis (lead), writing &#x2013; original draft (lead), and writing &#x2013; review and editing (equal): SEH</p><p>Methodology (equal), participant recruitment (equal), technology development (lead), investigation (equal), data curation (equal), formal analysis (equal), writing &#x2013; original draft (equal), and writing &#x2013; review and editing (equal): LYW</p><p>Methodology (equal), project administration (lead), investigation (equal), data curation (equal), formal analysis (equal), writing &#x2013; original draft (equal), and writing &#x2013; review and editing (equal): LJM</p><p>Conceptualization (equal), technology development (supporting), methodology (supporting), writing &#x2013; review and editing (equal), and supervision (equal): DJ</p><p>Conceptualization (equal), methodology (equal), writing &#x2013; original draft (supporting), writing &#x2013; review and editing (equal), and supervision (lead): MMM</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ASR</term><def><p>automatic speech recognition</p></def></def-item><def-item><term id="abb2">DHH</term><def><p>Deaf/deaf or hard of hearing</p></def></def-item><def-item><term id="abb3">HIPAA</term><def><p>Health Insurance Portability and Accountability Act</p></def></def-item><def-item><term id="abb4">HL</term><def><p>hearing loss</p></def></def-item><def-item><term id="abb5">SRS</term><def><p>speech recognition system</p></def></def-item><def-item><term id="abb6">WER</term><def><p>word error rate</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guttman</surname><given-names>OT</given-names> </name><name name-style="western"><surname>Lazzara</surname><given-names>EH</given-names> </name><name name-style="western"><surname>Keebler</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Webster</surname><given-names>KLW</given-names> </name><name name-style="western"><surname>Gisick</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Baker</surname><given-names>AL</given-names> </name></person-group><article-title>Dissecting communication barriers in healthcare: a path to enhancing communication resiliency, reliability, and patient safety</article-title><source>J Patient Saf</source><year>2021</year><month>12</month><day>1</day><volume>17</volume><issue>8</issue><fpage>e1465</fpage><lpage>e1471</lpage><pub-id pub-id-type="doi">10.1097/PTS.0000000000000541</pub-id><pub-id pub-id-type="medline">30418425</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McKee</surname><given-names>M</given-names> </name><name name-style="western"><surname>James</surname><given-names>TG</given-names> </name><name name-style="western"><surname>Helm</surname><given-names>KVT</given-names> </name><etal/></person-group><article-title>Reframing our health care system for patients with hearing loss</article-title><source>J Speech Lang Hear Res</source><year>2022</year><month>10</month><day>17</day><volume>65</volume><issue>10</issue><fpage>3633</fpage><lpage>3645</lpage><pub-id pub-id-type="doi">10.1044/2022_JSLHR-22-00052</pub-id><pub-id pub-id-type="medline">35969852</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Quick statistics about hearing, balance, &#x0026; dizziness</article-title><source>NIH</source><access-date>2025-05-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nidcd.nih.gov/health/statistics/quick-statistics-hearing">https://www.nidcd.nih.gov/health/statistics/quick-statistics-hearing</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="web"><article-title>Culturally affirmative and linguistically accessible services</article-title><source>National Association of the Deaf</source><access-date>2025-05-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nad.org/resources/health-care-and-mental-health-services/mental-health-services/culturally-affirmative-and-linguistically-accessible-services/">https://www.nad.org/resources/health-care-and-mental-health-services/mental-health-services/culturally-affirmative-and-linguistically-accessible-services/</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>James</surname><given-names>TG</given-names> </name><name name-style="western"><surname>Coady</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Stacciarini</surname><given-names>JMR</given-names> </name><etal/></person-group><article-title>&#x201C;They&#x2019;re not willing to accommodate deaf patients&#x201D;: communication experiences of deaf american sign language users in the emergency department</article-title><source>Qual Health Res</source><year>2022</year><month>01</month><volume>32</volume><issue>1</issue><fpage>48</fpage><lpage>63</lpage><pub-id pub-id-type="doi">10.1177/10497323211046238</pub-id><pub-id pub-id-type="medline">34823402</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marlow</surname><given-names>NM</given-names> </name><name name-style="western"><surname>Samuels</surname><given-names>SK</given-names> </name><name name-style="western"><surname>Jo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mainous</surname><given-names>AG</given-names> </name></person-group><article-title>Patient-provider communication quality for persons with disabilities: a cross-sectional analysis of the Health Information National Trends Survey</article-title><source>Disabil Health J</source><year>2019</year><month>10</month><volume>12</volume><issue>4</issue><fpage>732</fpage><lpage>737</lpage><pub-id pub-id-type="doi">10.1016/j.dhjo.2019.03.010</pub-id><pub-id pub-id-type="medline">30995967</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diamond</surname><given-names>L</given-names> </name><name name-style="western"><surname>Izquierdo</surname><given-names>K</given-names> </name><name name-style="western"><surname>Canfield</surname><given-names>D</given-names> </name><name name-style="western"><surname>Matsoukas</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gany</surname><given-names>F</given-names> </name></person-group><article-title>A systematic review of the impact of patient-physician non-english language concordance on quality of care and outcomes</article-title><source>J Gen Intern Med</source><year>2019</year><month>08</month><volume>34</volume><issue>8</issue><fpage>1591</fpage><lpage>1606</lpage><pub-id pub-id-type="doi">10.1007/s11606-019-04847-5</pub-id><pub-id pub-id-type="medline">31147980</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barnett</surname><given-names>S</given-names> </name><name name-style="western"><surname>McKee</surname><given-names>M</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Pearson</surname><given-names>TA</given-names> </name></person-group><article-title>Deaf sign language users, health inequities, and public health: opportunity for social justice</article-title><source>Prev Chronic Dis</source><year>2011</year><month>03</month><volume>8</volume><issue>2</issue><fpage>A45</fpage><pub-id pub-id-type="medline">21324259</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barnett</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Matthews</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Sutter</surname><given-names>EJ</given-names> </name><etal/></person-group><article-title>Collaboration with deaf communities to conduct accessible health surveillance</article-title><source>Am J Prev Med</source><year>2017</year><month>03</month><volume>52</volume><issue>3 Suppl 3</issue><fpage>S250</fpage><lpage>S254</lpage><pub-id pub-id-type="doi">10.1016/j.amepre.2016.10.011</pub-id><pub-id pub-id-type="medline">28215374</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pollard</surname><given-names>RQ</given-names> </name><name name-style="western"><surname>Barnett</surname><given-names>S</given-names> </name></person-group><article-title>Health-related vocabulary knowledge among deaf adults</article-title><source>Rehabil Psychol</source><year>2009</year><month>05</month><volume>54</volume><issue>2</issue><fpage>182</fpage><lpage>185</lpage><pub-id pub-id-type="doi">10.1037/a0015771</pub-id><pub-id pub-id-type="medline">19469608</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mitra</surname><given-names>M</given-names> </name><name name-style="western"><surname>Akobirshoev</surname><given-names>I</given-names> </name><name name-style="western"><surname>McKee</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Iezzoni</surname><given-names>LI</given-names> </name></person-group><article-title>Birth outcomes among U.S. women with hearing loss</article-title><source>Am J Prev Med</source><year>2016</year><month>12</month><volume>51</volume><issue>6</issue><fpage>865</fpage><lpage>873</lpage><pub-id pub-id-type="doi">10.1016/j.amepre.2016.08.001</pub-id><pub-id pub-id-type="medline">27687529</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alexander</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ladd</surname><given-names>P</given-names> </name><name name-style="western"><surname>Powell</surname><given-names>S</given-names> </name></person-group><article-title>Deafness might damage your health</article-title><source>Lancet</source><year>2012</year><month>03</month><day>17</day><volume>379</volume><issue>9820</issue><fpage>979</fpage><lpage>981</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(11)61670-X</pub-id><pub-id pub-id-type="medline">22423872</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="web"><source>ADA.gov</source><access-date>2025-05-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ada.gov">https://www.ada.gov</ext-link></comment></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McKee</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moran</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zazove</surname><given-names>P</given-names> </name></person-group><article-title>Overcoming additional barriers to care for deaf and hard of hearing patients during COVID-19</article-title><source>JAMA Otolaryngol Head Neck Surg</source><year>2020</year><month>09</month><day>1</day><volume>146</volume><issue>9</issue><fpage>781</fpage><lpage>782</lpage><pub-id pub-id-type="doi">10.1001/jamaoto.2020.1705</pub-id><pub-id pub-id-type="medline">32692807</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Iezzoni</surname><given-names>LI</given-names> </name><name name-style="western"><surname>O&#x2019;Day</surname><given-names>BL</given-names> </name><name name-style="western"><surname>Killeen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Harker</surname><given-names>H</given-names> </name></person-group><article-title>Communicating about health care: observations from persons who are deaf or hard of hearing</article-title><source>Ann Intern Med</source><year>2004</year><month>03</month><day>2</day><volume>140</volume><issue>5</issue><fpage>356</fpage><lpage>362</lpage><pub-id pub-id-type="doi">10.7326/0003-4819-140-5-200403020-00011</pub-id><pub-id pub-id-type="medline">14996677</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moreland</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Ruffin</surname><given-names>CV</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>MA</given-names> </name><name name-style="western"><surname>McKee</surname><given-names>M</given-names> </name></person-group><article-title>Unmasked: how the COVID-19 pandemic exacerbates disparities for people with communication-based disabilities</article-title><source>J Hosp Med</source><year>2021</year><month>03</month><volume>16</volume><issue>3</issue><fpage>185</fpage><lpage>188</lpage><pub-id pub-id-type="doi">10.12788/jhm.3562</pub-id><pub-id pub-id-type="medline">33617440</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bernard</surname><given-names>A</given-names> </name><name name-style="western"><surname>Weiss</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rahman</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The impact of COVID-19 and pandemic mitigation measures on persons with sensory impairment</article-title><source>Am J Ophthalmol</source><year>2022</year><month>02</month><volume>234</volume><fpage>49</fpage><lpage>58</lpage><pub-id pub-id-type="doi">10.1016/j.ajo.2021.06.019</pub-id><pub-id pub-id-type="medline">34197781</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kawas</surname><given-names>S</given-names> </name><name name-style="western"><surname>Karalis</surname><given-names>G</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ladner</surname><given-names>RE</given-names> </name></person-group><article-title>Improving real-time captioning experiences for deaf and hard of hearing students</article-title><conf-name>ASSETS &#x2019;16: Proceedings of the 18th International ACM SIGACCESS Conference on Computers and Accessibility</conf-name><conf-date>Oct 23-26, 2016</conf-date><pub-id pub-id-type="doi">10.1145/2982142.2982164</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kafle</surname><given-names>S</given-names> </name><name name-style="western"><surname>Huenerfauth</surname><given-names>M</given-names> </name></person-group><article-title>Evaluating the usability of automatically generated captions for people who are deaf or hard of hearing</article-title><conf-name>ASSETS &#x2019;17: Proceedings of the 19th International ACM SIGACCESS Conference on Computers and Accessibility</conf-name><conf-date>Oct 20 to Nov 1, 2017</conf-date><pub-id pub-id-type="doi">10.1145/3132525.3132542</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Favre</surname><given-names>B</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kazemian</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Automatic human utility evaluation of ASR systems: does WER really predict performance?</article-title><access-date>2025-12-15</access-date><conf-name>Interspeech 2013</conf-name><conf-date>Aug 25-29, 2013</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2013/favre13_interspeech.pdf">https://www.isca-archive.org/interspeech_2013/favre13_interspeech.pdf</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Yamamoto</surname><given-names>K</given-names> </name><name name-style="western"><surname>Suzuki</surname><given-names>I</given-names> </name><name name-style="western"><surname>Shitara</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ochiai</surname><given-names>Y</given-names> </name></person-group><article-title>See-through captions: real-time captioning on transparent display for deaf and hard-of-hearing people</article-title><conf-name>ASSETS &#x2019;21: Proceedings of the 23rd International ACM SIGACCESS Conference on Computers and Accessibilit</conf-name><conf-date>Oct 18-22, 2021</conf-date><pub-id pub-id-type="doi">10.1145/3441852.3476551</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Suzuki</surname><given-names>I</given-names> </name><name name-style="western"><surname>Yamamoto</surname><given-names>K</given-names> </name><name name-style="western"><surname>Shitara</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hyakuta</surname><given-names>R</given-names> </name><name name-style="western"><surname>Iijima</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ochiai</surname><given-names>Y</given-names> </name></person-group><article-title>See-through captions in a museum guided tour: exploring museum guided tour for deaf and hard-of-hearing people with real-time captioning on transparent display</article-title><conf-name>Computers Helping People with Special Needs: 18th International Conference, ICCHP-AAATE 2022</conf-name><conf-date>Jul 11-15, 2022</conf-date><pub-id pub-id-type="doi">10.1007/978-3-031-08648-9_64</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Olwal</surname><given-names>A</given-names> </name><name name-style="western"><surname>Balke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Votintcev</surname><given-names>D</given-names> </name></person-group><article-title>Wearable subtitles: augmenting spoken communication with lightweight eyewear for all-day captioning</article-title><access-date>2025-12-15</access-date><conf-name>UIST &#x2019;20: Proceedings of the 33rd Annual ACM Symposium on User Interface Software and Technology</conf-name><conf-date>Oct 20-23, 2020</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/proceedings/10.1145/3379337">https://dl.acm.org/doi/proceedings/10.1145/3379337</ext-link></comment><pub-id pub-id-type="doi">10.1145/3379337.3415817</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>Speech-to-Text</article-title><source>Google Cloud</source><access-date>2025-05-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cloud.google.com/speech-to-text">https://cloud.google.com/speech-to-text</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Morris</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Maier</surname><given-names>V</given-names> </name><name name-style="western"><surname>Green</surname><given-names>P</given-names> </name></person-group><article-title>From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition</article-title><access-date>2025-12-15</access-date><conf-name>Interspeech 2004</conf-name><conf-date>Oct 4-8, 2004</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2004/morris04_interspeech.pdf">https://www.isca-archive.org/interspeech_2004/morris04_interspeech.pdf</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2004-668</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>jitsi/jiwer</article-title><source>GitHub</source><access-date>2025-05-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/jitsi/jiwer">https://github.com/jitsi/jiwer</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ye-Yi</surname><given-names>W</given-names> </name><name name-style="western"><surname>Acero</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chelba</surname><given-names>C</given-names> </name></person-group><article-title>Is word error rate a good indicator for spoken language understanding accuracy</article-title><conf-name>2003 IEEE Workshop on Automatic Speech Recognition and Understanding (IEEE Cat No03EX721)</conf-name><conf-date>Nov 30 to Dec 4, 2003</conf-date><pub-id pub-id-type="doi">10.1109/ASRU.2003.1318504</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>JD</given-names> </name><name name-style="western"><surname>See</surname><given-names>KA</given-names> </name></person-group><article-title>Trust in automation: designing for appropriate reliance</article-title><source>Hum Factors</source><year>2004</year><volume>46</volume><issue>1</issue><fpage>50</fpage><lpage>80</lpage><pub-id pub-id-type="doi">10.1518/hfes.46.1.50_30392</pub-id><pub-id pub-id-type="medline">15151155</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load during problem solving: effects on learning</article-title><source>Cogn Sci</source><year>1988</year><month>06</month><volume>12</volume><issue>2</issue><fpage>257</fpage><lpage>285</lpage><pub-id pub-id-type="doi">10.1016/0364-0213(88)90023-7</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Paas</surname><given-names>F</given-names> </name><name name-style="western"><surname>Renkl</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load theory and instructional design: recent developments</article-title><source>Educ Psychol</source><year>2003</year><month>01</month><day>1</day><volume>38</volume><issue>1</issue><fpage>1</fpage><lpage>4</lpage><pub-id pub-id-type="doi">10.1207/S15326985EP3801_1</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bickmore</surname><given-names>TW</given-names> </name><name name-style="western"><surname>Paasche-Orlow</surname><given-names>MK</given-names> </name></person-group><article-title>The role of information technology in health literacy research</article-title><source>J Health Commun</source><year>2012</year><volume>17 Suppl 3</volume><issue>sup3</issue><fpage>23</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1080/10810730.2012.712626</pub-id><pub-id pub-id-type="medline">23030559</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Afonja</surname><given-names>T</given-names> </name><name name-style="western"><surname>Olatunji</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ogun</surname><given-names>S</given-names> </name><name name-style="western"><surname>Etori</surname><given-names>NA</given-names> </name><name name-style="western"><surname>Owodunni</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yekini</surname><given-names>M</given-names> </name></person-group><article-title>Performant ASR models for medical entities in accented speech</article-title><access-date>2025-12-15</access-date><conf-name>Interspeech 2024</conf-name><conf-date>Sep 1-5, 2024</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2024/afonja24_interspeech.pdf">https://www.isca-archive.org/interspeech_2024/afonja24_interspeech.pdf</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Michalopoulos</surname><given-names>G</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kaka</surname><given-names>H</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>A</given-names> </name></person-group><article-title>UmlsBERT: clinical domain knowledge augmentation of contextual embeddings using the unified medical language system metathesaurus</article-title><access-date>2025-12-15</access-date><conf-name>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics</conf-name><conf-date>Jun 6-11, 2021</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/2021.naacl-main">https://aclanthology.org/2021.naacl-main</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2021.naacl-main.139</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Hsie</surname><given-names>C</given-names> </name><name name-style="western"><surname>Moreira</surname><given-names>C</given-names> </name><name name-style="western"><surname>Nobre</surname><given-names>IB</given-names> </name><etal/></person-group><article-title>DALL-m: context-aware clinical data augmentation with llms</article-title><source>arXiv</source><comment>Preprint posted online on  May 1, 2025</comment><pub-id pub-id-type="doi">10.48550/ARXIV.2407.08227</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>LY</given-names> </name><name name-style="western"><surname>Jain</surname><given-names>D</given-names> </name></person-group><article-title>EvolveCaptions: empowering DHH users through real-time collaborative captioning</article-title><source>arXiv</source><comment>Preprint posted online on  Oct 2, 2025</comment><pub-id pub-id-type="doi">10.48550/ARXIV.2510.02181</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spehar</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tye-Murray</surname><given-names>N</given-names> </name><name name-style="western"><surname>Myerson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Murray</surname><given-names>DJ</given-names> </name></person-group><article-title>Real- time captioning for improving informed consent: patient and physician benefits</article-title><source>Reg Anesth Pain Med</source><year>2016</year><volume>41</volume><issue>1</issue><fpage>65</fpage><lpage>68</lpage><pub-id pub-id-type="doi">10.1097/AAP.0000000000000347</pub-id><pub-id pub-id-type="medline">26650432</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Structured exit interview questions.</p><media xlink:href="rehab_v13i1e79073_app1.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material></app-group></back></article>