<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Rehabil Assist Technol</journal-id><journal-id journal-id-type="publisher-id">rehab</journal-id><journal-id journal-id-type="index">17</journal-id><journal-title>JMIR Rehabilitation and Assistive Technologies</journal-title><abbrev-journal-title>JMIR Rehabil Assist Technol</abbrev-journal-title><issn pub-type="epub">2369-2529</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v12i1e69230</article-id><article-id pub-id-type="doi">10.2196/69230</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>A Novel QR Code&#x2013;Based Solution for Secure Electronic Health Record Transfer in Venous Thromboembolism Home Rehabilitation Management: Algorithm Development and Validation</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Li</surname><given-names>Changzhen</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Jin</surname><given-names>Zhigeng</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Wang</surname><given-names>Fei</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Zheqi</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Liu</surname><given-names>Binbin</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Guo</surname><given-names>Yutao</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Technical Department, DrBreath Medical Technology Co, Ltd</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff2"><institution>School of Information, Renmin University of China</institution><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff3"><institution>Department of Pulmonary Vascular and Thrombotic Disease, Sixth Medical Center of Chinese PLA General Hospital</institution><addr-line>No.6 Fucheng Road, Haidian District</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff4"><institution>Chinese PLA Medical School</institution><addr-line>Beijing</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Allemang</surname><given-names>Brooke</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Krishnapatnam</surname><given-names>Mahendra</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Kumar</surname><given-names>Rajeev</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yutao Guo, MD, PhD, Department of Pulmonary Vascular and Thrombotic Disease, Sixth Medical Center of Chinese PLA General Hospital, No.6 Fucheng Road, Haidian District, Beijing, 100048, China, 86 13810021492; <email>dor_guoyt@hotmail.com</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>11</day><month>8</month><year>2025</year></pub-date><volume>12</volume><elocation-id>e69230</elocation-id><history><date date-type="received"><day>25</day><month>11</month><year>2024</year></date><date date-type="rev-recd"><day>02</day><month>06</month><year>2025</year></date><date date-type="accepted"><day>03</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Changzhen Li, Zhigeng Jin, Fei Wang, Zheqi Zhang, Binbin Liu, Yutao Guo. Originally published in JMIR Rehabilitation and Assistive Technologies (<ext-link ext-link-type="uri" xlink:href="https://rehab.jmir.org">https://rehab.jmir.org</ext-link>), 11.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Rehabilitation and Assistive Technologies, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://rehab.jmir.org/">https://rehab.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://rehab.jmir.org/2025/1/e69230"/><abstract><sec><title>Background</title><p>Venous thromboembolism (VTE) is a common vascular disorder requiring extended anticoagulation therapy postdischarge to reduce recurrence risk. Home rehabilitation management systems that use electronic health records from hospital care provide opportunities for continuous patient monitoring. However, transferring medical data from clinical to home settings raises significant concerns about privacy and security. Conventional methods such as manual data entry, optical character recognition, and dedicated data transmission lines face notable technical and operational challenges.</p></sec><sec><title>Objective</title><p>This study  aims to develop a QR code&#x2013;based security transmission algorithm using Avro and byte pair encoding (BPE). The algorithm supports the secure creation and transfer of out-of-hospital health records by enabling patients to scan QR codes via a dedicated mobile app, ensuring data security and user privacy.</p></sec><sec sec-type="methods"><title>Methods</title><p>Between January and October 2024, 300 hospitalized patients with VTE were recruited at the Sixth Medical Center of the Chinese PLA General Hospital. Post discharge, participants used a home rehabilitation app tailored for VTE management. The QR code&#x2013;based security transmission algorithm  was developed to securely transfer in-hospital electronic health records to the out-of-hospital app. It uses BPE, Avro, and Gzip for optimized data compression and uses ChaCha20 and BLAKE3 for encryption and authentication. 
Specifically, BPE tokenizes medical text, while Avro serializes JSON (JavaScript Object Notation) objects, contributing to data encryption. A proprietary tokenizer was trained, and compression efficiency was evaluated using a &#x201C;Performance Benchmark Dataset.&#x201D; Comparative analyses were conducted to assess the compression efficiency of JSON serialization methods (Avro and ASN.1 [Abstract Syntax Notation One]), and tokenization algorithms (BPE and unigram).</p></sec><sec sec-type="results"><title>Results</title><p>The dataset consisted of JSON files from 300 patients, averaging 240.1 fields per file (range 89&#x2010;623) and 7095 bytes in size (range 2748&#x2010;17,425 bytes). Using the BPE + Avro + Gzip algorithm, the average file size was reduced to 1048 bytes, achieving a compression ratio of 6.67. This was 1.82 times more efficient than traditional Gzip compression (average file size: 1907 bytes; compression ratio: 3.66; <italic>P</italic>&#x003C;.001). For Chinese medical text tokenization, BPE outperformed unigram with a compression ratio of 4.68 versus 4.55 (<italic>P</italic>&#x003C;.001). Avro and ASN.1 demonstrated comparable compression ratios of 2.57 and 2.59, respectively, when used alone (<italic>P</italic>=.30). However, Avro combined with BPE and Gzip significantly outperformed ASN.1, achieving compression ratios of 6.67 versus 5.21 (<italic>P</italic>&#x003C;.001). Additionally, 84.7% (254/300)  of patients needed to scan only 1 QR code, requiring an average of 3.1 seconds.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The QR code&#x2013;based security transmission algorithm using Avro and BPE  efficiently compresses and transmits data in an encrypted manner and authenticates the identity of the scanning users, ensuring the privacy and security of medical data. 
Delivered as a software development kit, the algorithm offers straightforward implementation and usability, supporting its broad adoption across various applications.</p></sec></abstract><kwd-group><kwd>venous thromboembolism</kwd><kwd>home rehabilitation</kwd><kwd>electronic health record</kwd><kwd>QR code</kwd><kwd>secure data transmission</kwd><kwd>data compression</kwd><kwd>authenticated encryption</kwd><kwd>tokenization algorithm</kwd><kwd>data serialization</kwd><kwd>health information exchange</kwd><kwd>Avro</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Venous thromboembolism (VTE) is a leading cause of death and disability worldwide [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Patients with VTE require prolonged anticoagulant therapy after discharge to prevent the recurrence of thrombosis, and extended home rehabilitation management effectively reduces the incidence of VTE events [<xref ref-type="bibr" rid="ref3">3</xref>]. To enhance the efficiency of home rehabilitation management for patients with VTE, we developed a home rehabilitation mobile health app to assist doctors in managing patients&#x2019; recovery outside the hospital. According to guidelines and consensus, home anticoagulation management for VTE is complex, requiring not only professional medical knowledge but also accurate patient information, such as coagulation indicators, biochemical markers (eg, liver, kidney, and heart function), underlying diseases, and bleeding risks [<xref ref-type="bibr" rid="ref4">4</xref>]. Therefore, establishing comprehensive health records is the first step in effective home rehabilitation management. 
However, due to the multitude of information fields, manual entry by patients is difficult and leads to low user engagement.</p><p>Using patients&#x2019; electronic health records (EHRs) during hospitalization can improve the success rate of out-of-hospital health record creation [<xref ref-type="bibr" rid="ref5">5</xref>]. Hospital Information Systems are typically deployed within internal (intranet) environments, while out-of-hospital services operate on public internet infrastructure. Industry standards such as HL7 Fast Healthcare Interoperability Resources have been proposed to enable cross-institutional data exchange. However, in practice, the deployment of Fast Healthcare Interoperability Resources or other application programming interface&#x2013;based approaches faces multiple constraints. Their implementation depends on dedicated network connections via front-end machines or network gateways. This implementation approach introduces the following significant issues: (1) high hardware deployment and maintenance costs; (2) increased exposure of hospital intranets, elevating security risks; (3) low efficiency of cross-departmental collaboration, leading to slow service response; and (4) complete incompatibility with closed-network environments. Consequently, such protocols have not been widely adopted in domestic settings.</p><p>Some alternative approaches attempt to use optical character recognition on printed reports. However, the recognition accuracy (75.86%&#x2010;92.46%) is highly affected by scan quality and layout [<xref ref-type="bibr" rid="ref6">6</xref>], generally requiring manual validation [<xref ref-type="bibr" rid="ref7">7</xref>], making it difficult to scale for widespread use. 
Other solutions use QR code&#x2013;based transmission [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref14">14</xref>], either transmitting in plaintext&#x2014;posing leakage risks&#x2014;or relying solely on traditional compression algorithms such as Gzip, which offer limited compression efficiency and fail to make full use of QR code capacity. In real-world scenarios, a single QR code typically holds only 500&#x2010;1500 bytes, further highlighting the importance of efficient encoding. Additionally, certain implementations require specialized screens and camera equipment, failing to balance medical privacy protection with end user convenience.</p><p>To address these challenges in cross-network medical information exchange, this study proposed a secure QR code&#x2013;based transmission algorithm. The algorithm encrypts, compresses, and partitions JSON-formatted (JavaScript Object Notation) health records into multiple QR codes. Patients can safely, easily, and accurately complete the creation and transfer of out-of-hospital records simply by scanning the QR codes using a dedicated mobile health app.</p><p>To comprehensively demonstrate the advantages of the proposed solution in terms of recognition accuracy, data privacy, network exposure risk, and deployment cost, <xref ref-type="table" rid="table1">Table 1</xref> presents a comparative analysis between this method and existing typical solutions (including optical character recognition&#x2013;based solutions and dedicated channel solutions) in cross-network scenarios.</p><p>In this study, we have designed the algorithm framework in detail, described the compression and encryption processes, and constructed a training dataset. 
Finally, we validated the algorithm&#x2019;s performance using real-world clinical data.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of this proposal and existing medical information exchange solutions in cross-network scenarios.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dimension</td><td align="left" valign="bottom">OCR-based<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> solution</td><td align="left" valign="bottom">Dedicated channel (FHIR<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> or other API<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>)</td><td align="left" valign="bottom">Proposed scheme</td></tr></thead><tbody><tr><td align="left" valign="top">Accuracy</td><td align="left" valign="top">Low. Affected by image quality and layout; accuracy ranges from 75.86% to 92.46%, manual verification required</td><td align="left" valign="top">High. System-level integration with clear structure and minimal errors</td><td align="left" valign="top">High. Structured data embedded in QR code; stable decoding, unaffected by image quality</td></tr><tr><td align="left" valign="top">Data privacy</td><td align="left" valign="top">Low. Plaintext image transmission with high leakage risk</td><td align="left" valign="top">Medium-high. Relies on physical isolation and encryption; security depends on implementation</td><td align="left" valign="top">High. End-to-end encryption and authentication; supports zero-trust access</td></tr><tr><td align="left" valign="top">Network exposure risk</td><td align="left" valign="top">None. Typically offline image transmission, no network exposure</td><td align="left" valign="top">High. Requires open interfaces; increases attack surface and risk of intrusion</td><td align="left" valign="top">None. 
Offline QR transmission; no exposed network entry</td></tr><tr><td align="left" valign="top">Deployment cost</td><td align="left" valign="top">Low. No system changes needed, but high manual verification overhead</td><td align="left" valign="top">High. Requires gateway systems and dedicated channels; complex implementation and maintenance</td><td align="left" valign="top">Low. Lightweight SDK integration; high compatibility and easy deployment</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>OCR: optical character recognition.</p></fn><fn id="table1fn2"><p><sup>b</sup>FHIR: Fast Healthcare Interoperability Resources.</p></fn><fn id="table1fn3"><p><sup>c</sup>API: application programming interface.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Algorithm Design</title><sec id="s2-1-1"><title>Overview</title><p>We propose a QR code&#x2013;based secure transmission algorithm using Avro and  byte pair encoding (BPE), which supports the secure creation and transfer of out-of-hospital health records for inpatients after discharge. The application scenario of the QR code&#x2013;based secure transmission algorithm using Avro and  byte pair encoding (QRST-AB), as illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>, involves the following workflow: (1) health care providers access the patient&#x2019;s EHR; (2) the encoder module applies the algorithm to generate one or more QR codes, which are embedded into a printed rehabilitation report and physically delivered to the patient; (3) the patient scans the QR code using a mobile app; (4) the app uploads the QR code content to a server-side module; and (5) the decoder performs verification and decryption, with results presented to the user. 
In this process, the encoder operates within the hospital intranet (trusted domain), whereas the decoder resides in the public internet (untrusted domain), enabling secure cross-network transmission.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Application scenario of the proposed QRST-AB, illustrating the data flow of rehabilitation reports in cross-network environments. EHR: electronic health record; QRST-AB: QR code&#x2013;based security transmission algorithm using Avro and byte pair encoding.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig01.png"/></fig></sec><sec id="s2-1-2"><title>Algorithm Framework</title><p>The QRST-AB algorithm efficiently compresses and encrypts patient medical records in JSON format, divides them into multiple QR codes for secure transmission, and authenticates users through encrypted digital fingerprints, ensuring privacy and data security. The algorithm consists of 2 components: an encoder and a decoder. The encoder and decoder periodically synchronize the secret, schema, and tokenizer through offline methods. The details of the framework and processing flow of the QRST-AB algorithm are shown in <xref ref-type="fig" rid="figure2">Figure 2</xref> and <xref ref-type="other" rid="box1">Textbox 1</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The framework and processing flow of the QRST-AB, illustrating how patient medical records are compressed, encrypted, split into QR codes, and securely transmitted and authenticated. BPE: byte pair encoding; JSON: JavaScript Object Notation; QRST-AB: QR code&#x2013;based security transmission algorithm using Avro and byte pair encoding.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig02.png"/></fig><boxed-text id="box1"><title> <bold>Textbox 1</bold>. 
The processing flow of the QR code&#x2013;based secure transmission algorithm using Avro and byte pair encoding (BPE).</title><list list-type="order"><list-item><p>The patient medical record is deidentified and separated into 2 parts: the patient identifier and the limited data set. The data is processed through a multistage encoding pipeline consisting of BPE, Avro, Gzip, and ChaCha20, to generate a compressed and encrypted data stream, which serves as the message body. The data stream is then divided into multiple subpackets (body1...N) based on the predefined maximum size of the QR code. BPE tokenizes and vectorizes the texts in the medical record; Apache Avro serializes the JSON (JavaScript Object Notation) objects into a binary format; Gzip compresses the binary data stream; ChaCha20 encrypts the compressed data into ciphertext.</p></list-item><list-item><p>Every subpacket and patient identifier is hashed using the cryptographic function (BLAKE3) to generate a digital fingerprint and is encapsulated within the message header. 
This header is combined with body1...N to form multiple QR codes.</p></list-item><list-item><p>The QR codes are printed on A4 paper as part of the home rehabilitation report and handed to the patient, or displayed directly on a hospital computer screen for the patient to view.</p></list-item><list-item><p>The QR codes are scanned by a dedicated app on the smartphone, and the scanned content is uploaded to the server (decoder).</p></list-item><list-item><p>Upon receiving the QR code content, the decoder first generates a new digital fingerprint based on the logged-in user&#x2019;s identifier.</p></list-item><list-item><p>Next, the calculated fingerprint is authenticated against the digital fingerprint in the QR codes, and only if the authentication passes, the decoding process is executed.</p></list-item><list-item><p>In the presence of multiple QR codes, a merging operation is conducted to form a complete information body, which is then converted back into a JSON object through the decoding process. The specific steps include the following: ChaCha20 decrypts the ciphertext; Gzip decompresses the binary data; Avro deserializes the binary data back into a JSON object; and BPE restores the string representation of the JSON object from its numerical form.</p></list-item></list></boxed-text></sec><sec id="s2-1-3"><title>Protocol Package Format</title><p>Within each QR code, the data content is divided into 2 parts: the message header and the message body. The header consists of 5 fields: version, fingerprint, transaction, count, and seq. 
The specific definitions of each field are shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Definition of the message header fields in each QR code of the QR code&#x2013;based secure transmission algorithm using Avro and byte pair encoding.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Field</td><td align="left" valign="bottom">Definition</td><td align="left" valign="bottom">Value formula</td><td align="left" valign="bottom">Description</td><td align="left" valign="bottom">Len (bits)</td></tr></thead><tbody><tr><td align="left" valign="top">Version</td><td align="left" valign="top">Version identifier, a predetermined fixed value</td><td align="left" valign="top"><inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>C</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">Used for compatibility with historical versions after updates.</td><td align="char" char="." 
valign="top">8</td></tr><tr><td align="left" valign="top">Fingerprint</td><td align="left" valign="top">The message fingerprint</td><td align="left" valign="top"><inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>H</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>M</mml:mi><mml:mi>e</mml:mi><mml:mi>s</mml:mi><mml:mi>s</mml:mi><mml:mi>a</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>I</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mo stretchy="false">[</mml:mo><mml:mn>1</mml:mn><mml:mo>:</mml:mo><mml:mi>n</mml:mi><mml:mo stretchy="false">]</mml:mo></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">H() represents a cryptographic hash function. n represents the length of the digest in bytes, ranging from 1 to 64.</td><td align="char" char="." 
valign="top">16</td></tr><tr><td align="left" valign="top">Transaction</td><td align="left" valign="top">Transaction ID for the current transmission</td><td align="left" valign="top"><inline-formula><mml:math id="ieqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>T</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>a</mml:mi><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top"><italic>T<sub>adm</sub></italic> represents the patient&#x2019;s admission date, and <italic>T<sub>tag</sub></italic> is a predefined date for a specific tag, such as &#x201C;2020-01-01&#x201D;.</td><td align="char" char="." valign="top">16</td></tr><tr><td align="left" valign="top">Count</td><td align="left" valign="top">Number of QR codes the record is split into</td><td align="left" valign="top"><inline-formula><mml:math id="ieqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mrow><mml:mo>&#x2308;</mml:mo><mml:mfrac><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>o</mml:mi><mml:mi>d</mml:mi><mml:mi>y</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>q</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>L</mml:mi><mml:mrow><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>&#x2309;</mml:mo></mml:mrow></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top"><italic>L<sub>body</sub></italic> is the total length of the compressed and encrypted body, <italic>L<sub>qr</sub></italic> is the maximum length of a QR code, and <italic>L<sub>header</sub></italic> is the fixed length of the message header.</td><td align="char" char="." 
valign="top">4</td></tr><tr><td align="left" valign="top">Seq</td><td align="left" valign="top">Sequence number of each QR code</td><td align="left" valign="top"><inline-formula><mml:math id="ieqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:msub><mml:mi>S</mml:mi><mml:mrow><mml:mi>p</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>v</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mstyle></mml:math></inline-formula></td><td align="left" valign="top">Starts from 0 and increments by 1.</td><td align="char" char="." valign="top">4</td></tr></tbody></table></table-wrap></sec><sec id="s2-1-4"><title>Response Status Codes</title><p>When users receive information contained in the QR codes, the operation is simple&#x2014;scanning each QR code sequentially using the dedicated app. The program provides friendly prompts based on the parsing results. The explanations for different response status codes are listed in <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Response status codes and their explanations for users scanning QR codes with the dedicated app in the algorithm.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Result code</td><td align="left" valign="bottom">Explanation</td></tr></thead><tbody><tr><td align="left" valign="top">finished</td><td align="left" valign="top">All QR codes have been successfully scanned, and decoding is complete.</td></tr><tr><td align="left" valign="top">waiting</td><td align="left" valign="top">Current QR code has been successfully scanned, please continue scanning others.</td></tr><tr><td align="left" valign="top">duplicated</td><td align="left" valign="top">Current QR code has been scanned repeatedly, please continue scanning others.</td></tr><tr><td align="left" valign="top">tag_error</td><td align="left" valign="top">Tag error, not a QR code 
format supported by the program.</td></tr><tr><td align="left" valign="top">auth_failed</td><td align="left" valign="top">Authentication failed, please verify authorization.</td></tr><tr><td align="left" valign="top">exception</td><td align="left" valign="top">Other exceptions, please contact technical support for assistance.</td></tr></tbody></table></table-wrap></sec><sec id="s2-1-5"><title>Algorithm Scalability</title><p>In this algorithm, each version can define the length of individual fields in the message header. Different versions correspond to different encoders, each with its own secret, schema, and tokenizer. On the decoder side, a multiversion manager enables support for various encoders, allowing compatibility with multiple data centers.</p><p>By leveraging the flexible structure of the Avro schema, it is easy to extend the system to support different JSON formats. In addition to the VTE electronic medical records discussed in this paper, the algorithm can be adapted to other disease types and even to nonmedical application scenarios, demonstrating strong scalability and versatility.</p></sec></sec><sec id="s2-2"><title>Data Compression</title><sec id="s2-2-1"><title>JSON Serialization</title><p>JSON serialization refers to the process of converting data structures or objects into a string or binary form for the purpose of storage or transmission. The opposite process is deserialization, which restores the serialized data back to its original structure or object.</p><p>Within serialization methods, common types include schema-driven serialization and schema-less serialization. The former includes methods such as Abstract Syntax Notation One (ASN.1) using Packed Encoding Rules, Apache Avro, Microsoft Bond, Cap&#x2019;n Proto, FlatBuffers, Protocol Buffers, and Apache Thrift. These methods rely on predefined schemas to ensure the structure and type of data, typically achieving higher space efficiency. 
Previous studies have shown that ASN.1 and Apache Avro perform particularly well in compression efficiency [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>After comparative validation and in combination with the Gzip method, we found that Apache Avro outperforms ASN.1 in terms of compression efficiency, and Avro&#x2019;s syntax definition is more compatible. Therefore, in our algorithm, we adopted Apache Avro as the serialization method.</p></sec><sec id="s2-2-2"><title>Tokenization</title><p>Tokenization is a technique that converts text sequences into numerical sequences, serving as a foundational step in natural language processing tasks. It bridges the gap between raw text and language models. Existing tokenization methods, such as BPE, originate from the field of data compression. Some scholars believe that BPE is effective because it compresses text into fewer tokens, allowing the tokenizer to be trained more efficiently on specific datasets [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>].</p><p>The prevalent tokenization techniques in the field of natural language processing include BPE, unigram, and WordPiece. These techniques are adopted by different large language models: BPE is used by the LLaMA series and GPT series, while unigram and WordPiece are used by bidirectional encoder representations from transformers and its variants. Given that our study focuses on Chinese medical texts, which lack explicit delimiters and cannot directly use WordPiece, we have selected BPE and unigram for comparing tokenization compression efficiency.</p><p>In our dataset, we compared the compression efficacy of 2 tokenization methods: BPE and the unigram. Experimental results indicated that the BPE method demonstrated superior compression performance within the algorithmic framework proposed in this study. 
Therefore, we ultimately selected the BPE tokenization strategy [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p></sec><sec id="s2-2-3"><title>Data Compression Process</title><p>To demonstrate the proposed data compression approach in practice, we present the following illustrative example using a simplified JSON record of patient information. The JSON file contains 4 fields: admissionTime, hospitalDays, inpatientDept, and diagnosis, as shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Explanation of the compression process using BPE and Avro, where BPE tokenizes using the tokenizer and Avro serializes JSON based on schema. BPE: byte pair encoding; JSON: JavaScript Object Notation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig03.png"/></fig><p>Tokenization is performed on 2 free-text fields: &#x201C;inpatientDept&#x201D; and &#x201C;diagnosis&#x201D;. For example, &#x201C;Type 2 diabetes mellitus&#x201D; is split by the tokenizer into &#x201C;Type &#x2581;2 &#x2581;diabetes &#x2581;mellitus&#x201D; and further converted into a list of numerical values corresponding to tokens [10,302, 6500, and 2233]. It is noteworthy that due to differences in training corpora and hyper-parameter configurations, different tokenizers may generate varying segmentation results and tokens. This transformation not only contributes to compression but also provides a degree of information hiding, as the original text is not human-readable without the corresponding tokenizer.</p><p>Furthermore, Apache Avro is used to transform the JSON into a binary format to complete the serialization operation. 
As shown in the figure, the blue-marked bytes represent the value of the admission date &#x201C;2022-01-16,&#x201D; the red-marked bytes represent the length of hospital stay &#x201C;11&#x201D; (using variable-length zig-zag encoding, 11 (integer) = 22 (zig-zag) = 0x16 (octet)), the green-marked bytes represent the hospital department, and the orange-marked bytes represent the 3 disease names of the diagnosis.</p><p>It is important to note that the Avro-encoded binary data does not contain type information or field names, which significantly reduces the size of serialized data. However, this also means that the receiving end must use the same schema as the sending end to correctly read the Avro data. In our algorithm, the Avro schema is not transmitted via the QR code, making it particularly difficult for third parties without the schema to read the Avro data.</p><p>In summary, the exclusive tokenizer and Avro schema definitions held by the server and client provide a robust defense for data security. In the example mentioned above, the original JSON data occupies 185 bytes, and after processing with the BPE tokenization and Avro serialization, the space is reduced to 38 bytes, with a compression ratio reaching 4.87, thereby demonstrating its excellent compression performance.</p></sec></sec><sec id="s2-3"><title>Encryption and Authentication</title><p>Modern authenticated encryption adopts the authenticated encryption with associated data approach. The National Institute of Standards and Technology recommends standard algorithms such as AES-GCM, AES-CBC+HMAC, and ChaCha20-Poly1305, which ensure both data confidentiality and message integrity [<xref ref-type="bibr" rid="ref22">22</xref>]. However, these methods require additional overhead, including a 16-byte authentication tag and 8/12/24-byte Nonce or IV fields. 
AES-CBC further requires 16-byte block alignment, leading to significant message expansion that limits transmission efficiency in space-constrained QR codes.</p><p>This work adopts a symmetric authenticated encryption with associated data scheme, where both encoder (sender) and decoder (receiver) share a 256-bit secret key. ChaCha20 is used for encryption and BLAKE3 for authentication, resulting in only 2 bytes of overhead. ChaCha20 is an IETF-standardized stream cipher that uses a 256-bit key and a 96-bit nonce, producing a keystream via 20 rounds of permutation. It does not require alignment, keeps the same ciphertext size, and works fast in software even without special hardware, making it suitable for mobile and low-latency scenarios. BLAKE3 is an advanced cryptographic hash function with higher performance than Secure Hash Algorithm 256-bit and Secure Hash Algorithm 512-bit. It supports variable output lengths, allowing flexible adaptation to different application requirements [<xref ref-type="bibr" rid="ref23">23</xref>].</p><p>Compared with other transmission methods, QR codes printed on paper are harder to steal and are difficult to brute-force due to their low propagation efficiency. In our proposed algorithm, BLAKE3 is configured to produce a 2-byte hash, resulting in a brute-force probability of 1/(2^16), which is acceptable for practical authentication. ChaCha20 encryption, with a 256-bit key, offers a brute-force resistance of 1/(2^256), meeting the National Institute of Standards and Technology Level 3 security standard.</p></sec><sec id="s2-4"><title>Dataset</title><p>During the algorithm development phase, this study constructed a &#x201C;Chinese Medical Text Dataset&#x201D; for training a proprietary tokenizer. The dataset was sourced from the EHRs of approximately 80,000 historical patients from a hospital. 
The extracted fields included department names, surgical procedures, medication names, disease diagnoses, indicator names, and imaging examinations, among others, and were saved in text format. The access control layer integrates a time-based one-time password using a 6-digit random code, which limits the probability of guessing a valid code in a single attempt to $10^{-6}$.</p><p>In the algorithm validation phase, the study built a &#x201C;Performance Benchmark Dataset&#x201D; to verify the overall compression performance of the algorithm. This dataset was derived from the EHRs of 300 inpatients with VTE at the Sixth Medical Center of the Chinese PLA General Hospital between January and October 2024 and was saved in JSON format. These medical records comprehensively documented the patients&#x2019; basic information, surgical history, diseases, radiological examination results, laboratory test data, risk assessments, and medication upon discharge. Specifically, the basic information included the patient&#x2019;s age, gender, height, weight, department, and length of hospital stay; surgical details recorded the name and timing of the surgery; disease classification encompassed the name and category of the diseases; radiological examination results detailed the date of the examination, the name of the procedure, the body part examined, and the conclusions; laboratory test data listed the name of the indicator, its value, unit, risk indication, and the normal range; medication upon discharge recorded the name of the medication, the daily dosage, and the frequency of administration; and the risk assessment section included the assessment scale, risk level, and risk factors.</p></sec><sec id="s2-5"><title>Performance Metrics and Validation Methods</title><p>The compression ratio in this study is defined by the formula: CompressionRate=OrigDataBytes/CompressedDataBytes.</p><p>In terms of algorithm design, we strive not only to optimize overall performance but also to ensure compatibility with 
various worst-case scenarios. Therefore, when analyzing the byte size of JSON files and the number of fields in the &#x201C;Performance Benchmark Dataset,&#x201D; we used statistical methods for calculating the mean, as well as the minimum and maximum extreme values.</p><p>We compared the efficiency of our compression algorithm with the traditional Gzip compression algorithm. In addition, we compared the efficiency of the 2 most effective JSON serialization technologies in real patient datasets, as well as the compression efficiency of 2 mainstream tokenization algorithms on specific datasets. All of the above comparisons used mean statistical methods and used <italic>t</italic> tests to calculate <italic>P</italic> values.</p><p>Furthermore, we used cumulative distribution function graphs to analyze the distribution of the number of QR codes scanned by patients and tested the time required for creating patient records via QR codes using various models of mobile phones.</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>This cohort study was approved by the Medical Ethics Committee of the Sixth Medical Center of Chinese People&#x2019;s Liberation Army General Hospital (approval number: HZKY-PJ-2022-21). A waiver of informed consent was granted because this was a retrospective, data-only study, and all data were fully deidentified in accordance with institutional and national guidelines. Patients using the home rehabilitation service app provided informed consent at registration. QR code information constitutes a deidentified limited dataset with no personally identifiable information included; personally identifiable information is used only locally for authentication and is never transmitted. Encrypted QR codes are accessible only by authorized patients, ensuring data security and access control. 
All procedures complied with institutional policies, the Declaration of Helsinki, and relevant data protection regulations.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Statistical Characteristics of the Performance Benchmark Dataset</title><p>We conducted a detailed statistical analysis of the fields and their storage sizes in the JSON files of the &#x201C;Performance Benchmark Dataset,&#x201D; which is summarized in <xref ref-type="table" rid="table4">Table 4</xref>. The average number of fields in the raw JSON files was 240.1, ranging from 89 to 623, with an average storage byte size of 7095, varying from 2748 to 17,425. In terms of the number of fields, laboratory test data constituted the highest proportion at 67.5%, followed by risk assessment data at 9.6%. Regarding storage byte size, laboratory test data also represented the largest proportion at 61.4%, with radiological examination result data following at 14.4%.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Characteristics of the performance benchmark dataset in this study.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Patient (n=300)</td><td align="left" valign="bottom">Field (num), mean (min-max)</td><td align="left" valign="bottom">Field (%)</td><td align="left" valign="bottom">Size (bytes), mean (min-max)</td><td align="left" valign="bottom">Size (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Total</td><td align="left" valign="top">240.1 (89-623)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">7095 (2748-17,425)</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;Base</td><td align="left" valign="top">10 (10-10)</td><td align="left" valign="top">4.2</td><td align="left" valign="top">261 (242-270)</td><td align="left" 
valign="top">3.7</td></tr><tr><td align="left" valign="top">&#x2003;Operate</td><td align="left" valign="top">1.1 (0-32)</td><td align="left" valign="top">0.5</td><td align="left" valign="top">77 (23-1269)</td><td align="left" valign="top">1.1</td></tr><tr><td align="left" valign="top">&#x2003;Image</td><td align="left" valign="top">17.1 (4-56)</td><td align="left" valign="top">7.1</td><td align="left" valign="top">1022 (153-3944)</td><td align="left" valign="top">14.4</td></tr><tr><td align="left" valign="top">&#x2003;Lab</td><td align="left" valign="top">162 (46-496)</td><td align="left" valign="top">67.5</td><td align="left" valign="top">4354 (1269-13,365)</td><td align="left" valign="top">61.4</td></tr><tr><td align="left" valign="top">&#x2003;Ass</td><td align="left" valign="top">23 (10-35)</td><td align="left" valign="top">9.6</td><td align="left" valign="top">584 (212-1181)</td><td align="left" valign="top">8.2</td></tr><tr><td align="left" valign="top">&#x2003;Disease</td><td align="left" valign="top">12.2 (2-48)</td><td align="left" valign="top">5.1</td><td align="left" valign="top">419 (80-1537)</td><td align="left" valign="top">5.9</td></tr><tr><td align="left" valign="top">&#x2003;Drug</td><td align="left" valign="top">14.6 (0-85)</td><td align="left" valign="top">6.1</td><td align="left" valign="top">379 (28-2042)</td><td align="left" valign="top">5.3</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Overall Compression Efficiency Analysis</title><p>The average size of the original JSON data was 7095 bytes. After compression using the traditional Gzip algorithm, the average size was reduced to 1907 bytes, with an average compression ratio of 3.66. By applying our developed BPE+Avro+Gzip combined compression algorithm, the average size was further reduced to 1048 bytes, improving the average compression ratio to 6.67. 
The compression efficiency of our algorithm was 1.82 times that of the traditional Gzip algorithm (<italic>P</italic>&#x003C;.001). The scatter plot distribution in <xref ref-type="fig" rid="figure4">Figure 4</xref> shows that the larger the original JSON file, the more significant the compression effect.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Comparison of the compression efficiency between our Avro + BPE + Gzip combined method and traditional Gzip in the performance benchmark dataset. BPE: byte pair encoding; JSON: JavaScript Object Notation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig04.png"/></fig></sec><sec id="s3-3"><title>Comparative Analysis of Avro and ASN.1</title><p>According to previous research, ASN.1 and Avro demonstrated the best compression performance among JSON serialization technologies. For our &#x201C;Performance Benchmark Dataset,&#x201D; Avro achieved a compression ratio of 2.57, while ASN.1 had a compression ratio of 2.59, with no significant difference (<italic>P</italic>=.30). However, Avro significantly outperformed ASN.1 when combined with BPE or Gzip methods (<xref ref-type="table" rid="table5">Table 5</xref> and <xref ref-type="fig" rid="figure5">Figure 5</xref>). Specifically, the compression ratio for Avro+Gzip reached 4.72, compared with 4.12 for ASN.1+Gzip (<italic>P</italic>&#x003C;.001). 
Further, when the BPE method was introduced, the compression ratio for Avro+BPE+Gzip was 6.67, while for ASN.1+BPE+Gzip it was 5.21 (<italic>P</italic>&#x003C;.001).</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Comparison of compression ratios between Avro and Abstract Syntax Notation One (ASN.1) in different combinations with byte pair encoding (BPE) and Gzip.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Name</td><td align="left" valign="bottom">Median (IQR)</td><td align="left" valign="bottom">Min</td><td align="left" valign="bottom">Max</td></tr></thead><tbody><tr><td align="left" valign="top">ASN.1<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="top">2.59 (2.45-2.77)</td><td align="left" valign="top">1.78</td><td align="left" valign="top">3.09</td></tr><tr><td align="left" valign="top">Avro</td><td align="left" valign="top">2.57 (2.43-2.75)</td><td align="left" valign="top">1.78</td><td align="left" valign="top">3.07</td></tr><tr><td align="left" valign="top">ASN.1+Gzip<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="left" valign="top">4.12 (3.63-4.46)</td><td align="left" valign="top">2.92</td><td align="left" valign="top">6.92</td></tr><tr><td align="left" valign="top">Avro+Gzip</td><td align="left" valign="top">4.72 (4.18-5.15)</td><td align="left" valign="top">3.35</td><td align="left" valign="top">7.73</td></tr><tr><td align="left" valign="top">ASN.1+BPE<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td><td align="left" valign="top">5.01 (4.83-5.23)</td><td align="left" valign="top">4.07</td><td align="left" valign="top">5.81</td></tr><tr><td align="left" valign="top">Avro+BPE</td><td align="left" valign="top">5.14 (4.94-5.37)</td><td align="left" valign="top">4.16</td><td align="left" valign="top">5.99</td></tr><tr><td align="left" valign="top">ASN.1+BPE+Gzip<sup><xref 
ref-type="table-fn" rid="table5fn4">d</xref></sup></td><td align="left" valign="top">5.21 (4.95-5.45)</td><td align="left" valign="top">4.12</td><td align="left" valign="top">6.62</td></tr><tr><td align="left" valign="top">Avro+BPE+Gzip</td><td align="left" valign="top">6.67 (5.93-7.20)</td><td align="left" valign="top">4.67</td><td align="left" valign="top">10.41</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>ASN.1 versus Avro; <italic>P</italic>=.30.</p></fn><fn id="table5fn2"><p><sup>b</sup>ASN.1+Gzip versus Avro+Gzip; <italic>P</italic>&#x003C;.001.</p></fn><fn id="table5fn3"><p><sup>c</sup>ASN.1+BPE versus Avro+BPE; <italic>P</italic>&#x003C;.001.</p></fn><fn id="table5fn4"><p><sup>d</sup>ASN.1+BPE+Gzip versus Avro+BPE+Gzip; <italic>P</italic>&#x003C;.001.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Analysis of compression performance for Avro and ASN.1 in different combinations with BPE and Gzip, where &#x201C;Serial&#x201D; indicates the use of Avro or ASN.1 for JSON serialization. ASN.1: Abstract Syntax Notation One; BPE: byte pair encoding; JSON: JavaScript Object Notation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig05.png"/></fig></sec><sec id="s3-4"><title>Comparative Analysis of BPE and Unigram</title><p>Our constructed dataset consisted of a total of 1,146,532 records, with a total size of 122.5 MB and an average sentence length of 38.3 Chinese characters, corresponding to an average byte length of 106.8. We divided this dataset into a training set and a test set at a ratio of 9:1.</p><p>Using Google&#x2019;s open-source project SentencePiece for tokenizer training, we trained and validated 2 tokenization methods: unigram and BPE [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>], with a vocab_size of 65,535, allowing each token to be represented by a 2-byte unit. 
In terms of training duration, unigram took 65.91 seconds, while BPE took 360.2 seconds, with unigram being faster. In terms of compression ratio, on the training set, unigram and BPE achieved compression ratios of 4.7 and 4.83, respectively (<italic>P</italic>&#x003C;.001); on the test set, they achieved 4.55 and 4.68, respectively (<italic>P</italic>&#x003C;.001), showing that BPE is more efficient than unigram (<xref ref-type="table" rid="table6">Table 6</xref>).</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Comparison of compression ratios for byte pair encoding (BPE) and unigram with different serialization methods in the Chinese medical text dataset.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Dataset</td><td align="left" valign="bottom">Sentences (n=1,146,532)</td><td align="left" valign="bottom">Unigram</td><td align="left" valign="bottom">BPE</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Train</td><td align="left" valign="top">1,031,697</td><td align="left" valign="top">4.70</td><td align="left" valign="top">4.83</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Test</td><td align="left" valign="top">114,835</td><td align="left" valign="top">4.55</td><td align="left" valign="top">4.68</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table></table-wrap></sec><sec id="s3-5"><title>QR Code Distribution and Scanning Time</title><p>Based on previous research, when each module size is set to 4&#x00D7;4 pixels, 100% readability can be achieved on A4 white paper [<xref ref-type="bibr" rid="ref24">24</xref>]. For the largest QR code size, the number of modules contained is 177&#x00D7;177 [<xref ref-type="bibr" rid="ref25">25</xref>]. 
With a border width of 4 and an estimated resolution of 200 dots per inch for a standard printer, even the largest QR code would occupy a space of 3.7 inches (9.39 cm) in both length and width on A4 paper.</p><p>Considering the response speed of mobile phone scanning and the redundancy requirements of QR codes, we selected the version 25 QR code with 117&#x00D7;117 modules, which has a theoretical size of 2.5 inches (6.35 cm). Taking into account the differences in dots per inch, ink density, and default print margins across different hospital printers, we reserved a 20% redundancy, resulting in a designed QR code size of 7.62 cm &#x00D7; 7.62 cm (approximately 3&#x00D7;3 inches), with 2 QR codes per row. The specific layout is shown in <xref ref-type="fig" rid="figure6">Figure 6</xref>.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Example of a home rehabilitation report given to patients upon discharge for scanning QR codes. VTE: venous thromboembolism.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig06.png"/></fig><p>We used version 25 QR codes with an error correction level set to L, each supporting a maximum binary capacity of 1273 bytes [<xref ref-type="bibr" rid="ref25">25</xref>]. As shown in <xref ref-type="fig" rid="figure7">Figure 7</xref>, following a cumulative distribution function [<xref ref-type="bibr" rid="ref26">26</xref>] analysis of the compressed file sizes for 300 patients, we found that 84.7% of the patients needed to scan only one QR code, while 5.3% of the patients required scanning 2 QR codes. Only an extremely small number of patients may need to scan more than 3 QR codes.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>CDF analysis of file sizes for 300 patients after compression, showing that 84.7% of patients require scanning only 1 QR code with a file size less than 1273 bytes. 
CDF: cumulative distribution function.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="rehab_v12i1e69230_fig07.png"/></fig><p>Based on the size of the compressed files, we selected 3 patients for a scanning test, corresponding to 1, 2, and 3 QR codes, respectively. The test timing started when the patient clicked the scan button and ended when the record creation was completed. Testing with different models of mobile phones showed that, on average, scanning 1 QR code took 3.1 seconds, scanning 2 QR codes took 7.30 seconds, and scanning 3 QR codes took 8.73 seconds. ANOVA was used to test the differences, with a <italic>P</italic> value &#x003C;.001.</p><p>Based on the size of the compressed files, we selected 3 patients for a QR code scanning test, corresponding to 1, 2, and 3 QR codes, respectively. The timing started when the patient clicked the scan button and ended when the registration was completed. Different smartphone models were used for the tests, and the results showed that, on average, scanning one QR code took 3.1 seconds, scanning 2 QR codes took 7.30 seconds, and scanning 3 QR codes took 8.73 seconds (<xref ref-type="table" rid="table7">Table 7</xref>). 
ANOVA was used to test the differences, with a <italic>P</italic> value &#x003C;.001.</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Duration (in seconds) for different types of patients to scan QR codes and complete record creation using various mobile phone models.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Phone model</td><td align="left" valign="bottom">Patient 1 (1 QR code)</td><td align="left" valign="bottom">Patient 2 (2 QR codes)</td><td align="left" valign="bottom">Patient 3 (3 QR codes)</td></tr></thead><tbody><tr><td align="left" valign="top">iPhone 13 Pro Max</td><td align="left" valign="top">2.43</td><td align="left" valign="top">8.02</td><td align="left" valign="top">6.32</td></tr><tr><td align="left" valign="top">Xiaomi 13 ultra</td><td align="left" valign="top">2.18</td><td align="left" valign="top">5.95</td><td align="left" valign="top">8.79</td></tr><tr><td align="left" valign="top">iPhone 15 Pro</td><td align="left" valign="top">1.63</td><td align="left" valign="top">5.44</td><td align="left" valign="top">5.68</td></tr><tr><td align="left" valign="top">OPPO Find X5 Pro</td><td align="left" valign="top">1.94</td><td align="left" valign="top">6.50</td><td align="left" valign="top">7.70</td></tr><tr><td align="left" valign="top">OPPO K9s</td><td align="left" valign="top">6.03</td><td align="left" valign="top">8.83</td><td align="left" valign="top">10.54</td></tr><tr><td align="left" valign="top">Redmi K60</td><td align="left" valign="top">2.36</td><td align="left" valign="top">8.88</td><td align="left" valign="top">10.29</td></tr><tr><td align="left" valign="top">Xiaomi 10</td><td align="left" valign="top">2.98</td><td align="left" valign="top">6.28</td><td align="left" valign="top">9.37</td></tr><tr><td align="left" valign="top">Redmi Note 13 Pro</td><td align="left" valign="top">5.30</td><td align="left" valign="top">8.48</td><td align="left" 
valign="top">11.17</td></tr><tr><td align="left" valign="top">Mean</td><td align="left" valign="top">3.10</td><td align="left" valign="top">7.30</td><td align="left" valign="top">8.73</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Main Findings</title><p>We developed a QRST-AB, which assists patients in creating home rehabilitation records after discharge while ensuring the privacy, security, and efficiency of medical data transmission. The primary outcomes of our study are as follows. (1) We proposed a QR code authentication and encryption mechanism that leverages ChaCha20 encryption and the BLAKE3 hash function, reducing message inflation to as low as 2 bytes and enabling noninteractive cross-domain verification and data security in a zero-trust environment. (2) We developed an efficient combined data compression method, where the BPE+Avro+Gzip compression approach is 1.82 times as efficient as traditional Gzip compression, allowing a single patient&#x2019;s medical record to be compressed into 1&#x2010;3 QR codes, thus improving the practicality and efficiency of QR code transmission in medical data scenarios. (3) The algorithm is provided in software development kit form, making it easy to deploy and use, which facilitates broader adoption.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>In previous research, there have been instances of embedding medical data, as opposed to URL information, into QR codes for transmission. Specifically, Lin et al [<xref ref-type="bibr" rid="ref8">8</xref>] encoded patients&#x2019; prescription information into QR codes, facilitating the use of these codes by patients at different pharmacies. The QR code encompassed information across 17 fields, including the patient&#x2019;s name, identification, age, type of disease, name of medication, and date, among other critical data. 
Additionally, Nakayama from Japan [<xref ref-type="bibr" rid="ref9">9</xref>] and Mathivanan from India [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] have successfully embedded electrocardiogram data into QR codes. Lauriot [<xref ref-type="bibr" rid="ref12">12</xref>] adopted the approach of directly embedding the data results of imaging reports into a single QR code. Mao et al [<xref ref-type="bibr" rid="ref13">13</xref>], on the other hand, used a different strategy; they segmented a file containing medical data and embedded the segmented data into consecutive QR codes, which were dynamically and continuously displayed in a streaming video. Users could capture this video stream with a smartphone and subsequently recover the relevant medical data from the QR codes.</p><p>However, in previous practices, despite the use of signal sampling and private encryption algorithms to enhance security when embedding electrocardiogram data into QR codes, medical texts are still embedded in plaintext, which may expose the data to interception by unauthorized third parties, leading to potential leakage of patient privacy information. Moreover, these cases generally lack user authentication mechanisms and exhibit low compression efficiency.</p><p>Prior to this work, several studies have proposed solutions for securing data transmission in untrusted environments. For example, [<xref ref-type="bibr" rid="ref27">27</xref>] introduces authentication and encryption measures for military communication protocols to prevent man-in-the-middle and replay attacks. In [<xref ref-type="bibr" rid="ref28">28</xref>], blockchain technology is used to establish a decentralized trust mechanism&#x2014;for instance, similar to the framework described in [<xref ref-type="bibr" rid="ref29">29</xref>], one can refer to its on-chain authentication and consensus processes to enhance data integrity and immutability when transmitting QR-embedded information. 
In the health care domain, [<xref ref-type="bibr" rid="ref30">30</xref>] emphasizes the need to balance data security and privacy protection in telemedicine interoperability, which informs our design of QR code data encryption and client authentication. Moreover, [<xref ref-type="bibr" rid="ref31">31</xref>] addresses deepfake disinformation strategies from a holistic cybersecurity perspective, offering guidance on ensuring the integrity and trustworthiness of data embedded in QR codes. Finally, [<xref ref-type="bibr" rid="ref32">32</xref>] presents a hesitant fuzzy decision-making approach for usable-security assessment, which can be applied to optimize the usability of our underlying authentication workflow.</p><p>To address these security risks and efficiency issues, we have taken the following improvement measures. First, we have proposed a complete solution and an end-to-end communication mechanism, defined the protocol specifications, and added authentication information to the protocol header. We use cryptographic hash algorithms to verify the identity of the scanning user, preventing unauthorized users from scanning the QR code. Additionally, we use encrypted transmission, which remains secure even if a third party understands the principles of our algorithm, as they do not possess the ChaCha20 secret key, the Avro encoding schema, and the BPE tokenizer, and thus cannot decipher the contents of the QR code.</p><p>Second, we have proposed an efficient compression algorithm for patient medical record data, organically integrating JSON serialization technology, tokenization techniques based on large language models (including subword tokenization methods like BPE), and binary data compression technology. The combination of BPE+Avro+Gzip has achieved optimal compression performance. To date, there is no precedent for compressing electronic medical records using this combination. 
Although the transmission efficiency of QR codes is inherently limited and slow, our effective compression algorithm can significantly reduce the size of medical record data, thus greatly enhancing the practicality of QR codes in the transmission of medical information. Furthermore, our solution supports the transmission of multiple QR codes in a single transaction, thereby enabling the carriage of larger volumes of medical records and information.</p><p>Through our improvements and real-world data validation, the QRST-AB possesses the characteristics of encryption, authentication, efficiency, robustness, and scalability. This makes it well-suited for home rehabilitation registration scenarios and other situations requiring the transmission of patient privacy data.</p></sec><sec id="s4-3"><title>Limitations</title><p>There were some limitations to this study. First, due to the limited project timeline, validation was conducted using single-center data only. However, the algorithm was designed with flexibility for multicenter deployment and support for diverse disease types, which will be further validated in future studies. Second, when patient records are large and QR codes contain substantial data, poor network conditions may lead to delays in the decoder&#x2019;s real-time response. 
Third, the solution relies on manual scanning, and the patient&#x2019;s digital health literacy may affect the user experience; however, most users are capable of performing scanning operations, which helps mitigate this issue.</p></sec><sec id="s4-4"><title>Future Work</title><p>We plan to conduct multicenter validation and extend support to other disease types, as well as optimize the algorithm to reduce response delays caused by large patient data volumes.</p></sec><sec id="s4-5"><title>Conclusions</title><p>The QRST-AB algorithm efficiently compresses and transmits data in an encrypted manner and authenticates the identity of the scanning users, ensuring the privacy and security of medical data. Delivered as a software development kit, the algorithm offers straightforward implementation and usability, supporting its broad adoption across various applications. All code is publicly available through the QRST-AB GitHub repository.</p></sec></sec></body><back><ack><p>This study was supported by the National Clinical Research Center for Orthopedics and Sports Rehabilitation Innovation Fund (grant 23-NCRC-CXJJ-ZD3-4), the Beijing Natural Science Foundation&#x2013;Haidian Original Innovation Joint Fund (grant L232117), and the National Natural Science Foundation of China (grant 8217020014).</p></ack><notes><sec><title>Data Availability</title><p>The data that support the findings of this study are available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>YG had full access to all of the data in the study and takes responsibility for the integrity of the data and the accuracy of the data analysis. The study was conceptualized and designed by YG, FW, and CL. CL was responsible for the development. Data acquisition, analysis, and interpretation were performed by CL, ZJ, and YG. CL and ZJ drafted the manuscript. YG, FW, ZJ, and CL contributed to the critical revision of the manuscript for important intellectual content. 
Statistical analysis was conducted by CL, ZZ, and BL. YG obtained the funding for the study.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ASN.1</term><def><p>Abstract Syntax Notation One</p></def></def-item><def-item><term id="abb2">BPE</term><def><p>byte pair encoding</p></def></def-item><def-item><term id="abb3">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb4">JSON</term><def><p>JavaScript Object Notation</p></def></def-item><def-item><term id="abb5">QRST-AB</term><def><p>QR code-based security transmission algorithm using Avro and byte pair encoding</p></def></def-item><def-item><term id="abb6">VTE</term><def><p>venous thromboembolism</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heit</surname><given-names>JA</given-names> </name></person-group><article-title>Epidemiology of venous thromboembolism</article-title><source>Nat Rev Cardiol</source><year>2015</year><month>08</month><volume>12</volume><issue>8</issue><fpage>464</fpage><lpage>474</lpage><pub-id pub-id-type="doi">10.1038/nrcardio.2015.83</pub-id><pub-id pub-id-type="medline">26076949</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nicholson</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Bhagirath</surname><given-names>V</given-names> </name><name name-style="western"><surname>Ginsberg</surname><given-names>J</given-names> </name></person-group><article-title>Prevention of venous thromboembolism in 2020 and beyond</article-title><source>J Clin 
Med</source><year>2020</year><month>08</month><day>1</day><volume>9</volume><issue>8</issue><fpage>2467</fpage><pub-id pub-id-type="doi">10.3390/jcm9082467</pub-id><pub-id pub-id-type="medline">32752154</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mla&#x010D;o</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mla&#x010D;o</surname><given-names>N</given-names> </name><name name-style="western"><surname>Bejtovi&#x0107;</surname><given-names>D</given-names> </name><name name-style="western"><surname>Spu&#x017E;i&#x0107;</surname><given-names>M</given-names> </name><name name-style="western"><surname>D&#x017E;ubur</surname><given-names>A</given-names> </name><name name-style="western"><surname>Begi&#x0107;</surname><given-names>E</given-names> </name></person-group><article-title>Provoked venous thromboembolism during ten-year follow up at the Clinical Centre University of Sarajevo</article-title><source>Med Glas (Zenica)</source><year>2020</year><month>02</month><day>1</day><volume>17</volume><issue>1</issue><fpage>54</fpage><lpage>58</lpage><pub-id pub-id-type="doi">10.17392/1038-20</pub-id><pub-id pub-id-type="medline">31402639</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>Hospital Pharmacy Professional Committee of the Chinese Pharmaceutical Association</collab></person-group><article-title>Chinese expert consensus on home management of oral anticoagulants</article-title><source>Natl Med J China</source><year>2024</year><volume>104</volume><fpage>2595</fpage><lpage>2612</lpage><pub-id pub-id-type="doi">10.3760/cma.j.cn112137-20231102-00987</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Wright</surname><given-names>A</given-names> </name><name name-style="western"><surname>McGlinchey</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Poon</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Jenter</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Bates</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Simon</surname><given-names>SR</given-names> </name></person-group><article-title>Ability to generate patient registries among practices with and without electronic health records</article-title><source>J Med Internet Res</source><year>2009</year><month>08</month><day>10</day><volume>11</volume><issue>3</issue><fpage>e31</fpage><pub-id pub-id-type="doi">10.2196/jmir.1166</pub-id><pub-id pub-id-type="medline">19674961</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Batra</surname><given-names>P</given-names> </name><name name-style="western"><surname>Phalnikar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kurmi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tembhurne</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sahare</surname><given-names>P</given-names> </name><name name-style="western"><surname>Diwan</surname><given-names>T</given-names> </name></person-group><article-title>OCR-MRD: performance analysis of different optical character recognition engines for medical report digitization</article-title><source>Int J Inf Tecnol</source><year>2024</year><month>01</month><volume>16</volume><issue>1</issue><fpage>447</fpage><lpage>455</lpage><pub-id pub-id-type="doi">10.1007/s41870-023-01610-2</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ujiie</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yada</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wakamiya</surname><given-names>S</given-names> </name><name name-style="western"><surname>Aramaki</surname><given-names>E</given-names> </name></person-group><article-title>Identification of adverse drug event-related Japanese articles: natural language processing analysis</article-title><source>JMIR Med Inform</source><year>2020</year><month>11</month><day>27</day><volume>8</volume><issue>11</issue><fpage>e22661</fpage><pub-id pub-id-type="doi">10.2196/22661</pub-id><pub-id pub-id-type="medline">33245290</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Tsai</surname><given-names>FY</given-names> </name><name name-style="western"><surname>Tsai</surname><given-names>WL</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>HW</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>ML</given-names> </name></person-group><article-title>The feasibility of QR-code prescription in Taiwan</article-title><source>J Clin Pharm Ther</source><year>2012</year><month>12</month><volume>37</volume><issue>6</issue><fpage>643</fpage><lpage>646</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2710.2012.01358.x</pub-id><pub-id pub-id-type="medline">22612397</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nakayama</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Shimokawa</surname><given-names>H</given-names> </name></person-group><article-title>Evaluation of an electrocardiogram on QR code</article-title><source>Stud Health Technol Inform</source><year>2013</year><volume>192</volume><fpage>1020</fpage><pub-id pub-id-type="medline">23920794</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mathivanan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Edward Jero</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ramu</surname><given-names>P</given-names> </name><name name-style="western"><surname>Balaji Ganesh</surname><given-names>A</given-names> </name></person-group><article-title>QR code based patient data protection in ECG steganography</article-title><source>Australas Phys Eng Sci Med</source><year>2018</year><month>12</month><volume>41</volume><issue>4</issue><fpage>1057</fpage><lpage>1068</lpage><pub-id pub-id-type="doi">10.1007/s13246-018-0695-y</pub-id><pub-id pub-id-type="medline">30397899</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mathivanan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ganesh</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Venkatesan</surname><given-names>R</given-names> </name></person-group><article-title>QR code&#x2013;based ECG signal encryption/decryption algorithm</article-title><source>Cryptologia</source><year>2019</year><month>05</month><day>4</day><volume>43</volume><issue>3</issue><fpage>233</fpage><lpage>253</lpage><pub-id pub-id-type="doi">10.1080/01611194.2018.1549122</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lauriot Dit Prevost</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bentegeac</surname><given-names>R</given-names> </name><name name-style="western"><surname>Dequesnes</surname><given-names>A</given-names> </name><etal/></person-group><article-title>&#x201C;Re-Materialized&#x201D; medical data: paper-based transmission of structured medical data using QR-Code, for medical imaging reports</article-title><source>Stud Health Technol Inform</source><year>2022</year><month>06</month><day>6</day><volume>290</volume><fpage>210</fpage><lpage>214</lpage><pub-id pub-id-type="doi">10.3233/SHTI220063</pub-id><pub-id pub-id-type="medline">35673002</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mao</surname><given-names>H</given-names> </name><name name-style="western"><surname>Chi</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>P</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>D</given-names> </name></person-group><article-title>QRStream: a secure and convenient method for text healthcare data transferring</article-title><source>Annu Int Conf IEEE Eng Med Biol Soc</source><year>2019</year><month>07</month><volume>2019</volume><fpage>3458</fpage><lpage>3462</lpage><pub-id pub-id-type="doi">10.1109/EMBC.2019.8856946</pub-id><pub-id pub-id-type="medline">31946623</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name 
name-style="western"><surname>Bhardwaj</surname><given-names>C</given-names> </name><name name-style="western"><surname>Garg</surname><given-names>H</given-names> </name></person-group><article-title>An approach for enhancing data storage capacity in quick response code using zip compression technique</article-title><conf-name>2023 International Conference on Artificial Intelligence and Smart Communication (AISC)</conf-name><conf-date>Feb 24-25, 2023</conf-date><conf-loc>Greater Noida, India</conf-loc><pub-id pub-id-type="doi">10.1109/AISC56616.2023.10085559</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Viotti</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Kinderkhedia</surname><given-names>M</given-names> </name></person-group><article-title>A survey of JSON-compatible binary serialization specifications</article-title><source>arXiv</source><comment>Preprint posted online on Jan 6, 2022</comment><pub-id pub-id-type="doi">10.48550/arXiv.2201.02089</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Jackson</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cummings</surname><given-names>N</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>S</given-names> </name></person-group><article-title>Streaming technologies and serialization protocols: empirical performance analysis</article-title><source>arXiv</source><comment>Preprint posted online on Jul 18, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2407.13494</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name 
name-style="western"><surname>Del&#x00E9;tang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ruoss</surname><given-names>A</given-names> </name><name name-style="western"><surname>Duquenne</surname><given-names>PA</given-names> </name><etal/></person-group><article-title>Language modeling is compression</article-title><source>arXiv</source><comment>Preprint posted online on Sep 19, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2309.10668</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gage</surname><given-names>P</given-names> </name></person-group><article-title>A new algorithm for data compression</article-title><source>C Users J</source><year>1994</year><volume>12</volume><fpage>23</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.5555/177910.177914</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Schmidt</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Reddy</surname><given-names>V</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Tokenization is more than compression</article-title><source>arXiv</source><comment>Preprint posted online on Feb 2, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2402.18376</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Sennrich</surname><given-names>R</given-names> </name><name name-style="western"><surname>Haddow</surname><given-names>B</given-names> </name><name name-style="western"><surname>Birch</surname><given-names>A</given-names> 
</name></person-group><article-title>Neural machine translation of rare words with subword units</article-title><conf-name>Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name><conf-date>Aug 7-12, 2016</conf-date><conf-loc>Berlin, Germany</conf-loc></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kudo</surname><given-names>T</given-names> </name></person-group><article-title>Subword regularization: improving neural network translation models with multiple subword candidates</article-title><conf-name>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</conf-name><conf-date>Jul 15-20, 2018</conf-date><conf-loc>Melbourne, Australia</conf-loc></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>Nir</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Langley</surname><given-names>A</given-names> </name></person-group><article-title>ChaCha20 and Poly1305 for IETF protocols</article-title><year>2018</year><publisher-name>Internet Engineering Task Force (IETF)</publisher-name><pub-id pub-id-type="doi">10.17487/RFC8439</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Pandya</surname><given-names>M</given-names> </name></person-group><article-title>Performance evaluation of hashing algorithms on commodity hardware</article-title><source>arXiv</source><comment>Preprint posted online on Jul 11, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2407.08284</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tarjan</surname><given-names>L</given-names> </name><name name-style="western"><surname>&#x0160;enk</surname><given-names>I</given-names> </name><name name-style="western"><surname>Tegeltija</surname><given-names>S</given-names> </name><name name-style="western"><surname>Stankovski</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ostojic</surname><given-names>G</given-names> </name></person-group><article-title>A readability analysis for QR code application in a traceability system</article-title><source>Comput Electron Agric</source><year>2014</year><month>11</month><volume>109</volume><fpage>1</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.1016/j.compag.2014.08.015</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>Information capacity and versions of the QR code</article-title><source>QRcode.com</source><year>2024</year><access-date>2024-10-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.qrcode.com/en/about/version.html">https://www.qrcode.com/en/about/version.html</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Cumulative distribution function</article-title><source>Wikipedia</source><year>2024</year><access-date>2024-10-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://en.wikipedia.org/wiki/Cumulative_distribution_function">https://en.wikipedia.org/wiki/Cumulative_distribution_function</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kumar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>RA</given-names> 
</name></person-group><article-title>Securing communication protocols in military computing</article-title><source>Netw Secur</source><year>2024</year><month>03</month><volume>2024</volume><issue>3</issue><fpage>70011</fpage><lpage>70017</lpage><pub-id pub-id-type="doi">10.12968/S1353-4858(24)70011-7</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kumar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ahmad Khan</surname><given-names>R</given-names> </name></person-group><article-title>Securing military computing with the blockchain</article-title><source>Comput Fraud Secur</source><year>2024</year><month>02</month><volume>2024</volume><issue>2</issue><fpage>70007</fpage><lpage>4</lpage><pub-id pub-id-type="doi">10.12968/S1361-3723(24)70007-4</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sahu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>R</given-names> </name></person-group><article-title>A secure decentralised finance framework</article-title><source>Comput Fraud Secur</source><year>2024</year><month>03</month><volume>2024</volume><issue>3</issue><fpage>70010</fpage><lpage>70014</lpage><pub-id pub-id-type="doi">10.12968/S1361-3723(24)70010-4</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sahu</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>R</given-names> </name></person-group><article-title>Telemedicine: how to achieve interoperability without compromising data security</article-title><source>Br J Healthc 
Manag</source><year>2025</year><month>01</month><day>2</day><volume>31</volume><issue>1</issue><fpage>1</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.12968/bjhc.2024.0029</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kumar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Alharbe</surname><given-names>N</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>RA</given-names> </name></person-group><article-title>Code of silence: cyber security strategies for combating deepfake disinformation</article-title><source>Comput Fraud Secur</source><year>2024</year><month>04</month><volume>2024</volume><issue>4</issue><pub-id pub-id-type="doi">10.12968/S1361-3723(24)70013-X</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kumar</surname><given-names>R</given-names> </name><name name-style="western"><surname>Baz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Alhakami</surname><given-names>H</given-names> </name><etal/></person-group><article-title>A hybrid model of hesitant fuzzy decision-making analysis for estimating usable-security of software</article-title><source>IEEE Access</source><year>2020</year><volume>8</volume><fpage>72694</fpage><lpage>72712</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2020.2987941</pub-id></nlm-citation></ref></ref-list></back></article>