<?xml version="1.0" encoding="UTF-8"?>
<!--
  EDM (Europeana Data Model) record for the object URN:NBN:SI:DOC-QH60TOX8.
  Contents, in order: two edm:WebResource entries (PDF and TEXT streams),
  one edm:TimeSpan, the edm:ProvidedCHO describing the article, and the
  ore:Aggregation that links the object to its web resources and providers.
  Text-bearing elements are kept on a single line on purpose: whitespace
  inside them is part of the value.
-->
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:edm="http://www.europeana.eu/schemas/edm/"
         xmlns:wgs84_pos="http://www.w3.org/2003/01/geo/wgs84_pos#"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:rdaGr2="http://rdvocab.info/ElementsGr2/"
         xmlns:oai="http://www.openarchives.org/OAI/2.0/"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:ore="http://www.openarchives.org/ore/terms/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:dcterms="http://purl.org/dc/terms/">

  <!-- Web resources: the PDF stream (also referenced by edm:isShownBy below) and the TEXT stream. -->
  <edm:WebResource rdf:about="http://www.dlib.si/stream/URN:NBN:SI:DOC-QH60TOX8/c56a9bec-f6b8-4c4b-b18e-4e1dc96bbe20/PDF">
    <dcterms:extent>1267 KB</dcterms:extent>
  </edm:WebResource>
  <edm:WebResource rdf:about="http://www.dlib.si/stream/URN:NBN:SI:DOC-QH60TOX8/5ebce4c2-119b-4aa3-9ef0-a0714a9b55eb/TEXT">
    <dcterms:extent>0 KB</dcterms:extent>
  </edm:WebResource>

  <!-- Time span referenced by dcterms:temporal in the ProvidedCHO. -->
  <edm:TimeSpan rdf:about="1977-2026">
    <edm:begin xml:lang="en">1977</edm:begin>
    <edm:end xml:lang="en">2026</edm:end>
  </edm:TimeSpan>

  <!-- The described object: a journal article. -->
  <edm:ProvidedCHO rdf:about="URN:NBN:SI:DOC-QH60TOX8">
    <dcterms:isPartOf rdf:resource="https://www.dlib.si/details/URN:NBN:SI:spr-EE5UIE2V"/>
    <dcterms:issued>2025</dcterms:issued>
    <dc:creator>Calcina, Erik</dc:creator>
    <dc:creator>Dolinar, Lenart</dc:creator>
    <dc:creator>Novak, Erik</dc:creator>
    <!-- Slovenian labels: številka = issue, letnik = volume, str. = pages. -->
    <dc:format xml:lang="sl">številka:27</dc:format>
    <dc:format xml:lang="sl">letnik:49</dc:format>
    <dc:format xml:lang="sl">str. 1-8</dc:format>
    <dc:identifier>DOI:10.31449/inf.v49i27.7763</dc:identifier>
    <dc:identifier>ISSN:1854-3871</dc:identifier>
    <dc:identifier>COBISSID_HOST:245120515</dc:identifier>
    <dc:identifier>URN:URN:NBN:SI:doc-QH60TOX8</dc:identifier>
    <dc:language>en</dc:language>
    <dc:publisher xml:lang="sl">Informatika</dc:publisher>
    <dcterms:isPartOf xml:lang="sl">Informatica (Ljubljana)</dcterms:isPartOf>
    <dc:subject xml:lang="sl">jezikovni modeli</dc:subject>
    <dc:subject xml:lang="sl">sintetični podatki</dc:subject>
    <dc:subject xml:lang="sl">umetna inteligenca</dc:subject>
    <dcterms:temporal rdf:resource="1977-2026"/>
    <!-- Title and abstract are English text (dc:language is "en"), so they carry xml:lang="en". -->
    <dc:title xml:lang="en">Evaluating open-source large language models for synthetic non-English medical data generation using prompt-based techniques</dc:title>
    <dc:description xml:lang="en">Using synthetic data sets to train medicine-focused machine learning models has been shown to enhance their performance; however, most research focuses on English texts. In this paper, we explore generating non-English synthetic medical texts. We propose a methodology for generating medical synthetic data, showcasing it by generating medical texts written in a non-English mixed language. We evaluate our approach with thirteen different language models that are open-source and proprietary, and assess the quality of the data sets in two ways: performing a statistical comparison between the original data set and the generated data sets, and training a classifier to distinguish between original and synthetic examples. The Llama-3.2-3B model achieves the best F1 score of 0.821 ± 0.007 and accuracy of 0.816 ± 0.016, making it most suitable for generating indistinguishable medical synthetic data. In contrast, models like Aya-23, Phi-3, and SmoLLM variants achieve high F1 scores (0.945–0.948), indicating their synthetic data is easily distinguishable from original data. These findings highlight the importance of model selection when generating synthetic medical data sets in non-English languages.</dc:description>
    <edm:type>TEXT</edm:type>
    <dc:type xml:lang="sl">znanstveno časopisje</dc:type>
    <dc:type xml:lang="en">journals</dc:type>
    <dc:type rdf:resource="http://www.wikidata.org/entity/Q361785"/>
  </edm:ProvidedCHO>

  <!-- Aggregation: ties the object to its digital representations, rights statement and providers. -->
  <ore:Aggregation rdf:about="http://www.dlib.si/?URN=URN:NBN:SI:DOC-QH60TOX8">
    <edm:aggregatedCHO rdf:resource="URN:NBN:SI:DOC-QH60TOX8"/>
    <edm:isShownBy rdf:resource="http://www.dlib.si/stream/URN:NBN:SI:DOC-QH60TOX8/c56a9bec-f6b8-4c4b-b18e-4e1dc96bbe20/PDF"/>
    <edm:rights rdf:resource="http://creativecommons.org/licenses/by/4.0/"/>
    <edm:provider>Slovenian National E-content Aggregator</edm:provider>
    <edm:intermediateProvider xml:lang="en">National and University Library of Slovenia</edm:intermediateProvider>
    <edm:dataProvider xml:lang="sl">Slovensko društvo Informatika</edm:dataProvider>
    <edm:object rdf:resource="http://www.dlib.si/streamdb/URN:NBN:SI:DOC-QH60TOX8/maxi/edm"/>
    <edm:isShownAt rdf:resource="http://www.dlib.si/details/URN:NBN:SI:DOC-QH60TOX8"/>
  </ore:Aggregation>
</rdf:RDF>