{"?xml":{"@version":"1.0"},"edm:RDF":{"@xmlns:dc":"http://purl.org/dc/elements/1.1/","@xmlns:edm":"http://www.europeana.eu/schemas/edm/","@xmlns:wgs84_pos":"http://www.w3.org/2003/01/geo/wgs84_pos","@xmlns:foaf":"http://xmlns.com/foaf/0.1/","@xmlns:rdaGr2":"http://rdvocab.info/ElementsGr2","@xmlns:oai":"http://www.openarchives.org/OAI/2.0/","@xmlns:owl":"http://www.w3.org/2002/07/owl#","@xmlns:rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#","@xmlns:ore":"http://www.openarchives.org/ore/terms/","@xmlns:skos":"http://www.w3.org/2004/02/skos/core#","@xmlns:dcterms":"http://purl.org/dc/terms/","edm:WebResource":[{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:DOC-QH60TOX8/c56a9bec-f6b8-4c4b-b18e-4e1dc96bbe20/PDF","dcterms:extent":"1267 KB"},{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:DOC-QH60TOX8/5ebce4c2-119b-4aa3-9ef0-a0714a9b55eb/TEXT","dcterms:extent":"0 KB"}],"edm:TimeSpan":{"@rdf:about":"1977-2026","edm:begin":{"@xml:lang":"en","#text":"1977"},"edm:end":{"@xml:lang":"en","#text":"2026"}},"edm:ProvidedCHO":{"@rdf:about":"URN:NBN:SI:DOC-QH60TOX8","dcterms:isPartOf":[{"@rdf:resource":"https://www.dlib.si/details/URN:NBN:SI:spr-EE5UIE2V"},{"@xml:lang":"sl","#text":"Informatica (Ljubljana)"}],"dcterms:issued":"2025","dc:creator":["Calcina, Erik","Dolinar, Lenart","Novak, Erik"],"dc:format":[{"@xml:lang":"sl","#text":"številka:27"},{"@xml:lang":"sl","#text":"letnik:49"},{"@xml:lang":"sl","#text":"str. 
1-8"}],"dc:identifier":["DOI:10.31449/inf.v49i27.7763","ISSN:1854-3871","COBISSID_HOST:245120515","URN:URN:NBN:SI:doc-QH60TOX8"],"dc:language":"en","dc:publisher":{"@xml:lang":"sl","#text":"Informatika"},"dc:subject":[{"@xml:lang":"sl","#text":"jezikovni modeli"},{"@xml:lang":"sl","#text":"sintetični podatki"},{"@xml:lang":"sl","#text":"umetna inteligenca"}],"dcterms:temporal":{"@rdf:resource":"1977-2026"},"dc:title":{"@xml:lang":"sl","#text":"Evaluating open-source large language models for synthetic non-English medical data generation using prompt-based techniques"},"dc:description":{"@xml:lang":"sl","#text":"Using synthetic data sets to train medicine-focused machine learning models has been shown to enhance their performance; however, most research focuses on English texts. In this paper, we explore generating non-English synthetic medical texts. We propose a methodology for generating medical synthetic data, showcasing it by generating medical texts written in a non-English mixed language. We evaluate our approach with thirteen different language models that are open-source and proprietary, and assess the quality of the data sets in two ways: performing a statistical comparison between the original data set and the generated data sets, and training a classifier to distinguish between original and synthetic examples. The Llama-3.2-3B model achieves the best F1 score of 0.821 ± 0.007 and accuracy of 0.816 ± 0.016, making it most suitable for generating indistinguishable medical synthetic data. In contrast, models like Aya-23, Phi-3, and SmoLLM variants achieve high F1 scores (0.945–0.948), indicating their synthetic data is easily distinguishable from original data. 
These findings highlight the importance of model selection when generating synthetic medical data sets in non-English languages."},"edm:type":"TEXT","dc:type":[{"@xml:lang":"sl","#text":"znanstveno časopisje"},{"@xml:lang":"en","#text":"journals"},{"@rdf:resource":"http://www.wikidata.org/entity/Q361785"}]},"ore:Aggregation":{"@rdf:about":"http://www.dlib.si/?URN=URN:NBN:SI:DOC-QH60TOX8","edm:aggregatedCHO":{"@rdf:resource":"URN:NBN:SI:DOC-QH60TOX8"},"edm:isShownBy":{"@rdf:resource":"http://www.dlib.si/stream/URN:NBN:SI:DOC-QH60TOX8/c56a9bec-f6b8-4c4b-b18e-4e1dc96bbe20/PDF"},"edm:rights":{"@rdf:resource":"http://creativecommons.org/licenses/by/4.0/"},"edm:provider":"Slovenian National E-content Aggregator","edm:intermediateProvider":{"@xml:lang":"en","#text":"National and University Library of Slovenia"},"edm:dataProvider":{"@xml:lang":"sl","#text":"Slovensko društvo Informatika"},"edm:object":{"@rdf:resource":"http://www.dlib.si/streamdb/URN:NBN:SI:DOC-QH60TOX8/maxi/edm"},"edm:isShownAt":{"@rdf:resource":"http://www.dlib.si/details/URN:NBN:SI:DOC-QH60TOX8"}}}}