<?xml version="1.0" encoding="UTF-8"?>
<!--
  Europeana Data Model (EDM) record, serialized as RDF/XML, describing one
  journal article (URN:NBN:SI:doc-V6OH4GKA).

  Layout: two edm:WebResource entries, one edm:TimeSpan, the edm:ProvidedCHO
  carrying the descriptive metadata, and the ore:Aggregation that ties the
  described object to its web resources, rights statement and providers.

  Formatting note: indentation is applied between elements only. Elements that
  hold text are kept on a single line so that no layout whitespace leaks into
  the literal values.
-->
<rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/"
         xmlns:edm="http://www.europeana.eu/schemas/edm/"
         xmlns:wgs84_pos="http://www.w3.org/2003/01/geo/wgs84_pos"
         xmlns:foaf="http://xmlns.com/foaf/0.1/"
         xmlns:rdaGr2="http://rdvocab.info/ElementsGr2"
         xmlns:oai="http://www.openarchives.org/OAI/2.0/"
         xmlns:owl="http://www.w3.org/2002/07/owl#"
         xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
         xmlns:ore="http://www.openarchives.org/ore/terms/"
         xmlns:skos="http://www.w3.org/2004/02/skos/core#"
         xmlns:dcterms="http://purl.org/dc/terms/">

  <!-- Web resources for this record, each with the extent reported for it. -->
  <edm:WebResource rdf:about="http://www.dlib.si/stream/URN:NBN:SI:doc-V6OH4GKA/0896596a-466b-449e-a3dd-8089148dbe43/PDF">
    <dcterms:extent>1795 KB</dcterms:extent>
  </edm:WebResource>
  <edm:WebResource rdf:about="http://www.dlib.si/stream/URN:NBN:SI:doc-V6OH4GKA/dde94fd4-08f4-4cb3-a929-b1c7c06499de/TEXT">
    <dcterms:extent>0 KB</dcterms:extent>
  </edm:WebResource>

  <!-- Time span referenced by dcterms:temporal on the ProvidedCHO below. -->
  <edm:TimeSpan rdf:about="2014-2025">
    <edm:begin xml:lang="en">2014</edm:begin>
    <edm:end xml:lang="en">2025</edm:end>
  </edm:TimeSpan>

  <!-- Descriptive metadata of the article itself. -->
  <edm:ProvidedCHO rdf:about="URN:NBN:SI:doc-V6OH4GKA">
    <dcterms:isPartOf rdf:resource="https://www.dlib.si/details/URN:NBN:SI:spr-QCV9XF2O" />
    <dcterms:issued>2025</dcterms:issued>
    <dc:creator>Banjanović-Mehmedović, Lejla</dc:creator>
    <dc:creator>Gurdić-Ribić, A.</dc:creator>
    <dc:creator>Husaković, A.</dc:creator>
    <dc:creator>Karabegović, Isak</dc:creator>
    <dc:creator>Prljača, Naser</dc:creator>
    <!-- Issue number, volume and page range, labelled in Slovenian. -->
    <dc:format xml:lang="sl">številka:1</dc:format>
    <dc:format xml:lang="sl">letnik:20</dc:format>
    <dc:format xml:lang="sl">str. 5-17</dc:format>
    <!-- Identifiers follow a "SCHEME:value" convention. -->
    <dc:identifier>DOI:10.14743/apem2025.1.523</dc:identifier>
    <dc:identifier>ISSN:1854-6250</dc:identifier>
    <dc:identifier>COBISSID_HOST:264960259</dc:identifier>
    <dc:identifier>URN:URN:NBN:SI:doc-V6OH4GKA</dc:identifier>
    <dc:language>en</dc:language>
    <dc:publisher xml:lang="sl">Fakulteta za strojništvo, Inštitut za proizvodno strojništvo</dc:publisher>
    <dcterms:isPartOf xml:lang="sl">Advances in production engineering and management</dcterms:isPartOf>
    <dc:subject xml:lang="en">Conservative Q-learning</dc:subject>
    <dc:subject xml:lang="en">deep reinforcement learning</dc:subject>
    <dc:subject xml:lang="en">human-robot collaboration</dc:subject>
    <dc:subject xml:lang="sl">interakcija človek-robot</dc:subject>
    <dc:subject xml:lang="sl">manipulatorji</dc:subject>
    <dc:subject xml:lang="en">robot learning</dc:subject>
    <dc:subject xml:lang="en">robot manipulation tasks</dc:subject>
    <dc:subject xml:lang="sl">robotika</dc:subject>
    <dc:subject xml:lang="sl">robotsko učenje</dc:subject>
    <dc:subject xml:lang="en">soft actor-critic algorithm</dc:subject>
    <dc:subject xml:lang="sl">strojno učenje</dc:subject>
    <dcterms:temporal rdf:resource="2014-2025" />
    <!-- Title and abstract are English text (dc:language is "en"), so they are tagged xml:lang="en". -->
    <dc:title xml:lang="en">Reinforcement learning for robot manipulation tasks in human-robot collaboration using the CQL/SAC algorithms</dc:title>
    <dc:description xml:lang="en">The integration of human-robot collaboration (HRC) into industrial and service environments demands efficient and adaptive robotic systems capable of executing diverse tasks, including pick-and-place operations. This paper investigates the application of Soft Actor-Critic (SAC) and Conservative Q-Learning (CQL)—two deep reinforcement learning (DRL) algorithms—for the learning and optimization of pick-and-place actions within HRC scenarios. By leveraging SAC’s capability to balance exploration and exploitation, the robot autonomously learns to perform pick-and-place tasks while adapting to dynamic environments and human interactions. Moreover, the integration of CQL ensures more stable learning by mitigating Q-value overestimation, which proves particularly advantageous in offline and suboptimal data scenarios. The combined use of CQL and SAC enhances policy robustness, facilitating safer and more efficient decision-making in continually evolving environments. The proposed framework combines simulation-based training with transfer learning techniques, enabling seamless deployment in real-world environments. The critical challenge of trajectory completion is addressed through a meticulously designed reward function that promotes efficiency, precision, and safety. Experimental validation demonstrates a 100 % success rate in simulation and an 80 % success rate on real hardware, confirming the practical viability of the proposed model. This work underscores the pivotal role of DRL in enhancing the functionality of collaborative robotic systems, illustrating its applicability across a range of industrial environments.</dc:description>
    <edm:type>TEXT</edm:type>
    <dc:type xml:lang="sl">znanstveno časopisje</dc:type>
    <dc:type xml:lang="en">journals</dc:type>
    <dc:type rdf:resource="http://www.wikidata.org/entity/Q361785" />
  </edm:ProvidedCHO>

  <!-- Aggregation: links the ProvidedCHO to its views, rights statement and providers. -->
  <ore:Aggregation rdf:about="http://www.dlib.si/?URN=URN:NBN:SI:doc-V6OH4GKA">
    <edm:aggregatedCHO rdf:resource="URN:NBN:SI:doc-V6OH4GKA" />
    <edm:isShownBy rdf:resource="http://www.dlib.si/stream/URN:NBN:SI:doc-V6OH4GKA/0896596a-466b-449e-a3dd-8089148dbe43/PDF" />
    <edm:rights rdf:resource="http://creativecommons.org/licenses/by/4.0/" />
    <edm:provider>Slovenian National E-content Aggregator</edm:provider>
    <edm:intermediateProvider xml:lang="en">National and University Library of Slovenia</edm:intermediateProvider>
    <edm:dataProvider xml:lang="sl">Univerza v Mariboru, Fakulteta za strojništvo</edm:dataProvider>
    <edm:object rdf:resource="http://www.dlib.si/streamdb/URN:NBN:SI:doc-V6OH4GKA/maxi/edm" />
    <edm:isShownAt rdf:resource="http://www.dlib.si/details/URN:NBN:SI:doc-V6OH4GKA" />
  </ore:Aggregation>
</rdf:RDF>