{"?xml":{"@version":"1.0"},"edm:RDF":{"@xmlns:dc":"http://purl.org/dc/elements/1.1/","@xmlns:edm":"http://www.europeana.eu/schemas/edm/","@xmlns:wgs84_pos":"http://www.w3.org/2003/01/geo/wgs84_pos","@xmlns:foaf":"http://xmlns.com/foaf/0.1/","@xmlns:rdaGr2":"http://rdvocab.info/ElementsGr2","@xmlns:oai":"http://www.openarchives.org/OAI/2.0/","@xmlns:owl":"http://www.w3.org/2002/07/owl#","@xmlns:rdf":"http://www.w3.org/1999/02/22-rdf-syntax-ns#","@xmlns:ore":"http://www.openarchives.org/ore/terms/","@xmlns:skos":"http://www.w3.org/2004/02/skos/core#","@xmlns:dcterms":"http://purl.org/dc/terms/","edm:WebResource":[{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-HQISSFTG/610da3e6-ac01-4e67-8859-111e2c9263ea/HTML","dcterms:extent":"37 KB"},{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-HQISSFTG/16238a20-46da-4825-a375-e951110c7f5e/PDF","dcterms:extent":"366 KB"},{"@rdf:about":"http://www.dlib.si/stream/URN:NBN:SI:doc-HQISSFTG/ae0fc1d2-0bf9-49e0-a53c-811119ee691b/TEXT","dcterms:extent":"32 KB"}],"edm:TimeSpan":{"@rdf:about":"1955-2025","edm:begin":{"@xml:lang":"en","#text":"1955"},"edm:end":{"@xml:lang":"en","#text":"2025"}},"edm:ProvidedCHO":{"@rdf:about":"URN:NBN:SI:doc-HQISSFTG","dcterms:isPartOf":[{"@rdf:resource":"https://www.dlib.si/details/URN:NBN:SI:spr-2SDQL3ET"},{"@xml:lang":"sl","#text":"Jezik in slovstvo"}],"dcterms:issued":"2009","dc:creator":"Arhar Holdt, Špela","dc:format":[{"@xml:lang":"sl","#text":"številka:3/4"},{"@xml:lang":"sl","#text":"letnik:54"},{"@xml:lang":"sl","#text":"str. 43-56"}],"dc:identifier":["ISSN:0021-6933","URN:URN:NBN:SI:doc-HQISSFTG"],"dc:language":"sl","dc:publisher":{"@xml:lang":"sl","#text":"Slavistično društvo Slovenije"},"dc:subject":[{"@xml:lang":"sl","#text":"korpusna lingvisitka"},{"@xml:lang":"sl","#text":"lastna imena"},{"@xml:lang":"sl","#text":"leksikonske enote"},{"@xml:lang":"en","#text":"Slovene"},{"@xml:lang":"sl","#text":"slovenščina"},{"@xml:lang":"sl","#text":"učni korpus"},{"@rdf:resource":"http://www.wikidata.org/entity/Q9063"}],"dcterms:temporal":{"@rdf:resource":"1955-2025"},"dc:title":{"@xml:lang":"sl","#text":"Učni korpus SSJ in leksikon besednih oblik za slovenščino|"},"dc:description":[{"@xml:lang":"sl","#text":"The main purpose of the article is the presentation of the preparation of the training corpus and the lexicon of word forms for Slovene. With a scope of 400,000 words, the existing corpus presupposes four-level manually checked annotation: lemmatisation, morphosyntactic and syntactic annotation, and named entity recognition. Together with the JOS100k corpus, the SSJ corpus forms a training corpus of half a million entries for training statistical models, such as for the purposes of morphosyntactic tagging and parsing of Slovene texts. The lexicon will provide approximately 100,000 units, containing morphological paradigms of individual words with a selection of data, in line with the JOS system of morphosyntactic annotation. The inclusion of information about the (derivational) connectedness of lexical units is foreseen, and in the case of observed form variation in language use we will also include information about the frequency of forms and the current definition in normative sources. The inclusion of multiword units is foreseen on the level of multiword proper nouns and forms for which there are variants that are written together and apart"},{"@xml:lang":"sl","#text":"Glavni namen prispevka je predstavitev priprave učnega korpusa ter leksikona besednih oblik za slovenščino. 400.000 besed obsegajoči korpus SSJ predvideva štirinivojsko označenost: lematizacijo, označenost na oblikoskladenjski ter skladenjski ravni ter označenost lastnih imen. Vse oznake bodo ročno pregledane. Skupaj s korpusom JOS100k tvori korpus SSJ polmilijonski učni korpus za učenje statističnih modelov za npr. oblikoskladenjsko označevanje ter skladenjsko razčlenjevanje slovenščine. Leksikon besednih oblik bo prinašal okvirno 100.000 leksikonskih enot, vsebujočih oblikoslovne paradigme posameznih besed z naborom informacij, prekrivnih s sistemom oblikoskladenjskega označevanja JOS. Predvidena je vključitev informacij o (besedotvorni) povezanosti leksikonskih enot, v primeru v jezikovni rabi izpričane oblikovne variantnosti pa bodo vključeni tudi podatki o pogostnosti oblik ter njihovi trenutni opredeljenosti v normativnih virih. Vključitev večbesednih enot je predvidena na ravni večbesednih lastnih imen ter oblik, ki se variantno pišejo skupaj oz. narazen"}],"edm:type":"TEXT","dc:type":[{"@xml:lang":"sl","#text":"znanstveno časopisje"},{"@xml:lang":"en","#text":"journals"},{"@rdf:resource":"http://www.wikidata.org/entity/Q361785"}]},"ore:Aggregation":{"@rdf:about":"http://www.dlib.si/?URN=URN:NBN:SI:doc-HQISSFTG","edm:aggregatedCHO":{"@rdf:resource":"URN:NBN:SI:doc-HQISSFTG"},"edm:isShownBy":{"@rdf:resource":"http://www.dlib.si/stream/URN:NBN:SI:doc-HQISSFTG/16238a20-46da-4825-a375-e951110c7f5e/PDF"},"edm:rights":{"@rdf:resource":"http://rightsstatements.org/vocab/InC/1.0/"},"edm:provider":"Slovenian National E-content Aggregator","edm:intermediateProvider":{"@xml:lang":"en","#text":"National and University Library of Slovenia"},"edm:dataProvider":{"@xml:lang":"sl","#text":"Slavistično društvo Slovenije"},"edm:object":{"@rdf:resource":"http://www.dlib.si/streamdb/URN:NBN:SI:doc-HQISSFTG/maxi/edm"},"edm:isShownAt":{"@rdf:resource":"http://www.dlib.si/details/URN:NBN:SI:doc-HQISSFTG"}}}}