Unverified Commit 028b1b39 authored by jlowryduda's avatar jlowryduda Committed by GitHub
Browse files

Merge pull request #230 from commonsense/jsim

update Sakaizawa's unnamed data set ("tmu-rw") to jSIM
parents 36f09465 f7e41b07
{
"class": "dataset",
"task": "similarity",
"language": ["Japanese"],
"name": "jSIM",
"description": "Japanese Similarity dataset (subsets for tokenized Japanese corpora)",
"domain": "general",
"date": "2018",
"source": "based on Japanese Similarity Dataset (https://github.com/tmu-nlp/JapaneseWordSimilarityDataset)",
"project_page": "http://vecto.space/projects/jSIM",
"version": "2.0",
"size": "1997-4429 word pairs",
"cite": [{
"contribution": "the original dataset",
"bibtex": {
"title": "Construction of a Japanese Word Similarity Dataset",
"author": [
{"name":"Yuya Sakaizawa"},
{"name":"Mamoru Komachi"}
],
"url": "http://www.lrec-conf.org/proceedings/lrec2018/pdf/96.pdf",
"booktitle": "Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)",
"address": "Miyazaki, Japan",
"publisher": "European Language Resources Association (ELRA)",
"year": 2018,
"pages": "948-951",
"type": "inproceedings",
"id":"SakaizawaKomachi2018"
}
},
{"contribution": "subsets for tokenized corpora",
"bibtex": {
"title": "Subcharacter Information in Japanese embeddings: when is it worth it?",
"author":[
{"name":"Marzena Karpinska"},
{"name":"Bofang Li"},
{"name":"Anna Rogers"},
{"name":"Aleksandr Drozd"}
],
"year": 2018,
"type": "inproceedings",
"booktitle":"Proceedings of the Workshop on the Relevance of Linguistic Structure in Neural Architectures for NLP",
"url": "http://aclweb.org/anthology/W18-2905",
"pages": "28-37",
"address": "Melbourne, Australia",
"publisher": "Association for Computational Linguistics",
"id":"KarpinskaLiEtAl_2018"
}
}]
}
word1,word2,mean,ano1,ano2,ano3,ano4,ano5,ano6,ano7,ano8,ano9,ano10,tag,mecab,form1,form2,ambiguity
いい匂いのする,芳ばしい,7,8,7,6,6,8,5,5,7,7,6,verb_misc,no,dict,dict_adj,n/a
イライラした,うざったい,5.8,5,8,5,5,6,6,2,8,6,6,verb_misc,no,conj_verb,dict_adj,n/a
じめじめした,うざったい,5.6,5,7,8,4,4,7,1,2,5,1,verb_misc,no,conj,dict_adj,n/a
すさまじい,強すぎる,6.2,6,7,8,7,3,7,5,6,5,3,adjective_misc,no,dict,conj_adj_verb,n/a
とても暑く,厳しい暑さ,8.2,10,8,6,9,8,9,8,9,7,9,adverb_misc_phrase,no,conj_adj,conj_noun,n/a
ほとんど,多くが,8.2,8,7,8,9,9,8,8,8,5,7,adverb_misc,no,dict,conj_adj,n/a
ほとんどが,ただただ,1.6,2,1,0,5,0,3,7,0,4,3,adverb_misc,no,conj_adj,dict,n/a
ほとんどが,ひたすら,1,0,5,0,0,0,3,3,0,0,1,adverb_misc,no,conj_adj,dict,n/a
むかむかした,忌ま忌ましい,4.8,5,2,4,7,6,8,2,8,8,7,verb_misc,no,conj_verb,dict_adj,n/a
ものすごい,うるさく,6,6,9,7,6,2,6,3,5,5,2,adjective_misc,yes,dict_adj,conj_adj,yes
主に,ほとんどが,7.4,8,5,8,7,9,7,9,6,8,8,adverb_misc,yes,dict,conj_adj,yes
久しい,長くなる,5.6,5,7,6,4,6,6,6,3,9,7,adjective_misc,no,dict,dict,n/a
凄まじい,強すぎる,6.2,8,6,6,5,6,7,7,3,9,6,adjective_misc,no,dict,conj_adj_verb,n/a
厳しく,厳しくて,8,9,8,9,5,9,6,9,9,10,7,adverb_misc,no,conj_adj,conj_adj,n/a
取り除き,廃止した,4.8,7,3,5,6,3,8,5,4,3,6,noun_verb_misc,no,conj_verb,conj_verb,n/a
喧嘩をする,もめる,8.4,8,8,9,9,8,8,8,7,5,8,verb_misc,no,dict,dict,n/a
喧嘩をする,争う,9,8,9,10,9,9,9,8,6,7,8,verb_misc,no,dict,dict,n/a
困る,居辛い,6.2,6,8,8,6,3,3,5,6,7,5,verb_misc,no,dict,dict_adj,n/a
多くが,総じて,6.8,7,6,7,8,6,6,8,8,8,7,adverb_misc,no,conj_adj,dict,n/a
多くが,一般的に,6.6,6,6,3,9,9,7,6,9,6,6,adverb_misc,no,conj_adj,dict,n/a
多くが,全体的に,7.8,7,7,8,9,8,4,8,7,8,8,adverb_misc,no,conj_adj,dict,n/a
多くが,全般的に,7.8,10,8,8,8,5,8,6,8,6,5,adverb_misc,no,conj_adj,dict,n/a
専ら,ほとんどが,4,7,5,1,7,0,7,8,5,6,7,adverb_misc,yes,dict,conj_adj,yes
強い,強すぎる,6,6,7,8,3,6,7,6,6,8,4,adjective_misc,no,dict,conj_adj_verb,n/a
待ちきれない,早く見たい,6.4,6,4,7,7,8,6,7,6,1,3,adjective_misc,no,dict,conj_verb,n/a
待ちきれない,早く来て欲しい,6.2,6,4,7,6,8,6,7,9,8,3,adjective_misc,no,dict,conj_verb,n/a
待ち遠しい,早く見たい,7.2,8,9,7,6,6,6,2,5,7,3,adjective_misc,no,dict,conj_verb,n/a
待ち遠しい,早くしてほしい,7.2,8,9,9,6,4,6,2,7,9,5,adjective_misc,no,dict,conj_verb,n/a
待ち遠しい,早く来てほしい,8,8,9,9,6,8,6,2,8,7,3,adjective_misc,no,dict,conj_verb,n/a
待ち遠しい,早く来て欲しい,7.8,8,9,9,6,7,6,2,8,7,3,adjective_misc,no,dict,conj_verb,n/a
待ち遠しく,早く来てほしく,7.4,8,9,8,6,6,6,2,8,7,5,adverb_misc,no,conj_adj,conj_verb,n/a
恋しい,寂しくなる,3.8,1,5,5,2,6,6,5,7,7,5,adjective_misc,no,dict,dict,n/a
恋しく,早く来てほしく,4.6,6,4,2,5,6,4,6,8,1,3,adverb_misc,no,conj_adj,conj_adj,n/a
楽しそう,おもしろおかしく,5.8,7,7,6,4,5,8,5,7,7,7,adjective_misc,no,conj,conj_adj,n/a
楽しみな,早くしてほしい,6.8,6,9,10,6,3,6,7,7,1,3,adjective_misc,no,dict,conj_verb,n/a
概して,多くが,4.8,6,2,1,8,7,9,5,8,7,8,adverb_phrase,no,dict,conj_adj,n/a
気まずくない,気詰まりしない,6.6,8,0,10,8,7,7,8,8,9,7,adjective_misc,no,conj_adj,conj_verb,n/a
美しく見える,美しい,7.6,9,6,6,9,8,6,8,10,8,5,verb_misc,no,dict,dict_adj,n/a
美味しそう,香りよく,3.8,2,6,3,6,2,4,2,7,7,5,adjective_misc,no,conj,conj_adj,n/a
腹が立つ,苛々しい,7.8,8,7,9,7,8,8,6,8,7,7,verb_phrase_misc,no,dict,dict_adj,n/a
苛々しい,怒りたい,5.8,7,4,8,5,5,9,6,8,8,5,adjective_misc,yes,dict_adj,conj_verb,yes
苛立たしかった,むかむかした,6.4,7,5,7,7,6,7,6,9,10,8,adjective_misc,no,conj_adj,conj_verb,n/a
見るからに,どう見ても,8.6,9,9,10,6,9,9,1,8,6,10,verb_phrase_full,no,dict,conj,n/a
間を置かず,ささっと,7.4,8,8,10,6,5,9,6,9,7,6,verb_phrase_misc,no,conj,dict_adv,n/a
香ばしい,よい香り,8.4,8,8,9,8,9,7,8,9,9,7,adjective_misc,no,dict,dict_noun,n/a
,mara,mara-Lenovo-YOGA-900S-12ISK,27.05.2018 00:24,file:///home/mara/.config/libreoffice/4;
\ No newline at end of file
,mara,mara-Lenovo-YOGA-900S-12ISK,26.05.2018 20:50,file:///home/mara/.config/libreoffice/4;
\ No newline at end of file
,mara,mara-Lenovo-YOGA-900S-12ISK,26.05.2018 20:51,file:///home/mara/.config/libreoffice/4;
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment