Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Kizito Ononuju (Kiz')
conceptnet5
Commits
2ec9fbff
Unverified
Commit
2ec9fbff
authored
Jul 08, 2019
by
jlowryduda
Committed by
GitHub
Jul 08, 2019
Browse files
Merge pull request #266 from commonsense/fix-malformed-related-query
Fix malformed /related query
parents
8f04c723
1fbb9730
Changes
3
Hide whitespace changes
Inline
Side-by-side
Snakefile
View file @
2ec9fbff
...
...
@@ -74,6 +74,12 @@ PRECOMPUTED_DATA_PATH = "/precomputed-data/2016"
PRECOMPUTED_DATA_URL = "https://conceptnet.s3.amazonaws.com" + PRECOMPUTED_DATA_PATH
PRECOMPUTED_S3_UPLOAD = "s3://conceptnet" + PRECOMPUTED_DATA_PATH
# We need an external vocabulary to refer to when miniaturizing our set of
# embeddings. In the normal case, this is the word2vec Google News vocabulary.
# In the test build, we don't have that; we only have a cut-down version of
# GloVe, so we'll switch to that instead.
MINI_VOCAB_SOURCE = "/vectors/w2v-google-news.h5"
INPUT_EMBEDDINGS = [
'crawl-300d-2M', 'w2v-google-news', 'glove12-840B', 'fasttext-opensubtitles'
]
...
...
@@ -96,6 +102,7 @@ if TESTMODE:
RAW_DATA_URL = "/missing/data"
PRECOMPUTED_DATA_URL = "/missing/data"
EMOJI_LANGUAGES = ['en', 'en_001']
MINI_VOCAB_SOURCE = "/vectors/glove12-840B.h5"
CORE_DATASET_NAMES = [
...
...
@@ -161,6 +168,7 @@ rule test:
DATA + "/assertions/assertions.csv",
DATA + "/psql/done",
DATA + "/assoc/reduced.csv",
DATA + "/vectors/mini.h5",
DATA + "/vectors/plain/numberbatch-en.txt.gz",
...
...
@@ -730,7 +738,7 @@ rule debias:
rule miniaturize:
input:
DATA + "/vectors/numberbatch-biased.h5",
DATA +
"/vectors/w2v-google-news.h5"
DATA +
MINI_VOCAB_SOURCE
output:
DATA + "/vectors/mini.h5"
resources:
...
...
conceptnet5/vectors/query.py
View file @
2ec9fbff
...
...
@@ -122,7 +122,13 @@ class VectorSpaceWrapper(object):
@staticmethod
def _englishify(term):
    """
    Change the language of a /c/ term to English.

    The result is '/c/en/' followed by the element at index 2 of
    `split_uri(term)`. Returns None if the input does not start with
    '/c/', or if splitting it yields too few pieces to have an element
    at index 2.
    """
    # Reject non-term input up front, before doing any splitting work.
    if not term.startswith('/c/'):
        return None

    splits = split_uri(term)
    if len(splits) <= 2:
        # Nothing to re-root under English; say so explicitly instead of
        # falling off the end of the function.
        return None

    return '/c/en/' + splits[2]
...
...
@@ -188,7 +194,8 @@ class VectorSpaceWrapper(object):
prefix_weight
=
0.01
if
get_uri_language
(
term
)
!=
'en'
:
englishified
=
self
.
_englishify
(
term
)
expanded
.
append
((
englishified
,
prefix_weight
))
if
englishified
is
not
None
:
expanded
.
append
((
englishified
,
prefix_weight
))
prefix_matches
=
self
.
_match_prefix
(
term
,
prefix_weight
)
expanded
.
extend
(
prefix_matches
)
...
...
tests/small-build/test_api.py
0 → 100644
View file @
2ec9fbff
from
conceptnet5
import
api
from
nose.tools
import
eq_
def test_related_query():
    """Requesting three related terms for '/c/en/test' yields three results."""
    response = api.query_related('/c/en/test', limit=3)
    related_terms = response['related']
    eq_(len(related_terms), 3)
def test_related_query_malformed():
    """
    A query for terms related to a nonsense URI is still fulfilled; it
    simply comes back with no results.
    """
    response = api.query_related('/c/en,test', limit=3)
    related_terms = response['related']
    eq_(len(related_terms), 0)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment