"""
This file defines the ConceptNet web API responses.
"""
4

Rob Speer's avatar
Rob Speer committed
5
6
7
8
9
from conceptnet5.vectors.query import VectorSpaceWrapper
from conceptnet5.nodes import standardized_concept_uri, ld_node

# Module-level vector-space wrapper, created once at import time. The
# relatedness queries below (query_relatedness, query_related) call into it.
VECTORS = VectorSpaceWrapper()
# Edge finder exposed by the vector wrapper; the lookup_* and query_paginated
# functions below use it to fetch edges.
FINDER = VECTORS.finder
10
# JSON-LD context reference; success() and error() store it in a response
# under the '@context' key.
CONTEXT = ["http://api.conceptnet.io/ld/conceptnet5.6/context.ld.json"]
Rob Speer's avatar
Rob Speer committed
11
12
13
14
15
16
17
18
19
20
# Not referenced elsewhere in the visible part of this file.
# NOTE(review): presumably the parameter names accepted by the /query
# endpoint -- confirm against the caller.
VALID_KEYS = ['rel', 'start', 'end', 'node', 'other', 'source', 'uri']


def success(response):
    """
    Finish a successful response by attaching the shared JSON-LD context
    under '@context'. The dictionary is modified in place and also returned.
    """
    response.update({'@context': CONTEXT})
    return response


def error(response, status, details):
    """
    Turn `response` into an error response.

    Attaches the shared JSON-LD context under '@context' and an 'error'
    entry of the form {'status': status, 'details': details}. Callers in
    this file pass 400 or 404 as `status` and a human-readable message as
    `details`. The dictionary is modified in place and also returned.
    """
    response['@context'] = CONTEXT
    response['error'] = {'status': status, 'details': details}
    return response


def make_query_url(url, items):
    """
    Take a URL base and an iterable of key/value pairs representing
    parameters, and convert them to a complete URL with those parameters in
    the query string.

    Keys and values are inserted verbatim with `str.format`; no
    percent-encoding is applied. When there are no pairs, `url` is returned
    unchanged (no trailing '?').
    """
    query_string = '&'.join('{}={}'.format(*pair) for pair in items)
    if not query_string:
        return url
    return url + '?' + query_string


def groupkey_to_pairs(groupkey, term):
    """
    Convert a 'groupkey', a structure defined below in 'lookup_grouped_by_feature',
    to a list of pairs representing the parameters that query for just the
    edges in that feature group.

    A groupkey is a (direction, rel) pair. Direction 1 puts `term` in the
    'start' position, -1 puts it in the 'end' position, and any other value
    queries for it as 'node' (either end).
    """
    direction, rel = groupkey
    if direction == 1:
        position = 'start'
    elif direction == -1:
        position = 'end'
    else:
        position = 'node'
    return [('rel', rel), (position, term)]


def paginated_url(url, params, offset, limit):
    """
    Take in a URL and set 'offset=' and 'limit=' parameters on its query string,
    replacing those parameters if they already existed.

    `params` is an iterable of (key, value) pairs. The remaining pairs keep
    their order, and 'offset' and 'limit' are always appended last.
    """
    kept = [(key, val) for (key, val) in params if key not in ('offset', 'limit')]
    kept.extend([('offset', offset), ('limit', limit)])
    return make_query_url(url, kept)


def make_paginated_view(url, params, offset, limit, more):
    """
    Create a JSON-LD structure that describes the fact that this is just
    one page of results and more pages exist.

    This follows what used to be the recommendation at
    https://www.w3.org/community/hydra/wiki/Pagination. It now sort of resembles
    the "PartialCollectionView" proposal. This stuff is still not
    well-standardized.

    The result always has '@id' (this page), '@type', 'firstPage', and
    'paginatedProperty'. 'previousPage' is added when `offset` is positive,
    and 'nextPage' plus an explanatory 'comment' are added when `more` is
    true.
    """
    pager = {
        '@id': paginated_url(url, params, offset, limit),
        '@type': 'PartialCollectionView',
        'firstPage': paginated_url(url, params, 0, limit),
        'paginatedProperty': 'edges',
    }
    if offset > 0:
        # Clamp at zero so stepping back from a small offset lands on the
        # first page instead of a negative offset
        pager['previousPage'] = paginated_url(
            url, params, max(0, offset - limit), limit
        )
    if more:
        pager['nextPage'] = paginated_url(url, params, offset + limit, limit)
        pager['comment'] = (
            "There are more results. Follow the 'nextPage' link for more."
        )
    return pager


def lookup_grouped_by_feature(term, filters=None, feature_limit=10):
    """
    Given a query for a concept, return assertions about that concept grouped by
    their features (for example, "A dog wants to ..." could be a group).

    `term` must be a concept URI starting with '/c/'; anything else gets a
    400 error response. Each group lists at most `feature_limit` edges, and
    carries a 'view' pointing at the full query when more edges exist.
    Groups are ordered by the total weight of their edges, heaviest first.

    `filters` is only consulted when nothing was found: a 404 error is
    returned if there are no groups and no filters were given.
    """
    if not term.startswith('/c/'):
        return error(
            {}, 400, 'Only concept nodes (starting with /c/) can be grouped by feature.'
        )

    # Ask for one edge more than we will show per feature, so that an
    # overflowing group can be detected below
    found = FINDER.lookup_grouped_by_feature(term, limit=(feature_limit + 1))
    base_url = '/query'
    weighted_groups = []
    for groupkey, assertions in found.items():
        direction = groupkey[0]
        feature_pairs = groupkey_to_pairs(groupkey, term)
        group = {
            '@id': make_query_url(base_url, feature_pairs),
            'feature': dict(feature_pairs),
            'edges': assertions[:feature_limit],
            # Direction 0 means the term may appear at either end of the edge
            'symmetric': direction == 0,
        }
        if len(assertions) > feature_limit:
            group['view'] = make_paginated_view(
                base_url, feature_pairs, 0, feature_limit, more=True
            )

        total_weight = sum(assertion['weight'] for assertion in assertions)
        weighted_groups.append((total_weight, group))

    # Stable sort, heaviest group first; the weight itself is not part of
    # the response
    weighted_groups.sort(key=lambda pair: -pair[0])
    grouped = [group for (_weight, group) in weighted_groups]

    response = ld_node(term)
    if not grouped and not filters:
        return error(
            response, 404, '%r is not a node in ConceptNet.' % response['label']
        )
    response['features'] = grouped
    return success(response)


def lookup_paginated(term, limit=50, offset=0):
    """
    Look up edges associated with a particular URI, and return a paginated,
    flat list of results.

    The response holds up to `limit` edges starting at `offset`. A 'view'
    is attached whenever this is not the only page (more results exist, or
    `offset` is non-zero). If the lookup returns nothing, a 404 error
    response is returned; note that this also happens when `offset` is past
    the last edge of an existing node.
    """
    # Query one more edge than asked for, so we know if there are more
    found = FINDER.lookup(term, limit=(limit + 1), offset=offset)
    edges = found[:limit]
    response = {'@id': term, 'edges': edges}

    more = len(found) > len(edges)
    if more or offset != 0:
        response['view'] = make_paginated_view(term, (), offset, limit, more=more)

    if not found:
        return error(response, 404, '%r is not a node in ConceptNet.' % term)
    return success(response)


157
def lookup_single_assertion(uri):
    """
    Look up an edge with a particular URI (starting with /a/). This differs
    from `lookup_paginated` because there will be at most one matching edge.
    We return that edge if it exists, and if not, we return a 404 error.

    On success, the fields of the matching edge are merged into the
    response, which starts out as {'@id': uri}.
    """
    found = FINDER.lookup(uri, limit=1)
    response = {'@id': uri}
    if not found:
        return error(response, 404, '%r is not an assertion in ConceptNet.' % uri)
    response.update(found[0])
    return success(response)


172
def query_relatedness(node1, node2):
    """
    Query for the similarity between node1 and node2. Return the cosine
    similarity between the vectors of these two terms.

    The response has the form {'@id': <relatedness URL>, 'value': <float>},
    with the value rounded to three decimal places. A 400 error response is
    returned when either argument is None, or when the similarity lookup
    raises ValueError.
    """
    if node1 is None or node2 is None:
        return error({}, 400, 'Arguments should be called node1 and node2.')

    url = make_query_url('/relatedness', [('node1', node1), ('node2', node2)])
    try:
        relatedness = VECTORS.get_similarity(node1, node2)
        value = round(float(relatedness), 3)
    except ValueError:
        return error(
            {'@id': url},
            400,
            "Couldn't look up {!r} or {!r} (or both).".format(node1, node2),
        )
    return success({'@id': url, 'value': value})


193
def query_related(uri, filter=None, limit=20):
    """
    Query for terms that are related to a term, or list of terms, according
    to the mini version of ConceptNet Numberbatch.

    `uri` takes one of two forms:

    - A concept URI starting with '/c/', which is used as the query as-is.
    - A term list of the form '/list/<language>/<term>[@<weight>],<term>...'.
      Each comma-separated item becomes the concept '/c/<language>/<term>'.
      An optional '@<weight>' suffix gives that term a float weight; terms
      without one get weight 1.0. For example, '/list/en/cat@2,dog' queries
      for '/c/en/cat' with weight 2.0 and '/c/en/dog' with weight 1.0.

    `filter` and `limit` are passed through to `VECTORS.similar_terms`.
    (`filter` shadows the builtin, but it is part of the public signature.)

    Returns {'@id': uri, 'related': [{'@id': ..., 'weight': ...}, ...]} with
    weights rounded to three decimal places. A term list that cannot be
    parsed yields a 400 error response, and any other kind of URI yields a
    404 error response.
    """
    if uri.startswith('/c/'):
        query = uri
    elif uri.startswith('/list/') and uri.count('/') >= 3:
        try:
            _, _list, language, termlist = uri.split('/', 3)
            query = []
            for piece in termlist.split(','):
                if '@' in piece:
                    # A piece with more than one '@' fails to unpack, and a
                    # non-numeric weight fails in float(); both raise
                    # ValueError and are reported as an unparseable list
                    term, weight = piece.split('@')
                    weight = float(weight)
                else:
                    term, weight = piece, 1.
                query.append(('/c/{}/{}'.format(language, term), weight))
        except ValueError:
            return error({'@id': uri}, 400, "Couldn't parse this term list: %r" % uri)
    else:
        return error(
            {'@id': uri},
            404,
            '%r is not something that I can find related terms to.' % uri,
        )

    found = VECTORS.similar_terms(query, filter=filter, limit=limit)
    related = [
        {'@id': key, 'weight': round(float(weight), 3)}
        for (key, weight) in found.items()
    ]
    # Go through success() like every other response builder in this file,
    # so that the successful result carries '@context' just as the error
    # results above do
    return success({'@id': uri, 'related': related})


def query_paginated(query, offset=0, limit=50):
    """
    Search ConceptNet for edges matching a query.

    The query should be provided as a dictionary of criteria. The `query`
    function in the `.api` module constructs such a dictionary.

    The response holds up to `limit` matching edges starting at `offset`,
    and a 'view' is attached whenever this is not the only page (more
    results exist, or `offset` is non-zero). An empty result is still a
    successful response.
    """
    # Ask for one edge more than requested, so we know if there are more
    found = FINDER.query(query, limit=limit + 1, offset=offset)
    edges = found[:limit]
    response = {'@id': make_query_url('/query', query.items()), 'edges': edges}

    more = len(found) > len(edges)
    if more or offset != 0:
        response['view'] = make_paginated_view(
            '/query', sorted(query.items()), offset, limit, more=more
        )
    return success(response)


def standardize_uri(language, text):
    """
    Look up the URI for a given piece of text.

    Returns a response of the form {'@id': <concept URI>}, or a 400 error
    response when either `language` or `text` is None.
    """
    if text is None or language is None:
        return error(
            {}, 400, "You should include the 'text' and 'language' parameters."
        )

    # Underscores in the input are treated as spaces before standardizing
    uri = standardized_concept_uri(language, text.replace('_', ' '))
    return success({'@id': uri})