Commit d70b7f2a authored by Jacky Lin
Browse files

add top several links

parent 13d30431
......@@ -67,3 +67,46 @@ func reformatSearchKey(key string) string {
reformatKey = strings.Join(splitKey, "+")
return reformatKey
}
// topNIndex returns the indices of the n highest scores in list, ordered
// from the highest score to the lowest.
//
// The result only ever contains real indices into list, so it may be shorter
// than n; callers must use len(result) rather than assuming n entries:
//   - if n is zero or negative, the result is empty;
//   - if list holds fewer than n scores, every index is returned in its
//     original order (0, 1, ..., len(list)-1);
//   - a score equal to one that is already ranked is skipped, so each distinct
//     score value contributes at most one index (the first one seen).
func topNIndex(n int, list []float64) []int {
	if n <= 0 {
		return []int{}
	}

	// If the list is shorter than the requested top n, provide the index of
	// every item in the list and nothing more.
	if len(list) < n {
		all := make([]int, len(list))
		for i := range all {
			all[i] = i
		}
		return all
	}

	// Otherwise keep a ranking of at most n indices, sorted by descending
	// score. Tracking only filled slots (instead of a zero-initialised score
	// table) lets zero and negative scores be ranked and avoids reporting
	// index 0 for slots that were never filled.
	top := make([]int, 0, n)
	for i, score := range list {
		pos := len(top)
		duplicate := false
		for j, idx := range top {
			if list[idx] == score {
				duplicate = true
				break
			}
			if list[idx] < score {
				pos = j
				break
			}
		}
		// Skip scores already ranked, and scores that do not beat any entry
		// of a ranking that is already full.
		if duplicate || pos == n {
			continue
		}
		if len(top) < n {
			top = append(top, 0)
		}
		// Shift the lower-ranked entries down by one; when the ranking is
		// full this drops the current last place.
		copy(top[pos+1:], top[pos:])
		top[pos] = i
	}
	return top
}
......@@ -5,15 +5,8 @@ import (
)
func (c *Client) FetchEverything(query string) (*Results, error) {
description, title, contents, link := Search(query)
a := Article{
Author: "WIKIPEDIA",
Title: title,
Description: description,
URL: link,
Content: contents,
}
aList := []Article{a}
aList := Search(query)
res := &Results{
TotalResults: 1,
Articles: aList,
......
......@@ -23,11 +23,12 @@ func wikiSearchLinkGenerator(searchKey string) string {
}
// wikiPageScrape will find the most relevant link in searching result
func wikiPageScrape(searchLink string, target string) string {
func wikiPageScrape(searchLink string, target string, numLink int) []string {
// Create list to store the scores
var scores []float64
// A list to store all the links
var allLink []string
// A list to save return links
var allLink, retLink []string
// Create a new collector object
collector := colly.NewCollector(
// Limited to visiting domain to wikipedia
......@@ -61,9 +62,15 @@ func wikiPageScrape(searchLink string, target string) string {
// Print the total number of link obtained
fmt.Printf("[INFO] Obtain total %d link\n", len(allLink))
// Calculate the index of link with the max score
maxScoreIndex := maxArg(scores)
// Another method to use: maxScoreIndex := maxArg(scores)
// Obtain the top n highest score
topIndex := topNIndex(numLink, scores)
retLink = make([]string, numLink)
for i, idx := range topIndex {
retLink[i] = allLink[idx]
}
// Return the link with the max score
return allLink[maxScoreIndex]
return retLink
}
// wikiIntroScrape scrape the introduction part of the wikipedia,
......@@ -108,13 +115,37 @@ func wikiIntroScrape(wikiLink string) (description string, title string, content
return paragraphs[0], pageTitle, content
}
// createWikiContents builds one Article per entry in links, in the same
// order. Each link is passed to wikiIntroScrape, whose description, title and
// content results fill the Article together with the link itself as URL and
// the fixed author "WIKIPEDIA". The returned slice always has len(links)
// entries.
func createWikiContents(links []string) []Article {
	previews := make([]Article, 0, len(links))
	for _, pageLink := range links {
		intro, heading, body := wikiIntroScrape(pageLink)
		previews = append(previews, Article{
			Author:      "WIKIPEDIA",
			Title:       heading,
			Description: intro,
			URL:         pageLink,
			Content:     body,
		})
	}
	return previews
}
// Search is the function used to search the keyword
func Search(key string) (description string, title string, contents string, wikiLink string) {
func Search(key string) []Article {
// Reformat search key with multiple key words
key = reformatSearchKey(key)
fmt.Println(key)
// Generate the link for searching
var wikiLink string
wikiLink = wikiSearchLinkGenerator(key)
wikiLink = wikiPageScrape(wikiLink, key)
fmt.Printf("[INFO] The most relevant link is: %s\n", wikiLink)
description, title, contents = wikiIntroScrape(wikiLink)
return description, title, contents, wikiLink
// Obtain the most related links
var topLinks []string
topLinks = wikiPageScrape(wikiLink, key, 5)
fmt.Printf("[INFO] The most relevant link is: %s\n", topLinks)
// Create a wiki preview articles and link
var articles []Article
articles = createWikiContents(topLinks)
return articles
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment