From 9a35ac52a332abccbaf9b8919c2913a5bb825891 Mon Sep 17 00:00:00 2001 From: Alinson Xavier Date: Wed, 1 Oct 2014 18:47:42 -0400 Subject: [PATCH] Support Web of Science / Web of Knowledge. --- src/js/config.js | 3 + src/js/crawler.js | 6 +- src/js/parsers/generic_parser.js | 15 ++++ .../parsers/{scopus.js => scopus_parser.js} | 0 src/js/parsers/web_of_science_parser.js | 72 +++++++++++++++++++ src/js/views/show_map.js | 2 +- src/templates/index.html | 4 +- 7 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 src/js/parsers/generic_parser.js rename src/js/parsers/{scopus.js => scopus_parser.js} (100%) create mode 100644 src/js/parsers/web_of_science_parser.js diff --git a/src/js/config.js b/src/js/config.js index 014bc16..979ddcf 100644 --- a/src/js/config.js +++ b/src/js/config.js @@ -1,3 +1,6 @@ +var scholarius_options = { +}; + var visjs_options = { stabilize: false, nodes: { diff --git a/src/js/crawler.js b/src/js/crawler.js index b8f9454..cfa3661 100644 --- a/src/js/crawler.js +++ b/src/js/crawler.js @@ -34,12 +34,12 @@ ScholarCrawler.prototype.add_citations = function(parent_node, levels) var crawler = this; if(parent_node.is_dummy) - return this._add_citations_from_scopus(parent_node, levels); + return this._add_citations_from_parser(parent_node, levels); articles_db.findOne({_id: parent_node._id}, function(err, parent_article_db) { if(parent_article_db === null || !parent_article_db.is_cached) - crawler._add_citations_from_scopus(parent_node, levels); + crawler._add_citations_from_parser(parent_node, levels); else crawler._add_citations_from_db(crawler.article_to_node(parent_node.article), levels); }); @@ -65,7 +65,7 @@ ScholarCrawler.prototype._add_citations_from_db = function(parent_node, levels) }); } -ScholarCrawler.prototype._add_citations_from_scopus = function(parent_node, levels) +ScholarCrawler.prototype._add_citations_from_parser = function(parent_node, levels) { assert(levels >= 0, "levels should be non-negative"); diff --git 
a/src/js/parsers/generic_parser.js b/src/js/parsers/generic_parser.js
new file mode 100644
index 0000000..cd8d9e7
--- /dev/null
+++ b/src/js/parsers/generic_parser.js
@@ -0,0 +1,30 @@
+/**
+ * Dispatches results-page URLs to the parser of the site that serves them
+ * (Scopus or Web of Science / Web of Knowledge).
+ */
+function GenericParser()
+{
+    this.scopus_parser = new ScopusParser();
+    this.web_of_science_parser = new WebOfScienceParser();
+}
+
+/**
+ * Parses the page at `url` with the parser whose site name appears in it.
+ *
+ * @param {string} url - address of a results page. When it is empty or
+ *     undefined nothing is done and `callback` is not invoked.
+ * @param {function} callback - passed through to the selected parser.
+ */
+GenericParser.prototype.parse = function(url, callback)
+{
+    // Guard against a missing url (WebOfScienceParser sets citations_url to
+    // undefined for articles without citations): url.indexOf would throw.
+    if(!url) return;
+
+    if(url.indexOf("scopus.com") >= 0)
+        this.scopus_parser.parse(url, callback);
+    else if(url.indexOf("webofknowledge.com") >= 0)
+        this.web_of_science_parser.parse(url, callback);
+    else
+        console.warn("GenericParser: unsupported url: " + url);
+}
diff --git a/src/js/parsers/scopus.js b/src/js/parsers/scopus_parser.js
similarity index 100%
rename from src/js/parsers/scopus.js
rename to src/js/parsers/scopus_parser.js
diff --git a/src/js/parsers/web_of_science_parser.js b/src/js/parsers/web_of_science_parser.js
new file mode 100644
index 0000000..1ed57a3
--- /dev/null
+++ b/src/js/parsers/web_of_science_parser.js
@@ -0,0 +1,95 @@
+/**
+ * Scrapes article listings from Web of Science / Web of Knowledge results
+ * pages by loading them in a hidden iframe.
+ */
+function WebOfScienceParser()
+{
+}
+
+/**
+ * Loads the results page at `url` in a hidden iframe, builds one article
+ * per search result and passes the array of articles to `callback`.
+ *
+ * Fields set on each article, when found on the page: url, title, authors,
+ * source, year, citations_url, n_citations (0 when there is no citation
+ * link) and _id (md5 of title + authors).
+ *
+ * @param {string} url - address of a results page.
+ * @param {function(Array)} callback - receives the extracted articles.
+ */
+WebOfScienceParser.prototype._parse_results_page = function(url, callback)
+{
+    var iframe = document.createElement("iframe");
+    $(iframe).hide();
+
+    $(iframe).load(function()
+    {
+        var articles = [];
+        var ibody = $(iframe).contents()[0];
+
+        // Switch the page to 50 results and wait for the next load event
+        // (presumably the change reloads the results). If the page has no
+        // size selector, parse it as it is instead of waiting forever.
+        var select_pages = $(ibody.getElementsByName("pageSize").item(0));
+        if(select_pages.length > 0 && parseInt(select_pages.val(), 10) !== 50) {
+            select_pages.val(50);
+            select_pages.change();
+            return;
+        }
+
+        $(ibody).find(".search-results-content").each(function(index, li)
+        {
+            var article = {};
+
+            $(li).find("a[href^='/full_record']").each(function(index, tag) {
+                article.url = tag.href;
+                article.title = $.trim($(tag).text());
+            });
+
+            $(li).next().find("a[href^='/CitingArticles']").each(function(index, tag) {
+                article.citations_url = tag.href;
+                // Strip thousands separators: parseInt("1,234") yields 1.
+                article.n_citations = parseInt($(tag).text().replace(/,/g, ""), 10);
+            });
+
+            $(li).find('span.label').each(function(index, tag) {
+                if($(tag).text().indexOf("By:") === 0) {
+                    article.authors = $.trim($(tag).parent().text()).replace(/^By: /,"");
+                    // The element after the authors holds source and date;
+                    // its last four characters are read as the year.
+                    var source_year = $.trim($(tag).parent().next().text());
+                    article.source = source_year.replace(/Published.*/, "").replace(/\s\s+/g, ", ").replace(/, $/, "");
+                    article.year = parseInt(source_year.substr(source_year.length-4), 10);
+                }
+            });
+
+            if(!('n_citations' in article)) {
+                article.citations_url = undefined;
+                article.n_citations = 0;
+            }
+
+            article._id = $.md5(article.title + article.authors);
+
+            articles.push(article);
+        });
+
+        // Detach the iframe first so it is not leaked if callback throws.
+        document.body.removeChild(iframe);
+        callback(articles);
+    });
+
+    // The handler is bound and src is set before the iframe is attached, so
+    // the load event for `url` cannot be missed.
+    iframe.src = url;
+    document.body.appendChild(iframe);
+}
+
+/**
+ * Parses the results page at `url` and passes its articles to `callback`.
+ * Does nothing when `url` is empty or undefined.
+ */
+WebOfScienceParser.prototype.parse = function(url, callback)
+{
+    if(!url) return;
+    this._parse_results_page(url, callback);
+}
diff --git a/src/js/views/show_map.js b/src/js/views/show_map.js index 2a12e4b..e695add 100644 --- a/src/js/views/show_map.js +++ b/src/js/views/show_map.js @@ -14,7 +14,7 @@ ShowMapView.prototype.render = function(container) this.nodes.add(this.map.nodes); this.edges.add(this.map.edges); - var parser = new ScopusParser(); + var parser = new GenericParser(); var crawler = new ScholarCrawler(parser, this.nodes, this.edges); crawler.start(); diff --git a/src/templates/index.html b/src/templates/index.html index 485594c..e3ac6c0 100644 --- a/src/templates/index.html +++ b/src/templates/index.html @@ -10,7 +10,9 @@ - + + + 