Skip to content

Commit

Permalink
Merge pull request #13 from wbruno/master
Browse files Browse the repository at this point in the history
Cache on MongoDB
  • Loading branch information
rafaell-lycan committed Apr 1, 2015
2 parents 281d6fe + 189ef0c commit ec35c1a
Show file tree
Hide file tree
Showing 16 changed files with 434 additions and 127 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
language: node_js
node_js:
- "0.10"
- "0.10"
services:
- mongodb
80 changes: 71 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,89 @@ A simple scraping application to visualize data about water in São Paulo.
[![License](http://img.shields.io/:license-mit-blue.svg)](https://github.com/rafaell-lycan/sabesp-mananciais-api/blob/master/LICENSE)


#### A little bit about the technologies involved in this project:
## A little bit about the technologies involved in this project:
- Node.js 0.10.x
- Express 4.11.2
- Request 2.53.0
- Cheerio 0.18.0
- Promise 6.1.0
- MongoDB 2.6.7


#### Dev Dependencies:
## Dev Dependencies:
- Nodemon 1.3.6
- JSHint 2.6.0
- Mocha 2.1.0
- Chai 1.10.0
- Supertest 0.15.0
- Istanbul 0.3.5

#### Route Schema:
Description | Method | URL | Params
--- | --- | --- | ---
Get data of today | GET | `/` | NULL
Get data of a specific day | GET | `/:date` | YYYY-MM-DD

#### OBS:
## Tests
```
$ npm test # unit tests
$ npm run test-api # integration tests (with database)
```

## Route Schema:
Description | Method | URL | Params
---------------------------| ------ | ----------- | ---------
Get data of today | GET | `/` | NULL
Get data of a specific day | GET | `/:date` | YYYY-MM-DD
Get data of today | GET | `/v1` | NULL
Get data of a specific day | GET | `/v1/:date` | YYYY-MM-DD
Get data of today | GET | `/v2` | NULL
Get data of a specific day | GET | `/v2/:date` | YYYY-MM-DD

### v0
```
[
{
"name": "Cantareira",
"data": [
{"key": "volume armazenado", "value": "7,2 %"},
{"key": "pluviometria do dia", "value": "0,0 mm"},
{"key": "pluviometria acumulada no mês", "value": "0,0 mm"},
{"key": "média histórica do mês", "value": "271,1 mm"} ]
},
//...
]
```

### v1
```
{
"_id": "551b395e3bc651ca819d4752",
"date": "2015-03-31",
"dams": [
{
"name": "Cantareira",
"data": [
{"key": "volume armazenado", "value": "19,0 %"},
{"key": "pluviometria do dia", "value": "0,2 mm"},
{"key": "pluviometria acumulada no mês", "value": "206,5 mm"},
{"key": "média histórica do mês", "value": "178,0 mm"}
]
},
//...
]
}
```

### v2
@[wcastello](https://github.com/wcastello) suggestion:
```
[
{
"name": "Cantareira",
"data": {
"volume_armazenado": "19,0 %",
"pluviometria_do_dia": "0,2 mm",
"pluviometria_acumulada_no_mes": "206,5 mm",
"media_historica_do_mes": "178,0 mm"
}
},
//...
]
```

## OBS:
It isn't possible to get data before January 1st, 2003.
8 changes: 8 additions & 0 deletions config/default.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"mongo": {
"server": "localhost",
"port": "27017",
"database": "sabesp"
}
}

15 changes: 2 additions & 13 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,13 @@
(function () {
'use strict';
var express = require('express'),
app = express(),
Sabesp = require('./lib/Sabesp');
app = express();

// Heroku port settings
app.set('port', (process.env.PORT || 8080));
app.use(express.static(__dirname));

app.get('/', function (req, res) {
Sabesp.fetch().then(function(resolve, reject) {
res.json(resolve);
});
});

app.get('/:date', function (req, res) {
Sabesp.fetch(req.params.date).then(function(resolve, reject) {
res.json(resolve);
});
});
app.use('/', require('./routes'));

app.listen(app.get('port'), function () {
console.log('Magic happens on port: ' + app.get('port'));
Expand Down
25 changes: 25 additions & 0 deletions lib/APIVersions.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/**
 * Response formatters, one per API version. Each formatter receives the
 * resolved payload (an object with a `dams` array) and the response object,
 * and writes the version-specific JSON shape via `res.json`.
 */
var api = {
  /**
   * v0: responds with the bare list of dams.
   * Responds with an empty array when the payload is missing or has no `dams`.
   *
   * @param {Object} resolve - resolved payload.
   * @param {Object} res - response object exposing `json`.
   */
  v0: function(resolve, res) {
    res.json((resolve && resolve.dams) || []);
  },

  /**
   * v1: responds with the payload exactly as it was resolved.
   *
   * @param {Object} resolve - resolved payload.
   * @param {Object} res - response object exposing `json`.
   */
  v1: function(resolve, res) {
    res.json(resolve);
  },

  /**
   * v2: responds with a list of dams whose `data` is a flat object keyed by
   * a normalized form of each entry's `key` (whitespace runs become `_`,
   * and the accented letters ê, é and ó are replaced by e, e and o).
   * Responds with an empty array when the payload is missing or has no `dams`.
   *
   * @param {Object} resolve - resolved payload.
   * @param {Object} res - response object exposing `json`.
   */
  v2: function(resolve, res) {
    var dams = (resolve && resolve.dams) || [];

    var ret = dams.map(function(each) {
      var data = {};

      (each.data || []).forEach(function(d) {
        // Global patterns: a string pattern would only replace the first
        // occurrence of each accented letter in the key.
        var key = d.key
          .replace(/\s+/g, '_')
          .replace(/[êé]/g, 'e')
          .replace(/ó/g, 'o');

        data[key] = d.value;
      });

      return { name: each.name, data: data };
    });

    res.json(ret);
  }
};

/**
 * Rejection handler: logs the rejection reason to the console.
 * It receives no response object, so it does not answer the request itself.
 *
 * @param {*} reject - rejection reason.
 */
api.reject = function(reject) {
  console.log(reject);
};

module.exports = api;
30 changes: 22 additions & 8 deletions lib/Helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,40 +2,45 @@
var cheerio = require('cheerio');

var Helper = {
json: [],
data: null,
$ : null,
dams : {}
};

Helper.getDamName = function (str) {
var name = str.split(/[./]+/)[1];
return Helper.dams[name];
return str.split(/[./]+/)[1];
};

Helper.buildJSON = function (cssSelector, key) {
Helper.data.find(cssSelector).each(function (i, elem) {
Helper.json[i].data.push({
Helper.json.dams[i].data.push({
key : key,
value : Helper.$(elem).next().text()
});
});
};

Helper.parserHTML = function (html) {
Helper.parserHTML = function (html, date) {
Helper.json = {
date: '',
dams: []
};
Helper.$ = cheerio.load(html);

Helper.$('#tabDados').filter(function () {
Helper.data = Helper.$(this);

Helper.json.date = date;

// Fetch each images on context
Helper.data.find('img').each(function (i, elem) {
Helper.json[i] = {
name : Helper.getDamName(elem.attribs.src),
Helper.json.dams[i] = {
name : Helper.dams[Helper.getDamName(elem.attribs.src)],
data : []
};
});


// Fetch each td with content "volume armazenado"
Helper.buildJSON('td:contains(volume armazenado)', 'volume armazenado');

Expand All @@ -52,6 +57,15 @@
return Helper.json;
};

/**
 * Builds today's date, in the local time zone, as a `YYYY-MM-DD` string.
 * Month and day are always zero-padded to two digits.
 *
 * @returns {string} the current date, e.g. `2015-04-02`.
 */
Helper.today = function() {
  var now = new Date();

  // Pads single-digit values (1-9) with a leading zero.
  var pad = function(n) {
    return (n <= 9 ? '0' : '') + n;
  };

  // getMonth() is zero-based, hence the + 1.
  return now.getFullYear() + '-' + pad(now.getMonth() + 1) + '-' + pad(now.getDate());
};

Helper.buildData = function(date, token) {
date = date.split('-');

Expand All @@ -67,4 +81,4 @@
};

module.exports = Helper;
})();
})();
39 changes: 39 additions & 0 deletions lib/Mongo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
var MongoClient = require('mongodb').MongoClient,
config = require('config');

/**
 * Minimal wrapper over the MongoDB driver exposing `findOne` and `insert`.
 * Connection settings are read from `config` (`mongo.server`, `mongo.port`,
 * `mongo.database`). Each operation opens its own connection and closes it
 * once the operation's callback has fired, so connections are not leaked.
 */
/* istanbul ignore next */
var Mongo = (function() {
  'use strict';

  /**
   * Connects, runs a single operation against `collection`, closes the
   * connection and forwards the operation's callback arguments to `callback`.
   * When no connection could be obtained, `callback` receives the connect error.
   */
  function withCollection(collection, operation, callback) {
    module._init(function(err, db) {
      if (!db) {
        callback(err);
        return;
      }

      operation(db.collection(collection), function() {
        // Release the connection before handing the result to the caller.
        db.close();
        callback.apply(null, arguments);
      });
    });
  }

  var module = {
    /**
     * Opens a connection using the configured server, port and database.
     *
     * @param {Function} callback - called with `(err, db)`.
     */
    _init: function(callback) {
      var url = 'mongodb://' + config.get('mongo.server') + ':' + config.get('mongo.port') + '/' + config.get('mongo.database');

      MongoClient.connect(url, callback);
    },

    /**
     * Finds a single document matching `query` in `collection`.
     *
     * @param {string} collection - collection name.
     * @param {Object} query - query passed to the driver's `findOne`.
     * @param {Function} callback - called with the driver's callback arguments,
     *   or with the connection error alone when connecting fails.
     */
    findOne: function(collection, query, callback) {
      withCollection(collection, function(target, done) {
        target.findOne(query, done);
      }, callback);
    },

    /**
     * Inserts `data` into `collection`.
     *
     * @param {string} collection - collection name.
     * @param {Object} data - data passed to the driver's `insert`.
     * @param {Function} callback - called with the driver's callback arguments,
     *   or with the connection error alone when connecting fails.
     */
    insert: function(collection, data, callback) {
      withCollection(collection, function(target, done) {
        target.insert(data, done);
      }, callback);
    }
  };

  return module;
}());

module.exports = Mongo;
61 changes: 21 additions & 40 deletions lib/Sabesp.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
Promise = require('promise'),
Helper = require('./Helper');

// Our Sabesp url
var token;

Helper.dams = {
'sistemaCantareira': 'Cantareira',
'sistemaAltoTiete': 'Alto Tietê',
Expand All @@ -20,42 +17,30 @@
var url = 'http://www2.sabesp.com.br/mananciais/DivulgacaoSiteSabesp.aspx';

var Sabesp = {};
Sabesp.fetch = function(date) {
if (date) {
return new Promise(function(resolve, reject) {
var data = Helper.buildData(date, token);

request({
'url': url,
'method': 'POST',
'headers': {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*-/*;q=0.8',
'Host': 'www2.sabesp.com.br',
'Origin': 'http://www2.sabesp.com.br',
'Referer': url,
'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.114 Safari/537.36',
},
'jar': true,
'form': data,
}, function(error, response, html){
resolve(Helper.parserHTML(html));
}
);
Sabesp.fetch = function(date, token) {
return new Promise(function(resolve, reject) {
var data = Helper.buildData(date, token);

request({
'url': url,
'method': 'POST',
'headers': {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*-/*;q=0.8',
'Host': 'www2.sabesp.com.br',
'Origin': 'http://www2.sabesp.com.br',
'Referer': url,
'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.114 Safari/537.36',
},
'jar': true,
'form': data,
}, function(error, response, html){
resolve(Helper.parserHTML(html, date));
}
);

});
} else {
return new Promise(Sabesp.today);
}
};
Sabesp.today = function(resolve, reject) {
request(url, function (error, response, html) {
if (error) {
reject(error);
} else {
resolve(Helper.parserHTML(html));
}
});
};

Sabesp.getToken = function() {
return new Promise(function(resolve, reject) {
request(url, function (error, response, html) {
Expand All @@ -73,9 +58,5 @@
});
};

Sabesp.getToken().then(function(resolve, reject) {
token = resolve;
});

module.exports = Sabesp;
})();
6 changes: 4 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,21 @@
"scripts": {
"start": "node index.js",
"pre-test": "./node_modules/jshint/bin/jshint *.js lib/*.js",
"test": "node_modules/istanbul/lib/cli.js cover node_modules/.bin/_mocha tests/*"
"test": "node_modules/istanbul/lib/cli.js cover node_modules/.bin/_mocha tests/unit/*",
"test-api": "node_modules/istanbul/lib/cli.js cover node_modules/.bin/_mocha tests/api/*"
},
"engines": {
"node": "0.10.x"
},
"dependencies": {
"cheerio": "^0.18.0",
"config": "^1.12.0",
"express": "^4.11.2",
"mongodb": "^1.4.34",
"promise": "^6.1.0",
"request": "^2.53.0"
},
"devDependencies": {
"chai": "^1.10.0",
"istanbul": "^0.3.5",
"jshint": "^2.6.0",
"mocha": "^2.1.0",
Expand Down
Loading

0 comments on commit ec35c1a

Please sign in to comment.