diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4cc1748 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +core-data_recipe.csv* +node_modules diff --git a/README.md b/README.md new file mode 100644 index 0000000..e185e6b --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Reciplexity + +Use scraped recipe data to do fun things. + +## Getting started + +You'll need this dataset, via Kaggle: https://www.kaggle.com/elisaxxygao/foodrecsysv1/version/1?select=core-data_recipe.csv diff --git a/app.js b/app.js deleted file mode 100644 index 516a620..0000000 --- a/app.js +++ /dev/null @@ -1,45 +0,0 @@ - -/** - * Module dependencies. - */ - -var express = require('express') - , routes = require('./routes') - , http = require('http') - , path = require('path') - -var app = express(); - -app.configure(function(){ - app.set('port', process.env.PORT || 3000); - app.set('views', __dirname + '/views'); - app.set('view engine', 'jade'); - app.use(express.favicon()); - app.use(express.logger('dev')); - app.use(express.bodyParser()); - app.use(express.methodOverride()); - app.use(app.router); - app.use(express.static(path.join(__dirname, 'public'))); -}); - -app.configure('development', function(){ - app.use(express.errorHandler()); -}); - -// Connect to the db -var mongo = require('mongodb'); - -mongo.connect("mongodb://localhost:27017/allrecipes", function(err, db) { - if(!err) { - console.log("We are connected"); - } - http.createServer(app).listen(app.get('port'), function(){ - console.log("Express server listening on port " + app.get('port')); - }); - app.set('db', db); - app.get('/', routes.index); - app.post('/crawl', require('./routes/crawl').crawl); - app.post('/parse', require('./routes/parse').parse); - app.post('/generate', require('./routes/generate').generate); - app.post('/list', require('./routes/generate').list); -}); diff --git a/mongo.js b/mongo.js deleted file mode 100644 index e69de29..0000000 diff --git a/package.json b/package.json index 32bfb5f..6fb6728 100644 --- a/package.json +++ b/package.json @@ -1,16 +1,10 @@ { - "name": "application-name", + "name": "reciplexity", "version": "0.0.1", "private": true, "scripts": { "start": "node app" }, "dependencies": { - "express": "3.1.0", - "jade": "*", - "big-xml": "~0.6.0", - "mongodb": "~1.2.12", - "request": "~2.12.0", - "jsdom": "~0.5.0" } } diff --git a/public/stylesheets/style.css b/public/stylesheets/style.css deleted file mode 100644 index 30e047d..0000000 --- a/public/stylesheets/style.css +++ /dev/null @@ -1,8 +0,0 @@ -body { - padding: 50px; - font: 14px "Lucida Grande", Helvetica, Arial, sans-serif; -} - -a { - color: #00B7FF; -} \ No newline at end of file diff --git a/routes/.crawl.js.swp b/routes/.crawl.js.swp deleted file mode 100644 index 301ae1e..0000000 Binary files a/routes/.crawl.js.swp and /dev/null differ diff --git a/routes/crawl.js b/routes/crawl.js deleted file mode 100644 index 1ecc042..0000000 --- a/routes/crawl.js +++ /dev/null @@ -1,82 +0,0 @@ -// Retrieve ingredient lists from server -var http = require('http') - , jsdom = require('jsdom') - , db - , collection; - -exports.crawl = function(req){ - db = req.app.settings.db; - db.collection('urls', function(err, c) { - collection = c; - // Get ingredient lists - var cursor = collection.find({}); - - recurse(cursor, 0); - }); - - return function(){}; -} - -function recurse(cursor, index){ - cursor.nextObject(function(err, item){ - if (item && item.url) { - extract(item.url, success, failure) - - function success(arr){ - // save ingredients to DB - console.log(arr, index); - db.collection('ingredients', function(err, ingredients){ - ingredients.insert({ ingredients: arr, index: index }); - collection.remove({ url : item.url}); - }); - - // advance cursor and go again - recurse(cursor, index + 1); - }; - - function failure(){ - // wait a while and try again - console.log('timeout happened') - recurse(cursor, index + 1); - }; - - } - }); -} - -// Open a URL and extract the links. Optionally, run a callback. -function extract(url, success, failure){ - var results = []; - - try { - http.get(url, function(res){ - var pageData = ""; - res.setEncoding('utf8'); - res.on('data', function (chunk) { - pageData += chunk; - }); - - res.on('end', function(){ - jsdom.env({ - html: pageData, - scripts: ['http://code.jquery.com/jquery-1.6.min.js'] - }, function(err, window){ - var $ = window.jQuery; - var results = []; - $('.ingredient-name').each(function(i, d){ - results.push($(this).text()); - }); - success(results); - }); - }).on('error', function(err){ - console.log(err); - failure(); - }); - }).setTimeout(2000, function(){ - console.log('Timed out.'); - failure(); - }); - } catch(err){ - failure(); - } -} diff --git a/routes/generate.js b/routes/generate.js deleted file mode 100644 index 535cb25..0000000 --- a/routes/generate.js +++ /dev/null @@ -1,58 +0,0 @@ -// Generate probability tables -var db, collection; - -exports.generate = function(req, res){ - db = req.app.settings.db; - db.collection('ingredients', function(err, c) { - ingredients = c; - - var cursor = ingredients.find({}); - - db.collection('table', function(err, t){ - recurse(cursor, t, 0); - res.send('done'); - }); - - }); - - return function(){}; -}; - -exports.list = function(req, res){ - db = req.app.settings.db; - db.collection('table', function(err, collection) { - var result = collection.find({}).sort({'appearances' : -1}).limit(20).toArray(function(err, stuff){ - res.send(stuff); - }); - }); - - return function(){}; -}; - -// This might be better as a native mongo function, but oh well. -function recurse(cursor, table, index){ - cursor.nextObject(function(err, item){ - if (!item) return; - - item.ingredients.forEach(function(ingredient){ - var others = item.ingredients.slice(0); - others.splice(others.indexOf(ingredient), 1); - - table.findOne({ name : ingredient }, function(err, result){ - entry = result || { name : ingredient, appearances : 0, used_with : {} }; - entry.appearances++; - - others.forEach(function(other){ - if (!entry.used_with[other]) entry.used_with[other] = 1; - else entry.used_with[other]++; - }); - - table.update({ name : ingredient }, entry, { upsert : true }, function(){ - // console.log(ingredient); - recurse(cursor, table, index + 1); - }); - }); - }); - }); - -} diff --git a/routes/index.js b/routes/index.js deleted file mode 100644 index f296005..0000000 --- a/routes/index.js +++ /dev/null @@ -1,8 +0,0 @@ - -/* - * GET home page. - */ - -exports.index = function(req, res){ - res.render('index', { title: 'Express' }); -}; \ No newline at end of file diff --git a/routes/parse.js b/routes/parse.js deleted file mode 100644 index bc77cd5..0000000 --- a/routes/parse.js +++ /dev/null @@ -1,19 +0,0 @@ -// Parse giant XML file into mongo - -exports.parse = function(req){ - var db = req.app.settings.db; - db.collection('urls', function(err, collection) { - - var bigXml = require('big-xml'); - var reader = bigXml.createReader('recipedetail.xml', /^(url)$/, {}); - reader.on('record', function(record) { - for (var prop in record.children){ - var entry = record.children[prop]; - if (entry.text && entry.text.substr(0,4) == "http"){ - collection.insert({url : entry.text, visited : false }); - } - } - }); - }); - return function(){}; -} diff --git a/routes/user.js b/routes/user.js deleted file mode 100644 index d5b34aa..0000000 --- a/routes/user.js +++ /dev/null @@ -1,8 +0,0 @@ - -/* - * GET users listing. - */ - -exports.list = function(req, res){ - res.send("respond with a resource"); -}; \ No newline at end of file diff --git a/views/.index.jade.swp b/views/.index.jade.swp deleted file mode 100644 index d3c2da3..0000000 Binary files a/views/.index.jade.swp and /dev/null differ diff --git a/views/index.jade b/views/index.jade deleted file mode 100644 index e30d6e0..0000000 --- a/views/index.jade +++ /dev/null @@ -1,22 +0,0 @@ -extends layout - -block content - h1= title - p Welcome to #{title} - h2 Create a new crawl - form(name="crawl", action="crawl", method="post") - | URL: - input(type="text", value="http://allrecipes.com", name="url") - input(type="submit", value="Crawl") - form(name="parse", action="parse", method="post") - | URL: - input(type="submit", value="Parse") - form(name="generate", action="generate", method="post") - | URL: - input(type="submit", value="Generate Table") - form(name="list", action="list", method="post") - | URL: - input(type="submit", value="Show Table") - form(name="random", action="random", method="post") - | URL: - input(type="submit", value="Random recipe") diff --git a/views/layout.jade b/views/layout.jade deleted file mode 100644 index 1b7b305..0000000 --- a/views/layout.jade +++ /dev/null @@ -1,7 +0,0 @@ -doctype 5 -html - head - title= title - link(rel='stylesheet', href='/stylesheets/style.css') - body - block content \ No newline at end of file