From aa9bec456f220a7b68741348b5d8f62ead316839 Mon Sep 17 00:00:00 2001 From: Alexander Ose Date: Tue, 28 Dec 2021 15:23:04 -0800 Subject: [PATCH] Nuke everything and start over! --- .gitignore | 2 + README.md | 7 +++ app.js | 45 ------------------- mongo.js | 0 package.json | 8 +--- public/stylesheets/style.css | 8 ---- routes/.crawl.js.swp | Bin 12288 -> 0 bytes routes/crawl.js | 82 ----------------------------------- routes/generate.js | 58 ------------------------- routes/index.js | 8 ---- routes/parse.js | 19 -------- routes/user.js | 8 ---- views/.index.jade.swp | Bin 12288 -> 0 bytes views/index.jade | 22 ---------- views/layout.jade | 7 --- 15 files changed, 10 insertions(+), 264 deletions(-) create mode 100644 .gitignore create mode 100644 README.md delete mode 100644 app.js delete mode 100644 mongo.js delete mode 100644 public/stylesheets/style.css delete mode 100644 routes/.crawl.js.swp delete mode 100644 routes/crawl.js delete mode 100644 routes/generate.js delete mode 100644 routes/index.js delete mode 100644 routes/parse.js delete mode 100644 routes/user.js delete mode 100644 views/.index.jade.swp delete mode 100644 views/index.jade delete mode 100644 views/layout.jade diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4cc1748 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +core-data_recipe.csv* +node_modules diff --git a/README.md b/README.md new file mode 100644 index 0000000..e185e6b --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Reciplexity + +Use scraped recipe data to do fun things. + +## Getting started + +You'll need this dataset, via Kaggle: https://www.kaggle.com/elisaxxygao/foodrecsysv1/version/1?select=core-data_recipe.csv diff --git a/app.js b/app.js deleted file mode 100644 index 516a620..0000000 --- a/app.js +++ /dev/null @@ -1,45 +0,0 @@ - -/** - * Module dependencies. - */ - -var express = require('express') - , routes = require('./routes') - , http = require('http') - , path = require('path') - -var app = express(); - -app.configure(function(){ - app.set('port', process.env.PORT || 3000); - app.set('views', __dirname + '/views'); - app.set('view engine', 'jade'); - app.use(express.favicon()); - app.use(express.logger('dev')); - app.use(express.bodyParser()); - app.use(express.methodOverride()); - app.use(app.router); - app.use(express.static(path.join(__dirname, 'public'))); -}); - -app.configure('development', function(){ - app.use(express.errorHandler()); -}); - -// Connect to the db -var mongo = require('mongodb'); - -mongo.connect("mongodb://localhost:27017/allrecipes", function(err, db) { - if(!err) { - console.log("We are connected"); - } - http.createServer(app).listen(app.get('port'), function(){ - console.log("Express server listening on port " + app.get('port')); - }); - app.set('db', db); - app.get('/', routes.index); - app.post('/crawl', require('./routes/crawl').crawl); - app.post('/parse', require('./routes/parse').parse); - app.post('/generate', require('./routes/generate').generate); - app.post('/list', require('./routes/generate').list); -}); diff --git a/mongo.js b/mongo.js deleted file mode 100644 index e69de29..0000000 diff --git a/package.json b/package.json index 32bfb5f..6fb6728 100644 --- a/package.json +++ b/package.json @@ -1,16 +1,10 @@ { - "name": "application-name", + "name": "reciplexity", "version": "0.0.1", "private": true, "scripts": { "start": "node app" }, "dependencies": { - "express": "3.1.0", - "jade": "*", - "big-xml": "~0.6.0", - "mongodb": "~1.2.12", - "request": "~2.12.0", - "jsdom": "~0.5.0" } } diff --git a/public/stylesheets/style.css b/public/stylesheets/style.css deleted file mode 100644 index 30e047d..0000000 --- a/public/stylesheets/style.css +++ /dev/null @@ -1,8 +0,0 @@ -body { - padding: 50px; - font: 14px "Lucida Grande", Helvetica, Arial, sans-serif; -} - -a { - color: #00B7FF; -} \ No newline at end of file diff --git a/routes/.crawl.js.swp b/routes/.crawl.js.swp deleted file mode 100644 index 301ae1efcda1562f4d0e3cdd2cb5af5388baa677..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2O^6&t6vrz$Xg)RSQP8*RlI}6HHJb!9?y`hPM8t%c#pod{($ift)4M(0ld9_d zU>Ok)eq2<9kduOe2=O2axu__5)0-f;H_?L^4SMln1mnNDr+a#5*QoKNRKc%jrmO0` zdjI#T8g{!m_0(f0=|O&&q3vVrhf8Cp7w_M}UgC^(JugaDOvjB|J8Y^XzVR5-`)qEo z;YE=Ye%KSLA(K?&!WP$1Q<$s>E&;X?6|f3e1*`&A z0jq#jz$#!BunPQ76z~GZ-o1sfT{{pE{{L_O2C#lRV;_Mx!3FRlcmcG*_a2jC(&4-#+!JOmoxUa$}B0Y=ZA;C*lfm^p6&W8+hB2|NSl!PDRv zxCgA?%GlT7OYjLe59YvWa14xrJAiFg0jq#jz$)q;$d%Bh z2lw4WQ&UrQYNfHS!z6Y~FR)Z3A~TE27S2pp)uCDEaCOENY&y7`NT=eZ8ct??)YTUG z$;K6zP2)|uWSQi>RCU}5S9d~H<65j}w^m(}u`uB}JdE2?1fhuaWbAc?Q{%#e(4`Pz zor0_9;Qz2J3jOlOGpgtHn>PPu9>}fSkXPu7L zz3E27PXfUg&KZN;Pr8l#>*PWH0Plt|-raQOnOQ>FI=UN86ZO2dIPPh$&dMI51r;P+ zF5)G3ZFn2<<++N3YNoKX;h0SoQJAL#v*dTuc(EF7VKIRJib-;_ku|C~H>#SVd?ZGo z$c*dYg}Ecck~NDr;+OtrX?(o!WhCNlq1{wQ6@6hc(^bn=S|bz1=JXZWCX|Wv{DFzV z*#KMlR)m>GgHH8COdg$?J3(F?6f;P71Vv%Is5oJW*+Y22cC}72HKTp}XnOu4XRLBs z^aq;uix3bV-pNg+60W%>vQDTWu|fyvV6}LwTg}n=4yKOR!_q|H)DUCVu-auW)bPLD z2_uoM*T~?ty)e!Pm64a5x=8eJQGa7e@Z<$cUhE6X7oPcQCj(cxuaT3)PohX^*Hmwp;V|?Z2G?6C@!C)iPtzsc)V#gF7U8gd$Qk3R^ zHJTS^LS8zVm{rY(j)qx*1j<_y)ybq2I{xs8(8Uu6Hp)#^lwA>cy*T%VwP&n^Epp8e z+P}ZlDBnN1=sbp@Q_TgaIy}Jgh%gJ@%4~0$(rlZUShE^6k6k?SiYa&^pdzq}Lefza zQ;-7P`SuzV0ZxNba^6Bp+_hpl<4q0Xrxo|pgF_DN-vRG@$5x+3@T&8=sXwo%7V?N#Npq}KC-hix0{A|Dm&Q? lY&CP3n3Jd4XMQ_1GN;9y&@vPo$|4VxS~BSp%6Lgg_6Id)?Fs+@ diff --git a/routes/crawl.js b/routes/crawl.js deleted file mode 100644 index 1ecc042..0000000 --- a/routes/crawl.js +++ /dev/null @@ -1,82 +0,0 @@ -// Retrieve ingredient lists from server -var http = require('http') - , jsdom = require('jsdom') - , db - , collection; - -exports.crawl = function(req){ - db = req.app.settings.db; - db.collection('urls', function(err, c) { - collection = c; - // Get ingredient lists - var cursor = collection.find({}); - - recurse(cursor, 0); - }); - - return function(){}; -} - -function recurse(cursor, index){ - cursor.nextObject(function(err, item){ - if (item && item.url) { - extract(item.url, success, failure) - - function success(arr){ - // save ingredients to DB - console.log(arr, index); - db.collection('ingredients', function(err, ingredients){ - ingredients.insert({ ingredients: arr, index: index }); - collection.remove({ url : item.url}); - }); - - // advance cursor and go again - recurse(cursor, index + 1); - }; - - function failure(){ - // wait a while and try again - console.log('timeout happened') - recurse(cursor, index + 1); - }; - - } - }); -} - -// Open a URL and extract the links. Optionally, run a callback. -function extract(url, success, failure){ - var results = []; - - try { - http.get(url, function(res){ - var pageData = ""; - res.setEncoding('utf8'); - res.on('data', function (chunk) { - pageData += chunk; - }); - - res.on('end', function(){ - jsdom.env({ - html: pageData, - scripts: ['http://code.jquery.com/jquery-1.6.min.js'] - }, function(err, window){ - var $ = window.jQuery; - var results = []; - $('.ingredient-name').each(function(i, d){ - results.push($(this).text()); - }); - success(results); - }); - }).on('error', function(err){ - console.log(err); - failure(); - }); - }).setTimeout(2000, function(){ - console.log('Timed out.'); - failure(); - }); - } catch(err){ - failure(); - } -} diff --git a/routes/generate.js b/routes/generate.js deleted file mode 100644 index 535cb25..0000000 --- a/routes/generate.js +++ /dev/null @@ -1,58 +0,0 @@ -// Generate probability tables -var db, collection; - -exports.generate = function(req, res){ - db = req.app.settings.db; - db.collection('ingredients', function(err, c) { - ingredients = c; - - var cursor = ingredients.find({}); - - db.collection('table', function(err, t){ - recurse(cursor, t, 0); - res.send('done'); - }); - - }); - - return function(){}; -}; - -exports.list = function(req, res){ - db = req.app.settings.db; - db.collection('table', function(err, collection) { - var result = collection.find({}).sort({'appearances' : -1}).limit(20).toArray(function(err, stuff){ - res.send(stuff); - }); - }); - - return function(){}; -}; - -// This might be better as a native mongo function, but oh well. -function recurse(cursor, table, index){ - cursor.nextObject(function(err, item){ - if (!item) return; - - item.ingredients.forEach(function(ingredient){ - var others = item.ingredients.slice(0); - others.splice(others.indexOf(ingredient), 1); - - table.findOne({ name : ingredient }, function(err, result){ - entry = result || { name : ingredient, appearances : 0, used_with : {} }; - entry.appearances++; - - others.forEach(function(other){ - if (!entry.used_with[other]) entry.used_with[other] = 1; - else entry.used_with[other]++; - }); - - table.update({ name : ingredient }, entry, { upsert : true }, function(){ - // console.log(ingredient); - recurse(cursor, table, index + 1); - }); - }); - }); - }); - -} diff --git a/routes/index.js b/routes/index.js deleted file mode 100644 index f296005..0000000 --- a/routes/index.js +++ /dev/null @@ -1,8 +0,0 @@ - -/* - * GET home page. - */ - -exports.index = function(req, res){ - res.render('index', { title: 'Express' }); -}; \ No newline at end of file diff --git a/routes/parse.js b/routes/parse.js deleted file mode 100644 index bc77cd5..0000000 --- a/routes/parse.js +++ /dev/null @@ -1,19 +0,0 @@ -// Parse giant XML file into mongo - -exports.parse = function(req){ - var db = req.app.settings.db; - db.collection('urls', function(err, collection) { - - var bigXml = require('big-xml'); - var reader = bigXml.createReader('recipedetail.xml', /^(url)$/, {}); - reader.on('record', function(record) { - for (var prop in record.children){ - var entry = record.children[prop]; - if (entry.text && entry.text.substr(0,4) == "http"){ - collection.insert({url : entry.text, visited : false }); - } - } - }); - }); - return function(){}; -} diff --git a/routes/user.js b/routes/user.js deleted file mode 100644 index d5b34aa..0000000 --- a/routes/user.js +++ /dev/null @@ -1,8 +0,0 @@ - -/* - * GET users listing. - */ - -exports.list = function(req, res){ - res.send("respond with a resource"); -}; \ No newline at end of file diff --git a/views/.index.jade.swp b/views/.index.jade.swp deleted file mode 100644 index d3c2da36857e127c0bb5929e2529b795adffd8fb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI&zi-n(6bJBECYA!Ce*lMzM3EqA+5uE4QwM|uRa;RQn9lY~V>sU#``o1B$A7_J zP_gnKl$pPPk%5f?1UnKuJBezE$|wsG)%T>YlCz)h`99tfhpUh7Jy5qh>m=h6(bsQk z$MnwA(RHF!Yvab%iT1IdgoU&1HPzYIlLfoln4F^|7HNI0x4Il$RZExm=3fI>KmY;|fB*y_009U<00RHDfX^1_m+au2WJ$6x zjeKB000Izz00bZa0SG_<0uX=z1R!um1awTawMg_%QvLs5{RZ&kBGGrrXURv&d&yhL zbICw*P4e>s(Fe(~JBG2I-CD-qr)||nR5lLmF1DD y3za<7Og&~J4Ow}omS6b98-69nhrPb4ClneNJyVgho~FQws diff --git a/views/index.jade b/views/index.jade deleted file mode 100644 index e30d6e0..0000000 --- a/views/index.jade +++ /dev/null @@ -1,22 +0,0 @@ -extends layout - -block content - h1= title - p Welcome to #{title} - h2 Create a new crawl - form(name="crawl", action="crawl", method="post") - | URL: - input(type="text", value="http://allrecipes.com", name="url") - input(type="submit", value="Crawl") - form(name="parse", action="parse", method="post") - | URL: - input(type="submit", value="Parse") - form(name="generate", action="generate", method="post") - | URL: - input(type="submit", value="Generate Table") - form(name="list", action="list", method="post") - | URL: - input(type="submit", value="Show Table") - form(name="random", action="random", method="post") - | URL: - input(type="submit", value="Random recipe") diff --git a/views/layout.jade b/views/layout.jade deleted file mode 100644 index 1b7b305..0000000 --- a/views/layout.jade +++ /dev/null @@ -1,7 +0,0 @@ -doctype 5 -html - head - title= title - link(rel='stylesheet', href='/stylesheets/style.css') - body - block content \ No newline at end of file