Skip to content

Commit 5c20b57

Browse files
committed
Web crawler feature started
1 parent f4278fc commit 5c20b57

16 files changed

+856
-182
lines changed

src/Common/Logger.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
<?php
22

3+
/*
4+
* This file is part of the snippetify package.
5+
*
6+
* (c) Evens Pierre <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
312
namespace Snippetify\SnippetSniffer\Common;
413

514
use Monolog\Logger as BaseLogger;

src/Common/MetaSnippetCollection.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the snippetify package.
5+
*
6+
* (c) Evens Pierre <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Snippetify\SnippetSniffer\Common;
13+
14+
/**
15+
* Groups a URI, a web page and the list of snippets associated with them.
16+
*/
17+
class MetaSnippetCollection
18+
{
19+
/**
20+
* @var \Psr\Http\Message\UriInterface
21+
*/
22+
public $uri;
23+
24+
/**
25+
* @var \Snippetify\SnippetSniffer\Common\WebPage
26+
*/
27+
public $page;
28+
29+
/**
30+
* @var \Snippetify\SnippetSniffer\Common\Snippet[]
31+
*/
32+
public $snippets;
33+
34+
/**
35+
* @return void
36+
*/
37+
public function __construct(array $attributes = [])
38+
{
39+
foreach ($attributes as $key => $value) {
40+
if (property_exists($this, $key)) {
41+
$this->{$key} = $value;
42+
}
43+
}
44+
}
45+
46+
/**
47+
* To array
48+
*
49+
* @return array
50+
*/
51+
public function toArray() {
52+
return get_object_vars($this);
53+
}
54+
55+
/**
56+
* To string
57+
*
58+
* @return string
59+
*/
60+
public function __toString() {
61+
return "Meta Snippet collection: ".json_encode($this->toArray());
62+
}
63+
}

src/Common/Snippet.php

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,18 @@
11
<?php
22

3+
/*
4+
* This file is part of the snippetify package.
5+
*
6+
* (c) Evens Pierre <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
312
namespace Snippetify\SnippetSniffer\Common;
413

14+
use Snippetify\ProgrammingLanguages\Facades\Languages;
15+
516
/**
617
* The snippet's class.
718
*/
@@ -10,44 +21,41 @@ class Snippet
1021
const WIKI_TYPE = 'wiki';
1122

1223
/**
13-
* The snippet's title.
1424
* @var string
1525
*/
1626
public $title;
1727

1828
/**
19-
* The snippet's code.
2029
* @var string
2130
*/
2231
public $code;
2332

2433
/**
25-
* The snippet's description.
2634
* @var string
2735
*/
2836
public $description;
2937

3038
/**
31-
* The snippet's type.
3239
* @var string
3340
*/
3441
public $type;
3542

3643
/**
37-
* The snippet's meta.
44+
* @var string
45+
*/
46+
public $language;
47+
48+
/**
3849
* @var array
3950
*/
4051
public $meta;
4152

4253
/**
43-
* The snippet's tags.
4454
* @var array
4555
*/
4656
public $tags;
4757

4858
/**
49-
* Create a new Snippet instance.
50-
*
5159
* @return void
5260
*/
5361
public function __construct(array $attributes = [])
@@ -57,6 +65,24 @@ public function __construct(array $attributes = [])
5765
$this->{$key} = $value;
5866
}
5967
}
68+
69+
$this->setLanguageFromTags($attributes);
70+
}
71+
72+
/**
73+
* Set language from tags
74+
*
75+
* @return void
76+
*/
77+
private function setLanguageFromTags(array $attributes)
78+
{
79+
if (!empty($attributes['tags'])) {
80+
foreach ($attributes['tags'] as $tag) {
81+
if (Languages::exists($tag)) {
82+
$this->language = ucfirst($tag);
83+
}
84+
}
85+
}
6086
}
6187

6288
/**

src/Common/WebPage.php

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the snippetify package.
5+
*
6+
* (c) Evens Pierre <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Snippetify\SnippetSniffer\Common;
13+
14+
/**
15+
* Holds a web page's site name, site URI, title, summary, link, plain text and meta tags.
16+
*/
17+
class WebPage
18+
{
19+
/**
20+
* @var string
21+
*/
22+
public $siteName;
23+
24+
/**
25+
* @var \Psr\Http\Message\UriInterface
26+
*/
27+
public $siteUri;
28+
29+
/**
30+
* @var string
31+
*/
32+
public $title;
33+
34+
/**
35+
* @var string
36+
*/
37+
public $summary;
38+
39+
/**
40+
* @var \Psr\Http\Message\UriInterface
41+
*/
42+
public $link;
43+
44+
/**
45+
* @var string
46+
*/
47+
public $plainText;
48+
49+
/**
50+
* @var array
51+
*/
52+
public $metaTags;
53+
54+
/**
55+
* @return void
56+
*/
57+
public function __construct(array $attributes = [])
58+
{
59+
foreach ($attributes as $key => $value) {
60+
if (property_exists($this, $key)) {
61+
$this->{$key} = $value;
62+
}
63+
}
64+
}
65+
66+
/**
67+
* To array
68+
*
69+
* @return array
70+
*/
71+
public function toArray() {
72+
return get_object_vars($this);
73+
}
74+
75+
/**
76+
* To string
77+
*
78+
* @return string
79+
*/
80+
public function __toString() {
81+
return "Web Page: ".json_encode($this->toArray());
82+
}
83+
}

src/Core.php

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,49 @@
11
<?php
22

3+
/*
4+
* This file is part of the snippetify package.
5+
*
6+
* (c) Evens Pierre <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
312
namespace Snippetify\SnippetSniffer;
413

514
class Core
615
{
7-
const APP_NAME = 'Snippet sniffer';
8-
const APP_TYPE = 'snippetify-sniffer';
9-
const APP_VERSION = '1.0.0';
10-
const USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0';
16+
public const APP_NAME = 'Snippet sniffer';
17+
public const APP_TYPE = 'snippetify-sniffer';
18+
public const APP_VERSION = '1.1.0';
19+
20+
// Crawler
21+
public const CRAWLER_PROFILE = \Spatie\Crawler\CrawlSubdomains::class;
22+
public const CRAWLER_CONCURENCY = 10;
23+
public const CRAWLER_IGNORE_ROBOTS = true;
24+
public const CRAWLER_MAXIMUM_DEPTH = 50;
25+
public const CRAWLER_EXECUTE_JAVASCRIPT = false;
26+
public const CRAWLER_MAXIMUM_CRAWL_COUNT = 1500;
27+
public const CRAWLER_PARSEABLE_MIME_TYPES = 'text/html';
28+
public const CRAWLER_MAXIMUM_RESPONSE_SIZE = 1024 * 1024 * 3;
29+
public const CRAWLER_DELAY_BETWEEN_REQUESTS = 250;
30+
public const CRAWLER_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0';
31+
32+
/**
33+
* Html Snippet tags
34+
* Add all html snippet tags here
35+
*
36+
* @var string
37+
*/
38+
public const HTML_SNIPPET_TAGS = 'pre[class] code, div[class] code, .highlight pre, code[class]';
39+
40+
/**
41+
* Html tags to index
42+
* Add all html tags to index here
43+
*
44+
* @var string
45+
*/
46+
public const HTML_TAGS_TO_INDEX = 'h1, h2, h3, h4, h5, h6, p, li';
1147

1248

1349
/**

0 commit comments

Comments
 (0)