diff --git a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php index ba9c7b859..4a6778163 100644 --- a/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php +++ b/includes/Checker/Checks/Plugin_Repo/Plugin_Readme_Check.php @@ -12,6 +12,8 @@ use WordPress\Plugin_Check\Checker\Checks\Abstract_File_Check; use WordPress\Plugin_Check\Traits\Amend_Check_Result; use WordPress\Plugin_Check\Traits\Find_Readme; +use WordPress\Plugin_Check\Traits\TLD_Names; +use WordPress\Plugin_Check\Traits\External_Utils; use WordPress\Plugin_Check\Traits\License_Utils; use WordPress\Plugin_Check\Traits\Stable_Check; use WordPressdotorg\Plugin_Directory\Readme\Parser; @@ -27,6 +29,8 @@ class Plugin_Readme_Check extends Abstract_File_Check { use Amend_Check_Result; use Find_Readme; + use TLD_Names; + use External_Utils; use Stable_Check; use License_Utils; @@ -106,6 +110,9 @@ protected function check_files( Check_Result $result, array $files ) { // Check the readme file for contributors. $this->check_for_contributors( $result, $readme_file ); + + // Check for third parties privacy notes. + $this->check_for_privacy_notes( $result, $readme_file, $parser, $files ); } /** @@ -652,6 +659,27 @@ private function check_for_contributors( Check_Result $result, string $readme_fi } } + /** + * Checks the readme file for external privacy notes. + * + * @since 1.4.0 + * + * @param Check_Result $result The Check Result to amend. + * @param string $readme_file Readme file. 
+	 * @param Parser       $parser      Readme parser instance passed by check_files(); not used by this method yet.
+	 * @param array        $files       Plugin files; only the php, css and js ones are scanned.
+	 */
+	private function check_for_privacy_notes( Check_Result $result, string $readme_file, Parser $parser, array $files ) {
+		$existing_tld_names = $this->get_tld_names();
+		$domains            = $this->load_domains_mentioned_in_readme( $readme_file, $existing_tld_names );
+		$files_ext          = self::filter_files_by_extensions( $files, array( 'php', 'css', 'js' ) );
+
+		foreach ( $files_ext as $file ) {
+			// find_external_calls() returns nothing; assigning it to $result would replace the Check_Result with null.
+			$this->find_external_calls( $file );
+		}
+	}
+
 	/**
 	 * Returns current major WordPress version.
 	 *
diff --git a/includes/Traits/External_Utils.php b/includes/Traits/External_Utils.php
new file mode 100644
index 000000000..a48fbf161
--- /dev/null
+++ b/includes/Traits/External_Utils.php
@@ -0,0 +1,423 @@
+										strlen( $domain_tld ) ) {
+											$domain_tld = $tld;
+										}
+									}
+								}
+
+								if ( ! empty( $domain_tld ) ) {
+									// Get domain from host and tld
+									$domain = str_replace( '.' . $domain_tld, '', $host ); // remove the TLD from the host
+									$parts = explode( '.', $domain ); // split the remaining host into parts
+									$domain = end( $parts ) . '.' . $domain_tld;
+
+									//Find domain
+									$key = $this->get_key_domain_mentioned_in_readme( $domain );
+									if ( false !== $key ) {
+										// If found, just add URL
+										$domains_mentioned[ $key ]['urls'][] = $url;
+										if ( ! empty( $path ) ) {
+											$domains_mentioned[ $key ]['paths'][] = $path;
+										}
+									} else {
+										//Not found, create it.
+										$domain_mentioned = array(
+											'domains' => $this->add_domains_of_same_service( $domain ),
+											'urls' => array( $url ),
+											'paths' => array(),
+										);
+										if ( ! empty( $path ) ) {
+											$domain_mentioned['paths'] = array( $path );
+										}
+										$domains_mentioned[] = $domain_mentioned;
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+
+		}
+		if ( ! empty( $domains_mentioned ) ) {
+			$domains_mentioned = array_map( function ( $domain ) {
+				$domain['urls'] = array_unique( $domain['urls']);
+				return $domain;
+			}, $domains_mentioned );
+		}
+
+		return $domains_mentioned;
+	}
+
+	/**
+	 * Get key domain mentioned in readme file.
+	 *
+	 * @since 1.4.0
+	 *
+	 * @param string $string String.
+	 * @return int|string|false Array key of the matching entry in $this->domainsMentionedReadme, or false if not found.
+	 */
+	public function get_key_domain_mentioned_in_readme( $string ) {
+		if ( empty( $this->domainsMentionedReadme ) ) {
+			return false;
+		}
+
+		foreach ( $this->domainsMentionedReadme as $key => $entry ) {
+			if ( empty( $entry['domains'] ) ) {
+				continue;
+			}
+
+			foreach ( $entry['domains'] as $domain ) {
+				// Substring match: any $string containing a known domain matches. An empty
+				// domain is skipped because str_contains() is true for an empty needle.
+				if ( '' !== $domain && str_contains( $string, $domain ) ) {
+					return $key;
+				}
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Add domains of the same service.
+	 *
+	 * Returns the given domain followed by every other domain listed in the same
+	 * service group, without duplicates. A domain that belongs to no group is
+	 * returned on its own.
+	 *
+	 * @since 1.4.0
+	 *
+	 * @param string $domain Domain.
+	 * @return array An array containing domains of the same service.
+	 */
+	protected function add_domains_of_same_service( $domain ) {
+		// The keys are labels only; membership is decided by the value lists, so every
+		// domain of a service (including the one used as label) must appear in its list.
+		$services = array(
+			'paypal.com'    => array( 'paypal.com', 'paypalobjects.com' ),
+			'google.com'    => array( 'google.com', 'googleapis.com', 'googletagmanager.com' ),
+			'microsoft.com' => array( 'microsoft.com', 'outlook.com', 'live.com' ),
+			'atlassian.net' => array( 'atlassian.net', 'atlassian.com', 'trello.com' ),
+			'dropbox.com'   => array( 'dropbox.com', 'dropboxapi.com' ),
+			'tiktok.com'    => array( 'tiktok.com', 'tiktokapis.com' ),
+			'zendesk.com'   => array( 'zendesk.com', 'zdassets.com' ),
+		);
+
+		$domains = array( $domain );
+		foreach ( $services as $service_domains ) {
+			if ( in_array( $domain, $service_domains, true ) ) {
+				$domains = array_merge( $domains, $service_domains );
+			}
+		}
+
+		// array_unique() preserves keys; reindex so callers always get a plain list.
+		return array_values( array_unique( $domains ) );
+	}
+
+	/**
+	 * Check if domain is mentioned in readme file.
+	 *
+	 * @since 1.4.0
+	 *
+	 * @param string $domain Domain.
+	 * @return bool True if domain is mentioned in readme file, false otherwise.
+	 */
+	protected function is_domain_mentioned_in_readme( $domain ) {
+		return false !== $this->get_key_domain_mentioned_in_readme( $domain );
+	}
+
+	/**
+	 * Check if domain is documented in readme file.
+	 *
+	 * @since 1.4.0
+	 *
+	 * @param string $domain Domain.
+	 * @return bool True if domain is documented in readme file, false otherwise.
+	 */
+	protected function is_domain_documented_readme( $domain ) {
+		$key = $this->get_key_domain_mentioned_in_readme( $domain );
+
+		// A false key must stop here: used as an array offset it is cast to 0 and would
+		// silently test the paths of the first readme entry instead of this domain.
+		if ( false === $key || empty( $this->domainsMentionedReadme[ $key ]['paths'] ) ) {
+			return false;
+		}
+
+		// To lower down false positives while keeping the check we are ok to have just one of them
+		// (a privacy-looking path or a terms-looking path).
+		$needle_groups = array( $this->privacyCommonURIsPaths, $this->termsCommonURIsPaths );
+
+		foreach ( $this->domainsMentionedReadme[ $key ]['paths'] as $path ) {
+			foreach ( $needle_groups as $needles ) {
+				foreach ( $needles as $needle ) {
+					if ( str_contains( $path, $needle ) ) {
+						return true;
+					}
+				}
+			}
+		}
+
+		return false;
+	}
+
+	/**
+	 * Runs every external-call detector for a single file.
+	 *
+	 * NOTE(review): find_functions() and find_classes() read $this->stmts, which this
+	 * method does not populate from $file; confirm it is set before this runs.
+	 *
+	 * @since 1.4.0
+	 *
+	 * @param string $file Path of the file to scan.
+	 * @return void
+	 */
+	protected function find_external_calls( $file ) {
+		$lines = file( $file );
+		if ( false === $lines ) {
+			// Unreadable file: hand the line-based detectors an empty list instead of false.
+			$lines = array();
+		}
+
+		$this->find_functions();
+		$this->find_classes();
+		$this->regex_estructures( $lines );
+		$this->find_declarations( $lines );
+	}
+
+	/**
+	 * Check PHP function calls loading URLs.
+	 *
+	 * Walks the function calls found in $this->stmts, inspects the second argument of
+	 * the script/style enqueue functions and the first argument of the HTTP and
+	 * file-reading functions, and logs the call when checkArgGetLog() returns a
+	 * non-empty log for that argument.
+	 */
+	function find_functions() {
+		if ( ! empty( $this->stmts ) ) {
+			$funcCalls = $this->nodeFinder->findInstanceOf( $this->stmts, Node\Expr\FuncCall::class );
+			if ( ! empty( $funcCalls ) ) {
+				foreach ( $funcCalls as $funccall ) {
+					$foundInSameLine = true;
+					$lastFoundExprArray = [];
+					if ( $this->hasFunctionName( $funccall ) ) {
+						$log = '';
+						$functionName = $this->getCallName($funccall);
+
+						//Enqueue functions
+						if ( in_array( $functionName, [
+							'wp_register_script',
+							'wp_enqueue_script',
+							'wp_register_style',
+							'wp_enqueue_style'
+						] ) ) {
+							// Look for second parameter of this PHP functions.
+							if ( isset( $funccall->args[1] ) ) {
+								$argValue = $funccall->args[1]->value;
+								if ( ! 
empty( $argValue ) ) {
+									$log = $this->checkArgGetLog( $argValue, $foundInSameLine, $lastFoundExprArray );
+								}
+							}
+						}
+
+						// External calls: HTTP request, download and file-reading functions whose first parameter is the location to load.
+						if ( in_array( $functionName, [
+							'wp_remote_request',
+							'wp_safe_remote_request',
+							'wp_remote_get',
+							'wp_safe_remote_get',
+							'wp_remote_post',
+							'wp_safe_remote_post',
+							'wp_remote_head',
+							'wp_safe_remote_head',
+							'wp_remote_fopen',
+							'file_get_contents',
+							'download_url',
+							'fopen',
+							'file'
+						] ) ) {
+							// Look for the first parameter of these PHP functions.
+							if ( isset( $funccall->args[0] ) ) {
+								$argValue = $funccall->args[0]->value;
+								if ( ! empty( $argValue ) ) {
+									$log = $this->checkArgGetLog( $argValue, $foundInSameLine, $lastFoundExprArray );
+								}
+							}
+						}
+
+						if ( ! empty( $log ) ) { // Only calls for which checkArgGetLog() produced a log are reported.
+							if ( ! $this->isAlreadyLogged( $funccall->getStartLine() ) ) { // One report per start line.
+								$this->logCallExpr( $funccall, 1, $log, true );
+								if(!$foundInSameLine && !empty($lastFoundExprArray)){ // NOTE(review): both variables are presumably updated by checkArgGetLog() by reference; confirm.
+									foreach ($lastFoundExprArray as $expr) {
+										$this->saveLog( 0, '# ↳ Found: ' . $this->prettyPrinter->prettyPrint( [ $expr ] ), $this->getLogPostContextId( $log, $this->getLogLineID( $funccall->getStartLine() ) ) );
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// Check PHP class instantiations loading URLs: inspects the first constructor argument of SoapClient and nusoap_client found in $this->stmts.
+	function find_classes() {
+		if ( ! empty( $this->stmts ) ) {
+			$classNews = $this->nodeFinder->findInstanceOf( $this->stmts, Node\Expr\New_::class );
+			if ( ! empty( $classNews ) ) {
+				foreach ( $classNews as $classNew ) {
+					$foundInSameLine = true;
+					$lastFoundExprArray = [];
+					if ( $this->hasClassNewName( $classNew ) ) {
+						$log = '';
+						$className = $classNew->class->toString();
+						if ( in_array( $className, [
+							'SoapClient',
+							'nusoap_client',
+						] ) ) {
+							if ( isset( $classNew->args[0] ) ) {
+								$argValue = $classNew->args[0]->value;
+								if ( ! empty( $argValue ) ) {
+									$log = $this->checkArgGetLog( $argValue, $foundInSameLine, $lastFoundExprArray );
+								}
+							}
+						}
+
+						if ( ! empty( $log ) ) { // Only instantiations for which checkArgGetLog() produced a log are reported.
+							if ( ! 
$this->isAlreadyLogged( $classNew->getStartLine() ) ) { // One report per start line.
+								$this->saveLinesNodeDetailLog( $classNew, $log, true );
+								if(!$foundInSameLine && !empty($lastFoundExprArray)){ // NOTE(review): both variables are presumably updated by checkArgGetLog() by reference; confirm.
+									foreach ($lastFoundExprArray as $expr) {
+										$this->saveLog( 0, '# ↳ Found: ' . $this->prettyPrinter->prettyPrint( [ $expr ] ), $this->getLogPostContextId( $log, $this->getLogLineID( $classNew->getStartLine() ) ) );
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+	}
+
+	// Regex over typical code structures containing URLs: every pattern below is run against $lines through logRegexIncidences(); the array keys are labels only (the loop does not read them).
+	function regex_estructures( $lines ) {
+		$regexArray = [ // NOTE(review): inside a character class "|" is a literal, so classes such as ["|\'|`] also accept a pipe character; confirm that is intended.
+			'src-simple'       => '/src\s*=\s*\\\?\'((.*?(<\?.+?\?>)?.*?)+?)\\\?\'/',
+			'src-double'       => '/src\s*=\s*\\\?"((.*?(<\?.+?\?>)?.*?)+?)\\\?"/',
+			'css-simple'       => '/[:|\\s]\s*url\s*\(\s*\'((.*?(<\?.+?\?>)?.*?)+?)\'\s*\)/',
+			// We are not covering the case of doing url(https://example.com), as without ' or " this is hard to find.
+			'css-double'       => '/[:|\\s]\s*url\s*\(\s*"((.*?(<\?.+?\?>)?.*?)+?)"\s*\)/',
+			//'css' => '[:|\\s]url\s*\(\s*["|\']?(.+?)["|\']?\)',
+			'jsImport'         => '/@import\s*["|\'|`]((.*?(<\?.+?\?>)?.*?)+?)["|\'|`]/',
+			'jsImportScripts'  => '/importScripts\s*\(\s*["|\'|`]((.*?(<\?.+?\?>)?.*?)+?)["|\'|`]\s*\)/',
+			'jsSetAttribute'   => '/setAttribute\s*\(\s*["|\'|`]src["|\'|`]\s*,\s*["|\'|`](.+?)["|\'|`]\s*\)/',
+			'jsAjax-simple'    => '/\s*url\s*:\s*\'((.*?(<\?.+?\?>)?.*?)+?)\'\s*/',
+			'jsAjax-double'    => '/\s*url\s*:\s*"((.*?(<\?.+?\?>)?.*?)+?)"\s*/',
+			'jsAjax-inverted'  => '/\s*url\s*:\s*`((.*?(<\?.+?\?>)?.*?)+?)`\s*/',
+			'jsFetch-simple'   => '/\s*fetch\s*\(\s*\'((.*?(<\?.+?\?>)?.*?)+?)\'\s*/',
+			'jsFetch-double'   => '/\s*fetch\s*\(\s*"((.*?(<\?.+?\?>)?.*?)+?)"\s*/',
+			'jsFetch-inverted' => '/\s*fetch\s*\(\s*`((.*?(<\?.+?\?>)?.*?)+?)`\s*/',
+		];
+
+		foreach ( $regexArray as $regex ) {
+			$this->logRegexIncidences( $lines, $regex, '', false );
+		}
+	}
+
+	// Look for any PHP / JS variable declaration and guess if that looks like an external service.
+	// TODO this function consumes too much time because of getStringsFromAssignsExpr, find ways to optimize it.
+	function find_declarations( $lines ) {
+		// Find all the assignments in PHP (Assign nodes in $this->stmts) and check every string extracted from their right-hand side.
+		if ( ! empty( $this->stmts ) ) {
+			$assigns = $this->nodeFinder->findInstanceOf( $this->stmts, Node\Expr\Assign::class );
+			if ( ! empty( $assigns ) ) {
+				foreach ( $assigns as $assign ) {
+					if ( ! empty( $assign->expr ) ) {
+						$foundInSameLine = true;
+						$stringsArray = $this->getStringsFromAssignsExpr( $assign->expr, $foundInSameLine );
+						if ( ! empty( $stringsArray ) ) {
+							foreach ( $stringsArray as $string ) {
+								$log = $this->checkStringGetLog( $string, true );
+								if ( ! empty( $log ) ) { // Only strings for which checkStringGetLog() produced a log are reported.
+									if ( ! $this->isAlreadyLogged( $assign->getStartLine() ) ) { // One report per start line.
+										$this->saveLinesNodeDetailLog( $assign, $log, true );
+										if(!$foundInSameLine){ // NOTE(review): presumably updated by getStringsFromAssignsExpr() by reference; confirm.
+											$this->saveLog( 0, '# ↳ Detected: ' . $string, $this->getLogPostContextId( $log, $this->getLogLineID( $assign->getStartLine() ) ) );
+										}
+									}
+								}
+							}
+						}
+					}
+				}
+			}
+		}
+
+		// Find anything else that looks like an assign (mostly for JS but will also catch PHP and HTML)
+		// Regex: anything looking like a URL preceded by "XXXX =" except for href. NOTE(review): the pattern literal below is truncated in this copy of the patch (no closing delimiter or quote before the method call); restore it from the original commit.
+		$regex = '/[a-zA-Z_$][a-zA-Z_$0-9]*(?logRegexIncidences( $lines, $regex, '', true );
+	}
+
+}
diff --git a/includes/Traits/TLD_Names.php b/includes/Traits/TLD_Names.php
new file mode 100644
index 000000000..be1ede8a5
--- /dev/null
+++ b/includes/Traits/TLD_Names.php
@@ -0,0 +1,9200 @@
+