diff --git a/core/ast/import.go b/core/ast/import.go index 9e4e857..f88d3b2 100644 --- a/core/ast/import.go +++ b/core/ast/import.go @@ -58,10 +58,18 @@ func (i *ImportNode) SetModuleNameNode(node *sitter.Node) { i.moduleNameNode = node } +func (i *ImportNode) GetModuleItemNode() *sitter.Node { + return i.moduleItemNode +} + func (i *ImportNode) SetModuleItemNode(node *sitter.Node) { i.moduleItemNode = node } +func (i *ImportNode) GetModuleAliasNode() *sitter.Node { + return i.moduleAliasNode +} + func (i *ImportNode) SetModuleAliasNode(node *sitter.Node) { i.moduleAliasNode = node } diff --git a/examples/plugin/callgraph/azureserver.py b/examples/plugin/callgraph/azureserver.py new file mode 100644 index 0000000..cd523e6 --- /dev/null +++ b/examples/plugin/callgraph/azureserver.py @@ -0,0 +1,146 @@ +import os +from flask import Flask, request, jsonify +from azure.storage.blob import BlobServiceClient +from azure.cosmos import CosmosClient +from azure.keyvault.secrets import SecretClient +from azure.ai.translation.text import TextTranslationClient, TranslatorCredential +from azure.servicebus import ServiceBusClient, ServiceBusMessage +from azure.identity import DefaultAzureCredential + +class BaseAzureService: + # Note - this block is unreachable in DFS, as parent constructors aren't resolved yet + def __init__(self, config: dict): + # Use DefaultAzureCredential which supports multiple authentication methods + self.credential = DefaultAzureCredential() + self.config = config + +class AzureStorageServices(BaseAzureService): + def __init__(self, config: dict): + super().__init__(config) + storage_connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING") + if not storage_connection_string: + raise EnvironmentError("AZURE_STORAGE_CONNECTION_STRING env var not set") + + self.blob_service_client = BlobServiceClient.from_connection_string(storage_connection_string) + self.cosmos_client = CosmosClient( + url=config["cosmos_endpoint"], + 
credential=self.credential + ) + self.keyvault_client = SecretClient( + vault_url=f"https://{config['keyvault_name']}.vault.azure.net/", + credential=self.credential + ) + + def get_file_url(self, container_name, blob_name): + blob_client = self.blob_service_client.get_blob_client( + container=container_name, + blob=blob_name + ) + return blob_client.url + + def run_cosmos_query(self, database_name, container_name, query): + database = self.cosmos_client.get_database_client(database_name) + container = database.get_container_client(container_name) + items = list(container.query_items(query=query, enable_cross_partition_query=True)) + return items + + def add_cosmos_document(self, database_name, container_name, data): + database = self.cosmos_client.get_database_client(database_name) + container = database.get_container_client(container_name) + response = container.create_item(body=data) + return f"Document {response['id']} added to {container_name}" + + def get_secret(self, secret_name): + secret = self.keyvault_client.get_secret(secret_name) + return secret.value + +class AzureAiServices(BaseAzureService): + def __init__(self, config: dict): + super().__init__(config) + translator_key = os.environ.get("AZURE_TRANSLATOR_KEY") + if not translator_key: + raise EnvironmentError("AZURE_TRANSLATOR_KEY env var not set") + + self.translator_credential = TranslatorCredential(translator_key, config["translator_region"]) + self.translator_client = TextTranslationClient(credential=self.translator_credential) + + def translate_text(self, text, target="en"): + response = self.translator_client.translate( + content=[text], + to=[target] + ) + return response[0].translations[0].text + +class AzureMessagingServices(BaseAzureService): + def __init__(self, config: dict): + super().__init__(config) + self.servicebus_connection_string = os.environ.get("AZURE_SERVICEBUS_CONNECTION_STRING") + if not self.servicebus_connection_string: + raise EnvironmentError("AZURE_SERVICEBUS_CONNECTION_STRING env var not set") + self.client = ServiceBusClient.from_connection_string( +
conn_str=self.servicebus_connection_string + ) + + def publish_message(self, queue_name, message): + with self.client.get_queue_sender(queue_name) as sender: + message = ServiceBusMessage(message) + sender.send_messages(message) + return "Message sent successfully" + +# Flask App +app = Flask(__name__) +config = { + "cosmos_endpoint": os.environ.get("AZURE_COSMOS_ENDPOINT", "https://your-cosmos-account.documents.azure.com:443/"), + "keyvault_name": os.environ.get("AZURE_KEYVAULT_NAME", "your-keyvault-name"), + "translator_region": os.environ.get("AZURE_TRANSLATOR_REGION", "eastus") +} + +storage_services = AzureStorageServices(config) +ai_services = AzureAiServices(config) +messaging_services = AzureMessagingServices(config) + +@app.route("/storage/url", methods=["GET"]) +def get_file_url(): + container = request.args.get("container") + blob = request.args.get("blob") + url = storage_services.get_file_url(container, blob) + return jsonify({"url": url}) + +@app.route("/cosmos/query", methods=["POST"]) +def cosmos_query(): + database = request.json.get("database") + container = request.json.get("container") + query = request.json.get("query") + result = storage_services.run_cosmos_query(database, container, query) + return jsonify(result) + +@app.route("/servicebus/publish", methods=["POST"]) +def servicebus_publish(): + queue = request.json.get("queue") + message = request.json.get("message") + status = messaging_services.publish_message(queue, message) + return jsonify({"status": status}) + +@app.route("/cosmos/add", methods=["POST"]) +def cosmos_add(): + database = request.json.get("database") + container = request.json.get("container") + data = request.json.get("data") + status = storage_services.add_cosmos_document(database, container, data) + return jsonify({"status": status}) + +@app.route("/secret/get", methods=["GET"]) +def secret_get():
+ secret_name = request.args.get("secret_name") + secret = storage_services.get_secret(secret_name) + return jsonify({"secret": secret}) + +@app.route("/translate", methods=["POST"]) +def translate_text(): + text = request.json.get("text") + target = request.json.get("target", "en") + translated = ai_services.translate_text(text, target) + return jsonify({"translated": translated}) + +if __name__ == "__main__": + app.run(debug=True) \ No newline at end of file diff --git a/examples/plugin/callgraph/gcpserver.py b/examples/plugin/callgraph/gcpserver.py new file mode 100644 index 0000000..548e2f3 --- /dev/null +++ b/examples/plugin/callgraph/gcpserver.py @@ -0,0 +1,124 @@ +import os +from flask import Flask, request, jsonify +from google import cloud +import google.cloud.storage as gcpstorage +from google.cloud import bigquery, pubsub_v1, secretmanager, translate_v2 as translatergcp +from google.oauth2 import service_account + +class BaseGCPService: + def __init__(self, config: dict): + # Resolve credentials from environment variable + credentials_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") + if not credentials_path: + raise EnvironmentError("GOOGLE_APPLICATION_CREDENTIALS env var not set") + + self.credentials = service_account.Credentials.from_service_account_file(credentials_path) + self.config = config + +class GCPStorageServices(BaseGCPService): + def __init__(self, config: dict): + super().__init__(config) + self.storage_client = gcpstorage.Client(credentials=self.credentials) + self.bq_client = bigquery.Client(credentials=self.credentials) + self.firestore_client = cloud.firestore.Client(credentials=self.credentials) + self.secret_client = secretmanager.SecretManagerServiceClient(credentials=self.credentials) + + def get_file_url(self, bucket_name, blob_name): + bucket = self.storage_client.bucket(bucket_name) + blob = bucket.blob(blob_name) + return blob.public_url + + def run_bq_query(self, query): + query_job = self.bq_client.query(query) + 
return [dict(row.items()) for row in query_job.result()] + + def add_firestore_document(self, collection, doc_id, data): + doc_ref = self.firestore_client.collection(collection).document(doc_id) + doc_ref.set(data) + return f"Document {doc_id} added to {collection}" + + def get_secret(self, secret_id, version="latest"): + name = f"projects/{self.config['project_id']}/secrets/{secret_id}/versions/{version}" + response = self.secret_client.access_secret_version(request={"name": name}) + return response.payload.data.decode("UTF-8") + +class GCPAiServices(BaseGCPService): + def __init__(self, config: dict): + super().__init__(config) + self.translate_client = translatergcp.Client(credentials=self.credentials) + + def translate_text(self, text, target="en"): + result = self.translate_client.translate(text, target_language=target) + return result['translatedText'] + +class GCPMessagingServices(BaseGCPService): + def __init__(self, config: dict): + super().__init__(config) + self.pubsub_publisher = pubsub_v1.PublisherClient(credentials=self.credentials) + + def publish_message(self, topic_name, message): + topic_path = self.pubsub_publisher.topic_path(self.config['project_id'], topic_name) + future = self.pubsub_publisher.publish(topic_path, message.encode("utf-8")) + return future.result() + + +# Flask App +app = Flask(__name__) +config = { + "project_id": os.environ.get("GCP_PROJECT_ID", "your-gcp-project-id") +} +storage_services = GCPStorageServices(config) +ai_services = GCPAiServices(config) +messaging_services = GCPMessagingServices(config) + + +@app.route("/storage/url", methods=["GET"]) +def get_file_url(): + bucket = request.args.get("bucket") + blob = request.args.get("blob") + url = storage_services.get_file_url(bucket, blob) + return jsonify({"url": url}) + + +@app.route("/bigquery/query", methods=["POST"]) +def bigquery_query(): + query = request.json.get("query") + result = storage_services.run_bq_query(query) + return jsonify(result) + + 
+@app.route("/pubsub/publish", methods=["POST"]) +def pubsub_publish(): + topic = request.json.get("topic") + message = request.json.get("message") + msg_id = messaging_services.publish_message(topic, message) + return jsonify({"message_id": msg_id}) + + +@app.route("/firestore/add", methods=["POST"]) +def firestore_add(): + collection = request.json.get("collection") + doc_id = request.json.get("doc_id") + data = request.json.get("data") + status = storage_services.add_firestore_document(collection, doc_id, data) + return jsonify({"status": status}) + + +@app.route("/secret/get", methods=["GET"]) +def secret_get(): + secret_id = request.args.get("secret_id") + version = request.args.get("version", "latest") + secret = storage_services.get_secret(secret_id, version) + return jsonify({"secret": secret}) + + +@app.route("/translate", methods=["POST"]) +def translate_text(): + text = request.json.get("text") + target = request.json.get("target", "en") + translated = ai_services.translate_text(text, target) + return jsonify({"translated": translated}) + + +if __name__ == "__main__": + app.run(debug=True) diff --git a/examples/plugin/callgraph/main.go b/examples/plugin/callgraph/main.go new file mode 100644 index 0000000..87b8b7d --- /dev/null +++ b/examples/plugin/callgraph/main.go @@ -0,0 +1,121 @@ +package main + +import ( + "context" + "flag" + "fmt" + "strings" + + "github.com/safedep/code/core" + "github.com/safedep/code/fs" + "github.com/safedep/code/lang" + "github.com/safedep/code/parser" + "github.com/safedep/code/plugin" + "github.com/safedep/code/plugin/callgraph" + "github.com/safedep/dry/log" +) + +var ( + dirToWalk string + language string +) + +func init() { + log.InitZapLogger("walker", "dev") + + flag.StringVar(&dirToWalk, "dir", "", "Directory to walk") + flag.StringVar(&language, "lang", "python", "Language to use for parsing files") + + flag.Parse() +} + +func main() { + if dirToWalk == "" { + flag.Usage() + return + } + + err := run() + if err != 
nil { + panic(err) + } +} + +func run() error { + fileSystem, err := fs.NewLocalFileSystem(fs.LocalFileSystemConfig{ + AppDirectories: []string{dirToWalk}, + }) + + if err != nil { + return fmt.Errorf("failed to create local filesystem: %w", err) + } + + language, err := lang.GetLanguage(language) + if err != nil { + return fmt.Errorf("failed to get language: %w", err) + } + + walker, err := fs.NewSourceWalker(fs.SourceWalkerConfig{}, []core.Language{language}) + if err != nil { + return fmt.Errorf("failed to create source walker: %w", err) + } + + treeWalker, err := parser.NewWalkingParser(walker, []core.Language{language}) + if err != nil { + return fmt.Errorf("failed to create tree walker: %w", err) + } + + // consume callgraph + var callgraphCallback callgraph.CallgraphCallback = func(_ context.Context, cg *callgraph.CallGraph) error { + treeData, err := cg.Tree.Data() + if err != nil { + return fmt.Errorf("failed to get tree data: %w", err) + } + + cg.PrintAssignmentGraph() + cg.PrintCallGraph() + + fmt.Println("DFS Traversal results:") + for _, resultItem := range cg.DFS() { + terminalMessage := "" + if resultItem.Terminal { + terminalMessage = " (terminal)" + } + + fmt.Printf("%s %s%s\n", strings.Repeat(">", resultItem.Depth), resultItem.Namespace, terminalMessage) + } + + signatureMatcher := callgraph.NewSignatureMatcher(parsedSignatures.Signatures) + signatureMatches, err := signatureMatcher.MatchSignatures(cg) + if err != nil { + return fmt.Errorf("failed to match signatures: %w", err) + } + + fmt.Printf("\nSignature matches for %s:\n", cg.FileName) + for _, match := range signatureMatches { + fmt.Printf("Match found: %s (%s)\n", match.MatchedSignature.ID, match.MatchedLanguageCode) + for _, condition := range match.MatchedConditions { + fmt.Printf("\tCondition: %s - %s\n", condition.Condition.Type, condition.Condition.Value) + for _, evidence := range condition.Evidences { + evidenceContent, exists := evidence.GetContentDetails(treeData) + 
evidenceDetailString := "" + if exists { + evidenceDetailString = fmt.Sprintf("@ (L%d #%d to L%d #%d)", evidenceContent.StartLine, evidenceContent.StartColumn, evidenceContent.EndLine, evidenceContent.EndColumn) + } + fmt.Printf("\t\tEvidence: %s %s\n", evidence.Namespace, evidenceDetailString) + } + } + } + return nil + } + + pluginExecutor, err := plugin.NewTreeWalkPluginExecutor(treeWalker, []core.Plugin{ + callgraph.NewCallGraphPlugin(callgraphCallback), + }) + + if err != nil { + return fmt.Errorf("failed to create plugin executor: %w", err) + } + + return pluginExecutor.Execute(context.Background(), fileSystem) +} diff --git a/examples/plugin/callgraph/pending.py b/examples/plugin/callgraph/pending.py new file mode 100644 index 0000000..8d87d96 --- /dev/null +++ b/examples/plugin/callgraph/pending.py @@ -0,0 +1,59 @@ +# Pending --------------------------------------------- + +import base64 +from utils import printinit, printenc, printdec, printf2 + +class SomeClass: + def __init__(self): + printinit("Initialized") + pass + def outer_method(self): + print("Called outer_method") + return self + +# @TODO - Refer attributeResolver for more details +deepresultvalue = SomeClass().outer_method().inner_method() + +# @TODO - This would require return value processing, which is a complex task +deepresultvalue.deepest_method() + +# @TODO - We're not able to identify instance as return values from factory functions yet +def create_outer(): + return SomeClass() + +# @TODO - Can't work with return values yet +a = SomeClass() +b = a.outer_method() # @TODO - class information needed for this + + + +class Encoding: + def __init__(self): + pass + def apply(self, msg, func): + return func(msg) + +encoder = Encoding() +encoded = encoder.apply("Hello, World!".encode('utf-8'), base64.b64encode) +printenc(encoded) +decoded = encoder.apply(encoded, base64.b64decode) +printdec(decoded) + + +# @TODO - Unable to resolve declaration afterwards (Python, Javascript, Go, etc support this 
feature) +def declaredFirst(value): + declaredLater(value) +def declaredLater(value): + print("GG", value) +declaredFirst(1) + +# Another sample - +def f1(value): + f2(value) + pass +def f2(value): + if value == 0: + return + f1(value-1) +f1(5) + diff --git a/examples/plugin/callgraph/signatures.go b/examples/plugin/callgraph/signatures.go new file mode 100644 index 0000000..316b56a --- /dev/null +++ b/examples/plugin/callgraph/signatures.go @@ -0,0 +1,26 @@ +package main + +import ( + _ "embed" + + "github.com/safedep/code/plugin/callgraph" + "github.com/safedep/dry/log" + "gopkg.in/yaml.v3" +) + +//go:embed signatures.yaml +var signatureYAML []byte + +type signatureFile struct { + Version string `yaml:"version"` + Signatures []callgraph.Signature `yaml:"signatures"` +} + +var parsedSignatures signatureFile + +func init() { + err := yaml.Unmarshal(signatureYAML, &parsedSignatures) + if err != nil { + log.Fatalf("Failed to parse signature YAML: %v", err) + } +} diff --git a/examples/plugin/callgraph/signatures.yaml b/examples/plugin/callgraph/signatures.yaml new file mode 100644 index 0000000..156e765 --- /dev/null +++ b/examples/plugin/callgraph/signatures.yaml @@ -0,0 +1,277 @@ +version: 0.1 +signatures: + - id: gcp.storage + description: "Google Cloud Storage Client" + tags: [storage, google-cloud, bucket] + languages: + python: + match: any + conditions: + - type: call + value: "google.cloud.storage.Client" + javascript: + match: all + conditions: + - type: call + value: "@google-cloud/storage.Storage" + + - id: azure.storage.blob + description: "Azure Blob Storage Client" + tags: [storage, azure, blob] + languages: + python: + match: any + conditions: + - type: call + value: "azure.storage.blob.BlobServiceClient" + javascript: + match: all + conditions: + - type: call + value: "@azure/storage-blob.BlobServiceClient" + + - id: gcp.bigquery + description: "Google Cloud BigQuery Client" + tags: [bigquery, google-cloud, data-warehouse] + languages: + python: + 
match: any + conditions: + - type: call + value: "google.cloud.bigquery.Client" + javascript: + match: all + conditions: + - type: call + value: "@google-cloud/bigquery.BigQuery" + + - id: azure.cosmos + description: "Azure Cosmos DB Client" + tags: [cosmos, database, azure, nosql] + languages: + python: + match: any + conditions: + - type: call + value: "azure.cosmos.CosmosClient" + javascript: + match: all + conditions: + - type: call + value: "@azure/cosmos.CosmosClient" + + - id: gcp.pubsub + description: "Google Cloud Pub/Sub Client" + tags: [pubsub, messaging, google-cloud] + languages: + python: + match: all + conditions: + - type: call + value: "google.cloud.pubsub.PublisherClient" + javascript: + match: any + conditions: + - type: call + value: "@google-cloud/pubsub.PubSub" + + - id: azure.servicebus + description: "Azure Service Bus Client" + tags: [servicebus, messaging, azure] + languages: + python: + match: any + conditions: + - type: call + value: "azure.servicebus.ServiceBusClient" + javascript: + match: all + conditions: + - type: call + value: "@azure/service-bus.ServiceBusClient" + + - id: gcp.firestore + description: "Google Cloud Firestore Client" + tags: [firestore, database, google-cloud] + languages: + python: + match: any + conditions: + - type: call + value: "google.cloud.firestore.Client" + javascript: + match: all + conditions: + - type: call + value: "@google-cloud/firestore.Firestore" + + - id: azure.cosmos.container + description: "Azure Cosmos DB Container Client" + tags: [cosmos, database, container, azure] + languages: + python: + match: any + conditions: + - type: call + value: "azure.cosmos.ContainerProxy" + javascript: + match: all + conditions: + - type: call + value: "@azure/cosmos.Container" + + - id: gcp.vision + description: "Google Cloud Vision API Client" + tags: [vision, image-analysis, ai, google-cloud] + languages: + python: + match: all + conditions: + - type: call + value: "google.cloud.vision.ImageAnnotatorClient" + 
javascript: + match: all + conditions: + - type: call + value: "@google-cloud/vision.ImageAnnotatorClient" + + - id: azure.vision + description: "Azure Computer Vision Client" + tags: [vision, image-analysis, ai, azure] + languages: + python: + match: any + conditions: + - type: call + value: "azure.ai.vision.ContentAnalysisClient" + javascript: + match: all + conditions: + - type: call + value: "@azure/ai-vision-image-analysis.ImageAnalysisClient" + + - id: gcp.translate + description: "Google Cloud Translation Client" + tags: [translate, nlp, language, google-cloud] + languages: + python: + match: any + conditions: + - type: call + value: "google.cloud.translate.TranslationServiceClient" + - type: call + value: "google.cloud.translate_v2.Client" + javascript: + match: all + conditions: + - type: call + value: "@google-cloud/translate.TranslationServiceClient" + + - id: azure.translator + description: "Azure Translator Client" + tags: [translate, nlp, language, azure] + languages: + python: + match: any + conditions: + - type: call + value: "azure.ai.translation.text.TextTranslationClient" + javascript: + match: all + conditions: + - type: call + value: "@azure/ai-translation-text.TextTranslationClient" + + - id: gcp.pubsub_v1.publisher + description: "Google Cloud Pub/Sub Publisher Client" + tags: [pubsub, messaging, google-cloud] + languages: + python: + match: any + conditions: + - type: call + value: "google.cloud.pubsub_v1.PublisherClient" + javascript: + match: any + conditions: + - type: call + value: "@google-cloud/pubsub.PublisherClient" + + - id: gcp.pubsub_v1.subscriber + description: "Google Cloud Pub/Sub Subscriber Client" + tags: [pubsub, messaging, google-cloud] + languages: + python: + match: any + conditions: + - type: call + value: "google.cloud.pubsub_v1.SubscriberClient" + javascript: + match: any + conditions: + - type: call + value: "@google-cloud/pubsub.SubscriberClient" + + - id: azure.servicebus.sender + description: "Azure Service Bus 
Sender Client" + tags: [servicebus, messaging, sender, azure] + languages: + python: + match: any + conditions: + - type: call + value: "azure.servicebus.ServiceBusSender" + javascript: + match: any + conditions: + - type: call + value: "@azure/service-bus.ServiceBusSender" + + - id: azure.servicebus.receiver + description: "Azure Service Bus Receiver Client" + tags: [servicebus, messaging, receiver, azure] + languages: + python: + match: any + conditions: + - type: call + value: "azure.servicebus.ServiceBusReceiver" + javascript: + match: any + conditions: + - type: call + value: "@azure/service-bus.ServiceBusReceiver" + + - id: gcp.secretmanager + description: "Google Cloud Secret Manager Client" + tags: [secret-manager, google-cloud] + languages: + python: + match: any + conditions: + - type: call + value: "google.cloud.secretmanager.SecretManagerServiceClient" + javascript: + match: all + conditions: + - type: call + value: "@google-cloud/secret-manager.SecretManagerServiceClient" + + - id: azure.IAM + description: "Azure IAM services" + tags: [keyvault, secrets, azure, credentials] + languages: + python: + match: any + conditions: + - + type: call + value: "azure.identity.DefaultAzureCredential" + - + type: call + value: "azure.keyvault.secrets.SecretClient" + javascript: + match: all + conditions: + - type: call + value: "@azure/keyvault-secrets.SecretClient" \ No newline at end of file diff --git a/examples/plugin/callgraph/testAll.py b/examples/plugin/callgraph/testAll.py new file mode 100644 index 0000000..6f4656f --- /dev/null +++ b/examples/plugin/callgraph/testAll.py @@ -0,0 +1,97 @@ +import requests +import parser +import pstats +import zipfile +import tarfile +import gettext +import flask +from openai import Openai +from os import path, listdir, getenv, chdir + + +# Correct callgraph & assignment resolution ------------------------------------------ + +# Correctly assigned to appropriate imports +requests.get("https://example.com/" + 
chdir("something")) +Openai("gpt-3.5-turbo") + +# Correctly assigned to builtin keyword - print +print("Hello") + +# Archiver assignment to zipfile.ZipFile and tarfile.open.makearchive detected correctly +archiver = zipfile.ZipFile +if getenv("USE_TAR"): + archiver = tarfile.open.makearchive + +# Function Calls (path.join) added to call from current namespace (here filename) +# Note - return values & arg assignments aren't processed +archiver(path.join("something", gettext.get("xyz"))) + +# Parsed correctly +path.altsep.capitalize( + "something", + getenv("xyz"), + parser.parse("https://example.com") +) + +# Literal assignment +somenumber = 7.0 + +# Correctly assigned multiple attribute values +abc = path.altsep.__dict__ +abc = path.altsep +abc = listdir +abc = requests.__url__ +abc = 7 +abc = True +abc = "gg" +abc = somenumber + +# This forms a chain of assignments +# spd => abc => [listdir, 7, True, somenumber, ....] +spd = abc + +# Attribute assignee +path.altsep.__dict__ = "something" +path.altsep.__dict__ = "something else" + + +# Nested function definitions & scoped calls correctly parsed +def add(a, b): + return a + b +def sub(a, b): + return a - b +def complexop(a, b): + def add(a,b): + return a*2 + b*2 + x = a + return add(x, b) + add(a*2, b) + sub(a*2, b) + +r1 = 95 + 7.3 + 2 +res = complexop(1, 2) + add(3, 4) + add(5, 6) + r1 - somenumber + 95 + 7.3 + pstats.getsomestat() + +# Correctly processes constructor, member function and member variables by instance keyword ie. 
self.name, self.value +class TesterClass: + def __init__(self): + self.name = "TesterClass name" + self.value = 42 + if getenv("USE_TAR"): + self.value = 100 + + def helper_method(self): + print("Called helper_method") + return self.value + + def deepest_method(self): + self.helper_method() + print("Called deepest_method") + return "Success" + + def aboutme(self): + print(f"Name: {self.name}") + +# Correctly identifies that alice is an instance of TesterClass +# so any qualifier on alice is resolved as a member of TesterClass +alice = TesterClass() +alice.aboutme() +bannername = alice.name diff --git a/examples/plugin/callgraph/testClass.py b/examples/plugin/callgraph/testClass.py new file mode 100644 index 0000000..9b6ffd3 --- /dev/null +++ b/examples/plugin/callgraph/testClass.py @@ -0,0 +1,58 @@ +import pprint +from xyz import printxyz1, printxyz2, printxyz3 +from os import getenv + +# Correctly processes constructor, member function and member variables by instance keyword ie. self.name, self.value +class TesterClass: + def __init__(self): + self.name = "TesterClass name" + self.value = 42 + if getenv("USE_TAR"): + self.value = 100 + + def helper_method(self): + print("Called helper_method") + return self.value + + def deepest_method(self): + self.helper_method() + print("Called deepest_method") + return "Success" + + def aboutme(self): + print(f"Name: {self.name}") + +# Correctly identifies that alice is an instance of TesterClass +# so any qualifier on alice is resolved as a member of TesterClass +alice = TesterClass() +alice.aboutme() +bannername = alice.name + + + + +class ClassA: + def method1(self): + printxyz2("GG") + def method2(self): + printxyz2("GG") + +class ClassB: + def method1(self): + printxyz2("GG") + def method2(self): + printxyz2("GG") + def methodUnique(self): + printxyz3("GG") + pprint.pp("GG") + + +x = ClassA() +x = ClassB() +x.method1() +y = x +y.method1() +y.method2() +y.methodUnique() # @TODO - This creates a call to namespace that doesn't
exist + + diff --git a/examples/plugin/callgraph/testFunctions.py b/examples/plugin/callgraph/testFunctions.py new file mode 100644 index 0000000..b809dea --- /dev/null +++ b/examples/plugin/callgraph/testFunctions.py @@ -0,0 +1,83 @@ +import pstats +import pprint +from xyzprintmodule import printer1, printer2, printer3, printer4, printer6 +from os import listdir as listdirfn, chmod + + +# Recursive +def factorial(x): + if x == 0 or x == 1: + return 1 + else: + return x * factorial(x-1) +print(factorial(5)) + + +# Function assignment +def foo(): + pprint.pprint("foo") +def bar(): + print("bar") +baz = bar + +xyz = "abc" +xyz = 25 +xyz = foo +xyz = baz +xyz() + + +# Nested & scoped functions +def outerfn1(): + chmod("outerfn1") + pass +def outerfn2(): + listdirfn("outerfn2") + pass + +def fn1(): + printer4("outer fn1") + +def nestParent(): + def parentScopedFn(): + print("parentScopedFn") + fn1() # Must call outer fn1 with printer4 + + def nestChild(): + printer1("nestChild") + outerfn1() + + def fn1(): + printer6("inner fn1") + + def childScopedFn(): + printer2("childScopedFn") + fn1() # Must call inner fn1 with printer6 + + def nestGrandChildUseless(): + printer3("nestGrandChildUseless") + + def nestGrandChild(): + pprint.pp("nestGrandChild") + parentScopedFn() + outerfn2() + childScopedFn() + + nestGrandChild() + + outerfn1() + nestChild() + +nestParent() + + + + +# Function Assignments, return values aren't processed, since it's a complex task +def add(a, b): + return a + b +def sub(a, b): + return a - b +somenumber = 5 +r1 = 95 + 7.3 + 2 +res = add(3, 4) + sub(8, 6) + r1 - somenumber + 95 + 7.3 + pstats.getsomestat() diff --git a/go.mod b/go.mod index c717521..25742f1 100644 --- a/go.mod +++ b/go.mod @@ -3,18 +3,21 @@ module github.com/safedep/code go 1.23.2 require ( - github.com/safedep/dry v0.0.0-20241225162018-db93ecfc503f + github.com/safedep/dry v0.0.0-20250428071408-5bf2ea4d87ee github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 -
github.com/stretchr/testify v1.9.0 + github.com/stretchr/testify v1.10.0 + gopkg.in/yaml.v3 v3.0.1 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/oklog/ulid/v2 v2.1.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/mod v0.1.0 // indirect - golang.org/x/sync v0.12.0 // indirect + google.golang.org/protobuf v1.36.5 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect + sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index 0b32d35..e5884b2 100644 --- a/go.sum +++ b/go.sum @@ -1,37 +1,43 @@ +github.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78 h1:QVw89YDxXxEe+l8gU8ETbOasdwEV+avkR75ZzsVV9WI= +github.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/envoyproxy/go-control-plane v0.13.1 h1:vPfJZCkob6yTMEgS+0TwfTUfbHjfy/6vOJ8hUWX/uXE= +github.com/envoyproxy/go-control-plane v0.13.1/go.mod h1:X45hY0mufo6Fd0KW3rqsGvQMw58jvjymeCzBU3mWyHw= +github.com/envoyproxy/protoc-gen-validate v1.1.0 h1:tntQDh69XqOCOZsDz0lVJQez/2L6Uu2PdjCQwWCJ3bM= +github.com/envoyproxy/protoc-gen-validate v1.1.0/go.mod h1:sXRDRVmzEbkM7CVcM06s9shE/m23dg3wzjl0UWqJ2q4= +github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= +github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/golang/protobuf v1.5.4 
h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/oklog/ulid/v2 v2.1.0 h1:+9lhoxAP56we25tyYETBBY1YLA2SaoLvUFgrP2miPJU= +github.com/oklog/ulid/v2 v2.1.0/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= +github.com/pborman/getopt v0.0.0-20170112200414-7148bc3a4c30/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/safedep/dry v0.0.0-20241225162018-db93ecfc503f h1:ajZQsGSDmgn90eJdyHBUB9e3wnkdCFbJnWsAH0REryE= -github.com/safedep/dry v0.0.0-20241225162018-db93ecfc503f/go.mod h1:VNiIEzsaDJUncMyS+Aly7Hojf3qYNAz+J6Kmi0DALFw= +github.com/safedep/dry v0.0.0-20250428071408-5bf2ea4d87ee h1:tXiUQLk31tR8We0e9Q5io9/ZbYD9gcni3RgNJNxioWM= +github.com/safedep/dry v0.0.0-20250428071408-5bf2ea4d87ee/go.mod h1:Mdqx/Q2DhAcN38XiUNTGCC5MktofYDQW9Az7YWGEF0s= github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4= github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify 
v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= -golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= -golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= -golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= -golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= +google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM= +google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/natefinch/lumberjack.v2 v2.2.1 h1:bBRl1b0OH9s/DuPhuXpNl+VtCaJXFZ5/uEFST95x9zc= gopkg.in/natefinch/lumberjack.v2 v2.2.1/go.mod h1:YD8tP3GAjkrDg1eZH7EGmyESg/lsYskCTPBJVb9jqSc= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= +sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= diff --git a/lang/python_resolvers.go b/lang/python_resolvers.go index 07a003f..339446b 100644 --- a/lang/python_resolvers.go +++ b/lang/python_resolvers.go @@ -86,12 +86,6 @@ func (r *pythonResolvers) ResolveImports(tree core.ParseTree) ([]*ast.ImportNode node.SetModuleNameNode(m.Captures[0].Node) node.SetModuleItemNode(m.Captures[1].Node) node.SetModuleAliasNode(m.Captures[2].Node) - // print node type and contents of all captures - // fmt.Println("Node", m.Captures[0].Node.Content(*data)) - // for _, capture := range m.Captures { - // fmt.Printf("Capture: %s, %s\n", capture.Node.Type(), capture.Node.Content(*data)) - // } - imports = append(imports, node) return nil }), diff --git a/plugin/callgraph/assignment.go b/plugin/callgraph/assignment.go new file mode 100644 index 0000000..42d722c --- /dev/null +++ b/plugin/callgraph/assignment.go @@ -0,0 +1,81 @@ +package callgraph + +import ( + "slices" + + "github.com/safedep/dry/utils" + sitter "github.com/smacker/go-tree-sitter" +) + +type assignmentNode struct { + Namespace string + AssignedTo []string + TreeNode *sitter.Node +} + +func newAssignmentGraphNode(namespace string, treeNode *sitter.Node) *assignmentNode { + return &assignmentNode{ + Namespace: namespace, + AssignedTo: []string{}, + TreeNode: treeNode, + } +} + +type assignmentGraph struct { + Assignments map[string]*assignmentNode // Map of identifier to possible 
namespaces or other identifiers +} + +func newAssignmentGraph() *assignmentGraph { + return &assignmentGraph{Assignments: make(map[string]*assignmentNode)} +} + +func (ag *assignmentGraph) AddIdentifier(identifier string, treeNode *sitter.Node) *assignmentNode { + if _, exists := ag.Assignments[identifier]; !exists { + ag.Assignments[identifier] = newAssignmentGraphNode(identifier, treeNode) + } + return ag.Assignments[identifier] +} + +// Add an assignment +func (ag *assignmentGraph) AddAssignment(identifier string, identifierTreeNode *sitter.Node, target string, targetTreeNode *sitter.Node) { + if _, exists := ag.Assignments[identifier]; !exists { + ag.Assignments[identifier] = newAssignmentGraphNode(identifier, identifierTreeNode) + } + if _, exists := ag.Assignments[target]; !exists { + ag.Assignments[target] = newAssignmentGraphNode(target, targetTreeNode) + } + if !slices.Contains(ag.Assignments[identifier].AssignedTo, target) { + ag.Assignments[identifier].AssignedTo = append(ag.Assignments[identifier].AssignedTo, target) + } +} + +// Resolve an identifier to its targets (leaf nodes of the DFS tree) +func (ag *assignmentGraph) Resolve(identifier string) []*assignmentNode { + targets := utils.PtrTo([]*assignmentNode{}) + visited := make(map[string]bool) + ag.resolveUtil(identifier, visited, targets) + return *targets +} + +// Utility function to resolve the identifier to its targets recursively +func (ag *assignmentGraph) resolveUtil(currentIdentifier string, visited map[string]bool, targets *[]*assignmentNode) { + if visited[currentIdentifier] { + return + } + visited[currentIdentifier] = true + + identifierNode, exists := ag.Assignments[currentIdentifier] + if !exists { + return + } + + // If the current identifier has no assignments, it's a leaf node + if len(identifierNode.AssignedTo) == 0 { + *targets = append(*targets, identifierNode) + return + } + + for _, targetIdentifier := range identifierNode.AssignedTo { + ag.resolveUtil(targetIdentifier, 
visited, targets) + } +} diff --git a/plugin/callgraph/builtins.go b/plugin/callgraph/builtins.go new file mode 100644 index 0000000..ef7b66b --- /dev/null +++ b/plugin/callgraph/builtins.go @@ -0,0 +1,42 @@ +package callgraph + +import ( + "embed" + "encoding/json" + + "github.com/safedep/code/core" + "github.com/safedep/dry/log" +) + +//go:embed builtins.json +var builtinsFS embed.FS + +// languageBuiltins holds built-in functions for each language +type languageBuiltins map[string][]string + +var allBuiltins languageBuiltins + +// Loads built-in functions from the embedded JSON file +func initBuiltins() { + // Read the builtins.json file + data, err := builtinsFS.ReadFile("builtins.json") + if err != nil { + log.Errorf("failed to read builtins.json: %v", err) + panic(err) + } + + // Parse the JSON + if err := json.Unmarshal(data, &allBuiltins); err != nil { + log.Errorf("failed to unmarshal builtins.json: %v", err) + panic(err) + } +} + +func getBuiltins(lang core.Language) []string { + builtins, ok := allBuiltins[string(lang.Meta().Code)] + if !ok { + log.Debugf("No built-ins defined for language %s", lang.Meta().Code) + return []string{} + } + return builtins +} diff --git a/plugin/callgraph/builtins.json b/plugin/callgraph/builtins.json new file mode 100644 index 0000000..22e67bb --- /dev/null +++ b/plugin/callgraph/builtins.json @@ -0,0 +1,221 @@ +{ + "python": [ + "ArithmeticError", + "AssertionError", + "AttributeError", + "BaseException", + "BaseExceptionGroup", + "BlockingIOError", + "BrokenPipeError", + "BufferError", + "BytesWarning", + "ChildProcessError", + "ConnectionAbortedError", + "ConnectionError", + "ConnectionRefusedError", + "ConnectionResetError", + "DeprecationWarning", + "EOFError", + "Ellipsis", + "EncodingWarning", + "EnvironmentError", + "Exception", + "ExceptionGroup", + "False", + "FileExistsError", + "FileNotFoundError", + "FloatingPointError", + "FutureWarning", + "GeneratorExit", + "IOError", + "ImportError", + "ImportWarning", + 
"IndentationError", + "IndexError", + "InterruptedError", + "IsADirectoryError", + "KeyError", + "KeyboardInterrupt", + "LookupError", + "MemoryError", + "ModuleNotFoundError", + "NameError", + "None", + "NotADirectoryError", + "NotImplemented", + "NotImplementedError", + "OSError", + "OverflowError", + "PendingDeprecationWarning", + "PermissionError", + "ProcessLookupError", + "RecursionError", + "ReferenceError", + "ResourceWarning", + "RuntimeError", + "RuntimeWarning", + "StopAsyncIteration", + "StopIteration", + "SyntaxError", + "SyntaxWarning", + "SystemError", + "SystemExit", + "TabError", + "TimeoutError", + "True", + "TypeError", + "UnboundLocalError", + "UnicodeDecodeError", + "UnicodeEncodeError", + "UnicodeError", + "UnicodeTranslateError", + "UnicodeWarning", + "UserWarning", + "ValueError", + "Warning", + "ZeroDivisionError", + "__build_class__", + "__debug__", + "__doc__", + "__import__", + "__loader__", + "__name__", + "__package__", + "__spec__", + "abs", + "aiter", + "all", + "anext", + "any", + "ascii", + "bin", + "bool", + "breakpoint", + "bytearray", + "bytes", + "callable", + "chr", + "classmethod", + "compile", + "complex", + "copyright", + "credits", + "delattr", + "dict", + "dir", + "divmod", + "enumerate", + "eval", + "exec", + "exit", + "filter", + "float", + "format", + "frozenset", + "getattr", + "globals", + "hasattr", + "hash", + "help", + "hex", + "id", + "input", + "int", + "isinstance", + "issubclass", + "iter", + "len", + "license", + "list", + "locals", + "map", + "max", + "memoryview", + "min", + "next", + "object", + "oct", + "open", + "ord", + "pow", + "print", + "property", + "quit", + "range", + "repr", + "reversed", + "round", + "set", + "setattr", + "slice", + "sorted", + "staticmethod", + "str", + "sum", + "super", + "tuple", + "type", + "vars", + "zip" + ], + "javascript": [ + "console", + "parseInt", + "parseFloat", + "isNaN", + "isFinite", + "setTimeout", + "setInterval", + "clearTimeout", + "clearInterval", + "eval", 
+ "encodeURI", + "decodeURI", + "encodeURIComponent", + "decodeURIComponent", + "Number", + "String", + "Boolean", + "Array", + "Object", + "Function", + "Date", + "Math", + "RegExp", + "JSON", + "Promise", + "Symbol", + "BigInt", + "Map", + "Set", + "WeakMap", + "WeakSet", + "Reflect", + "Proxy", + "Intl", + "undefined", + "NaN", + "Infinity" + ], + "go": [ + "panic", + "recover", + "make", + "new", + "len", + "cap", + "append", + "copy", + "delete", + "close", + "complex", + "real", + "imag", + "print", + "println", + "iota", + "true", + "false", + "nil" + ] +} diff --git a/plugin/callgraph/callgraph.go b/plugin/callgraph/callgraph.go new file mode 100644 index 0000000..28701bf --- /dev/null +++ b/plugin/callgraph/callgraph.go @@ -0,0 +1,180 @@ +package callgraph + +import ( + "fmt" + "slices" + + "github.com/safedep/code/core" + "github.com/safedep/dry/log" + sitter "github.com/smacker/go-tree-sitter" +) + +const namespaceSeparator = "//" + +// CallGraphNode represents a single node in the call graph +type CallGraphNode struct { + Namespace string + CallsTo []string + TreeNode *sitter.Node +} + +type ContentDetails struct { + StartLine uint32 + EndLine uint32 + StartColumn uint32 + EndColumn uint32 + Content string +} + +// GetContentDetails returns the content details of the node +// If tree sitter node is nil, it returns false indicating that the content details are not available +// else, it returns the content details and true +func (gn *CallGraphNode) GetContentDetails(treeData *[]byte) (ContentDetails, bool) { + if gn.TreeNode == nil { + return ContentDetails{}, false + } + return ContentDetails{ + StartLine: gn.TreeNode.StartPoint().Row, + EndLine: gn.TreeNode.EndPoint().Row, + StartColumn: gn.TreeNode.StartPoint().Column, + EndColumn: gn.TreeNode.EndPoint().Column, + Content: gn.TreeNode.Content(*treeData), + }, true +} + +func newCallGraphNode(namespace string, treeNode *sitter.Node) *CallGraphNode { + return &CallGraphNode{ + Namespace: namespace, + 
CallsTo: []string{}, + TreeNode: treeNode, + } +} + +type CallGraph struct { + FileName string + Nodes map[string]*CallGraphNode + Tree core.ParseTree + assignmentGraph assignmentGraph + classConstructors map[string]bool +} + +func newCallGraph(fileName string, importedIdentifiers map[string]parsedImport, tree core.ParseTree) (*CallGraph, error) { + language, err := tree.Language() + if err != nil { + return nil, fmt.Errorf("failed to get language from parse tree: %w", err) + } + + builtIns := getBuiltins(language) + + cg := &CallGraph{ + FileName: fileName, + Nodes: make(map[string]*CallGraphNode), + Tree: tree, + assignmentGraph: *newAssignmentGraph(), + classConstructors: make(map[string]bool), + } + + for identifier, importedIdentifier := range importedIdentifiers { + cg.AddNode(importedIdentifier.Namespace, importedIdentifier.NamespaceTreeNode) + if identifier == importedIdentifier.Namespace { + cg.assignmentGraph.AddIdentifier(importedIdentifier.Namespace, importedIdentifier.NamespaceTreeNode) + } else { + cg.assignmentGraph.AddAssignment(identifier, importedIdentifier.IdentifierTreeNode, importedIdentifier.Namespace, importedIdentifier.NamespaceTreeNode) + } + } + + for _, namespace := range builtIns { + cg.assignmentGraph.AddIdentifier(namespace, nil) // @TODO - Can't create sitter node for keywords + } + + return cg, nil +} + +func (cg *CallGraph) AddNode(identifier string, treeNode *sitter.Node) { + if _, exists := cg.Nodes[identifier]; !exists { + cg.Nodes[identifier] = newCallGraphNode(identifier, treeNode) + } +} + +// AddEdge adds an edge from one function to another +func (cg *CallGraph) AddEdge(caller string, callerTreeNode *sitter.Node, callee string, calleeTreeNode *sitter.Node) { + cg.AddNode(caller, callerTreeNode) + cg.AddNode(callee, calleeTreeNode) + if !slices.Contains(cg.Nodes[caller].CallsTo, callee) { + cg.Nodes[caller].CallsTo = append(cg.Nodes[caller].CallsTo, callee) + } +} + +func (cg *CallGraph) PrintCallGraph() { + fmt.Println("Call 
Graph:") + for caller, node := range cg.Nodes { + fmt.Printf(" %s (calls)=> %v\n", caller, node.CallsTo) + } + fmt.Println() +} + +func (cg *CallGraph) PrintAssignmentGraph() { + fmt.Println("Assignment Graph:") + for assignmentNamespace, assignmentNode := range cg.assignmentGraph.Assignments { + fmt.Printf(" %s => %v\n", assignmentNamespace, assignmentNode.AssignedTo) + } + fmt.Println() +} + +type DfsResultItem struct { + Namespace string + Node *CallGraphNode + Caller *CallGraphNode + Depth int + Terminal bool +} + +func (cg *CallGraph) DFS() []DfsResultItem { + visited := make(map[string]bool) + var dfsResult []DfsResultItem + cg.dfsUtil(cg.FileName, nil, visited, &dfsResult, 0) + return dfsResult +} + +func (cg *CallGraph) dfsUtil(namespace string, caller *CallGraphNode, visited map[string]bool, result *[]DfsResultItem, depth int) { + if visited[namespace] { + return + } + + callgraphNode, callgraphNodeExists := cg.Nodes[namespace] + + // Mark the current node as visited and add it to the result + visited[namespace] = true + *result = append(*result, DfsResultItem{ + Namespace: namespace, + Node: callgraphNode, + Caller: caller, + Depth: depth, + Terminal: !callgraphNodeExists || len(callgraphNode.CallsTo) == 0, + }) + + assignmentGraphNode, assignmentNodeExists := cg.assignmentGraph.Assignments[namespace] + if assignmentNodeExists { + // Recursively visit all the nodes assigned to the current node + for _, assigned := range assignmentGraphNode.AssignedTo { + cg.dfsUtil(assigned, caller, visited, result, depth) + } + } + + // Recursively visit all the nodes called by the current node + // Any variable assignment would be ignored here, since it won't be in callgraph + if callgraphNodeExists { + for _, callee := range callgraphNode.CallsTo { + cg.dfsUtil(callee, callgraphNode, visited, result, depth+1) + } + } +} + +func (cg *CallGraph) GetInstanceKeyword() (string, bool) { + language, err := cg.Tree.Language() + if err != nil { + log.Errorf("failed to get 
// Tree-sitter node types that are ignored when parsing the AST,
// e.g. a comment is useless, imports are already resolved.
var ignoredTypesList = []string{"comment"}

// ignoredTypes is the set form of ignoredTypesList; init fills it.
var ignoredTypes = make(map[string]bool)

// init marks every entry of ignoredTypesList as ignored.
func init() {
	for idx := 0; idx < len(ignoredTypesList); idx++ {
		ignoredTypes[ignoredTypesList[idx]] = true
	}
}
self.name, self.value +class TesterClass: + def __init__(self): + self.name = "TesterClass name" + self.value = 42 + if getenv("USE_TAR"): + self.value = 100 + + def helper_method(self): + print("Called helper_method") + return self.value + + def deepest_method(self): + self.helper_method() + print("Called deepest_method") + return "Success" + + def aboutme(self): + print(f"Name: {self.name}") + +# Correctly identifies that alice is an instance of TesterClass +# so any qualifier on alice is resolved as a member of TesterClass +alice = TesterClass() +alice.aboutme() +bannername = alice.name + + + + +class ClassA: + def method1(self): + printxyz2("GG") + def method2(self): + printxyz2("GG") + +class ClassB: + def method1(self): + printxyz2("GG") + def method2(self): + printxyz2("GG") + def methodUnique(self): + printxyz3("GG") + pprint.pp("GG") + + +x = ClassA() +x = ClassB() +x.method1() +y = x +y.method1() +y.method2() +y.methodUnique() # @TODO - This creates a call to namespace that doesn't exist + + diff --git a/plugin/callgraph/fixtures/testFunctions.py b/plugin/callgraph/fixtures/testFunctions.py new file mode 100644 index 0000000..b809dea --- /dev/null +++ b/plugin/callgraph/fixtures/testFunctions.py @@ -0,0 +1,83 @@ +import pstats +import pprint +from xyzprintmodule import printer1, printer2, printer3, printer4, printer6 +from os import listdir as listdirfn, chmod + + +# Recursive +def factorial(x): + if x == 0 or x == 1: + return 1 + else: + return x * factorial(x-1) +print(factorial(5)) + + +# Function assignment +def foo(): + pprint.pprint("foo") +def bar(): + print("bar") +baz = bar + +xyz = "abc" +xyz = 25 +xyz = foo +xyz = baz +xyz() + + +# Nested & scoped functions +def outerfn1(): + chmod("outerfn1") + pass +def outerfn2(): + listdirfn("outerfn2") + pass + +def fn1(): + printer4("outer fn1") + +def nestParent(): + def parentScopedFn(): + print("parentScopedFn") + fn1() # Must call outer fn1 with printer4 + + def nestChild(): + printer1("nestChild") + 
outerfn1() + + def fn1(): + printer6("inner fn1") + + def childScopedFn(): + printer2("childScopedFn") + fn1() # Must call outer fn1 with printer6 + + def nestGrandChildUseless(): + printer3("nestGrandChildUseless") + + def nestGrandChild(): + pprint.pp("nestGrandChild") + parentScopedFn() + outerfn2() + childScopedFn() + + nestGrandChild() + + outerfn1() + nestChild() + +nestParent() + + + + +# Function Assignments, return values aren't processed, since its a complex taxk +def add(a, b): + return a + b +def sub(a, b): + return a - b +somenumber = 5 +r1 = 95 + 7.3 + 2 +res = add(3, 4) + sub(8, 6) + r1 - somenumber + 95 + 7.3 + pstats.getsomestat() diff --git a/plugin/callgraph/imports.go b/plugin/callgraph/imports.go new file mode 100644 index 0000000..cafe4d7 --- /dev/null +++ b/plugin/callgraph/imports.go @@ -0,0 +1,83 @@ +package callgraph + +import ( + "strings" + + "github.com/safedep/code/core" + "github.com/safedep/code/core/ast" + sitter "github.com/smacker/go-tree-sitter" +) + +// Parses namespaces & sitter nodes for imported identifiers +// eg. import pprint is parsed as: +// pprint -> pprint +// eg. 
from os import listdir as listdirfn, chmod is parsed as: +// listdirfn -> os//listdir +// chmod -> os//chmod +type parsedImport struct { + Identifier string + IdentifierTreeNode *sitter.Node + Namespace string + NamespaceTreeNode *sitter.Node +} + +func parseImports(imports []*ast.ImportNode, lang core.Language) map[string]parsedImport { + importedIdentifierNamespaces := make(map[string]parsedImport) + for _, imp := range imports { + if imp.IsWildcardImport() { + continue + } + itemNamespace := imp.ModuleItem() + moduleNamespace := resolveNamespaceWithSeparator(imp.ModuleName(), lang) + if itemNamespace == "" { + itemNamespace = moduleNamespace + } else { + itemNamespace = moduleNamespace + namespaceSeparator + itemNamespace + } + + moduleItemIdentifierKey := resolveSubmoduleIdentifier(imp.ModuleItem(), lang) + moduleAliasIdentifierKey := resolveSubmoduleIdentifier(imp.ModuleAlias(), lang) + + identifierKey := moduleNamespace + identifierTreeNode := imp.GetModuleNameNode() + if moduleAliasIdentifierKey != "" { + identifierKey = moduleAliasIdentifierKey + identifierTreeNode = imp.GetModuleAliasNode() + } else if moduleItemIdentifierKey != "" { + identifierKey = moduleItemIdentifierKey + identifierTreeNode = imp.GetModuleItemNode() + } + importedIdentifierNamespaces[identifierKey] = parsedImport{ + Identifier: identifierKey, + IdentifierTreeNode: identifierTreeNode, + Namespace: itemNamespace, + NamespaceTreeNode: imp.GetModuleNameNode().Parent(), + } + } + return importedIdentifierNamespaces +} + +// For submodule imports, we need to replace separator with our namespaceSeparator for consistency +// eg. 
in python "from os.path import abspath" -> ModuleName = os.path -> os//path +var submoduleSeparator = map[core.LanguageCode]string{ + core.LanguageCodeGo: "/", + core.LanguageCodeJavascript: "/", + core.LanguageCodePython: ".", +} + +func resolveNamespaceWithSeparator(moduleName string, lang core.Language) string { + separator, exists := submoduleSeparator[lang.Meta().Code] + if exists { + return strings.Join(strings.Split(moduleName, separator), namespaceSeparator) + } + return moduleName +} + +func resolveSubmoduleIdentifier(identifier string, lang core.Language) string { + separator, exists := submoduleSeparator[lang.Meta().Code] + if exists && strings.Contains(identifier, separator) { + parts := strings.Split(identifier, separator) + return parts[len(parts)-1] + } + return identifier +} diff --git a/plugin/callgraph/instanceKeywords.go b/plugin/callgraph/instanceKeywords.go new file mode 100644 index 0000000..509a753 --- /dev/null +++ b/plugin/callgraph/instanceKeywords.go @@ -0,0 +1,32 @@ +package callgraph + +import "github.com/safedep/code/core" + +var instanceKeywordMapping = map[string]string{ + "python": "self", + "javascript": "this", + "java": "this", + "csharp": "this", + "ruby": "self", + "php": "$this", + "golang": "this", + "typescript": "this", + "swift": "self", + "rust": "self", + "scala": "this", + "objective-c": "self", + "dart": "this", + "elixir": "this", + "clojure": "this", + "lua": "self", + "perl": "self", + "r": "this", +} + +func resolveInstanceKeyword(language core.Language) (string, bool) { + langCode := language.Meta().Code + if keyword, exists := instanceKeywordMapping[string(langCode)]; exists { + return keyword, true + } + return "", false +} diff --git a/plugin/callgraph/plugin.go b/plugin/callgraph/plugin.go new file mode 100644 index 0000000..f1dca58 --- /dev/null +++ b/plugin/callgraph/plugin.go @@ -0,0 +1,128 @@ +package callgraph + +import ( + "context" + "fmt" + "sync" + + "github.com/safedep/code/core" + 
"github.com/safedep/dry/log" + sitter "github.com/smacker/go-tree-sitter" +) + +type CallgraphCallback core.PluginCallback[*CallGraph] + +type callgraphPlugin struct { + // Callback function which is called with the callgraph + callgraphCallback CallgraphCallback +} + +// Verify contract +var _ core.TreePlugin = (*callgraphPlugin)(nil) + +var loadBuiltinOnce sync.Once + +func NewCallGraphPlugin(callgraphCallback CallgraphCallback) *callgraphPlugin { + // Load builtin keywords + loadBuiltinOnce.Do(initBuiltins) + + return &callgraphPlugin{ + callgraphCallback: callgraphCallback, + } +} + +func (p *callgraphPlugin) Name() string { + return "CallgraphPlugin" +} + +var supportedLanguages = []core.LanguageCode{core.LanguageCodePython} + +func (p *callgraphPlugin) SupportedLanguages() []core.LanguageCode { + return supportedLanguages +} + +func (p *callgraphPlugin) AnalyzeTree(ctx context.Context, tree core.ParseTree) error { + lang, err := tree.Language() + if err != nil { + return fmt.Errorf("failed to get language: %w", err) + } + + file, err := tree.File() + if err != nil { + return fmt.Errorf("failed to get file: %w", err) + } + + log.Debugf("callgraph - Analyzing tree for language: %s, file: %s\n", lang.Meta().Code, file.Name()) + + cg, err := buildCallGraph(tree, lang, file.Name()) + + if err != nil { + return fmt.Errorf("failed to build call graph: %w", err) + } + + return p.callgraphCallback(ctx, cg) +} + +// buildCallGraph builds a call graph from the syntax tree +func buildCallGraph(tree core.ParseTree, lang core.Language, filePath string) (*CallGraph, error) { + astRootNode := tree.Tree().RootNode() + + treeData, err := tree.Data() + if err != nil { + return nil, fmt.Errorf("failed to get tree data: %w", err) + } + + imports, err := lang.Resolvers().ResolveImports(tree) + if err != nil { + return nil, fmt.Errorf("failed to resolve imports: %w", err) + } + + // Required to map identifiers to imported modules as assignments + importedIdentifiers := 
parseImports(imports, lang) + + // log.Debugf("Imported identifier => namespace:") + // for identifier, parsedImport := range importedIdentifiers { + // log.Debugf(" %s => %s\n", identifier, parsedImport.Namespace) + // } + + callGraph, err := newCallGraph(filePath, importedIdentifiers, tree) + if err != nil { + return nil, fmt.Errorf("failed to create call graph: %w", err) + } + + // Add root node to the call graph + callGraph.AddNode(filePath, astRootNode) + + processChildren(astRootNode, *treeData, filePath, callGraph, processorMetadata{}) + + return callGraph, nil +} + +func processNode(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if node == nil { + return newProcessorResult() + } + + nodeProcessor, exists := nodeProcessors[node.Type()] + if exists { + return nodeProcessor(node, treeData, currentNamespace, callGraph, metadata) + } + + // log.Debugf("Can't process %s with namespace: %s => %s", node.Type(), currentNamespace, node.Content(treeData)) + return emptyProcessor(node, treeData, currentNamespace, callGraph, metadata) +} + +func processChildren(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if node == nil { + return newProcessorResult() + } + + childrenResults := newProcessorResult() + + for i := 0; i < int(node.ChildCount()); i++ { + result := processNode(node.Child(i), treeData, currentNamespace, callGraph, metadata) + childrenResults.addResults(result) + } + + return childrenResults +} diff --git a/plugin/callgraph/plugin_test.go b/plugin/callgraph/plugin_test.go new file mode 100644 index 0000000..e3e0d0f --- /dev/null +++ b/plugin/callgraph/plugin_test.go @@ -0,0 +1,156 @@ +package callgraph + +import ( + "context" + "fmt" + "testing" + + "github.com/safedep/code/core" + "github.com/safedep/code/pkg/test" + "github.com/safedep/code/plugin" + "github.com/stretchr/testify/assert" +) + 
+type callgraphTestcase struct { + Language core.LanguageCode + FilePath string + + // Namezpaces representing assignment graph nodes (not exhaustive) + ExpectedAssignmentGraph map[string][]string + + // Namezpaces representing callgraph nodes (not exhaustive) + ExpectedCallGraph map[string][]string +} + +var testcases = []callgraphTestcase{ + { + Language: core.LanguageCodePython, + FilePath: "fixtures/testClass.py", + ExpectedAssignmentGraph: map[string][]string{ + "printxyz2": {"xyz//printxyz2"}, + "xyz//printxyz2": {}, + "printxyz1": {"xyz//printxyz1"}, + "xyz//printxyz1": {}, + "fixtures/testClass.py//x": {"fixtures/testClass.py//ClassA", "fixtures/testClass.py//ClassB"}, + "fixtures/testClass.py//TesterClass//__init__": {}, + "fixtures/testClass.py//alice": {"fixtures/testClass.py//TesterClass"}, + "fixtures/testClass.py//bannername": {"fixtures/testClass.py//TesterClass//name"}, + "fixtures/testClass.py//y": {"fixtures/testClass.py//x"}, + "fixtures/testClass.py//TesterClass//self//name": {"fixtures/testClass.py//TesterClass//__init__//\"TesterClass name\""}, + "fixtures/testClass.py//TesterClass//self//value": {"fixtures/testClass.py//TesterClass//__init__//42", "fixtures/testClass.py//TesterClass//__init__//100"}, + }, + ExpectedCallGraph: map[string][]string{ + "fixtures/testClass.py": { + "fixtures/testClass.py//TesterClass", + "fixtures/testClass.py//TesterClass//aboutme", + "fixtures/testClass.py//ClassA", + "fixtures/testClass.py//ClassB", + "fixtures/testClass.py//ClassA//method1", + "fixtures/testClass.py//ClassB//method1", + "fixtures/testClass.py//ClassA//method2", + "fixtures/testClass.py//ClassB//method2", + "fixtures/testClass.py//ClassA//methodUnique", + "fixtures/testClass.py//ClassB//methodUnique", + }, + "fixtures/testClass.py//TesterClass": {"fixtures/testClass.py//TesterClass//__init__"}, + "fixtures/testClass.py//TesterClass//__init__": {"getenv"}, + "fixtures/testClass.py//TesterClass//self//__init__": 
{"fixtures/testClass.py//TesterClass//__init__"}, + "fixtures/testClass.py//TesterClass//self//aboutme": {"fixtures/testClass.py//TesterClass//aboutme"}, + "fixtures/testClass.py//TesterClass//deepest_method": {"fixtures/testClass.py//TesterClass//self//helper_method", "print"}, + "fixtures/testClass.py//TesterClass//helper_method": {"print"}, + "fixtures/testClass.py//TesterClass//aboutme": {"print"}, + "fixtures/testClass.py//ClassA": {}, + "fixtures/testClass.py//ClassA//self": {}, + "fixtures/testClass.py//ClassA//self//method1": {"fixtures/testClass.py//ClassA//method1"}, + "fixtures/testClass.py//ClassA//method1": {"printxyz2"}, + "fixtures/testClass.py//ClassA//method2": {"printxyz2"}, + "fixtures/testClass.py//ClassB": {}, + "fixtures/testClass.py//ClassB//method1": {"printxyz2"}, + "fixtures/testClass.py//ClassB//self//method2": {"fixtures/testClass.py//ClassB//method2"}, + "fixtures/testClass.py//ClassB//methodUnique": {"printxyz3", "pprint//pp"}, + "fixtures/testClass.py//ClassB//self//methodUnique": {"fixtures/testClass.py//ClassB//methodUnique"}, + }, + }, + { + Language: core.LanguageCodePython, + FilePath: "fixtures/testFunctions.py", + ExpectedAssignmentGraph: map[string][]string{ + "listdirfn": {"os//listdir"}, + "printer2": {"xyzprintmodule//printer2"}, + "printer3": {"xyzprintmodule//printer3"}, + "printer4": {"xyzprintmodule//printer4"}, + "fixtures/testFunctions.py//baz": {"fixtures/testFunctions.py//bar"}, + "fixtures/testFunctions.py//xyz": {"fixtures/testFunctions.py//\"abc\"", "fixtures/testFunctions.py//25", "fixtures/testFunctions.py//foo", "fixtures/testFunctions.py//baz"}, + "fixtures/testFunctions.py//r1": {"fixtures/testFunctions.py//95", "fixtures/testFunctions.py//7.3", "fixtures/testFunctions.py//2"}, + "fixtures/testFunctions.py//res": {"fixtures/testFunctions.py//r1", "fixtures/testFunctions.py//somenumber", "fixtures/testFunctions.py//95", "fixtures/testFunctions.py//7.3"}, + }, + ExpectedCallGraph: map[string][]string{ + 
"fixtures/testFunctions.py": { + "fixtures/testFunctions.py//factorial", + "print", + "fixtures/testFunctions.py//xyz", + "fixtures/testFunctions.py//nestParent", + "fixtures/testFunctions.py//add", + "fixtures/testFunctions.py//sub", + "pstats//getsomestat", + }, + "fixtures/testFunctions.py//factorial": {"fixtures/testFunctions.py//factorial"}, + "fixtures/testFunctions.py//foo": {"pprint//pprint"}, + "fixtures/testFunctions.py//bar": {"print"}, + "fixtures/testFunctions.py//outerfn1": {"chmod"}, + "fixtures/testFunctions.py//nestParent": {"fixtures/testFunctions.py//outerfn1", "fixtures/testFunctions.py//nestParent//nestChild"}, + "fixtures/testFunctions.py//outerfn2": {"listdirfn"}, + "fixtures/testFunctions.py//nestParent//nestChild//fn1": {"printer6"}, + "fixtures/testFunctions.py//nestParent//nestChild//childScopedFn": {"printer2", "fixtures/testFunctions.py//nestParent//nestChild//fn1"}, + "fixtures/testFunctions.py//nestParent//nestChild": {"printer1", "fixtures/testFunctions.py//outerfn1", "fixtures/testFunctions.py//nestParent//nestChild//nestGrandChild"}, + "fixtures/testFunctions.py//fn1": {"printer4"}, + "fixtures/testFunctions.py//nestParent//nestChild//nestGrandChildUseless": {"printer3"}, + "fixtures/testFunctions.py//nestParent//nestChild//nestGrandChild": {"pprint//pp", "fixtures/testFunctions.py//nestParent//parentScopedFn", "fixtures/testFunctions.py//outerfn2", "fixtures/testFunctions.py//nestParent//nestChild//childScopedFn"}, + }, + }, +} + +func TestCallgraphPlugin(t *testing.T) { + for _, testcase := range testcases { + t.Run(fmt.Sprintf("%s__%s", testcase.FilePath, testcase.Language), func(t *testing.T) { + filePaths := []string{testcase.FilePath} + treeWalker, fileSystem, err := test.SetupBasicPluginContext(filePaths, []core.LanguageCode{testcase.Language}) + assert.NoError(t, err) + + var callgraphCallback CallgraphCallback = func(ctx context.Context, cg *CallGraph) error { + for assigneeNamespace, expectedAssignmentsNamespaces := range 
testcase.ExpectedAssignmentGraph { + assigneeNode, exists := cg.assignmentGraph.Assignments[assigneeNamespace] + assert.True(t, exists, "Expected assignee node %s to exist in assignment graph", assigneeNamespace) + assert.NotNil(t, assigneeNode, "Expected assignee node %s to be non-nil", assigneeNamespace) + if assigneeNode == nil { + continue + } + + assert.Equal(t, assigneeNamespace, assigneeNode.Namespace) + assert.ElementsMatch(t, expectedAssignmentsNamespaces, assigneeNode.AssignedTo) + } + + for sourceNamespace, expectedTargetNamespaces := range testcase.ExpectedCallGraph { + sourceNode, exists := cg.Nodes[sourceNamespace] + assert.True(t, exists, "Expected source node %s to exist in call graph", sourceNamespace) + assert.NotNil(t, sourceNode, "Expected source node %s to be non-nil", sourceNamespace) + if sourceNode == nil { + continue + } + + assert.Equal(t, sourceNamespace, sourceNode.Namespace) + assert.ElementsMatch(t, expectedTargetNamespaces, sourceNode.CallsTo) + } + return nil + } + + pluginExecutor, err := plugin.NewTreeWalkPluginExecutor(treeWalker, []core.Plugin{ + NewCallGraphPlugin(callgraphCallback), + }) + assert.NoError(t, err) + + err = pluginExecutor.Execute(context.Background(), fileSystem) + assert.NoError(t, err) + }) + } +} diff --git a/plugin/callgraph/processors.go b/plugin/callgraph/processors.go new file mode 100644 index 0000000..2f528d0 --- /dev/null +++ b/plugin/callgraph/processors.go @@ -0,0 +1,515 @@ +package callgraph + +import ( + "fmt" + "strings" + + "github.com/safedep/dry/log" + sitter "github.com/smacker/go-tree-sitter" +) + +type processorMetadata struct { + insideClass bool + insideFunction bool +} + +type processorResult struct { + ImmediateCalls []*CallGraphNode // Will be needed to manage assignment-for-call-returned values + ImmediateAssignments []*assignmentNode +} + +func newProcessorResult() processorResult { + return processorResult{ + ImmediateCalls: []*CallGraphNode{}, + ImmediateAssignments: 
[]*assignmentNode{}, + } +} + +// addResults adds the results of the provided processorResults to the current (callee) processorResult +func (pr *processorResult) addResults(results ...processorResult) { + for _, result := range results { + pr.ImmediateAssignments = append(pr.ImmediateAssignments, result.ImmediateAssignments...) + // @TODO - add some entries in assignment graph basis the pr.immediateCalls + } +} + +type nodeProcessor func(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult + +var nodeProcessors map[string]nodeProcessor + +func init() { + nodeProcessors = map[string]nodeProcessor{ + "module": emptyProcessor, + "program": emptyProcessor, + "expression_statement": emptyProcessor, + "binary_operator": binaryOperatorProcessor, + "identifier": identifierProcessor, + "class_definition": classDefinitionProcessor, + "function_definition": functionDefinitionProcessor, + "call": callProcessor, + "return": emptyProcessor, + "return_statement": functionReturnProcessor, + "arguments": emptyProcessor, + "argument_list": emptyProcessor, + "attribute": attributeProcessor, + "assignment": assignmentProcessor, + "subscript": skippedProcessor, + } + + // Literals + for _, symbol := range []string{"string", "number", "integer", "float", "double", "boolean", "null", "undefined", "true", "false"} { + nodeProcessors[symbol] = literalValueProcessor + } + + skippedNodeTypes := []string{ + // Imports + "import_statement", "import", "import_from_statement", + // Operators + "+", "-", "*", "/", "%", "**", "//", "=", "+=", "-=", "*=", "/=", "%=", + // Symbols + ",", ":", ";", ".", "(", ")", "{", "}", "[", "]", + // Comments and fillers + "comment", "whitespace", "newline", + // Other + } + for _, symbol := range skippedNodeTypes { + nodeProcessors[symbol] = skippedProcessor + } +} + +func emptyProcessor(emptyNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata 
processorMetadata) processorResult { + if emptyNode == nil { + return newProcessorResult() + } + + return processChildren(emptyNode, treeData, currentNamespace, callGraph, metadata) +} + +func skippedProcessor(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if node == nil { + return newProcessorResult() + } + + return newProcessorResult() +} + +func literalValueProcessor(literalNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if literalNode == nil { + return newProcessorResult() + } + + result := newProcessorResult() + literalNamespace := currentNamespace + namespaceSeparator + literalNode.Content(treeData) + literalAssignmentNode := callGraph.assignmentGraph.AddIdentifier(literalNamespace, literalNode) + result.ImmediateAssignments = append(result.ImmediateAssignments, literalAssignmentNode) + return result +} + +func classDefinitionProcessor(classDefNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if classDefNode == nil { + return newProcessorResult() + } + + classNameNode := classDefNode.ChildByFieldName("name") + if classNameNode == nil { + log.Errorf("Class definition without name - %s", classDefNode.Content(treeData)) + return newProcessorResult() + } + + // Class definition has its own scope, hence its own namespace + classNamespace := currentNamespace + namespaceSeparator + classNameNode.Content(treeData) + callGraph.AddNode(classNamespace, classDefNode) + + // Assignment is added so that we can resolve class constructor when a function with same name as classname is called + callGraph.assignmentGraph.AddIdentifier(classNamespace, classDefNode) + callGraph.classConstructors[classNamespace] = true + + instanceKeyword, exists := callGraph.GetInstanceKeyword() + if exists { + instanceNamespace := classNamespace + 
namespaceSeparator + instanceKeyword + callGraph.AddNode(instanceNamespace, nil) // @TODO - Can't create sitter node for instance keyword + callGraph.assignmentGraph.AddIdentifier(instanceNamespace, nil) + } + + classBody := classDefNode.ChildByFieldName("body") + if classBody == nil { + log.Errorf("Class definition without body - %s", classDefNode.Content(treeData)) + return newProcessorResult() + } + + metadata.insideClass = true + processChildren(classBody, treeData, classNamespace, callGraph, metadata) + metadata.insideClass = false + + log.Debugf("Register class definition for %s - %s", classNameNode.Content(treeData), classNamespace) + + return newProcessorResult() +} + +func functionDefinitionProcessor(functionDefNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if functionDefNode == nil { + return newProcessorResult() + } + + functionNameNode := functionDefNode.ChildByFieldName("name") + if functionNameNode == nil { + log.Errorf("Function definition without name - %s", functionDefNode.Content(treeData)) + return newProcessorResult() + } + + funcName := functionNameNode.Content(treeData) + + // Function definition has its own scope, hence its own namespace + functionNamespace := currentNamespace + namespaceSeparator + funcName + + // Add function to the call graph + if _, exists := callGraph.Nodes[functionNamespace]; !exists { + callGraph.AddNode(functionNamespace, functionDefNode) + callGraph.assignmentGraph.AddIdentifier(functionNamespace, functionDefNode) + log.Debugf("Register function definition for %s - %s", funcName, functionNamespace) + + // Add virtual fn call from class => classConstructor + if metadata.insideClass { + instanceKeyword, exists := callGraph.GetInstanceKeyword() + if exists { + instanceNamespace := currentNamespace + namespaceSeparator + instanceKeyword + namespaceSeparator + funcName + callGraph.AddEdge(instanceNamespace, nil, functionNamespace, 
functionDefNode) // @TODO - Can't create sitter node for instance keyword + log.Debugf("Register instance member function definition for %s - %s\n", funcName, instanceNamespace) + } + if funcName == "__init__" { + callGraph.AddEdge(currentNamespace, nil, functionNamespace, functionDefNode) // @TODO - Can't create sitter node for instance keyword + log.Debugf("Register class constructor for %s", currentNamespace) + } + } + } + + results := newProcessorResult() + + functionBody := functionDefNode.ChildByFieldName("body") + if functionBody != nil { + metadata.insideFunction = true + result := processChildren(functionBody, treeData, functionNamespace, callGraph, metadata) + metadata.insideFunction = false + results.addResults(result) + } + + return results +} + +func functionReturnProcessor(fnReturnNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if fnReturnNode == nil { + return newProcessorResult() + } + + // @TODO - Improve this to handle assignments for return values + // How to handle cross assignment-call + // eg. def main(): x = y() + // here, we know, main calls=> y, + // handle, x assigned=> return values of y + + return processChildren(fnReturnNode, treeData, currentNamespace, callGraph, metadata) +} + +func assignmentProcessor(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if node == nil { + return newProcessorResult() + } + + leftNode := node.ChildByFieldName("left") + if leftNode == nil { + log.Errorf("Assignment without left node - %s", node.Content(treeData)) + return newProcessorResult() + } + + rightNode := node.ChildByFieldName("right") + if rightNode == nil { + log.Errorf("Assignment without right node - %s", node.Content(treeData)) + return newProcessorResult() + } + + // @TODO - Handle multi variate assignments, eg. 
a, b = 1, 2 + + assigneeNodes := []*assignmentNode{} + + if leftNode.Type() == "attribute" { + // eg. xyz.attr = 1 + // must be resolved to xyz//attr (assigned)=> 1 + attributeResult := attributeProcessor(leftNode, treeData, currentNamespace, callGraph, metadata) + assigneeNodes = attributeResult.ImmediateAssignments + } + + // Create new fallback assignment node for leftNode if not found + if len(assigneeNodes) == 0 { + assigneeNodes = []*assignmentNode{ + callGraph.assignmentGraph.AddIdentifier(currentNamespace+namespaceSeparator+leftNode.Content(treeData), leftNode), + } + } + + result := processNode(rightNode, treeData, currentNamespace, callGraph, metadata) + + // Process & note direct calls of processChildren(right,...), and assign returned values in assignment graph + + for _, assigneeNode := range assigneeNodes { + for _, immediateCall := range result.ImmediateCalls { + callGraph.AddEdge(assigneeNode.Namespace, assigneeNode.TreeNode, immediateCall.Namespace, immediateCall.TreeNode) + } + for _, immediateAssignment := range result.ImmediateAssignments { + callGraph.assignmentGraph.AddAssignment(assigneeNode.Namespace, assigneeNode.TreeNode, immediateAssignment.Namespace, immediateAssignment.TreeNode) + } + + // log.Debugf("Resolved assignment for '%s' => %v\n", assigneeNode.Namespace, assigneeNode.AssignedTo) + // if callGraph.Nodes[assigneeNode.Namespace] != nil { + // log.Debugf("\tGraph edges -> %v\n", callGraph.Nodes[assigneeNode.Namespace].CallsTo) + // } + } + return newProcessorResult() +} + +func attributeProcessor(attributeNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if attributeNode == nil { + return newProcessorResult() + } + + objectSymbol, attributeQualifierNamespace, err := attributeResolver(attributeNode, treeData, currentNamespace, callGraph, metadata) + if err != nil { + log.Errorf("Error resolving attribute - %v", err) + return newProcessorResult() + } + + 
targetObject, objectResolved := resolveSymbol(objectSymbol, currentNamespace, callGraph) + if !objectResolved { + log.Errorf("Object not found in namespace for attribute - %s (Obj - %s, Attr - %s)", attributeNode.Content(treeData), objectSymbol, attributeQualifierNamespace) + return newProcessorResult() + } + + resolvedObjects := callGraph.assignmentGraph.Resolve(targetObject.Namespace) + + // log.Debugf("Resolved attribute for `%s` => %v // %s\n", node.Content(treeData), resolvedObjectNamespaces, attributeQualifierNamespace) + + // We only handle assignments for attributes here eg. xyz.attr + // 'called' attributes eg. xyz.attr(), are handled in callProcessor directly + result := newProcessorResult() + for _, resolvedObject := range resolvedObjects { + finalAttributeNamespace := resolvedObject.Namespace + namespaceSeparator + attributeQualifierNamespace + finalAttributeNode := callGraph.assignmentGraph.AddIdentifier(finalAttributeNamespace, attributeNode) + result.ImmediateAssignments = append(result.ImmediateAssignments, finalAttributeNode) + } + + return result +} + +func binaryOperatorProcessor(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if node == nil { + return newProcessorResult() + } + + leftNode := node.ChildByFieldName("left") + if leftNode == nil { + log.Errorf("Binary operator without left node - %s", node.Content(treeData)) + return newProcessorResult() + } + rightNode := node.ChildByFieldName("right") + if rightNode == nil { + log.Errorf("Binary operator without right node - %s", node.Content(treeData)) + return newProcessorResult() + } + + results := newProcessorResult() + + leftResult := processNode(leftNode, treeData, currentNamespace, callGraph, metadata) + rightResult := processNode(rightNode, treeData, currentNamespace, callGraph, metadata) + results.addResults(leftResult, rightResult) + + return results +} + +func identifierProcessor(identifierNode 
*sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if identifierNode == nil { + return newProcessorResult() + } + + result := newProcessorResult() + + identifierAssignmentNode, identifierResolved := resolveSymbol(identifierNode.Content(treeData), currentNamespace, callGraph) + + if identifierResolved { + result.ImmediateAssignments = append(result.ImmediateAssignments, identifierAssignmentNode) + return result + } + + // If not found iby search, we can assume it is a new identifier + identifierAssignmentNode = callGraph.assignmentGraph.AddIdentifier( + currentNamespace+namespaceSeparator+identifierNode.Content(treeData), + identifierNode, + ) + + result.ImmediateAssignments = append(result.ImmediateAssignments, identifierAssignmentNode) + + return result +} + +func callProcessor(callNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + if callNode == nil { + return newProcessorResult() + } + + functionNode := callNode.ChildByFieldName("function") + argumentsNode := callNode.ChildByFieldName("arguments") + if functionNode != nil { + return functionCallProcessor(functionNode, argumentsNode, treeData, currentNamespace, callGraph, metadata) + } + + return newProcessorResult() +} + +func functionCallProcessor(functionCallNode *sitter.Node, argumentsNode *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) processorResult { + result := newProcessorResult() + + functionName := functionCallNode.Content(treeData) + + markClassAssignment := func(classAssignmentNode *assignmentNode) { + if classAssignmentNode != nil && callGraph.classConstructors[classAssignmentNode.Namespace] { + // Include class namespace in assignments for constructors + result.ImmediateAssignments = append(result.ImmediateAssignments, classAssignmentNode) + log.Debugf("Class constructed - %s in fncall 
for %s\n", classAssignmentNode, functionName) + } + } + + // Process function arguments + if argumentsNode != nil { + // @TODO - Ideally, the result.ImmediateAssignments should be associated with called function + // but, we don't have parameter and their positional information, which is a complex task + // Hence, we're not processing argument results here + processNode(argumentsNode, treeData, currentNamespace, callGraph, metadata) + } + + functionAssignmentNode, functionResolvedBySearch := resolveSymbol(functionName, currentNamespace, callGraph) + if functionResolvedBySearch { + log.Debugf("Call %s searched (direct) & resolved to %s\n", functionName, functionAssignmentNode.Namespace) + callGraph.AddEdge(currentNamespace, nil, functionAssignmentNode.Namespace, functionAssignmentNode.TreeNode) // Assumption - current namespace exists in the graph + markClassAssignment(functionAssignmentNode) + return result + } + + // @TODO - Handle class qualified builtins eg. console.log, console.warn etc + // @TODO - Handle function calls with multiple qualifiers eg. abc.xyz.attr() + // Resolve qualified function calls, eg. 
xyz.attr() + + // Process attributes + functionObjectNode := functionCallNode.ChildByFieldName("object") + functionAttributeNode := functionCallNode.ChildByFieldName("attribute") + if functionAttributeNode != nil && functionObjectNode != nil { + log.Debugf("Call %s searched (attr qualified) & resolved to object - %s (%s), attribute - %s (%s) \n", functionName, functionObjectNode.Content(treeData), functionObjectNode.Type(), functionAttributeNode.Content(treeData), functionAttributeNode.Type()) + + objectSymbol, attributeQualifierNamespace, err := attributeResolver(functionObjectNode, treeData, currentNamespace, callGraph, metadata) + if err != nil { + log.Errorf("Error resolving function attribute - %v", err) + return newProcessorResult() + } + finalAttributeNamespace := functionAttributeNode.Content(treeData) + if attributeQualifierNamespace != "" { + finalAttributeNamespace = attributeQualifierNamespace + namespaceSeparator + finalAttributeNamespace + } + + objectAssignmentNode, functionResolvedByObjectQualifiedSearch := resolveSymbol(objectSymbol, currentNamespace, callGraph) + + if functionResolvedByObjectQualifiedSearch { + resolvedObjectNodes := callGraph.assignmentGraph.Resolve(objectAssignmentNode.Namespace) + for _, resolvedObjectNode := range resolvedObjectNodes { + functionNamespace := resolvedObjectNode.Namespace + namespaceSeparator + finalAttributeNamespace + + // log.Debugf("Call %s searched (attr qualified) & resolved to %s\n", functionName, functionNamespace) + callGraph.AddEdge(currentNamespace, nil, functionNamespace, nil) // @TODO - Assumed current namespace & functionNamespace to be pre-existing + + markClassAssignment(callGraph.assignmentGraph.Assignments[functionNamespace]) + } + return result + } + } + + // @TODO - Rethink on this + // if not found, possibility of hoisting (declared later) + + // @TODO - In order to handle function assignment to a variable, modify below code to search assignment graph also for scoped namespaces + + // @TODO 
- Handle argument assignment + // eg. for def add(a, b) + // if used as, add(x,y), we must assign add//a => x, add//b => y + // argumentNode := node.ChildByFieldName("arguments") + + // Builtin assignment already available + // @TODO - Handle class qualified builtins eg. console.log, console.warn etc + + log.Errorf("Couldn't process function call - %s", functionName) + return newProcessorResult() +} + +// Search symbol in parent namespaces (from self to parent to grandparent ...) +// eg. namespace - nestNestedFn.py//nestParent//nestChild, callTarget - outerfn1 +// try searching for outerfn1 in graph with all scope levels +// eg. search nestNestedFn.py//nestParent//nestChild//outerfn1 +// then nestNestedFn.py//nestParent//outerfn1 then nestNestedFn.py//outerfn1 and so on +func resolveSymbol(symbol string, currentNamespace string, callGraph *CallGraph) (*assignmentNode, bool) { + if symbol == "" { + return nil, false + } + + for i := strings.Count(currentNamespace, namespaceSeparator) + 1; i >= 0; i-- { + searchNamespace := strings.Join(strings.Split(currentNamespace, namespaceSeparator)[:i], namespaceSeparator) + namespaceSeparator + symbol + if i == 0 { + searchNamespace = symbol + } + + // Note - We're searching in assignment graph currently, since callgraph includes only nodes from defined functions, however assignment graph also has imported function items + searchedAssignmentNode, exists := callGraph.assignmentGraph.Assignments[searchNamespace] + if exists { + return searchedAssignmentNode, true + } + } + + return nil, false +} + +// Resolves a attribute eg. 
xyz.attr.subattr -> xyz, attr//subattr +// Returns objectSymbol, attributeQualifierNamespace, err +// This can be used to identify correct objNamespace for objectSymbol, finally resulting +// objNamespace//attributeQualifierNamespace +func attributeResolver(node *sitter.Node, treeData []byte, currentNamespace string, callGraph *CallGraph, metadata processorMetadata) (string, string, error) { + if node == nil { + return "", "", fmt.Errorf("fnAttributeResolver - node is nil") + } + + if node.Type() == "identifier" { + return node.Content(treeData), "", nil + } + + // @TODO - In cases of immediate attribution on constructors, we must resolve the objectNode of types - "call" also + // eg. result = someClass().attr.attr + + if node.Type() != "attribute" { + return "", "", fmt.Errorf("invalid node type for attribute resolver - %s", node.Type()) + } + + objectNode := node.ChildByFieldName("object") + subAttributeNode := node.ChildByFieldName("attribute") + + if objectNode == nil { + return "", "", fmt.Errorf("object node not found for attribute - %s", node.Content(treeData)) + } + if subAttributeNode == nil { + return "", "", fmt.Errorf("sub-attribute node not found for attribute - %s", node.Content(treeData)) + } + + objectSymbol, objectSubAttributeNamespace, err := attributeResolver(objectNode, treeData, currentNamespace, callGraph, metadata) + + if err != nil { + return "", "", err + } + + attributeQualifierNamespace := subAttributeNode.Content(treeData) + if objectSubAttributeNamespace != "" { + attributeQualifierNamespace = objectSubAttributeNamespace + namespaceSeparator + attributeQualifierNamespace + } + + return objectSymbol, attributeQualifierNamespace, nil +} diff --git a/plugin/callgraph/signatures.go b/plugin/callgraph/signatures.go new file mode 100644 index 0000000..b7a004b --- /dev/null +++ b/plugin/callgraph/signatures.go @@ -0,0 +1,107 @@ +package callgraph + +import ( + _ "embed" + + "github.com/safedep/code/core" + "github.com/safedep/dry/ds/trie" + 
"github.com/safedep/dry/log" +) + +type Signature struct { + ID string `yaml:"id"` + Description string `yaml:"description"` + Tags []string `yaml:"tags"` + Languages map[core.LanguageCode]LanguageMatchers `yaml:"languages"` +} + +const ( + MatchAny = "any" + MatchAll = "all" +) + +type LanguageMatchers struct { + Match string `yaml:"match"` + Conditions []SignatureCondition `yaml:"conditions"` +} + +type SignatureCondition struct { + Type string `yaml:"type"` // "call" or "import_module" + Value string `yaml:"value"` // function or module name +} + +type MatchCondition struct { + Condition SignatureCondition + Evidences []*CallGraphNode +} + +type SignatureMatchResult struct { + MatchedSignature *Signature + MatchedLanguageCode core.LanguageCode + MatchedConditions []MatchCondition +} + +type SignatureMatcher struct { + targetSignatures []Signature +} + +func NewSignatureMatcher(targetSignatures []Signature) *SignatureMatcher { + return &SignatureMatcher{ + targetSignatures: targetSignatures, + } +} + +func (sm *SignatureMatcher) MatchSignatures(cg *CallGraph) ([]SignatureMatchResult, error) { + language, err := cg.Tree.Language() + if err != nil { + log.Errorf("failed to get language from parse tree: %v", err) + return nil, err + } + + languageCode := language.Meta().Code + + matcherResults := []SignatureMatchResult{} + + functionCallTrie := trie.NewTrie[CallGraphNode]() + functionCallResultItems := cg.DFS() + for _, resultItem := range functionCallResultItems { + // We record the caller node in the trie for every namespace, + // since the caller is evidence of that namespace's usage + functionCallTrie.Insert(resultItem.Namespace, resultItem.Caller) + } + + for _, signature := range sm.targetSignatures { + languageSignature, exists := signature.Languages[languageCode] + if !exists { + continue + } + + matchedConditions := []MatchCondition{} + for _, condition := range languageSignature.Conditions { + if condition.Type == "call" { + matchCondition := 
MatchCondition{ + Condition: condition, + Evidences: []*CallGraphNode{}, + } + lookupNamespace := resolveNamespaceWithSeparator(condition.Value, language) + lookupEntries := functionCallTrie.WordsWithPrefix(lookupNamespace) + for _, lookupEntry := range lookupEntries { + matchCondition.Evidences = append(matchCondition.Evidences, lookupEntry.Value) + } + + if len(matchCondition.Evidences) > 0 { + matchedConditions = append(matchedConditions, matchCondition) + } + } + } + + if (languageSignature.Match == MatchAny && len(matchedConditions) > 0) || (languageSignature.Match == MatchAll && len(matchedConditions) == len(languageSignature.Conditions)) { + matcherResults = append(matcherResults, SignatureMatchResult{ + MatchedSignature: &signature, + MatchedLanguageCode: languageCode, + MatchedConditions: matchedConditions, + }) + } + } + return matcherResults, nil +} diff --git a/plugin/stripcomments/stripcomments_test.go b/plugin/stripcomments/stripcomments_test.go index 915941f..a402fcd 100644 --- a/plugin/stripcomments/stripcomments_test.go +++ b/plugin/stripcomments/stripcomments_test.go @@ -62,7 +62,11 @@ func TestStripComments(t *testing.T) { strippedBytes, err := io.ReadAll(readers[0]) assert.NoError(t, err) - defer expectedReader.Close() + defer func() { + err = expectedReader.Close() + assert.NoError(t, err) + }() + expectedBytes, err := io.ReadAll(expectedReader) assert.NoError(t, err)