diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..04bbf2e --- /dev/null +++ b/.editorconfig @@ -0,0 +1,19 @@ +[*] +insert_final_newline = true + +# Override for Makefile +[{Makefile,makefile,GNUmakefile}] +indent_style = tab +indent_size = 4 + +[Makefile.*] +indent_style = tab +indent_size = 4 + +[*.md] +indent_style = space +indent_size = 2 + +[*.sh] +indent_style = tab +indent_size = 2 diff --git a/.gitignore b/.gitignore index 22b2e0f..9653248 100644 --- a/.gitignore +++ b/.gitignore @@ -1,161 +1,46 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions +# If you prefer the allow list template instead of the deny list, see community template: +# https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore +# +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll *.so +*.dylib -# Distribution / packaging -.Python -.DS_Store -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ +# Test binary, built with `go test -c` +*.test -# Jupyter Notebook -.ipynb_checkpoints +# Output of the go coverage tool, specifically when used with LiteIDE +*.out -# IPython -profile_default/ -ipython_config.py +# Dependency directories (remove the comment below to include it) +# vendor/ -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version +# Go workspace file +go.work +.vscode/ -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock +# Module directory +.terraform +**/.idea +**/*.iml -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
-#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json +**/build +**/dist +**/.helmfile/ +.DS_Store +/variant +*.tar +*.gz +/bin -# Pyre type checker -.pyre/ +# Nix +.envrc +.direnv/ -# pytype static type analyzer -.pytype/ +# ScrapNGo specific +ScrapNGo.txt -# Cython debug symbols -cython_debug/ +dist/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ diff --git a/Brewfile b/Brewfile new file mode 100644 index 0000000..adde83a --- /dev/null +++ b/Brewfile @@ -0,0 +1,3 @@ +brew "go" +brew "gofumpt" +brew "exiftool" diff --git a/FUNDING.yml b/FUNDING.yml new file mode 100644 index 0000000..f95d757 --- /dev/null +++ b/FUNDING.yml @@ -0,0 +1 @@ +github: rosesecurity diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..040a51a --- /dev/null +++ b/Makefile @@ -0,0 +1,41 @@ +BINARY_NAME=scrapNGo +VERSION=local +GO=go + +default: help + +help: ## List Makefile targets + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + +all: build + +fmt: ## Format Go files + gofumpt -w . 
+ +build: ## Build ScrapNGo + env $(if $(GOOS),GOOS=$(GOOS)) $(if $(GOARCH),GOARCH=$(GOARCH)) $(GO) build -o build/$(BINARY_NAME) -ldflags "-X 'github.com/RoseSecurity/ScrapNGo/cmd.Version=${VERSION}'" main.go + +deps: ## Download dependencies + go mod download + +get: ## Install dependencies + go get + +clean: ## Clean up build artifacts + $(GO) clean + rm ./build/$(BINARY_NAME) + +testacc: ## Run acceptance tests + go test ./... + +run: build ## Run ScrapNGo + ./build/$(BINARY_NAME) + +docs: build ## Generate documentation + ./build/$(BINARY_NAME) docs + +version: build ## View binary version + chmod +x ./build/$(BINARY_NAME) + ./build/$(BINARY_NAME) version + +.PHONY: all build install clean run fmt help diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..65906bd --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,29 @@ +# Security Policy + +## Reporting a Vulnerability + +If you believe you have found a security vulnerability in any repository owned by RoseSecurity, please let me know straight away. I will investigate all legitimate reports and do my best to quickly fix the problem. + +### What to Include in Your Report + +To help me better understand the nature and scope of the issue, please include as much of the following information as possible in your report: + + - Description of the vulnerability and its potential impact. + - Step-by-step instructions to reproduce the issue. + - Affected versions and configurations. + - Any possible mitigations or workarounds that you have identified. + +### What to Expect + +> [!NOTE] +> **Bug Bounties** +> +> RoseSecurity **does not** provide bug bounties for vulnerability disclosures. +> +> As an open-source contributor, I release projects for free under a permissive license, encouraging community contributions. +> + +After you submit a report, I will: +- Respond to your report within 48 hours to acknowledge receipt. +- Provide an estimated time frame for addressing the vulnerability. 
+- Notify you when the issue is resolved. diff --git a/cmd/docs.go b/cmd/docs.go new file mode 100644 index 0000000..1013b89 --- /dev/null +++ b/cmd/docs.go @@ -0,0 +1,22 @@ +package cmd + +import ( + "github.com/spf13/cobra" + "github.com/spf13/cobra/doc" +) + +// Generate documentation for ScrapNGo commands and output to docs directory +var docsCmd = &cobra.Command{ + Use: "docs", + Short: "Generate documentation for the CLI", + SilenceUsage: true, + Hidden: true, + RunE: func(cmd *cobra.Command, args []string) error { + err := doc.GenMarkdownTree(cmd.Root(), "./docs") + if err != nil { + return err + } + + return nil + }, +} diff --git a/cmd/root.go b/cmd/root.go new file mode 100644 index 0000000..2387e4c --- /dev/null +++ b/cmd/root.go @@ -0,0 +1,100 @@ +package cmd + +import ( + "fmt" + "log" + + tui "github.com/RoseSecurity/ScrapNGo/internal/tui/utils" + "github.com/RoseSecurity/ScrapNGo/pkg/utils" + "github.com/spf13/cobra" +) + +var ( + blue = "\033[34m" + red = "\033[91m" + green = "\033[92m" + norm = "\x1b[0m" + tag = "@RoseSecurity" +) + +var rootCmd = &cobra.Command{ + Use: "scrapNGo", + Short: "ScrapNGo enumerates documents, manuals, and sensitive PDFs for key phrases and words that can be utilized in dictionary and brute force attacks.", + Long: `ScrapNGo enumerates documents, manuals, and sensitive PDFs for key phrases and words +that can be utilized in dictionary and brute force attacks. 
These keywords are outputted +to a text file (ScrapNGo.txt in the directory which the tool was run from) that can be read +by tools such as Hydra, Dirb, and other offensive security tools for initial access and +lateral movement.`, + Run: func(cmd *cobra.Command, args []string) { + file, _ := cmd.Flags().GetString("file") + mode, _ := cmd.Flags().GetString("mode") + outputFile, _ := cmd.Flags().GetString("output-file") + + // Print help if no file is provided + if file == "" { + cmd.Help() + return + } + + // Extract text from PDF + fileContent, err := utils.ExtractTextFromPDF(file) + if err != nil { + utils.LogErrorAndExit(err) + } + + wordList := utils.RemoveCommonWords(fileContent) + + if len(wordList) == 0 { + log.Fatalf("No content found in the PDF file: %s", file) + } + + // Process PDF content based on mode + var keywords []string + switch mode { + case "word-frequency": + spinner := tui.StartSpinner("Extracting word frequency...\n") + keywords = utils.WordFrequency(wordList, 100) + tui.StopSpinner(spinner) + case "metadata": + spinner := tui.StartSpinner("Extracting PDF metadata...\n") + utils.PrintMetadata(file) + if err != nil { + utils.LogErrorAndExit(err) + } + tui.StopSpinner(spinner) + return + case "entropy": + // keywords = utils.CalculateEntropy(wordList, 100) + // default: + // keywords = utils.ExtractKeywords(fileContent) + } + + // Write output to file + if err := utils.WriteToFile(outputFile, keywords); err != nil { + utils.LogErrorAndExit(err) + } + + fmt.Printf("%s has been created!\n", outputFile) + }, +} + +func init() { + // Custom help menu to display banner + rootCmd.SetHelpFunc(func(cmd *cobra.Command, args []string) { + fmt.Println() + tui.PrintStyledText("SCRAPPY") + fmt.Println(cmd.UsageString()) + }) + // Docs and Version commands + rootCmd.AddCommand(docsCmd) + rootCmd.AddCommand(versionCmd) + rootCmd.Flags().StringP("file", "f", "", "PDF input file") + rootCmd.Flags().StringP("mode", "m", "full", "Modes of operation: full, 
word-frequency, metadata, entropy") + rootCmd.Flags().StringP("output-file", "o", "ScrapNGo.txt", "Output file name") +} + +func Execute() { + if err := rootCmd.Execute(); err != nil { + utils.LogErrorAndExit(err) + } +} diff --git a/cmd/version.go b/cmd/version.go new file mode 100644 index 0000000..0d6aa62 --- /dev/null +++ b/cmd/version.go @@ -0,0 +1,66 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + + "github.com/fatih/color" + "github.com/spf13/cobra" + "golang.org/x/mod/semver" +) + +// Placeholder for builds +var Version = "1.0.0" + +type Release struct { + TagName string `json:"tag_name"` +} + +var versionCmd = &cobra.Command{ + Use: "version", + Short: "Print the CLI version", + Long: `This command prints the CLI version`, + Example: "scrapNGo version", + Run: func(cmd *cobra.Command, args []string) { + fmt.Println("scrapNGo: " + Version) + latestReleaseTag, err := latestRelease() + if err == nil && latestReleaseTag != "" { + latestRelease := strings.TrimPrefix(latestReleaseTag, "v") + currentRelease := strings.TrimPrefix(Version, "v") + if semver.Compare(latestRelease, currentRelease) > 0 { + updateScrapNGo(latestRelease) + } + } + }, +} + +// Fetch latest release for comparison to current version +func latestRelease() (string, error) { + resp, err := http.Get("https://api.github.com/repos/RoseSecurity/ScrapNGo/releases/latest") + if err != nil { + return "", fmt.Errorf("failed to fetch version: %w", err) + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("failed to read response body: %w", err) + } + + var release Release + if err := json.Unmarshal(body, &release); err != nil { + return "", fmt.Errorf("failed to parse version: %w", err) + } + + return release.TagName, nil +} + +// Display out of date warning +func updateScrapNGo(latestVersion string) { + c1 := color.New(color.FgCyan) + + c1.Println(fmt.Sprintf("\nYour version of ScrapNGo is out of date. 
The latest version is %s\n\n", latestVersion)) +} diff --git a/docs/scrapNGo.md b/docs/scrapNGo.md new file mode 100644 index 0000000..cfdb880 --- /dev/null +++ b/docs/scrapNGo.md @@ -0,0 +1,30 @@ +## scrapNGo + +ScrapNGo enumerates documents, manuals, and sensitive PDFs for key phrases and words that can be utilized in dictionary and brute force attacks. + +### Synopsis + +ScrapNGo enumerates documents, manuals, and sensitive PDFs for key phrases and words +that can be utilized in dictionary and brute force attacks. These keywords are outputted +to a text file (ScrapNGo.txt in the directory which the tool was run from) that can be read +by tools such as Hydra, Dirb, and other offensive security tools for initial access and +lateral movement. + +``` +scrapNGo [flags] +``` + +### Options + +``` + -f, --file string PDF input file + -h, --help help for scrapNGo + -m, --mode string Modes of operation: full, word-frequency, metadata, entropy (default "full") + -o, --output-file string Output file name (default "ScrapNGo.txt") +``` + +### SEE ALSO + +* [scrapNGo completion](scrapNGo_completion.md) - Generate the autocompletion script for the specified shell + +###### Auto generated by spf13/cobra on 15-Dec-2024 diff --git a/docs/scrapNGo_completion.md b/docs/scrapNGo_completion.md new file mode 100644 index 0000000..2b50aff --- /dev/null +++ b/docs/scrapNGo_completion.md @@ -0,0 +1,25 @@ +## scrapNGo completion + +Generate the autocompletion script for the specified shell + +### Synopsis + +Generate the autocompletion script for scrapNGo for the specified shell. +See each sub-command's help for details on how to use the generated script. + + +### Options + +``` + -h, --help help for completion +``` + +### SEE ALSO + +* [scrapNGo](scrapNGo.md) - ScrapNGo enumerates documents, manuals, and sensitive PDFs for key phrases and words that can be utilized in dictionary and brute force attacks. 
+* [scrapNGo completion bash](scrapNGo_completion_bash.md) - Generate the autocompletion script for bash +* [scrapNGo completion fish](scrapNGo_completion_fish.md) - Generate the autocompletion script for fish +* [scrapNGo completion powershell](scrapNGo_completion_powershell.md) - Generate the autocompletion script for powershell +* [scrapNGo completion zsh](scrapNGo_completion_zsh.md) - Generate the autocompletion script for zsh + +###### Auto generated by spf13/cobra on 15-Dec-2024 diff --git a/docs/scrapNGo_completion_bash.md b/docs/scrapNGo_completion_bash.md new file mode 100644 index 0000000..9fba4ff --- /dev/null +++ b/docs/scrapNGo_completion_bash.md @@ -0,0 +1,44 @@ +## scrapNGo completion bash + +Generate the autocompletion script for bash + +### Synopsis + +Generate the autocompletion script for the bash shell. + +This script depends on the 'bash-completion' package. +If it is not installed already, you can install it via your OS's package manager. + +To load completions in your current shell session: + + source <(scrapNGo completion bash) + +To load completions for every new session, execute once: + +#### Linux: + + scrapNGo completion bash > /etc/bash_completion.d/scrapNGo + +#### macOS: + + scrapNGo completion bash > $(brew --prefix)/etc/bash_completion.d/scrapNGo + +You will need to start a new shell for this setup to take effect. 
+ + +``` +scrapNGo completion bash +``` + +### Options + +``` + -h, --help help for bash + --no-descriptions disable completion descriptions +``` + +### SEE ALSO + +* [scrapNGo completion](scrapNGo_completion.md) - Generate the autocompletion script for the specified shell + +###### Auto generated by spf13/cobra on 15-Dec-2024 diff --git a/docs/scrapNGo_completion_fish.md b/docs/scrapNGo_completion_fish.md new file mode 100644 index 0000000..9c7969c --- /dev/null +++ b/docs/scrapNGo_completion_fish.md @@ -0,0 +1,35 @@ +## scrapNGo completion fish + +Generate the autocompletion script for fish + +### Synopsis + +Generate the autocompletion script for the fish shell. + +To load completions in your current shell session: + + scrapNGo completion fish | source + +To load completions for every new session, execute once: + + scrapNGo completion fish > ~/.config/fish/completions/scrapNGo.fish + +You will need to start a new shell for this setup to take effect. + + +``` +scrapNGo completion fish [flags] +``` + +### Options + +``` + -h, --help help for fish + --no-descriptions disable completion descriptions +``` + +### SEE ALSO + +* [scrapNGo completion](scrapNGo_completion.md) - Generate the autocompletion script for the specified shell + +###### Auto generated by spf13/cobra on 15-Dec-2024 diff --git a/docs/scrapNGo_completion_powershell.md b/docs/scrapNGo_completion_powershell.md new file mode 100644 index 0000000..e2472bc --- /dev/null +++ b/docs/scrapNGo_completion_powershell.md @@ -0,0 +1,32 @@ +## scrapNGo completion powershell + +Generate the autocompletion script for powershell + +### Synopsis + +Generate the autocompletion script for powershell. + +To load completions in your current shell session: + + scrapNGo completion powershell | Out-String | Invoke-Expression + +To load completions for every new session, add the output of the above command +to your powershell profile. 
+ + +``` +scrapNGo completion powershell [flags] +``` + +### Options + +``` + -h, --help help for powershell + --no-descriptions disable completion descriptions +``` + +### SEE ALSO + +* [scrapNGo completion](scrapNGo_completion.md) - Generate the autocompletion script for the specified shell + +###### Auto generated by spf13/cobra on 15-Dec-2024 diff --git a/docs/scrapNGo_completion_zsh.md b/docs/scrapNGo_completion_zsh.md new file mode 100644 index 0000000..54010b9 --- /dev/null +++ b/docs/scrapNGo_completion_zsh.md @@ -0,0 +1,46 @@ +## scrapNGo completion zsh + +Generate the autocompletion script for zsh + +### Synopsis + +Generate the autocompletion script for the zsh shell. + +If shell completion is not already enabled in your environment you will need +to enable it. You can execute the following once: + + echo "autoload -U compinit; compinit" >> ~/.zshrc + +To load completions in your current shell session: + + source <(scrapNGo completion zsh); compdef _scrapNGo scrapNGo + +To load completions for every new session, execute once: + +#### Linux: + + scrapNGo completion zsh > "${fpath[1]}/_scrapNGo" + +#### macOS: + + scrapNGo completion zsh > $(brew --prefix)/share/zsh/site-functions/_scrapNGo + +You will need to start a new shell for this setup to take effect. 
+ + +``` +scrapNGo completion zsh [flags] +``` + +### Options + +``` + -h, --help help for zsh + --no-descriptions disable completion descriptions +``` + +### SEE ALSO + +* [scrapNGo completion](scrapNGo_completion.md) - Generate the autocompletion script for the specified shell + +###### Auto generated by spf13/cobra on 15-Dec-2024 diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..6886a92 --- /dev/null +++ b/go.mod @@ -0,0 +1,32 @@ +module github.com/RoseSecurity/ScrapNGo + +go 1.23.4 + +require ( + github.com/arsham/figurine v1.3.0 + github.com/barasher/go-exiftool v1.10.0 + github.com/briandowns/spinner v1.23.1 + github.com/fatih/color v1.18.0 + github.com/jwalton/go-supportscolor v1.2.0 + github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06 + github.com/mattn/go-colorable v0.1.13 + github.com/olekukonko/tablewriter v0.0.5 + github.com/spf13/cobra v1.6.1 + golang.org/x/mod v0.22.0 +) + +require ( + github.com/arsham/rainbow v1.2.1 // indirect + github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be // indirect + github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-runewidth v0.0.9 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/sys v0.25.0 // indirect + golang.org/x/term v0.1.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..8a5e180 --- /dev/null +++ b/go.sum @@ -0,0 +1,71 @@ +github.com/arsham/figurine v1.3.0 h1:vpGbzp460B1gkdFt9jrl95v4wDE2vP3BDcg0AKWJ7J0= +github.com/arsham/figurine v1.3.0/go.mod h1:cnw6B/y/XzRObDhQoqNJnpAGuSSrkjCcqZCcMJ1ag/I= +github.com/arsham/rainbow v1.2.1 h1:iS8o/1WAPVFvhtMZgdiy7zM8mD+XIWZfwzGXD6manKI= +github.com/arsham/rainbow v1.2.1/go.mod 
h1:vERoG76FE/wN9rGJRv9H/tTfH873AX6wfQdJqNRy6fA= +github.com/barasher/go-exiftool v1.10.0 h1:f5JY5jc42M7tzR6tbL9508S2IXdIcG9QyieEXNMpIhs= +github.com/barasher/go-exiftool v1.10.0/go.mod h1:F9s/a3uHSM8YniVfwF+sbQUtP8Gmh9nyzigNF+8vsWo= +github.com/briandowns/spinner v1.23.1 h1:t5fDPmScwUjozhDj4FA46p5acZWIPXYE30qW2Ptu650= +github.com/briandowns/spinner v1.23.1/go.mod h1:LaZeM4wm2Ywy6vO571mvhQNRcWfRUnXOs0RcKV0wYKM= +github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be h1:J5BL2kskAlV9ckgEsNQXscjIaLiOYiZ75d4e94E6dcQ= +github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be/go.mod h1:mk5IQ+Y0ZeO87b858TlA645sVcEcbiX6YqP98kt+7+w= +github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w= +github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= +github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= +github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jwalton/go-supportscolor v1.2.0 h1:g6Ha4u7Vm3LIsQ5wmeBpS4gazu0UP1DRDE8y6bre4H8= +github.com/jwalton/go-supportscolor v1.2.0/go.mod h1:hFVUAZV2cWg+WFFC4v8pT2X/S2qUUBYMioBD9AINXGs= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod 
h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06 h1:kacRlPN7EN++tVpGUorNGPn/4DnB7/DfTY82AOn6ccU= +github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0= +github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= +github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= +github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= +github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMTY= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4= +golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= +golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw= +golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 
v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/tui/utils/utils.go b/internal/tui/utils/utils.go new file mode 100644 index 0000000..6a653a2 --- /dev/null +++ b/internal/tui/utils/utils.go @@ -0,0 +1,43 @@ +package utils + +import ( + "fmt" + "os" + "time" + + "github.com/arsham/figurine/figurine" + "github.com/briandowns/spinner" + "github.com/jwalton/go-supportscolor" + "github.com/mattn/go-colorable" +) + +const ( + ColorReset = "\033[0m" + ColorGreen = "\033[32m" + ColorBold = "\033[1m" +) + +// PrintStyledText prints a styled text to the terminal +func PrintStyledText(text string) error { + // Check if the terminal supports colors + if supportscolor.Stdout().SupportsColor { + return figurine.Write(os.Stdout, text, "ANSI Regular.flf") + } + return nil +} + +// StartSpinner prints a spinner to the terminal +func StartSpinner(message string) *spinner.Spinner { + s := spinner.New(spinner.CharSets[14], 100*time.Millisecond) + s.Color("magenta") + s.Writer = colorable.NewColorableStdout() // Ensure colors are supported on Windows + s.Suffix = " " + message + fmt.Printf("%s%s%s ", ColorBold+ColorGreen, s.Suffix, ColorReset) + s.Start() + return s +} + +// StopSpinner stops the spinner +func StopSpinner(s *spinner.Spinner) { + s.Stop() +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..28afeb6 --- /dev/null +++ b/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/RoseSecurity/ScrapNGo/cmd" + +func main() { + cmd.Execute() +} diff --git a/pkg/utils/file_utils.go b/pkg/utils/file_utils.go new file mode 100644 index 0000000..2c388ca --- /dev/null +++ b/pkg/utils/file_utils.go @@ -0,0 +1,24 @@ +package utils + +import ( + "fmt" + "os" +) + +// WriteToFile writes the keywords to a specified output file +func WriteToFile(filename 
string, keywords []string) error { + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("error creating output file: %v", err) + } + defer file.Close() + + // Write keywords to file + for _, keyword := range keywords { + if _, err := file.WriteString(keyword + "\n"); err != nil { + return fmt.Errorf("error writing to file: %v", err) + } + } + + return nil +} diff --git a/pkg/utils/log_utils.go b/pkg/utils/log_utils.go new file mode 100644 index 0000000..4a83c3b --- /dev/null +++ b/pkg/utils/log_utils.go @@ -0,0 +1,44 @@ +package utils + +import ( + "errors" + "os" + "os/exec" + + "github.com/fatih/color" +) + +const ( + LogLevelTrace = "Trace" + LogLevelDebug = "Debug" + LogLevelInfo = "Info" + LogLevelWarning = "Warning" +) + +// LogErrorAndExit logs errors to std.Error and exits with an error code +func LogErrorAndExit(err error) { + if err != nil { + LogError(err) + + // Find the executed command's exit code from the error + var exitError *exec.ExitError + if errors.As(err, &exitError) { + exitCode := exitError.ExitCode() + os.Exit(exitCode) + } + } +} + +// LogError logs errors to std.Error +func LogError(err error) { + if err != nil { + c := color.New(color.FgRed) + _, err2 := c.Fprintln(color.Error, err.Error()+"\n") + if err2 != nil { + color.Red("Error logging the error:") + color.Red("%s\n", err2) + color.Red("Original error:") + color.Red("%s\n", err) + } + } +} diff --git a/pkg/utils/mode_utils.go b/pkg/utils/mode_utils.go new file mode 100644 index 0000000..f63e348 --- /dev/null +++ b/pkg/utils/mode_utils.go @@ -0,0 +1,89 @@ +package utils + +import ( + "fmt" + "os" + "sort" + "strings" + + "github.com/barasher/go-exiftool" + "github.com/olekukonko/tablewriter" +) + +// ANSI escape codes for bold and blue text +var ( + blue = "\033[1;34m" + norm = "\x1b[0m" +) + +type wordCount struct { + word string + count int +} + +// WordFrequency returns the top N most frequently used keywords from the input slice of strings +func 
WordFrequency(input []string, topN int) []string { + // Create a map to store word frequencies + frequency := make(map[string]int) + + // Iterate over each string in the input slice + for _, sentence := range input { + // Split the sentence into words and normalize them to lowercase + words := strings.Fields(strings.ToLower(sentence)) + for _, word := range words { + // Increment the count for each word in the map + frequency[word]++ + } + } + + // Convert the map to a slice of wordCount structs for sorting + counts := make([]wordCount, 0, len(frequency)) + for word, count := range frequency { + counts = append(counts, wordCount{word, count}) + } + + // Sort the slice by count in descending order, then by word alphabetically + sort.Slice(counts, func(i, j int) bool { + if counts[i].count == counts[j].count { + return counts[i].word < counts[j].word + } + return counts[i].count > counts[j].count + }) + + // Collect the top N words + result := make([]string, 0, topN) + for i := 0; i < topN && i < len(counts); i++ { + result = append(result, counts[i].word) + } + + return result +} + +// PrintMetadata uses exiftool to print PDF metadata +func PrintMetadata(file string) { + et, err := exiftool.NewExiftool() + if err != nil { + LogErrorAndExit(err) + return + } + defer et.Close() + + fileInfos := et.ExtractMetadata(file) + + table := tablewriter.NewWriter(os.Stdout) + table.SetHeader([]string{"Key", "Value"}) + table.SetBorder(false) + + for _, fileInfo := range fileInfos { + if fileInfo.Err != nil { + LogError(fileInfo.Err) + continue + } + + for k, v := range fileInfo.Fields { + table.Append([]string{k, fmt.Sprintf("%v", v)}) + } + } + + table.Render() +} diff --git a/pkg/utils/pdf_utils.go b/pkg/utils/pdf_utils.go new file mode 100644 index 0000000..11464b5 --- /dev/null +++ b/pkg/utils/pdf_utils.go @@ -0,0 +1,123 @@ +package utils + +import ( + "bytes" + "fmt" + "regexp" + "strings" + "unicode" + "unicode/utf8" + + "github.com/ledongthuc/pdf" +) + +// isValidWord 
checks if a word is meaningful and should be kept +func isValidWord(word string) bool { + // Trim non-letter characters + word = strings.TrimFunc(word, func(r rune) bool { + return !unicode.IsLetter(r) + }) + + // Reject words that are too short or invalid + if utf8.RuneCountInString(word) < 3 { + return false + } + + // Check the ratio of letters to other characters + letters, numbers := 0, 0 + for _, r := range word { + switch { + case unicode.IsLetter(r): + letters++ + case unicode.IsNumber(r): + numbers++ + } + } + + // Require at least 2 letters and <50% numeric characters + return letters >= 2 && float64(numbers)/float64(len(word)) < 0.5 +} + +// ExtractTextFromPDF extracts text from a PDF file and splits it into cleaned words +func ExtractTextFromPDF(file string) ([]string, error) { + f, r, err := pdf.Open(file) + if err != nil { + return nil, fmt.Errorf("failed to open PDF file: %w", err) + } + defer f.Close() + + var buffer bytes.Buffer + + // Extract text from each page + for pageIndex := 0; pageIndex < r.NumPage(); pageIndex++ { + page := r.Page(pageIndex) + if page.V.IsNull() { + continue + } + buffer.WriteString(fmt.Sprintf("%v", page.Content())) + } + + // Unicode normalization for consistent encoding + text := strings.ToValidUTF8(buffer.String(), "") + + // Remove non-alphanumeric characters except spaces and dashes + reg := regexp.MustCompile(`[^\w\s-]`) + cleanedText := reg.ReplaceAllString(text, " ") + + // Split text into words + words := strings.Fields(cleanedText) + var validWords []string + + // Validate words + for _, word := range words { + if isValidWord(word) { + validWords = append(validWords, word) + } + } + + return validWords, nil +} + +// RemoveCommonWords filters out common words and deduplicates the input +func RemoveCommonWords(keywords []string) []string { + commonWords := map[string]struct{}{ + "and": {}, "the": {}, "at": {}, "there": {}, "some": {}, "my": {}, "of": {}, "be": {}, + "use": {}, "her": {}, "than": {}, "this": {}, 
// commonWords is the set of stop words dropped by RemoveCommonWords.
// Every key must be lowercase because lookups use the lowercased input word.
var commonWords = map[string]struct{}{
	"and": {}, "the": {}, "at": {}, "there": {}, "some": {}, "my": {}, "of": {}, "be": {},
	"use": {}, "her": {}, "than": {}, "this": {}, "an": {}, "would": {}, "first": {}, "a": {},
	"have": {}, "each": {}, "to": {}, "from": {}, "which": {}, "like": {}, "been": {}, "in": {},
	"or": {}, "she": {}, "him": {}, "is": {}, "one": {}, "do": {}, "into": {}, "who": {}, "you": {},
	"had": {}, "how": {}, "that": {}, "by": {}, "their": {}, "has": {}, "its": {}, "it": {}, "if": {},
	"he": {}, "but": {}, "was": {}, "not": {}, "up": {}, "more": {}, "for": {}, "are": {}, "were": {},
	"as": {}, "we": {}, "with": {}, "when": {}, "then": {}, "no": {}, "come": {}, "his": {}, "your": {},
	"them": {}, "way": {}, "they": {}, "can": {}, "these": {}, "could": {}, "may": {}, "i": {},
	"said": {}, "so": {},
}

// RemoveCommonWords lowercases every keyword, drops those found in the
// commonWords stop-word set, and removes duplicates.
//
// The result keeps the order of first occurrence and contains only lowercase
// words. It is nil when nothing survives the filter.
func RemoveCommonWords(keywords []string) []string {
	seenWords := make(map[string]struct{}, len(keywords))
	var result []string

	for _, word := range keywords {
		lowerWord := strings.ToLower(word)

		// Skip stop words. The comparison is case-insensitive because both
		// the set and the candidate are lowercase.
		if _, isCommon := commonWords[lowerWord]; isCommon {
			continue
		}

		// Skip words that were already emitted.
		if _, seen := seenWords[lowerWord]; seen {
			continue
		}

		seenWords[lowerWord] = struct{}{}
		result = append(result, lowerWord)
	}

	return result
}