diff --git a/next-env.d.ts b/next-env.d.ts index 52e831b43..4f11a03dc 100644 --- a/next-env.d.ts +++ b/next-env.d.ts @@ -2,4 +2,4 @@ /// // NOTE: This file should not be edited -// see https://nextjs.org/docs/pages/api-reference/config/typescript for more information. +// see https://nextjs.org/docs/basic-features/typescript for more information. diff --git a/pages/advanced-algorithms/available-algorithms/community_detection.mdx b/pages/advanced-algorithms/available-algorithms/community_detection.mdx index 2d8de79c9..d698e143c 100644 --- a/pages/advanced-algorithms/available-algorithms/community_detection.mdx +++ b/pages/advanced-algorithms/available-algorithms/community_detection.mdx @@ -78,6 +78,8 @@ the procedure. gain in modularity goes below this threshold, a final iteration is performed using the `community_alg_threshold` value. Valid values are between 0 and 1 (exclusive). This parameter's value should be higher than `community_alg_threshold`. +- `num_of_threads: integer (default=Half of the system's maximum thread count)` ➡ Specifies the number of threads used for parallel execution in the algorithm's parallelized parts. +**Note**: OpenMP (omp) is used for parallelization, so the actual thread usage may depend on system settings and OpenMP configurations. {

Output:

} diff --git a/pages/advanced-algorithms/available-algorithms/date.mdx b/pages/advanced-algorithms/available-algorithms/date.mdx index 58fe18304..082354115 100644 --- a/pages/advanced-algorithms/available-algorithms/date.mdx +++ b/pages/advanced-algorithms/available-algorithms/date.mdx @@ -1,6 +1,6 @@ --- title: date -description: Discover how to effectively perform date-based operations in your graph data with Memgraph's Date class. Also, check out our documentation to learn how to manipulate and better contextualize dates in your data. +description: Discover how to effectively perform date-based operations in your graph data with Memgraph's Date class. Also, check out our documentation to learn how to manipulate and better contextualize dates in your data. --- import { Steps } from 'nextra/components' @@ -60,6 +60,18 @@ The `timezone` parameter can be specified with the database TZ identifier (text) name, as listed for [timezones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). + + +Starting with `tzdata` version `2024b`, its format has changed and an +incompatibility with the current libstdc++ has been introduced. As a result, +using `tzdata` version `2024b` or later will break the timezone feature in +Memgraph. This could lead to incorrect handling of timezones and unexpected +behavior in your application. To avoid compatibility issues, please ensure that +you are using `tzdata` `v2024a` or earlier with Memgraph until libstdc++ has +been updated to support the new format in tzdata. + + + {

Output:

} - `formatted: string` ➡ The received time in the specified format. @@ -69,7 +81,7 @@ name, as listed for Use the following query to get a string representation from a time value: ```cypher -CALL date.format(74976, "h", "%Y/%m/%d %H:%M:%S %Z", "Mexico/BajaNorte") +CALL date.format(74976, "h", "%Y/%m/%d %H:%M:%S %Z", "Mexico/BajaNorte") YIELD formatted RETURN formatted; ``` @@ -107,6 +119,18 @@ The `timezone` parameter can be specified with the database TZ identifier (text) name, as listed for [timezones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones). + + +Starting with `tzdata` version `2024b`, its format has changed and an +incompatibility with the current libstdc++ has been introduced. As a result, +using `tzdata` version `2024b` or later will break the timezone feature in +Memgraph. This could lead to incorrect handling of timezones and unexpected +behavior in your application. To avoid compatibility issues, please ensure that +you are using `tzdata` `v2024a` or earlier with Memgraph until libstdc++ has +been updated to support the new format in tzdata. + + + {

Output:

} - `parsed: int` ➡ The number of time units that have elapsed since the Unix epoch. @@ -116,7 +140,7 @@ name, as listed for Use the following query to parse the date string: ```cypher -CALL date.parse("2023/08/03 14:30:00", "h", "%Y/%m/%d %H:%M:%S", "Europe/Zagreb") +CALL date.parse("2023/08/03 14:30:00", "h", "%Y/%m/%d %H:%M:%S", "Europe/Zagreb") YIELD parsed RETURN parsed; ``` @@ -134,9 +158,9 @@ Adds two numeric values representing quantities of time in specific units. {

Input:

} -- `time: int` ➡ The first term in the addition operation. +- `time: int` ➡ The first term in the addition operation. - `unit: string` ➡ The time unit of the above value. -- `add_value: int` ➡ The second term in the addition operation. +- `add_value: int` ➡ The second term in the addition operation. - `add_unit: string` ➡ The time unit of the above value. The `unit` parameter supports the following values: diff --git a/pages/advanced-algorithms/available-algorithms/migrate.mdx b/pages/advanced-algorithms/available-algorithms/migrate.mdx index 4b274e9d0..dccec6d37 100644 --- a/pages/advanced-algorithms/available-algorithms/migrate.mdx +++ b/pages/advanced-algorithms/available-algorithms/migrate.mdx @@ -9,7 +9,13 @@ import { Steps } from 'nextra/components' # migrate -A module that contains procedures describing graphs on a meta-level. +The `migrate` module provides an efficient way to transfer graph data from various relational databases +into Memgraph. This module allows you to retrieve data from various source systems, +transforming tabular data into graph structures. + +With Cypher, you can shape the migrated data dynamically, making it easy to create nodes, +establish relationships, and enrich your graph. Below are examples showing how to retrieve, +filter, and convert relational data into a graph format. Input: } -* `table_or_sql: str` ➡ Table name or an SQL query. When the table name is specified, the module - will migrate all the rows from the table. In the case that a SQL query is provided, the module - will migrate the rows returned from the queries. -* `config: mgp.Map` ➡ Connection configuration parameters (as in `mysql.connector.connect`). -* `config_path` ➡ Path to a JSON file containing configuration parameters (as in `mysql.connector.connect`). -* `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Optionally, queries can be parameterized. In that case, `params` provides parameter values. - +- `table_or_sql: str` ➡ Table name or an SQL query. +- `config: mgp.Map` ➡ Connection parameters (as in `mysql.connector.connect`). +- `config_path` ➡ Path to a JSON file containing configuration parameters. +- `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Query parameters (if applicable). + {

Output:

} -* `row: mgp.Map`: The result table as a stream of rows. +- `row: mgp.Map` ➡ The result table as a stream of rows. {

Usage:

} -To inspect a sample of rows, use the following query: - +#### Retrieve and inspect data ```cypher -CALL migrate.mysql('example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.mysql('example_table', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) YIELD row -RETURN row; +RETURN row LIMIT 5000; ``` -In the case you want to migrate specific results from a SQL query, it is enough to modify the -first argument of the query module call, and continue to use the Cypher query language to -shape your results: - +#### Filter specific data ```cypher -CALL migrate.mysql('SELECT * FROM example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.mysql('SELECT * FROM users', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) YIELD row -WITH row WHERE row.age >= 30 RETURN row; ``` -### `sql_server()` +#### Create nodes from migrated data +```cypher +CALL migrate.mysql('SELECT id, name, age FROM users', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) +YIELD row +CREATE (u:User {id: row.id, name: row.name, age: row.age}); +``` + +#### Create relationships between users +```cypher +CALL migrate.mysql('SELECT user1_id, user2_id FROM friendships', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) +YIELD row +MATCH (u1:User {id: row.user1_id}), (u2:User {id: row.user2_id}) +CREATE (u1)-[:FRIENDS_WITH]->(u2); +``` -With the `migrate.sql_server()` procedure you can access SQL Server and migrate your data -to Memgraph. The result table is converted into a stream, and the returned rows can -be used to create graph structures. The value of the `config` parameter must be -at least an empty map. If `config_path` is passed, every key-value pair from -JSON file will overwrite any values in `config` file. +--- + +### `oracle_db()` + +With the `migrate.oracle_db()` procedure, you can access Oracle DB and migrate your data to Memgraph. {

Input:

} -* `table_or_sql: str` ➡ Table name or an SQL query. When the table name is specified, the module - will migrate all the rows from the table. In the case that a SQL query is provided, the module - will migrate the rows returned from the queries. -* `config: mgp.Map` ➡ Connection configuration parameters (as in `pyodbc.connect`). -* `config_path` ➡ Path to the JSON file containing configuration parameters (as in `pyodbc.connect`). -* `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Optionally, queries can be parameterized. In that case, `params` provides parameter values. - +- `table_or_sql: str` ➡ Table name or an SQL query. +- `config: mgp.Map` ➡ Connection parameters (as in `oracledb.connect`). +- `config_path` ➡ Path to a JSON file containing configuration parameters. +- `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Query parameters (if applicable). + {

Output:

} -* `row: mgp.Map`: The result table as a stream of rows. +- `row: mgp.Map` ➡ The result table as a stream of rows. {

Usage:

} -To inspect the first 5000 rows from a database, use the following query: - +#### Retrieve and inspect data ```cypher -CALL migrate.sql_server('example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.oracle_db('example_table', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) YIELD row RETURN row LIMIT 5000; ``` -In the case you want to migrate specific results from a SQL query, it is enough to modify the -first argument of the query module call, and continue to use the Cypher query language to -shape your results: - +#### Merge nodes to avoid duplicates ```cypher -CALL migrate.sql_server('SELECT * FROM example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.oracle_db('SELECT id, name FROM companies', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'business_db'} ) YIELD row -WITH row -WHERE row.age >= 30 -RETURN row; +MERGE (c:Company {id: row.id}) +SET c.name = row.name; ``` -### `oracle_db()` +--- + +### `postgresql()` -With the `migrate.oracle_db` you can access Oracle DB and migrate your data to Memgraph. -The result table is converted into a stream, and the returned rows can be used to -create graph structures. The value of the `config` parameter must be at least an -empty map. If `config_path` is passed, every key-value pair from JSON file will -overwrite any values in `config` file. +With the `migrate.postgresql()` procedure, you can access PostgreSQL and migrate your data to Memgraph. {

Input:

} -* `table_or_sql: str` ➡ Table name or an SQL query. When the table name is specified, the module - will migrate all the rows from the table. In the case that a SQL query is provided, the module - will migrate the rows returned from the queries. -* `config: mgp.Map` ➡ Connection configuration parameters (as in `oracledb.connect`). -* `config_path` ➡ Path to the JSON file containing configuration parameters (as in `oracledb.connect`). -* `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Optionally, queries may be parameterized. In that case, `params` provides parameter values. - +- `table_or_sql: str` ➡ Table name or an SQL query. +- `config: mgp.Map` ➡ Connection parameters (as in `psycopg2.connect`). +- `config_path` ➡ Path to a JSON file containing configuration parameters. +- `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Query parameters (if applicable). + {

Output:

} -* `row: mgp.Map`: The result table as a stream of rows. +- `row: mgp.Map` ➡ The result table as a stream of rows. {

Usage:

} -To inspect the first 5000 rows from a database, use the following query: - +#### Retrieve and inspect data ```cypher -CALL migrate.oracle_db('example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.postgresql('example_table', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) YIELD row RETURN row LIMIT 5000; ``` -In the case you want to migrate specific results from a SQL query, it is enough to modify the -first argument of the query module call, and continue to use the Cypher query language to -shape your results: +#### Create nodes for products +```cypher +CALL migrate.postgresql('SELECT product_id, name, price FROM products', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'retail_db'} ) +YIELD row +CREATE (p:Product {id: row.product_id, name: row.name, price: row.price}); +``` +#### Establish relationships between orders and customers ```cypher -CALL migrate.oracle_db('SELECT * FROM example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.postgresql('SELECT order_id, customer_id FROM orders', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'retail_db'} ) YIELD row -WITH row -WHERE row.age >= 30 -RETURN row; +MATCH (o:Order {id: row.order_id}), (c:Customer {id: row.customer_id}) +CREATE (c)-[:PLACED]->(o); ``` -### `postgresql()` +--- -With the `migrate.postgresql` you can access PostgreSQL and migrate your data to Memgraph. -The result table is converted into a stream, and the returned rows can be used to -create graph structures. The value of the `config` parameter must be at least an -empty map. If `config_path` is passed, every key-value pair from JSON file will -overwrite any values in `config` file. +### `sql_server()` + +With the `migrate.sql_server()` procedure, you can access SQL Server and migrate your data to Memgraph. {

Input:

} -* `table_or_sql: str` ➡ Table name or an SQL query. When the table name is specified, the module - will migrate all the rows from the table. In the case that a SQL query is provided, the module - will migrate the rows returned from the queries. -* `config: mgp.Map` ➡ Connection configuration parameters (as in `psycopg2.connect`). -* `config_path` ➡ Path to the JSON file containing configuration parameters (as in `psycopg2.connect`). -* `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Optionally, queries may be parameterized. In that case, `params` provides parameter values. - +- `table_or_sql: str` ➡ Table name or an SQL query. +- `config: mgp.Map` ➡ Connection parameters (as in `pyodbc.connect`). +- `config_path` ➡ Path to a JSON file containing configuration parameters. +- `params: mgp.Nullable[mgp.Any] (default=None)` ➡ Query parameters (if applicable). + {

Output:

} -* `row: mgp.Map`: The result table as a stream of rows. +- `row: mgp.Map` ➡ The result table as a stream of rows. {

Usage:

} -To inspect the first 5000 rows from a database, use the following query: - +#### Retrieve and inspect data ```cypher -CALL migrate.postgresql('example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.sql_server('example_table', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'demo_db'} ) YIELD row RETURN row LIMIT 5000; ``` -In the case you want to migrate specific results from a SQL query, it is enough to modify the -first argument of the query module call, and continue to use the Cypher query language to -shape your results: +#### Convert SQL table rows into graph nodes +```cypher +CALL migrate.sql_server('SELECT id, name, role FROM employees', {user: 'memgraph', + password: 'password', + host: 'localhost', + database: 'company_db'} ) +YIELD row +CREATE (e:Employee {id: row.id, name: row.name, role: row.role}); +``` + +--- + +### `s3()` +With the `migrate.s3()` procedure, you can **access a CSV file in AWS S3**, stream the data into Memgraph, +and transform it into a **graph representation** using Cypher. The migration uses the Python `boto3` client. + +{

Input:

} + +- `file_path: str` ➡ S3 file path in the format `'s3://bucket-name/path/to/file.csv'`. +- `config: mgp.Map` ➡ AWS connection parameters. All of them are optional. + - `aws_access_key_id` - if not provided, environment variable `AWS_ACCESS_KEY_ID` will be used + - `aws_secret_access_key` - if not provided, environment variable `AWS_SECRET_ACCESS_KEY` will be used + - `region_name` - if not provided, environment variable `AWS_REGION` will be used + - `aws_session_token` - if not provided, environment variable `AWS_SESSION_TOKEN` will be used +- `config_path: str` (optional) ➡ Path to a JSON file containing AWS credentials. + +{

Output:

} + +- `row: mgp.Map` ➡ Each row from the CSV file as a structured dictionary. + +{

Usage:

} + +#### Retrieve and inspect CSV data from S3 +```cypher +CALL migrate.s3('s3://my-bucket/data.csv', {aws_access_key_id: 'your-key', + aws_secret_access_key: 'your-secret', + region_name: 'us-east-1'} ) +YIELD row +RETURN row +LIMIT 100; +``` + +#### Filter specific rows from the CSV ```cypher -CALL migrate.postgresql('SELECT * FROM example_table', {user:'memgraph', - password:'password', - host:'localhost', - database:'demo_db'} ) +CALL migrate.s3('s3://my-bucket/customers.csv', {aws_access_key_id: 'your-key', + aws_secret_access_key: 'your-secret', + region_name: 'us-west-2'} ) YIELD row -WITH row WHERE row.age >= 30 RETURN row; ``` + +#### Create nodes dynamically from CSV data +```cypher +CALL migrate.s3('s3://my-bucket/employees.csv', {aws_access_key_id: 'your-key', + aws_secret_access_key: 'your-secret', + region_name: 'eu-central-1'} ) +YIELD row +CREATE (e:Employee {id: row.id, name: row.name, position: row.position}); +``` diff --git a/pages/advanced-algorithms/available-algorithms/pagerank.mdx b/pages/advanced-algorithms/available-algorithms/pagerank.mdx index b64e98320..7f4037e08 100644 --- a/pages/advanced-algorithms/available-algorithms/pagerank.mdx +++ b/pages/advanced-algorithms/available-algorithms/pagerank.mdx @@ -76,6 +76,8 @@ If subgraph is not specified, the algorithm is computed on the entire graph by d * `stop_epsilon: double (default=1e-5)` ➡ Value used to terminate the iterations of PageRank. If change from one iteration to another is lower than *stop_epsilon*, execution is stopped. +* `num_of_threads: integer (default=1)` ➡ Number of threads used for executing the algorithm. + Increasing the number of threads gives only marginal improvements so the recommended (and default) number of threads is 1. {

Output:

} diff --git a/pages/client-libraries/python.mdx b/pages/client-libraries/python.mdx index 21de3bed3..fbe40f50a 100644 --- a/pages/client-libraries/python.mdx +++ b/pages/client-libraries/python.mdx @@ -11,8 +11,8 @@ import CodeSnippet from '/components/code-snippet/CodeSnippet' Learn how to create a Python application that connects to the Memgraph database and executes simple queries. -Both [Neo4j Python client](https://neo4j.com/docs/python-manual/current/) and [GQLAlchemy](https://github.com/memgraph/gqlalchemy) can be used to connect to Memgraph with Python. -This guide will show how to use Neo4j Python client and for more information on GQLAlchemy, check out its [documentation](https://memgraph.github.io/gqlalchemy/). +Both [Neo4j Python client](https://neo4j.com/docs/python-manual/current/) and [GQLAlchemy](https://github.com/memgraph/gqlalchemy) can be used to connect to Memgraph with Python. +This guide will show how to use Neo4j Python client and for more information on GQLAlchemy, check out its [documentation](https://memgraph.github.io/gqlalchemy/). Memgraph and Neo4j both support Bolt protocol and Cypher queries, which means that same client can be used to connect to both databases. This is very convenient if switching between the two databases is needed. This guide is based on the client version v5 and above. Some examples may @@ -29,7 +29,7 @@ Necessary prerequisites that should be installed in your local environment are: -### Run Memgraph +### Run Memgraph If you're new to Memgraph or you're in a developing stage, we recommend using the Memgraph Platform. Besides the database, it also @@ -60,7 +60,7 @@ communicate with the client using the exposed 7687 port. Memgraph Lab is a web application you can use to visualize the data. It's accessible at [http://localhost:3000](http://localhost:3000) if Memgraph Platform is running correctly. The 7444 port enables Memgraph Lab to access and preview the logs, -which is why both of these ports need to be exposed. +which is why both of these ports need to be exposed. For more information visit the getting started guide on [how to run Memgraph with Docker](/getting-started/install-memgraph/docker). @@ -177,6 +177,8 @@ Once the database is running and the client is installed or available in Python, - [Connect without authentication (default)](#connect-without-authentication-default) - [Connect with authentication](#connect-with-authentication) - [Connect with self-signed certificate](#encrypted-database-connection-with-self-signed-certificate) +- [Connect with Single sign-on (SSO)](#connect-with-single-sign-on-sso) +- [Impersonate a user](#impersonate-a-user) #### Connect without authentication (default) @@ -269,6 +271,47 @@ with GraphDatabase.driver(URI, auth=AUTH) as client: print(record["name"]) ``` +#### Connect with Single sign-on (SSO) + + +This is currently only supported for OIDC SSO. + + +To use SSO with the Python driver you need to get the access and id tokens yourself. +One simple way to do it is to use the authlib library and follow the official [tutorial](https://docs.authlib.org/en/latest/client/oauth2.html). + +To connect to the Memgraph database you have to use the `custom_auth` class with the `scheme` parameter set as `oidc-entra-id`, `oidc-okta` or `oidc-custom` depending on which scheme you are using, +`credentials` parameter set to contain both access and id tokens in the format shown in the example below. Finally set `principal` and `realm` parameters to `None`. 
 + +Below is an example of connecting to the Memgraph database using OIDC SSO with the custom auth scheme. +```python +with neo4j.GraphDatabase.driver( + "bolt://localhost:7687", + auth=neo4j.custom_auth( + scheme="oidc-custom", + credentials=f"access_token={token['access_token']};id_token={token['id_token']}", + principal=None, + realm=None, + ) +) as driver: +``` + +#### Impersonate a user + + +[User impersonation](/database-management/authentication-and-authorization/impersonate-user) is an Enterprise feature. + + +Once logged in, a user with the correct permissions can impersonate a different +user during a session. This means that any query run during that session +will be executed as if the impersonated user executed it. The target user can be +defined during session creation as in the following snippet: + +```python +with driver.session(impersonated_user="user1") as session: + # queries here will be executed as if user1 executed them +``` + ### Query the database After connecting your client to Memgraph, you can start running queries. The simplest way to run queries is by using the `execute_query()` method, which has automatic transaction management. @@ -398,8 +441,8 @@ Path will contain [Nodes](#process-the-node-result) and [Relationships[#process- A transaction is a unit of work executed on the database; it can be a basic read or write, or a complex series of queries. Transactions can be managed in multiple ways, but usually they are managed automatically by the client or manually through explicit code steps. Transaction management defines how to handle a transaction: when to commit, roll back, or terminate it. -On the driver side, if a transaction fails because of a transient error, the transaction is retried automatically. -The transient error will occur during write conflicts or network failures. The driver will retry the transaction function with an exponentially increasing delay. +On the driver side, if a transaction fails because of a transient error, the transaction is retried automatically. +The transient error will occur during write conflicts or network failures. The driver will retry the transaction function with an exponentially increasing delay. #### Automatic transaction management @@ -645,9 +688,9 @@ The `Session.run()` method is most commonly used for `LOAD CSV` clause to preven #### Concurrent transactions -It is possible to run concurrent transactions with Python's client by leveraging threads or processes. -Using threads could cause your code to be partially locked because of [Global interpreter lock (GIL)](https://wiki.python.org/moin/GlobalInterpreterLock), -resulting in slow execution. Hence, it is always better to run your workloads in separate processes, +It is possible to run concurrent transactions with Python's client by leveraging threads or processes. +Using threads could cause your code to be partially locked because of [Global interpreter lock (GIL)](https://wiki.python.org/moin/GlobalInterpreterLock), +resulting in slow execution. Hence, it is always better to run your workloads in separate processes, where each process will have its own interpreter and memory space, avoiding GIL issues. To leverage multiple concurrent processes, you can use Python's `multiprocessing` module.
Here is an example of how to run concurrent transactions with `multiprocessing` module: @@ -659,7 +702,7 @@ from neo4j import GraphDatabase HOST_PORT = "bolt://localhost:7687" def process_chunk(query, create_list): - try: + try: driver = GraphDatabase.driver(HOST_PORT, auth=("", "")) with driver.session() as session: session.run(query, {"batch": create_list}) @@ -680,7 +723,7 @@ with multiprocessing.Pool(10) as pool: pool.starmap(process_chunk, [(query, chunk) for chunk in chunks]) ``` -Each process will execute a query that contains a chunk of nodes. +Each process will execute a query that contains a chunk of nodes. You can control the number of concurrent transactions and processes by specifying the number of processes in the `multiprocessing.Pool` constructor. Each transaction will be a separate connection to the database and will be @@ -693,7 +736,7 @@ conflicting transactions. The typical scenario is when two transactions try to update the same node simultaneously, or add a relationship to the same node. It is a write-write conflict between transactions. In this case, the first transaction will pass, and one of the transactions will fail, and you will need -to handle the error and retry the transaction. +to handle the error and retry the transaction. If you are running transactions in parallel, you should avoid [implicit transactions](#implicit-transactions) because you can't control the execution @@ -702,7 +745,7 @@ process, and there are no retries. You can use the [managed transactions](#managed-transactions) or [explicit transactions](#explicit-transactions) to handle the conflicting transactions. Explicit API provides full control of the process, and it is recommended for -production use and handling conflicts. +production use and handling conflicts. Here is an example of how to handle conflicting transactions in explicit API: @@ -716,7 +759,7 @@ def process_chunk(query, create_list, max_retries=100, initial_wait_time=0.200, tx.commit() break except TransientError as te: - jitter = random.uniform(0, jitter) * initial_wait_time + jitter = random.uniform(0, jitter) * initial_wait_time wait_time = initial_wait_time * (backoff_factor ** attempt) + jitter print(f"Commit failed on attempt {attempt+1}. Retrying in {wait_time} seconds...") time.sleep(wait_time) @@ -730,9 +773,9 @@ In the example above, we are using the `begin_transaction()` method to start a transaction, and then we are running the query inside the transaction. If the transaction fails with a `TransientError,` the transaction will be retried using the retry strategy. Otherwise, another error occurred, and the transaction -should be aborted. +should be aborted. -The essential aspects of the retry strategy are the following arguments: +The essential aspects of the retry strategy are the following arguments: - `max_retries` - the maximum number of retries before the transaction will be aborted with a timeout error. This number should be set based on the expected @@ -750,7 +793,7 @@ for 2 minutes + waiting time. retry. If there are a lot of transactions running in parallel, it is recommended to use `jitter` to avoid the thundering herd problem. -If you use managed transactions, you can configure the retry scenario to use the `session` configuration. Here is an example: +If you use managed transactions, you can configure the retry scenario to use the `session` configuration. 
Here is an example: ```python import multiprocessing @@ -781,18 +824,18 @@ In this case, the `TransientError` will be retried using the retry strategy that The essential configuration arguments are the following: - `max_transaction_retry_time` - the maximum time the transaction will be - retried; after that, it will be aborted with a timeout error. + retried; after that, it will be aborted with a timeout error. - `initial_retry_delay` - the time that the transaction will wait before the - first retry. + first retry. - `retry_delay_multiplier` - the factor by which the retry delay will be multiplied after each retry. - `retry_delay_jitter_factor` - the factor by which the retry delay will be - randomized after each retry. + randomized after each retry. If you are still struggling with conflicts and serialization errors while using a Python client, we recommend referring to our [Serialization errors](/help-center/errors/serialization) page -for detailed guidance on troubleshooting and best practices. +for detailed guidance on troubleshooting and best practices. \ No newline at end of file diff --git a/pages/clustering/high-availability.mdx b/pages/clustering/high-availability.mdx index b8b49afc4..798644c32 100644 --- a/pages/clustering/high-availability.mdx +++ b/pages/clustering/high-availability.mdx @@ -264,7 +264,11 @@ network communication could be set up incorrectly, etc. The user can remove the UNREGISTER INSTANCE instanceName; ``` -At the moment of registration, the instance that you want to unregister must not be MAIN because unregistering MAIN could lead to an inconsistent cluster state. +When unregistering an instance, ensure that the instance being unregistered is +**not** the MAIN instance. Unregistering MAIN can lead to an inconsistent +cluster state. Additionally, the cluster must have an **alive** MAIN instance +during the unregistration process. If no MAIN instance is available, the +operation cannot be guaranteed to succeed. The instance requested to be unregistered will also be unregistered from the current MAIN's REPLICA set. @@ -367,7 +371,8 @@ Consider the instance to be down only if several consecutive pings fail because For the majority of RPC messages, Memgraph uses a default timeout of 10s. This is to ensure that when sending a RPC request, the client will not block indefinitely before receiving a response if the communication between the client and the server is broken. The list of RPC messages -for which the timeout is used is the following: +for which the timeout is used is the following: + - ShowInstancesReq -> coordinator sending to coordinator - DemoteMainToReplicaReq -> coordinator sending to data instances - PromoteToMainReq -> coordinator sending to data instances @@ -381,7 +386,34 @@ for which the timeout is used is the following: - FrequentHeartbeatReq -> coordinator sending to data instances - HeartbeatReq -> main sending to replica - TimestampReq -> main sending to replica - +- SystemHeartbeatReq -> main sending to replica +- ForceResetStorageReq -> main sending to replica. The timeout is set to 60s. +- SystemRecoveryReq -> main sending to replica. The timeout set to 5s. + + +For replication-related RPC messages — AppendDeltasRpc, CurrentWalRpc, and +WalFilesRpc — it is not practical to set a strict execution timeout. The +processing time on the replica side is directly proportional to the amount of +data being transferred. To handle this, the replica sends periodic progress +updates to the main instance after processing every 100,000 deltas. 
Since +processing 100,000 deltas is expected to take a relatively consistent amount of +time, we can enforce a timeout based on this interval. The default timeout for +these RPC messages is 30 seconds, though in practice, processing 100,000 deltas +typically takes less than 3 seconds. + +SnapshotRpc is also a replication-related RPC message, but its execution time +is tracked differently. The replica sends an update to the main instance after +completing 1,000,000 units of work. The work units are assigned as follows: + +- Processing nodes, edges, or indexed entities (label index, label-property index, + edge type index, edge type property index) = 1 unit +- Processing a node inside a point or text index = 10 units +- Processing a node inside a vector index (most computationally expensive) = + 1,000 units + +With this unit-based tracking system, the replica is expected to report progress +every 2–3 seconds. Given this, a timeout of 60 seconds is set to avoid +unnecessary network instability while ensuring responsiveness. ## Failover @@ -438,14 +470,36 @@ listening to MAIN with the given UUID. #### Force sync of data -On failover, the current logic is to choose the most-up-to-date instance from all available instances to promote to the new MAIN. For promotion to the MAIN -to successfully happen, the new MAIN figures out if REPLICA is behind (has less up-to-date data) or has data that the new MAIN doesn't have. -If REPLICA has data that MAIN doesn't have, in that case REPLICA is in a diverged-from-MAIN state. If at least one REPLICA is in diverged-from-MAIN -state, failover won't succeed as MAIN can't replicate data to diverged-from-MAIN REPLICA. -When choosing a new MAIN in the failover procedure from the list of available REPLICAs, the instance with the latest commit timestamp for the default database is chosen as the new MAIN. -In case some other instance had more up-to-date data when the failover procedure was choosing a new MAIN but was down at that point when rejoining the cluster, -the new MAIN instance sends a force sync RPC request to such instance. Force sync RPC request deletes all current data on all databases on a given instance and accepts data from the -current MAIN. This way cluster will always follow the current MAIN. +During a failover event, Memgraph selects the most up-to-date, alive instance to +become the new MAIN. The selection process works as follows: +1. From the list of available REPLICA instances, Memgraph chooses the one with +the latest commit timestamp for the default database. +2. If an instance that had more recent data was down during this selection +process, it will not be considered for promotion to MAIN. + +If a previously down instance had more up-to-date data but was unavailable +during failover, it will go through a specific recovery process upon rejoining +the cluster: +- The new MAIN will clear the returning replica’s storage. +- The returning replica will then receive all commits from the new MAIN to + synchronize its state. +- The replica's old durability files will be preserved in a `.old` directory in + `data_directory/snapshots` and `data_directory/wal` folders, allowing admins + to manually recover data if needed. + +Memgraph prioritizes availability over strict consistency (leaning towards AP in +the CAP theorem). 
While it aims to maintain consistency as much as possible, the +current failover logic can result in a non-zero Recovery Point Objective (RPO), +that is, data loss, because: +- The promoted MAIN might not have received all commits from the previous MAIN + before the failure. +- This design ensures that the MAIN remains writable for the maximum possible + time. + +If your environment requires strong consistency and can tolerate write +unavailability, [reach out to +us](https://github.com/memgraph/memgraph/discussions). We are actively exploring +support for a fully synchronous mode. ## Actions on follower coordinators @@ -453,13 +507,6 @@ current MAIN. This way cluster will always follow the current MAIN. From follower coordinators you can only execute `SHOW INSTANCES`. Registering and unregistering data instances, demoting an instance, setting an instance to MAIN, and force-resetting the cluster state are all disabled. - - -Under certain extreme scenarios, the current implementation of HA could lead to having Recovery Point Objective (RPO) != 0 (aka data loss). These are environments with high volume of transactions -where data is constantly changed, added, deleted... If you are operating in such scenarios, please open an issue on [GitHub](https://github.com/memgraph/memgraph/issues) -as we are eager to expand our support for this kind of workload. - - ## Instances' restart diff --git a/pages/custom-query-modules/c/c-api.mdx b/pages/custom-query-modules/c/c-api.mdx index 65ad739b0..389e2ac8d 100644 --- a/pages/custom-query-modules/c/c-api.mdx +++ b/pages/custom-query-modules/c/c-api.mdx @@ -113,6 +113,7 @@ Memgraph in order to use them. | enum [mgp_error](#variable-mgp-error) | **[mgp_list_contains_deleted](#function-mgp-list-contains-deleted)**(struct mgp_list * list, int * result)
Result is non-zero if the given list contains any deleted values, otherwise 0. | | enum [mgp_error](#variable-mgp-error) | **[mgp_list_append](#function-mgp-list-append)**(struct mgp_list * list, struct mgp_value * val)
Append a copy of mgp_value to mgp_list if capacity allows. | | enum [mgp_error](#variable-mgp-error) | **[mgp_list_append_extend](#function-mgp-list-append-extend)**(struct mgp_list * list, struct mgp_value * val)
Append a copy of mgp_value to mgp_list increasing capacity if needed. | +| enum [mgp_error](#variable-mgp-error) | **[mgp_list_reserve](#function-mgp-list-reserve)**(struct mgp_list * list, size_t n)
Ensure the underlying capacity of the mgp_list is at least n. | | enum [mgp_error](#variable-mgp-error) | **[mgp_list_size](#function-mgp-list-size)**(struct mgp_list * list, size_t * result)
Get the number of elements stored in mgp_list. | | enum [mgp_error](#variable-mgp-error) | **[mgp_list_capacity](#function-mgp-list-capacity)**(struct mgp_list * list, size_t * result)
Get the total number of elements for which there's already allocated memory in mgp_list. | | enum [mgp_error](#variable-mgp-error) | **[mgp_list_at](#function-mgp-list-at)**(struct mgp_list * list, size_t index, struct mgp_value ** result)
Get the element in mgp_list at given position. | @@ -197,6 +198,8 @@ Memgraph in order to use them. | enum [mgp_error](#variable-mgp-error) | **[mgp_graph_edge_set_to](#function-mgp-graph-edge-set-to)**(struct mgp_graph * graph, struct mgp_edge * e, struct mgp_vertex * new_to, struct mgp_memory * memory, struct mgp_edge ** result)
Change edge to (end) vertex. | | enum [mgp_error](#variable-mgp-error) | **[mgp_graph_edge_change_type](#function-mgp-graph-edge-change-type)**(struct mgp_graph * graph, struct mgp_edge * e, struct mgp_edge_type new_type, struct mgp_memory * memory, struct mgp_edge ** result)
Change edge type. | | enum [mgp_error](#variable-mgp-error) | **[mgp_graph_delete_edge](#function-mgp-graph-delete-edge)**(struct mgp_graph * graph, struct mgp_edge * edge)
Delete an edge from the graph. | +| enum [mgp_error](#variable-mgp-error) | **[mgp_graph_approximate_vertex_count](#function-mgp-graph-approximate-vertex-count)**(struct mgp_graph * graph, size_t * result)
Retrieves an approximate count of vertices in the graph. Note that this number is not exact and should be used with caution. | +| enum [mgp_error](#variable-mgp-error) | **[mgp_graph_approximate_edge_count](#function-mgp-graph-approximate-edges-count)**(struct mgp_graph * graph, size_t * result)
Retrieves an approximate count of edges in the graph. Note that this number is not exact and should be used with caution. | | void | **[mgp_vertices_iterator_destroy](#function-mgp-vertices-iterator-destroy)**(struct mgp_vertices_iterator * it)
Free the memory used by a mgp_vertices_iterator. | | enum [mgp_error](#variable-mgp-error) | **[mgp_graph_iter_vertices](#function-mgp-graph-iter-vertices)**(struct mgp_graph * g, struct mgp_memory * memory, struct mgp_vertices_iterator ** result)
Start iterating over vertices of the given graph. | | enum [mgp_error](#variable-mgp-error) | **[mgp_vertices_iterator_underlying_graph_is_mutable](#function-mgp-vertices-iterator-underlying-graph-is-mutable)**(struct mgp_vertices_iterator * it, int * result)
Result is non-zero if the vertices returned by this iterator can be modified. | @@ -1288,6 +1291,16 @@ Append a copy of mgp_value to mgp_list increasing capacity if needed. The list copies the given value and therefore does not take ownership of the original value. You still need to call mgp_value_destroy to free the original value. In case of a capacity change, the previously contained elements will move in memory and any references to them will be invalid. Return MGP_ERROR_UNABLE_TO_ALLOCATE if unable to allocate a mgp_value. +### mgp_list_reserve [#function-mgp-list-reserve] +```cpp +enum mgp_error mgp_list_reserve( + struct mgp_list * list, + size_t n +) +``` + +Ensure the underlying capacity of the mgp_list is at least n. + ### mgp_list_size [#function-mgp-list-size] ```cpp @@ -2404,6 +2417,25 @@ Delete an edge from the graph. When the first parameter to a procedure is a proj Return MGP_ERROR_IMMUTABLE_OBJECT if `graph` is immutable. Return MGP_ERROR_SERIALIZATION_ERROR if `edge`, its source or destination vertex has been modified by another transaction. +### mgp_graph_approximate_vertex_count [#function-mgp-graph-approximate-vertex-count] +```cpp +enum mgp_error mgp_graph_approximate_vertex_count( + struct mgp_graph * graph, + size_t * result +) +``` + +Retrieves an approximate count of vertices in the graph. Note that this number is not exact and should be used with caution. + +### mgp_graph_approximate_edge_count [#function-mgp-graph-approximate-edges-count] +```cpp +enum mgp_error mgp_graph_approximate_edge_count( + struct mgp_graph * graph, + size_t * result +) +``` + +Retrieves an approximate count of edges in the graph. Note that this number is not exact and should be used with caution. ### mgp_vertices_iterator_destroy [#function-mgp-vertices-iterator-destroy] ```cpp @@ -4176,6 +4208,10 @@ void mgp_vertices_iterator_destroy(struct mgp_vertices_iterator *it); enum mgp_error mgp_graph_iter_vertices(struct mgp_graph *g, struct mgp_memory *memory, struct mgp_vertices_iterator **result); +enum mgp_error mgp_graph_approximate_vertex_count(struct mgp_graph *graph, size_t *result); + +enum mgp_error mgp_graph_approximate_edge_count(struct mgp_graph *graph, size_t *result); + enum mgp_error mgp_vertices_iterator_underlying_graph_is_mutable(struct mgp_vertices_iterator *it, int *result); enum mgp_error mgp_vertices_iterator_get(struct mgp_vertices_iterator *it, struct mgp_vertex **result); diff --git a/pages/custom-query-modules/cpp/cpp-api.md b/pages/custom-query-modules/cpp/cpp-api.md index 965501a97..5a16729fb 100644 --- a/pages/custom-query-modules/cpp/cpp-api.md +++ b/pages/custom-query-modules/cpp/cpp-api.md @@ -1786,6 +1786,7 @@ List(List &&other) noexcept | `Empty` | Returns whether the list is empty. | | `Append` | Appends the given `value` to the list. | | `AppendExtend` | Extends the list and appends the given `value` to it. | +| `Reserve` | Ensure underlying capacity is at least `n`. | | `begin`
`end`
`cbegin`
`cend` | Returns the beginning/end of the `List` iterator. | | `ToString` | Returns the list's string representation. | diff --git a/pages/data-migration.mdx b/pages/data-migration.mdx index 169c6fd4e..5a8cccc0b 100644 --- a/pages/data-migration.mdx +++ b/pages/data-migration.mdx @@ -15,7 +15,7 @@ Where is the data you want to migrate? - [CYPHERL files](#cypherl-files) - [Neo4j](#neo4j) - [Data from an application or a program](#data-from-an-application-or-a-program) -- [Relational database management system (MySQL, SQL Server, Oracle)](#rdbms) +- [Relational database management system (MySQL, SQL Server, Oracle DB, PostgreSQL, AWS S3)](#rdbms) - [In a stream](#data-from-a-stream) - [Parquet, ORC or IPC/Feather/Arrow file](#parquet-orc-or-ipcfeatherarrow-file) - [NetworkX, PyG or DGL graph](#networkx-pyg-or-dgl-graph) @@ -86,10 +86,10 @@ data](/data-modeling) and rewrite the CSV file, then import it into Memgraph using the LOAD CSV clause, like in this [example](/data-migration/migrate-from-rdbms). -Alternatively, you can use the [`migration` +Alternatively, you can use the [`migrate` module](/advanced-algorithms/available-algorithms/migrate) from the MAGE graph -library which allows you to access data from a MySQL database, an SQL Server or -an Oracle database. +library which allows you to access data from a MySQL database, an SQL Server, +Oracle database, PostgreSQL or a CSV file in AWS S3. ## Data from a stream diff --git a/pages/data-migration/csv.mdx b/pages/data-migration/csv.mdx index ad4c667ba..efe163e1b 100644 --- a/pages/data-migration/csv.mdx +++ b/pages/data-migration/csv.mdx @@ -230,12 +230,12 @@ There are also two variations of the files: files with a header and files withou - [`people_relationships_wh.csv`](https://public-assets.memgraph.com/import-data/load-csv-cypher/one-type-nodes/people_relationships_wh.csv)
The file contains the following data: ```plaintext - id_from,id_to - 100,101 - 100,102 - 100,103 - 101,103 - 102,104 + id_from,id_to,type + 100,101,IS_FRIENDS_WITH + 100,102,IS_FRIENDS_WITH + 100,103,IS_FRIENDS_WITH + 101,103,IS_FRIENDS_WITH + 102,104,IS_FRIENDS_WITH ``` @@ -285,13 +285,7 @@ There are also two variations of the files: files with a header and files withou ``` If successful, you should receive an `Empty set (0.014 sec)` message. - - You can also create, set, or remove labels using property values, here is an example: - - ```cypher - LOAD CSV FROM "/path-to/people_nodes_wh.csv" WITH HEADER AS row - CREATE (p:row.label {id: row.id, name: row.name}); - ``` + Notice how **node labels can be dynamically created** from the CSV file. @@ -335,10 +329,11 @@ There are also two variations of the files: files with a header and files withou ```cypher LOAD CSV FROM "/path-to/people_relationships_wh.csv" WITH HEADER AS row MATCH (p1:Person {id: row.id_from}), (p2:Person {id: row.id_to}) - CREATE (p1)-[:IS_FRIENDS_WITH]->(p2); + CREATE (p1)-[:row.type]->(p2); ``` If successful, you should receive an `Empty set (0.014 sec)` message. + Notice how **relationship types can be dynamically created** from the CSV file. diff --git a/pages/database-management/authentication-and-authorization.mdx b/pages/database-management/authentication-and-authorization.mdx index 9769cc6c3..9fb2be18e 100644 --- a/pages/database-management/authentication-and-authorization.mdx +++ b/pages/database-management/authentication-and-authorization.mdx @@ -21,3 +21,8 @@ Learn how to manage roles, set up their privileges and fine-grained access contr Learn how to integrate with third-party auth systems and manage user authentication and access control using Memgraph's auth module. + +## [Impersonate user](/database-management/authentication-and-authorization/impersonate-user) (Enterprise) + +Learn how the impersonate user feature enables authorized users to execute +queries with the full permissions and context of another user. \ No newline at end of file diff --git a/pages/database-management/authentication-and-authorization/_meta.ts b/pages/database-management/authentication-and-authorization/_meta.ts index f060b54da..52e2d13d8 100644 --- a/pages/database-management/authentication-and-authorization/_meta.ts +++ b/pages/database-management/authentication-and-authorization/_meta.ts @@ -1,6 +1,6 @@ export default { "users": "Users", "role-based-access-control": "Role-based access control", - "auth-system-integrations": "Auth system integrations" + "auth-system-integrations": "Auth system integrations", + "impersonate-user": "Impersonate user" } - \ No newline at end of file diff --git a/pages/database-management/authentication-and-authorization/auth-system-integrations.mdx b/pages/database-management/authentication-and-authorization/auth-system-integrations.mdx index b87c05883..731341775 100644 --- a/pages/database-management/authentication-and-authorization/auth-system-integrations.mdx +++ b/pages/database-management/authentication-and-authorization/auth-system-integrations.mdx @@ -295,6 +295,31 @@ Role mapping is described [here](#single-sign-on). Issuer is `https://{your-okta-domain}.okta.com/oauth2/default/`. You can find the client ID on the Admin panel -> Applications -> General. You can find the authorization server on the Admin panel -> Security -> API -> Authorization Servers -> Audience. By default, it is set to `api://default`. +##### Custom auth + + +This is currently only supported through the Neo4j drivers.
 + +If you are using an OIDC provider which is not listed above, you can use the custom auth scheme. +The only requirement is that your OIDC provider supports verifying the tokens through the RSA algorithm (public & private key). + +Set up the following environment variables: + +```mdx +MEMGRAPH_SSO_CUSTOM_OIDC_PUBLIC_KEY_ENDPOINT=`URI where the public key for validating the tokens is exposed` +MEMGRAPH_SSO_CUSTOM_OIDC_ACCESS_TOKEN_AUDIENCE=`access token audience` +MEMGRAPH_SSO_CUSTOM_OIDC_ID_TOKEN_AUDIENCE=`id token audience` +MEMGRAPH_SSO_CUSTOM_OIDC_ROLE_FIELD=`access token field to be used in the role mapping` +MEMGRAPH_SSO_CUSTOM_OIDC_USERNAME= +MEMGRAPH_SSO_CUSTOM_OIDC_ROLE_MAPPING= +``` + +Usernames are described below and role mappings are described [here](#single-sign-on). +One way to determine the audience of the access and ID tokens is to decode them with a tool like `jwt.io` and check the `aud` field. +Often, the access and ID tokens use the same audience; for example, in MS Entra ID both tokens use the client ID as the audience. + + ##### Username The username variable tells the OIDC module what to use as the username. It has the format `token-type:field`. Token type can be `id` or `access` depending on whether you want to use a field from the access or the ID token for the username. See the following to learn more about [access](https://www.okta.com/identity-101/access-token/) and [id](https://developer.okta.com/docs/guides/validate-id-tokens/main/#id-tokens-vs-access-tokens) tokens. @@ -306,6 +331,12 @@ For Okta one commonly used field is `access:sub` which is usually the email of t OIDC is by default enabled using the Memgraph `oidc.py` module. To use a custom auth module use the `--auth-module-mappings` [flag](/database-management/configuration#auth-module) like the following: `--auth-module-mappings=oidc-entra-id:/path/to/oidc-entra-module;oidc-okta:/path/to/oidc-okta-module` depending on the SSO provider you want to use. +#### Using OIDC SSO with the Neo4j Python driver + +Connecting using SSO is supported with the Neo4j Python driver. For the +instructions on how to connect, check the [Python driver +docs](/client-libraries/python#connect-with-single-sign-on-sso). + ## Basic (username + password) auth When Memgraph is set up to use the external auth module for basic authentication diff --git a/pages/database-management/authentication-and-authorization/impersonate-user.mdx b/pages/database-management/authentication-and-authorization/impersonate-user.mdx new file mode 100644 index 000000000..ac45147a7 --- /dev/null +++ b/pages/database-management/authentication-and-authorization/impersonate-user.mdx @@ -0,0 +1,132 @@ +--- +title: Impersonate user +description: Learn how the impersonate user feature enables authorized users to execute queries with the full permissions and context of another user. +--- + +import { Callout } from 'nextra/components' +import {CommunityLinks} from '/components/social-card/CommunityLinks' + + +# Impersonate user (Enterprise) + + + +The **impersonate user** feature lets authorized users run queries on behalf of +another user. When using impersonation, the permitted user adopts the full +permissions and context of the impersonated user. This means they can execute +queries and perform actions exactly as the impersonated user, with all +associated privileges. + +This feature provides a powerful tool for managing user permissions, debugging, +and performing administrative tasks.
By leveraging the ability to impersonate +other users, you can ensure more efficient management of users and roles while +maintaining robust security and access control within your graph database. + + + +**Enterprise**: Impersonate user feature requires a Memgraph Enterprise license to +function. For more information on Memgraph Enterprise and instructions +on how to enable it, refer to the +[enabling Memgraph Enterprise documentation](/database-management/enabling-memgraph-enterprise). + + + +## Targeting impersonation + +The impersonated user is defined at the session start, as shown in the example +using the Neo4j Python driver: + +```python +with driver.session(impersonated_user="user1") as session: + # queries here will be executed as if user1 executed them +``` + +During this session, all queries will be executed with the privileges and +context of the impersonated user (`user1` in this case), effectively "switching" +the identity for the duration of the session. + +## Permissions for impersonation + +Only certain users or roles have the ability to impersonate other users. These permissions are managed with three key queries: +- [`GRANT IMPERSONATE_USER`](#grant-impersonate-user) +- [`DENY IMPERSONATE_USER`](#deny-impersonate-user) +- [`REVOKE IMPERSONATE_USER`](#revoke-impersonate-user) + +### Grant impersonate user + +The `GRANT IMPERSONATE_USER` query allows a user or role to impersonate specific users or all users. The syntax and behavior are as follows: + +```cypher +GRANT IMPERSONATE_USER [*] [list of users] TO user/role; +``` + +Here is the explanation of arguments: +- `*`: Grants permission to impersonate all users. +- `list of users`: Grants permission to impersonate specific users (comma-separated list). +- `user/role`: The user or role receiving the impersonation permission. + +Here is an example of granting the `admin` role permission to impersonate `user1` and `user2`: +```cypher +GRANT IMPERSONATE_USER user1,user2 TO admin; +``` + +Here is an example of granting the `admin_user` user permission to impersonate all users: +```cypher +GRANT IMPERSONATE_USER * TO admin_user; +``` + +### Deny impersonate user + +The `DENY IMPERSONATE_USER` denies impersonation rights to specific users or roles, allowing you to restrict who can impersonate whom. +The syntax and behavior are as follows: + +```cypher +DENY IMPERSONATE_USER list of users TO user/role; +``` + +Here is the explanation of arguments: +- `list of users`: Deny impersonation for specific users (comma-separated list). +- `user/role`: The user or role being restricted from impersonating others. + +Here is an example of denying the `admin` role the ability to impersonate `user1` and `user2`: +```cypher +DENY IMPERSONATE_USER user1,user2 TO admin; +``` + +### Revoke impersonate user + +The `REVOKE IMPERSONATE_USER` removes the impersonation rights for a given user or role. It revokes all impersonation permissions for the specified user/role. +The syntax and behavior are as follows: + +```cypher +REVOKE IMPERSONATE_USER FROM user/role; +``` + +Here is the explanation of arguments: +- `user/role`: The user or role whose impersonation permissions are being revoked. + +Here is an example of revoking all impersonation permissions for the `admin` role: +```cypher +REVOKE IMPERSONATE_USER FROM admin; +``` + + +**Important things to note** + +When using the `GRANT` or `DENY` commands, you must provide exhaustive lists of users. This means that the existing configuration will be replaced by the new list provided. 
+For example:
+- First command:
+  ```cypher
+  GRANT IMPERSONATE_USER user1,user2 TO admin;
+  ```
+- Second command (this overrides the first one):
+  ```cypher
+  GRANT IMPERSONATE_USER user3 TO admin;
+  ```
+  After the second command, the `admin` role will only be able to impersonate `user3`, even though the first command allowed impersonation of `user1` and `user2`.
+
+Permissions can be granted or denied to individual users or roles. For example, an `admin` role might have impersonation privileges that individual users do not have.
+The `REVOKE` command removes any impersonation permissions for the specified user or role, ensuring that they cannot impersonate any user unless explicitly granted again.
+
+
+
\ No newline at end of file
diff --git a/pages/database-management/configuration.mdx b/pages/database-management/configuration.mdx
index c67f742f5..89bbfe19a 100644
--- a/pages/database-management/configuration.mdx
+++ b/pages/database-management/configuration.mdx
@@ -489,6 +489,7 @@ This section contains the list of all other relevant flags used within Memgraph.
 | `--also-log-to-stderr=false` | Log messages go to stderr in addition to logfiles. | `[bool]` |
 | `--data-directory=/var/lib/memgraph` | Path to directory in which to save all permanent data. | `[string]` |
 | `--data-recovery-on-startup=true` | Facilitates recovery of one or more individual databases and their contents during startup. Replaces `--storage-recover-on-startup` | `[bool]` |
+| `--debug-query-plans=false` | Enable DEBUG logging of potential query plans. | `[bool]` |
 | `--delta-chain-cache-threshold=128` | The minimum number of deltas worth caching when rebuilding a certain object's state. Useful when executing parallel transactions dependant on changes of a frequently changed graph object, to lower CPU usage. Must be a positive non-zero integer. | `[uint64]` |
 | `--flag-file` | Path to the additional configuration file, overrides the default configuration settings. | `[string]` |
 | `--help` | Show help on all flags and exit. The default values is `false`. | `[bool]` |
diff --git a/pages/database-management/enabling-memgraph-enterprise.mdx b/pages/database-management/enabling-memgraph-enterprise.mdx
index 70a6b70fc..7f779092f 100644
--- a/pages/database-management/enabling-memgraph-enterprise.mdx
+++ b/pages/database-management/enabling-memgraph-enterprise.mdx
@@ -1,6 +1,6 @@
 ---
 title: Enabling Memgraph Enterprise
-description: Learn how to enable Memgraph Enterprise and leverage its advanced capabilities to scale and optimize your graph computing tasks. Find out more in our documentation and pricing pages. 
+description: Learn how to enable Memgraph Enterprise and leverage its advanced capabilities to scale and optimize your graph computing tasks. Find out more in our documentation and pricing pages.
 ---
 
 # Enabling Memgraph Enterprise
 
@@ -18,6 +18,7 @@ The following Memgraph features are only available in Enterprise Edition:
 - [Multi-tenancy](/database-management/multi-tenancy)
 - [Role-based access control](/database-management/authentication-and-authorization/role-based-access-control)
 - [Label-based access control](/database-management/authentication-and-authorization/role-based-access-control#label-based-access-control)
+- [Impersonate user](/database-management/authentication-and-authorization/impersonate-user)
 - [High availability](/clustering/high-availability) and [automatic failover](/clustering/high-availability#failover)
 - [Time to live](/querying/time-to-live)
 - [Query sharing](/data-visualization/user-manual/query-sharing) in Memgraph Lab
@@ -38,7 +39,7 @@ If you want to enable the Enterprise Edition on startup,
 [set the configuration
 flags](/configuration/configuration-settings#changing-configuration) or
 [environment
 variables](/database-management/configuration#environment-variables) to the
-correct values. 
+correct values.
 
 If you are already running Memgraph Community Edition, these configuration
 flags can also be adjusted [during
@@ -50,7 +51,7 @@ SET DATABASE SETTING 'organization.name' TO 'Organization';
 SET DATABASE SETTING 'enterprise.license' TO 'License';
 ```
 
-To check the set values run: 
+To check the set values run:
 
 ```opencypher
 SHOW DATABASE SETTING 'organization.name';
diff --git a/pages/database-management/logs.mdx b/pages/database-management/logs.mdx
index 0100f2988..4c2392444 100644
--- a/pages/database-management/logs.mdx
+++ b/pages/database-management/logs.mdx
@@ -26,6 +26,11 @@ You can check the log level by running `SHOW DATABASE SETTING "log.level";` quer
 To access the logs from the Memgraph Lab interface, make sure to expose the
 port 7444 when starting Memgraph with Docker.
 
+
+To get additional information on the generated query plans, set
+`--debug-query-plans` to `True`, along with `--log-level` set to `DEBUG` or `TRACE`.
+
+
 ## Access logs
 
 If you installed Memgraph with Linux, logs can be found in the
diff --git a/pages/fundamentals/data-types.mdx b/pages/fundamentals/data-types.mdx
index 5d7435849..dc40bc5bb 100644
--- a/pages/fundamentals/data-types.mdx
+++ b/pages/fundamentals/data-types.mdx
@@ -287,7 +287,7 @@ calling the function `duration()`.
 For strings, the duration format is: `P[nD]T[nH][nM][nS]`.
 
 The `n` stands for a number, and the capital letters are used as a separator
-with each field in `[]` marked optional. 
+with each field in `[]` marked optional.
 
 | name | description |
 | :--: | :---------: |
@@ -296,9 +296,9 @@ with each field in `[]` marked optional.
 | M | Minutes |
 | S | Seconds |
 
-When using strings, only the last filed can be a double, e.g., `P2DT2.5H`. 
+When using strings, only the last field can be a double, e.g., `P2DT2.5H`.
 
-Example: 
+Example:
 
 ```cypher
 CREATE (:F1Laps {lap: duration("PT2M2.33S")});
@@ -310,7 +310,7 @@ Maps can contain the following six fields: `day`, `hour`, `minute`, `second`,
 `millisecond` and `microsecond`. Every field can be a double, an int or a
 mixture of both. Memgraph also supports negative durations.
 
-Example: 
+Example:
 
 ```cypher
 CREATE (:F1Laps {lap: duration({minute:2, second:2, microsecond:33})});
@@ -373,7 +373,7 @@ MATCH (f:F1Laps) RETURN f.lap.second;
 ### Date
 
 You can create a property of temporal type `Date` from a string or map by
-calling the function `Date()`. 
+calling the function `Date()`.
**String** @@ -389,7 +389,7 @@ For strings, the date format is specified by the ISO 8601: `YYYY-MM-DD` or The lowest year is `0` and the highest is `9999`. -Example: +Example: ```cypher CREATE (:Person {birthday: date("1947-07-30")}); @@ -402,7 +402,7 @@ to the current date of the calendar (UTC clock). For maps, three fields are available: `year`, `month`, `day`. -Example: +Example: ```cypher CREATE (:Person {birthday: date({year:1947, month:7, day:30})}); @@ -416,7 +416,7 @@ You can access the individual fields of a date through its properties: | month | Returns the month field | | day | Returns the day field | -Example: +Example: ```cypher MATCH (b:Person) RETURN b.birthday.year; @@ -425,7 +425,7 @@ MATCH (b:Person) RETURN b.birthday.year; ### LocalTime You can create a property of temporal type `LocalTime` from a string or map by -calling the function `localTime()`. +calling the function `localTime()`. **Strings** @@ -438,7 +438,7 @@ or `[T]hh:mm` or `[T]hhmmss` or `[T]hhmm` or `[T]hh`. | m | Minutes | | s | Seconds | -Example: +Example: ```cypher CREATE (:School {Calculus: localTime("09:15:00")}); @@ -481,25 +481,25 @@ MATCH (s:School) RETURN s.Calculus.hour; ### LocalDateTime -You can create a property of temporal type `LocalDateTime` from a string or map by calling the function `localDateTime()`. +You can create a property of temporal type `LocalDateTime` from a string or map by calling the function `localDateTime()`. LocalDateTime uses the defined [timezone](#database-timezone) to convert between local and UTC time. -At a lower level, LocalDateTime will use system time (UTC), changing the instance timezone will -change the displayed time point, but will not change the underlying data. +At a lower level, LocalDateTime will use system time (UTC), changing the instance timezone will +change the displayed time point, but will not change the underlying data. All LocalDateTime is converted to UTC, so comparing time points between different timezones gives the correct result. -When recovering from pre-2.19 snapshots and WALs, the observed LocalDateTime might change due to the introduction of the timezone. +When recovering from pre-2.19 snapshots and WALs, the observed LocalDateTime might change due to the introduction of the timezone. Previously LocalDateTime was interpreted and saved as UTC time. -Post 2.19, the displayed LocalDateTime is in the local timezone and gets converted to UTC time. +Post 2.19, the displayed LocalDateTime is in the local timezone and gets converted to UTC time. -Pre 2.19 executing `LocalDateTime()` would return the current UTC time. +Pre 2.19 executing `LocalDateTime()` would return the current UTC time. Any such saved data is still "correct" post 2.19; timezone will correctly be applied and local time will be displayed. Executing `LocalDateTime("2024-07-24T13:30:00")` will give different result pre and post 2.19. @@ -569,15 +569,23 @@ MATCH (f:Flights) RETURN f.AIR123.year; `LocalDateTime` uses the set database timezone to properly convert between system time (UTC) and local (user) time. The timezone can be defined via: -1. `--timezone` command-line argument +1. `--timezone` command-line argument 1. `SET DATABASE SETTING "timezone" TO "Europe/Rome"` query Both methods use IANA timezone descriptors to specify the timezone. See [list of time zones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List). + + +Starting with `tzdata` version `2024b`, its format has changed and an incompatibility with the current libstdc++ has been introduced. 
+As a result, using `tzdata` version `2024b` or later will break the timezone feature in Memgraph. This could lead to incorrect handling of timezones and unexpected behavior in your application.
+To avoid compatibility issues, please ensure that you are using `tzdata` `v2024a` or earlier with Memgraph until libstdc++ has been updated to support the new format in tzdata.
+
+
 
 ### ZonedDateTime
 
 You can create a value of the `ZonedDateTime` type from a string or a map by
-calling the `datetime()` function. 
+calling the `datetime()` function.
 
 **Strings**
@@ -799,7 +807,7 @@ Memgraph supports four Coordinate Reference Systems (CRS) for spatial data, divi
 A WGS-84 point consists of longitude, latitude, and height if the point is 3D.
 Longitude and latitude are specified in degrees while height is specified in
 meters.
-Longitude has to be in the [-180, 180] range, latitude in the [-90, 90] range and height can be any `Float` value. 
+Longitude has to be in the [-180, 180] range, latitude in the [-90, 90] range and height can be any `Float` value.
 
 | Point type | SRID | CRS |
 | :---------------: | :----------: | :--------: |
diff --git a/pages/fundamentals/indexes.mdx b/pages/fundamentals/indexes.mdx
index dda363ca8..daa8e216f 100644
--- a/pages/fundamentals/indexes.mdx
+++ b/pages/fundamentals/indexes.mdx
@@ -301,15 +301,21 @@ memgraph> MATCH (n:Person) WHERE n.name =~ ".*an$" RETURN n.name;
 
 ## Show created indexes
 
-To check all the labels and label-property pairs that Memgraph currently
-indexes, use the following query:
+To see all the information about label, label-property, edge-type, edge-type
+property, and point indexes, run the following query:
 
 ```cypher
 SHOW INDEX INFO;
 ```
 
-The query displays a table of all label and label-property indexes presently
-kept by Memgraph, ordered by index type, label, property and count.
+The query displays a table with the information on the created indexes,
+ordered by index type, label, property and count.
+
+To retrieve information about vector indexes, run the following query:
+
+```cypher
+SHOW VECTOR INDEX INFO;
+```
 
 ## Delete an index
diff --git a/pages/getting-started/install-memgraph/kubernetes.mdx b/pages/getting-started/install-memgraph/kubernetes.mdx
index 95904a7f6..160ee3ffe 100644
--- a/pages/getting-started/install-memgraph/kubernetes.mdx
+++ b/pages/getting-started/install-memgraph/kubernetes.mdx
@@ -52,11 +52,30 @@ By default, the Helm chart will create a `PersistentVolumeClaim` (PVC) for
 storage and logs. If the storage class for PVC is not defined, PVC will use the
 default one available in the cluster. The storage class can be configured in
 the `values.yaml` file. To avoid losing your data, make sure you have `Retain`
-reclaim policy. If you delete `PersistentVolumeClaim` without having `Retain`
-reclaim policy, you will lose your data because `PersistentVolume` will be
-deleted.
+reclaim policy set on your storage class. If you delete a `PersistentVolumeClaim`
+without having the `Retain` reclaim policy, you will lose your data because the
+`PersistentVolume` will get deleted too. An alternative to creating a new
+storage class is to patch the `Retain` reclaim policy onto your existing
+`PersistentVolume`s. This is necessary because the default Kubernetes policy is `Delete`.
+The patching can be done using the following bash script:
+
+```bash
+#!/bin/bash
+
+# Get all Persistent Volume names
+PVS=$(kubectl get pv --no-headers -o custom-columns=":metadata.name")
+
+# Loop through each PV and patch it
+for pv in $PVS; do
+  echo "Patching PV: $pv"
+  kubectl patch pv $pv -p '{"spec":{"persistentVolumeReclaimPolicy":"Retain"}}'
+done
+
+echo
+```
+
 An example of a storage class for AWS EBS volumes:
 
 ```yaml
 storageClass:
   name: "gp2"
@@ -68,7 +87,8 @@ storageClass:
 ```
 
 Default template for a storage class is part of the Helm chart and can be found
-in the repository.
+in the repository. If you don't want to create a new storage class, set
+`storageClass.create` to `false`.
 
 More details on the configuration options can be found in the
 [configuration section](#configuration-options).
@@ -82,6 +102,19 @@ you can enable them in the `values.yaml` file. The secrets are prepared to work
 for environment variables `MEMGRAPH_USER` and `MEMGRAPH_PASSWORD`.
 
+### Probes
+
+The Memgraph standalone chart uses startup, readiness and liveness probes. The startup probe
+is used to determine when a container application has started. The liveness
+probe is used to determine when a container should be restarted. The readiness
+probe is used to determine when a container is ready to start accepting traffic.
+The startup probe will succeed only after the recovery of Memgraph has
+finished. Liveness and readiness probes will start after the startup probe
+succeeds. By default, the startup probe has to succeed within 2 hours. If the
+recovery from backup takes longer than that, update the configuration to a
+sufficiently high value. The liveness and readiness probes have to succeed at
+least once in 5 minutes for a pod to be considered ready.
+
 ### System configuration
 
 The Helm chart will set the linux kernel `vm.max_map_count` parameter to `262144` by default
@@ -152,13 +185,10 @@ their default values.
 | `service.type` | Kubernetes service type | `ClusterIP` |
 | `service.enableBolt` | Enable Bolt protocol | `true` |
 | `service.boltPort` | Bolt protocol port | `7687` |
-| `service.boltProtocol` | Protocol used by Bolt | `TCP` |
 | `service.enableWebsocketMonitoring` | Enable WebSocket monitoring | `false` |
 | `service.websocketPortMonitoring` | WebSocket monitoring port | `7444` |
-| `service.websocketPortMonitoringProtocol` | Protocol used by WebSocket monitoring | `TCP` |
 | `service.enableHttpMonitoring` | Enable HTTP monitoring | `false` |
 | `service.httpPortMonitoring` | HTTP monitoring port | `9091` |
-| `service.httpPortMonitoringProtocol` | Protocol used by HTTP monitoring | `http` |
 | `service.annotations` | Annotations to add to the service | `{}` |
 | `persistentVolumeClaim.createStorageClaim` | Enable creation of a Persistent Volume Claim for storage | `true` |
 | `persistentVolumeClaim.storageClassName` | Storage class name for the persistent volume claim | `""` |
@@ -183,19 +213,22 @@ their default values.
 | `serviceAccount.annotations` | Annotations to add to the service account | `{}` |
 | `serviceAccount.name` | The name of the service account to use. If not set and create is true, a name is generated.
| `""` |
 | `container.terminationGracePeriodSeconds` | Grace period for pod termination | `1800` |
-| `probes.liveliness.initialDelaySeconds` | Initial delay for liveliness probe | `10` |
-| `probes.liveliness.periodSeconds` | Period seconds for liveliness probe | `60` |
-| `probes.liveliness.failureThreshold` | Failure threshold for liveliness probe | `3` |
-| `probes.readiness.initialDelaySeconds` | Initial delay for readiness probe | `10` |
-| `probes.readiness.periodSeconds` | Period seconds for readiness probe | `30` |
-| `probes.readiness.failureThreshold` | Failure threshold for readiness probe | `3` |
-| `probes.startup.initialDelaySeconds` | Initial delay for startup probe | `10` |
-| `probes.startup.periodSeconds` | Period seconds for startup probe | `10` |
-| `probes.startup.failureThreshold` | Failure threshold for startup probe | `30` |
+| `container.livenessProbe.tcpSocket.port` | Port used for TCP connection. Should be the same as bolt port. | `7687` |
+| `container.livenessProbe.failureThreshold` | Failure threshold for liveness probe | `20` |
+| `container.livenessProbe.timeoutSeconds` | Timeout seconds for liveness probe | `10` |
+| `container.livenessProbe.periodSeconds` | Period seconds for liveness probe | `5` |
+| `container.readinessProbe.tcpSocket.port` | Port used for TCP connection. Should be the same as bolt port. | `7687` |
+| `container.readinessProbe.failureThreshold` | Failure threshold for readiness probe | `20` |
+| `container.readinessProbe.timeoutSeconds` | Timeout seconds for readiness probe | `10` |
+| `container.readinessProbe.periodSeconds` | Period seconds for readiness probe | `5` |
+| `container.startupProbe.tcpSocket.port` | Port used for TCP connection. Should be the same as bolt port. | `7687` |
+| `container.startupProbe.failureThreshold` | Failure threshold for startup probe | `1440` |
+| `container.startupProbe.periodSeconds` | Period seconds for startup probe | `10` |
 | `nodeSelectors` | Node selectors for pod. Left empty by default. | `{}` |
 | `customQueryModules` | List of custom Query modules that should be mounted to Memgraph Pod | `[]` |
 | `sysctlInitContainer.enabled` | Enable the init container to set sysctl parameters | `true` |
 | `sysctlInitContainer.maxMapCount` | Value for `vm.max_map_count` to be set by the init container | `262144` |
+| `storageClass.create` | If set to true, new StorageClass will be created. | `false` |
 | `storageClass.name` | Name of the StorageClass | `"memgraph-generic-storage-class"` |
 | `storageClass.provisioner` | Provisioner for the StorageClass | `""` |
 | `storageClass.storageType` | Type of storage for the StorageClass | `""` |
@@ -412,8 +445,11 @@ You can use `sysctlInitContainer` configuration parameter to [increase the `vm_m
 ### Authentication
 
 By default, there is no user or password configured for Memgraph instances. You can use `secrets` configuration parameter
-to create the user with the password. 
+to create the user with the password. The secret can be created in the following way:
 
+```bash
+kubectl create secret generic memgraph-secrets --from-literal=USER=memgraph --from-literal=PASSWORD=memgraph
+```
 
 ### Setting up the cluster
 
@@ -461,44 +497,68 @@ externalAccessConfig.dataInstance.serviceType=IngressNginx,externalAccessConfig.
 The chart will also install `IngressNginx` automatically with all required
 configuration.
 
-#### Configuration options
+### Probes
+
+The Memgraph HA chart uses startup, readiness and liveness probes.
+The startup probe is used to determine when a container application has started. The liveness
+probe is used to determine when a container should be restarted. The readiness
+probe is used to determine when a container is ready to start accepting traffic.
+The startup probe will succeed only after the recovery of Memgraph has
+finished. Liveness and readiness probes will start after the startup probe
+succeeds. By default, the startup probe has to succeed within 2 hours. If the
+recovery from backup takes longer than that, update the configuration to a
+sufficiently high value. The liveness and readiness probes have to succeed at
+least once in 5 minutes for a pod to be considered ready.
+
+### Configuration options
 
 The following table lists the configurable parameters of the Memgraph HA chart
 and their default values.
 
-| Parameter | Description | Default |
-| -------------------------------------------------- | -------------------------------------------------------------------------------------------------- | -------------------------- |
-| `image.repository` | Memgraph Docker image repository | `memgraph/memgraph` |
-| `image.tag` | Specific tag for the Memgraph Docker image. Overrides the image tag whose default is chart version. | `2.22.0` |
-| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
-| `env.MEMGRAPH_ENTERPRISE_LICENSE` | Memgraph enterprise license | `` |
-| `env.MEMGRAPH_ORGANIZATION_NAME` | Organization name | `` |
-| `storage.libPVCSize` | Size of the storage PVC | `1Gi` |
-| `storage.libStorageClassName` | The name of the storage class used for storing data. | `""` |
-| `storage.libStorageAccessMode` | Access mode used for lib storage. | `ReadWriteOnce` |
-| `storage.logPVCSize` | Size of the log PVC | `1Gi` |
-| `storage.logStorageClassName` | The name of the storage class used for storing logs. | `""` |
-| `storage.logStorageAccessMode` | Access mode used for log storage. | `ReadWriteOnce` |
-| `externalAccess.coordinator.serviceType` | IngressNginx, NodePort, CommonLoadBalancer or LoadBalancer. | `NodePort` |
-| `externalAccess.dataInstance.serviceType` | IngressNginx, NodePort or LoadBalancer. | `NodePort` |
-| `ports.boltPort` | Bolt port used on coordinator and data instances. | `7687` |
-| `ports.managementPort` | Management port used on coordinator and data instances. | `10000` |
-| `ports.replicationPort` | Replication port used on data instances. | `20000` |
-| `ports.coordinatorPort` | Coordinator port used on coordinators.
| `12000` | -| `affinity.unique` | Schedule pods on different nodes in the cluster | `false` | -| `affinity.parity` | Schedule pods on the same node with maximum one coordinator and one data node | `false` | -| `affinity.nodeSelection` | Schedule pods on nodes with specific labels | `false` | -| `affinity.roleLabelKey` | Label key for node selection | `role` | -| `affinity.dataNodeLabelValue` | Label value for data nodes | `data-node` | -| `affinity.coordinatorNodeLabelValue` | Label value for coordinator nodes | `coordinator-node` | -| `data` | Configuration for data instances | See `data` section | -| `coordinators` | Configuration for coordinator instances | See `coordinators` section | -| `sysctlInitContainer.enabled` | Enable the init container to set sysctl parameters | `true` | -| `sysctlInitContainer.maxMapCount` | Value for `vm.max_map_count` to be set by the init container | `262144` | -| `secrets.enabled` | Enable the use of Kubernetes secrets for Memgraph credentials | `false` | -| `secrets.name` | The name of the Kubernetes secret containing Memgraph credentials | `memgraph-secrets` | -| `secrets.userKey` | The key in the Kubernetes secret for the Memgraph user, the value is passed to the `MEMGRAPH_USER` env. | `USER` | -| `secrets.passwordKey` | The key in the Kubernetes secret for the Memgraph password, the value is passed to the `MEMGRAPH_PASSWORD`. | `PASSWORD` | +| Parameter | Description | Default | +| ------------------------------------------- | ----------------------------------------------------------------------------------------------------------- | -------------------------- | +| `image.repository` | Memgraph Docker image repository | `memgraph/memgraph` | +| `image.tag` | Specific tag for the Memgraph Docker image. Overrides the image tag whose default is chart version. | `3.0.0` | +| `image.pullPolicy` | Image pull policy | `IfNotPresent` | +| `env.MEMGRAPH_ENTERPRISE_LICENSE` | Memgraph enterprise license | `` | +| `env.MEMGRAPH_ORGANIZATION_NAME` | Organization name | `` | +| `storage.libPVCSize` | Size of the storage PVC | `1Gi` | +| `storage.libStorageClassName` | The name of the storage class used for storing data. | `""` | +| `storage.libStorageAccessMode` | Access mode used for lib storage. | `ReadWriteOnce` | +| `storage.logPVCSize` | Size of the log PVC | `1Gi` | +| `storage.logStorageClassName` | The name of the storage class used for storing logs. | `""` | +| `storage.logStorageAccessMode` | Access mode used for log storage. | `ReadWriteOnce` | +| `externalAccess.coordinator.serviceType` | IngressNginx, NodePort, CommonLoadBalancer or LoadBalancer. | `NodePort` | +| `externalAccess.dataInstance.serviceType` | IngressNginx, NodePort or LoadBalancer. | `NodePort` | +| `ports.boltPort` | Bolt port used on coordinator and data instances. | `7687` | +| `ports.managementPort` | Management port used on coordinator and data instances. | `10000` | +| `ports.replicationPort` | Replication port used on data instances. | `20000` | +| `ports.coordinatorPort` | Coordinator port used on coordinators. 
| `12000` |
+| `affinity.unique` | Schedule pods on different nodes in the cluster | `false` |
+| `affinity.parity` | Schedule pods on the same node with maximum one coordinator and one data node | `false` |
+| `affinity.nodeSelection` | Schedule pods on nodes with specific labels | `false` |
+| `affinity.roleLabelKey` | Label key for node selection | `role` |
+| `affinity.dataNodeLabelValue` | Label value for data nodes | `data-node` |
+| `affinity.coordinatorNodeLabelValue` | Label value for coordinator nodes | `coordinator-node` |
+| `container.livenessProbe.tcpSocket.port` | Port used for TCP connection. Should be the same as bolt port. | `7687` |
+| `container.livenessProbe.failureThreshold` | Failure threshold for liveness probe | `20` |
+| `container.livenessProbe.timeoutSeconds` | Timeout seconds for liveness probe | `10` |
+| `container.livenessProbe.periodSeconds` | Period seconds for liveness probe | `5` |
+| `container.readinessProbe.tcpSocket.port` | Port used for TCP connection. Should be the same as bolt port. | `7687` |
+| `container.readinessProbe.failureThreshold` | Failure threshold for readiness probe | `20` |
+| `container.readinessProbe.timeoutSeconds` | Timeout seconds for readiness probe | `10` |
+| `container.readinessProbe.periodSeconds` | Period seconds for readiness probe | `5` |
+| `container.startupProbe.tcpSocket.port` | Port used for TCP connection. Should be the same as bolt port. | `7687` |
+| `container.startupProbe.failureThreshold` | Failure threshold for startup probe | `1440` |
+| `container.startupProbe.periodSeconds` | Period seconds for startup probe | `10` |
+| `data` | Configuration for data instances | See `data` section |
+| `coordinators` | Configuration for coordinator instances | See `coordinators` section |
+| `sysctlInitContainer.enabled` | Enable the init container to set sysctl parameters | `true` |
+| `sysctlInitContainer.maxMapCount` | Value for `vm.max_map_count` to be set by the init container | `262144` |
+| `secrets.enabled` | Enable the use of Kubernetes secrets for Memgraph credentials | `false` |
+| `secrets.name` | The name of the Kubernetes secret containing Memgraph credentials | `memgraph-secrets` |
+| `secrets.userKey` | The key in the Kubernetes secret for the Memgraph user, the value is passed to the `MEMGRAPH_USER` env. | `USER` |
+| `secrets.passwordKey` | The key in the Kubernetes secret for the Memgraph password, the value is passed to the `MEMGRAPH_PASSWORD`. | `PASSWORD` |
 
 For the `data` and `coordinators` sections, each item in the list has the
 following parameters:
diff --git a/pages/help-center/errors/transactions.mdx b/pages/help-center/errors/transactions.mdx
index 0bd2c8580..8970fd853 100644
--- a/pages/help-center/errors/transactions.mdx
+++ b/pages/help-center/errors/transactions.mdx
@@ -12,6 +12,7 @@ import {CommunityLinks} from '/components/social-card/CommunityLinks'
 While working with Memgraph, you can encounter various transaction errors. Here are some of them, along with the instructions on how to handle them:
 - [Conflicting transactions](#conflicting-transaction)
 - [Transaction timeout](#transaction-timeout)
+- [Storage access timeout](#storage-access-timeout)
 
 ## Conflicting transactions
 
@@ -112,5 +113,31 @@ To change that, update the flag `--query-execution-timeout-sec` value to a value
 Here are the [instructions](/configuration/configuration-settings#using-flags-and-config-file) on how to update the configuration.
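+When a transaction keeps timing out, it can also help to add simple retry
+logic on the client side. Below is a minimal sketch using the Neo4j Python
+driver; the exception class, connection details, retry budget, and backoff
+values are illustrative assumptions that you should adapt to your application:
+
+```python
+import time
+
+from neo4j import GraphDatabase
+from neo4j.exceptions import TransientError
+
+# Assumed local instance; replace the URI and credentials with your own.
+driver = GraphDatabase.driver("bolt://localhost:7687", auth=("user", "password"))
+
+def run_with_retry(query, attempts=3, backoff_seconds=1.0):
+    """Run a query, retrying transient failures such as timeouts."""
+    for attempt in range(attempts):
+        try:
+            with driver.session() as session:
+                return session.run(query).data()
+        except TransientError:
+            # Give conflicting or long-running queries time to finish.
+            if attempt == attempts - 1:
+                raise
+            time.sleep(backoff_seconds * (attempt + 1))
+```
+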
+## Storage access timeout - +### Error messages + +Here are the storage access error messages you might encounter: + +1. **Cannot access storage, unique access query is running. Try again later.** +2. **Cannot get unique access to the storage. Try stopping other queries that are running in parallel.** + +### Understanding storage access timeout + +Storage access timeouts occur during query preparation when the query execution engine cannot get the required type of access to the storage. There are two types of storage access: + +- **Shared access**: Multiple queries can have shared access at the same time, but shared access cannot be granted while a query with unique access is running. +- **Unique access**: Only one query can have unique access at a time, and no other query can have any type of access during that period. + +These timeouts prevent worker starvation and database blocking that could occur if queries were to wait indefinitely for storage access. + +### Handling storage access timeout + +When you encounter a storage access timeout: + +1. Check for long-running queries that might be blocking storage access. +2. Consider breaking down complex queries that require unique access into smaller operations. +3. Retry the query after other queries have completed. +4. If possible, schedule queries requiring unique access during periods of lower database activity. + + \ No newline at end of file diff --git a/pages/querying/best-practices.mdx b/pages/querying/best-practices.mdx index ff66e2898..f49ffd29f 100644 --- a/pages/querying/best-practices.mdx +++ b/pages/querying/best-practices.mdx @@ -96,6 +96,11 @@ nonsense or sales pitch, just tech. />
+To prepare the information we need to help you debug the issue, it is best to
+have `--log-level` set to `TRACE`, along with `--debug-query-plans` set to `True`.
+That will provide better diagnostics for identifying poor cost modelling of the
+generated plans and, hence, poor plan selection.
+
 ## How to speed up query execution
 
 Before diving into the details of the query execution optimization, it is
diff --git a/pages/querying/clauses/create.mdx b/pages/querying/clauses/create.mdx
index 94d747877..0dd62b22a 100644
--- a/pages/querying/clauses/create.mdx
+++ b/pages/querying/clauses/create.mdx
@@ -21,9 +21,11 @@ more details.
 1.1. [Creating a single node](#11-creating-a-single-node)
1.2. [Creating a node with properties](#12-creating-a-node-with-properties)
1.3. [Creating multiple nodes](#13-creating-multiple-nodes)
+ 1.4. [Creating node labels dynamically](#14-creating-node-labels-dynamically)
2. [Creating relationships](#2-creating-relationships)
2.1. [Creating a relationship between two nodes](#21-creating-a-relationship-between-two-nodes)
2.2. [Creating a relationship with properties](#22-creating-a-relationship-with-properties)
+ 2.3. [Creating relationship types dynamically](#23-creating-relationship-types-dynamically)
 3. [Creating a path](#3-creating-a-path)
 4. [Creating an enum](#4-creating-an-enum)
 
@@ -116,6 +118,28 @@ Output:
 +------------+------------+
 ```
 
+### 1.4. Creating node labels dynamically
+
+Node labels can be created dynamically from variable values. This functionality only works with `CREATE`.
+Matching and merging of dynamic node labels is not supported since the query plan and scanning indices are created upfront.
+
+```cypher
+WITH {label_value: "Label"} as x
+CREATE (n:x.label_value) RETURN n;
+```
+
+Output:
+```nocopy
++------------+
+| n          |
++------------+
+| (:Label)   |
++------------+
+```
+
+This functionality can be especially useful when importing data from CSV or other sources, since at that point you can inject arbitrary labels
+into the graph.
+
 ## 2. Creating relationships
 
 ### 2.1. Creating a relationship between two nodes
@@ -176,6 +200,28 @@ Output:
 +---------------------------------+
 ```
 
+### 2.3. Creating relationship types dynamically
+
+Relationship types can be created dynamically from variable values. This functionality only works with `CREATE`.
+Matching and merging of dynamic relationship types is not supported since the query plan and scanning indices are created upfront.
+
+```cypher
+WITH {edge_type_value: "EDGE_TYPE"} as x
+CREATE ()-[r:x.edge_type_value]->() RETURN r;
+```
+
+Output:
+```nocopy
++--------------+
+| r            |
++--------------+
+| [:EDGE_TYPE] |
++--------------+
+```
+
+This functionality can be especially useful when importing data from CSV or other sources, since at that point you can inject arbitrary
+edge types into the graph.
+
 ## 3. Creating a path
 
 When creating a path all the entities of the pattern will be created.
diff --git a/pages/querying/differences-in-cypher-implementations.mdx b/pages/querying/differences-in-cypher-implementations.mdx
index 0a787f869..69a153dd9 100644
--- a/pages/querying/differences-in-cypher-implementations.mdx
+++ b/pages/querying/differences-in-cypher-implementations.mdx
@@ -255,7 +255,6 @@ RETURN val, valueType(val) = "INTEGER"
 
 **Scalar functions**:
 - `elementId()` - `id()` can be used instead
-- `length()` - `size()` can be used instead
 - `nullIf()`
 
 **Aggregating functions**:
diff --git a/pages/querying/expressions.mdx b/pages/querying/expressions.mdx
index c749d5b5b..11d1fb45b 100644
--- a/pages/querying/expressions.mdx
+++ b/pages/querying/expressions.mdx
@@ -54,6 +54,9 @@
 MATCH (n:$label)
 RETURN n;
 ```
 
+This syntax supports parameters of type `String` and `List[String]`, allowing a
+node to have one or multiple labels assigned.
+
 Using parameters as property maps is partially supported, it isn't supported in
 `MATCH` nor `MERGE` clause. For example, the following query is illegal:
 
@@ -109,6 +112,26 @@ session.run('CREATE (alice:Person {name: $0, age: $1})',
 To use parameters with some other driver, please consult the appropriate
 documentation.
 
+## Dynamic node label creation
+
+Starting from version 3.1, Memgraph allows node labels to be created using
+expressions of type `String` or `List[String]`.
+
+For example, the following query will create a node with the label `Foo`:
+
+```
+WITH {label: "Foo"} as var
+CREATE (:var.label);
+```
+
+while the following query will result in the creation of a node with the labels `Foo` and `Bar`:
+ +``` +WITH {labels: ["Foo", "Bar"]} as var +CREATE (:var.labels); +``` + + ## CASE Conditional expressions can be expressed in the Cypher language with the `CASE` diff --git a/pages/querying/functions.mdx b/pages/querying/functions.mdx index 159412439..1c0d71ec7 100644 --- a/pages/querying/functions.mdx +++ b/pages/querying/functions.mdx @@ -25,14 +25,14 @@ This section contains the list of supported functions. ### Temporal functions - | Name | Signature | Description | - | --------------- | -------------------------------------------------------------------------- | ------------------------------------------------------------------------- | - | `duration` | `duration(value: string\|Duration) -> (Duration)` | Returns the data type that represents a period of time. | - | `date` | `date(value: string\|Date\|LocalDateTime) -> (Date)` | Returns the data type that represents a date with year, month, and day. | - | `localTime` | `localTime(value: string\|LocalTime\|LocalDateTime) -> (LocalTime)` | Returns the data type that represents time within a day without timezone. | - | `localDateTime` | `localDateTime(value: string\|LocalDateTime)-> (LocalDateTime)` | Returns the data type that represents a date and local time. | - | `datetime` | `datetime(value: NULL\|string\|map)-> (ZonedDateTime)` | Returns the `ZonedDateTime` value defined by the given parameters. | - + | Name | Signature | Description | + | --------------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | + | `duration` | `duration(value: string\|Duration) -> (Duration)` | Returns the data type that represents a period of time. | + | `date` | `date(value: string\|Date\|LocalDateTime\|ZonedDateTime) -> (Date)` | Returns the data type that represents a date with year, month, and day. | + | `localTime` | `localTime(value: string\|LocalTime\|LocalDateTime\|ZonedDateTime) -> (LocalTime)` | Returns the data type that represents time within a day without timezone. | + | `localDateTime` | `localDateTime(value: string\|LocalDateTime\|ZonedDateTime)-> (LocalDateTime)` | Returns the data type that represents a date and local time. | + | `datetime` | `datetime(value: NULL\|string\|map\|ZonedDateTime)-> (ZonedDateTime)` | Returns the `ZonedDateTime` value defined by the given parameters. | + ### Scalar functions | Name | Signature | Description | @@ -47,10 +47,11 @@ This section contains the list of supported functions. | `head` | `head(list: List[any]) -> (any)` | Returns the first element of a list. | | `id` | `id(value: Node\|Relationship) -> (integer)` | Returns identifier for a given node or relationship. The identifier is generated during the initialization of a node or a relationship and will be persisted through the durability mechanism. | | `last` | `last(list: List[any]) -> (any)` | Returns the last element of a list. | + | `length` | `length(value: List\|string\|Map\|Path) -> (integer)` | Returns the number of elements in the value. When given a **list** it returns the size of the list. When given a string it returns the number of characters. When given a path it returns the number of expansions (relationships) in that path. | | `properties` | `properties(value: Node\|Relationship) -> (Map[string, any])` | Returns the property map of a node or a relationship. 
| | `propertySize` | `propertySize(entity: Node\|Relationship, property-name: string) -> (integer)` | Returns the total amount of bytes stored in RAM for the property of a given entity node or relationship. For more information, check [storage of properties inside Memgraph](/fundamentals/storage-memory-usage#calculate-storage-memory-usage). | | `randomUUID` | `randomUUID() -> (string)` | Returns randomly-generated Universally Unique Identifier (UUID) | - | `size` | `size(value: List[any]\|string\|Map[string, any]\|Path) -> (integer)` | Returns the number of elements in the value. When given a **list** it returns the size of the list. When given a string it returns the number of characters. When given a path it returns the number of expansions (relationships) in that path. | + | `size` | `size(value: List\|string\|Map\|Path) -> (integer)` | Returns the number of elements in the value. When given a **list** it returns the size of the list. When given a string it returns the number of characters. When given a path it returns the number of expansions (relationships) in that path. | | `startNode` | `startNode(relationship: Relationship) -> (Node)` | Returns the starting node of a relationship. | | `toBoolean` | `toBoolean(value: boolean\|integer\|string) -> (boolean)` | Converts the input argument to a boolean value, regardless of case sensitivity. The values `true` and `false` are directly converted to `true` or `false`, respectively. Additionally, the strings "true" and "t" are mapped to `true`, while the strings "false" and "f" are mapped to `false`. | | `toFloat` | `toFloat(value: number\|string) -> (float)` | Converts the argument to a floating point number. | @@ -81,6 +82,7 @@ This section contains the list of supported functions. | `relationships` | `relationships(path: Path) -> (List[Relationship])` | Returns a list of relationships (edges) from a path. | | `single` | `single(variable IN list WHERE predicate)` | Check if only one element of a list satisfies a predicate. | | `tail` | `tail(list: List[any]) -> (List[any])` | Returns all elements after the first of a given list. | + | `toSet` | `toSet(list: List[any]) -> (List[any])` | Returns the list of distinct elements from the initial list. | | `uniformSample` | `uniformSample(list: List[any], size: integer) -> (List[any])` | Returns elements of a given list randomly oversampled or undersampled to desired size | ### Maps @@ -146,13 +148,12 @@ This section contains the list of supported functions. All aggregation functions can be used with the `DISTINCT` operator to perform calculations only on unique values. For example, `count(DISTINCT n.prop)` and `collect(DISTINCT n.prop)`. - ### Graph projection functions - - | Name | Signature | Description | - | --------- | ------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------- | - | `project` | `project(row: path) -> map("nodes":list[Node], "edges":list[Edge])`| Creates a projected graph consisting of nodes and relationships from aggregated paths. 
| - +### Graph projection functions + | Name | Signature | Description | + | --------- | -------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | + | `project` | `project(row: path) -> map("nodes":list[Node], "edges":list[Edge])` | Creates a projected graph consisting of nodes and relationships from aggregated paths. | + | `project` | `project(nodes: List[Node], edges:List[Edge]) -> map("nodes":list[Node], "edges":list[Edge])`| Creates a projected graph consisting of nodes and relationships from a list of nodes and a list of relationships, ignoring duplicate nodes and relationships. | ### String functions diff --git a/pages/querying/vector-search.mdx b/pages/querying/vector-search.mdx index eb3331abf..68139f14a 100644 --- a/pages/querying/vector-search.mdx +++ b/pages/querying/vector-search.mdx @@ -70,6 +70,7 @@ Unlike other index types, the query planner currently does not utilize vector in ### Show vector indices To retrieve information about vector indices, use `vector_search.show_index_info()` procedure. +Additionally, the same information can be retrieved with the `SHOW VECTOR INDEX INFO` query. {

Output:

} @@ -78,14 +79,21 @@ To retrieve information about vector indices, use `vector_search.show_index_info - `property: string` ➡ The name of the property on which vector index is indexed. - `dimension: int` ➡ The dimension of vectors in the index. - `capacity: int` ➡ The capacity of the vector index. +- `metric: string` ➡ Metric used for vector search similarity. - `size: int` ➡ The number of entries in the vector index. {

Usage:

} -```shell +```cypher CALL vector_search.show_index_info() YIELD * RETURN *; ``` +or + +```cypher +SHOW VECTOR INDEX INFO; +``` + ### Query vector index Use the `vector_search.search()` procedure to search for similar vectors within @@ -96,7 +104,7 @@ vector based on the selected similarity metric. - `index_name: string` ➡ The vector index to search. - `limit: int` ➡ The number of nearest neighbors to return. -- `search_query: List[float]` ➡ The vector to query in the index. +- `search_query: List[float|int]` ➡ The vector to query in the index. Providing a different type will result in an exception. {

Output:

} @@ -179,6 +187,12 @@ Then, run the following query to inspect vector index: CALL vector_search.show_index_info() YIELD * RETURN *; ``` +We can get the same information with the following command: + +```cypher +SHOW VECTOR INDEX INFO; +``` + The above query will result with: ``` diff --git a/pages/release-notes.mdx b/pages/release-notes.mdx index 4b57b4905..4d89dfa29 100644 --- a/pages/release-notes.mdx +++ b/pages/release-notes.mdx @@ -57,6 +57,262 @@ updated. ## 🚀 Latest release +### Memgraph v3.1.0 - Mar 12th, 2025 + +{

⚠️ Breaking changes

}
+
+- Improved error messages might result in a `breaking` outcome in your
+  applications if they assume the exact content of error messages.
+  [#2379](https://github.com/memgraph/memgraph/pull/2379)
+- `include/mg_exceptions.hpp` had an ABI break, which requires C++ modules to be
+  rebuilt. [#2702](https://github.com/memgraph/memgraph/pull/2702)
+
+{

✨ New features

}
+
+- Added the `toSet()` function for removing duplicates from a list. The ordering of
+  the list items is not preserved. This extension gives users more options for
+  manipulating list values in their queries. Users can call `toSet(list)` if
+  they need to remove any duplicate values from the list.
+  [#2659](https://github.com/memgraph/memgraph/pull/2659)
+- Added a custom authentication scheme for using OIDC Single Sign-On. Any OIDC
+  scheme that exposes a public key for validating tokens using RSA is supported
+  using Neo4j drivers. [#2715](https://github.com/memgraph/memgraph/pull/2715)
+- Enterprise Feature: Impersonate User. Allows the user to impersonate a
+  different user while executing queries. Users or roles with the correct
+  permissions can define the target user for a session and execute queries as
+  if the target user was logged in.
+  [#2742](https://github.com/memgraph/memgraph/pull/2742)
+- `project()` can now project a subgraph from lists of nodes and relationships,
+  rather than just from paths. This relaxes the required format of query
+  results needed to create subgraphs for further processing, or to pass to
+  query modules. Users can build a subgraph from nodes and relationships which
+  are computed separately, or come from different parts of your query, making
+  queries cleaner, more performant, and easier to maintain.
+  [#2611](https://github.com/memgraph/memgraph/pull/2611)
+- Added mgp_list_reserve (C API) and List::Reserve (C++ API). Allows for more
+  memory-efficient list population in query modules.
+  [#2650](https://github.com/memgraph/memgraph/pull/2650)
+- Added query `SHOW VECTOR INDEX INFO` to display information about vector
+  indices in a Cypher-like way. Although there is also a `CALL
+  vector_search.show_index_info() YIELD * RETURN *`, the query was added to
+  increase simplicity. The information cannot be added to the `SHOW INDEX
+  INFO` query because vector indices have additional information like
+  dimension, size, capacity, and metric, so a different organization of records
+  was needed. [#2757](https://github.com/memgraph/memgraph/pull/2757)
+- Added functionality to assign multiple dynamic labels for a node from a
+  variable or parameter. Users can now put dynamic label parameters in maps and
+  assign labels like in the example `WITH {my_labels: ["Label1", "Label2"]} as
+  x CREATE (n:x.my_labels)`. The result of the query is a node with 2 labels
+  assigned from the list.
+  [#2713](https://github.com/memgraph/memgraph/pull/2713)
+- Added support for dynamic relationship creation. Users can now create
+  relationship types based on variable values. Example: `WITH {my_edge_type:
+  "KNOWS"} as x CREATE ()-[:x.my_edge_type]->();` The result of this query would
+  be a created triplet with the relationship type `KNOWS`.
+  [#2558](https://github.com/memgraph/memgraph/pull/2558)
+- Added the `length` function, which outputs the number of relationships in a
+  given path object in the query or the number of elements in a list. The
+  `length` function is identical to the already existing `size` function.
+  [#2681](https://github.com/memgraph/memgraph/pull/2681)
+
+{

🛠️ Improvements

}
+
+- Optimised `EdgeAccessor::IsVisible` such that the connectivity check is only done
+  when `--storage-properties-on-edges=false`. This gives better performance for
+  queries involving edges.
+  [#2769](https://github.com/memgraph/memgraph/pull/2769)
+- Added fast retrieval of approximate vertex and edge counts in the C API with
+  `mgp_graph_approximate_vertex_count` and `mgp_graph_approximate_edge_count`.
+  Users can now use these functions to optimize certain data structures more
+  efficiently. For example, they can use this information to preallocate memory
+  for `std::vector`. [#2762](https://github.com/memgraph/memgraph/pull/2762)
+- A timeout was added to the SnapshotRpc message. A replica will now update
+  the main on its progress when applying the snapshot received as part of the
+  recovery process. [#2701](https://github.com/memgraph/memgraph/pull/2701)
+- A timeout was added to the following RPC messages: AppendDeltasRpc,
+  WalFilesRpc and CurrentWalRpc. The replica will now, for every 100k processed
+  deltas, send an RPC message saying that progress has been made. The main will wait
+  for at most 30s for the replica's reply, after which it will drop the
+  connection. [#2690](https://github.com/memgraph/memgraph/pull/2690)
+- The SHOW REPLICAS query uses a best estimation of the REPLICA's state instead of
+  querying the REPLICA for its current state. The SHOW REPLICAS query now
+  returns immediately, as it does not need to wait for the REPLICA response.
+  This means the query is lightweight, and users can run it more frequently.
+  [#2679](https://github.com/memgraph/memgraph/pull/2679)
+- Improved error messages that get sent to the user in cases of query failure.
+  This gives a better understanding of the issue that caused a query to fail and
+  makes debugging easier. Better error messages also improve final results when
+  used in an AI pipeline. [#2379](https://github.com/memgraph/memgraph/pull/2379)
+- SHOW/USE DATABASE reclassified as a non-system query. Multiple such queries
+  can be executed in parallel. Since the queries do not require unique access
+  anymore, users can execute them much more frequently and without impacting
+  performance. [#2674](https://github.com/memgraph/memgraph/pull/2674)
+- Added the diagnostic flag `--debug-query-plans` to help see all generated plans and
+  their costs. Fixed the filter's cost estimate and made other small cost estimator
+  changes. Performance improvement for index iteration. Together, these changes
+  provide better plans and better diagnostics of plan selection and index usage.
+  [#2722](https://github.com/memgraph/memgraph/pull/2722)
+- Performance improvement around the calling of query modules: less work is
+  performed setting up and moving around values.
+  [#2640](https://github.com/memgraph/memgraph/pull/2640)
+- Performance fix. Reduced contention by ensuring batch payloads were sent to
+  sockets. Performance is now better when returning many small rows.
+  [#2667](https://github.com/memgraph/memgraph/pull/2667)
+- The SystemRecoveryRPC message will now be configured with a 5min timeout.
+  Recovering 1000 tenants on a replica takes approximately 25s, so the 5min
+  timeout should cover all cases successfully. Users shouldn't observe any
+  different behavior compared to before.
+  [#2654](https://github.com/memgraph/memgraph/pull/2654)
+
+{

🐞 Bug fixes

}
+
+- When a failover is done on a replica which doesn't contain all the commits of
+  the old main, ForceResetRpc won't be sent anymore to the old main (now
+  replica). Instead, the new main will handle the old main (now replica)'s
+  history through recovery. The old main's durability files will be saved
+  inside the .old directory (data_directory/snapshots and data_directory/wals)
+  so that admins can always recover the newer state if needed.
+  [#2727](https://github.com/memgraph/memgraph/pull/2727)
+- When started in a single-tenant mode, hidden files won't be symlinked in a
+  default database directory anymore. This should enable people to use Memgraph
+  in K8s with various storage systems under the hood.
+  [#2748](https://github.com/memgraph/memgraph/pull/2748)
+- The vector index will now correctly index only vertices with the desired label
+  in datasets where vertices with different labels share the property used for
+  indexing. [#2701](https://github.com/memgraph/memgraph/pull/2701)
+- Coordinators will not deadlock anymore when the user requests unregistration
+  of data instances. At the moment of unregistration, the cluster now needs to
+  have an alive main instance so that the success of the requested unregistration
+  operation can be guaranteed.
+  [#2735](https://github.com/memgraph/memgraph/pull/2735)
+- Fixed a segmentation fault that could occur in certain scenarios when parsing
+  the vector index configuration map during vector index creation.
+  [#2720](https://github.com/memgraph/memgraph/pull/2720)
+- Misaligned snapshot and WAL timestamps caused data loss. Replication via
+  snapshots and WALs could lead to data loss due to misaligned timestamps.
+  [#2697](https://github.com/memgraph/memgraph/pull/2697)
+- Queries that access storage can now time out if storage access cannot be
+  granted. This fixes a potential deadlock under heavy load. Users could see a new
+  error, the access timeout. In such cases, they can retry later on or make
+  sure there are no conflicting queries.
+  [#2561](https://github.com/memgraph/memgraph/pull/2651)
+- The action for unregistering the replication instance wasn't durable before,
+  so after restarting the coordinator instance, the replication instance would
+  be observed again. [#2673](https://github.com/memgraph/memgraph/pull/2673)
+- Parallel recovery didn't work with WAL files. Index and constraints recovery
+  would use data batches calculated from the snapshot data. This would lead to
+  a failure if the snapshot is not present, or to partial recovery if the snapshot
+  and WALs were present. Users can now recover all data correctly and in
+  parallel. [#2658](https://github.com/memgraph/memgraph/pull/2658)
+- If the steps needed to recover a replica cannot be determined, the database
+  won't crash. The replica will remain in the MAYBE_BEHIND state and get recovered
+  later. [#2639](https://github.com/memgraph/memgraph/pull/2639)
+- Resolved issues with reloading `numpy` and `dgl` modules, which could cause
+  unexpected behavior when loaded multiple times. This fix ensures
+  compatibility with the latest versions of these packages.
+  [#2638](https://github.com/memgraph/memgraph/pull/2638)
+- Query planner fixed to handle matching multiple paths whose symbols were
+  already matched. This avoids a crash on queries of the type `MATCH (n0:N)
+  OPTIONAL MATCH p0 = (n0), p1 = (n0) RETURN *;`, i.e., those with multiple
+  paths using the same already matched variable, such as `n0` in the given
+  example. Such queries can now be run safely.
+  [#2692](https://github.com/memgraph/memgraph/pull/2692)
+- Query modules can now handle returning an error message even when we have hit
+  memory limits. We no longer terminate when trying to copy a small string
+  during mgp_func_result_set_error_msg. This means we are now more robust when
+  using `--memory-limit`.
+  [#2702](https://github.com/memgraph/memgraph/pull/2702)
+- Bugfix for a subtle data race around concurrent find+remove; there was a rare
+  possibility of data integrity failure that would allow multiple unique
+  entries. [#2686](https://github.com/memgraph/memgraph/pull/2686)
+- Fixed a memory leak in the C++ API and in `convert.str2object`.
+  [#2643](https://github.com/memgraph/memgraph/pull/2643)
+- Added yielding to reader-writer locks on ARM64. This improves efficiency and
+  power consumption on contented locks on machines running on the ARM64
+  chipset. Users can make better use of processing resources with highly
+  contented databases running on such machines.
+  [#2666](https://github.com/memgraph/memgraph/pull/2666)
+- Fix occasional system flakiness caused by overly optimistic memory ordering
+  in `RWSpinLock`. On machines with a weak memory model (such as ARM64), this
+  may occasionally have caused reads of stale data between threads. Users can
+  now expect the same correct behavior consistently on all architectures.
+  [#2668](https://github.com/memgraph/memgraph/pull/2668)
+- Temporal functions `date()`, `localtime()`, `localdatetime()` and
+  `datetime()` now accept and convert any compatible time formats. Users can
+  create queries that compare different temporal types and convert between
+  temporal formats. [#2630](https://github.com/memgraph/memgraph/pull/2630)
+- The ForceResetStorageRPC message is now configured with a timeout of 60 seconds.
+  The SystemHeartbeatRPC message is now configured with a timeout of 10 seconds. If a
+  replica cannot read the epoch id, the database won't crash anymore. Users
+  shouldn't see any different behavior compared to before.
+  [#2648](https://github.com/memgraph/memgraph/pull/2648)
+- Rewritten query planning when using edge indexing inside Optional. The query plan
+  will prefer to expand from existing nodes rather than use a new edge index to
+  scan over new relationships.
+  [#2741](https://github.com/memgraph/memgraph/pull/2741)
+- Type checking added for the vector search module. There was a bug where a user
+  could provide any value, which would be translated into a list of zero
+  values. Now, the supported type is a list of integer and double values. Not
+  providing this value will result in an exception.
+  [#2759](https://github.com/memgraph/memgraph/pull/2759)
+
+### MAGE v3.1.0 - Mar 12th, 2025
+
+{

✨ New features

}
+
+- Added migration from Amazon S3 CSV files. Users can now use the `CALL
+  migrate.s3() YIELD row` syntax to populate their data directly from
+  S3. [#547](https://github.com/memgraph/mage/pull/547)
+
+{

🛠️ Improvements

}
+
+- The base Docker image has been switched from Debian 11 to Ubuntu 24.04 to
+  ensure compatibility with toolchain-v6 and long-term security support.
+  [#535](https://github.com/memgraph/mage/pull/535)
+- Improved the static PageRank algorithm. It is now roughly 3 times faster, and it
+  uses 10%-15% less memory. Additionally, it now contains an argument to set
+  the number of threads used. [#552](https://github.com/memgraph/mage/pull/552)
+- Improved Louvain community detection performance and reduced memory usage. A
+  configuration option was added to control the number of threads used in the
+  algorithm's parallelized sections. Introduced support for aborting the
+  algorithm when Memgraph terminates the query using the module.
+  [#549](https://github.com/memgraph/mage/pull/549)
+
+### Lab v3.1.0 - Mar 12th, 2025
+
+{

✨ New features

} + +- The query summary now shows the number of + hops in your Cypher query. +- You can delete all or specific recent + connections from the login page. + +{

🛠️ Improvements

} + +- Username and password authentication is + always visible when connecting to a Memgraph + HA cluster. +- Screenshot size is reduced when saving + Graph Style Script (GSS) to remote storage, + preventing request and storage limit issues. + +{

🐞 Bug fixes

} + +- Vector indexes in `cypherl` import files and + multiple queries are now handled correctly in + separate transactions. +- Fixed an issue where Lab couldn't find query + module names with mixed case. +- Fixed an issue where connecting to a custom + database in an HA cluster resulted in an "Unknown + database name" error. + +## Previous releases + ### Memgraph v3.0.0 - Jan 29th, 2025 {

⚠️ Breaking changes

} @@ -264,8 +520,6 @@ updated. - Screenshots are now generated as expected when saving graph style scripts from query execution. -## Previous releases - ### Memgraph v2.22.1 - Dec 22, 2024 {

🛠️ Improvements

}