From 7c0a644fb2d72d64d3a6157dead043c43527b2f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Wed, 27 Nov 2024 13:54:56 +0100 Subject: [PATCH 001/127] Changed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 1f2359478..1e91d4cf9 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From d9a333481c3b62b4fc2757e008e14ee35618a759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 12:03:59 +0100 Subject: [PATCH 002/127] Fixed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 1e91d4cf9..94a50a9d9 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ 
b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", e.what()}}); + SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From 1fc0f2f3182ab1e82c478960591ca9fe7dba5a5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 12:14:48 +0100 Subject: [PATCH 003/127] Reverting changes for tests --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 94a50a9d9..4b170025b 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. 
- SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + // SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1713,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + //SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From cae4fb990fa729652bd29d8ab13528187af5e4e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 12:52:58 +0100 Subject: [PATCH 004/127] Fixed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 4b170025b..928f8fd37 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1496,7 +1496,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->sendMessage(response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. - // SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + SWARN("Error processing message, reconnecting", { + {"peer", peer ? peer->name : "unknown"}, + {"message", !message.methodLine.empty() ? 
message.methodLine : ""}, + {"reason", e.what()} + }); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); @@ -1713,7 +1717,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("unrecognized message"); } } catch (const SException& e) { - //SWARN("Error processing message, reconnecting", {{"peer", peer->name}, {"message", message.methodLine}, {"reason", string(e.what())}}); + SWARN("Error processing message, reconnecting", { + {"peer", peer ? peer->name : "unknown"}, + {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"reason", e.what()} + }); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); peer->sendMessage(reconnect.serialize()); From 30dde06fa45f25f8b570a73564c194592e97687b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Thu, 28 Nov 2024 13:31:56 +0100 Subject: [PATCH 005/127] Fixed log format for some SQLiteNode warnings --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 0b4b69184..5e5984670 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1523,7 +1523,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // This is the same handling as at the bottom of _onMESSAGE. SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"message", !message.empty()? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); @@ -1755,7 +1755,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } catch (const SException& e) { SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.methodLine.empty() ? message.methodLine : ""}, + {"message", !message.empty() ? 
message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); From c1e6ed69eff3e94408819ce4a37dd69dc34b71a1 Mon Sep 17 00:00:00 2001 From: Monil Bhavsar Date: Mon, 2 Dec 2024 18:03:40 +0530 Subject: [PATCH 006/127] Overload functions to result for writes --- sqlitecluster/SQLite.cpp | 17 ++++++++++++----- sqlitecluster/SQLite.h | 4 +++- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 5ea59ced0..76c70ff00 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -566,18 +566,25 @@ bool SQLite::write(const string& query) { } // This is literally identical to the idempotent version except for the check for _noopUpdateMode. - return _writeIdempotent(query); + SQResult ignore; + return _writeIdempotent(query, ignore); } bool SQLite::writeIdempotent(const string& query) { - return _writeIdempotent(query); + SQResult ignore; + return _writeIdempotent(query, ignore); +} + +bool SQLite::writeIdempotent(const string& query, SQResult& result) { + return _writeIdempotent(query, result); } bool SQLite::writeUnmodified(const string& query) { - return _writeIdempotent(query, true); + SQResult ignore; + return _writeIdempotent(query, ignore, true); } -bool SQLite::_writeIdempotent(const string& query, bool alwaysKeepQueries) { +bool SQLite::_writeIdempotent(const string& query, SQResult& result, bool alwaysKeepQueries) { SASSERT(_insideTransaction); _queryCache.clear(); _queryCount++; @@ -600,7 +607,7 @@ bool SQLite::_writeIdempotent(const string& query, bool alwaysKeepQueries) { { shared_lock lock(_sharedData.writeLock); if (_enableRewrite) { - resultCode = SQuery(_db, "read/write transaction", query, 2'000'000, true); + resultCode = SQuery(_db, "read/write transaction", query, result, 2'000'000, true); if (resultCode == SQLITE_AUTH) { // Run re-written query. 
_currentlyRunningRewritten = true; diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 842b51430..d7392670b 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include class SQLite { @@ -108,6 +109,7 @@ class SQLite { // It's intended to be used for `mockRequest` enabled commands, such that we only run a version of them that's // known to be repeatable. What counts as repeatable is up to the individual command. bool writeIdempotent(const string& query); + bool writeIdempotent(const string& query, SQResult& result); // This runs a query completely unchanged, always adding it to the uncommitted query, such that it will be recorded // in the journal even if it had no effect on the database. This lets replicated or synchronized queries be added @@ -400,7 +402,7 @@ class SQLite { static thread_local int64_t _conflictPage; static thread_local string _conflictTable; - bool _writeIdempotent(const string& query, bool alwaysKeepQueries = false); + bool _writeIdempotent(const string& query, SQResult& result, bool alwaysKeepQueries = false); // Constructs a UNION query from a list of 'query parts' over each of our journal tables. // Fore each table, queryParts will be joined with that table's name as a separator. 
I.e., if you have a tables From f0163c1235f3c732f3ac963d40d8b731a78bc80c Mon Sep 17 00:00:00 2001 From: Monil Bhavsar Date: Mon, 2 Dec 2024 18:04:15 +0530 Subject: [PATCH 007/127] Pass missing param for non rewrite mode --- sqlitecluster/SQLite.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 76c70ff00..da14f4590 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -617,7 +617,7 @@ bool SQLite::_writeIdempotent(const string& query, SQResult& result, bool always _currentlyRunningRewritten = false; } } else { - resultCode = SQuery(_db, "read/write transaction", query); + resultCode = SQuery(_db, "read/write transaction", query, result); } } From 15701ec984f3ae9c43c3f88753268df4b27a800c Mon Sep 17 00:00:00 2001 From: Monil Bhavsar Date: Mon, 2 Dec 2024 18:09:35 +0530 Subject: [PATCH 008/127] Add comment --- sqlitecluster/SQLite.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index d7392670b..e8c68e3e9 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -109,6 +109,9 @@ class SQLite { // It's intended to be used for `mockRequest` enabled commands, such that we only run a version of them that's // known to be repeatable. What counts as repeatable is up to the individual command. bool writeIdempotent(const string& query); + + // Executes a write query and retrieves the result. 
+ // Designed for use with queries that include a RETURNING clause bool writeIdempotent(const string& query, SQResult& result); // This runs a query completely unchanged, always adding it to the uncommitted query, such that it will be recorded From f32265ffdabff7a7a83d5380a39858d73c0d6b4d Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 2 Dec 2024 15:45:16 -0800 Subject: [PATCH 009/127] Improve shutdown of test --- test/clustertest/tests/ForkedNodeApprovalTest.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/clustertest/tests/ForkedNodeApprovalTest.cpp b/test/clustertest/tests/ForkedNodeApprovalTest.cpp index 1b3f982c4..197b883cc 100644 --- a/test/clustertest/tests/ForkedNodeApprovalTest.cpp +++ b/test/clustertest/tests/ForkedNodeApprovalTest.cpp @@ -152,5 +152,10 @@ struct ForkedNodeApprovalTest : tpunit::TestFixture { // Ok, now we can start the second follower back up and secondary leader should be able to lead. tester.getTester(2).startServer(false); ASSERT_TRUE(tester.getTester(1).waitForState("LEADING")); + + // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down + // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need + // to wait for this timeout. 
+ tester.getTester(0).stopServer(); } } __ForkedNodeApprovalTest; From b496479b05a1459b4d220978b5d1cc81e2e26e42 Mon Sep 17 00:00:00 2001 From: Monil Bhavsar Date: Tue, 3 Dec 2024 13:23:56 +0530 Subject: [PATCH 010/127] Add automated test --- test/tests/LibStuffTest.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/tests/LibStuffTest.cpp b/test/tests/LibStuffTest.cpp index 80a6c92f8..cfd5d5193 100644 --- a/test/tests/LibStuffTest.cpp +++ b/test/tests/LibStuffTest.cpp @@ -35,6 +35,7 @@ struct LibStuff : tpunit::TestFixture { TEST(LibStuff::SREMatchTest), TEST(LibStuff::SREReplaceTest), TEST(LibStuff::SQResultTest), + TEST(LibStuff::testReturningClause), TEST(LibStuff::SRedactSensitiveValuesTest) ) { } @@ -746,6 +747,37 @@ struct LibStuff : tpunit::TestFixture { ASSERT_EQUAL(result[0]["coco"], "name1"); } + void testReturningClause() { + // Given a sqlite DB with a table and pre inserted values + SQLite db(":memory:", 1000, 1000, 1); + db.beginTransaction(SQLite::TRANSACTION_TYPE::EXCLUSIVE); + db.write("CREATE TABLE testReturning(id INTEGER PRIMARY KEY, name STRING, value STRING, created DATE);"); + db.write("INSERT INTO testReturning VALUES(11, 'name1', 'value1', '2024-12-02');"); + db.write("INSERT INTO testReturning VALUES(21, 'name2', 'value2', '2024-12-03');"); + db.prepare(); + db.commit(); + + // When trying to delete a row by returning the deleted items + db.beginTransaction(SQLite::TRANSACTION_TYPE::SHARED); + SQResult result; + db.writeIdempotent("DELETE FROM testReturning WHERE id = 21 AND name = 'name2' RETURNING id, name;", result); + db.prepare(); + db.commit(); + + // Verify that deleted items were returned as expected + ASSERT_EQUAL("21", result[0][0]); + ASSERT_EQUAL("name2", result[0][1]); + + // Verify that the row was successfully deleted and now the table has only one row + db.beginTransaction(SQLite::TRANSACTION_TYPE::SHARED); + db.read("SELECT name, value FROM testReturning ORDER BY id;", result); + 
db.rollback(); + + ASSERT_EQUAL(1, result.size()); + ASSERT_EQUAL(result[0]["name"], "name1"); + ASSERT_EQUAL(result[0]["value"], "value1"); + } + void SRedactSensitiveValuesTest() { string logValue = R"({"edits":["test1", "test2", "test3"]})"; SRedactSensitiveValues(logValue); From d15d30074e69cfa6c1ed056de81434b67d1eafac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Tue, 3 Dec 2024 12:44:42 +0100 Subject: [PATCH 011/127] Added logging parameters to whitelist --- libstuff/SLog.cpp | 2 ++ sqlitecluster/SQLiteNode.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index e87c92398..1cadbafbe 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -74,8 +74,10 @@ static const set PARAMS_WHITELIST = { "lastIP", "logParam", "nvpName", + "peer", "policyAccountID", "policyID", + "reason", "reportID", "requestID", "requestTimestamp", diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5e5984670..0b911c9b0 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1523,7 +1523,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // This is the same handling as at the bottom of _onMESSAGE. SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.empty()? message.methodLine : ""}, + {"errorMessage", !message.empty()? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); @@ -1755,7 +1755,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } catch (const SException& e) { SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"message", !message.empty() ? message.methodLine : ""}, + {"errorMessage", !message.empty() ? 
message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); From a962a8d5e04ef3fcfffd2835c1919d353ba987b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Jasikowski?= Date: Tue, 3 Dec 2024 12:55:34 +0100 Subject: [PATCH 012/127] Added logging parameters to whitelist --- libstuff/SLog.cpp | 3 +++ sqlitecluster/SQLiteNode.cpp | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index bb3943082..a963ed277 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -49,6 +49,9 @@ static set PARAMS_WHITELIST = { "indexName", "isUnique", "logParam", + "message", + "peer", + "reason", "requestID", "status", "userID", diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 53884d297..662b3476f 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1522,7 +1522,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // This is the same handling as at the bottom of _onMESSAGE. SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"errorMessage", !message.empty()? message.methodLine : ""}, + {"message", !message.empty()? message.methodLine : ""}, {"reason", e.what()} }); SData reconnect("RECONNECT"); @@ -1754,8 +1754,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } catch (const SException& e) { SWARN("Error processing message, reconnecting", { {"peer", peer ? peer->name : "unknown"}, - {"errorMessage", !message.empty() ? message.methodLine : ""}, - {"reason", e.what()} + {"message", !message.empty() ? 
message.methodLine : ""}, + {"reason", e.what()} }); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); From db39c2cc63d10468e06cb28805666082b9ac5c78 Mon Sep 17 00:00:00 2001 From: Tim Golen Date: Wed, 4 Dec 2024 10:38:56 -0700 Subject: [PATCH 013/127] Add some log params for policy audit logs --- libstuff/SLog.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index bb3943082..148c42603 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -52,6 +52,10 @@ static set PARAMS_WHITELIST = { "requestID", "status", "userID", + "policyID", + "employeeEmail", + "approver", + "approvers", }; string addLogParams(string&& message, const STable& params) { From d817e679c1bdffcd7ac82eaba2ee411fcf47d70e Mon Sep 17 00:00:00 2001 From: Tim Golen Date: Wed, 4 Dec 2024 10:59:08 -0700 Subject: [PATCH 014/127] Add another log param --- libstuff/SLog.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index 148c42603..eb644d28b 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -56,6 +56,7 @@ static set PARAMS_WHITELIST = { "employeeEmail", "approver", "approvers", + "employees", }; string addLogParams(string&& message, const STable& params) { From c03124a1ee7edb689c86cb33660f9822afbad29a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 12:19:00 -0800 Subject: [PATCH 015/127] Allow first message to be either LOGIN or NODE_LOGIN to deprecate old message --- sqlitecluster/SQLiteNode.cpp | 38 +++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 79be57fed..3ea870cd7 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -56,6 +56,20 @@ // dbCountAtStart: The highest committed transaction in the DB at the start of this transaction on leader, for // optimizing replication. +// On LOGIN vs NODE_LOGIN. 
+// _onConnect sends a LOGIN message. +// _onConnect is called in exactly two places: +// 1. In response to a NODE_LOGIN message received on a newly connected socket on the sync port. It's expected when +// establishing a connection, a node sends this NODE_LOGIN as its first message. +// 2. Immediately following establishing a TCP connection to another node and sending a NODE_LOGIN message. In the case that +// we are the initiating node, we immediately queue three messages: +// 1. NODE_LOGIN +// 2. PING +// 3. LOGIN +// +// When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect). +// We can combine all of these into a single login message. + #undef SLOGPREFIX #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} " @@ -1284,6 +1298,9 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->latency = max(STimeNow() - message.calc64("Timestamp"), 1ul); SINFO("Received PONG from peer '" << peer->name << "' (" << peer->latency/1000 << "ms latency)"); return; + } else if (SIEquals(message.methodLine, "NODE_LOGIN")) { + // Do nothing, this keeps this code from warning until NODE_LOGIN is deprecated. + return; } // Every other message broadcasts the current state of the node @@ -1357,6 +1374,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("you're *not* supposed to be a 0-priority permafollower"); } + // Validate hash here, mark node as forked if found. 
+ // It's an error to have to peers configured with the same priority, except 0 and -1 SASSERT(_priority == -1 || _priority == 0 || message.calc("Priority") != _priority); PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" @@ -1745,7 +1764,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } } else { - STHROW("unrecognized message"); + STHROW("unrecognized message: " + message.methodLine); } } catch (const SException& e) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); @@ -1759,13 +1778,15 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { void SQLiteNode::_onConnect(SQLitePeer* peer) { SASSERT(peer); SASSERTWARN(!peer->loggedIn); - // Send the LOGIN - PINFO("Sending LOGIN"); SData login("LOGIN"); + login["Name"] = _name; login["Priority"] = to_string(_priority); login["State"] = stateName(_state); login["Version"] = _version; login["Permafollower"] = _originalPriority ? "false" : "true"; + PINFO("Sending " << login.serialize()); + + // NOTE: the following call adds CommitCount, Hash, and commandAddress fields. _sendToPeer(peer, login); } @@ -2565,12 +2586,14 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { int messageSize = message.deserialize(socket->recvBuffer); if (messageSize) { socket->recvBuffer.consumeFront(messageSize); - if (SIEquals(message.methodLine, "NODE_LOGIN")) { + // Allow either LOGIN or NODE_LOGIN until we deprecate NODE_LOGIN. + if (SIEquals(message.methodLine, "NODE_LOGIN") || SIEquals(message.methodLine, "LOGIN")) { SQLitePeer* peer = getPeerByName(message["Name"]); if (peer) { if (peer->setSocket(socket)) { - _sendPING(peer); _onConnect(peer); + _sendPING(peer); + _onMESSAGE(peer, message); // Connected OK, don't need in _unauthenticatedIncomingSockets anymore. 
socketsToRemove.push_back(socket); @@ -2589,7 +2612,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { STHROW("Unauthenticated node '" + message["Name"] + "' attempted to connected, rejecting."); } } else { - STHROW("expecting NODE_LOGIN"); + STHROW("expecting LOGIN or NODE_LOGIN"); } } else if (STimeNow() > socket->lastRecvTime + 5'000'000) { STHROW("Incoming socket didn't send a message for over 5s, closing."); @@ -2612,11 +2635,12 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { switch (result) { case SQLitePeer::PeerPostPollStatus::JUST_CONNECTED: { + // When NODE_LOGIN is deprecated, we can remove the next 3 lines. SData login("NODE_LOGIN"); login["Name"] = _name; peer->sendMessage(login.serialize()); - _sendPING(peer); _onConnect(peer); + _sendPING(peer); } break; case SQLitePeer::PeerPostPollStatus::SOCKET_ERROR: From e5659c7e4eab52a257b76f6e9d301be589948743 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 14:22:36 -0800 Subject: [PATCH 016/127] add known bad peer, refuse to talk to it --- sqlitecluster/SQLiteNode.cpp | 21 ++++++++++++++++----- sqlitecluster/SQLitePeer.cpp | 2 ++ sqlitecluster/SQLitePeer.h | 3 +++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 3ea870cd7..5f8e7d893 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1303,6 +1303,12 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { return; } + // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles. 
+ if (peer->knownBad) { + PINFO("Received message " << message.methodLine << " from known bad peer, ignoring."); + return; + } + // Every other message broadcasts the current state of the node if (!message.isSet("CommitCount")) { STHROW("missing CommitCount"); @@ -1374,17 +1380,22 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { STHROW("you're *not* supposed to be a 0-priority permafollower"); } - // Validate hash here, mark node as forked if found. - // It's an error to have to peers configured with the same priority, except 0 and -1 SASSERT(_priority == -1 || _priority == 0 || message.calc("Priority") != _priority); - PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" - << message["CommitCount"] << " (" << message["Hash"] << ")"); peer->priority = message.calc("Priority"); - peer->loggedIn = true; peer->version = message["Version"]; peer->state = stateFromName(message["State"]); + // Validate hash here, mark node as forked if found. + if (false) { + PINFO("Peer is forked, marking as bad, will ignore."); + peer->knownBad = true; + } else { + PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" + << message["CommitCount"] << " (" << message["Hash"] << ")"); + peer->loggedIn = true; + } + // If the peer is already standing up, go ahead and approve or deny immediately. 
if (peer->state == SQLiteNodeState::STANDINGUP) { _sendStandupResponse(peer, message); diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 82c2de8a2..4daf2c785 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -26,6 +26,7 @@ SQLitePeer::SQLitePeer(const string& name_, const string& host_, const STable& p transactionResponse(Response::NONE), version(), lastPingTime(0), + knownBad(false), hash() { } @@ -79,6 +80,7 @@ void SQLitePeer::reset() { version = ""; lastPingTime = 0, setCommit(0, ""); + knownBad = false; } void SQLitePeer::shutdownSocket() { diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h index 0eb984656..296a11ed8 100644 --- a/sqlitecluster/SQLitePeer.h +++ b/sqlitecluster/SQLitePeer.h @@ -91,6 +91,9 @@ class SQLitePeer { atomic version; atomic lastPingTime; + // Set to true when this peer is known to be unusable, I.e., when it has a database that is forked from us. + atomic knownBad; + private: // For initializing the permafollower value from the params list. static bool isPermafollower(const STable& params); From ccdc5cd82f1b933381600cdec0900a5e21374192 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 15:51:51 -0800 Subject: [PATCH 017/127] Stop communicating on fork. Test don't pass yet. --- sqlitecluster/SQLiteNode.cpp | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5f8e7d893..3b95b3833 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1386,14 +1386,23 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->version = message["Version"]; peer->state = stateFromName(message["State"]); - // Validate hash here, mark node as forked if found. 
- if (false) { - PINFO("Peer is forked, marking as bad, will ignore."); - peer->knownBad = true; - } else { + uint64_t peerCommitCount; + string peerCommitHash; + bool hashesMatch = true; + peer->getCommit(peerCommitCount, peerCommitHash); + if (!peerCommitHash.empty() && peerCommitCount <= getCommitCount()) { + string query, hash; + _db.getCommit(peerCommitCount, query, hash); + hashesMatch = (peerCommitHash == hash); + } + + if (hashesMatch) { PINFO("Peer logged in at '" << message["State"] << "', priority #" << message["Priority"] << " commit #" << message["CommitCount"] << " (" << message["Hash"] << ")"); peer->loggedIn = true; + } else { + PINFO("Peer is forked, marking as bad, will ignore."); + peer->knownBad = true; } // If the peer is already standing up, go ahead and approve or deny immediately. @@ -1775,7 +1784,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } } else { - STHROW("unrecognized message: " + message.methodLine); + SINFO("unrecognized message: " + message.methodLine); } } catch (const SException& e) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); @@ -1902,6 +1911,9 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { // We can treat this whole function as atomic and thread-safe as it sends data to a peer with it's own atomic // `sendMessage` and the peer itself (assuming it's something from _peerList, which, if not, don't do that) is // const and will exist without changing until destruction. + if (peer->knownBad) { + PINFO("Skipping message " << message.methodLine << " to known bad peer."); + } peer->sendMessage(_addPeerHeaders(message).serialize()); } @@ -1910,6 +1922,9 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { // Loop across all connected peers and send the message. _peerList is const so this is thread-safe. 
for (auto peer : _peerList) { + if (peer->knownBad) { + PINFO("Skipping message " << message.methodLine << " to known bad peer."); + } // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. if (!subscribedOnly || peer->subscribed) { From 466725411ae8838f1e77e9451b1638cbfeb2b2c3 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:18:36 -0800 Subject: [PATCH 018/127] Rename variable, remove _forkedFrom list --- sqlitecluster/SQLiteNode.cpp | 53 ++++++++++++++++++++++-------------- sqlitecluster/SQLiteNode.h | 7 ----- sqlitecluster/SQLitePeer.cpp | 4 +-- sqlitecluster/SQLitePeer.h | 2 +- 4 files changed, 36 insertions(+), 30 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 3b95b3833..705e29a29 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -639,7 +639,7 @@ bool SQLiteNode::update() { // Find the freshest non-broken peer (including permafollowers). if (peer->loggedIn) { loggedInPeers.push_back(peer->name); - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { SWARN("Hash mismatch. Forked from peer " << peer->name << " so not considering it." << _getLostQuorumLogMessage()); continue; } @@ -757,7 +757,7 @@ bool SQLiteNode::update() { continue; } - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { // Forked nodes are treated as ineligible for leader, etc. SHMMM("Not counting forked peer " << peer->name << " for freshest, highestPriority, or currentLeader."); continue; @@ -1304,8 +1304,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles. 
- if (peer->knownBad) { - PINFO("Received message " << message.methodLine << " from known bad peer, ignoring."); + if (peer->forked) { + PINFO("Received message " << message.methodLine << " from forked peer, ignoring."); return; } @@ -1402,7 +1402,13 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->loggedIn = true; } else { PINFO("Peer is forked, marking as bad, will ignore."); - peer->knownBad = true; + + // Send it a message before we mark it as forked, as we'll refuse afterward. + SData forked("FORKED"); + _sendToPeer(peer, forked); + + // And mark it dead until it reconnects. + peer->forked = true; } // If the peer is already standing up, go ahead and approve or deny immediately. @@ -1579,14 +1585,21 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SQResult result; uint64_t commitNum = SToUInt64(message["hashMismatchNumber"]); _db.getCommits(commitNum, commitNum, result); - _forkedFrom.insert(peer->name); + peer->forked = true; + + size_t forkedCount = 0; + for (const auto& p : _peerList) { + if (p->forked) { + forkedCount++; + } + } SALERT("Hash mismatch. Peer " << peer->name << " and I have forked at commit " << message["hashMismatchNumber"] - << ". I have forked from " << _forkedFrom.size() << " other nodes. I am " << stateName(_state) + << ". I have forked from " << forkedCount << " other nodes. I am " << stateName(_state) << " and have hash " << result[0][0] << " for that commit. Peer has hash " << message["hashMismatchValue"] << "." << _getLostQuorumLogMessage()); - if (_forkedFrom.size() > ((_peerList.size() + 1) / 2)) { + if (forkedCount > ((_peerList.size() + 1) / 2)) { SERROR("Hash mismatch. I have forked from over half the cluster. This is unrecoverable." 
<< _getLostQuorumLogMessage()); } @@ -1783,8 +1796,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { << e.what() << "', ignoring."); } } + } else if (SIEquals(message.methodLine, "FORKED")) { + peer->forked = true; + PINFO("Peer said we're forked, beleiving them."); } else { - SINFO("unrecognized message: " + message.methodLine); + PINFO("unrecognized message: " + message.methodLine); } } catch (const SException& e) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); @@ -1911,8 +1927,8 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { // We can treat this whole function as atomic and thread-safe as it sends data to a peer with it's own atomic // `sendMessage` and the peer itself (assuming it's something from _peerList, which, if not, don't do that) is // const and will exist without changing until destruction. - if (peer->knownBad) { - PINFO("Skipping message " << message.methodLine << " to known bad peer."); + if (peer->forked) { + PINFO("Skipping message " << message.methodLine << " to forked peer."); } peer->sendMessage(_addPeerHeaders(message).serialize()); } @@ -1922,8 +1938,8 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { // Loop across all connected peers and send the message. _peerList is const so this is thread-safe. for (auto peer : _peerList) { - if (peer->knownBad) { - PINFO("Skipping message " << message.methodLine << " to known bad peer."); + if (peer->forked) { + PINFO("Skipping message " << message.methodLine << " to forked peer."); } // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. 
@@ -2014,16 +2030,12 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _leadPeer = nullptr; } - if (newState >= SQLiteNodeState::STANDINGUP) { - // Not forked from anyone. Note that this includes both LEADING and FOLLOWING. - _forkedFrom.clear(); - } - // Re-enable commits if they were disabled during a previous stand-down. if (newState != SQLiteNodeState::SEARCHING) { _db.setCommitEnabled(true); } +#if 0 // If we're going searching and have forked from at least 1 peer, sleep for a second. This is intended to prevent thousands of lines of log spam when this happens in an infinite // loop. It's entirely possible that we do this for valid reasons - it may be the peer that has the bad database and not us, and there are plenty of other reasons we could switch to // SEARCHING, but in those cases, we just wait an extra second before trying again. @@ -2031,6 +2043,7 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance SWARN("Going searching while forked peers present, sleeping 1 second." << _getLostQuorumLogMessage()); sleep(1); } +#endif // Additional logic for some new states if (newState == SQLiteNodeState::LEADING) { @@ -2250,7 +2263,7 @@ void SQLiteNode::_updateSyncPeer() continue; } - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { SWARN("Hash mismatch. 
Can't choose peer " << peer->name << " due to previous hash mismatch."); continue; } @@ -2821,7 +2834,7 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { return; } - if (_forkedFrom.count(peer->name)) { + if (peer->forked) { PHMMM("Forked from peer, can't approve standup."); response["Response"] = "abstain"; response["Reason"] = "We are forked"; diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 7c14667d4..3826ba0bd 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -393,13 +393,6 @@ class SQLiteNode : public STCPManager { // This can be removed once we've figured out why replication falls behind. See this issue: https://github.com/Expensify/Expensify/issues/210528 atomic _concurrentReplicateTransactions = 0; - // We keep a set of strings that are the names of nodes we've forked from, in the case we ever receive a hash mismatch while trying to synchronize. - // Whenever we become LEADING or FOLLOWING this is cleared. This resets the case where one node has forked, we attempt to synchronize from it, and fail, - // but later synchronize from someone else. Once we've come up completely, we no longer "hold a grudge" against this node, which will likely get fixed - // while we're online. - // In the event that this list becomes longer than half the cluster size, the node kills itself and logs that it's in an unrecoverable state. - set _forkedFrom; - // A pointer to a SQLite instance that is passed to plugin's stateChanged function. This prevents plugins from operating on the same handle that // the sync node is when they run queries in stateChanged. 
SQLite* pluginDB; diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 4daf2c785..35146de52 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -26,7 +26,7 @@ SQLitePeer::SQLitePeer(const string& name_, const string& host_, const STable& p transactionResponse(Response::NONE), version(), lastPingTime(0), - knownBad(false), + forked(false), hash() { } @@ -80,7 +80,7 @@ void SQLitePeer::reset() { version = ""; lastPingTime = 0, setCommit(0, ""); - knownBad = false; + forked = false; } void SQLitePeer::shutdownSocket() { diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h index 296a11ed8..d6d0a8030 100644 --- a/sqlitecluster/SQLitePeer.h +++ b/sqlitecluster/SQLitePeer.h @@ -92,7 +92,7 @@ class SQLitePeer { atomic lastPingTime; // Set to true when this peer is known to be unusable, I.e., when it has a database that is forked from us. - atomic knownBad; + atomic forked; private: // For initializing the permafollower value from the params list. From 0689a931a2845e03f5c2ebb64a5efc2728d4e579 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:42:58 -0800 Subject: [PATCH 019/127] Fix fork test --- sqlitecluster/SQLiteNode.cpp | 48 +++++++++++++++++------- sqlitecluster/SQLiteNode.h | 2 + test/clustertest/tests/ForkCheckTest.cpp | 5 +++ 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 705e29a29..8478678fa 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1409,6 +1409,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // And mark it dead until it reconnects. peer->forked = true; + _dieIfForkedFromCluster(); } // If the peer is already standing up, go ahead and approve or deny immediately. 
@@ -1587,22 +1588,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { _db.getCommits(commitNum, commitNum, result); peer->forked = true; - size_t forkedCount = 0; - for (const auto& p : _peerList) { - if (p->forked) { - forkedCount++; - } - } - SALERT("Hash mismatch. Peer " << peer->name << " and I have forked at commit " << message["hashMismatchNumber"] - << ". I have forked from " << forkedCount << " other nodes. I am " << stateName(_state) - << " and have hash " << result[0][0] << " for that commit. Peer has hash " << message["hashMismatchValue"] << "." - << _getLostQuorumLogMessage()); - - if (forkedCount > ((_peerList.size() + 1) / 2)) { - SERROR("Hash mismatch. I have forked from over half the cluster. This is unrecoverable." << _getLostQuorumLogMessage()); - } + << ". I am " << stateName(_state) << " and have hash " << result[0][0] << " for that commit. Peer has hash " + << message["hashMismatchValue"] << "." << _getLostQuorumLogMessage()); + _dieIfForkedFromCluster(); STHROW("Hash mismatch"); } if (!_syncPeer) { @@ -1799,6 +1789,23 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } else if (SIEquals(message.methodLine, "FORKED")) { peer->forked = true; PINFO("Peer said we're forked, beleiving them."); + _dieIfForkedFromCluster(); + + // If leader said we're forked from it, we need a new leader. + if (peer == _leadPeer) { + _leadPeer = nullptr; + _changeState(SQLiteNodeState::SEARCHING); + } + + // If our sync peer said we're forked from it, and we're currently synchronizing, we need a new sync peer. + // However, if we're not currently syncing, then we don't need to change states, this peer could have been chosen + // hours or days ago. 
+ if (peer == _syncPeer) { + _syncPeer = nullptr; + if (_state == SQLiteNodeState::SYNCHRONIZING) { + _changeState(SQLiteNodeState::SEARCHING); + } + } } else { PINFO("unrecognized message: " + message.methodLine); } @@ -2908,3 +2915,16 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { } _sendToPeer(peer, response); } + +void SQLiteNode::_dieIfForkedFromCluster() { + size_t forkedCount = 0; + for (const auto& p : _peerList) { + if (p->forked) { + forkedCount++; + } + } + + if (forkedCount > ((_peerList.size() + 1) / 2)) { + SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); + } +} \ No newline at end of file diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 3826ba0bd..354aef7c9 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -280,6 +280,8 @@ class SQLiteNode : public STCPManager { // commitCount that we do, this will return null. void _updateSyncPeer(); + void _dieIfForkedFromCluster(); + const string _commandAddress; const string _name; const vector _peerList; diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp index 6e0c0d262..330c8735f 100644 --- a/test/clustertest/tests/ForkCheckTest.cpp +++ b/test/clustertest/tests/ForkCheckTest.cpp @@ -119,5 +119,10 @@ struct ForkCheckTest : tpunit::TestFixture { // And that signal should have been ABORT. ASSERT_EQUAL(SIGABRT, WTERMSIG(status)); + + // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down + // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need + // to wait for this timeout. 
+ tester.getTester(0).stopServer(); } } __ForkCheckTest; From 002a9373aa6b08cde04fbbfd2f0b0c029a2e50b9 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:46:01 -0800 Subject: [PATCH 020/127] remove test that tests condition that's no longer used. --- .../tests/ForkedNodeApprovalTest.cpp | 161 ------------------ 1 file changed, 161 deletions(-) delete mode 100644 test/clustertest/tests/ForkedNodeApprovalTest.cpp diff --git a/test/clustertest/tests/ForkedNodeApprovalTest.cpp b/test/clustertest/tests/ForkedNodeApprovalTest.cpp deleted file mode 100644 index 197b883cc..000000000 --- a/test/clustertest/tests/ForkedNodeApprovalTest.cpp +++ /dev/null @@ -1,161 +0,0 @@ - -#include - -#include -#include -#include -#include - -struct ForkedNodeApprovalTest : tpunit::TestFixture { - ForkedNodeApprovalTest() - : tpunit::TestFixture("ForkedNodeApproval", TEST(ForkedNodeApprovalTest::test)) {} - - pair getMaxJournalCommit(BedrockTester& tester, bool online = true) { - SQResult journals; - tester.readDB("SELECT name FROM sqlite_schema WHERE type ='table' AND name LIKE 'journal%';", journals, online); - uint64_t maxJournalCommit = 0; - string maxJournalTable; - for (auto& row : journals.rows) { - string maxID = tester.readDB("SELECT MAX(id) FROM " + row[0] + ";", online); - try { - uint64_t maxCommitNum = stoull(maxID); - if (maxCommitNum > maxJournalCommit) { - maxJournalCommit = maxCommitNum; - maxJournalTable = row[0]; - } - } catch (const invalid_argument& e) { - // do nothing, skip this journal with no entries. - continue; - } - } - return make_pair(maxJournalCommit, maxJournalTable); - } - - void test() { - // Create a cluster, wait for it to come up. - BedrockClusterTester tester(ClusterSize::THREE_NODE_CLUSTER); - - // We'll tell the threads to stop when they're done. - atomic stop(false); - - // We want to not spam a stopped leader. - atomic leaderIsUp(true); - - // Just use a bunch of copies of the same command. 
- SData spamCommand("idcollision"); - - // In a vector. - const vector commands(100, spamCommand); - - // Now create 9 threads spamming 100 commands at a time, each. 9 cause we have three nodes. - vector threads; - for (size_t i = 0; i < 9; i++) { - threads.emplace_back([&tester, i, &commands, &stop, &leaderIsUp](){ - while (!stop) { - // Pick a tester, send, don't care about the result. - size_t testerNum = i % 3; - if (testerNum == 0 && !leaderIsUp) { - // If we're looking for leader and it's down, wait a second to avoid pegging the CPU. - sleep(1); - } else { - // If we're not leader or leader is up, spam away! - tester.getTester(testerNum).executeWaitMultipleData(commands); - } - } - }); - } - - // Let them spam for a second. - sleep(1); - - // We can try and stop the leader. - leaderIsUp = false; - tester.getTester(0).stopServer(); - - // Spam a few more commands and then we can stop. - sleep(1); - stop = true; - for (auto& t : threads) { - t.join(); - } - - // Fetch the latest journal commits on leader and follower - auto result = getMaxJournalCommit(tester.getTester(0), false); - - uint64_t leaderMaxCommit = result.first; - string leaderMaxCommitJournal = result.second; - result = getMaxJournalCommit(tester.getTester(1)); - uint64_t followerMaxCommit = result.first; - - // Make sure the follower got farther than the leader. - ASSERT_GREATER_THAN(followerMaxCommit, leaderMaxCommit); - - // We need to release any DB that the tester is holding. - tester.getTester(0).freeDB(); - tester.getTester(1).freeDB(); - - // Break leader. 
- { - string filename = tester.getTester(0).getArg("-db"); - string query = "UPDATE " + leaderMaxCommitJournal + " SET hash = 'abcdef123456' WHERE id = " + to_string(leaderMaxCommit) + ";"; - - sqlite3* db = nullptr; - sqlite3_open_v2(filename.c_str(), &db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_NOMUTEX, NULL); - char* errMsg = nullptr; - sqlite3_exec(db, query.c_str(), 0, 0, &errMsg); - if (errMsg) { - cout << "Error updating db: " << errMsg << endl; - ASSERT_TRUE(false); - } - sqlite3_close_v2(db); - } - - // Stop the second follower. - tester.getTester(2).stopServer(); - - // Start the broken leader back up. - tester.getTester(0).startServer(false); - - // We should not get a leader, the primary leader needs to synchronize, but can't because it's forked. - // The secondary leader should go leading, but can't, because it only receives `abstain` responses to standup requests. - // It's possible for the secondary leader to go leading once, but it should quickly fall out of leading when the fork is detected and primary leader reconnects. - // After that, it should not go leading again, primary leader should abstain from participation. - auto start = chrono::steady_clock::now(); - bool abstainDetected = false; - while (true) { - if (chrono::steady_clock::now() - start > 30s) { - cout << "It's been 30 seconds." << endl; - break; - } - SData command("Status"); - auto responseJson = tester.getTester(1).executeWaitMultipleData({command}, 1, true)[0].content; - - auto json = SParseJSONObject(responseJson); - auto peers = SParseJSONArray(json["peerList"]); - for (auto& peer : peers) { - auto peerJSON = SParseJSONObject(peer); - if (peerJSON["name"] == "cluster_node_0" && peerJSON["standupResponse"] == "ABSTAIN") { - abstainDetected = true; - break; - } - } - if (abstainDetected) { - break; - } - - // try again. 
- usleep(50'000); - } - - ASSERT_TRUE(abstainDetected); - - // Ok, now we can start the second follower back up and secondary leader should be able to lead. - tester.getTester(2).startServer(false); - ASSERT_TRUE(tester.getTester(1).waitForState("LEADING")); - - // We call stopServer on the forked leader because it crashed, but the cluster tester doesn't realize, so shutting down - // normally will time out after a minute. Calling `stopServer` explicitly will clear the server PID, and we won't need - // to wait for this timeout. - tester.getTester(0).stopServer(); - } -} __ForkedNodeApprovalTest; From f50aec04744ff9fb9cfb44e18b5c378bff3f1e2a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 16:54:03 -0800 Subject: [PATCH 021/127] Remove ABSTAIN --- sqlitecluster/SQLiteNode.cpp | 25 ------------------------- sqlitecluster/SQLitePeer.cpp | 3 --- sqlitecluster/SQLitePeer.h | 3 +-- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 8478678fa..7374ac0f3 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -861,7 +861,6 @@ bool SQLiteNode::update() { size_t numFullPeers = 0; size_t numLoggedInFullPeers = 0; size_t approveCount = 0; - size_t abstainCount = 0; if (_isShuttingDown) { SINFO("Shutting down while standing up, setting state to SEARCHING"); _changeState(SQLiteNodeState::SEARCHING); @@ -879,9 +878,6 @@ bool SQLiteNode::update() { // Has it responded yet? if (peer->standupResponse == SQLitePeer::Response::NONE) { // This peer hasn't yet responded. We do nothing with it in this case, maybe it will have responded by the next check. 
- } else if (peer->standupResponse == SQLitePeer::Response::ABSTAIN) { - PHMMM("Peer abstained from participation in quorum"); - abstainCount++; } else if (peer->standupResponse == SQLitePeer::Response::DENY) { // It responeded, but didn't approve -- abort PHMMM("Refused our STANDUP, cancel and RE-SEARCH"); @@ -894,16 +890,6 @@ bool SQLiteNode::update() { } } - // If the majority of full peers responds with abstain, then re-search. - const bool majorityAbstained = abstainCount * 2 > numFullPeers; - if (majorityAbstained) { - // Majority abstained, meaning we're probably forked, - // so we go back to searching so we can go back to synchronizing and see if we're forked. - SHMMM("Majority of full peers abstained; re-SEARCHING."); - _changeState(SQLiteNodeState::SEARCHING); - return true; // Re-update - } - // If everyone's responded with approval and we form a majority, then finish standup. bool majorityConnected = numLoggedInFullPeers * 2 >= numFullPeers; bool quorumApproved = approveCount * 2 >= numFullPeers; @@ -1531,9 +1517,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (SIEquals(message["Response"], "approve")) { PINFO("Received standup approval"); peer->standupResponse = SQLitePeer::Response::APPROVE; - } else if (SIEquals(message["Response"], "abstain")) { - PINFO("Received standup abstain"); - peer->standupResponse = SQLitePeer::Response::ABSTAIN; } else { PHMMM("Received standup denial: reason='" << message["Reason"] << "'"); peer->standupResponse = SQLitePeer::Response::DENY; @@ -2841,14 +2824,6 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { return; } - if (peer->forked) { - PHMMM("Forked from peer, can't approve standup."); - response["Response"] = "abstain"; - response["Reason"] = "We are forked"; - _sendToPeer(peer, response); - return; - } - // What's our state if (SWITHIN(SQLiteNodeState::STANDINGUP, _state, SQLiteNodeState::STANDINGDOWN)) { // Oh crap, it's trying to stand up while 
we're leading. Who is higher priority? diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index 35146de52..c93589520 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -207,9 +207,6 @@ string SQLitePeer::responseName(Response response) { case Response::DENY: return "DENY"; break; - case Response::ABSTAIN: - return "ABSTAIN"; - break; default: return ""; } diff --git a/sqlitecluster/SQLitePeer.h b/sqlitecluster/SQLitePeer.h index d6d0a8030..48bc3433c 100644 --- a/sqlitecluster/SQLitePeer.h +++ b/sqlitecluster/SQLitePeer.h @@ -8,8 +8,7 @@ class SQLitePeer { enum class Response { NONE, APPROVE, - DENY, - ABSTAIN + DENY }; enum class PeerPostPollStatus { From 0e828eb21f325f189642a47678a6d3ef562f9495 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 17:01:06 -0800 Subject: [PATCH 022/127] Comment cleanup --- sqlitecluster/SQLiteNode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 7374ac0f3..7f779d43d 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -56,6 +56,7 @@ // dbCountAtStart: The highest committed transaction in the DB at the start of this transaction on leader, for // optimizing replication. +// NOTE: This comment as well as NODE_LOGIN should be removed after https://github.com/Expensify/Bedrock/pull/1999 is deployed. // On LOGIN vs NODE_LOGIN. // _onConnect sends a LOGIN message. // _onConnect is called in exctly two places: @@ -68,7 +69,6 @@ // 3. LOGIN // // When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect). -// We can cobine all of these into a single login message. 
#undef SLOGPREFIX #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} " @@ -2902,4 +2902,4 @@ void SQLiteNode::_dieIfForkedFromCluster() { if (forkedCount > ((_peerList.size() + 1) / 2)) { SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); } -} \ No newline at end of file +} From 2d6b73adc2ca09cc8fe42249a826584da282ddb5 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 17:19:39 -0800 Subject: [PATCH 023/127] Standardize use of _sendToPeer --- sqlitecluster/SQLiteNode.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 7f779d43d..d6c03bb70 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1277,7 +1277,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SINFO("Received PING from peer '" << peer->name << "'. Sending PONG."); SData pong("PONG"); pong["Timestamp"] = message["Timestamp"]; - peer->sendMessage(pong.serialize()); + peer->sendMessage(pong); return; } else if (SIEquals(message.methodLine, "PONG")) { // Latency must be > 0 because we treat 0 as "not connected". @@ -1531,7 +1531,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SINFO("Asked to help SYNCHRONIZE but shutting down."); SData response("SYNCHRONIZE_RESPONSE"); response["ShuttingDown"] = "true"; - peer->sendMessage(response); + _sendToPeer(peer, response); } else { _pendingSynchronizeResponses++; static atomic synchronizeCount(0); @@ -1546,13 +1546,13 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // The following two lines are copied from `_sendToPeer`. 
response["CommitCount"] = to_string(db.getCommitCount()); response["Hash"] = db.getCommittedHash(); - peer->sendMessage(response); + _sendToPeer(peer, response); } catch (const SException& e) { // This is the same handling as at the bottom of _onMESSAGE. PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); - peer->sendMessage(reconnect.serialize()); + _sendToPeer(peer, reconnect); peer->shutdownSocket(); } @@ -1796,7 +1796,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { PWARN("Error processing message '" << message.methodLine << "' (" << e.what() << "), reconnecting."); SData reconnect("RECONNECT"); reconnect["Reason"] = e.what(); - peer->sendMessage(reconnect.serialize()); + _sendToPeer(peer, reconnect); peer->shutdownSocket(); } } @@ -1920,11 +1920,11 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { if (peer->forked) { PINFO("Skipping message " << message.methodLine << " to forked peer."); } - peer->sendMessage(_addPeerHeaders(message).serialize()); + peer->sendMessage(_addPeerHeaders(message)); } void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { - const string serializedMessage = _addPeerHeaders(message).serialize(); + const SData messageWithHeaders = _addPeerHeaders(message); // Loop across all connected peers and send the message. _peerList is const so this is thread-safe. for (auto peer : _peerList) { @@ -1934,7 +1934,7 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. 
if (!subscribedOnly || peer->subscribed) { - peer->sendMessage(serializedMessage); + peer->sendMessage(messageWithHeaders); } } } @@ -2667,7 +2667,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { // When NODE_LOGIN is deprecated, we can remove the next 3 lines. SData login("NODE_LOGIN"); login["Name"] = _name; - peer->sendMessage(login.serialize()); + _sendToPeer(peer, login); _onConnect(peer); _sendPING(peer); } @@ -2676,7 +2676,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { { SData reconnect("RECONNECT"); reconnect["Reason"] = "socket error"; - peer->sendMessage(reconnect.serialize()); + _sendToPeer(peer, reconnect); peer->shutdownSocket(); } break; @@ -2734,7 +2734,7 @@ void SQLiteNode::_sendPING(SQLitePeer* peer) { SASSERT(peer); SData ping("PING"); ping["Timestamp"] = SToStr(STimeNow()); - peer->sendMessage(ping.serialize()); + peer->sendMessage(ping); } SQLitePeer* SQLiteNode::getPeerByName(const string& name) const { From 859ba9f500361ef02dbb79505d0abf1f6f1333b1 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 18:37:00 -0800 Subject: [PATCH 024/127] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index d6c03bb70..38f676c10 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1930,6 +1930,7 @@ void SQLiteNode::_sendToAllPeers(const SData& message, bool subscribedOnly) { for (auto peer : _peerList) { if (peer->forked) { PINFO("Skipping message " << message.methodLine << " to forked peer."); + continue; } // This check is strictly thread-safe, as SQLitePeer::subscribed is atomic, but there's still a race condition // around checking subscribed and then sending, as subscribed could technically change. 
From 12cdd008b05440e883e8dbde799e9411446242f6 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 4 Dec 2024 18:37:25 -0800 Subject: [PATCH 025/127] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 38f676c10..a26c84f71 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1919,6 +1919,7 @@ void SQLiteNode::_sendToPeer(SQLitePeer* peer, const SData& message) { // const and will exist without changing until destruction. if (peer->forked) { PINFO("Skipping message " << message.methodLine << " to forked peer."); + return; } peer->sendMessage(_addPeerHeaders(message)); } From d8c495fe8bdb16dae2226d3baa0dd1229226e339 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 09:42:34 -0800 Subject: [PATCH 026/127] Refactor test to make multiple tests easier to run --- test/clustertest/tests/ForkCheckTest.cpp | 61 ++++++++++++++---------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp index 330c8735f..631df5264 100644 --- a/test/clustertest/tests/ForkCheckTest.cpp +++ b/test/clustertest/tests/ForkCheckTest.cpp @@ -1,3 +1,4 @@ +#include "test/lib/BedrockTester.h" #include #include @@ -7,7 +8,9 @@ struct ForkCheckTest : tpunit::TestFixture { ForkCheckTest() - : tpunit::TestFixture("ForkCheck", TEST(ForkCheckTest::test)) {} + : tpunit::TestFixture("ForkCheck", + TEST(ForkCheckTest::forkAtShutDown), + TEST(ForkCheckTest::forkAtCrash)) {} pair getMaxJournalCommit(BedrockTester& tester, bool online = true) { SQResult journals; @@ -30,40 +33,42 @@ struct ForkCheckTest : tpunit::TestFixture { return make_pair(maxJournalCommit, maxJournalTable); } - void test() { - // Create a cluster, wait for it to come up. 
- BedrockClusterTester tester(ClusterSize::FIVE_NODE_CLUSTER); - - // We'll tell the threads to stop when they're done. - atomic stop(false); - - // We want to not spam a stopped leader. - atomic leaderIsUp(true); - + vector createThreads(size_t num, BedrockClusterTester& tester, atomic& stop, atomic& leaderIsUp) { // Just use a bunch of copies of the same command. - SData spamCommand("idcollision"); - - // In a vector. - const vector commands(100, spamCommand); - - // Now create 9 threads spamming 100 commands at a time, each. 9 cause we have three nodes. vector threads; - for (size_t i = 0; i < 9; i++) { - threads.emplace_back([&tester, i, &commands, &stop, &leaderIsUp](){ + for (size_t num = 0; num < 9; num++) { + threads.emplace_back([&tester, num, &stop, &leaderIsUp](){ + const vector commands(100, SData("idcollision")); while (!stop) { // Pick a tester, send, don't care about the result. - size_t testerNum = i % 3; + size_t testerNum = num % 5; if (testerNum == 0 && !leaderIsUp) { - // If we're looking for leader and it's down, wait a second to avoid pegging the CPU. - sleep(1); - } else { - // If we're not leader or leader is up, spam away! - tester.getTester(testerNum).executeWaitMultipleData(commands); + // If leader's off, don't use it. + testerNum = 1; } + tester.getTester(testerNum).executeWaitMultipleData(commands); } }); } + return threads; + } + + // This primary test here checks that a node that is forked will not be able to rejoin the cluster when reconnecting. + // This is a reasonable test for a fork that happens at shutdown. + void forkAtShutDown() { + // Create a cluster, wait for it to come up. + BedrockClusterTester tester(ClusterSize::FIVE_NODE_CLUSTER); + + // We'll tell the threads to stop when they're done. + atomic stop(false); + + // We want to not spam a stopped leader. + atomic leaderIsUp(true); + + // Now create 15 threads spamming 100 commands at a time, each. 15 cause we have five nodes. 
+ vector threads = createThreads(15, tester, stop, leaderIsUp); + // Let them spam for a second. sleep(1); @@ -71,7 +76,7 @@ struct ForkCheckTest : tpunit::TestFixture { leaderIsUp = false; tester.getTester(0).stopServer(); - // Spam a few more commands and then we can stop. + // Spam a few more commands so thar the follower is ahead of the stopped leader, and then we can stop. sleep(1); stop = true; for (auto& t : threads) { @@ -125,4 +130,8 @@ struct ForkCheckTest : tpunit::TestFixture { // to wait for this timeout. tester.getTester(0).stopServer(); } + + void forkAtCrash() { + + } } __ForkCheckTest; From ac2aa66647d2fe6c331393bf2058ae62935bb2e9 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 10:05:00 -0800 Subject: [PATCH 027/127] Remove second test --- test/clustertest/tests/ForkCheckTest.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/test/clustertest/tests/ForkCheckTest.cpp b/test/clustertest/tests/ForkCheckTest.cpp index 631df5264..835cc76cd 100644 --- a/test/clustertest/tests/ForkCheckTest.cpp +++ b/test/clustertest/tests/ForkCheckTest.cpp @@ -4,13 +4,13 @@ #include #include #include +#include #include struct ForkCheckTest : tpunit::TestFixture { ForkCheckTest() : tpunit::TestFixture("ForkCheck", - TEST(ForkCheckTest::forkAtShutDown), - TEST(ForkCheckTest::forkAtCrash)) {} + TEST(ForkCheckTest::forkAtShutDown)) {} pair getMaxJournalCommit(BedrockTester& tester, bool online = true) { SQResult journals; @@ -66,7 +66,7 @@ struct ForkCheckTest : tpunit::TestFixture { // We want to not spam a stopped leader. atomic leaderIsUp(true); - // Now create 15 threads spamming 100 commands at a time, each. 15 cause we have five nodes. + // Now create 15 threads spamming 100 commands at a time, each. 15 because we have five nodes. vector threads = createThreads(15, tester, stop, leaderIsUp); // Let them spam for a second. @@ -130,8 +130,4 @@ struct ForkCheckTest : tpunit::TestFixture { // to wait for this timeout. 
tester.getTester(0).stopServer(); } - - void forkAtCrash() { - - } } __ForkCheckTest; From b74c92c8ee3af9d4a879fd8361c88d40bbaae458 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Thu, 5 Dec 2024 16:16:25 -0400 Subject: [PATCH 028/127] Update SQLite with more logs --- libstuff/sqlite3.c | 103 +++++++++++++++++++++++++++++++++++++++------ libstuff/sqlite3.h | 2 +- 2 files changed, 90 insertions(+), 15 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 8084ff4a9..e95e639a2 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 853f9cf453d13cf826443b0d27331e1f4e9e. +** 5f9f6764e9dffef60213bbc9604940ddfc71. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" +#define SQLITE_SOURCE_ID "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18229,10 +18229,19 @@ struct sqlite3 { #define SCHEMA_TIME_BEFORE_PREPARE 8 #define SCHEMA_TIME_BEFORE_FINALIZE 9 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 -#define SCHEMA_TIME_END_ANALYZE_LOAD 11 -#define SCHEMA_TIME_FINISH 12 -#define SCHEMA_TIME_N 13 +#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 +#define SCHEMA_TIME_AFTER_STAT1 12 +#define SCHEMA_TIME_AFTER_DEFAULTS 13 + +#define SCHEMA_TIME_AFTER_STAT4_Q1 14 +#define SCHEMA_TIME_AFTER_STAT4_Q2 15 +#define SCHEMA_TIME_AFTER_STAT4 16 + +#define SCHEMA_TIME_END_ANALYZE_LOAD 17 +#define SCHEMA_TIME_FINISH 18 + +#define SCHEMA_TIME_N 19 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) @@ -67552,6 +67561,25 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } +/* +** 
Zero the n byte block indicated by pointer p. n Must be a multiple of +** 8, and p must be aligned to an 8-byte boundary. +*/ +static void zero64(void *p, int n){ + size_t c = n / sizeof(u64); + void *d = p; + + assert( (n & 0x7)==0 ); + assert( EIGHT_BYTE_ALIGNMENT(p) ); + + __asm__ volatile ( + "rep stosq" + : "+D" (d), "+c" (c) + : "a" (0) + : "memory" + ); +} + /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. @@ -67590,10 +67618,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** entire hash table and aPgno[] array before proceeding. */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 ){ + if( idx==1 && sLoc.aPgno[0]!=0 ){ int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 ); - memset((void*)sLoc.aPgno, 0, nByte); + assert( nByte>=0 && (nByte & 0x07)==0 ); + zero64((void*)sLoc.aPgno, nByte); } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; @@ -67763,6 +67791,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; + memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68891,6 +68920,39 @@ static int walCheckpoint( } } + if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + /* In wal2 mode, a non-passive checkpoint waits for all readers of + ** the wal file just checkpointed to finish, then zeroes the hash + ** tables associated with that wal file. This is because in some + ** deployments, zeroing the hash tables as they are overwritten within + ** COMMIT commands is a significant performance hit. + ** + ** Currently, both of the "PART" locks are held for the wal file + ** being checkpointed. i.e. if iCkpt==0, then we already hold both + ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. 
If we now also take an + ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that + ** there are no remaining readers of the (iCkpt==0) wal file. Similar + ** logic, with different locks, is used for (iCkpt==1). + */ + int lockIdx = WAL_READ_LOCK( + iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 + ); + assert( iCkpt==0 || iCkpt==1 ); + rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); + if( rc==SQLITE_OK ){ + int iHash; + for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ + WalHashLoc sLoc; + int nByte; + memset(&sLoc, 0, sizeof(sLoc)); + walHashGet(pWal, iHash, &sLoc); + nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); + memset((void*)sLoc.aPgno, 0, nByte); + } + walUnlockExclusive(pWal, lockIdx, 1); + } + } + if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ @@ -93407,7 +93469,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); sqlite3_free(zStr); } } @@ -93435,7 +93497,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93451,7 +93513,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 
0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); sqlite3_free(zStr); } } @@ -100277,7 +100339,9 @@ case OP_AutoCommit: { u64 aCommit[COMMIT_TIME_N]; memset(aCommit, 0, sizeof(aCommit)); - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + if( iRollback==0 ){ + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + } if( iRollback ){ assert( desiredAutoCommit==1 ); @@ -100324,7 +100388,7 @@ case OP_AutoCommit: { rc = SQLITE_ERROR; } sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); - if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit); + if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -123783,6 +123847,8 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ @@ -123828,6 +123894,7 @@ static int loadStatTbl( pIdx->nSample++; } rc = sqlite3_finalize(pStmt); + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -123901,6 +123968,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ #endif } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); + /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zDbSName; @@ -123917,6 +123986,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); + /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -123924,6 +123995,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int 
iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); + /* Load the statistics from the sqlite_stat4 table. */ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -123938,6 +124011,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); + if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -257829,7 +257904,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 7d8654612..6287d400b 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" +#define SQLITE_SOURCE_ID "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e" /* ** CAPI3REF: Run-Time Library Version Numbers From e6774554997c399f2626d3e5f417de3c8d382c71 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 13:24:01 -0800 Subject: [PATCH 029/127] Fix last instance of _forkedFrom --- sqlitecluster/SQLiteNode.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index a26c84f71..5042e2e3e 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2027,15 +2027,17 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _db.setCommitEnabled(true); } -#if 0 // If we're going searching 
and have forked from at least 1 peer, sleep for a second. This is intended to prevent thousands of lines of log spam when this happens in an infinite // loop. It's entirely possible that we do this for valid reasons - it may be the peer that has the bad database and not us, and there are plenty of other reasons we could switch to // SEARCHING, but in those cases, we just wait an extra second before trying again. - if (newState == SQLiteNodeState::SEARCHING && _forkedFrom.size()) { + bool forkedPeers = false; + for (const auto p : _peerList) { + forkedPeers = forkedPeers || p->forked; + } + if (newState == SQLiteNodeState::SEARCHING && forkedPeers) { SWARN("Going searching while forked peers present, sleeping 1 second." << _getLostQuorumLogMessage()); sleep(1); } -#endif // Additional logic for some new states if (newState == SQLiteNodeState::LEADING) { From 91b83737a1a6db078b562d9dea155d7c179a5698 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 13:37:07 -0800 Subject: [PATCH 030/127] Remove MAX_PEER_FALL_BEHIND --- BedrockServer.cpp | 15 -------- sqlitecluster/SQLiteNode.cpp | 71 ------------------------------------ sqlitecluster/SQLiteNode.h | 9 ----- 3 files changed, 95 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index ea610213e..55bb30ae6 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1799,7 +1799,6 @@ bool BedrockServer::_isControlCommand(const unique_ptr& command) SIEquals(command->request.methodLine, "EnableSQLTracing") || SIEquals(command->request.methodLine, "BlockWrites") || SIEquals(command->request.methodLine, "UnblockWrites") || - SIEquals(command->request.methodLine, "SetMaxPeerFallBehind") || SIEquals(command->request.methodLine, "SetMaxSocketThreads") || SIEquals(command->request.methodLine, "CRASH_COMMAND") ) { @@ -1954,20 +1953,6 @@ void BedrockServer::_control(unique_ptr& command) { } else { response.methodLine = "401 Don't Use Zero"; } - } else if (SIEquals(command->request.methodLine, 
"SetMaxPeerFallBehind")) { - // Look up the existing value so we can report what it was. - uint64_t existingValue = SQLiteNode::MAX_PEER_FALL_BEHIND; - response["previousValue"] = to_string(existingValue); - - uint64_t newValue = command->request.calcU64("value"); - if (newValue < SQLiteNode::MIN_APPROVE_FREQUENCY) { - // We won't break everything on purpose. This can be used to check the existing value without changing anything by passing `0`. - response.methodLine = "400 Refusing to set peer fall behind below " + to_string(SQLiteNode::MIN_APPROVE_FREQUENCY); - } else { - // Set the new value and return 200 OK. - SQLiteNode::MAX_PEER_FALL_BEHIND = newValue; - response["previousValue"] = to_string(existingValue); - } } } diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 79be57fed..41b6d06df 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -64,10 +64,6 @@ SQLiteNode* SQLiteNode::KILLABLE_SQLITE_NODE{0}; // Initializations for static vars. const uint64_t SQLiteNode::RECV_TIMEOUT{STIME_US_PER_S * 30}; -// Setting this to 10 or lower may deadlock the server, as followers are only guaranteed to respond to every 10th message. -// If the threshold for blocking commits is less than 10, we may block, but never receive a message indicating that we should unblock. -atomic SQLiteNode::MAX_PEER_FALL_BEHIND{1000}; - const string SQLiteNode::CONSISTENCY_LEVEL_NAMES[] = {"ASYNC", "ONE", "QUORUM"}; @@ -110,17 +106,6 @@ const vector SQLiteNode::_initPeers(const string& peerListString) { return peerList; } -size_t SQLiteNode::_initQuorumSize(const vector& _peerList, const int priority) { - // We will start with one node required for quorum unless we're a permafollower, in which case, we'll start with 0 to exclude ourself. - size_t result{priority ? 
1ul : 0ul}; - for (const auto& p : _peerList) { - if (!p->permaFollower) { - ++result; - } - } - return result; -} - SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, const string& name, const string& host, const string& peerList, int priority, uint64_t firstTimeout, const string& version, const string& commandPort) @@ -129,7 +114,6 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, cons _name(name), _peerList(_initPeers(peerList)), _originalPriority(priority), - _quorumSize(_initQuorumSize(_peerList, _originalPriority)), _port(host.empty() ? nullptr : openPort(host, 30)), _version(version), _commitState(CommitState::UNINITIALIZED), @@ -1298,41 +1282,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } peer->setCommit(message.calcU64("CommitCount"), message["Hash"]); - // If we're leading, see if this peer meets the definition of "up-to-date", which is to say, it's close enough to in-sync with us. - // We can skip checking if the peer is a permafollower, because we don't care about his state. - if (!peer->permaFollower && _state == SQLiteNodeState::LEADING) { - if (peer->commitCount + MAX_PEER_FALL_BEHIND > getCommitCount()) { - _upToDatePeers.insert(peer); - } else { - _upToDatePeers.erase(peer); - } - - // Example - // Quorum size 3: - // We have 1 up-to-date peer. - // 1 >= 3/2 - // Integer division, so 3/2 = 1. - // 1 >= 1. - // quorumUpToDate = true - bool quorumUpToDate = _upToDatePeers.size() >= (_quorumSize / 2); - - if (quorumUpToDate && _commitsBlocked) { - _commitsBlocked = false; - SINFO("[clustersync] Cluster is no longer behind by over " << MAX_PEER_FALL_BEHIND << " commits. Unblocking new commits."); - _db.exclusiveUnlockDB(); - } else if (!quorumUpToDate && !_commitsBlocked && !_db.insideTransaction()) { - _commitsBlocked = true; - uint64_t myCommitCount = getCommitCount(); - SWARN("[clustersync] Cluster is behind by over " << MAX_PEER_FALL_BEHIND << " commits. 
New commits blocked until the cluster catches up."); - uint64_t start = STimeNow(); - _db.exclusiveLockDB(); - SINFO("[clustersync] Took " << (STimeNow() - start) << "us to block commits. Dumping cluster commit state. I have commit: " << myCommitCount); - for (const auto& p : _peerList) { - SINFO("[clustersync] Peer " << p->name << " has commit " << p->commitCount << ", behind by: " << (myCommitCount - p->commitCount)); - } - } - } - // Classify and process the message if (SIEquals(message.methodLine, "LOGIN")) { // LOGIN: This is the first message sent to and received from a new peer. It communicates the current state of @@ -1993,14 +1942,6 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _db.popCommittedTransactions(); _lastSentTransactionID = _db.getCommitCount(); } - - // Mark peers that are up-to-date so we have a valid starting state. - _upToDatePeers.clear(); - for (const auto& peer : _peerList) { - if (!peer->permaFollower && (peer->commitCount + MAX_PEER_FALL_BEHIND > getCommitCount())) { - _upToDatePeers.insert(peer); - } - } } else if (newState == SQLiteNodeState::STANDINGDOWN) { // start the timeout countdown. _standDownTimeout.alarmDuration = STIME_US_PER_S * 30; // 30s timeout before we give up @@ -2015,18 +1956,6 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance } } - // If we've blocked commits, unblock before switching states. This implies we *were* leading and now are not, - // so commits remaining blocked doesn't really make sense any more anyway, except in the case where we're switching - // from LEADING to STANDINGDOWN in which case we *could* keep this blocked, though that'd be weird, too. We'd - // need to wait around with commits blocked until the cluster caught up, so that we could really start shutting down, which - // stops processing new commands anyway. We might as well just run through whatever's waiting. 
- // But also, there's another reason to do this even in the LEADING->STANDINGDOWN case, and that's because the locks acquired in - // exclusiveLockDB() are not recursive, so we need to release them before we call `exclusiveLockDB` again just after this `if` block. - if (_commitsBlocked) { - _commitsBlocked = false; - _db.exclusiveUnlockDB(); - } - // IMPORTANT: Don't return early or throw from this method after here. // Note: _stateMutex is already locked here (by update, _replicate, or postPoll). _db.exclusiveLockDB(); diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 7c14667d4..652f157f0 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -90,9 +90,6 @@ class SQLiteNode : public STCPManager { // This is expressed as "every Nth message", where e.g., if MIN_APPROVE_FREQUENCY is 10, we will respond to at least every 10th BEGIN_TRANSACTION message. static const size_t MIN_APPROVE_FREQUENCY; - // The maximum number of commits behind we'll allow a quorum number of peers to be before we block commits on leader. - static atomic MAX_PEER_FALL_BEHIND; - // Get and SQLiteNode State from it's name. static SQLiteNodeState stateFromName(const string& name); @@ -215,7 +212,6 @@ class SQLiteNode : public STCPManager { static atomic currentReplicateThreadID; static const vector _initPeers(const string& peerList); - static size_t _initQuorumSize(const vector& _peerList, const int priority); // Queue a SYNCHRONIZE message based on the current state of the node, thread-safe, but you need to pass the // *correct* DB for the thread that's making the call (i.e., you can't use the node's internal DB from a worker @@ -288,11 +284,6 @@ class SQLiteNode : public STCPManager { // to make sure it's up-to-date. Store the configured priority here and use "-1" until we're ready to fully join the cluster. const int _originalPriority; - // If we're leading and we're too far ahead of the rest of the cluster, we block new commits. 
This prevents us from forking too far ahead of everyone else. - const size_t _quorumSize; - bool _commitsBlocked{false}; - set _upToDatePeers; - // A string representing an address (i.e., `127.0.0.1:80`) where this server accepts commands. I.e., "the command port". const unique_ptr _port; From 54cdcbc1ca8e549387338781da27933e9336fab1 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 19:44:30 -0800 Subject: [PATCH 031/127] Code review feedback --- sqlitecluster/SQLiteNode.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2bd99cd2f..e7c1a3703 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1720,7 +1720,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } else if (SIEquals(message.methodLine, "FORKED")) { peer->forked = true; - PINFO("Peer said we're forked, beleiving them."); + PINFO("Peer said we're forked, believing them."); _dieIfForkedFromCluster(); // If leader said we're forked from it, we need a new leader. @@ -2825,14 +2825,23 @@ void SQLiteNode::_sendStandupResponse(SQLitePeer* peer, const SData& message) { } void SQLiteNode::_dieIfForkedFromCluster() { - size_t forkedCount = 0; + size_t quorumNodeCount = 0; + size_t forkedFullPeerCount = 0; for (const auto& p : _peerList) { - if (p->forked) { - forkedCount++; + if (!p->permaFollower) { + forkedFullPeerCount++; + if (p->forked) { + forkedFullPeerCount++; + } } } - if (forkedCount > ((_peerList.size() + 1) / 2)) { - SERROR("I have forked from over half the cluster (" << forkedCount << " nodes). This is unrecoverable." << _getLostQuorumLogMessage()); + // Am *I* a permaFollower? + if (_originalPriority != 0) { + quorumNodeCount++; + } + + if (forkedFullPeerCount >= (quorumNodeCount + 1) / 2) { + SERROR("I have forked from over half the cluster (" << forkedFullPeerCount << " nodes). This is unrecoverable." 
<< _getLostQuorumLogMessage()); } } From 7c06bf595122db6b9ae23cdb20914295c7cd926e Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 5 Dec 2024 19:48:43 -0800 Subject: [PATCH 032/127] Revert "Update SQLite with a small optimization + more logs (v15)" --- libstuff/sqlite3.c | 103 ++++++--------------------------------------- libstuff/sqlite3.h | 2 +- 2 files changed, 15 insertions(+), 90 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index e95e639a2..8084ff4a9 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 5f9f6764e9dffef60213bbc9604940ddfc71. +** 853f9cf453d13cf826443b0d27331e1f4e9e. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e" +#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18229,19 +18229,10 @@ struct sqlite3 { #define SCHEMA_TIME_BEFORE_PREPARE 8 #define SCHEMA_TIME_BEFORE_FINALIZE 9 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 +#define SCHEMA_TIME_END_ANALYZE_LOAD 11 +#define SCHEMA_TIME_FINISH 12 -#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 -#define SCHEMA_TIME_AFTER_STAT1 12 -#define SCHEMA_TIME_AFTER_DEFAULTS 13 - -#define SCHEMA_TIME_AFTER_STAT4_Q1 14 -#define SCHEMA_TIME_AFTER_STAT4_Q2 15 -#define SCHEMA_TIME_AFTER_STAT4 16 - -#define SCHEMA_TIME_END_ANALYZE_LOAD 17 -#define SCHEMA_TIME_FINISH 18 - -#define SCHEMA_TIME_N 19 +#define SCHEMA_TIME_N 13 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) @@ -67561,25 +67552,6 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } -/* -** Zero the n 
byte block indicated by pointer p. n Must be a multiple of -** 8, and p must be aligned to an 8-byte boundary. -*/ -static void zero64(void *p, int n){ - size_t c = n / sizeof(u64); - void *d = p; - - assert( (n & 0x7)==0 ); - assert( EIGHT_BYTE_ALIGNMENT(p) ); - - __asm__ volatile ( - "rep stosq" - : "+D" (d), "+c" (c) - : "a" (0) - : "memory" - ); -} - /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. @@ -67618,10 +67590,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** entire hash table and aPgno[] array before proceeding. */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 && sLoc.aPgno[0]!=0 ){ + if( idx==1 ){ int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 && (nByte & 0x07)==0 ); - zero64((void*)sLoc.aPgno, nByte); + assert( nByte>=0 ); + memset((void*)sLoc.aPgno, 0, nByte); } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; @@ -67791,7 +67763,6 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; - memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68920,39 +68891,6 @@ static int walCheckpoint( } } - if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ - /* In wal2 mode, a non-passive checkpoint waits for all readers of - ** the wal file just checkpointed to finish, then zeroes the hash - ** tables associated with that wal file. This is because in some - ** deployments, zeroing the hash tables as they are overwritten within - ** COMMIT commands is a significant performance hit. - ** - ** Currently, both of the "PART" locks are held for the wal file - ** being checkpointed. i.e. if iCkpt==0, then we already hold both - ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. 
If we now also take an - ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that - ** there are no remaining readers of the (iCkpt==0) wal file. Similar - ** logic, with different locks, is used for (iCkpt==1). - */ - int lockIdx = WAL_READ_LOCK( - iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 - ); - assert( iCkpt==0 || iCkpt==1 ); - rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); - if( rc==SQLITE_OK ){ - int iHash; - for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ - WalHashLoc sLoc; - int nByte; - memset(&sLoc, 0, sizeof(sLoc)); - walHashGet(pWal, iHash, &sLoc); - nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - memset((void*)sLoc.aPgno, 0, nByte); - } - walUnlockExclusive(pWal, lockIdx, 1); - } - } - if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ @@ -93469,7 +93407,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr); sqlite3_free(zStr); } } @@ -93497,7 +93435,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93513,7 +93451,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 
0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr); sqlite3_free(zStr); } } @@ -100339,9 +100277,7 @@ case OP_AutoCommit: { u64 aCommit[COMMIT_TIME_N]; memset(aCommit, 0, sizeof(aCommit)); - if( iRollback==0 ){ - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); - } + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); if( iRollback ){ assert( desiredAutoCommit==1 ); @@ -100388,7 +100324,7 @@ case OP_AutoCommit: { rc = SQLITE_ERROR; } sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); - if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); + if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -123847,8 +123783,6 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); - while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ @@ -123894,7 +123828,6 @@ static int loadStatTbl( pIdx->nSample++; } rc = sqlite3_finalize(pStmt); - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -123968,8 +123901,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ #endif } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); - /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zDbSName; @@ -123986,8 +123917,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); - /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -123995,8 +123924,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int 
iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); - /* Load the statistics from the sqlite_stat4 table. */ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -124011,8 +123938,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); - if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -257904,7 +257829,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 6287d400b..7d8654612 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-05 19:45:14 5f9f6764e9dffef60213bbc9604940ddfc713436333c3f62ed8a090697fcbb1e" +#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" /* ** CAPI3REF: Run-Time Library Version Numbers From ff664a7857dc062662a8b4f39661d357c40925da Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Fri, 6 Dec 2024 10:48:02 -0400 Subject: [PATCH 033/127] SQLite fix for non x86 arch --- libstuff/sqlite3.c | 107 +++++++++++++++++++++++++++++++++++++++------ libstuff/sqlite3.h | 2 +- 2 files changed, 94 insertions(+), 15 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 8084ff4a9..f10a739a6 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. 
** ** The content in this amalgamation comes from Fossil check-in -** 853f9cf453d13cf826443b0d27331e1f4e9e. +** 5fa1699e31856a6585cc59183641c4cc99e1. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" +#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18229,10 +18229,19 @@ struct sqlite3 { #define SCHEMA_TIME_BEFORE_PREPARE 8 #define SCHEMA_TIME_BEFORE_FINALIZE 9 #define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 -#define SCHEMA_TIME_END_ANALYZE_LOAD 11 -#define SCHEMA_TIME_FINISH 12 -#define SCHEMA_TIME_N 13 +#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 +#define SCHEMA_TIME_AFTER_STAT1 12 +#define SCHEMA_TIME_AFTER_DEFAULTS 13 + +#define SCHEMA_TIME_AFTER_STAT4_Q1 14 +#define SCHEMA_TIME_AFTER_STAT4_Q2 15 +#define SCHEMA_TIME_AFTER_STAT4 16 + +#define SCHEMA_TIME_END_ANALYZE_LOAD 17 +#define SCHEMA_TIME_FINISH 18 + +#define SCHEMA_TIME_N 19 #define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) @@ -67552,6 +67561,29 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } +/* +** Zero the n byte block indicated by pointer p. n Must be a multiple of +** 8, and p must be aligned to an 8-byte boundary. +*/ +static void zero64(void *p, int n){ +#if defined(__x86_64__) + size_t c = n / sizeof(u64); + void *d = p; + + assert( (n & 0x7)==0 ); + assert( EIGHT_BYTE_ALIGNMENT(p) ); + + __asm__ volatile ( + "rep stosq" + : "+D" (d), "+c" (c) + : "a" (0) + : "memory" + ); +#else + memset(p, 0, n); +#endif +} + /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. 
@@ -67590,10 +67622,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** entire hash table and aPgno[] array before proceeding. */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 ){ + if( idx==1 && sLoc.aPgno[0]!=0 ){ int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 ); - memset((void*)sLoc.aPgno, 0, nByte); + assert( nByte>=0 && (nByte & 0x07)==0 ); + zero64((void*)sLoc.aPgno, nByte); } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; @@ -67763,6 +67795,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; + memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68891,6 +68924,39 @@ static int walCheckpoint( } } + if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + /* In wal2 mode, a non-passive checkpoint waits for all readers of + ** the wal file just checkpointed to finish, then zeroes the hash + ** tables associated with that wal file. This is because in some + ** deployments, zeroing the hash tables as they are overwritten within + ** COMMIT commands is a significant performance hit. + ** + ** Currently, both of the "PART" locks are held for the wal file + ** being checkpointed. i.e. if iCkpt==0, then we already hold both + ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an + ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that + ** there are no remaining readers of the (iCkpt==0) wal file. Similar + ** logic, with different locks, is used for (iCkpt==1). + */ + int lockIdx = WAL_READ_LOCK( + iCkpt==0 ? 
WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 + ); + assert( iCkpt==0 || iCkpt==1 ); + rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); + if( rc==SQLITE_OK ){ + int iHash; + for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ + WalHashLoc sLoc; + int nByte; + memset(&sLoc, 0, sizeof(sLoc)); + walHashGet(pWal, iHash, &sLoc); + nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); + memset((void*)sLoc.aPgno, 0, nByte); + } + walUnlockExclusive(pWal, lockIdx, 1); + } + } + if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. */ @@ -93407,7 +93473,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=12): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); sqlite3_free(zStr); } } @@ -93435,7 +93501,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=12): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93451,7 +93517,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 
0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=12): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); sqlite3_free(zStr); } } @@ -100277,7 +100343,9 @@ case OP_AutoCommit: { u64 aCommit[COMMIT_TIME_N]; memset(aCommit, 0, sizeof(aCommit)); - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + if( iRollback==0 ){ + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + } if( iRollback ){ assert( desiredAutoCommit==1 ); @@ -100324,7 +100392,7 @@ case OP_AutoCommit: { rc = SQLITE_ERROR; } sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); - if( desiredAutoCommit ) sqlite3CommitTimeLog(aCommit); + if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -123783,6 +123851,8 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ @@ -123828,6 +123898,7 @@ static int loadStatTbl( pIdx->nSample++; } rc = sqlite3_finalize(pStmt); + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -123901,6 +123972,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ #endif } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); + /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; sInfo.zDatabase = db->aDb[iDb].zDbSName; @@ -123917,6 +123990,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); + /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -123924,6 +123999,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int 
iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); + /* Load the statistics from the sqlite_stat4 table. */ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -123938,6 +124015,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); + if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -257829,7 +257908,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 7d8654612..c79479408 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-11-20 20:26:59 853f9cf453d13cf826443b0d27331e1f4e9e06f9f4ce674f22ccadad86e20937" +#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" /* ** CAPI3REF: Run-Time Library Version Numbers From 5456a9888fb1903ff29a2f97144df705fa6caaa3 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Fri, 6 Dec 2024 14:05:42 -0400 Subject: [PATCH 034/127] Update SQLite with more logs, v16 --- libstuff/sqlite3.c | 41 +++++++++++++++++++++++++---------------- libstuff/sqlite3.h | 2 +- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index f10a739a6..b3c5eebf7 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. 
** ** The content in this amalgamation comes from Fossil check-in -** 5fa1699e31856a6585cc59183641c4cc99e1. +** 65b753735b8e8fb70d2b522d527426f1eb5c. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" +#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18234,21 +18234,22 @@ struct sqlite3 { #define SCHEMA_TIME_AFTER_STAT1 12 #define SCHEMA_TIME_AFTER_DEFAULTS 13 -#define SCHEMA_TIME_AFTER_STAT4_Q1 14 -#define SCHEMA_TIME_AFTER_STAT4_Q2 15 -#define SCHEMA_TIME_AFTER_STAT4 16 +#define SCHEMA_TIME_STAT4_Q1_BODY 14 +#define SCHEMA_TIME_AFTER_STAT4_Q1 15 +#define SCHEMA_TIME_AFTER_STAT4_Q2 16 +#define SCHEMA_TIME_AFTER_STAT4 17 -#define SCHEMA_TIME_END_ANALYZE_LOAD 17 -#define SCHEMA_TIME_FINISH 18 +#define SCHEMA_TIME_END_ANALYZE_LOAD 18 +#define SCHEMA_TIME_FINISH 19 -#define SCHEMA_TIME_N 19 -#define SCHEMA_TIME_TIMEOUT (2 * 1000 * 1000) +#define SCHEMA_TIME_N 20 +#define SCHEMA_TIME_TIMEOUT (0 * 1000 * 1000) #define sqlite3PrepareTimeSet(x,y) sqlite3CommitTimeSet(x,y) SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrepareTime); -SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime); +SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile); #define PREPARE_TIME_TIMEOUT (2 * 1000 * 1000) /* 2 second timeout */ @@ -93473,7 +93474,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=16): (%s)", zStr); sqlite3_free(zStr); } } @@ -93501,12 +93502,12 @@ SQLITE_PRIVATE void 
sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=15): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=16): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } } -SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ +SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ u64 i1 = aSchema[SCHEMA_TIME_START]; assert( SCHEMA_TIME_START==0 && SCHEMA_TIME_FINISH==SCHEMA_TIME_N-1 ); if( aSchema[SCHEMA_TIME_FINISH]>(i1+SCHEMA_TIME_TIMEOUT) ){ @@ -93517,7 +93518,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema){ (aSchema[ii]==0 ? 0 : (int)(aSchema[ii] - i1)) ); } - sqlite3_log(SQLITE_WARNING, "slow schema (v=15): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (%s) (v=16): (%s)", zFile, zStr); sqlite3_free(zStr); } } @@ -123800,6 +123801,8 @@ static int loadStatTbl( tRowcnt *pSpace; /* Available allocated memory space */ u8 *pPtr; /* Available memory as a u8 for easier manipulation */ + u64 t = sqlite3STimeNow(); + zIndex = (char *)sqlite3_column_text(pStmt, 0); if( zIndex==0 ) continue; nSample = sqlite3_column_int(pStmt, 1); @@ -123839,6 +123842,9 @@ static int loadStatTbl( pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; } assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_Q1_BODY] += (sqlite3STimeNow() - t); + } } rc = sqlite3_finalize(pStmt); if( rc ) return rc; @@ -145944,7 +145950,10 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl error_out: db->aSchemaTime = 0; sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_FINISH); - sqlite3SchemaTimeLog(aSchemaTime); + if( rc==SQLITE_OK && iDb==0 ){ + const char *zFile = sqlite3BtreeGetFilename(pDb->pBt); + sqlite3SchemaTimeLog(aSchemaTime, zFile); + } if( rc ){ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ sqlite3OomFault(db); @@ -257908,7 +257917,7 @@ static void 
fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index c79479408..ff4f7b3f8 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 09:36:28 5fa1699e31856a6585cc59183641c4cc99e1ccd99c384d13e9c4442e9c07f41a" +#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" /* ** CAPI3REF: Run-Time Library Version Numbers From f4bb1f1a58c2076d75b2b3becbcd3810bfb33bac Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 6 Dec 2024 14:22:41 -0800 Subject: [PATCH 035/127] Fix wrong count being incremented --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 29aad0bcc..e0281c1e1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2837,7 +2837,7 @@ void SQLiteNode::_dieIfForkedFromCluster() { size_t forkedFullPeerCount = 0; for (const auto& p : _peerList) { if (!p->permaFollower) { - forkedFullPeerCount++; + quorumNodeCount++; if (p->forked) { forkedFullPeerCount++; } From 8d3fad988ae525735cef2fc54b7e0ce4ca87585b Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 6 Dec 2024 14:25:30 -0800 Subject: [PATCH 036/127] Change comment --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index e0281c1e1..2907e7e84 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ 
b/sqlitecluster/SQLiteNode.cpp @@ -1273,7 +1273,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { return; } - // We allow PING and PONG even for bad peers just to avoid them getting caught in reconnect cycles. + // We ignore everything except PING and PONG from forked nodes, so we can return here in that case. if (peer->forked) { PINFO("Received message " << message.methodLine << " from forked peer, ignoring."); return; From c86f79953454fe2b25ac3f8ad0321fdc03f82ae5 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 6 Dec 2024 14:26:08 -0800 Subject: [PATCH 037/127] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2907e7e84..5a75611cd 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2844,7 +2844,7 @@ void SQLiteNode::_dieIfForkedFromCluster() { } } - // Am *I* a permaFollower? + // Increase quorumNodeCount if *I* am not a permafollower if (_originalPriority != 0) { quorumNodeCount++; } From 72bb681807990909932613676a8a6924a6d06e36 Mon Sep 17 00:00:00 2001 From: Monil Bhavsar Date: Mon, 9 Dec 2024 23:15:20 +0530 Subject: [PATCH 038/127] Update write method to accept result argument --- sqlitecluster/SQLite.cpp | 10 ++++++++++ sqlitecluster/SQLite.h | 6 +++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index da14f4590..12e74c634 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -570,6 +570,16 @@ bool SQLite::write(const string& query) { return _writeIdempotent(query, ignore); } +bool SQLite::write(const string& query, SQResult& result) { + if (_noopUpdateMode) { + SALERT("Non-idempotent write in _noopUpdateMode. 
Query: " << query); + return true; + } + + // This is literally identical to the idempotent version except for the check for _noopUpdateMode. + return _writeIdempotent(query, result); +} + bool SQLite::writeIdempotent(const string& query) { SQResult ignore; return _writeIdempotent(query, ignore); diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index e8c68e3e9..ac3e786fd 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -101,10 +101,14 @@ class SQLite { bool addColumn(const string& tableName, const string& column, const string& columnType); // Performs a read/write query (eg, INSERT, UPDATE, DELETE). This is added to the current transaction's query list. - // Returns true on success. + // Returns true on success. // If we're in noop-update mode, this call alerts and performs no write, but returns as if it had completed. bool write(const string& query); + // Performs a read/write query + // Designed for use with queries that include a RETURNING clause + bool write(const string& query, SQResult& result); + // This is the same as `write` except it runs successfully without any warnings or errors in noop-update mode. // It's intended to be used for `mockRequest` enabled commands, such that we only run a version of them that's // known to be repeatable. What counts as repeatable is up to the individual command. 
From caf0ed3ae9ad811e3d92f8a737633e53286a2e48 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 9 Dec 2024 16:32:31 -0400 Subject: [PATCH 039/127] Allow the checkpointer to zero the old *-shm pages instead of the writer who does so in the COMMIT block --- sqlitecluster/SQLite.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index e4f63acb2..178ffb035 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -797,7 +797,9 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); + sqlite3_busy_timeout(_db, 120'000); // 2 minutes + sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_FULL, NULL, &framesCheckpointed); + sqlite3_busy_timeout(_db, 0); auto end = STimeNow(); SINFO("Checkpointed " << framesCheckpointed << " (total) frames of " << _sharedData.outstandingFramesToCheckpoint << " in " << (end - start) << "us."); From a27c258f8933dcb7eaf333f91bbce247051d75ac Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 9 Dec 2024 16:57:55 -0800 Subject: [PATCH 040/127] A bunch of notes --- sqlitecluster/SQLiteNode.cpp | 26 +++++++++++++++++----- sqlitecluster/SQLiteSequentialNotifier.cpp | 8 ++++++- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5a75611cd..e789bde1f 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -187,6 +187,10 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn } _replicateStartCV.notify_all(); + if (_replicationThreadsShouldExit) { + SINFO("Late replicate start, just exiting."); + } + // Initialize each new thread with a new number. 
SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); @@ -214,12 +218,18 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn uint64_t waitForCount = SStartsWith(command["ID"], "ASYNC") ? command.calcU64("dbCountAtStart") : currentCount; SINFO("[performance] BEGIN_TRANSACTION replicate thread for commit " << newCount << " waiting on DB count " << waitForCount << " (" << (quorum ? "QUORUM" : "ASYNC") << ")"); while (true) { + // Ok, why doesn't this get counted? + // It's waiting on: commit 26056807239 waiting on DB count 26056807238 + // It seems like this should return immediately. + // Ok, is it possible we got past here and waited somewhere else? We were either stuck here, or... SQLiteSequentialNotifier::RESULT result = _localCommitNotifier.waitFor(waitForCount, false); + // My current inclination is that maybe we reset the commit notifier before the thread really starts. if (result == SQLiteSequentialNotifier::RESULT::UNKNOWN) { // This should be impossible. SERROR("Got UNKNOWN result from waitFor, which shouldn't happen"); } else if (result == SQLiteSequentialNotifier::RESULT::COMPLETED) { // Success case. + // If we didn't get stuck above, we must have hit here, because otherwise we would have logged. break; } else if (result == SQLiteSequentialNotifier::RESULT::CANCELED) { SINFO("_localCommitNotifier.waitFor canceled early, returning."); @@ -237,6 +247,7 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn if (commitAttemptCount > 1) { SINFO("Commit attempt number " << commitAttemptCount << " for concurrent replication."); } + // We never log this line, so we can't have gotten to here. 
SINFO("[performance] BEGIN for commit " << newCount); bool uniqueContraintsError = false; try { @@ -1640,14 +1651,20 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { throw e; } } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { + // Race condition here. What if _replicationThreadsShouldExit changes after this check? if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { + // Ok, so the race condition could be here, right? + // Right this instance, the thread count is 0, so we can move past the check that + // Waits for it to be 0. + // But then this thread starts. Can that happen? auto threadID = _replicationThreadCount.fetch_add(1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); _replicateThreadStarted = false; + // Either here. thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); { unique_lock lock(_replicateStartMutex); @@ -1917,12 +1934,8 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. 
- size_t infoCount = 1; while (_replicationThreadCount) { - if (infoCount % 100 == 0) { - SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); - } - infoCount++; + SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } @@ -1930,8 +1943,11 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance _replicationThreadsShouldExit = false; // Guaranteed to be done right now. + // I bet this is wrong when these get reset. If we no threads, these get reset before they check? + // That doesn't make sense to me for when we should increment _replicationThreadCount _localCommitNotifier.reset(); _leaderCommitNotifier.reset(); + // If the above completed, we should immediately see `Switching from... ` logged. // We have no leader anymore. _leadPeer = nullptr; diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index 2af9c731c..69d30f1ac 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -31,6 +31,8 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu SINFO("Canceled after " << _cancelAfter << ", but waiting for " << value << " so not returning yet."); } else { // Canceled and we're not before the cancellation cutoff. + // I don't see how we don't return here. Maybe we never acquire `waitingThreadMutex`? + SINFO("Returning canceled because _cancelAfter=" << _cancelAfter << " and value=" << value); return RESULT::CANCELED; } } else if (_globalResult != RESULT::UNKNOWN) { @@ -52,6 +54,8 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu // We should investigate any instances of thew below logline to see if they're same as for the success cases mentioned above (i.e., the timeout happens simultaneously as the // cancellation) or if the log line is delayed by up to a second (indicating a problem). 
if (_globalResult == RESULT::CANCELED || state->result == RESULT::CANCELED) { + // I bet removing a 1 second delay means we don't hit this. we get to calling `reset` on this sooner, possibly while there are threads waiting here. + // It's possible that we hit the timeout here after `cancel()` has set the global value, but before we received the notification. // This isn't a problem, and we can jump back to the top of the loop and check again. If there's some problem, we'll see it there. SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or " << " specific cancel " << (state->result == RESULT::CANCELED)); @@ -114,10 +118,12 @@ void SQLiteSequentialNotifier::cancel(uint64_t cancelAfter) { auto& valueThreadMap = *valueThreadMapPtr; // If cancelAfter is specified, start from that value. Otherwise, we start from the beginning. auto start = _cancelAfter ? valueThreadMap.upper_bound(_cancelAfter) : valueThreadMap.begin(); - SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); if (start == valueThreadMap.end()) { // There's nothing to remove. + SINFO("[performance] Next value to cancel after " << cancelAfter << " is N/A"); return; + } else { + SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); } // Now iterate across whatever's remaining and mark it canceled. 
From d9ec8964aa99d695601c335dbc458e71053fb361 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 9 Dec 2024 17:47:49 -0800 Subject: [PATCH 041/127] More notes --- sqlitecluster/SQLiteNode.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index e789bde1f..920f03e10 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -199,6 +199,19 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn // Allow the DB handle to be returned regardless of how this function exits. SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); + // In dev, we sometimes crash on the destructor for the above after `detach` which implies to me that the DB Pool could have been deleted before we exited. + // This is a different manifestation of what could be the same issue. + // IN dev, we get: + // 2024-12-09T21:12:45.030255+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:215) _replicate [replicate2065] [info] {cluster_node_4/SEARCHING} [performance] BEGIN_TRANSACTION replicate thread for commit 15003 waiting on DB count 15001 (ASYNC) + // 2024-12-09T21:12:49.023666+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1913) _changeState [sync] [info] {cluster_node_4/FOLLOWING} Replication threads should exit, canceling commits after current leader commit 0 + // 2024-12-09T21:12:49.030957+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:57) waitFor [replicate2065] [info] Hit 1s timeout while global cancel 1 or specific cancel 0 + // 2024-12-09T21:12:49.030963+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:225) _replicate [replicate2065] [info] {cluster_node_4/FOLLOWING} _localCommitNotifier.waitFor canceled early, returning. 
+ // 2024-12-09T21:12:49.030977+00:00 expensidev2004 bedrock10013: xxxxxx (SSignal.cpp:193) _SSignal_StackTrace [replicate2065] [warn] Signal Segmentation fault(11) caused crash, logging stack trace. + // Why doesn't this get cancelled as well? + // I'm not sure why dev thinks leader has commit 0. + // 2024-12-09T21:12:49.023613+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1902) _changeState [sync] [info] {cluster_node_4/FOLLOWING} [NOTIFY] setting commit count to: 17001 + // 2024-12-09T21:12:49.023674+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:105) cancel [sync] [info] Canceling all pending transactions after 0 + SQLite& db = dbScope.db(); bool goSearchingOnExit = false; @@ -224,6 +237,10 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn // Ok, is it possible we got past here and waited somewhere else? We were either stuck here, or... SQLiteSequentialNotifier::RESULT result = _localCommitNotifier.waitFor(waitForCount, false); // My current inclination is that maybe we reset the commit notifier before the thread really starts. + + // I think we get stuck in `waitFor` because it's been reset to 0 and we're waiting for every commit from the + // Beginning of time. I'm not acutally sure why it ever returns, though. + if (result == SQLiteSequentialNotifier::RESULT::UNKNOWN) { // This should be impossible. SERROR("Got UNKNOWN result from waitFor, which shouldn't happen"); @@ -1653,12 +1670,17 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { // Race condition here. What if _replicationThreadsShouldExit changes after this check? if (_replicationThreadsShouldExit) { + // Interestingly, this doesn't happen. + // I think load is light when this issue occurs. 
SINFO("Discarding replication message, stopping FOLLOWING"); } else { // Ok, so the race condition could be here, right? // Right this instance, the thread count is 0, so we can move past the check that // Waits for it to be 0. // But then this thread starts. Can that happen? + // So the sync thread does the state switch. + // Who is running this though, it should also be the sync thread? + // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { @@ -1938,6 +1960,9 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } + // How can the above fail???? + // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. + // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. // Done exiting. Reset so that we can resume FOLLOWING in the future. _replicationThreadsShouldExit = false; From 7784a9700a72a68dd969809bbbae764bd42b290b Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Tue, 10 Dec 2024 10:11:12 -0400 Subject: [PATCH 042/127] Update SQLite with a fix for non passive checkpoints --- libstuff/sqlite3.c | 8 ++++---- libstuff/sqlite3.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index b3c5eebf7..727617327 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 65b753735b8e8fb70d2b522d527426f1eb5c. +** 1a59cae3c31aea25cef3705cce2477e26515. 
*/ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" +#define SQLITE_SOURCE_ID "2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -71575,7 +71575,7 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. */ - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE && isWalMode2(pWal)==0 ){ rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; @@ -257917,7 +257917,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index ff4f7b3f8..77aaa8cbd 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-06 17:52:38 65b753735b8e8fb70d2b522d527426f1eb5c09339fb4b15cf69cbd2e595b160f" +#define SQLITE_SOURCE_ID "2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767" /* ** CAPI3REF: Run-Time Library Version Numbers From 730efe6ce2670aafec9747744f81e05244d9b9a2 Mon Sep 17 00:00:00 2001 From: John Lee Date: Tue, 10 Dec 2024 17:21:50 +0000 Subject: [PATCH 043/127] Handle open transactions monitoring on nodes --- sqlitecluster/SQLite.cpp | 20 +++++++++++++++++++- 
sqlitecluster/SQLite.h | 5 +++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index e4f63acb2..e0c84d7c1 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -401,10 +401,13 @@ bool SQLite::beginTransaction(TRANSACTION_TYPE type) { // Reset before the query, as it's possible the query sets these. _autoRolledBack = false; - SINFO("[concurrent] Beginning transaction"); + SINFO("[concurrent] Beginning transaction - open transaction count: " << (_sharedData.openTransactionCount + 1)); uint64_t before = STimeNow(); _insideTransaction = !SQuery(_db, "starting db transaction", "BEGIN CONCURRENT"); + _sharedData.incrementOpenTransactions(); + SINFO("Open transaction count: " << _sharedData.openTransactionCount); + // Because some other thread could commit once we've run `BEGIN CONCURRENT`, this value can be slightly behind // where we're actually able to start such that we know we shouldn't get a conflict if this commits successfully on // leader. However, this is perfectly safe, it just adds the possibility that threads on followers wait for an @@ -788,6 +791,8 @@ int SQLite::commit(const string& description, function* preCheckpointCal _mutexLocked = false; _queryCache.clear(); + _sharedData.decrementOpenTransactions(); + if (preCheckpointCallback != nullptr) { (*preCheckpointCallback)(); } @@ -843,6 +848,8 @@ void SQLite::rollback() { _rollbackElapsed += STimeNow() - before; } + _sharedData.decrementOpenTransactions(); + // Finally done with this. 
_insideTransaction = false; _uncommittedHash.clear(); @@ -1168,6 +1175,7 @@ string SQLite::getLastConflictTable() const { SQLite::SharedData::SharedData() : nextJournalCount(0), _commitEnabled(true), +openTransactionCount(0), _commitLockTimer("commit lock timer", { {"EXCLUSIVE", chrono::steady_clock::duration::zero()}, {"SHARED", chrono::steady_clock::duration::zero()}, @@ -1201,6 +1209,16 @@ void SQLite::SharedData::commitTransactionInfo(uint64_t commitID) { _committedTransactions.insert(_preparedTransactions.extract(commitID)); } +void SQLite::SharedData::incrementOpenTransactions() { + lock_guard lock(_internalStateMutex); + openTransactionCount++; +} + +void SQLite::SharedData::decrementOpenTransactions() { + lock_guard lock(_internalStateMutex); + openTransactionCount--; +} + map> SQLite::SharedData::popCommittedTransactions() { lock_guard lock(_internalStateMutex); decltype(_committedTransactions) result; diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 842b51430..c33a3c63f 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -314,6 +314,11 @@ class SQLite { // If set to false, this prevents any thread from being able to commit to the DB. atomic _commitEnabled; + // These blocks are to monitor the number of open transactions on the whole server. + void incrementOpenTransactions(); + void decrementOpenTransactions(); + atomic openTransactionCount; + SPerformanceTimer _commitLockTimer; // We use this flag to prevent to threads running checkpoints t the same time. 
From 5211d06dc554851360c8a583467c02aa416471b5 Mon Sep 17 00:00:00 2001 From: John Lee Date: Tue, 10 Dec 2024 12:22:25 -0500 Subject: [PATCH 044/127] Remove unnecessary log --- sqlitecluster/SQLite.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index e0c84d7c1..fcac40b00 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -406,7 +406,6 @@ bool SQLite::beginTransaction(TRANSACTION_TYPE type) { _insideTransaction = !SQuery(_db, "starting db transaction", "BEGIN CONCURRENT"); _sharedData.incrementOpenTransactions(); - SINFO("Open transaction count: " << _sharedData.openTransactionCount); // Because some other thread could commit once we've run `BEGIN CONCURRENT`, this value can be slightly behind // where we're actually able to start such that we know we shouldn't get a conflict if this commits successfully on From c8524918cdb87c597aca669d5fb1fd0a034930b9 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Tue, 10 Dec 2024 14:40:20 -0400 Subject: [PATCH 045/127] Update SQLite with more logs, v17 --- libstuff/sqlite3.c | 20 +++++++++++--------- libstuff/sqlite3.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 727617327..a7111ef6c 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 1a59cae3c31aea25cef3705cce2477e26515. +** df4183ace93b788b798b258274bf6b651906. 
*/ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-09 21:26:21 1a59cae3c31aea25cef3705cce2477e26515a0463cf9094bd29951899b758767" +#define SQLITE_SOURCE_ID "2024-12-10 14:56:20 df4183ace93b788b798b258274bf6b651906c9f1cf2af4983e447cdf52904523" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -93474,7 +93474,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=16): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=17): (%s)", zStr); sqlite3_free(zStr); } } @@ -93502,7 +93502,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=16): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=17): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93514,11 +93514,13 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ char *zStr = 0; int ii; for(ii=1; ii Date: Tue, 10 Dec 2024 10:44:49 -0800 Subject: [PATCH 046/127] Fix but maybe not the right fix --- BedrockServer.cpp | 3 ++- sqlitecluster/SQLiteNode.cpp | 12 +++++++++--- sqlitecluster/SQLiteNode.h | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 55bb30ae6..98e59414f 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -213,8 +213,9 @@ void BedrockServer::sync() // we're leading, then the next update() loop will set us to standing down, and then we won't accept any new // commands, and we'll shortly run through the existing queue. if (_shutdownState.load() == COMMANDS_FINISHED) { - SINFO("All clients responded to, " << BedrockCommand::getCommandCount() << " commands remaining. 
Shutting down sync node."); + SINFO("All clients responded to, " << BedrockCommand::getCommandCount() << " commands remaining."); if (_syncNode->beginShutdown()) { + SINFO("Beginning shuttdown of sync node."); // This will cause us to skip the next `poll` iteration which avoids a 1 second wait. _notifyDoneSync.push(true); } diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 920f03e10..b641614ca 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -145,6 +145,7 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, cons _stateTimeout(STimeNow() + firstTimeout), _syncPeer(nullptr) { + SINFO("TYLER _replicationThreadCount reset to : " << _replicationThreadCount); KILLABLE_SQLITE_NODE = this; SASSERT(_originalPriority >= 0); onPrepareHandlerEnabled = false; @@ -1669,7 +1670,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { // Race condition here. What if _replicationThreadsShouldExit changes after this check? - if (_replicationThreadsShouldExit) { + if (_replicationThreadsShouldExit || _state == SQLiteNodeState::SEARCHING) { + // So this fix probably works, but maybe we don't even want to call _onMESSAGE when we're detaching? // Interestingly, this doesn't happen. // I think load is light when this issue occurs. SINFO("Discarding replication message, stopping FOLLOWING"); @@ -1682,6 +1684,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // Who is running this though, it should also be the sync thread? // I don't see how anyone else would be able to do this. 
auto threadID = _replicationThreadCount.fetch_add(1); + SINFO("TYLER _replicationThreadCount incremented to : " << threadID + 1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); @@ -1703,7 +1706,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads // that depend on the transaction that failed to be threaded. - _replicationThreadCount.fetch_sub(1); + auto was = _replicationThreadCount.fetch_sub(1); + SINFO("TYLER _replicationThreadCount decremented to : " << was - 1); SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); STHROW("Error starting replicate thread so giving up and reconnecting."); @@ -1956,10 +1960,12 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. - while (_replicationThreadCount) { + SINFO("TYLER _replicationThreadCount before state change: " << _replicationThreadCount); + while (_replicationThreadCount.load()) { SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } + SINFO("TYLER _replicationThreadCount after state change: " << _replicationThreadCount); // How can the above fail???? // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. 
// It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 57c5c1056..ab476b653 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -198,7 +198,8 @@ class SQLiteNode : public STCPManager { public: ScopedDecrement(CounterType& counter) : _counter(counter) {} ~ScopedDecrement() { - --_counter; + auto decrementedTo = --_counter; + SINFO("TYLER _counter decremented to : " << decrementedTo); } private: CounterType& _counter; From a7df55eafb3a04e9f9fbf14defbfea249247fe1f Mon Sep 17 00:00:00 2001 From: John Lee Date: Tue, 10 Dec 2024 13:58:47 -0500 Subject: [PATCH 047/127] No need for lock --- sqlitecluster/SQLite.cpp | 17 ++++------------- sqlitecluster/SQLite.h | 4 +--- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index fcac40b00..e0ded6539 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -405,7 +405,8 @@ bool SQLite::beginTransaction(TRANSACTION_TYPE type) { uint64_t before = STimeNow(); _insideTransaction = !SQuery(_db, "starting db transaction", "BEGIN CONCURRENT"); - _sharedData.incrementOpenTransactions(); + // We actively track transaction counts incrementing and decrementing to log the number of active open transactions at any given moment. 
+ _sharedData.openTransactionCount++; // Because some other thread could commit once we've run `BEGIN CONCURRENT`, this value can be slightly behind // where we're actually able to start such that we know we shouldn't get a conflict if this commits successfully on @@ -790,7 +791,7 @@ int SQLite::commit(const string& description, function* preCheckpointCal _mutexLocked = false; _queryCache.clear(); - _sharedData.decrementOpenTransactions(); + _sharedData.openTransactionCount--; if (preCheckpointCallback != nullptr) { (*preCheckpointCallback)(); @@ -847,7 +848,7 @@ void SQLite::rollback() { _rollbackElapsed += STimeNow() - before; } - _sharedData.decrementOpenTransactions(); + _sharedData.openTransactionCount--; // Finally done with this. _insideTransaction = false; @@ -1208,16 +1209,6 @@ void SQLite::SharedData::commitTransactionInfo(uint64_t commitID) { _committedTransactions.insert(_preparedTransactions.extract(commitID)); } -void SQLite::SharedData::incrementOpenTransactions() { - lock_guard lock(_internalStateMutex); - openTransactionCount++; -} - -void SQLite::SharedData::decrementOpenTransactions() { - lock_guard lock(_internalStateMutex); - openTransactionCount--; -} - map> SQLite::SharedData::popCommittedTransactions() { lock_guard lock(_internalStateMutex); decltype(_committedTransactions) result; diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index c33a3c63f..f88b9d3c7 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -314,9 +314,7 @@ class SQLite { // If set to false, this prevents any thread from being able to commit to the DB. atomic _commitEnabled; - // These blocks are to monitor the number of open transactions on the whole server. - void incrementOpenTransactions(); - void decrementOpenTransactions(); + // This variable is used to monitor the number of open transactions on the whole server. 
atomic openTransactionCount; SPerformanceTimer _commitLockTimer; From 39c0d877e27385aa87cc75989dfbec17a4c2960c Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 11:31:48 -0800 Subject: [PATCH 048/127] Add explanatory message --- sqlitecluster/SQLiteNode.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index b641614ca..2c1500240 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -145,7 +145,6 @@ SQLiteNode::SQLiteNode(SQLiteServer& server, shared_ptr dbPool, cons _stateTimeout(STimeNow() + firstTimeout), _syncPeer(nullptr) { - SINFO("TYLER _replicationThreadCount reset to : " << _replicationThreadCount); KILLABLE_SQLITE_NODE = this; SASSERT(_originalPriority >= 0); onPrepareHandlerEnabled = false; @@ -1669,11 +1668,15 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { throw e; } } else if (SIEquals(message.methodLine, "BEGIN_TRANSACTION") || SIEquals(message.methodLine, "COMMIT_TRANSACTION") || SIEquals(message.methodLine, "ROLLBACK_TRANSACTION")) { - // Race condition here. What if _replicationThreadsShouldExit changes after this check? - if (_replicationThreadsShouldExit || _state == SQLiteNodeState::SEARCHING) { - // So this fix probably works, but maybe we don't even want to call _onMESSAGE when we're detaching? - // Interestingly, this doesn't happen. - // I think load is light when this issue occurs. + if (_state != SQLiteNodeState::FOLLOWING) { + // These messages are only valid while following, but we do not throw if we receive them in other states, as + // it's not neccesarily an error. Specifically, as we switch away from FOLLOWING, there may still be a stream + // of transactions being broadcast. We do not attempt to handle these, as we keep careful count of which + // replication threads are currently running, and reset the replication state tracking when we're not following. 
+ // Attempting to handle replication messages in some other state will break that tracking. + return; + } + if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { // Ok, so the race condition could be here, right? @@ -1684,7 +1687,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // Who is running this though, it should also be the sync thread? // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); - SINFO("TYLER _replicationThreadCount incremented to : " << threadID + 1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); @@ -1706,8 +1708,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads // that depend on the transaction that failed to be threaded. - auto was = _replicationThreadCount.fetch_sub(1); - SINFO("TYLER _replicationThreadCount decremented to : " << was - 1); SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); STHROW("Error starting replicate thread so giving up and reconnecting."); @@ -1960,12 +1960,10 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. 
- SINFO("TYLER _replicationThreadCount before state change: " << _replicationThreadCount); - while (_replicationThreadCount.load()) { + while (_replicationThreadCount) { SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); usleep(10'000); } - SINFO("TYLER _replicationThreadCount after state change: " << _replicationThreadCount); // How can the above fail???? // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. From 5d29d00945558a0bff37842bc6cceb217d5f2b77 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 11:38:00 -0800 Subject: [PATCH 049/127] Cleanup --- sqlitecluster/SQLiteNode.cpp | 49 +++------------------- sqlitecluster/SQLiteNode.h | 3 +- sqlitecluster/SQLiteSequentialNotifier.cpp | 8 +--- 3 files changed, 8 insertions(+), 52 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2c1500240..70bf83dab 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -187,10 +187,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn } _replicateStartCV.notify_all(); - if (_replicationThreadsShouldExit) { - SINFO("Late replicate start, just exiting."); - } - // Initialize each new thread with a new number. SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); @@ -199,19 +195,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn // Allow the DB handle to be returned regardless of how this function exits. SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); - // In dev, we sometimes crash on the destructor for the above after `detach` which implies to me that the DB Pool could have been deleted before we exited. - // This is a different manifestation of what could be the same issue. 
- // IN dev, we get: - // 2024-12-09T21:12:45.030255+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:215) _replicate [replicate2065] [info] {cluster_node_4/SEARCHING} [performance] BEGIN_TRANSACTION replicate thread for commit 15003 waiting on DB count 15001 (ASYNC) - // 2024-12-09T21:12:49.023666+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1913) _changeState [sync] [info] {cluster_node_4/FOLLOWING} Replication threads should exit, canceling commits after current leader commit 0 - // 2024-12-09T21:12:49.030957+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:57) waitFor [replicate2065] [info] Hit 1s timeout while global cancel 1 or specific cancel 0 - // 2024-12-09T21:12:49.030963+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:225) _replicate [replicate2065] [info] {cluster_node_4/FOLLOWING} _localCommitNotifier.waitFor canceled early, returning. - // 2024-12-09T21:12:49.030977+00:00 expensidev2004 bedrock10013: xxxxxx (SSignal.cpp:193) _SSignal_StackTrace [replicate2065] [warn] Signal Segmentation fault(11) caused crash, logging stack trace. - // Why doesn't this get cancelled as well? - // I'm not sure why dev thinks leader has commit 0. - // 2024-12-09T21:12:49.023613+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteNode.cpp:1902) _changeState [sync] [info] {cluster_node_4/FOLLOWING} [NOTIFY] setting commit count to: 17001 - // 2024-12-09T21:12:49.023674+00:00 expensidev2004 bedrock10013: xxxxxx (SQLiteSequentialNotifier.cpp:105) cancel [sync] [info] Canceling all pending transactions after 0 - SQLite& db = dbScope.db(); bool goSearchingOnExit = false; @@ -231,22 +214,12 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn uint64_t waitForCount = SStartsWith(command["ID"], "ASYNC") ? command.calcU64("dbCountAtStart") : currentCount; SINFO("[performance] BEGIN_TRANSACTION replicate thread for commit " << newCount << " waiting on DB count " << waitForCount << " (" << (quorum ? 
"QUORUM" : "ASYNC") << ")"); while (true) { - // Ok, why doesn't this get counted? - // It's waiting on: commit 26056807239 waiting on DB count 26056807238 - // It seems like this should return immediately. - // Ok, is it possible we got past here and waited somewhere else? We were either stuck here, or... SQLiteSequentialNotifier::RESULT result = _localCommitNotifier.waitFor(waitForCount, false); - // My current inclination is that maybe we reset the commit notifier before the thread really starts. - - // I think we get stuck in `waitFor` because it's been reset to 0 and we're waiting for every commit from the - // Beginning of time. I'm not acutally sure why it ever returns, though. - if (result == SQLiteSequentialNotifier::RESULT::UNKNOWN) { // This should be impossible. SERROR("Got UNKNOWN result from waitFor, which shouldn't happen"); } else if (result == SQLiteSequentialNotifier::RESULT::COMPLETED) { // Success case. - // If we didn't get stuck above, we must have hit here, because otherwise we would have logged. break; } else if (result == SQLiteSequentialNotifier::RESULT::CANCELED) { SINFO("_localCommitNotifier.waitFor canceled early, returning."); @@ -264,7 +237,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn if (commitAttemptCount > 1) { SINFO("Commit attempt number " << commitAttemptCount << " for concurrent replication."); } - // We never log this line, so we can't have gotten to here. SINFO("[performance] BEGIN for commit " << newCount); bool uniqueContraintsError = false; try { @@ -1679,19 +1651,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { - // Ok, so the race condition could be here, right? - // Right this instance, the thread count is 0, so we can move past the check that - // Waits for it to be 0. - // But then this thread starts. Can that happen? 
- // So the sync thread does the state switch. - // Who is running this though, it should also be the sync thread? - // I don't see how anyone else would be able to do this. auto threadID = _replicationThreadCount.fetch_add(1); SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); try { uint64_t threadAttemptStartTimestamp = STimeNow(); _replicateThreadStarted = false; - // Either here. thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); { unique_lock lock(_replicateStartMutex); @@ -1708,6 +1672,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads // that depend on the transaction that failed to be threaded. + _replicationThreadCount.fetch_sub(1); SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); STHROW("Error starting replicate thread so giving up and reconnecting."); @@ -1960,23 +1925,21 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance // Polling wait for threads to quit. This could use a notification model such as with a condition_variable, // which would probably be "better" but introduces yet more state variables for a state that we're rarely // in, and so I've left it out for the time being. + size_t infoCount = 1; while (_replicationThreadCount) { - SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); + if (infoCount % 100 == 0) { + SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); + } usleep(10'000); + infoCount++; } - // How can the above fail???? 
- // We only increment _replicationThreadCount in the sync thread and we are reading it here in the sync thread. - // It is feasible to call `_changeState` from another thread but that's not what's happening in the issue we're seeing. // Done exiting. Reset so that we can resume FOLLOWING in the future. _replicationThreadsShouldExit = false; // Guaranteed to be done right now. - // I bet this is wrong when these get reset. If we no threads, these get reset before they check? - // That doesn't make sense to me for when we should increment _replicationThreadCount _localCommitNotifier.reset(); _leaderCommitNotifier.reset(); - // If the above completed, we should immediately see `Switching from... ` logged. // We have no leader anymore. _leadPeer = nullptr; diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index ab476b653..57c5c1056 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -198,8 +198,7 @@ class SQLiteNode : public STCPManager { public: ScopedDecrement(CounterType& counter) : _counter(counter) {} ~ScopedDecrement() { - auto decrementedTo = --_counter; - SINFO("TYLER _counter decremented to : " << decrementedTo); + --_counter; } private: CounterType& _counter; diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index 69d30f1ac..c7814d344 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -31,8 +31,6 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu SINFO("Canceled after " << _cancelAfter << ", but waiting for " << value << " so not returning yet."); } else { // Canceled and we're not before the cancellation cutoff. - // I don't see how we don't return here. Maybe we never acquire `waitingThreadMutex`? 
- SINFO("Returning canceled because _cancelAfter=" << _cancelAfter << " and value=" << value); return RESULT::CANCELED; } } else if (_globalResult != RESULT::UNKNOWN) { @@ -54,10 +52,9 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu // We should investigate any instances of thew below logline to see if they're same as for the success cases mentioned above (i.e., the timeout happens simultaneously as the // cancellation) or if the log line is delayed by up to a second (indicating a problem). if (_globalResult == RESULT::CANCELED || state->result == RESULT::CANCELED) { - // I bet removing a 1 second delay means we don't hit this. we get to calling `reset` on this sooner, possibly while there are threads waiting here. - // It's possible that we hit the timeout here after `cancel()` has set the global value, but before we received the notification. // This isn't a problem, and we can jump back to the top of the loop and check again. If there's some problem, we'll see it there. + // Does this still happen?? Might be fixed. SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or " << " specific cancel " << (state->result == RESULT::CANCELED)); continue; } @@ -120,10 +117,7 @@ void SQLiteSequentialNotifier::cancel(uint64_t cancelAfter) { auto start = _cancelAfter ? valueThreadMap.upper_bound(_cancelAfter) : valueThreadMap.begin(); if (start == valueThreadMap.end()) { // There's nothing to remove. - SINFO("[performance] Next value to cancel after " << cancelAfter << " is N/A"); return; - } else { - SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); } // Now iterate across whatever's remaining and mark it canceled. 
From 9e2094597bf29c04233e48ba4484fb0bcd0059dd Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 11:41:09 -0800 Subject: [PATCH 050/127] Cleanup 2 --- sqlitecluster/SQLiteNode.cpp | 2 +- sqlitecluster/SQLiteSequentialNotifier.cpp | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 70bf83dab..34881cdf4 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1930,8 +1930,8 @@ void SQLiteNode::_changeState(SQLiteNodeState newState, uint64_t commitIDToCance if (infoCount % 100 == 0) { SINFO("Waiting for " << _replicationThreadCount << " remaining replication threads."); } - usleep(10'000); infoCount++; + usleep(10'000); } // Done exiting. Reset so that we can resume FOLLOWING in the future. diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index c7814d344..a0f84a156 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -117,8 +117,10 @@ void SQLiteSequentialNotifier::cancel(uint64_t cancelAfter) { auto start = _cancelAfter ? valueThreadMap.upper_bound(_cancelAfter) : valueThreadMap.begin(); if (start == valueThreadMap.end()) { // There's nothing to remove. + SINFO("[performance] No available values to cancel after " << cancelAfter); return; } + SINFO("[performance] Next value to cancel after " << cancelAfter << " is " << start->first); // Now iterate across whatever's remaining and mark it canceled. 
auto current = start; From 6b211c5f16c8a5d2d633ed417d2b01e0bc00bb96 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 12:18:03 -0800 Subject: [PATCH 051/127] Fix logline --- sqlitecluster/SQLiteSequentialNotifier.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteSequentialNotifier.cpp b/sqlitecluster/SQLiteSequentialNotifier.cpp index a0f84a156..e12505ecd 100644 --- a/sqlitecluster/SQLiteSequentialNotifier.cpp +++ b/sqlitecluster/SQLiteSequentialNotifier.cpp @@ -54,8 +54,7 @@ SQLiteSequentialNotifier::RESULT SQLiteSequentialNotifier::waitFor(uint64_t valu if (_globalResult == RESULT::CANCELED || state->result == RESULT::CANCELED) { // It's possible that we hit the timeout here after `cancel()` has set the global value, but before we received the notification. // This isn't a problem, and we can jump back to the top of the loop and check again. If there's some problem, we'll see it there. - // Does this still happen?? Might be fixed. - SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or " << " specific cancel " << (state->result == RESULT::CANCELED)); + SINFO("Hit 1s timeout while global cancel " << (_globalResult == RESULT::CANCELED) << " or specific cancel " << (state->result == RESULT::CANCELED)); continue; } } From 1723cfb3e28e364840efbba4b3453a9e18a1a79f Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 10 Dec 2024 13:03:15 -0800 Subject: [PATCH 052/127] Add extra log line --- sqlitecluster/SQLiteNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 34881cdf4..9395cf6a0 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1646,6 +1646,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // of transactions being broadcast. 
We do not attempt to handle these, as we keep careful count of which // replication threads are currently running, and reset the replication state tracking when we're not following. // Attempting to handle replication messages in some other state will break that tracking. + SINFO("Ignoring " << message.methodLine << " in state " << stateName(_state)); return; } if (_replicationThreadsShouldExit) { From 9473d7620a95c1e8eda7838136294ece40d0c0af Mon Sep 17 00:00:00 2001 From: John Lee Date: Wed, 11 Dec 2024 14:38:52 -0500 Subject: [PATCH 053/127] Move openTransactionCount around --- sqlitecluster/SQLite.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index e0ded6539..a83d91e4d 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -401,13 +401,13 @@ bool SQLite::beginTransaction(TRANSACTION_TYPE type) { // Reset before the query, as it's possible the query sets these. _autoRolledBack = false; - SINFO("[concurrent] Beginning transaction - open transaction count: " << (_sharedData.openTransactionCount + 1)); - uint64_t before = STimeNow(); - _insideTransaction = !SQuery(_db, "starting db transaction", "BEGIN CONCURRENT"); - // We actively track transaction counts incrementing and decrementing to log the number of active open transactions at any given moment. _sharedData.openTransactionCount++; + SINFO("[concurrent] Beginning transaction - open transaction count: " << (_sharedData.openTransactionCount)); + uint64_t before = STimeNow(); + _insideTransaction = !SQuery(_db, "starting db transaction", "BEGIN CONCURRENT"); + // Because some other thread could commit once we've run `BEGIN CONCURRENT`, this value can be slightly behind // where we're actually able to start such that we know we shouldn't get a conflict if this commits successfully on // leader. 
However, this is perfectly safe, it just adds the possibility that threads on followers wait for an From 16c390d24cdafbb85bb08f17af18058d41b12768 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 11 Dec 2024 15:06:45 -0800 Subject: [PATCH 054/127] Remove redundant NODE_LOGIN --- sqlitecluster/SQLiteNode.cpp | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 9395cf6a0..1cdba752d 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -56,20 +56,6 @@ // dbCountAtStart: The highest committed transaction in the DB at the start of this transaction on leader, for // optimizing replication. -// NOTE: This comment as well as NODE_LOGIN should be removed after https://github.com/Expensify/Bedrock/pull/1999 is deployed. -// On LOGIN vs NODE_LOGIN. -// _onConnect sends a LOGIN message. -// _onConnect is called in exctly two places: -// 1. In response to a NODE_LOGIN message received on a newly connected socket on the sync port. It's expected when -// establishing a connection, a node sends this NODE_LOGIN as its first message. -// 2. Immediately following establishing a TCP connection to another node and sending a NODE_LOGIN message. In the case that -// we are the initiating node, we immediately queue three messages: -// 1. NODE_LOGIN -// 2. PING -// 3. LOGIN -// -// When we receive a NODE_LOGIN, we immediately respond with a PING followed by a LOGIN (by calling _onConnect). 
- #undef SLOGPREFIX #define SLOGPREFIX "{" << _name << "/" << SQLiteNode::stateName(_state) << "} " @@ -1268,9 +1254,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->latency = max(STimeNow() - message.calc64("Timestamp"), 1ul); SINFO("Received PONG from peer '" << peer->name << "' (" << peer->latency/1000 << "ms latency)"); return; - } else if (SIEquals(message.methodLine, "NODE_LOGIN")) { - // Do nothing, this keeps this code from warning until NODE_LOGIN is deprecated. - return; } // We ignore everything except PING and PONG from forked nodes, so we can return here in that case. @@ -2553,7 +2536,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { // with peers, so we store any that we can remove in this list. list socketsToRemove; - // Check each new connection for a NODE_LOGIN message. + // Check each new connection for a LOGIN message. for (auto socket : _unauthenticatedIncomingSockets) { STCPManager::postPoll(fdm, *socket); try { @@ -2565,8 +2548,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { int messageSize = message.deserialize(socket->recvBuffer); if (messageSize) { socket->recvBuffer.consumeFront(messageSize); - // Allow either LOGIN or NODE_LOGIN until we deprecate NODE_LOGIN. 
- if (SIEquals(message.methodLine, "NODE_LOGIN") || SIEquals(message.methodLine, "LOGIN")) { + if (SIEquals(message.methodLine, "LOGIN")) { SQLitePeer* peer = getPeerByName(message["Name"]); if (peer) { if (peer->setSocket(socket)) { @@ -2591,7 +2573,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { STHROW("Unauthenticated node '" + message["Name"] + "' attempted to connected, rejecting."); } } else { - STHROW("expecting LOGIN or NODE_LOGIN"); + STHROW("expecting LOGIN"); } } else if (STimeNow() > socket->lastRecvTime + 5'000'000) { STHROW("Incoming socket didn't send a message for over 5s, closing."); @@ -2614,10 +2596,6 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { switch (result) { case SQLitePeer::PeerPostPollStatus::JUST_CONNECTED: { - // When NODE_LOGIN is deprecated, we can remove the next 3 lines. - SData login("NODE_LOGIN"); - login["Name"] = _name; - _sendToPeer(peer, login); _onConnect(peer); _sendPING(peer); } From 9e22d67782ed43cbde2edb41a903ad0baa80eb35 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 11 Dec 2024 16:38:11 -0800 Subject: [PATCH 055/127] Accept but do not send, NODE_LOGIN --- sqlitecluster/SQLiteNode.cpp | 11 +++++++- sqlitecluster/SQLiteNode.h | 4 +-- test/clustertest/tests/ClusterUpgradeTest.cpp | 27 +++++++++++++++++++ 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 1cdba752d..0cbd018b1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1254,6 +1254,12 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->latency = max(STimeNow() - message.calc64("Timestamp"), 1ul); SINFO("Received PONG from peer '" << peer->name << "' (" << peer->latency/1000 << "ms latency)"); return; + } else if (SIEquals(message.methodLine, "NODE_LOGIN")) { + // We need to return early here to ignore this deprecated message and avoid throwing: + // STHROW("not logged in"); + // 
Below. We can remove this check after one more deploy cycle. + // https://github.com/Expensify/Expensify/issues/450953 + return; } // We ignore everything except PING and PONG from forked nodes, so we can return here in that case. @@ -2548,7 +2554,10 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { int messageSize = message.deserialize(socket->recvBuffer); if (messageSize) { socket->recvBuffer.consumeFront(messageSize); - if (SIEquals(message.methodLine, "LOGIN")) { + // Old nodes, for one more upgrade cycle, will still send `NODE_LOGIN`. We can remove this check after this + // code is deployed. + // See: https://github.com/Expensify/Expensify/issues/450953 + if (SIEquals(message.methodLine, "NODE_LOGIN") || SIEquals(message.methodLine, "LOGIN")) { SQLitePeer* peer = getPeerByName(message["Name"]); if (peer) { if (peer->setSocket(socket)) { diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 57c5c1056..fd4ba5f6f 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -181,7 +181,7 @@ class SQLiteNode : public STCPManager { // would be a good idea for the caller to read any new commands or traffic from the network. bool update(); - // Look up the correct peer by the name it supplies in a NODE_LOGIN + // Look up the correct peer by the name it supplies in a LOGIN // message. Does not lock, but this method is const and all it does is // access _peerList and peer->name, both of which are const. So it is safe // to call from other public functions. @@ -293,7 +293,7 @@ class SQLiteNode : public STCPManager { const string _version; // These are sockets that have been accepted on the node port but have not yet been associated with a peer (because - // they need to send a NODE_LOGIN message with their name first). + // they need to send a LOGIN message with their name first). set _unauthenticatedIncomingSockets; // The write consistency requested for the current in-progress commit. 
diff --git a/test/clustertest/tests/ClusterUpgradeTest.cpp b/test/clustertest/tests/ClusterUpgradeTest.cpp index e1da7ef6b..b38ea6d5c 100644 --- a/test/clustertest/tests/ClusterUpgradeTest.cpp +++ b/test/clustertest/tests/ClusterUpgradeTest.cpp @@ -113,6 +113,9 @@ struct ClusterUpgradeTest : tpunit::TestFixture { // Get the versions from the cluster. auto versions = getVersions(); + for (auto s : versions) { + cout << s << endl; + } // Save the production version for later comparison. string prodVersion = versions[0]; @@ -128,6 +131,12 @@ struct ClusterUpgradeTest : tpunit::TestFixture { tester->getTester(2).startServer(); ASSERT_TRUE(tester->getTester(2).waitForState("FOLLOWING")); + cout << "Server 2 is upgraded." << endl; + versions = getVersions(); + for (auto s : versions) { + cout << s << endl; + } + // Verify the server has been upgraded and the version is different. versions = getVersions(); string devVersion = versions[2]; @@ -152,15 +161,33 @@ struct ClusterUpgradeTest : tpunit::TestFixture { // We should get the expected cluster state. ASSERT_TRUE(tester->getTester(0).waitForState("LEADING")); + cout << "Leader has been upgraded. It should receive NODE_LOGIN from old nodes." << endl; ASSERT_TRUE(tester->getTester(1).waitForState("FOLLOWING")); ASSERT_TRUE(tester->getTester(2).waitForState("FOLLOWING")); + // Now 0 and 2 are the new version, and 1 is the old version. versions = getVersions(); ASSERT_EQUAL(versions[0], devVersion); ASSERT_EQUAL(versions[1], prodVersion); ASSERT_EQUAL(versions[2], devVersion); + // Cycle the old version. We want it to come up and make an outgoing connection to a new version. It should send node_login. + for (int i =0; i < 10; i++) { + cout << "Cyling old server." << endl; + tester->getTester(1).stopServer(); + tester->getTester(1).startServer(); + tester->getTester(1).waitForState("FOLLOWING"); + } + + // Cycle the new version. We want it to come up and make an outgoing connection to a new version. 
It should send node_login. + for (int i =0; i < 10; i++) { + cout << "Cyling new server." << endl; + tester->getTester(2).stopServer(); + tester->getTester(2).startServer(); + tester->getTester(2).waitForState("FOLLOWING"); + } + // Now we need to send a command to node 1 to verify we can escalate old->new. cmdResult = tester->getTester(1).executeWaitMultipleData({cmd}); ASSERT_EQUAL(cmdResult[0].methodLine, "200 OK"); From 9326901473a795e1c7c59d82d0085276c858b7c9 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 11 Dec 2024 16:39:32 -0800 Subject: [PATCH 056/127] Remove test change --- test/clustertest/tests/ClusterUpgradeTest.cpp | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/test/clustertest/tests/ClusterUpgradeTest.cpp b/test/clustertest/tests/ClusterUpgradeTest.cpp index b38ea6d5c..e1da7ef6b 100644 --- a/test/clustertest/tests/ClusterUpgradeTest.cpp +++ b/test/clustertest/tests/ClusterUpgradeTest.cpp @@ -113,9 +113,6 @@ struct ClusterUpgradeTest : tpunit::TestFixture { // Get the versions from the cluster. auto versions = getVersions(); - for (auto s : versions) { - cout << s << endl; - } // Save the production version for later comparison. string prodVersion = versions[0]; @@ -131,12 +128,6 @@ struct ClusterUpgradeTest : tpunit::TestFixture { tester->getTester(2).startServer(); ASSERT_TRUE(tester->getTester(2).waitForState("FOLLOWING")); - cout << "Server 2 is upgraded." << endl; - versions = getVersions(); - for (auto s : versions) { - cout << s << endl; - } - // Verify the server has been upgraded and the version is different. versions = getVersions(); string devVersion = versions[2]; @@ -161,33 +152,15 @@ struct ClusterUpgradeTest : tpunit::TestFixture { // We should get the expected cluster state. ASSERT_TRUE(tester->getTester(0).waitForState("LEADING")); - cout << "Leader has been upgraded. It should receive NODE_LOGIN from old nodes." 
<< endl; ASSERT_TRUE(tester->getTester(1).waitForState("FOLLOWING")); ASSERT_TRUE(tester->getTester(2).waitForState("FOLLOWING")); - // Now 0 and 2 are the new version, and 1 is the old version. versions = getVersions(); ASSERT_EQUAL(versions[0], devVersion); ASSERT_EQUAL(versions[1], prodVersion); ASSERT_EQUAL(versions[2], devVersion); - // Cycle the old version. We want it to come up and make an outgoing connection to a new version. It should send node_login. - for (int i =0; i < 10; i++) { - cout << "Cyling old server." << endl; - tester->getTester(1).stopServer(); - tester->getTester(1).startServer(); - tester->getTester(1).waitForState("FOLLOWING"); - } - - // Cycle the new version. We want it to come up and make an outgoing connection to a new version. It should send node_login. - for (int i =0; i < 10; i++) { - cout << "Cyling new server." << endl; - tester->getTester(2).stopServer(); - tester->getTester(2).startServer(); - tester->getTester(2).waitForState("FOLLOWING"); - } - // Now we need to send a command to node 1 to verify we can escalate old->new. 
cmdResult = tester->getTester(1).executeWaitMultipleData({cmd}); ASSERT_EQUAL(cmdResult[0].methodLine, "200 OK"); From 88577f41d3b59098cc6bde90e465416724e994cb Mon Sep 17 00:00:00 2001 From: Cole Eason Date: Thu, 12 Dec 2024 12:29:47 -0500 Subject: [PATCH 057/127] Disallow more non-deterministic queries in writes --- sqlitecluster/SQLite.cpp | 13 ++++++++++--- sqlitecluster/SQLite.h | 9 +++++++++ test/tests/WriteTest.cpp | 16 ++++++++++++++-- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index e4f63acb2..8b9115535 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -988,14 +988,21 @@ int SQLite::_authorize(int actionCode, const char* detail1, const char* detail2, !strcmp(detail2, "strftime") || !strcmp(detail2, "changes") || !strcmp(detail2, "last_insert_rowid") || - !strcmp(detail2, "sqlite3_version") + !strcmp(detail2, "sqlite_version") ) { _isDeterministicQuery = false; } - if (!strcmp(detail2, "current_timestamp")) { + // Prevent using certain non-deterministic functions in writes which could cause synchronization with followers to + // result in inconsistent data. Some are not included here because they can be used in a deterministic way that is valid. + // i.e. you can do UPDATE x = DATE('2024-01-01') and its deterministic whereas UPDATE x = DATE('now') is not. It's up to + // callers to prevent using these functions inappropriately. + if (!strcmp(detail2, "current_timestamp") || + !strcmp(detail2, "random") || + !strcmp(detail2, "last_insert_rowid") || + !strcmp(detail2, "changes") || + !strcmp(detail2, "sqlite_version")) { if (_currentlyWriting) { - // Prevent using `current_timestamp` in writes which could cause synchronization with followers to result in inconsistent data. 
return SQLITE_DENY; } } diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 842b51430..50fb4b99c 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -468,6 +468,15 @@ class SQLite { void _checkInterruptErrors(const string& error) const; // Called internally by _sqliteAuthorizerCallback to authorize columns for a query. + // + // PRO-TIP: you can play with the authorizer using the `sqlite3` CLI tool, by running `.auth ON` then running + // your query. The columns displayed are the same as what is passed to this function. + // + // The information passed to this function is different based on the first parameter, actionCode. + // You can see what information is passed for each action code here https://www.sqlite.org/c3ref/c_alter_table.html. + // Note that as of writing this comment, the page seems slightly out of date and the parameter numbers are all off + // by one. That is, the first paramter passed to the callback funciton is actually the integer action code, not the + // second. int _authorize(int actionCode, const char* detail1, const char* detail2, const char* detail3, const char* detail4); // It's possible for certain transactions (namely, timing out a write operation, see here: diff --git a/test/tests/WriteTest.cpp b/test/tests/WriteTest.cpp index 4beef02ab..f50d99b88 100644 --- a/test/tests/WriteTest.cpp +++ b/test/tests/WriteTest.cpp @@ -18,7 +18,7 @@ struct WriteTest : tpunit::TestFixture { TEST(WriteTest::updateAndInsertWithHttp), TEST(WriteTest::shortHandSyntax), TEST(WriteTest::keywordsAsValue), - TEST(WriteTest::blockTimeFunctions), + TEST(WriteTest::blockNonDeterministicFunctions), AFTER_CLASS(WriteTest::tearDown)) { } BedrockTester* tester; @@ -181,7 +181,7 @@ struct WriteTest : tpunit::TestFixture { tester->executeWaitVerifyContent(query3); } - void blockTimeFunctions() { + void blockNonDeterministicFunctions() { // Verify writing the string 'CURRENT_TIMESTAMP' is fine. 
SData query("query: INSERT INTO stuff VALUES ( NULL, 11, 'CURRENT_TIMESTAMP' );"); tester->executeWaitVerifyContent(query); @@ -193,6 +193,18 @@ struct WriteTest : tpunit::TestFixture { // But allow the function to run in reads. query.methodLine = "query: SELECT CURRENT_TIMESTAMP;"; tester->executeWaitVerifyContent(query); + + // Verify writing the string 'RANDOM' is fine. + query.methodLine = "query: INSERT INTO stuff VALUES ( NULL, 11, 'RANDOM' );"; + tester->executeWaitVerifyContent(query); + + // But verify calling the function RANDOM is blocked when writing. + query.methodLine = "query: INSERT INTO stuff VALUES ( NULL, 11, RANDOM() );"; + tester->executeWaitVerifyContent(query, "502 Query failed"); + + // But allow the function to run in reads. + query.methodLine = "query: SELECT random();"; + tester->executeWaitVerifyContent(query); } } __WriteTest; From 5e18907d94d2159e1a0f76d1bcb7518d3d40299d Mon Sep 17 00:00:00 2001 From: Cole Eason Date: Thu, 12 Dec 2024 12:44:23 -0500 Subject: [PATCH 058/127] Update test that uses non-dertministic function --- test/tests/WriteTest.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/tests/WriteTest.cpp b/test/tests/WriteTest.cpp index f50d99b88..46105532c 100644 --- a/test/tests/WriteTest.cpp +++ b/test/tests/WriteTest.cpp @@ -1,4 +1,5 @@ #include +#include #include struct WriteTest : tpunit::TestFixture { @@ -38,7 +39,8 @@ struct WriteTest : tpunit::TestFixture { for (int i = 0; i < 50; i++) { SData query("Query"); query["writeConsistency"] = "ASYNC"; - query["query"] = "INSERT INTO foo VALUES ( RANDOM() );"; + uint64_t rand = SRandom::rand64(); + query["query"] = "INSERT INTO foo VALUES (" + to_string(rand) + ");"; tester->executeWaitVerifyContent(query); } From d93a7dd957e2cc9fcc452e632c9b28ac89b7f9ed Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Thu, 12 Dec 2024 16:48:26 -0400 Subject: [PATCH 059/127] Update SQLite version: - raises the limit on the number of arguments to an SQL 
function to 1000 - in cases where the writer has to do the memset() slowing down COMMIT (i.e. when you use passive checkpoints), the memset() is now only 16KB, not 32KB - we attempt to load stat4 data using only a single pass. And only the simple query (formerly the second query). --- libstuff/sqlite3.c | 180 ++++++++++++++++++++++++++++++++++----------- libstuff/sqlite3.h | 2 +- 2 files changed, 140 insertions(+), 42 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index a7111ef6c..e02dd0087 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** df4183ace93b788b798b258274bf6b651906. +** 3c25c69c93e55738cdbfdd87fa3c879b8786. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-10 14:56:20 df4183ace93b788b798b258274bf6b651906c9f1cf2af4983e447cdf52904523" +#define SQLITE_SOURCE_ID "2024-12-12 20:39:56 3c25c69c93e55738cdbfdd87fa3c879b878674973955490770f5e274da1ca9a4" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -14125,9 +14125,13 @@ struct fts5_api { /* ** The maximum number of arguments to an SQL function. +** +** This value has a hard upper limit of 32767 due to storage +** constraints (it needs to fit inside a i16). We keep it +** lower than that to prevent abuse. 
*/ #ifndef SQLITE_MAX_FUNCTION_ARG -# define SQLITE_MAX_FUNCTION_ARG 127 +# define SQLITE_MAX_FUNCTION_ARG 1000 #endif /* @@ -18236,14 +18240,15 @@ struct sqlite3 { #define SCHEMA_TIME_STAT4_Q1_BODY 14 #define SCHEMA_TIME_AFTER_STAT4_Q1 15 -#define SCHEMA_TIME_AFTER_STAT4_Q2 16 -#define SCHEMA_TIME_AFTER_STAT4 17 +#define SCHEMA_TIME_STAT4_Q2_BODY 16 +#define SCHEMA_TIME_AFTER_STAT4_Q2 17 +#define SCHEMA_TIME_AFTER_STAT4 18 -#define SCHEMA_TIME_END_ANALYZE_LOAD 18 -#define SCHEMA_TIME_FINISH 19 +#define SCHEMA_TIME_END_ANALYZE_LOAD 19 +#define SCHEMA_TIME_FINISH 20 -#define SCHEMA_TIME_N 20 -#define SCHEMA_TIME_TIMEOUT (0 * 1000 * 1000) +#define SCHEMA_TIME_N 21 +#define SCHEMA_TIME_TIMEOUT (500 * 1000) @@ -18418,7 +18423,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile); ** field is used by per-connection app-def functions. */ struct FuncDef { - i8 nArg; /* Number of arguments. -1 means unlimited */ + i16 nArg; /* Number of arguments. -1 means unlimited */ u32 funcFlags; /* Some combination of SQLITE_FUNC_* */ void *pUserData; /* User data parameter */ FuncDef *pNext; /* Next function with same name */ @@ -19247,7 +19252,7 @@ struct Index { ** expression, or a reference to a VIRTUAL column */ #ifdef SQLITE_ENABLE_STAT4 int nSample; /* Number of elements in aSample[] */ - int mxSample; /* Number of slots allocated to aSample[] */ + int nSampleAlloc; /* Number of slots allocated to aSample[] */ int nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ @@ -24030,7 +24035,7 @@ struct sqlite3_context { int isError; /* Error code returned by the function. 
*/ u8 enc; /* Encoding to use for results */ u8 skipFlag; /* Skip accumulator loading if true */ - u8 argc; /* Number of arguments */ + u16 argc; /* Number of arguments */ sqlite3_value *argv[1]; /* Argument set */ }; @@ -67624,9 +67629,9 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ */ if( pWal->aCommitTime ) t = sqlite3STimeNow(); if( idx==1 && sLoc.aPgno[0]!=0 ){ - int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 && (nByte & 0x07)==0 ); - zero64((void*)sLoc.aPgno, nByte); + /* Special for BEDROCK branch: Zero only the aHash[] part. Not the + ** aPgno[] part of the page. */ + zero64((void*)sLoc.aHash, HASHTABLE_NSLOT * sizeof(sLoc.aHash[0])); } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; @@ -67637,11 +67642,23 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** writing one or more dirty pages to the WAL to free up memory). ** Remove the remnants of that writers uncommitted transaction from ** the hash-table before writing any new entries. - */ - if( sLoc.aPgno[idx-1] ){ + ** + ** Special for BEDROCK branch: On this branch we do not assume that + ** the aPgno[] part of each hash-table has been zeroed. Therefore, we + ** only need to clear out the remnants of an old writer's transaction if + ** the hash table matches the aPgno[] entry (as it would if a write + ** transaction was interrupted). And, because this makes the test more + ** expensive, we only do the check for the first frame written by each + ** transaction. 
*/ + if( sLoc.aPgno[idx-1] && iFrame-1==walidxGetMxFrame(&pWal->hdr, iWal) ){ if( pWal->aCommitTime ) t = sqlite3STimeNow(); - walCleanupHash(pWal); - assert( !sLoc.aPgno[idx-1] ); + nCollide = idx; + for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ + if( sLoc.aHash[iKey]==idx ){ + walCleanupHash(pWal); + } + if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; + } if( pWal->aCommitTime ){ pWal->aCommitTime[COMMIT_TIME_WALINDEX_CLEANUPUS]+=sqlite3STimeNow()-t; } @@ -93474,7 +93491,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=17): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=18): (%s)", zStr); sqlite3_free(zStr); } } @@ -93502,7 +93519,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=17): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=18): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93515,12 +93532,15 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ int ii; for(ii=1; iipVdbe, opcode, in2, dest, in1, (void*)p4, P4_COLLSEQ); - sqlite3VdbeChangeP5(pParse->pVdbe, (u8)p5); + sqlite3VdbeChangeP5(pParse->pVdbe, (u16)p5); return addr; } @@ -123675,6 +123695,7 @@ SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){ if( db->pnBytesFreed==0 ){ pIdx->nSample = 0; pIdx->aSample = 0; + pIdx->nSampleAlloc = 0; } #else UNUSED_PARAMETER(db); @@ -123759,6 +123780,70 @@ static Index *findIndexOrPrimaryKey( return pIdx; } +/* +** Grow the pIdx->aSample[] array. Return SQLITE_OK if successful, or +** SQLITE_NOMEM otherwise. 
+*/ +static int growSampleArray(sqlite3 *db, Index *pIdx){ + int nIdxCol = pIdx->nSampleCol; + int nNew = 0; + IndexSample *aNew = 0; + int nByte = 0; + tRowcnt *pSpace; /* Available allocated memory space */ + u8 *pPtr; /* Available memory as a u8 for easier manipulation */ + int i; + + /* In production set the initial allocation to SQLITE_STAT4_SAMPLES. This + ** means that reallocation will almost never be required. But for debug + ** builds, set the initial allocation size to 6 entries so that the + ** reallocation code gets tested. todo: use real tests for this. */ + assert( pIdx->nSample==pIdx->nSampleAlloc ); +#ifdef SQLITE_DEBUG + nNew = 6; +#else + nNew = SQLITE_STAT4_SAMPLES; +#endif + if( pIdx->nSample ){ + nNew = pIdx->nSample*2; + } + + nByte = ROUND8(sizeof(IndexSample) * nNew); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * nNew; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + + aNew = (IndexSample*)sqlite3DbMallocZero(db, nByte); + if( aNew==0 ) return SQLITE_NOMEM_BKPT; + + pPtr = (u8*)aNew; + pPtr += ROUND8(nNew*sizeof(pIdx->aSample[0])); + pSpace = (tRowcnt*)pPtr; + + pIdx->aAvgEq = pSpace; pSpace += nIdxCol; + assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); + + if( pIdx->nSample ){ + /* Copy the contents of the anEq[], anLt[], anDLt[] arrays for all + ** extant samples to the new location. */ + int nByte = nIdxCol * 3 * sizeof(tRowcnt) * pIdx->nSample; + memcpy(pSpace, pIdx->aSample[0].anEq, nByte); + } + for(i=0; inSample ){ + aNew[i].p = pIdx->aSample[i].p; + aNew[i].n = pIdx->aSample[i].n; + } + } + assert( ((u8*)pSpace)-nByte==(u8*)aNew ); + + sqlite3DbFree(db, pIdx->aSample); + pIdx->aSample = aNew; + pIdx->nSampleAlloc = nNew; + return SQLITE_OK; +} + /* ** Load the content from either the sqlite_stat4 ** into the relevant Index.aSample[] arrays. 
@@ -123784,6 +123869,7 @@ static int loadStatTbl( IndexSample *pSample; /* A slot in pIdx->aSample[] */ assert( db->lookaside.bDisable ); +#if 0 zSql = sqlite3MPrintf(db, zSql1, zDb); if( !zSql ){ return SQLITE_NOMEM_BKPT; @@ -123850,6 +123936,9 @@ static int loadStatTbl( } rc = sqlite3_finalize(pStmt); if( rc ) return rc; +#endif + + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); zSql = sqlite3MPrintf(db, zSql2, zDb); if( !zSql ){ @@ -123859,29 +123948,34 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); - while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ int nCol = 1; /* Number of columns in index */ + u64 t = sqlite3STimeNow(); zIndex = (char *)sqlite3_column_text(pStmt, 0); if( zIndex==0 ) continue; pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); if( pIdx==0 ) continue; - if( pIdx->nSample>=pIdx->mxSample ){ - /* Too many slots used because the same index appears in - ** sqlite_stat4 using multiple names */ - continue; + + if( pIdx->nSample==pIdx->nSampleAlloc ){ + pIdx->pTable->tabFlags |= TF_HasStat4; + assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + pIdx->nSampleCol = pIdx->nKeyCol; + }else{ + pIdx->nSampleCol = pIdx->nColumn; + } + if( growSampleArray(db, pIdx) ) break; } - /* This next condition is true if data has already been loaded from - ** the sqlite_stat4 table. 
*/ - nCol = pIdx->nSampleCol; + if( pIdx!=pPrevIdx ){ initAvgEq(pPrevIdx); pPrevIdx = pIdx; } + + nCol = pIdx->nSampleCol; pSample = &pIdx->aSample[pIdx->nSample]; decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0); decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); @@ -123904,6 +123998,10 @@ static int loadStatTbl( memcpy(pSample->p, sqlite3_column_blob(pStmt, 4), pSample->n); } pIdx->nSample++; + + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_Q2_BODY] += (sqlite3STimeNow() - t); + } } rc = sqlite3_finalize(pStmt); sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); @@ -144195,7 +144293,7 @@ SQLITE_PRIVATE void sqlite3Pragma( /* Do the b-tree integrity checks */ sqlite3VdbeAddOp4(v, OP_IntegrityCk, 1, cnt, 8, (char*)aRoot,P4_INTARRAY); - sqlite3VdbeChangeP5(v, (u8)i); + sqlite3VdbeChangeP5(v, (u16)i); addr = sqlite3VdbeAddOp1(v, OP_IsNull, 2); VdbeCoverage(v); sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, sqlite3MPrintf(db, "*** in database %s ***\n", db->aDb[i].zDbSName), @@ -153462,7 +153560,7 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){ } sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3VdbeChangeP5(v, (u16)nArg); sqlite3VdbeAddOp2(v, OP_Next, pF->iOBTab, iTop+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, iTop); sqlite3ReleaseTempRange(pParse, regAgg, nArg); @@ -153625,7 +153723,7 @@ static void updateAccumulator( } sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3VdbeChangeP5(v, (u16)nArg); sqlite3ReleaseTempRange(pParse, regAgg, nArg); } if( addrNext ){ @@ -157019,7 +157117,7 @@ SQLITE_PRIVATE void sqlite3CodeRowTriggerDirect( ** invocation is disallowed if (a) the sub-program is really a trigger, ** not a foreign key action, and (b) 
the flag to enable recursive triggers ** is clear. */ - sqlite3VdbeChangeP5(v, (u8)bRecursive); + sqlite3VdbeChangeP5(v, (u16)bRecursive); } } @@ -175428,7 +175526,7 @@ static void windowAggStep( sqlite3VdbeAddOp3(v, bInverse? OP_AggInverse : OP_AggStep, bInverse, regArg, pWin->regAccum); sqlite3VdbeAppendP4(v, pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3VdbeChangeP5(v, (u16)nArg); if( pWin->bExprArgs ){ sqlite3ReleaseTempRange(pParse, regArg, nArg); } @@ -187037,8 +187135,8 @@ static const int aHardLimit[] = { #if SQLITE_MAX_VDBE_OP<40 # error SQLITE_MAX_VDBE_OP must be at least 40 #endif -#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>127 -# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 127 +#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>32767 +# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 32767 #endif #if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125 # error SQLITE_MAX_ATTACHED must be between 0 and 125 @@ -257919,7 +258017,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-10 14:56:20 df4183ace93b788b798b258274bf6b651906c9f1cf2af4983e447cdf52904523", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-12 20:39:56 3c25c69c93e55738cdbfdd87fa3c879b878674973955490770f5e274da1ca9a4", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 8d452cc85..2983455f0 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-10 14:56:20 df4183ace93b788b798b258274bf6b651906c9f1cf2af4983e447cdf52904523" +#define SQLITE_SOURCE_ID "2024-12-12 20:39:56 3c25c69c93e55738cdbfdd87fa3c879b878674973955490770f5e274da1ca9a4" /* ** CAPI3REF: Run-Time Library Version Numbers From 0b8f440a6cd6e3322af8ba76117c3bb81159bda2 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski 
Date: Thu, 12 Dec 2024 14:43:51 -0800 Subject: [PATCH 060/127] Silence warning --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 0cbd018b1..17db479ba 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1850,7 +1850,7 @@ void SQLiteNode::_onDisconnect(SQLitePeer* peer) { // Store the time at which this happened for diagnostic purposes. _lastLostQuorum = STimeNow(); for (const auto* p : _peerList) { - SWARN("[clustersync] Peer " << p->name << " logged in? " << (p->loggedIn ? "TRUE" : "FALSE") << (p->permaFollower ? " (permaFollower)" : "")); + SINFO("[clustersync] Peer " << p->name << " logged in? " << (p->loggedIn ? "TRUE" : "FALSE") << (p->permaFollower ? " (permaFollower)" : "")); } _changeState(SQLiteNodeState::SEARCHING); } From a5792f94e7e7c3f0535987198c2f3bfcfd0aa45a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 13 Dec 2024 12:35:40 -0800 Subject: [PATCH 061/127] Move delete out of commit lock --- sqlitecluster/SQLite.cpp | 73 +++++++++++++--------------------------- sqlitecluster/SQLite.h | 4 --- 2 files changed, 24 insertions(+), 53 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 1d2fd946a..d1ebf1797 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -164,28 +164,6 @@ vector SQLite::initializeJournal(sqlite3* db, int minJournalTables) { return journalNames; } -uint64_t SQLite::initializeJournalSize(sqlite3* db, const vector& journalNames) { - // We keep track of the number of rows in the journal, so that we can delete old entries when we're over our size - // limit. - // We want the min of all journal tables. - string minQuery = _getJournalQuery(journalNames, {"SELECT MIN(id) AS id FROM"}, true); - minQuery = "SELECT MIN(id) AS id FROM (" + minQuery + ")"; - - // And the max. 
- string maxQuery = _getJournalQuery(journalNames, {"SELECT MAX(id) AS id FROM"}, true); - maxQuery = "SELECT MAX(id) AS id FROM (" + maxQuery + ")"; - - // Look up the min and max values in the database. - SQResult result; - SASSERT(!SQuery(db, "getting commit min", minQuery, result)); - uint64_t min = SToUInt64(result[0][0]); - SASSERT(!SQuery(db, "getting commit max", maxQuery, result)); - uint64_t max = SToUInt64(result[0][0]); - - // And save the difference as the size of the journal. - return max - min; -} - void SQLite::commonConstructorInitialization(bool hctree) { // Perform sanity checks. SASSERT(!_filename.empty()); @@ -229,7 +207,6 @@ SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, _db(initializeDB(_filename, mmapSizeGB, hctree)), _journalNames(initializeJournal(_db, minJournalTables)), _sharedData(initializeSharedData(_db, _filename, _journalNames, hctree)), - _journalSize(initializeJournalSize(_db, _journalNames)), _cacheSize(cacheSize), _mmapSizeGB(mmapSizeGB) { @@ -242,7 +219,6 @@ SQLite::SQLite(const SQLite& from) : _db(initializeDB(_filename, from._mmapSizeGB, false)), // Create a *new* DB handle from the same filename, don't copy the existing handle. _journalNames(from._journalNames), _sharedData(from._sharedData), - _journalSize(from._journalSize), _cacheSize(from._cacheSize), _mmapSizeGB(from._mmapSizeGB) { @@ -665,6 +641,30 @@ bool SQLite::_writeIdempotent(const string& query, SQResult& result, bool always bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { SASSERT(_insideTransaction); + // Pick a journal for this transaction. + const int64_t journalID = _sharedData.nextJournalCount++; + _journalName = _journalNames[journalID % _journalNames.size()]; + + // It's possible to attempt to commit a transaction with no writes. We'll skip truncating the journal in this case to avoid + // Turning a no=op into a write. 
+ if (_uncommittedQuery.size()) { + // Look up the oldest commit in our chosen journal, and compute the oldest commit we intend to keep. + SQResult result; + SASSERT(!SQuery(_db, "getting commit min", "SELECT MIN(id) AS id FROM " + _journalName, result)); + uint64_t minJournalEntry = SToUInt64(result[0][0]); + uint64_t oldestCommitToKeep = _sharedData.commitCount - _maxJournalSize; + + // We limit deletions to a relatively small number to avoid making this extremenly slow for some transactions in the case + // where this journal in particular has accumulated a large backlog. + static const size_t deleteLimit = 10; + if (minJournalEntry < oldestCommitToKeep) { + string query = "DELETE FROM " + _journalName + " WHERE id < " + SQ(oldestCommitToKeep) + " LIMIT " + SQ(deleteLimit); + SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); + size_t deletedCount = sqlite3_changes(_db); + SINFO("Removed " << deletedCount << " rows from journal " << _journalName); + } + } + // We lock this here, so that we can guarantee the order in which commits show up in the database. if (!_mutexLocked) { auto start = STimeNow(); @@ -680,8 +680,6 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { // We pass the journal number selected to the handler so that a caller can utilize the // same method bedrock does for accessing 1 table per thread, in order to attempt to // reduce conflicts on tables that are written to on every command - const int64_t journalID = _sharedData.nextJournalCount++; - _journalName = _journalNames[journalID % _journalNames.size()]; if (_shouldNotifyPluginsOnPrepare) { (*_onPrepareHandler)(*this, journalID); } @@ -738,28 +736,6 @@ int SQLite::commit(const string& description, function* preCheckpointCal SASSERT(!_uncommittedHash.empty()); // Must prepare first int result = 0; - // Do we need to truncate as we go? 
- uint64_t newJournalSize = _journalSize + 1; - if (newJournalSize > _maxJournalSize) { - // Delete the oldest entry - uint64_t before = STimeNow(); - string query = "DELETE FROM " + _journalName + " " - "WHERE id < (SELECT MAX(id) FROM " + _journalName + ") - " + SQ(_maxJournalSize) + " " - "LIMIT 10"; - SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); - - // Figure out the new journal size. - SQResult result; - SASSERT(!SQuery(_db, "getting commit min", "SELECT MIN(id) AS id FROM " + _journalName, result)); - uint64_t min = SToUInt64(result[0][0]); - SASSERT(!SQuery(_db, "getting commit max", "SELECT MAX(id) AS id FROM " + _journalName, result)); - uint64_t max = SToUInt64(result[0][0]); - newJournalSize = max - min; - - // Log timing info. - _writeElapsed += STimeNow() - before; - } - // Make sure one is ready to commit SDEBUG("Committing transaction"); @@ -798,7 +774,6 @@ int SQLite::commit(const string& description, function* preCheckpointCal } _commitElapsed += STimeNow() - before; - _journalSize = newJournalSize; _sharedData.incrementCommit(_uncommittedHash); _insideTransaction = false; _uncommittedHash.clear(); diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 648d0b1a7..15767a7e5 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -354,7 +354,6 @@ class SQLite { static SharedData& initializeSharedData(sqlite3* db, const string& filename, const vector& journalNames, bool hctree); static sqlite3* initializeDB(const string& filename, int64_t mmapSizeGB, bool hctree); static vector initializeJournal(sqlite3* db, int minJournalTables); - static uint64_t initializeJournalSize(sqlite3* db, const vector& journalNames); void commonConstructorInitialization(bool hctree = false); // The filename of this DB, canonicalized to its full path on disk. @@ -375,9 +374,6 @@ class SQLite { // The name of the journal table that this particular DB handle with write to. 
string _journalName; - // The current size of the journal, in rows. TODO: Why isn't this in SharedData? - uint64_t _journalSize; - // True when we have a transaction in progress. bool _insideTransaction = false; From 09ae2b0f05ce0629824e6bb9d41511e1c4f557b1 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 13 Dec 2024 12:37:09 -0800 Subject: [PATCH 062/127] typo --- sqlitecluster/SQLite.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index d1ebf1797..456156e81 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -646,7 +646,7 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { _journalName = _journalNames[journalID % _journalNames.size()]; // It's possible to attempt to commit a transaction with no writes. We'll skip truncating the journal in this case to avoid - // Turning a no=op into a write. + // Turning a no-op into a write. if (_uncommittedQuery.size()) { // Look up the oldest commit in our chosen journal, and compute the oldest commit we intend to keep. SQResult result; From 85de3d3b1cf9c4821c827eb199b8a91514e69252 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 13 Dec 2024 13:46:38 -0800 Subject: [PATCH 063/127] Fix integer underflow --- sqlitecluster/SQLite.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 456156e81..49e89c0b1 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -650,9 +650,14 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { if (_uncommittedQuery.size()) { // Look up the oldest commit in our chosen journal, and compute the oldest commit we intend to keep. 
SQResult result; - SASSERT(!SQuery(_db, "getting commit min", "SELECT MIN(id) AS id FROM " + _journalName, result)); - uint64_t minJournalEntry = SToUInt64(result[0][0]); - uint64_t oldestCommitToKeep = _sharedData.commitCount - _maxJournalSize; + SASSERT(!SQuery(_db, "getting commit min", "SELECT MIN(id) FROM " + _journalName, result)); + uint64_t minJournalEntry = result.size() ? SToUInt64(result[0][0]) : 0; + uint64_t commitCount = _sharedData.commitCount; + + // If the commitCount is less than the max journal size, keep everything. Otherwise, keep everything from + // commitCount - _maxJournalSize forward. We can't just do the last subtraction part because it overflows our unsigned + // int. + uint64_t oldestCommitToKeep = commitCount < _maxJournalSize ? 0 : commitCount - _maxJournalSize; // We limit deletions to a relatively small number to avoid making this extremenly slow for some transactions in the case // where this journal in particular has accumulated a large backlog. @@ -661,7 +666,7 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { string query = "DELETE FROM " + _journalName + " WHERE id < " + SQ(oldestCommitToKeep) + " LIMIT " + SQ(deleteLimit); SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); size_t deletedCount = sqlite3_changes(_db); - SINFO("Removed " << deletedCount << " rows from journal " << _journalName); + SINFO("Removed " << deletedCount << " rows from journal " << _journalName << ", oldestToKeep: " << oldestCommitToKeep << ", count:" << commitCount << ", limit: " << _maxJournalSize); } } From ebcc4a99f77734b46fba2d3bfcd57e1679d18c2a Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 16 Dec 2024 11:11:28 -0400 Subject: [PATCH 064/127] Update SQLite with more logs, v19 --- libstuff/sqlite3.c | 36 ++++++++++++++++++++++++------------ libstuff/sqlite3.h | 2 +- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index e02dd0087..5381fa9ec 100644 
--- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** 3c25c69c93e55738cdbfdd87fa3c879b8786. +** fa87355f6286be1e92f22a71cbfbfb13d1a4. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-12 20:39:56 3c25c69c93e55738cdbfdd87fa3c879b878674973955490770f5e274da1ca9a4" +#define SQLITE_SOURCE_ID "2024-12-13 18:13:51 fa87355f6286be1e92f22a71cbfbfb13d1a478d5fb5b38abedbd78bf903171fa" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -18241,13 +18241,14 @@ struct sqlite3 { #define SCHEMA_TIME_STAT4_Q1_BODY 14 #define SCHEMA_TIME_AFTER_STAT4_Q1 15 #define SCHEMA_TIME_STAT4_Q2_BODY 16 -#define SCHEMA_TIME_AFTER_STAT4_Q2 17 -#define SCHEMA_TIME_AFTER_STAT4 18 +#define SCHEMA_TIME_STAT4_SAMPLE_MALLOC 17 +#define SCHEMA_TIME_AFTER_STAT4_Q2 18 +#define SCHEMA_TIME_AFTER_STAT4 19 -#define SCHEMA_TIME_END_ANALYZE_LOAD 19 -#define SCHEMA_TIME_FINISH 20 +#define SCHEMA_TIME_END_ANALYZE_LOAD 20 +#define SCHEMA_TIME_FINISH 21 -#define SCHEMA_TIME_N 21 +#define SCHEMA_TIME_N 22 #define SCHEMA_TIME_TIMEOUT (500 * 1000) @@ -93491,7 +93492,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=18): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=19): (%s)", zStr); sqlite3_free(zStr); } } @@ -93519,7 +93520,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=18): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=19): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93535,12 +93536,13 @@ SQLITE_PRIVATE void 
sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ if( val!=0 && ii!=SCHEMA_TIME_STAT4_Q1_BODY && ii!=SCHEMA_TIME_STAT4_Q2_BODY + && ii!=SCHEMA_TIME_STAT4_SAMPLE_MALLOC ){ val -= i1; } zStr = sqlite3_mprintf("%z%s%d", zStr, (zStr?", ":""), val); } - sqlite3_log(SQLITE_WARNING, "slow schema (%s) (v=18): (%s)", zFile, zStr); + sqlite3_log(SQLITE_WARNING, "slow schema (%s) (v=19): (%s)", zFile, zStr); sqlite3_free(zStr); } } @@ -123568,6 +123570,9 @@ static void decodeIntArray( #endif if( *z==' ' ) z++; } + if( aOut ){ + for(/* no-op */; iaSchemaTime ){ + t = sqlite3STimeNow(); + } + aNew = (IndexSample*)sqlite3DbMallocRaw(db, nByte); if( aNew==0 ) return SQLITE_NOMEM_BKPT; + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_SAMPLE_MALLOC] += (sqlite3STimeNow() - t); + } pPtr = (u8*)aNew; pPtr += ROUND8(nNew*sizeof(pIdx->aSample[0])); @@ -258017,7 +258029,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-12 20:39:56 3c25c69c93e55738cdbfdd87fa3c879b878674973955490770f5e274da1ca9a4", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-13 18:13:51 fa87355f6286be1e92f22a71cbfbfb13d1a478d5fb5b38abedbd78bf903171fa", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 2983455f0..fe6bc6016 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-12 20:39:56 3c25c69c93e55738cdbfdd87fa3c879b878674973955490770f5e274da1ca9a4" +#define SQLITE_SOURCE_ID "2024-12-13 18:13:51 fa87355f6286be1e92f22a71cbfbfb13d1a478d5fb5b38abedbd78bf903171fa" /* ** CAPI3REF: Run-Time Library Version Numbers From 5acd465856c55587499396deef2979b75d7e1534 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 16 Dec 2024 12:47:10 -0400 Subject: [PATCH 065/127] Allow to pass checkpointMode via CLI to make it dynamically 
configurable --- BedrockServer.cpp | 13 +++++++++---- main.cpp | 2 ++ sqlitecluster/SQLite.cpp | 32 +++++++++++++++++++++++++------- sqlitecluster/SQLite.h | 5 ++++- sqlitecluster/SQLitePool.cpp | 5 +++-- sqlitecluster/SQLitePool.h | 2 +- 6 files changed, 44 insertions(+), 15 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 98e59414f..cbe7d7e9a 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -97,7 +97,7 @@ void BedrockServer::sync() // We use fewer FDs on test machines that have other resource restrictions in place. SINFO("Setting dbPool size to: " << _dbPoolSize); - _dbPool = make_shared(_dbPoolSize, args["-db"], args.calc("-cacheSize"), args.calc("-maxJournalSize"), journalTables, mmapSizeGB, args.isSet("-hctree")); + _dbPool = make_shared(_dbPoolSize, args["-db"], args.calc("-cacheSize"), args.calc("-maxJournalSize"), journalTables, mmapSizeGB, args.isSet("-hctree"), args["-checkpointMode"]); SQLite& db = _dbPool->getBase(); // Initialize the command processor. @@ -358,7 +358,7 @@ void BedrockServer::sync() committingCommand = true; _syncNode->startCommit(SQLiteNode::QUORUM); _lastQuorumCommandTime = STimeNow(); - + // This interrupts the next poll loop immediately. This prevents a 1-second wait when running as a single server. 
_notifyDoneSync.push(true); SDEBUG("Finished sending distributed transaction for db upgrade."); @@ -1267,6 +1267,11 @@ BedrockServer::BedrockServer(const SData& args_) sort(versions.begin(), versions.end()); _version = SComposeList(versions, ":"); + const set validCheckpointModes = {"PASSIVE", "FULL", "RESTART", "TRUNCATE"}; + if (validCheckpointModes.find(args["-checkpointMode"]) == validCheckpointModes.end()) { + SERROR("Invalid checkpoint mode " << args["-checkpointMode"]); + } + list pluginString; for (auto& p : plugins) { pluginString.emplace_back(p.first); @@ -1695,14 +1700,14 @@ void BedrockServer::_status(unique_ptr& command) { size_t totalCount = 0; for (const auto& s : _crashCommands) { totalCount += s.second.size(); - + vector paramsArray; for (const STable& params : s.second) { if (!params.empty()) { paramsArray.push_back(SComposeJSONObject(params)); } } - + STable commandObject; commandObject[s.first] = SComposeJSONArray(paramsArray); crashCommandListArray.push_back(SComposeJSONObject(commandObject)); diff --git a/main.cpp b/main.cpp index 240a1da88..160f4262d 100644 --- a/main.cpp +++ b/main.cpp @@ -236,6 +236,7 @@ int main(int argc, char* argv[]) { << endl; cout << "-maxJournalSize <#commits> Number of commits to retain in the historical journal (default 1000000)" << endl; + cout << "-checkpointMode Accepts PASSIVE|FULL|RESTART|TRUNCATE, which is the value passed to https://www.sqlite.org/c3ref/wal_checkpoint_v2.html" << endl; cout << endl; cout << "Quick Start Tips:" << endl; cout << "-----------------" << endl; @@ -299,6 +300,7 @@ int main(int argc, char* argv[]) { SETDEFAULT("-maxJournalSize", "1000000"); SETDEFAULT("-queryLog", "queryLog.csv"); SETDEFAULT("-enableMultiWrite", "true"); + SETDEFAULT("-checkpointMode", "PASSIVE"); args["-plugins"] = SComposeList(loadPlugins(args)); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 2a5f4e4e0..0f81a685b 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -223,7 
+223,7 @@ void SQLite::commonConstructorInitialization(bool hctree) { } SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, - int minJournalTables, int64_t mmapSizeGB, bool hctree) : + int minJournalTables, int64_t mmapSizeGB, bool hctree, const string& checkpointMode) : _filename(initializeFilename(filename)), _maxJournalSize(maxJournalSize), _db(initializeDB(_filename, mmapSizeGB, hctree)), @@ -231,7 +231,8 @@ SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, _sharedData(initializeSharedData(_db, _filename, _journalNames, hctree)), _journalSize(initializeJournalSize(_db, _journalNames)), _cacheSize(cacheSize), - _mmapSizeGB(mmapSizeGB) + _mmapSizeGB(mmapSizeGB), + _checkpointMode(checkpointMode) { commonConstructorInitialization(hctree); } @@ -244,7 +245,8 @@ SQLite::SQLite(const SQLite& from) : _sharedData(from._sharedData), _journalSize(from._journalSize), _cacheSize(from._cacheSize), - _mmapSizeGB(from._mmapSizeGB) + _mmapSizeGB(from._mmapSizeGB), + _checkpointMode(from._checkpointMode) { // This can always pass "true" because the copy constructor does not need to set the DB to WAL2 mode, it would have been set in the object being copied. commonConstructorInitialization(true); @@ -819,11 +821,27 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - sqlite3_busy_timeout(_db, 120'000); // 2 minutes - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_FULL, NULL, &framesCheckpointed); - sqlite3_busy_timeout(_db, 0); + + // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. 
+ if (_checkpointMode != "PASSIVE") { + int checkpointMode = SQLITE_CHECKPOINT_PASSIVE; + if (_checkpointMode == "FULL") { + checkpointMode = SQLITE_CHECKPOINT_FULL; + } else if (_checkpointMode == "RESTART") { + checkpointMode = SQLITE_CHECKPOINT_RESTART; + } else if (_checkpointMode == "TRUNCATE") { + checkpointMode = SQLITE_CHECKPOINT_TRUNCATE; + } + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. + sqlite3_busy_timeout(_db, 120'000); + sqlite3_wal_checkpoint_v2(_db, 0, checkpointMode, NULL, &framesCheckpointed); + sqlite3_busy_timeout(_db, 0); + } else { + sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); + } auto end = STimeNow(); - SINFO("Checkpointed " << framesCheckpointed << " (total) frames of " << _sharedData.outstandingFramesToCheckpoint << " in " << (end - start) << "us."); + SINFO(_checkpointMode << " checkpoint complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); // It might not actually be 0, but we'll just let sqlite tell us what it is next time _walHookCallback runs. _sharedData.outstandingFramesToCheckpoint = 0; diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 648d0b1a7..e11e91e3c 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -57,7 +57,7 @@ class SQLite { // // mmapSizeGB: address space to use for memory-mapped IO, in GB. SQLite(const string& filename, int cacheSize, int maxJournalSize, int minJournalTables, - int64_t mmapSizeGB = 0, bool hctree = false); + int64_t mmapSizeGB = 0, bool hctree = false, const string& checkpointMode = "PASSIVE"); // This constructor is not exactly a copy constructor. It creates an other SQLite object based on the first except // with a *different* journal table. 
This avoids a lot of locking around creating structures that we know already @@ -529,4 +529,7 @@ class SQLite { // Set to true inside of a write query. bool _currentlyWriting{false}; + + // One of PASSIVE|FULL|RESTART|TRUNCATE, translated to corresponding values to be passed to sqlite3_wal_checkpoint_v2. + string _checkpointMode; }; diff --git a/sqlitecluster/SQLitePool.cpp b/sqlitecluster/SQLitePool.cpp index ca3d7a4ad..75d1a31f2 100644 --- a/sqlitecluster/SQLitePool.cpp +++ b/sqlitecluster/SQLitePool.cpp @@ -8,9 +8,10 @@ SQLitePool::SQLitePool(size_t maxDBs, int maxJournalSize, int minJournalTables, int64_t mmapSizeGB, - bool hctree) + bool hctree, + const string& checkpointMode) : _maxDBs(max(maxDBs, 1ul)), - _baseDB(filename, cacheSize, maxJournalSize, minJournalTables, mmapSizeGB, hctree), + _baseDB(filename, cacheSize, maxJournalSize, minJournalTables, mmapSizeGB, hctree, checkpointMode), _objects(_maxDBs, nullptr) { } diff --git a/sqlitecluster/SQLitePool.h b/sqlitecluster/SQLitePool.h index 8cbc6c92e..cf12e38e2 100644 --- a/sqlitecluster/SQLitePool.h +++ b/sqlitecluster/SQLitePool.h @@ -7,7 +7,7 @@ class SQLitePool { public: // Create a pool of DB handles. SQLitePool(size_t maxDBs, const string& filename, int cacheSize, int maxJournalSize, int minJournalTables, - int64_t mmapSizeGB = 0, bool hctree = false); + int64_t mmapSizeGB = 0, bool hctree = false, const string& checkpointMode = "PASSIVE"); ~SQLitePool(); // Get the base object (the first one created, which uses the `journal` table). 
Note that if called by multiple From 211d274912f13ca237a0b9dc9229308c75a198a8 Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Mon, 16 Dec 2024 17:19:16 -0300 Subject: [PATCH 066/127] added metrics to read and write --- sqlitecluster/SQLite.cpp | 17 ++++++++++------- sqlitecluster/SQLite.h | 4 +++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 1d2fd946a..891f3430c 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -416,7 +416,8 @@ bool SQLite::beginTransaction(TRANSACTION_TYPE type) { _dbCountAtStart = getCommitCount(); _queryCache.clear(); _tablesUsed.clear(); - _queryCount = 0; + _readQueryCount = 0; + _writeQueryCount = 0; _cacheHits = 0; _beginElapsed = STimeNow() - before; _readElapsed = 0; @@ -512,7 +513,7 @@ string SQLite::read(const string& query) const { bool SQLite::read(const string& query, SQResult& result, bool skipInfoWarn) const { uint64_t before = STimeNow(); bool queryResult = false; - _queryCount++; + _readQueryCount++; auto foundQuery = _queryCache.find(query); if (foundQuery != _queryCache.end()) { result = foundQuery->second; @@ -600,7 +601,7 @@ bool SQLite::writeUnmodified(const string& query) { bool SQLite::_writeIdempotent(const string& query, SQResult& result, bool alwaysKeepQueries) { SASSERT(_insideTransaction); _queryCache.clear(); - _queryCount++; + _writeQueryCount++; // Must finish everything with semicolon. SASSERT(query.empty() || SEndsWith(query, ";")); @@ -829,9 +830,10 @@ int SQLite::commit(const string& description, function* preCheckpointCal _sharedData.checkpointInProgress.clear(); } SINFO(description << " COMMIT " << SToStr(_sharedData.commitCount) << " complete in " << time << ". Wrote " << (endPages - startPages) - << " pages. WAL file size is " << sz << " bytes. " << _queryCount << " queries attempted, " << _cacheHits + << " pages. WAL file size is " << sz << " bytes. 
" << _readQueryCount << " read queries attempted, " << _writeQueryCount << " write queries attempted, " << _cacheHits << " served from cache. Used journal " << _journalName); - _queryCount = 0; + _readQueryCount = 0; + _writeQueryCount = 0; _cacheHits = 0; _dbCountAtStart = 0; _lastConflictPage = 0; @@ -886,8 +888,9 @@ void SQLite::rollback() { SINFO("Rolling back but not inside transaction, ignoring."); } _queryCache.clear(); - SDEBUG("Transaction rollback with " << _queryCount << " queries attempted, " << _cacheHits << " served from cache."); - _queryCount = 0; + SINFO("[performance] Transaction rollback with " << _readQueryCount << " read queries attempted, " << _writeQueryCount << " write queries attempted, " << _cacheHits << " served from cache."); + _readQueryCount = 0; + _writeQueryCount = 0; _cacheHits = 0; _dbCountAtStart = 0; } diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index 648d0b1a7..d19f858be 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -509,7 +509,9 @@ class SQLite { set _tablesUsed; // Number of queries that have been attempted in this transaction (for metrics only). - mutable int64_t _queryCount = 0; + mutable int64_t _readQueryCount = 0; + + mutable int64_t _writeQueryCount = 0; // Number of queries found in cache in this transaction (for metrics only). 
mutable int64_t _cacheHits = 0; From 7377779ce973fbdf2b5913434f53d489c441cb2f Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 12:41:26 -0800 Subject: [PATCH 067/127] Code review feedback --- sqlitecluster/SQLite.cpp | 49 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 49e89c0b1..59b1f2757 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -645,29 +645,30 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { const int64_t journalID = _sharedData.nextJournalCount++; _journalName = _journalNames[journalID % _journalNames.size()]; - // It's possible to attempt to commit a transaction with no writes. We'll skip truncating the journal in this case to avoid - // Turning a no-op into a write. - if (_uncommittedQuery.size()) { - // Look up the oldest commit in our chosen journal, and compute the oldest commit we intend to keep. - SQResult result; - SASSERT(!SQuery(_db, "getting commit min", "SELECT MIN(id) FROM " + _journalName, result)); - uint64_t minJournalEntry = result.size() ? SToUInt64(result[0][0]) : 0; - uint64_t commitCount = _sharedData.commitCount; - - // If the commitCount is less than the max journal size, keep everything. Otherwise, keep everything from - // commitCount - _maxJournalSize forward. We can't just do the last subtraction part because it overflows our unsigned - // int. - uint64_t oldestCommitToKeep = commitCount < _maxJournalSize ? 0 : commitCount - _maxJournalSize; - - // We limit deletions to a relatively small number to avoid making this extremenly slow for some transactions in the case - // where this journal in particular has accumulated a large backlog. 
- static const size_t deleteLimit = 10; - if (minJournalEntry < oldestCommitToKeep) { - string query = "DELETE FROM " + _journalName + " WHERE id < " + SQ(oldestCommitToKeep) + " LIMIT " + SQ(deleteLimit); - SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); - size_t deletedCount = sqlite3_changes(_db); - SINFO("Removed " << deletedCount << " rows from journal " << _journalName << ", oldestToKeep: " << oldestCommitToKeep << ", count:" << commitCount << ", limit: " << _maxJournalSize); - } + // Look up the oldest commit in our chosen journal, and compute the oldest commit we intend to keep. + SQResult journalLookupResult; + SASSERT(!SQuery(_db, "getting commit min", "SELECT MIN(id) FROM " + _journalName, journalLookupResult)); + uint64_t minJournalEntry = journalLookupResult.size() ? SToUInt64(journalLookupResult[0][0]) : 0; + + // Note that this can change before we hold the lock on _sharedData.commitLock, but it doesn't matter yet, as we're only + // using it to truncate the journal. We'll reset this value once we acquire that lock. + uint64_t commitCount = _sharedData.commitCount; + + // If the commitCount is less than the max journal size, keep everything. Otherwise, keep everything from + // commitCount - _maxJournalSize forward. We can't just do the last subtraction part because it overflows our unsigned + // int. + uint64_t oldestCommitToKeep = commitCount < _maxJournalSize ? 0 : commitCount - _maxJournalSize; + + // We limit deletions to a relatively small number to avoid making this extremely slow for some transactions in the case + // where this journal in particular has accumulated a large backlog. 
+ static const size_t deleteLimit = 10; + if (minJournalEntry < oldestCommitToKeep) { + auto startUS = STimeNow(); + string query = "DELETE FROM " + _journalName + " WHERE id < " + SQ(oldestCommitToKeep) + " LIMIT " + SQ(deleteLimit); + SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); + size_t deletedCount = sqlite3_changes(_db); + SINFO("Removed " << deletedCount << " rows from journal " << _journalName << ", oldestToKeep: " << oldestCommitToKeep << ", count:" + << commitCount << ", limit: " << _maxJournalSize << ", in " << (STimeNow() - startUS) << "us."); } // We lock this here, so that we can guarantee the order in which commits show up in the database. @@ -691,7 +692,7 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { // Now that we've locked anybody else from committing, look up the state of the database. We don't need to lock the // SharedData object to get these values as we know it can't currently change. - uint64_t commitCount = _sharedData.commitCount; + commitCount = _sharedData.commitCount; // Queue up the journal entry string lastCommittedHash = getCommittedHash(); // This is why we need the lock. 
From 451441baf934dcf271c0fdaa1ac1652dabad3b6d Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Mon, 16 Dec 2024 16:50:31 -0400 Subject: [PATCH 068/127] Store _checkpointMode as int --- BedrockServer.cpp | 5 ----- main.cpp | 2 ++ sqlitecluster/SQLite.cpp | 38 +++++++++++++++++++++----------------- sqlitecluster/SQLite.h | 5 +++-- 4 files changed, 26 insertions(+), 24 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index cbe7d7e9a..be9297eab 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1267,11 +1267,6 @@ BedrockServer::BedrockServer(const SData& args_) sort(versions.begin(), versions.end()); _version = SComposeList(versions, ":"); - const set validCheckpointModes = {"PASSIVE", "FULL", "RESTART", "TRUNCATE"}; - if (validCheckpointModes.find(args["-checkpointMode"]) == validCheckpointModes.end()) { - SERROR("Invalid checkpoint mode " << args["-checkpointMode"]); - } - list pluginString; for (auto& p : plugins) { pluginString.emplace_back(p.first); diff --git a/main.cpp b/main.cpp index 160f4262d..e318e70ca 100644 --- a/main.cpp +++ b/main.cpp @@ -300,6 +300,8 @@ int main(int argc, char* argv[]) { SETDEFAULT("-maxJournalSize", "1000000"); SETDEFAULT("-queryLog", "queryLog.csv"); SETDEFAULT("-enableMultiWrite", "true"); + + // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. 
SETDEFAULT("-checkpointMode", "PASSIVE"); args["-plugins"] = SComposeList(loadPlugins(args)); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 0f81a685b..3a8d0f9c2 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -232,7 +232,7 @@ SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, _journalSize(initializeJournalSize(_db, _journalNames)), _cacheSize(cacheSize), _mmapSizeGB(mmapSizeGB), - _checkpointMode(checkpointMode) + _checkpointMode(getCheckpointModeFromString(checkpointMode)) { commonConstructorInitialization(hctree); } @@ -822,24 +822,12 @@ int SQLite::commit(const string& description, function* preCheckpointCal auto start = STimeNow(); int framesCheckpointed = 0; - // We default to PASSIVE checkpoint everywhere as that has been the value proven to work fine for many years. - if (_checkpointMode != "PASSIVE") { - int checkpointMode = SQLITE_CHECKPOINT_PASSIVE; - if (_checkpointMode == "FULL") { - checkpointMode = SQLITE_CHECKPOINT_FULL; - } else if (_checkpointMode == "RESTART") { - checkpointMode = SQLITE_CHECKPOINT_RESTART; - } else if (_checkpointMode == "TRUNCATE") { - checkpointMode = SQLITE_CHECKPOINT_TRUNCATE; - } - // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. - // We set it to 2 minutes as the majority of transactions should take less than that. + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. 
+ if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { sqlite3_busy_timeout(_db, 120'000); - sqlite3_wal_checkpoint_v2(_db, 0, checkpointMode, NULL, &framesCheckpointed); - sqlite3_busy_timeout(_db, 0); - } else { - sqlite3_wal_checkpoint_v2(_db, 0, SQLITE_CHECKPOINT_PASSIVE, NULL, &framesCheckpointed); } + sqlite3_wal_checkpoint_v2(_db, 0, _checkpointMode, NULL, &framesCheckpointed); auto end = STimeNow(); SINFO(_checkpointMode << " checkpoint complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); @@ -865,6 +853,22 @@ int SQLite::commit(const string& description, function* preCheckpointCal return result; } +int SQLite::getCheckpointModeFromString(const string& checkpointModeString) { + if (checkpointModeString == "PASSIVE") { + return SQLITE_CHECKPOINT_PASSIVE; + } + if (checkpointModeString == "FULL") { + return SQLITE_CHECKPOINT_FULL; + } + if (checkpointModeString == "RESTART") { + return SQLITE_CHECKPOINT_RESTART; + } + if (checkpointModeString == "TRUNCATE") { + return SQLITE_CHECKPOINT_TRUNCATE; + } + SERROR("Invalid checkpoint type: " << checkpointModeString); +} + map> SQLite::popCommittedTransactions() { return _sharedData.popCommittedTransactions(); } diff --git a/sqlitecluster/SQLite.h b/sqlitecluster/SQLite.h index e11e91e3c..81254f2c2 100644 --- a/sqlitecluster/SQLite.h +++ b/sqlitecluster/SQLite.h @@ -356,6 +356,7 @@ class SQLite { static vector initializeJournal(sqlite3* db, int minJournalTables); static uint64_t initializeJournalSize(sqlite3* db, const vector& journalNames); void commonConstructorInitialization(bool hctree = false); + static int getCheckpointModeFromString(const string& checkpointModeString); // The filename of this DB, canonicalized to its full path on disk. const string _filename; @@ -530,6 +531,6 @@ class SQLite { // Set to true inside of a write query. 
bool _currentlyWriting{false}; - // One of PASSIVE|FULL|RESTART|TRUNCATE, translated to corresponding values to be passed to sqlite3_wal_checkpoint_v2. - string _checkpointMode; + // One of 0|1|2|3 (a.k.a. PASSIVE|FULL|RESTART|TRUNCATE), which is the value to be passed to sqlite3_wal_checkpoint_v2. + int _checkpointMode; }; From 69389b56920dcc8473ed51652c23f3ba78093027 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 15:17:41 -0800 Subject: [PATCH 069/127] Attempt to figure out where it gets stuck --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index 77c36ec94..ca0615581 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 8 +./clustertest -threads 1 cd ../.. mark_fold end test_bedrock_cluster From d0dfdea3bb8493079cde26fa6ffb75c25594c79e Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 15:49:32 -0800 Subject: [PATCH 070/127] I think ConflictSpam breaks --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index ca0615581..1c9290653 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 +./clustertest -threads 1 -only ConflictSpam cd ../.. mark_fold end test_bedrock_cluster From 4cf5fe79f4d2f1013e1d867e12bdc7889c143910 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 17:02:12 -0800 Subject: [PATCH 071/127] Binary search the tests, first half. 
--- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index 1c9290653..34eddbc1a 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only ConflictSpam +./clustertest -threads 1 -only BadCommand,BroadcastCommand,ClusterUpgrade,ConflictSpam,ControlCommand,DoubleDetach,Escalate,FastStandDown,FinishJob,ForkCheck,FutureExecution,GracefulFailover,HTTPS,JobID cd ../.. mark_fold end test_bedrock_cluster From 975de1ad57046def806b4dc85f0686ede6e6149f Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 17:19:08 -0800 Subject: [PATCH 072/127] Binary search the tests, first quarter. --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index 34eddbc1a..dbf645fd8 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only BadCommand,BroadcastCommand,ClusterUpgrade,ConflictSpam,ControlCommand,DoubleDetach,Escalate,FastStandDown,FinishJob,ForkCheck,FutureExecution,GracefulFailover,HTTPS,JobID +./clustertest -threads 1 -only BadCommand,BroadcastCommand,ClusterUpgrade,ConflictSpam,ControlCommand,DoubleDetach,Escalate cd ../.. mark_fold end test_bedrock_cluster From ea89fed5a6599605f32b193446f00363fadf1917 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 17:36:16 -0800 Subject: [PATCH 073/127] Binary search the tests, first 1/8th. 
--- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index dbf645fd8..304009bee 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only BadCommand,BroadcastCommand,ClusterUpgrade,ConflictSpam,ControlCommand,DoubleDetach,Escalate +./clustertest -threads 1 -only BadCommand,BroadcastCommand,ClusterUpgrade,ConflictSpam cd ../.. mark_fold end test_bedrock_cluster From 9310e263d8fb01c7bd15824703ae9d9eece24f65 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 17:43:46 -0800 Subject: [PATCH 074/127] Binary search the tests, second 1/8th. --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index 304009bee..f9ca0cd99 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only BadCommand,BroadcastCommand,ClusterUpgrade,ConflictSpam +./clustertest -threads 1 -only ControlCommand,DoubleDetach,Escalate cd ../.. mark_fold end test_bedrock_cluster From c7d39b053da22b113ef7d74d46d6969c069a5c33 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 17:55:30 -0800 Subject: [PATCH 075/127] Only control --- ci_tests.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index f9ca0cd99..a06f9b06d 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,8 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only ControlCommand,DoubleDetach,Escalate +./clustertest -threads 1 -only ControlCommand +# ControlCommand,DoubleDetach,Escalate cd ../.. 
mark_fold end test_bedrock_cluster From c16c1859178c2289f40703bf2c36e23398b6e0be Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 18:04:31 -0800 Subject: [PATCH 076/127] Only DoubleDetach --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index a06f9b06d..caa9588eb 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only ControlCommand +./clustertest -threads 1 -only DoubleDetach # ControlCommand,DoubleDetach,Escalate cd ../.. mark_fold end test_bedrock_cluster From aa1019992f6ca18babeb12cf75a327a36818577d Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 18:16:43 -0800 Subject: [PATCH 077/127] Only Escalate --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index caa9588eb..98bda9fff 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only DoubleDetach +./clustertest -threads 1 -only Escalate # ControlCommand,DoubleDetach,Escalate cd ../.. mark_fold end test_bedrock_cluster From 42adccf6baaccc2940ee27b62f01fac96fa473ef Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 16 Dec 2024 19:02:03 -0800 Subject: [PATCH 078/127] Except DoubleDetach --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index 98bda9fff..42ded1ee4 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only Escalate +./clustertest -threads 1 -except DoubleDetach # ControlCommand,DoubleDetach,Escalate cd ../.. 
mark_fold end test_bedrock_cluster From b8fae0c95a93a7096ea27b3496e03fd3c750d166 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 07:52:30 -0800 Subject: [PATCH 079/127] Add cout notes --- test/clustertest/tests/DoubleDetachTest.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/clustertest/tests/DoubleDetachTest.cpp b/test/clustertest/tests/DoubleDetachTest.cpp index f7d5b8b8a..9ac06dfd2 100644 --- a/test/clustertest/tests/DoubleDetachTest.cpp +++ b/test/clustertest/tests/DoubleDetachTest.cpp @@ -23,16 +23,22 @@ struct DoubleDetachTest : tpunit::TestFixture { void testDoubleDetach() { // Test a control command + cout << "A" << endl; BedrockTester& follower = tester->getTester(1); // Detach + cout << "B" << endl; SData detachCommand("detach"); + cout << "C" << endl; follower.executeWaitVerifyContent(detachCommand, "203 DETACHING", true); // Wait for it to detach + cout << "D" << endl; sleep(3); + cout << "E" << endl; follower.executeWaitVerifyContent(detachCommand, "400 Already detached", true); + cout << "F" << endl; } } __DoubleDetachTest; From e1e7c0d474a64cc719e3776e511543b0d21f9bab Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 08:20:43 -0800 Subject: [PATCH 080/127] Needs only, not except --- ci_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci_tests.sh b/ci_tests.sh index 42ded1ee4..caa9588eb 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,7 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -except DoubleDetach +./clustertest -threads 1 -only DoubleDetach # ControlCommand,DoubleDetach,Escalate cd ../.. 
mark_fold end test_bedrock_cluster From 3df5fdfe869919ea2f5b7b68f5368bc8dc6c7cb9 Mon Sep 17 00:00:00 2001 From: Florent De Neve Date: Tue, 17 Dec 2024 12:37:25 -0400 Subject: [PATCH 081/127] Move sqlite3_busy_timeout to commonConstructorInitialization --- sqlitecluster/SQLite.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 3a8d0f9c2..3a831e409 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -220,6 +220,12 @@ void SQLite::commonConstructorInitialization(bool hctree) { // Always set synchronous commits to off for best commit performance in WAL mode. SASSERT(!SQuery(_db, "setting synchronous commits to off", "PRAGMA synchronous = OFF;")); + + // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. + // We set it to 2 minutes as the majority of transactions should take less than that. + if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { + sqlite3_busy_timeout(_db, 120'000); + } } SQLite::SQLite(const string& filename, int cacheSize, int maxJournalSize, @@ -821,15 +827,9 @@ int SQLite::commit(const string& description, function* preCheckpointCal if (_sharedData.outstandingFramesToCheckpoint) { auto start = STimeNow(); int framesCheckpointed = 0; - - // For non-passive checkpoints, we must set a busy timeout in order to wait on any readers. - // We set it to 2 minutes as the majority of transactions should take less than that. 
- if (_checkpointMode != SQLITE_CHECKPOINT_PASSIVE) { - sqlite3_busy_timeout(_db, 120'000); - } sqlite3_wal_checkpoint_v2(_db, 0, _checkpointMode, NULL, &framesCheckpointed); auto end = STimeNow(); - SINFO(_checkpointMode << " checkpoint complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); + SINFO("Checkpoint with type=" << _checkpointMode << " complete with " << framesCheckpointed << " frames checkpointed of " << _sharedData.outstandingFramesToCheckpoint << " frames outstanding in " << (end - start) << "us."); // It might not actually be 0, but we'll just let sqlite tell us what it is next time _walHookCallback runs. _sharedData.outstandingFramesToCheckpoint = 0; From 64263b28b1528c0651f1569bbccba0ac646a549c Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 09:08:16 -0800 Subject: [PATCH 082/127] Attemot to shutdown cleanly --- test/clustertest/tests/DoubleDetachTest.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/clustertest/tests/DoubleDetachTest.cpp b/test/clustertest/tests/DoubleDetachTest.cpp index 9ac06dfd2..71b93e140 100644 --- a/test/clustertest/tests/DoubleDetachTest.cpp +++ b/test/clustertest/tests/DoubleDetachTest.cpp @@ -28,7 +28,7 @@ struct DoubleDetachTest : tpunit::TestFixture { // Detach cout << "B" << endl; - SData detachCommand("detach"); + SData detachCommand("Detach"); cout << "C" << endl; follower.executeWaitVerifyContent(detachCommand, "203 DETACHING", true); @@ -39,6 +39,11 @@ struct DoubleDetachTest : tpunit::TestFixture { cout << "E" << endl; follower.executeWaitVerifyContent(detachCommand, "400 Already detached", true); cout << "F" << endl; + + // Re-attach to make shutdown clean. 
+ SData attachCommand("Attach"); + follower.executeWaitVerifyContent(attachCommand, "204 ATTACHING", true); + cout << "G" << endl; } } __DoubleDetachTest; From 92644ade49c73a26afafbb8eab118247a38155fe Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 09:15:33 -0800 Subject: [PATCH 083/127] Remove test changes --- ci_tests.sh | 3 +-- test/clustertest/tests/DoubleDetachTest.cpp | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/ci_tests.sh b/ci_tests.sh index caa9588eb..77c36ec94 100755 --- a/ci_tests.sh +++ b/ci_tests.sh @@ -48,8 +48,7 @@ mark_fold end test_bedrock mark_fold start test_bedrock_cluster cd test/clustertest -./clustertest -threads 1 -only DoubleDetach -# ControlCommand,DoubleDetach,Escalate +./clustertest -threads 8 cd ../.. mark_fold end test_bedrock_cluster diff --git a/test/clustertest/tests/DoubleDetachTest.cpp b/test/clustertest/tests/DoubleDetachTest.cpp index 71b93e140..db1edf34f 100644 --- a/test/clustertest/tests/DoubleDetachTest.cpp +++ b/test/clustertest/tests/DoubleDetachTest.cpp @@ -23,27 +23,20 @@ struct DoubleDetachTest : tpunit::TestFixture { void testDoubleDetach() { // Test a control command - cout << "A" << endl; BedrockTester& follower = tester->getTester(1); // Detach - cout << "B" << endl; SData detachCommand("Detach"); - cout << "C" << endl; follower.executeWaitVerifyContent(detachCommand, "203 DETACHING", true); // Wait for it to detach - cout << "D" << endl; sleep(3); - cout << "E" << endl; follower.executeWaitVerifyContent(detachCommand, "400 Already detached", true); - cout << "F" << endl; // Re-attach to make shutdown clean. 
SData attachCommand("Attach"); follower.executeWaitVerifyContent(attachCommand, "204 ATTACHING", true); - cout << "G" << endl; } } __DoubleDetachTest; From 9da6394f2fce5d87ff7883030e69846a0c2d0da3 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 12:38:55 -0800 Subject: [PATCH 084/127] Add log lines --- BedrockServer.cpp | 7 +++++++ sqlitecluster/SQLite.cpp | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 98e59414f..a31c7f40e 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1823,6 +1823,7 @@ atomic __quiesceShouldUnlock(false); thread* __quiesceThread = nullptr; void BedrockServer::_control(unique_ptr& command) { + SINFO("Received control command: " << command->request.methodLine); SData& response = command->response; string reason = "MANUAL"; response.methodLine = "200 OK"; @@ -1913,7 +1914,9 @@ void BedrockServer::_control(unique_ptr& command) { if (dbPoolCopy) { SQLiteScopedHandle dbScope(*_dbPool, _dbPool->getIndex()); SQLite& db = dbScope.db(); + SINFO("[quiesce] Exclusive locking DB"); db.exclusiveLockDB(); + SINFO("[quiesce] Exclusive locked DB"); locked = true; while (true) { if (__quiesceShouldUnlock) { @@ -1936,12 +1939,16 @@ void BedrockServer::_control(unique_ptr& command) { response.methodLine = "200 Blocked"; } } else if (SIEquals(command->request.methodLine, "UnblockWrites")) { + SINFO("[quiesce] Locking __quiesceLock"); lock_guard lock(__quiesceLock); + SINFO("[quiesce] __quiesceLock locked"); if (!__quiesceThread) { response.methodLine = "200 Not Blocked"; } else { __quiesceShouldUnlock = true; + SINFO("[quiesce] Joining __quiesceThread"); __quiesceThread->join(); + SINFO("[quiesce] __quiesceThread joined"); delete __quiesceThread; __quiesceThread = nullptr; response.methodLine = "200 Unblocked"; diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index f31888cd6..26d3b9a18 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -323,13 
+323,17 @@ void SQLite::exclusiveLockDB() { // writes in this case. // So when these are both locked by the same thread at the same time, `commitLock` is always locked first, and we do it the same way here to avoid deadlocks. try { + SINFO("Locking commitLock"); _sharedData.commitLock.lock(); + SINFO("commitLock Locked"); } catch (const system_error& e) { SWARN("Caught system_error calling _sharedData.commitLock, code: " << e.code() << ", message: " << e.what()); throw; } try { + SINFO("Locking writeLock"); _sharedData.writeLock.lock(); + SINFO("writeLock Locked"); } catch(const system_error& e) { SWARN("Caught system_error calling _sharedData.writeLock, code: " << e.code() << ", message: " << e.what()); throw; From 259970629cf337b48f86189c7b50813c0dbb0c00 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 16:04:08 -0800 Subject: [PATCH 085/127] Add fix --- BedrockServer.cpp | 1 + sqlitecluster/SQLite.cpp | 7 +++++++ sqlitecluster/SQLiteNode.cpp | 7 ++++++- sqlitecluster/SQLiteNode.h | 8 ++++---- sqlitecluster/SQLitePeer.cpp | 1 + 5 files changed, 19 insertions(+), 5 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index a31c7f40e..c36298c04 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1721,6 +1721,7 @@ void BedrockServer::_status(unique_ptr& command) { // Coalesce all of the peer data into one value to return or return // an error message if we timed out getting the peerList data. 
list peerList; + // This blocks during state change list peerData = getPeerInfo(); for (const STable& peerTable : peerData) { peerList.push_back(SComposeJSONObject(peerTable)); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 26d3b9a18..1b55cfe7e 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -669,6 +669,7 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { static const size_t deleteLimit = 10; if (minJournalEntry < oldestCommitToKeep) { auto startUS = STimeNow(); + shared_lock lock(_sharedData.writeLock); string query = "DELETE FROM " + _journalName + " WHERE id < " + SQ(oldestCommitToKeep) + " LIMIT " + SQ(deleteLimit); SASSERT(!SQuery(_db, "Deleting oldest journal rows", query)); size_t deletedCount = sqlite3_changes(_db); @@ -676,6 +677,12 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { << commitCount << ", limit: " << _maxJournalSize << ", in " << (STimeNow() - startUS) << "us."); } + // So let's say that a replicate thread is running the above. Neither the write or commit lock is held. + + // Let's say another thread calls `exclusiveLockDB`. It grabs the commit lock. + // We grab the write lock above. Other thread waits. We release writeLock, it acquires it. Writes are now blocked. + // We attempot to grab commitLock below. We block. + // We lock this here, so that we can guarantee the order in which commits show up in the database. if (!_mutexLocked) { auto start = STimeNow(); diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 17db479ba..480b6f3f1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -493,7 +493,12 @@ void SQLiteNode::_sendOutstandingTransactions(const set& commitOnlyIDs } list SQLiteNode::getPeerInfo() const { - shared_lock sharedLock(_stateMutex); + // This does not lock _stateMutex. 
It follows the rule in `SQLiteNode.h` that says: + // * Alternatively, a public `const` method that is a simple getter for an atomic property can skip the lock. + // peer->getData is atomic internally, so we can treat `peer->getData()` as a simple getter for an atomic property. + // _peerList is also `const` and so we can iterate this list safely regardless of the lock. + // This makes this function a slightly more complex getter for an atomic property, but it's still safe to skip + // The state lock here. list peerData; for (SQLitePeer* peer : _peerList) { peerData.emplace_back(peer->getData()); diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index fd4ba5f6f..06868daae 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -25,16 +25,16 @@ * Rules for maintaining SQLiteNode methods so that atomicity works as intended. * * No non-const members should be publicly exposed. - * Any public method that is `const` must shared_lock<>(nodeMutex). + * Any public method that is `const` must shared_lock<>(_stateMutex). * Alternatively, a public `const` method that is a simple getter for an atomic property can skip the lock. - * Any public method that is non-const must unique_lock<>(nodeMutex) before changing any internal state, and must hold + * Any public method that is non-const must unique_lock<>(_stateMutex) before changing any internal state, and must hold * this lock until it is done changing state to make this method's changes atomic. * Any private methods must not call public methods. - * Any private methods must not lock nodeMutex (for recursion reasons). + * Any private methods must not lock _stateMutex (for recursion reasons). * Any public methods must not call other public methods. * * `_replicate` is a special exception because it runs in multiple threads internally. It needs to handle locking if it - * changes any internal state (and it calls `changeState`, which does). 
+ * changes any internal state (and it calls `changeState`, which it does). * */ diff --git a/sqlitecluster/SQLitePeer.cpp b/sqlitecluster/SQLitePeer.cpp index c93589520..a9636ca1d 100644 --- a/sqlitecluster/SQLitePeer.cpp +++ b/sqlitecluster/SQLitePeer.cpp @@ -225,6 +225,7 @@ void SQLitePeer::getCommit(uint64_t& count, string& hashString) const { } STable SQLitePeer::getData() const { + lock_guard lock(peerMutex); // Add all of our standard stuff. STable result({ {"name", name}, From fe86e09155b000cb4207d93cc956d74dbf4aa88b Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Tue, 17 Dec 2024 16:07:26 -0800 Subject: [PATCH 086/127] Remove test code --- BedrockServer.cpp | 1 - sqlitecluster/SQLite.cpp | 6 ------ sqlitecluster/SQLiteNode.h | 2 +- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 45fdcda4a..c0cad65f3 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1721,7 +1721,6 @@ void BedrockServer::_status(unique_ptr& command) { // Coalesce all of the peer data into one value to return or return // an error message if we timed out getting the peerList data. list peerList; - // This blocks during state change list peerData = getPeerInfo(); for (const STable& peerTable : peerData) { peerList.push_back(SComposeJSONObject(peerTable)); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 39cf31044..80a4636fc 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -685,12 +685,6 @@ bool SQLite::prepare(uint64_t* transactionID, string* transactionhash) { << commitCount << ", limit: " << _maxJournalSize << ", in " << (STimeNow() - startUS) << "us."); } - // So let's say that a replicate thread is running the above. Neither the write or commit lock is held. - - // Let's say another thread calls `exclusiveLockDB`. It grabs the commit lock. - // We grab the write lock above. Other thread waits. We release writeLock, it acquires it. Writes are now blocked. 
- // We attempot to grab commitLock below. We block. - // We lock this here, so that we can guarantee the order in which commits show up in the database. if (!_mutexLocked) { auto start = STimeNow(); diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 06868daae..817a4e9b9 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -34,7 +34,7 @@ * Any public methods must not call other public methods. * * `_replicate` is a special exception because it runs in multiple threads internally. It needs to handle locking if it - * changes any internal state (and it calls `changeState`, which it does). + * changes any internal state (and it calls `changeState`, which does). * */ From bead743c6273181462fadf8e4991c5e9b08af4a0 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 18 Dec 2024 09:54:00 -0800 Subject: [PATCH 087/127] Final PR to remove NODE_LOGIN --- sqlitecluster/SQLiteNode.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 480b6f3f1..21a958594 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1259,12 +1259,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->latency = max(STimeNow() - message.calc64("Timestamp"), 1ul); SINFO("Received PONG from peer '" << peer->name << "' (" << peer->latency/1000 << "ms latency)"); return; - } else if (SIEquals(message.methodLine, "NODE_LOGIN")) { - // We need to return early here to ignore this deprecated message and avoid throwing: - // STHROW("not logged in"); - // Below. We can remove this check after one more deploy cycle. - // https://github.com/Expensify/Expensify/issues/450953 - return; } // We ignore everything except PING and PONG from forked nodes, so we can return here in that case. 
@@ -2559,10 +2553,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { int messageSize = message.deserialize(socket->recvBuffer); if (messageSize) { socket->recvBuffer.consumeFront(messageSize); - // Old nodes, for one more upgrade cycle, will still send `NODE_LOGIN`. We can remove this check after this - // code is deployed. - // See: https://github.com/Expensify/Expensify/issues/450953 - if (SIEquals(message.methodLine, "NODE_LOGIN") || SIEquals(message.methodLine, "LOGIN")) { + if (SIEquals(message.methodLine, "LOGIN")) { SQLitePeer* peer = getPeerByName(message["Name"]); if (peer) { if (peer->setSocket(socket)) { From 6b27f16ed4c77372d50dce8aa40d287b4fe8c47a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 19 Dec 2024 09:16:55 -0800 Subject: [PATCH 088/127] Who needs a query for this --- Makefile | 2 +- main.cpp | 2 +- sqlitecluster/SQLite.cpp | 6 ++++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 71a34084a..9e6073ab9 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ INCLUDE = -I$(PROJECT) -I$(PROJECT)/mbedtls/include CXXFLAGS = -g -std=c++20 -fPIC -DSQLITE_ENABLE_NORMALIZE $(BEDROCK_OPTIM_COMPILE_FLAG) -Wall -Werror -Wformat-security -Wno-unqualified-std-cast-call -Wno-error=deprecated-declarations $(INCLUDE) # Amalgamation flags -AMALGAMATION_FLAGS = -Wno-unused-but-set-variable -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_STAT4 -DSQLITE_ENABLE_JSON1 -DSQLITE_ENABLE_SESSION -DSQLITE_ENABLE_PREUPDATE_HOOK -DSQLITE_ENABLE_UPDATE_DELETE_LIMIT -DSQLITE_ENABLE_NOOP_UPDATE -DSQLITE_MUTEX_ALERT_MILLISECONDS=20 -DHAVE_USLEEP=1 -DSQLITE_MAX_MMAP_SIZE=17592186044416ull -DSQLITE_SHARED_MAPPING -DSQLITE_ENABLE_NORMALIZE -DSQLITE_MAX_PAGE_COUNT=4294967294 -DSQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS +AMALGAMATION_FLAGS = -Wno-unused-but-set-variable -DSQLITE_ENABLE_FTS5 -DSQLITE_ENABLE_STAT4 -DSQLITE_ENABLE_JSON1 -DSQLITE_ENABLE_SESSION -DSQLITE_ENABLE_PREUPDATE_HOOK -DSQLITE_ENABLE_UPDATE_DELETE_LIMIT 
-DSQLITE_ENABLE_NOOP_UPDATE -DSQLITE_MUTEX_ALERT_MILLISECONDS=20 -DHAVE_USLEEP=1 -DSQLITE_MAX_MMAP_SIZE=17592186044416ull -DSQLITE_SHARED_MAPPING -DSQLITE_ENABLE_NORMALIZE -DSQLITE_MAX_PAGE_COUNT=4294967294 -DSQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS -DSQLITE_DEFAULT_CACHE_SIZE=-51200 # All our intermediate, dependency, object, etc files get hidden in here. INTERMEDIATEDIR = .build diff --git a/main.cpp b/main.cpp index e318e70ca..4bd034d26 100644 --- a/main.cpp +++ b/main.cpp @@ -294,7 +294,7 @@ int main(int argc, char* argv[]) { SETDEFAULT("-commandPortPrivate", "localhost:8890"); SETDEFAULT("-controlPort", "localhost:9999"); SETDEFAULT("-nodeName", SGetHostName()); - SETDEFAULT("-cacheSize", SToStr(1024 * 1024)); // 1024 * 1024KB = 1GB. + SETDEFAULT("-cacheSize", SToStr(0)); SETDEFAULT("-plugins", "db,jobs,cache,mysql"); SETDEFAULT("-priority", "100"); SETDEFAULT("-maxJournalSize", "1000000"); diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index 80a4636fc..a2f8201c3 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -183,8 +183,10 @@ void SQLite::commonConstructorInitialization(bool hctree) { sqlite3_trace_v2(_db, SQLITE_TRACE_STMT, _sqliteTraceCallback, this); // Update the cache. -size means KB; +size means pages - SINFO("Setting cache_size to " << _cacheSize << "KB"); - SQuery(_db, "increasing cache size", "PRAGMA cache_size = -" + SQ(_cacheSize) + ";"); + if (_cacheSize) { + SINFO("Setting cache_size to " << _cacheSize << "KB"); + SQuery(_db, "increasing cache size", "PRAGMA cache_size = -" + SQ(_cacheSize) + ";"); + } // Register the authorizer callback which allows callers to whitelist particular data in the DB. 
sqlite3_set_authorizer(_db, _sqliteAuthorizerCallback, this); From 2f6cdab758610d14f403b68f908c9d19128b11bf Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Fri, 20 Dec 2024 12:14:13 -0800 Subject: [PATCH 089/127] Fewer mallocs --- libstuff/sqlite3.c | 207 ++++++++++++++++++++++----------------------- libstuff/sqlite3.h | 2 +- 2 files changed, 103 insertions(+), 106 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 5381fa9ec..33034cd19 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -18,7 +18,7 @@ ** separate file. This file contains only code for the core SQLite library. ** ** The content in this amalgamation comes from Fossil check-in -** fa87355f6286be1e92f22a71cbfbfb13d1a4. +** b40cd7395c44b1f2d019d8e809e03de0e083. */ #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 @@ -465,7 +465,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-13 18:13:51 fa87355f6286be1e92f22a71cbfbfb13d1a478d5fb5b38abedbd78bf903171fa" +#define SQLITE_SOURCE_ID "2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -17859,6 +17859,9 @@ struct Schema { u8 enc; /* Text encoding used by this database */ u16 schemaFlags; /* Flags associated with this schema */ int cache_size; /* Number of pages to use in the cache */ +#ifdef SQLITE_ENABLE_STAT4 + void *pStat4Space; /* Memory for stat4 Index.aSample[] arrays */ +#endif }; /* @@ -18238,11 +18241,13 @@ struct sqlite3 { #define SCHEMA_TIME_AFTER_STAT1 12 #define SCHEMA_TIME_AFTER_DEFAULTS 13 -#define SCHEMA_TIME_STAT4_Q1_BODY 14 -#define SCHEMA_TIME_AFTER_STAT4_Q1 15 -#define SCHEMA_TIME_STAT4_Q2_BODY 16 -#define SCHEMA_TIME_STAT4_SAMPLE_MALLOC 17 -#define SCHEMA_TIME_AFTER_STAT4_Q2 18 +#define SCHEMA_TIME_AFTER_STAT4_SPACE 14 +#define SCHEMA_TIME_AFTER_STAT4_PREPARE 15 + +#define SCHEMA_TIME_STAT4_GROWUS 16 +#define SCHEMA_TIME_STAT4_Q2_BODYUS 17 
+#define SCHEMA_TIME_AFTER_STAT4_Q2 18 + #define SCHEMA_TIME_AFTER_STAT4 19 #define SCHEMA_TIME_END_ANALYZE_LOAD 20 @@ -93492,7 +93497,7 @@ SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ } zStr = sqlite3_mprintf("%z%s%s%d%s", zStr, (zStr?", ":""),zHash,iVal,zU); } - sqlite3_log(SQLITE_WARNING, "slow commit (v=19): (%s)", zStr); + sqlite3_log(SQLITE_WARNING, "slow commit (v=20): (%s)", zStr); sqlite3_free(zStr); } } @@ -93520,7 +93525,7 @@ SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrep } if( nByte<0 ){ nByte = sqlite3Strlen30(zSql); } sqlite3_log(SQLITE_WARNING, - "slow prepare (v=19): (%s) [%.*s]", zStr, nByte, zSql + "slow prepare (v=20): (%s) [%.*s]", zStr, nByte, zSql ); sqlite3_free(zStr); } @@ -93534,15 +93539,14 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchema, const char *zFile){ for(ii=1; iiaSample[j]; sqlite3DbFree(db, p->p); } - sqlite3DbFree(db, pIdx->aSample); + if( pIdx->nSampleAlloc!=SQLITE_STAT4_EST_SAMPLES ){ + sqlite3DbFree(db, pIdx->aSample); + } } if( db->pnBytesFreed==0 ){ pIdx->nSample = 0; @@ -123789,7 +123802,7 @@ static Index *findIndexOrPrimaryKey( ** Grow the pIdx->aSample[] array. Return SQLITE_OK if successful, or ** SQLITE_NOMEM otherwise. */ -static int growSampleArray(sqlite3 *db, Index *pIdx){ +static int growSampleArray(sqlite3 *db, Index *pIdx, int *piOff){ int nIdxCol = pIdx->nSampleCol; int nNew = 0; IndexSample *aNew = 0; @@ -123799,31 +123812,24 @@ static int growSampleArray(sqlite3 *db, Index *pIdx){ int i; u64 t; - /* In production set the initial allocation to SQLITE_STAT4_SAMPLES. This - ** means that reallocation will almost never be required. But for debug - ** builds, set the initial allocation size to 6 entries so that the - ** reallocation code gets tested. todo: use real tests for this. 
*/ assert( pIdx->nSample==pIdx->nSampleAlloc ); -#ifdef SQLITE_DEBUG - nNew = 6; -#else - nNew = SQLITE_STAT4_SAMPLES; -#endif + nNew = SQLITE_STAT4_EST_SAMPLES; if( pIdx->nSample ){ nNew = pIdx->nSample*2; } + /* Set nByte to the required amount of space */ nByte = ROUND8(sizeof(IndexSample) * nNew); nByte += sizeof(tRowcnt) * nIdxCol * 3 * nNew; nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ - if( db->aSchemaTime ){ - t = sqlite3STimeNow(); - } - aNew = (IndexSample*)sqlite3DbMallocRaw(db, nByte); - if( aNew==0 ) return SQLITE_NOMEM_BKPT; - if( db->aSchemaTime ){ - db->aSchemaTime[SCHEMA_TIME_STAT4_SAMPLE_MALLOC] += (sqlite3STimeNow() - t); + if( nNew==SQLITE_STAT4_EST_SAMPLES ){ + aNew = (IndexSample*)&((u8*)pIdx->pSchema->pStat4Space)[*piOff]; + *piOff += nByte; + assert( *piOff<=sqlite3_msize(pIdx->pSchema->pStat4Space) ); + }else{ + aNew = (IndexSample*)sqlite3DbMallocRaw(db, nByte); + if( aNew==0 ) return SQLITE_NOMEM_BKPT; } pPtr = (u8*)aNew; @@ -123850,15 +123856,53 @@ static int growSampleArray(sqlite3 *db, Index *pIdx){ } assert( ((u8*)pSpace)-nByte==(u8*)aNew ); - sqlite3DbFree(db, pIdx->aSample); + if( pIdx->nSample!=SQLITE_STAT4_EST_SAMPLES ){ + sqlite3DbFree(db, pIdx->aSample); + } pIdx->aSample = aNew; pIdx->nSampleAlloc = nNew; return SQLITE_OK; } /* -** Load the content from either the sqlite_stat4 -** into the relevant Index.aSample[] arrays. +** Allocate the space that will likely be required for the Index.aSample[] +** arrays populated by loading data from the sqlite_stat4 table. Return +** SQLITE_OK if successful, or SQLITE_NOMEM otherwise. 
+*/ +static int stat4AllocSpace(sqlite3 *db, const char *zDb){ + int iDb = sqlite3FindDbName(db, zDb); + Schema *pSchema = db->aDb[iDb].pSchema; + int nByte = 0; + HashElem *k; + + assert( iDb>=0 ); + assert( pSchema->pStat4Space==0 ); + for(k=sqliteHashFirst(&pSchema->idxHash); k; k=sqliteHashNext(k)){ + Index *pIdx = sqliteHashData(k); + int nIdxCol; + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + nIdxCol = pIdx->nKeyCol; + }else{ + nIdxCol = pIdx->nColumn; + } + nByte += ROUND8(sizeof(IndexSample) * SQLITE_STAT4_EST_SAMPLES); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * SQLITE_STAT4_EST_SAMPLES; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + } + + if( nByte>0 ){ + pSchema->pStat4Space = sqlite3_malloc(nByte); + if( pSchema->pStat4Space==0 ){ + return SQLITE_NOMEM_BKPT; + } + } + + return SQLITE_OK; +} + +/* +** Load the content from the sqlite_stat4 into the relevant Index.aSample[] +** arrays. ** ** Arguments zSql1 and zSql2 must point to SQL statements that return ** data equivalent to the following: @@ -123879,78 +123923,16 @@ static int loadStatTbl( char *zSql; /* Text of the SQL statement */ Index *pPrevIdx = 0; /* Previous index in the loop */ IndexSample *pSample; /* A slot in pIdx->aSample[] */ + int iBlockOff = 0; /* Offset into Schema.pStat4Space */ assert( db->lookaside.bDisable ); -#if 0 - zSql = sqlite3MPrintf(db, zSql1, zDb); - if( !zSql ){ - return SQLITE_NOMEM_BKPT; - } - rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); - sqlite3DbFree(db, zSql); - if( rc ) return rc; - - while( sqlite3_step(pStmt)==SQLITE_ROW ){ - int nIdxCol = 1; /* Number of columns in stat4 records */ - - char *zIndex; /* Index name */ - Index *pIdx; /* Pointer to the index object */ - int nSample; /* Number of samples */ - int nByte; /* Bytes of space required */ - int i; /* Bytes of space required */ - tRowcnt *pSpace; /* Available allocated memory space */ - u8 *pPtr; /* Available memory as a u8 for easier manipulation */ - - u64 t = 
sqlite3STimeNow(); - zIndex = (char *)sqlite3_column_text(pStmt, 0); - if( zIndex==0 ) continue; - nSample = sqlite3_column_int(pStmt, 1); - pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); - assert( pIdx==0 || pIdx->nSample==0 ); - if( pIdx==0 ) continue; - if( pIdx->aSample!=0 ){ - /* The same index appears in sqlite_stat4 under multiple names */ - continue; - } - assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); - if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ - nIdxCol = pIdx->nKeyCol; - }else{ - nIdxCol = pIdx->nColumn; - } - pIdx->nSampleCol = nIdxCol; - pIdx->mxSample = nSample; - nByte = ROUND8(sizeof(IndexSample) * nSample); - nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; - nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ - - pIdx->aSample = sqlite3DbMallocZero(db, nByte); - if( pIdx->aSample==0 ){ - sqlite3_finalize(pStmt); - return SQLITE_NOMEM_BKPT; - } - pPtr = (u8*)pIdx->aSample; - pPtr += ROUND8(nSample*sizeof(pIdx->aSample[0])); - pSpace = (tRowcnt*)pPtr; - assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); - pIdx->aAvgEq = pSpace; pSpace += nIdxCol; - pIdx->pTable->tabFlags |= TF_HasStat4; - for(i=0; iaSample[i].anEq = pSpace; pSpace += nIdxCol; - pIdx->aSample[i].anLt = pSpace; pSpace += nIdxCol; - pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; - } - assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); - if( db->aSchemaTime ){ - db->aSchemaTime[SCHEMA_TIME_STAT4_Q1_BODY] += (sqlite3STimeNow() - t); - } - } - rc = sqlite3_finalize(pStmt); - if( rc ) return rc; -#endif + /* Allocate the Schema.pStat4Space block that will be used for the + ** Index.aSample[] arrays populated by this call. 
*/ + rc = stat4AllocSpace(db, zDb); + if( rc!=SQLITE_OK ) return rc; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q1); + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_SPACE); zSql = sqlite3MPrintf(db, zSql2, zDb); if( !zSql ){ @@ -123960,6 +123942,8 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_PREPARE); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ @@ -123972,6 +123956,7 @@ static int loadStatTbl( if( pIdx==0 ) continue; if( pIdx->nSample==pIdx->nSampleAlloc ){ + u64 t2; pIdx->pTable->tabFlags |= TF_HasStat4; assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ @@ -123979,7 +123964,11 @@ static int loadStatTbl( }else{ pIdx->nSampleCol = pIdx->nColumn; } - if( growSampleArray(db, pIdx) ) break; + t2 = sqlite3STimeNow(); + if( growSampleArray(db, pIdx, &iBlockOff) ) break; + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_GROWUS] += (sqlite3STimeNow() - t); + } } if( pIdx!=pPrevIdx ){ @@ -124012,7 +124001,7 @@ static int loadStatTbl( pIdx->nSample++; if( db->aSchemaTime ){ - db->aSchemaTime[SCHEMA_TIME_STAT4_Q2_BODY] += (sqlite3STimeNow() - t); + db->aSchemaTime[SCHEMA_TIME_STAT4_Q2_BODYUS] += (sqlite3STimeNow() - t); } } rc = sqlite3_finalize(pStmt); @@ -124089,6 +124078,10 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ pIdx->aSample = 0; #endif } +#ifdef SQLITE_ENABLE_STAT4 + sqlite3_free(pSchema->pStat4Space); + pSchema->pStat4Space = 0; +#endif sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); @@ -131355,6 +131348,10 @@ SQLITE_PRIVATE void sqlite3SchemaClear(void *p){ pSchema->iGeneration++; } pSchema->schemaFlags &= ~(DB_SchemaLoaded|DB_ResetWanted); +#ifdef SQLITE_ENABLE_STAT4 + sqlite3_free(pSchema->pStat4Space); + pSchema->pStat4Space = 
0; +#endif } /* @@ -258029,7 +258026,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-13 18:13:51 fa87355f6286be1e92f22a71cbfbfb13d1a478d5fb5b38abedbd78bf903171fa", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f", -1, SQLITE_TRANSIENT); } /* diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index fe6bc6016..9827d4007 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -148,7 +148,7 @@ extern "C" { */ #define SQLITE_VERSION "3.47.0" #define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-13 18:13:51 fa87355f6286be1e92f22a71cbfbfb13d1a478d5fb5b38abedbd78bf903171fa" +#define SQLITE_SOURCE_ID "2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f" /* ** CAPI3REF: Run-Time Library Version Numbers From 19c8ca1b1b33dcc1120fd75f83fc02ce0665a256 Mon Sep 17 00:00:00 2001 From: Rafe Colton Date: Fri, 20 Dec 2024 20:47:56 +0000 Subject: [PATCH 090/127] Remove cache size assert --- sqlitecluster/SQLite.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sqlitecluster/SQLite.cpp b/sqlitecluster/SQLite.cpp index a2f8201c3..8d10d6ee1 100644 --- a/sqlitecluster/SQLite.cpp +++ b/sqlitecluster/SQLite.cpp @@ -167,7 +167,6 @@ vector SQLite::initializeJournal(sqlite3* db, int minJournalTables) { void SQLite::commonConstructorInitialization(bool hctree) { // Perform sanity checks. SASSERT(!_filename.empty()); - SASSERT(_cacheSize > 0); SASSERT(_maxJournalSize > 0); // WAL is what allows simultaneous read/writing. 
From b1a1e946812216501894baf3890e3e74374ac0ed Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:26:07 +0000 Subject: [PATCH 091/127] receiving and calling new callback --- sqlitecluster/SQLiteNode.cpp | 10 +++++++++- sqlitecluster/SQLiteNode.h | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 21a958594..21adf634f 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1240,7 +1240,7 @@ bool SQLiteNode::update() { // Messages // Here are the messages that can be received, and how a cluster node will respond to each based on its state: -void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { +void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message, function commandPortCallback) { try { SASSERT(peer); SASSERTWARN(!message.empty()); @@ -1655,6 +1655,14 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } } + // This will notify the BedrockServer's callback to block or unblock the command port based on + // how many commits we are behind. + { + SQLiteScopedHandle dbScope(*_dbPool, _dbPool->getIndex(false)); + SQLite& db = dbScope.db(); + const int64_t currentCommitDifference = message.calcU64("NewCount") - db.getCommitCount(); + commandPortCallback(currentCommitDifference); + } } catch (const system_error& e) { // If the server is strugling and falling behind on replication, we might have too many threads // causing a resource exhaustion. If that happens, all the transactions that are already threaded diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 817a4e9b9..75d66a86a 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -243,7 +243,7 @@ class SQLiteNode : public STCPManager { void _onDisconnect(SQLitePeer* peer); // Called when the peer sends us a message; throw an SException to reconnect. 
- void _onMESSAGE(SQLitePeer* peer, const SData& message); + void _onMESSAGE(SQLitePeer* peer, const SData& message, function commandPortCallback = nullptr); void _reconnectAll(); void _reconnectPeer(SQLitePeer* peer); void _recvSynchronize(SQLitePeer* peer, const SData& message); From ef6dc048c47e4d790e9bc2b8589c3ab38cbd7cb6 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:37:44 +0000 Subject: [PATCH 092/127] sending function on post poll --- sqlitecluster/SQLiteNode.cpp | 6 +++--- sqlitecluster/SQLiteNode.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 21adf634f..5e6bc53c7 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1657,7 +1657,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message, functiongetIndex(false)); SQLite& db = dbScope.db(); const int64_t currentCommitDifference = message.calcU64("NewCount") - db.getCommitCount(); @@ -2536,7 +2536,7 @@ STCPManager::Socket* SQLiteNode::_acceptSocket() { return socket; } -void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { +void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity, function commandPortCallback) { unique_lock uniqueLock(_stateMutex); // Accept any new peers @@ -2643,7 +2643,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { size_t messagesDeqeued = 0; while (true) { SData message = peer->popMessage(); - _onMESSAGE(peer, message); + _onMESSAGE(peer, messagem, commandPortCallback); messagesDeqeued++; if (messagesDeqeued >= 100) { // We should run again immediately, we have more to do. diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 75d66a86a..dbedbdd86 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -165,7 +165,7 @@ class SQLiteNode : public STCPManager { void kill(); // Handle any read/write events that occurred. 
- void postPoll(fd_map& fdm, uint64_t& nextActivity); + void postPoll(fd_map& fdm, uint64_t& nextActivity, function commandPortCallback = nullptr); // Constructor/Destructor SQLiteNode(SQLiteServer& server, shared_ptr dbPool, const string& name, const string& host, From 8603f433781d24dcd24af183689b9de414f8636a Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:38:32 +0000 Subject: [PATCH 093/127] adding callback function for blocking the port --- BedrockServer.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index c0cad65f3..d7d132491 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -253,7 +253,24 @@ void BedrockServer::sync() // Process any activity in our plugins. AutoTimerTime postPollTime(postPollTimer); - _syncNode->postPoll(fdm, nextActivity); + _syncNode->postPoll(fdm, nextActivity, [&](const int64_t currentCommitCountDiff){ + // If the command port is already closed, we don't need to check anything and can early return. + if (_isCommandPortLikelyBlocked) { + return; + } + const string blockReason = "COMMITS_LAGGING_BEHIND"; + // If + if (currentCommitCountDiff > 50'000) { + SINFO("Node is lagging behind, blocking command port so it can catch up."); + blockCommandPort(blockReason); + } else if (currentCommitCountDiff < 10'000 && _commandPortBlockReasons.find(blockReason) == _commandPortBlockReasons.end()) { + // We verify if we have the block reason we expected before unblocking so we don't call unblock every time, which will + // generate a warning if we don't have the block reason. 
+ SINFO("Node is caught up enough, unblocking command port."); + unblockCommandPort(blockReason); + } + + }); _syncNodeQueuedCommands.postPoll(fdm); _notifyDoneSync.postPoll(fdm); } From 257b07c9fe641f8f374cd71d9dba20ffbca458bc Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:40:25 +0000 Subject: [PATCH 094/127] function should be logically correct now --- BedrockServer.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index d7d132491..13e86ba93 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -260,16 +260,15 @@ void BedrockServer::sync() } const string blockReason = "COMMITS_LAGGING_BEHIND"; // If - if (currentCommitCountDiff > 50'000) { + if (!_isCommandPortLikelyBlocked && currentCommitCountDiff > 50'000) { SINFO("Node is lagging behind, blocking command port so it can catch up."); blockCommandPort(blockReason); - } else if (currentCommitCountDiff < 10'000 && _commandPortBlockReasons.find(blockReason) == _commandPortBlockReasons.end()) { - // We verify if we have the block reason we expected before unblocking so we don't call unblock every time, which will - // generate a warning if we don't have the block reason. + } else if (_isCommandPortLikelyBlocked && currentCommitCountDiff < 10'000 && _commandPortBlockReasons.find(blockReason) == _commandPortBlockReasons.end()) { + // We verify if we have the block reason we expected before calling unblock. Unblock would generate a warning, and we don't + // want to do that if don't really need to. 
SINFO("Node is caught up enough, unblocking command port."); unblockCommandPort(blockReason); } - }); _syncNodeQueuedCommands.postPoll(fdm); _notifyDoneSync.postPoll(fdm); From 8d887836034a0a50c90c53ff9b76f647663d4921 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:48:15 +0000 Subject: [PATCH 095/127] adding new methods to the SQLiteServer interface --- sqlitecluster/SQLiteServer.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sqlitecluster/SQLiteServer.h b/sqlitecluster/SQLiteServer.h index 0d8804f0d..245892a66 100644 --- a/sqlitecluster/SQLiteServer.h +++ b/sqlitecluster/SQLiteServer.h @@ -13,4 +13,8 @@ class SQLiteServer : public STCPManager { // We call this method whenever a node changes state virtual void notifyStateChangeToPlugins(SQLite& db, SQLiteNodeState newState) = 0; + + // You must block and unblock the command port with *identical strings*. + virtual void blockCommandPort(const string& reason); + virtual void unblockCommandPort(const string& reason); }; From 82a1e21986280f25e2742e9e88e85c29ef886979 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:48:41 +0000 Subject: [PATCH 096/127] adding override into the methods --- BedrockServer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BedrockServer.h b/BedrockServer.h index 4a6175d27..a23bcc886 100644 --- a/BedrockServer.h +++ b/BedrockServer.h @@ -182,8 +182,8 @@ class BedrockServer : public SQLiteServer { void onNodeLogin(SQLitePeer* peer) override; // You must block and unblock the command port with *identical strings*. - void blockCommandPort(const string& reason); - void unblockCommandPort(const string& reason); + void blockCommandPort(const string& reason) override; + void unblockCommandPort(const string& reason) override; // Legacy version of above. 
void suppressCommandPort(const string& reason, bool suppress, bool manualOverride = false); From d4d19c624fa6373e41c4a74a82a977f4394ea9e9 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:51:12 +0000 Subject: [PATCH 097/127] fixing test plugin --- test/tests/SQLiteNodeTest.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/tests/SQLiteNodeTest.cpp b/test/tests/SQLiteNodeTest.cpp index 4bdb882e3..046c5e48a 100644 --- a/test/tests/SQLiteNodeTest.cpp +++ b/test/tests/SQLiteNodeTest.cpp @@ -26,6 +26,8 @@ class TestServer : public SQLiteServer { virtual bool canStandDown() { return true; } virtual void onNodeLogin(SQLitePeer* peer) { } virtual void notifyStateChangeToPlugins(SQLite& db, SQLiteNodeState newState) {} + virtual void blockCommandPort(const string& reason) { }; + virtual void unblockCommandPort(const string& reason) { }; }; struct SQLiteNodeTest : tpunit::TestFixture { From d5384e3d7cc5e78619dc3e65f871d37254628722 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 17:53:32 +0000 Subject: [PATCH 098/127] removing new function, using the current port block functions --- sqlitecluster/SQLiteNode.cpp | 24 ++++++++++++++---------- sqlitecluster/SQLiteNode.h | 4 ++-- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 5e6bc53c7..8dbc73692 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1240,7 +1240,7 @@ bool SQLiteNode::update() { // Messages // Here are the messages that can be received, and how a cluster node will respond to each based on its state: -void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message, function commandPortCallback) { +void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { try { SASSERT(peer); SASSERTWARN(!message.empty()); @@ -1655,13 +1655,17 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message, functiongetIndex(false)); - SQLite& db = 
dbScope.db(); - const int64_t currentCommitDifference = message.calcU64("NewCount") - db.getCommitCount(); - commandPortCallback(currentCommitDifference); + const int64_t currentCommitDifference = message.calcU64("NewCount") - getCommitCount(); + const string blockReason = "COMMITS_LAGGING_BEHIND"; + // If + if (!_isCommandPortLikelyBlocked && currentCommitCountDiff > 50'000) { + SINFO("Node is lagging behind, blocking command port so it can catch up."); + blockCommandPort(blockReason); + } else if (_isCommandPortLikelyBlocked && currentCommitCountDiff < 10'000 && _commandPortBlockReasons.find(blockReason) == _commandPortBlockReasons.end()) { + // We verify if we have the block reason we expected before calling unblock. Unblock would generate a warning, and we don't + // want to do that if don't really need to. + SINFO("Node is caught up enough, unblocking command port."); + unblockCommandPort(blockReason); } } catch (const system_error& e) { // If the server is strugling and falling behind on replication, we might have too many threads @@ -2536,7 +2540,7 @@ STCPManager::Socket* SQLiteNode::_acceptSocket() { return socket; } -void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity, function commandPortCallback) { +void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { unique_lock uniqueLock(_stateMutex); // Accept any new peers @@ -2643,7 +2647,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity, functionpopMessage(); - _onMESSAGE(peer, messagem, commandPortCallback); + _onMESSAGE(peer, messagem); messagesDeqeued++; if (messagesDeqeued >= 100) { // We should run again immediately, we have more to do. diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index dbedbdd86..817a4e9b9 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -165,7 +165,7 @@ class SQLiteNode : public STCPManager { void kill(); // Handle any read/write events that occurred. 
- void postPoll(fd_map& fdm, uint64_t& nextActivity, function commandPortCallback = nullptr); + void postPoll(fd_map& fdm, uint64_t& nextActivity); // Constructor/Destructor SQLiteNode(SQLiteServer& server, shared_ptr dbPool, const string& name, const string& host, @@ -243,7 +243,7 @@ class SQLiteNode : public STCPManager { void _onDisconnect(SQLitePeer* peer); // Called when the peer sends us a message; throw an SException to reconnect. - void _onMESSAGE(SQLitePeer* peer, const SData& message, function commandPortCallback = nullptr); + void _onMESSAGE(SQLitePeer* peer, const SData& message); void _reconnectAll(); void _reconnectPeer(SQLitePeer* peer); void _recvSynchronize(SQLitePeer* peer, const SData& message); From a4bf7d181e99db21278653508cd15cc4382a9eb1 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 18:11:32 +0000 Subject: [PATCH 099/127] adding new method to check if port is closed or not --- BedrockServer.cpp | 18 +++++++----------- sqlitecluster/SQLiteServer.h | 1 + 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 13e86ba93..47a66934a 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -258,17 +258,6 @@ void BedrockServer::sync() if (_isCommandPortLikelyBlocked) { return; } - const string blockReason = "COMMITS_LAGGING_BEHIND"; - // If - if (!_isCommandPortLikelyBlocked && currentCommitCountDiff > 50'000) { - SINFO("Node is lagging behind, blocking command port so it can catch up."); - blockCommandPort(blockReason); - } else if (_isCommandPortLikelyBlocked && currentCommitCountDiff < 10'000 && _commandPortBlockReasons.find(blockReason) == _commandPortBlockReasons.end()) { - // We verify if we have the block reason we expected before calling unblock. Unblock would generate a warning, and we don't - // want to do that if don't really need to. 
- SINFO("Node is caught up enough, unblocking command port."); - unblockCommandPort(blockReason); - } }); _syncNodeQueuedCommands.postPoll(fdm); _notifyDoneSync.postPoll(fdm); @@ -1605,6 +1594,13 @@ void BedrockServer::unblockCommandPort(const string& reason) { } } +virtual void isCommandPortClosed(const string& reason) { + if (!strlen(reason)) { + return _isCommandPortLikelyBlocked; + } + return _commandPortBlockReasons.find(reason) != _commandPortBlockReasons.end(); +} + void BedrockServer::suppressCommandPort(const string& reason, bool suppress, bool manualOverride) { if (suppress) { blockCommandPort("LEGACY_" + reason); diff --git a/sqlitecluster/SQLiteServer.h b/sqlitecluster/SQLiteServer.h index 245892a66..2a0f8f4f5 100644 --- a/sqlitecluster/SQLiteServer.h +++ b/sqlitecluster/SQLiteServer.h @@ -17,4 +17,5 @@ class SQLiteServer : public STCPManager { // You must block and unblock the command port with *identical strings*. virtual void blockCommandPort(const string& reason); virtual void unblockCommandPort(const string& reason); + virtual void isCommandPortClosed(const string& reason); }; From fb549a89284e6d4e13a327f9e3fae14311346522 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 18:16:28 +0000 Subject: [PATCH 100/127] small changes --- BedrockServer.cpp | 2 ++ sqlitecluster/SQLiteNode.cpp | 9 +++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 47a66934a..66e6bf99d 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1598,6 +1598,8 @@ virtual void isCommandPortClosed(const string& reason) { if (!strlen(reason)) { return _isCommandPortLikelyBlocked; } + // Get the shared mutex so we don't execute read operations while changing the set + shared_mutex lock(_portMutex); return _commandPortBlockReasons.find(reason) != _commandPortBlockReasons.end(); } diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 8dbc73692..405fd90a0 100644 --- 
a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1657,13 +1657,10 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } const int64_t currentCommitDifference = message.calcU64("NewCount") - getCommitCount(); const string blockReason = "COMMITS_LAGGING_BEHIND"; - // If - if (!_isCommandPortLikelyBlocked && currentCommitCountDiff > 50'000) { - SINFO("Node is lagging behind, blocking command port so it can catch up."); + if (currentCommitCountDiff > 50'000 && !isCommandPortClosed(blockReason)) { + SINFO("Node is lagging behind, closing command port so it can catch up."); blockCommandPort(blockReason); - } else if (_isCommandPortLikelyBlocked && currentCommitCountDiff < 10'000 && _commandPortBlockReasons.find(blockReason) == _commandPortBlockReasons.end()) { - // We verify if we have the block reason we expected before calling unblock. Unblock would generate a warning, and we don't - // want to do that if don't really need to. + } else if (isCommandPortClosed(blockReason) && currentCommitCountDiff < 10'000) { SINFO("Node is caught up enough, unblocking command port."); unblockCommandPort(blockReason); } From 5962cdcdd3d4c5252c6484e1fd8141be1c87e763 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 18:18:40 +0000 Subject: [PATCH 101/127] fixing return type and adding to test plugin --- BedrockServer.cpp | 2 +- sqlitecluster/SQLiteServer.h | 2 +- test/tests/SQLiteNodeTest.cpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 66e6bf99d..5a6c2aec1 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1594,7 +1594,7 @@ void BedrockServer::unblockCommandPort(const string& reason) { } } -virtual void isCommandPortClosed(const string& reason) { +virtual bool isCommandPortClosed(const string& reason) { if (!strlen(reason)) { return _isCommandPortLikelyBlocked; } diff --git a/sqlitecluster/SQLiteServer.h b/sqlitecluster/SQLiteServer.h index 
2a0f8f4f5..c4e0e2913 100644 --- a/sqlitecluster/SQLiteServer.h +++ b/sqlitecluster/SQLiteServer.h @@ -17,5 +17,5 @@ class SQLiteServer : public STCPManager { // You must block and unblock the command port with *identical strings*. virtual void blockCommandPort(const string& reason); virtual void unblockCommandPort(const string& reason); - virtual void isCommandPortClosed(const string& reason); + virtual bool isCommandPortClosed(const string& reason); }; diff --git a/test/tests/SQLiteNodeTest.cpp b/test/tests/SQLiteNodeTest.cpp index 046c5e48a..3ef16ad7c 100644 --- a/test/tests/SQLiteNodeTest.cpp +++ b/test/tests/SQLiteNodeTest.cpp @@ -28,6 +28,7 @@ class TestServer : public SQLiteServer { virtual void notifyStateChangeToPlugins(SQLite& db, SQLiteNodeState newState) {} virtual void blockCommandPort(const string& reason) { }; virtual void unblockCommandPort(const string& reason) { }; + virtual bool isCommandPortClosed(const string& reason) { return false; }; }; struct SQLiteNodeTest : tpunit::TestFixture { From 88c2980bff5391104726e1266065b51bf280d1b7 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 18:37:08 +0000 Subject: [PATCH 102/127] fixing locks issues --- BedrockServer.cpp | 31 +++++++++++++------------------ BedrockServer.h | 3 ++- sqlitecluster/SQLiteNode.cpp | 10 +++++----- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 5a6c2aec1..5e6a546b6 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -253,12 +253,7 @@ void BedrockServer::sync() // Process any activity in our plugins. AutoTimerTime postPollTime(postPollTimer); - _syncNode->postPoll(fdm, nextActivity, [&](const int64_t currentCommitCountDiff){ - // If the command port is already closed, we don't need to check anything and can early return. 
- if (_isCommandPortLikelyBlocked) { - return; - } - }); + _syncNode->postPoll(fdm, nextActivity); _syncNodeQueuedCommands.postPoll(fdm); _notifyDoneSync.postPoll(fdm); } @@ -1206,7 +1201,7 @@ bool BedrockServer::_wouldCrash(const unique_ptr& command) { } void BedrockServer::_resetServer() { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _requestCount = 0; _upgradeInProgress = false; @@ -1319,7 +1314,7 @@ BedrockServer::BedrockServer(const SData& args_) // Allow sending control commands when the server's not LEADING/FOLLOWING. SINFO("Opening control port on '" << args["-controlPort"] << "'"); { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _controlPort = openPort(args["-controlPort"]); } @@ -1375,7 +1370,7 @@ bool BedrockServer::shutdownComplete() { } void BedrockServer::prePoll(fd_map& fdm) { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); // This will interrupt poll when we shut down. _notifyDone.prePoll(fdm); @@ -1401,7 +1396,7 @@ void BedrockServer::postPoll(fd_map& fdm, uint64_t& nextActivity) { // NOTE: There are no sockets managed here, just ports. 
// Open the port the first time we enter a command-processing state { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); if (_commandPortBlockReasons.empty() && (getState() == SQLiteNodeState::LEADING || getState() == SQLiteNodeState::FOLLOWING) && _shutdownState.load() == RUNNING) { // Open the port @@ -1570,7 +1565,7 @@ void BedrockServer::_reply(unique_ptr& command) { void BedrockServer::blockCommandPort(const string& reason) { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _commandPortBlockReasons.insert(reason); _isCommandPortLikelyBlocked = true; if (_commandPortBlockReasons.size() == 1) { @@ -1581,7 +1576,7 @@ void BedrockServer::blockCommandPort(const string& reason) { } void BedrockServer::unblockCommandPort(const string& reason) { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); auto it = _commandPortBlockReasons.find(reason); if (it == _commandPortBlockReasons.end()) { SWARN("Tried to remove command port block because: " << reason << ", but it wasn't blocked for that reason!"); @@ -1594,12 +1589,12 @@ void BedrockServer::unblockCommandPort(const string& reason) { } } -virtual bool isCommandPortClosed(const string& reason) { - if (!strlen(reason)) { +bool BedrockServer::isCommandPortClosed(const string& reason) { + if (reason.empty()) { return _isCommandPortLikelyBlocked; } // Get the shared mutex so we don't execute read operations while changing the set - shared_mutex lock(_portMutex); + shared_lock lock(_portMutex); return _commandPortBlockReasons.find(reason) != _commandPortBlockReasons.end(); } @@ -1741,7 +1736,7 @@ void BedrockServer::_status(unique_ptr& command) { } { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); content["commandPortBlockReasons"] = SComposeJSONArray(_commandPortBlockReasons); } @@ -2015,7 +2010,7 @@ void BedrockServer::_beginShutdown(const string& reason, bool detach) { // down, so otherwise there's a race condition where that happens just after we close them but before we // 
change the state. { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _commandPortPublic = nullptr; _commandPortPrivate = nullptr; if (!_detach) { @@ -2094,7 +2089,7 @@ void BedrockServer::_acceptSockets() { // Lock _portMutex so suppressing the port does not cause it to be null // in the middle of this function. - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); for (auto& p : _portPluginMap) { portList.push_back(reference_wrapper>(p.first)); diff --git a/BedrockServer.h b/BedrockServer.h index a23bcc886..77982ce5e 100644 --- a/BedrockServer.h +++ b/BedrockServer.h @@ -184,6 +184,7 @@ class BedrockServer : public SQLiteServer { // You must block and unblock the command port with *identical strings*. void blockCommandPort(const string& reason) override; void unblockCommandPort(const string& reason) override; + bool isCommandPortClosed(const string& reason) override; // Legacy version of above. void suppressCommandPort(const string& reason, bool suppress, bool manualOverride = false); @@ -377,7 +378,7 @@ class BedrockServer : public SQLiteServer { atomic _detach; // Pointers to the ports on which we accept commands. 
- mutex _portMutex; + shared_mutex _portMutex; // The "control port" is intended to be open to privileged clients (i.e., localhost and other nodes in the Bedrock // cluster) it can be used to run any command including commands meant for cluster operations, changing server diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 405fd90a0..59843c842 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1657,12 +1657,12 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } const int64_t currentCommitDifference = message.calcU64("NewCount") - getCommitCount(); const string blockReason = "COMMITS_LAGGING_BEHIND"; - if (currentCommitCountDiff > 50'000 && !isCommandPortClosed(blockReason)) { + if (currentCommitDifference > 50'000 && !_server.isCommandPortClosed(blockReason)) { SINFO("Node is lagging behind, closing command port so it can catch up."); - blockCommandPort(blockReason); - } else if (isCommandPortClosed(blockReason) && currentCommitCountDiff < 10'000) { + _server.blockCommandPort(blockReason); + } else if (currentCommitDifference < 10'000 && _server.isCommandPortClosed(blockReason)) { SINFO("Node is caught up enough, unblocking command port."); - unblockCommandPort(blockReason); + _server.unblockCommandPort(blockReason); } } catch (const system_error& e) { // If the server is strugling and falling behind on replication, we might have too many threads @@ -2644,7 +2644,7 @@ void SQLiteNode::postPoll(fd_map& fdm, uint64_t& nextActivity) { size_t messagesDeqeued = 0; while (true) { SData message = peer->popMessage(); - _onMESSAGE(peer, messagem); + _onMESSAGE(peer, message); messagesDeqeued++; if (messagesDeqeued >= 100) { // We should run again immediately, we have more to do. 
From d6f3d80b4dc38659517e4ceb74cbf51cf63b7790 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 18:49:19 +0000 Subject: [PATCH 103/127] fixing methods to be true virtual --- sqlitecluster/SQLiteServer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteServer.h b/sqlitecluster/SQLiteServer.h index c4e0e2913..c5b82d4c0 100644 --- a/sqlitecluster/SQLiteServer.h +++ b/sqlitecluster/SQLiteServer.h @@ -15,7 +15,7 @@ class SQLiteServer : public STCPManager { virtual void notifyStateChangeToPlugins(SQLite& db, SQLiteNodeState newState) = 0; // You must block and unblock the command port with *identical strings*. - virtual void blockCommandPort(const string& reason); - virtual void unblockCommandPort(const string& reason); - virtual bool isCommandPortClosed(const string& reason); + virtual void blockCommandPort(const string& reason) = 0; + virtual void unblockCommandPort(const string& reason) = 0; + virtual bool isCommandPortClosed(const string& reason) = 0; }; From a7dffc08f483b9f1e8bafade4a67cf69a7390f00 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 19:28:17 +0000 Subject: [PATCH 104/127] addressing comments --- BedrockServer.cpp | 27 +++++++++------------------ BedrockServer.h | 3 +-- sqlitecluster/SQLiteNode.cpp | 9 --------- sqlitecluster/SQLiteNode.h | 3 ++- sqlitecluster/SQLiteServer.h | 1 - test/tests/SQLiteNodeTest.cpp | 1 - 6 files changed, 12 insertions(+), 32 deletions(-) diff --git a/BedrockServer.cpp b/BedrockServer.cpp index 5e6a546b6..c0cad65f3 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -1201,7 +1201,7 @@ bool BedrockServer::_wouldCrash(const unique_ptr& command) { } void BedrockServer::_resetServer() { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _requestCount = 0; _upgradeInProgress = false; @@ -1314,7 +1314,7 @@ BedrockServer::BedrockServer(const SData& args_) // Allow sending control commands when the server's not LEADING/FOLLOWING. 
SINFO("Opening control port on '" << args["-controlPort"] << "'"); { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _controlPort = openPort(args["-controlPort"]); } @@ -1370,7 +1370,7 @@ bool BedrockServer::shutdownComplete() { } void BedrockServer::prePoll(fd_map& fdm) { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); // This will interrupt poll when we shut down. _notifyDone.prePoll(fdm); @@ -1396,7 +1396,7 @@ void BedrockServer::postPoll(fd_map& fdm, uint64_t& nextActivity) { // NOTE: There are no sockets managed here, just ports. // Open the port the first time we enter a command-processing state { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); if (_commandPortBlockReasons.empty() && (getState() == SQLiteNodeState::LEADING || getState() == SQLiteNodeState::FOLLOWING) && _shutdownState.load() == RUNNING) { // Open the port @@ -1565,7 +1565,7 @@ void BedrockServer::_reply(unique_ptr& command) { void BedrockServer::blockCommandPort(const string& reason) { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _commandPortBlockReasons.insert(reason); _isCommandPortLikelyBlocked = true; if (_commandPortBlockReasons.size() == 1) { @@ -1576,7 +1576,7 @@ void BedrockServer::blockCommandPort(const string& reason) { } void BedrockServer::unblockCommandPort(const string& reason) { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); auto it = _commandPortBlockReasons.find(reason); if (it == _commandPortBlockReasons.end()) { SWARN("Tried to remove command port block because: " << reason << ", but it wasn't blocked for that reason!"); @@ -1589,15 +1589,6 @@ void BedrockServer::unblockCommandPort(const string& reason) { } } -bool BedrockServer::isCommandPortClosed(const string& reason) { - if (reason.empty()) { - return _isCommandPortLikelyBlocked; - } - // Get the shared mutex so we don't execute read operations while changing the set - shared_lock lock(_portMutex); - return _commandPortBlockReasons.find(reason) != 
_commandPortBlockReasons.end(); -} - void BedrockServer::suppressCommandPort(const string& reason, bool suppress, bool manualOverride) { if (suppress) { blockCommandPort("LEGACY_" + reason); @@ -1736,7 +1727,7 @@ void BedrockServer::_status(unique_ptr& command) { } { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); content["commandPortBlockReasons"] = SComposeJSONArray(_commandPortBlockReasons); } @@ -2010,7 +2001,7 @@ void BedrockServer::_beginShutdown(const string& reason, bool detach) { // down, so otherwise there's a race condition where that happens just after we close them but before we // change the state. { - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); _commandPortPublic = nullptr; _commandPortPrivate = nullptr; if (!_detach) { @@ -2089,7 +2080,7 @@ void BedrockServer::_acceptSockets() { // Lock _portMutex so suppressing the port does not cause it to be null // in the middle of this function. - lock_guard lock(_portMutex); + lock_guard lock(_portMutex); for (auto& p : _portPluginMap) { portList.push_back(reference_wrapper>(p.first)); diff --git a/BedrockServer.h b/BedrockServer.h index 77982ce5e..a23bcc886 100644 --- a/BedrockServer.h +++ b/BedrockServer.h @@ -184,7 +184,6 @@ class BedrockServer : public SQLiteServer { // You must block and unblock the command port with *identical strings*. void blockCommandPort(const string& reason) override; void unblockCommandPort(const string& reason) override; - bool isCommandPortClosed(const string& reason) override; // Legacy version of above. void suppressCommandPort(const string& reason, bool suppress, bool manualOverride = false); @@ -378,7 +377,7 @@ class BedrockServer : public SQLiteServer { atomic _detach; // Pointers to the ports on which we accept commands. 
- shared_mutex _portMutex; + mutex _portMutex; // The "control port" is intended to be open to privileged clients (i.e., localhost and other nodes in the Bedrock // cluster) it can be used to run any command including commands meant for cluster operations, changing server diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 59843c842..21a958594 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1655,15 +1655,6 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { } } } - const int64_t currentCommitDifference = message.calcU64("NewCount") - getCommitCount(); - const string blockReason = "COMMITS_LAGGING_BEHIND"; - if (currentCommitDifference > 50'000 && !_server.isCommandPortClosed(blockReason)) { - SINFO("Node is lagging behind, closing command port so it can catch up."); - _server.blockCommandPort(blockReason); - } else if (currentCommitDifference < 10'000 && _server.isCommandPortClosed(blockReason)) { - SINFO("Node is caught up enough, unblocking command port."); - _server.unblockCommandPort(blockReason); - } } catch (const system_error& e) { // If the server is strugling and falling behind on replication, we might have too many threads // causing a resource exhaustion. If that happens, all the transactions that are already threaded diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 817a4e9b9..dbdd17879 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -364,9 +364,10 @@ class SQLiteNode : public STCPManager { // Stopwatch to track if we're giving up on the server preventing a standdown. SStopwatch _standDownTimeout; - // Our current State. + // Our current State. atomic _state; + atomic _blockedCommandPort{false}; // This is an integer that increments every time we change states. 
This is useful for responses to state changes // (i.e., approving standup) to verify that the messages we're receiving are relevant to the current state change, // and not stale responses to old changes. diff --git a/sqlitecluster/SQLiteServer.h b/sqlitecluster/SQLiteServer.h index c5b82d4c0..9b10e3835 100644 --- a/sqlitecluster/SQLiteServer.h +++ b/sqlitecluster/SQLiteServer.h @@ -17,5 +17,4 @@ class SQLiteServer : public STCPManager { // You must block and unblock the command port with *identical strings*. virtual void blockCommandPort(const string& reason) = 0; virtual void unblockCommandPort(const string& reason) = 0; - virtual bool isCommandPortClosed(const string& reason) = 0; }; diff --git a/test/tests/SQLiteNodeTest.cpp b/test/tests/SQLiteNodeTest.cpp index 3ef16ad7c..046c5e48a 100644 --- a/test/tests/SQLiteNodeTest.cpp +++ b/test/tests/SQLiteNodeTest.cpp @@ -28,7 +28,6 @@ class TestServer : public SQLiteServer { virtual void notifyStateChangeToPlugins(SQLite& db, SQLiteNodeState newState) {} virtual void blockCommandPort(const string& reason) { }; virtual void unblockCommandPort(const string& reason) { }; - virtual bool isCommandPortClosed(const string& reason) { return false; }; }; struct SQLiteNodeTest : tpunit::TestFixture { From 0434c8b14f76a4fa584f090080f82b0335aee405 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 19:32:28 +0000 Subject: [PATCH 105/127] addressing comment --- sqlitecluster/SQLiteNode.cpp | 14 +++++++++++++- sqlitecluster/SQLiteNode.h | 4 +++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 21a958594..2c184c523 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1278,7 +1278,19 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { peer->commandAddress = message["commandAddress"]; } peer->setCommit(message.calcU64("CommitCount"), message["Hash"]); - + + // We check the commit difference 
with 12,500 commits behind because that + // represents ~30s of commits. If we're behind, let's close the command port + // so we can catch up with the cluster before processing new commands. + const int64_t currentCommitDifference = message.calcU64("NewCount") - getCommitCount(); + const string blockReason = "COMMITS_LAGGING_BEHIND"; + if (currentCommitDifference > 12'500 && !_blockedCommandPort) { + SINFO("Node is lagging behind, closing command port so it can catch up."); + _server.blockCommandPort(blockReason); + } else if (currentCommitDifference < 10'000 && _blockedCommandPort) { + SINFO("Node is caught up enough, unblocking command port."); + _server.unblockCommandPort(blockReason); + } // Classify and process the message if (SIEquals(message.methodLine, "LOGIN")) { // LOGIN: This is the first message sent to and received from a new peer. It communicates the current state of diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index dbdd17879..793b8c287 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -367,7 +367,9 @@ class SQLiteNode : public STCPManager { // Our current State. atomic _state; - atomic _blockedCommandPort{false}; + // Keeps track if we have closed the command port for commits fallen behind. + bool _blockedCommandPort{false}; + // This is an integer that increments every time we change states. This is useful for responses to state changes // (i.e., approving standup) to verify that the messages we're receiving are relevant to the current state change, // and not stale responses to old changes. 
From 6f5596f37cb172620918e8a828b8924ad4da9893 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Mon, 23 Dec 2024 20:56:42 +0000 Subject: [PATCH 106/127] last change --- sqlitecluster/SQLiteNode.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 2c184c523..d40372b92 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1282,12 +1282,12 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // We check the commit difference with 12,500 commits behind because that // represents ~30s of commits. If we're behind, let's close the command port // so we can catch up with the cluster before processing new commands. - const int64_t currentCommitDifference = message.calcU64("NewCount") - getCommitCount(); const string blockReason = "COMMITS_LAGGING_BEHIND"; - if (currentCommitDifference > 12'500 && !_blockedCommandPort) { + const int64_t currentCommitDifference = getCommitCount() - peer->commitCount; + if (peer == _leadPeer && currentCommitDifference >= 12'500 && !_blockedCommandPort) { SINFO("Node is lagging behind, closing command port so it can catch up."); _server.blockCommandPort(blockReason); - } else if (currentCommitDifference < 10'000 && _blockedCommandPort) { + } else if (currentCommitDifference < 1'000 && _blockedCommandPort) { SINFO("Node is caught up enough, unblocking command port."); _server.unblockCommandPort(blockReason); } From 2d023066835673dd411882602ef676b5fd90e65d Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Mon, 23 Dec 2024 18:57:44 -0300 Subject: [PATCH 107/127] Update SQLiteNode.cpp --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index d40372b92..74339d29f 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1283,7 +1283,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const 
SData& message) { // represents ~30s of commits. If we're behind, let's close the command port // so we can catch up with the cluster before processing new commands. const string blockReason = "COMMITS_LAGGING_BEHIND"; - const int64_t currentCommitDifference = getCommitCount() - peer->commitCount; + const int64_t currentCommitDifference = peer->commitCount - getCommitCount(); if (peer == _leadPeer && currentCommitDifference >= 12'500 && !_blockedCommandPort) { SINFO("Node is lagging behind, closing command port so it can catch up."); _server.blockCommandPort(blockReason); From bd8d47076c02c501bc789c9f5284577d50a20ed7 Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Mon, 23 Dec 2024 18:59:15 -0300 Subject: [PATCH 108/127] Update SQLiteNode.cpp --- sqlitecluster/SQLiteNode.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 74339d29f..d22f55ea8 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1287,9 +1287,11 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (peer == _leadPeer && currentCommitDifference >= 12'500 && !_blockedCommandPort) { SINFO("Node is lagging behind, closing command port so it can catch up."); _server.blockCommandPort(blockReason); + _blockedCommandPort = true; } else if (currentCommitDifference < 1'000 && _blockedCommandPort) { SINFO("Node is caught up enough, unblocking command port."); _server.unblockCommandPort(blockReason); + _blockedCommandPort = false; } // Classify and process the message if (SIEquals(message.methodLine, "LOGIN")) { From 5ba847ec91b0eb8f012a8374a26d8db3eeab5b07 Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Tue, 24 Dec 2024 15:46:35 +0000 Subject: [PATCH 109/127] adding logic to skip leader --- sqlitecluster/SQLiteNode.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index d40372b92..c62e3ed7d 100644 --- 
a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2197,6 +2197,7 @@ void SQLiteNode::_updateSyncPeer() { SQLitePeer* newSyncPeer = nullptr; uint64_t commitCount = _db.getCommitCount(); + bool isLeaderValidPeer = false; for (auto peer : _peerList) { // If either of these conditions are true, then we can't use this peer. if (!peer->loggedIn || peer->commitCount <= commitCount) { @@ -2208,6 +2209,13 @@ void SQLiteNode::_updateSyncPeer() continue; } + // We want to sync, if possible, from a peer that is not the leader. So at this point, skip choosing it + // as the newSyncPeer. + if (peer == _leadPeer) { + isLeaderValidPeer = true; + continue; + } + // Any peer that makes it to here is a usable peer, so it's by default better than nothing. if (!newSyncPeer) { newSyncPeer = peer; @@ -2230,6 +2238,12 @@ void SQLiteNode::_updateSyncPeer() } } + // If we reached this point, it means that there are no other available peers to sync from, but leader + // was a valid choice. In this case, let's use it as the newSyncPeer. + if (!newSyncPeer && isLeaderValidPeer) { + newSyncPeer = _leadPeer; + } + // Log that we've changed peers. 
if (_syncPeer != newSyncPeer) { string from, to; From ff4dacd864cedffd2d3efe38ddd531262b4b6fe2 Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Tue, 24 Dec 2024 16:05:26 -0300 Subject: [PATCH 110/127] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index d22f55ea8..b86d51422 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1285,7 +1285,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { const string blockReason = "COMMITS_LAGGING_BEHIND"; const int64_t currentCommitDifference = peer->commitCount - getCommitCount(); if (peer == _leadPeer && currentCommitDifference >= 12'500 && !_blockedCommandPort) { - SINFO("Node is lagging behind, closing command port so it can catch up."); + SINFO("Node is behind by " + SToStr(currentCommitDifference) + " commits, closing command port so it can catch up."); _server.blockCommandPort(blockReason); _blockedCommandPort = true; } else if (currentCommitDifference < 1'000 && _blockedCommandPort) { From 2d25cab4aec6fca76dc38bae251f723306c3e543 Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Tue, 24 Dec 2024 16:06:56 -0300 Subject: [PATCH 111/127] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Carlos Alvarez --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index b86d51422..a5ce3992e 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1289,7 +1289,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { _server.blockCommandPort(blockReason); _blockedCommandPort = true; } else if (currentCommitDifference < 1'000 && _blockedCommandPort) { - SINFO("Node is caught up enough, unblocking command port."); + SINFO("Node is caught up enough (behind by " + 
SToStr(currentCommitDifference) + " commits), re-opening command port."); _server.unblockCommandPort(blockReason); _blockedCommandPort = false; } From a47789745b3a4cb5f06c63be79e3c85f00520c3c Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Tue, 24 Dec 2024 19:10:29 +0000 Subject: [PATCH 112/127] addressing comments --- sqlitecluster/SQLiteNode.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index a5ce3992e..c8e27e19a 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1289,6 +1289,8 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { _server.blockCommandPort(blockReason); _blockedCommandPort = true; } else if (currentCommitDifference < 1'000 && _blockedCommandPort) { + // We'll open the command port again if we're 1k commits behind, which + // translates to ~2s of commits. SINFO("Node is caught up enough (behind by " + SToStr(currentCommitDifference) + " commits), re-opening command port."); _server.unblockCommandPort(blockReason); _blockedCommandPort = false; From 9da7bf279f58d91ee36a46e26cbd2524f9cb000f Mon Sep 17 00:00:00 2001 From: danieldoglas Date: Tue, 24 Dec 2024 19:11:18 +0000 Subject: [PATCH 113/127] changing variable name --- sqlitecluster/SQLiteNode.cpp | 8 ++++---- sqlitecluster/SQLiteNode.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index c8e27e19a..4db1beca1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1284,16 +1284,16 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { // so we can catch up with the cluster before processing new commands. 
const string blockReason = "COMMITS_LAGGING_BEHIND"; const int64_t currentCommitDifference = peer->commitCount - getCommitCount(); - if (peer == _leadPeer && currentCommitDifference >= 12'500 && !_blockedCommandPort) { + if (peer == _leadPeer && currentCommitDifference >= 12'500 && !_blockedCommandPortForBeingBehind) { SINFO("Node is behind by " + SToStr(currentCommitDifference) + " commits, closing command port so it can catch up."); _server.blockCommandPort(blockReason); - _blockedCommandPort = true; - } else if (currentCommitDifference < 1'000 && _blockedCommandPort) { + _blockedCommandPortForBeingBehind = true; + } else if (currentCommitDifference < 1'000 && _blockedCommandPortForBeingBehind) { // We'll open the command port again if we're 1k commits behind, which // translates to ~2s of commits. SINFO("Node is caught up enough (behind by " + SToStr(currentCommitDifference) + " commits), re-opening command port."); _server.unblockCommandPort(blockReason); - _blockedCommandPort = false; + _blockedCommandPortForBeingBehind = false; } // Classify and process the message if (SIEquals(message.methodLine, "LOGIN")) { diff --git a/sqlitecluster/SQLiteNode.h b/sqlitecluster/SQLiteNode.h index 793b8c287..6377c9511 100644 --- a/sqlitecluster/SQLiteNode.h +++ b/sqlitecluster/SQLiteNode.h @@ -368,7 +368,7 @@ class SQLiteNode : public STCPManager { atomic _state; // Keeps track if we have closed the command port for commits fallen behind. - bool _blockedCommandPort{false}; + bool _blockedCommandPortForBeingBehind{false}; // This is an integer that increments every time we change states. 
This is useful for responses to state changes // (i.e., approving standup) to verify that the messages we're receiving are relevant to the current state change, From a92f259b6d4f5bfd2b103b8731819cf791082731 Mon Sep 17 00:00:00 2001 From: Daniel Silva Date: Tue, 31 Dec 2024 01:08:25 -0300 Subject: [PATCH 114/127] addressing comments --- sqlitecluster/SQLiteNode.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 465972743..a87e25f96 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -2201,7 +2201,6 @@ void SQLiteNode::_updateSyncPeer() { SQLitePeer* newSyncPeer = nullptr; uint64_t commitCount = _db.getCommitCount(); - bool isLeaderValidPeer = false; for (auto peer : _peerList) { // If either of these conditions are true, then we can't use this peer. if (!peer->loggedIn || peer->commitCount <= commitCount) { @@ -2216,7 +2215,6 @@ void SQLiteNode::_updateSyncPeer() // We want to sync, if possible, from a peer that is not the leader. So at this point, skip choosing it // as the newSyncPeer. if (peer == _leadPeer) { - isLeaderValidPeer = true; continue; } @@ -2244,7 +2242,7 @@ void SQLiteNode::_updateSyncPeer() // If we reached this point, it means that there are no other available peers to sync from, but leader // was a valid choice. In this case, let's use it as the newSyncPeer. 
- if (!newSyncPeer && isLeaderValidPeer) { + if (!newSyncPeer && _leadPeer) { newSyncPeer = _leadPeer; } From f855ce027f4435db35294966f3fe8515a1ff05d5 Mon Sep 17 00:00:00 2001 From: Andrew Rosiclair Date: Fri, 3 Jan 2025 16:36:43 -0500 Subject: [PATCH 115/127] add log params --- libstuff/SLog.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index ff94e5a9b..2e4f662a5 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -60,6 +60,8 @@ static set PARAMS_WHITELIST = { "approver", "approvers", "employees", + "mergeFromEmail", + "mergeToEmail", }; string addLogParams(string&& message, const STable& params) { From e9ddb19f864465106248d37eeae4feff3a1a5205 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 6 Jan 2025 11:19:36 -0800 Subject: [PATCH 116/127] Only create one connection for readDB, allow subclass to override --- libstuff/sqlite3.c | 28267 +++++++++++++++++++--- libstuff/sqlite3.h | 67 +- test/clustertest/BedrockClusterTester.h | 4 +- test/lib/BedrockTester.cpp | 4 +- test/lib/BedrockTester.h | 2 +- 5 files changed, 25307 insertions(+), 3037 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 33034cd19..64b8eca84 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.47.0. By combining all the individual C code files into this +** version 3.48.0. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -18,8 +18,11 @@ ** separate file. This file contains only code for the core SQLite library. 
** ** The content in this amalgamation comes from Fossil check-in -** b40cd7395c44b1f2d019d8e809e03de0e083. +** ed829bf2b069a48c644ae5706399dad7486e with changes in files: +** +** */ +#ifndef SQLITE_AMALGAMATION #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 #ifndef SQLITE_PRIVATE @@ -79,6 +82,17 @@ # define SQLITE_TCLAPI #endif +#define SQLITE_ENABLE_HCT 1 +#ifdef SQLITE_ENABLE_HCT +# define SQLITE_OMIT_SHARED_CACHE 1 +# define SQLITE_ENABLE_PREUPDATE_HOOK 1 +#endif + +#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS +# define SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS 1 +#endif +#define SQLITE_SHARED_MAPPING 1 + /* ** Include the header file used to customize the compiler options for MSVC. ** This should be done first so that it can successfully prevent spurious @@ -463,9 +477,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.47.0" -#define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f" +#define SQLITE_VERSION "3.48.0" +#define SQLITE_VERSION_NUMBER 3048000 +#define SQLITE_SOURCE_ID "2024-11-15 19:25:39 ed829bf2b069a48c644ae5706399dad7486e5abb87dc1225764038ac258ea4dc" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -969,6 +983,13 @@ SQLITE_API int sqlite3_exec( ** filesystem supports doing multiple write operations atomically when those ** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and ** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. +** +** The SQLITE_IOCAP_SUBPAGE_READ property means that it is ok to read +** from the database file in amounts that are not a multiple of the +** page size and that do not begin at a page boundary. Without this +** property, SQLite is careful to only do full-page reads and write +** on aligned pages, with the one exception that it will do a sub-page +** read of the first page to access the database header. 
*/ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -985,6 +1006,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 #define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 +#define SQLITE_IOCAP_SUBPAGE_READ 0x00008000 /* ** CAPI3REF: File Locking Levels @@ -1131,6 +1153,7 @@ struct sqlite3_file { **
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  • [SQLITE_IOCAP_IMMUTABLE] **
  • [SQLITE_IOCAP_BATCH_ATOMIC] +**
  • [SQLITE_IOCAP_SUBPAGE_READ] ** ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of @@ -1408,6 +1431,11 @@ struct sqlite3_io_methods { ** pointed to by the pArg argument. This capability is used during testing ** and only needs to be supported when SQLITE_TEST is defined. ** +**
  • [[SQLITE_FCNTL_NULL_IO]] +** The [SQLITE_FCNTL_NULL_IO] opcode sets the low-level file descriptor +** or file handle for the [sqlite3_file] object such that it will no longer +** read or write to the database file. +** **
  • [[SQLITE_FCNTL_WAL_BLOCK]] ** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might ** be advantageous to block on the next WAL lock if the lock is not immediately @@ -1561,6 +1589,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_EXTERNAL_READER 40 #define SQLITE_FCNTL_CKSM_FILE 41 #define SQLITE_FCNTL_RESET_CACHE 42 +#define SQLITE_FCNTL_NULL_IO 43 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -2939,10 +2968,14 @@ SQLITE_API void sqlite3_set_last_insert_rowid(sqlite3*,sqlite3_int64); ** deleted by the most recently completed INSERT, UPDATE or DELETE ** statement on the database connection specified by the only parameter. ** The two functions are identical except for the type of the return value -** and that if the number of rows modified by the most recent INSERT, UPDATE +** and that if the number of rows modified by the most recent INSERT, UPDATE, ** or DELETE is greater than the maximum value supported by type "int", then ** the return value of sqlite3_changes() is undefined. ^Executing any other ** type of SQL statement does not modify the value returned by these functions. +** For the purposes of this interface, a CREATE TABLE AS SELECT statement +** does not count as an INSERT, UPDATE or DELETE statement and hence the rows +** added to the new table by the CREATE TABLE AS SELECT statement are not +** counted. ** ** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are ** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], @@ -4539,13 +4572,17 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); ** and sqlite3_prepare16_v3() use UTF-16. ** ** ^If the nByte argument is negative, then zSql is read up to the -** first zero terminator. ^If nByte is positive, then it is the -** number of bytes read from zSql. ^If nByte is zero, then no prepared +** first zero terminator. 
^If nByte is positive, then it is the maximum +** number of bytes read from zSql. When nByte is positive, zSql is read +** up to the first zero terminator or until the nByte bytes have been read, +** whichever comes first. ^If nByte is zero, then no prepared ** statement is generated. ** If the caller knows that the supplied string is nul-terminated, then ** there is a small performance advantage to passing an nByte parameter that ** is the number of bytes in the input string including ** the nul-terminator. +** Note that nByte measure the length of the input in bytes, not +** characters, even for the UTF-16 interfaces. ** ** ^If pzTail is not NULL then *pzTail is made to point to the first byte ** past the end of the first SQL statement in zSql. These routines only @@ -5916,7 +5953,7 @@ SQLITE_API int sqlite3_create_window_function( ** This flag instructs SQLite to omit some corner-case optimizations that ** might disrupt the operation of the [sqlite3_value_subtype()] function, ** causing it to return zero rather than the correct subtype(). -** SQL functions that invokes [sqlite3_value_subtype()] should have this +** All SQL functions that invoke [sqlite3_value_subtype()] should have this ** property. If the SQLITE_SUBTYPE property is omitted, then the return ** value from [sqlite3_value_subtype()] might sometimes be zero even though ** a non-zero subtype was specified by the function argument expression. 
@@ -8681,8 +8718,9 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 #define SQLITE_TESTCTRL_LOGEST 33 -#define SQLITE_TESTCTRL_USELONGDOUBLE 34 -#define SQLITE_TESTCTRL_LAST 34 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_USELONGDOUBLE 34 /* NOT USED */ +#define SQLITE_TESTCTRL_HCT_MTCOMMIT 35 +#define SQLITE_TESTCTRL_LAST 35 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking @@ -9657,6 +9695,16 @@ typedef struct sqlite3_backup sqlite3_backup; ** APIs are not strictly speaking threadsafe. If they are invoked at the ** same time as another thread is invoking sqlite3_backup_step() it is ** possible that they return invalid values. +** +** Alternatives To Using The Backup API +** +** Other techniques for safely creating a consistent backup of an SQLite +** database include: +** +**
      +**
    • The [VACUUM INTO] command. +**
    • The [sqlite3_rsync] utility program. +**
    */ SQLITE_API sqlite3_backup *sqlite3_backup_init( sqlite3 *pDest, /* Destination database handle */ @@ -10856,6 +10904,14 @@ typedef struct sqlite3_snapshot { ** If there is not already a read-transaction open on schema S when ** this function is called, one is opened automatically. ** +** If a read-transaction is opened by this function, then it is guaranteed +** that the returned snapshot object may not be invalidated by a database +** writer or checkpointer until after the read-transaction is closed. This +** is not guaranteed if a read-transaction is already open when this +** function is called. In that case, any subsequent write or checkpoint +** operation on the database may invalidate the returned snapshot handle, +** even while the read-transaction remains open. +** ** The following must be true for this function to succeed. If any of ** the following statements are false when sqlite3_snapshot_get() is ** called, SQLITE_ERROR is returned. The final value of *P is undefined @@ -11278,6 +11334,9 @@ SQLITE_API int sqlite3_commit_status( # undef double #endif +SQLITE_API void sqlite3_hct_cas_failure(int nCASFailCnt, int nCASFailReset); +SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt); + #if defined(__wasi__) # undef SQLITE_WASI # define SQLITE_WASI 1 @@ -11292,7 +11351,7 @@ SQLITE_API int sqlite3_commit_status( #if 0 } /* End of the 'extern "C"' block */ #endif -#endif /* SQLITE3_H */ +/* #endif for SQLITE3_H will be added by mksqlite3.tcl */ /******** Begin file sqlite3rtree.h *********/ /* @@ -13671,7 +13730,6 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting -** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** @@ -14015,6 +14073,7 @@ struct fts5_api { #endif /* _FTS5_H */ /******** End of fts5.h *********/ +#endif /* SQLITE3_H */ /************** End of sqlite3.h *********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ @@ -14060,6 +14119,7 @@ struct fts5_api { #ifndef SQLITE_MAX_LENGTH # define SQLITE_MAX_LENGTH 1000000000 #endif +#define SQLITE_MIN_LENGTH 30 /* Minimum value for the length limit */ /* ** This is the maximum number of @@ -14125,13 +14185,9 @@ struct fts5_api { /* ** The maximum number of arguments to an SQL function. -** -** This value has a hard upper limit of 32767 due to storage -** constraints (it needs to fit inside a i16). We keep it -** lower than that to prevent abuse. */ #ifndef SQLITE_MAX_FUNCTION_ARG -# define SQLITE_MAX_FUNCTION_ARG 1000 +# define SQLITE_MAX_FUNCTION_ARG 127 #endif /* @@ -14418,7 +14474,7 @@ struct fts5_api { ** which case memory allocation statistics are disabled by default. 
*/ #if !defined(SQLITE_DEFAULT_MEMSTATUS) -# define SQLITE_DEFAULT_MEMSTATUS 1 +# define SQLITE_DEFAULT_MEMSTATUS 0 #endif /* @@ -14831,122 +14887,122 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); #define TK_GE 59 #define TK_ESCAPE 60 #define TK_COLUMNKW 61 -#define TK_DO 62 -#define TK_FOR 63 -#define TK_IGNORE 64 -#define TK_INITIALLY 65 -#define TK_INSTEAD 66 -#define TK_NO 67 -#define TK_KEY 68 -#define TK_OF 69 -#define TK_OFFSET 70 -#define TK_PRAGMA 71 -#define TK_RAISE 72 -#define TK_RECURSIVE 73 -#define TK_REPLACE 74 -#define TK_RESTRICT 75 -#define TK_ROW 76 -#define TK_ROWS 77 -#define TK_TRIGGER 78 -#define TK_VACUUM 79 -#define TK_VIEW 80 -#define TK_VIRTUAL 81 -#define TK_WITH 82 -#define TK_NULLS 83 -#define TK_FIRST 84 -#define TK_LAST 85 -#define TK_CURRENT 86 -#define TK_FOLLOWING 87 -#define TK_PARTITION 88 -#define TK_PRECEDING 89 -#define TK_RANGE 90 -#define TK_UNBOUNDED 91 -#define TK_EXCLUDE 92 -#define TK_GROUPS 93 -#define TK_OTHERS 94 -#define TK_TIES 95 -#define TK_GENERATED 96 -#define TK_ALWAYS 97 -#define TK_MATERIALIZED 98 -#define TK_REINDEX 99 -#define TK_RENAME 100 -#define TK_CTIME_KW 101 -#define TK_ANY 102 -#define TK_BITAND 103 -#define TK_BITOR 104 -#define TK_LSHIFT 105 -#define TK_RSHIFT 106 -#define TK_PLUS 107 -#define TK_MINUS 108 -#define TK_STAR 109 -#define TK_SLASH 110 -#define TK_REM 111 -#define TK_CONCAT 112 -#define TK_PTR 113 -#define TK_COLLATE 114 -#define TK_BITNOT 115 -#define TK_ON 116 -#define TK_INDEXED 117 -#define TK_STRING 118 -#define TK_JOIN_KW 119 -#define TK_CONSTRAINT 120 -#define TK_DEFAULT 121 -#define TK_NULL 122 -#define TK_PRIMARY 123 -#define TK_UNIQUE 124 -#define TK_CHECK 125 -#define TK_REFERENCES 126 -#define TK_AUTOINCR 127 -#define TK_INSERT 128 -#define TK_DELETE 129 -#define TK_UPDATE 130 -#define TK_SET 131 -#define TK_DEFERRABLE 132 -#define TK_FOREIGN 133 -#define TK_DROP 134 -#define TK_UNION 135 -#define TK_ALL 136 -#define TK_EXCEPT 137 -#define TK_INTERSECT 138 -#define 
TK_SELECT 139 -#define TK_VALUES 140 -#define TK_DISTINCT 141 -#define TK_DOT 142 -#define TK_FROM 143 -#define TK_JOIN 144 -#define TK_USING 145 -#define TK_ORDER 146 -#define TK_GROUP 147 -#define TK_HAVING 148 -#define TK_LIMIT 149 -#define TK_WHERE 150 -#define TK_RETURNING 151 -#define TK_INTO 152 -#define TK_NOTHING 153 -#define TK_FLOAT 154 -#define TK_BLOB 155 -#define TK_INTEGER 156 -#define TK_VARIABLE 157 -#define TK_CASE 158 -#define TK_WHEN 159 -#define TK_THEN 160 -#define TK_ELSE 161 -#define TK_INDEX 162 -#define TK_ALTER 163 -#define TK_ADD 164 -#define TK_WINDOW 165 -#define TK_OVER 166 -#define TK_FILTER 167 -#define TK_COLUMN 168 -#define TK_AGG_FUNCTION 169 -#define TK_AGG_COLUMN 170 -#define TK_TRUEFALSE 171 -#define TK_FUNCTION 172 -#define TK_UPLUS 173 -#define TK_UMINUS 174 -#define TK_TRUTH 175 -#define TK_REGISTER 176 -#define TK_CONCURRENT 177 +#define TK_CONCURRENT 62 +#define TK_DO 63 +#define TK_FOR 64 +#define TK_IGNORE 65 +#define TK_INITIALLY 66 +#define TK_INSTEAD 67 +#define TK_NO 68 +#define TK_KEY 69 +#define TK_OF 70 +#define TK_OFFSET 71 +#define TK_PRAGMA 72 +#define TK_RAISE 73 +#define TK_RECURSIVE 74 +#define TK_REPLACE 75 +#define TK_RESTRICT 76 +#define TK_ROW 77 +#define TK_ROWS 78 +#define TK_TRIGGER 79 +#define TK_VACUUM 80 +#define TK_VIEW 81 +#define TK_VIRTUAL 82 +#define TK_WITH 83 +#define TK_NULLS 84 +#define TK_FIRST 85 +#define TK_LAST 86 +#define TK_CURRENT 87 +#define TK_FOLLOWING 88 +#define TK_PARTITION 89 +#define TK_PRECEDING 90 +#define TK_RANGE 91 +#define TK_UNBOUNDED 92 +#define TK_EXCLUDE 93 +#define TK_GROUPS 94 +#define TK_OTHERS 95 +#define TK_TIES 96 +#define TK_GENERATED 97 +#define TK_ALWAYS 98 +#define TK_MATERIALIZED 99 +#define TK_REINDEX 100 +#define TK_RENAME 101 +#define TK_CTIME_KW 102 +#define TK_ANY 103 +#define TK_BITAND 104 +#define TK_BITOR 105 +#define TK_LSHIFT 106 +#define TK_RSHIFT 107 +#define TK_PLUS 108 +#define TK_MINUS 109 +#define TK_STAR 110 +#define TK_SLASH 111 
+#define TK_REM 112 +#define TK_CONCAT 113 +#define TK_PTR 114 +#define TK_COLLATE 115 +#define TK_BITNOT 116 +#define TK_ON 117 +#define TK_INDEXED 118 +#define TK_STRING 119 +#define TK_JOIN_KW 120 +#define TK_CONSTRAINT 121 +#define TK_DEFAULT 122 +#define TK_NULL 123 +#define TK_PRIMARY 124 +#define TK_UNIQUE 125 +#define TK_CHECK 126 +#define TK_REFERENCES 127 +#define TK_AUTOINCR 128 +#define TK_INSERT 129 +#define TK_DELETE 130 +#define TK_UPDATE 131 +#define TK_SET 132 +#define TK_DEFERRABLE 133 +#define TK_FOREIGN 134 +#define TK_DROP 135 +#define TK_UNION 136 +#define TK_ALL 137 +#define TK_EXCEPT 138 +#define TK_INTERSECT 139 +#define TK_SELECT 140 +#define TK_VALUES 141 +#define TK_DISTINCT 142 +#define TK_DOT 143 +#define TK_FROM 144 +#define TK_JOIN 145 +#define TK_USING 146 +#define TK_ORDER 147 +#define TK_GROUP 148 +#define TK_HAVING 149 +#define TK_LIMIT 150 +#define TK_WHERE 151 +#define TK_RETURNING 152 +#define TK_INTO 153 +#define TK_NOTHING 154 +#define TK_FLOAT 155 +#define TK_BLOB 156 +#define TK_INTEGER 157 +#define TK_VARIABLE 158 +#define TK_CASE 159 +#define TK_WHEN 160 +#define TK_THEN 161 +#define TK_ELSE 162 +#define TK_INDEX 163 +#define TK_ALTER 164 +#define TK_ADD 165 +#define TK_WINDOW 166 +#define TK_OVER 167 +#define TK_FILTER 168 +#define TK_COLUMN 169 +#define TK_AGG_FUNCTION 170 +#define TK_AGG_COLUMN 171 +#define TK_TRUEFALSE 172 +#define TK_FUNCTION 173 +#define TK_UPLUS 174 +#define TK_UMINUS 175 +#define TK_TRUTH 176 +#define TK_REGISTER 177 #define TK_VECTOR 178 #define TK_SELECT_COLUMN 179 #define TK_IF_NULL_ROW 180 @@ -14964,6 +15020,7 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); #include #include #include +#include /* ** Use a macro to replace memcpy() if compiled with SQLITE_INLINE_MEMCPY. 
@@ -14986,7 +15043,6 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); # define float sqlite_int64 # define fabs(X) ((X)<0?-(X):(X)) # define sqlite3IsOverflow(X) 0 -# define LONGDOUBLE_TYPE sqlite_int64 # ifndef SQLITE_BIG_DBL # define SQLITE_BIG_DBL (((sqlite3_int64)1)<<50) # endif @@ -15161,9 +15217,6 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); # define INT8_TYPE signed char # endif #endif -#ifndef LONGDOUBLE_TYPE -# define LONGDOUBLE_TYPE long double -#endif typedef sqlite_int64 i64; /* 8-byte signed integer */ typedef sqlite_uint64 u64; /* 8-byte unsigned integer */ typedef UINT32_TYPE u32; /* 4-byte unsigned integer */ @@ -16335,8 +16388,6 @@ SQLITE_PRIVATE void sqlite3PagerRefdump(Pager*); SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager*); #endif -SQLITE_PRIVATE void sqlite3PagerSetCommitTime(Pager *pPager, u64 *aCommitTime); - #endif /* SQLITE_PAGER_H */ /************** End of pager.h ***********************************************/ @@ -16378,6 +16429,9 @@ SQLITE_PRIVATE void sqlite3PagerSetCommitTime(Pager *pPager, u64 *aCommitTime); #define BTREE_AUTOVACUUM_FULL 1 /* Do full auto-vacuum */ #define BTREE_AUTOVACUUM_INCR 2 /* Incremental vacuum */ +typedef struct BtCursorMethods BtCursorMethods; +typedef struct BtreeMethods BtreeMethods; + /* ** Forward declarations of structure */ @@ -16585,6 +16639,9 @@ SQLITE_PRIVATE int sqlite3BtreeCursor( ); SQLITE_PRIVATE BtCursor *sqlite3BtreeFakeValidCursor(void); SQLITE_PRIVATE int sqlite3BtreeCursorSize(void); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3BtreeClosesWithCursor(Btree*,BtCursor*); +#endif SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor*); SQLITE_PRIVATE void sqlite3BtreeCursorHintFlags(BtCursor*, unsigned); #ifdef SQLITE_ENABLE_CURSOR_HINTS @@ -16763,6 +16820,21 @@ SQLITE_PRIVATE int sqlite3SchemaMutexHeld(sqlite3*,int,Schema*); # define sqlite3BtreeHoldsAllMutexes(X) 1 # define sqlite3SchemaMutexHeld(X,Y,Z) 1 #endif +#define BTREE_DIR_NONE 0 +#define BTREE_DIR_FORWARD 1 +#define 
BTREE_DIR_REVERSE 2 + +#ifdef SQLITE_ENABLE_HCT +SQLITE_PRIVATE void sqlite3BtreeCursorDir(BtCursor*, int eDir); +SQLITE_PRIVATE int sqlite3HctVtabInit(sqlite3*); +SQLITE_PRIVATE int sqlite3BtreeSchemaLoaded(Btree *pBt); +#else +# define sqlite3BtreeCursorDir(a,b) +# define sqlite3BtreeSchemaLoaded(x) SQLITE_OK +#endif + +SQLITE_PRIVATE int sqlite3BtreePragma(Btree *pBtree, char **aFnctl); +SQLITE_PRIVATE int sqlite3BtreeIdxDelete(BtCursor*, UnpackedRecord*); #endif /* SQLITE_BTREE_H */ @@ -17058,23 +17130,23 @@ typedef struct VdbeOpList VdbeOpList; #define OP_SetCookie 100 #define OP_ReopenIdx 101 /* synopsis: root=P2 iDb=P3 */ #define OP_OpenRead 102 /* synopsis: root=P2 iDb=P3 */ -#define OP_BitAnd 103 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */ -#define OP_BitOr 104 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */ -#define OP_ShiftLeft 105 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<>r[P1] */ -#define OP_Add 107 /* same as TK_PLUS, synopsis: r[P3]=r[P1]+r[P2] */ -#define OP_Subtract 108 /* same as TK_MINUS, synopsis: r[P3]=r[P2]-r[P1] */ -#define OP_Multiply 109 /* same as TK_STAR, synopsis: r[P3]=r[P1]*r[P2] */ -#define OP_Divide 110 /* same as TK_SLASH, synopsis: r[P3]=r[P2]/r[P1] */ -#define OP_Remainder 111 /* same as TK_REM, synopsis: r[P3]=r[P2]%r[P1] */ -#define OP_Concat 112 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */ -#define OP_OpenWrite 113 /* synopsis: root=P2 iDb=P3 */ +#define OP_OpenWrite 103 /* synopsis: root=P2 iDb=P3 */ +#define OP_BitAnd 104 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */ +#define OP_BitOr 105 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */ +#define OP_ShiftLeft 106 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<>r[P1] */ +#define OP_Add 108 /* same as TK_PLUS, synopsis: r[P3]=r[P1]+r[P2] */ +#define OP_Subtract 109 /* same as TK_MINUS, synopsis: r[P3]=r[P2]-r[P1] */ +#define OP_Multiply 110 /* same as TK_STAR, synopsis: r[P3]=r[P1]*r[P2] */ +#define OP_Divide 111 /* same as TK_SLASH, synopsis: 
r[P3]=r[P2]/r[P1] */ +#define OP_Remainder 112 /* same as TK_REM, synopsis: r[P3]=r[P2]%r[P1] */ +#define OP_Concat 113 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */ #define OP_OpenDup 114 -#define OP_BitNot 115 /* same as TK_BITNOT, synopsis: r[P2]= ~r[P1] */ -#define OP_OpenAutoindex 116 /* synopsis: nColumn=P2 */ +#define OP_OpenAutoindex 115 /* synopsis: nColumn=P2 */ +#define OP_BitNot 116 /* same as TK_BITNOT, synopsis: r[P2]= ~r[P1] */ #define OP_OpenEphemeral 117 /* synopsis: nColumn=P2 */ -#define OP_String8 118 /* same as TK_STRING, synopsis: r[P2]='P4' */ -#define OP_SorterOpen 119 +#define OP_SorterOpen 118 +#define OP_String8 119 /* same as TK_STRING, synopsis: r[P2]='P4' */ #define OP_SequenceTest 120 /* synopsis: if( cursor[P1].ctr++ ) pc = P2 */ #define OP_OpenPseudo 121 /* synopsis: P3 columns in r[P2] */ #define OP_Close 122 @@ -17109,8 +17181,8 @@ typedef struct VdbeOpList VdbeOpList; #define OP_DropTable 151 #define OP_DropIndex 152 #define OP_DropTrigger 153 -#define OP_Real 154 /* same as TK_FLOAT, synopsis: r[P2]=P4 */ -#define OP_IntegrityCk 155 +#define OP_IntegrityCk 154 +#define OP_Real 155 /* same as TK_FLOAT, synopsis: r[P2]=P4 */ #define OP_RowSetAdd 156 /* synopsis: rowset(P1)=r[P2] */ #define OP_Param 157 #define OP_FkCounter 158 /* synopsis: fkctr[P1]+=P2 */ @@ -17171,14 +17243,14 @@ typedef struct VdbeOpList VdbeOpList; /* 72 */ 0x10, 0x10, 0x00, 0x10, 0x00, 0x10, 0x10, 0x00,\ /* 80 */ 0x00, 0x10, 0x10, 0x00, 0x00, 0x00, 0x02, 0x02,\ /* 88 */ 0x02, 0x00, 0x00, 0x12, 0x1e, 0x20, 0x40, 0x00,\ -/* 96 */ 0x00, 0x00, 0x10, 0x10, 0x00, 0x40, 0x40, 0x26,\ +/* 96 */ 0x00, 0x00, 0x10, 0x10, 0x00, 0x40, 0x40, 0x00,\ /* 104 */ 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26,\ -/* 112 */ 0x26, 0x00, 0x40, 0x12, 0x40, 0x40, 0x10, 0x00,\ +/* 112 */ 0x26, 0x26, 0x40, 0x40, 0x12, 0x40, 0x00, 0x10,\ /* 120 */ 0x00, 0x00, 0x40, 0x00, 0x40, 0x40, 0x10, 0x10,\ /* 128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x50,\ /* 136 */ 0x00, 0x40, 0x04, 
0x04, 0x00, 0x40, 0x50, 0x40,\ /* 144 */ 0x10, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,\ -/* 152 */ 0x00, 0x00, 0x10, 0x00, 0x06, 0x10, 0x00, 0x04,\ +/* 152 */ 0x00, 0x00, 0x00, 0x10, 0x06, 0x10, 0x00, 0x04,\ /* 160 */ 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ /* 168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x10, 0x50,\ /* 176 */ 0x40, 0x00, 0x10, 0x10, 0x02, 0x12, 0x12, 0x00,\ @@ -17439,71 +17511,171 @@ SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE*, int, VdbeOp*); SQLITE_PRIVATE int sqlite3CursorRangeHintExprCheck(Walker *pWalker, Expr *pExpr); #endif - -#define COMMIT_TIME_START 0 -#define COMMIT_TIME_BEFORE_HALT 1 -#define COMMIT_TIME_BEFORE_VDBECOMMIT 2 - -#define COMMIT_TIME_BEFORE_PHASEONE 3 -#define COMMIT_TIME_START_FIXUNLOCKED 4 -#define COMMIT_TIME_START_RELOCATE1 5 -#define COMMIT_TIME_START_RELOCATE2 6 - -#define COMMIT_TIME_OTHERWRITERS 7 -#define COMMIT_TIME_RELOCATE1COUNT 8 -#define COMMIT_TIME_RELOCATE2COUNT 9 - -#define COMMIT_TIME_RELOCATE2_READUS 10 -#define COMMIT_TIME_RELOCATE2_READCOUNT 11 -#define COMMIT_TIME_RELOCATE2_EXACTUS 12 -#define COMMIT_TIME_RELOCATE2_ALLOCATEUS 13 -#define COMMIT_TIME_RELOCATE2_RELOCATEUS 14 - -#define COMMIT_TIME_AFTER_FIXUNLOCKED 15 - -#define COMMIT_TIME_BEFORE_WALFRAMES 16 -#define COMMIT_TIME_AFTER_CHANGECOUNTER 17 -#define COMMIT_TIME_AFTER_RESTARTLOG 18 -#define COMMIT_TIME_AFTER_WRITEHDR 19 - -#define COMMIT_TIME_OSWRITE 20 - -#define COMMIT_TIME_AFTER_WRITEFRAMES 21 - -#define COMMIT_TIME_NFRAME 22 -#define COMMIT_TIME_HASHMAPUS 23 - -#define COMMIT_TIME_BEFORE_WALINDEX 24 - -#define COMMIT_TIME_WALINDEX_HASHGETUS 25 -#define COMMIT_TIME_WALINDEX_MEMSETUS 26 -#define COMMIT_TIME_WALINDEX_CLEANUPUS 27 -#define COMMIT_TIME_WALINDEX_ENTRYUS 28 - -#define COMMIT_TIME_AFTER_WALINDEX 29 -#define COMMIT_TIME_AFTER_WALINDEXHDR 30 -#define COMMIT_TIME_WALFRAMESFLAGS 31 -#define COMMIT_TIME_AFTER_WALFRAMES 32 -#define COMMIT_TIME_BEFORE_PHASETWO 33 -#define COMMIT_TIME_AFTER_PHASETWO 34 - -#define 
COMMIT_TIME_AFTER_VDBECOMMIT 35 -#define COMMIT_TIME_AFTER_HALT 36 -#define COMMIT_TIME_FINISH 37 - -#define COMMIT_TIME_N 38 - -/* #define COMMIT_TIME_TIMEOUT (2*1000*1000) */ -#define COMMIT_TIME_TIMEOUT (10*1000) /* 10ms threshold */ - -SQLITE_PRIVATE void sqlite3CommitTimeLog(u64*); -SQLITE_PRIVATE u64 sqlite3STimeNow(); -SQLITE_PRIVATE void sqlite3CommitTimeSet(u64*, int); - #endif /* SQLITE_VDBE_H */ /************** End of vdbe.h ************************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ +/************** Include btreeModules.h in the middle of sqliteInt.h **********/ +/************** Begin file btreeModules.h ************************************/ +SQLITE_PRIVATE int sqlite3HctBtreeCursor(Btree*, Pgno, int, struct KeyInfo*, BtCursor*); +SQLITE_PRIVATE sqlite3_uint64 sqlite3HctBtreeSeekCount(Btree*); +SQLITE_PRIVATE Pgno sqlite3HctBtreeLastPage(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeClose(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeSetCacheSize(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeSetSpillSize(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeSetMmapLimit(Btree*, sqlite3_int64); +SQLITE_PRIVATE int sqlite3HctBtreeSetPagerFlags(Btree*, unsigned); +SQLITE_PRIVATE int sqlite3HctBtreeSetPageSize(Btree*, int, int, int); +SQLITE_PRIVATE int sqlite3HctBtreeGetPageSize(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeGetReserveNoMutex(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeGetRequestedReserve(Btree*); +SQLITE_PRIVATE Pgno sqlite3HctBtreeMaxPageCount(Btree*, Pgno); +SQLITE_PRIVATE int sqlite3HctBtreeSecureDelete(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeSetAutoVacuum(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeGetAutoVacuum(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeNewDb(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeBeginTrans(Btree*, int, int*); +SQLITE_PRIVATE int sqlite3HctBtreeIncrVacuum(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseOne(Btree*, const char*); 
+SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseTwo(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeCommit(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeTripAllCursors(Btree*, int, int); +SQLITE_PRIVATE int sqlite3HctBtreeRollback(Btree*, int, int); +SQLITE_PRIVATE int sqlite3HctBtreeBeginStmt(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeSavepoint(Btree*, int, int); +SQLITE_PRIVATE int sqlite3HctBtreeCreateTable(Btree*, Pgno*, int); +SQLITE_PRIVATE int sqlite3HctBtreeClearTable(Btree*, int, i64*); +SQLITE_PRIVATE int sqlite3HctBtreeDropTable(Btree*, int, int*); +SQLITE_PRIVATE void sqlite3HctBtreeGetMeta(Btree*, int, u32*); +SQLITE_PRIVATE int sqlite3HctBtreeUpdateMeta(Btree*, int, u32); +SQLITE_PRIVATE int sqlite3HctBtreePragma(Btree*, char**); +SQLITE_PRIVATE Pager *sqlite3HctBtreePager(Btree*); +SQLITE_PRIVATE const char *sqlite3HctBtreeGetFilename(Btree*); +SQLITE_PRIVATE const char *sqlite3HctBtreeGetJournalname(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeTxnState(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeIsInBackup(Btree*); +SQLITE_PRIVATE void *sqlite3HctBtreeSchema(Btree*, int, void(*)(void *)); +SQLITE_PRIVATE int sqlite3HctBtreeSchemaLocked(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeIsReadonly(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeSetVersion(Btree*, int); +SQLITE_PRIVATE int sqlite3HctBtreeIntegrityCheck(sqlite3*, Btree*, Pgno*, Mem*, int, int, int*, char**); +SQLITE_PRIVATE int sqlite3HctBtreeCheckpoint(Btree*, int, int *, int *); +SQLITE_PRIVATE int sqlite3HctBtreeExclusiveLock(Btree*); +SQLITE_PRIVATE int sqlite3HctBtreeNext(BtCursor*, int); +SQLITE_PRIVATE int sqlite3HctBtreeCursorHasMoved(BtCursor*); +SQLITE_PRIVATE void sqlite3HctBtreeClearCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3HctBtreeCursorRestore(BtCursor*, int*); +SQLITE_PRIVATE void sqlite3HctBtreeCursorHintFlags(BtCursor*, unsigned); +SQLITE_PRIVATE int sqlite3HctBtreeCloseCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3HctBtreeCursorIsValid(BtCursor*); +SQLITE_PRIVATE int 
sqlite3HctBtreeCursorIsValidNN(BtCursor*); +SQLITE_PRIVATE i64 sqlite3HctBtreeIntegerKey(BtCursor*); +SQLITE_PRIVATE void sqlite3HctBtreeCursorPin(BtCursor*); +SQLITE_PRIVATE void sqlite3HctBtreeCursorUnpin(BtCursor*); +SQLITE_PRIVATE u32 sqlite3HctBtreePayloadSize(BtCursor*); +SQLITE_PRIVATE sqlite3_int64 sqlite3HctBtreeMaxRecordSize(BtCursor*); +SQLITE_PRIVATE int sqlite3HctBtreePayload(BtCursor*, u32, u32, void*); +SQLITE_PRIVATE int sqlite3HctBtreePayloadChecked(BtCursor*, u32, u32, void *); +SQLITE_PRIVATE const void *sqlite3HctBtreePayloadFetch(BtCursor*, u32*); +SQLITE_PRIVATE int sqlite3HctBtreeFirst(BtCursor*, int*); +SQLITE_PRIVATE int sqlite3HctBtreeLast(BtCursor*, int*); +SQLITE_PRIVATE int sqlite3HctBtreeTableMoveto(BtCursor*, i64, int, int*); +SQLITE_PRIVATE int sqlite3HctBtreeIndexMoveto(BtCursor*, UnpackedRecord*, int*); +SQLITE_PRIVATE void sqlite3HctBtreeCursorDir(BtCursor*, int); +SQLITE_PRIVATE int sqlite3HctBtreeEof(BtCursor*); +SQLITE_PRIVATE i64 sqlite3HctBtreeRowCountEst(BtCursor*); +SQLITE_PRIVATE int sqlite3HctBtreePrevious(BtCursor*, int); +SQLITE_PRIVATE int sqlite3HctBtreeInsert(BtCursor*, const BtreePayload*, int, int); +SQLITE_PRIVATE int sqlite3HctBtreeDelete(BtCursor*, u8); +SQLITE_PRIVATE int sqlite3HctBtreeIdxDelete(BtCursor*, UnpackedRecord*); +SQLITE_PRIVATE int sqlite3HctBtreePutData(BtCursor*, u32, u32, void*); +SQLITE_PRIVATE void sqlite3HctBtreeIncrblobCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3HctBtreeCursorHasHint(BtCursor*, unsigned int); +SQLITE_PRIVATE int sqlite3HctBtreeTransferRow(BtCursor*, BtCursor*, i64); +SQLITE_PRIVATE int sqlite3HctBtreeClearTableOfCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3HctBtreeCount(sqlite3*, BtCursor*, i64*); +SQLITE_PRIVATE int sqlite3StockBtreeCursor(Btree*, Pgno, int, struct KeyInfo*, BtCursor*); +SQLITE_PRIVATE sqlite3_uint64 sqlite3StockBtreeSeekCount(Btree*); +SQLITE_PRIVATE Pgno sqlite3StockBtreeLastPage(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeClose(Btree*); 
+SQLITE_PRIVATE int sqlite3StockBtreeSetCacheSize(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeSetSpillSize(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeSetMmapLimit(Btree*, sqlite3_int64); +SQLITE_PRIVATE int sqlite3StockBtreeSetPagerFlags(Btree*, unsigned); +SQLITE_PRIVATE int sqlite3StockBtreeSetPageSize(Btree*, int, int, int); +SQLITE_PRIVATE int sqlite3StockBtreeGetPageSize(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeGetReserveNoMutex(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeGetRequestedReserve(Btree*); +SQLITE_PRIVATE Pgno sqlite3StockBtreeMaxPageCount(Btree*, Pgno); +SQLITE_PRIVATE int sqlite3StockBtreeSecureDelete(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeSetAutoVacuum(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeGetAutoVacuum(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeNewDb(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeBeginTrans(Btree*, int, int*); +SQLITE_PRIVATE int sqlite3StockBtreeIncrVacuum(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeCommitPhaseOne(Btree*, const char*); +SQLITE_PRIVATE int sqlite3StockBtreeCommitPhaseTwo(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeCommit(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeTripAllCursors(Btree*, int, int); +SQLITE_PRIVATE int sqlite3StockBtreeRollback(Btree*, int, int); +SQLITE_PRIVATE int sqlite3StockBtreeBeginStmt(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeSavepoint(Btree*, int, int); +SQLITE_PRIVATE int sqlite3StockBtreeCreateTable(Btree*, Pgno*, int); +SQLITE_PRIVATE int sqlite3StockBtreeClearTable(Btree*, int, i64*); +SQLITE_PRIVATE int sqlite3StockBtreeDropTable(Btree*, int, int*); +SQLITE_PRIVATE void sqlite3StockBtreeGetMeta(Btree*, int, u32*); +SQLITE_PRIVATE int sqlite3StockBtreeUpdateMeta(Btree*, int, u32); +SQLITE_PRIVATE int sqlite3StockBtreePragma(Btree*, char**); +SQLITE_PRIVATE Pager *sqlite3StockBtreePager(Btree*); +SQLITE_PRIVATE const char *sqlite3StockBtreeGetFilename(Btree*); +SQLITE_PRIVATE const char 
*sqlite3StockBtreeGetJournalname(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeTxnState(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeIsInBackup(Btree*); +SQLITE_PRIVATE void *sqlite3StockBtreeSchema(Btree*, int, void(*)(void *)); +SQLITE_PRIVATE int sqlite3StockBtreeSchemaLocked(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeIsReadonly(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeSetVersion(Btree*, int); +SQLITE_PRIVATE int sqlite3StockBtreeIntegrityCheck(sqlite3*, Btree*, Pgno*, Mem*, int, int, int*, char**); +SQLITE_PRIVATE int sqlite3StockBtreeCheckpoint(Btree*, int, int *, int *); +SQLITE_PRIVATE int sqlite3StockBtreeExclusiveLock(Btree*); +SQLITE_PRIVATE int sqlite3StockBtreeNext(BtCursor*, int); +SQLITE_PRIVATE int sqlite3StockBtreeCursorHasMoved(BtCursor*); +SQLITE_PRIVATE void sqlite3StockBtreeClearCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreeCursorRestore(BtCursor*, int*); +SQLITE_PRIVATE void sqlite3StockBtreeCursorHintFlags(BtCursor*, unsigned); +SQLITE_PRIVATE int sqlite3StockBtreeCloseCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreeCursorIsValid(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreeCursorIsValidNN(BtCursor*); +SQLITE_PRIVATE i64 sqlite3StockBtreeIntegerKey(BtCursor*); +SQLITE_PRIVATE void sqlite3StockBtreeCursorPin(BtCursor*); +SQLITE_PRIVATE void sqlite3StockBtreeCursorUnpin(BtCursor*); +SQLITE_PRIVATE u32 sqlite3StockBtreePayloadSize(BtCursor*); +SQLITE_PRIVATE sqlite3_int64 sqlite3StockBtreeMaxRecordSize(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreePayload(BtCursor*, u32, u32, void*); +SQLITE_PRIVATE int sqlite3StockBtreePayloadChecked(BtCursor*, u32, u32, void *); +SQLITE_PRIVATE const void *sqlite3StockBtreePayloadFetch(BtCursor*, u32*); +SQLITE_PRIVATE int sqlite3StockBtreeFirst(BtCursor*, int*); +SQLITE_PRIVATE int sqlite3StockBtreeLast(BtCursor*, int*); +SQLITE_PRIVATE int sqlite3StockBtreeTableMoveto(BtCursor*, i64, int, int*); +SQLITE_PRIVATE int sqlite3StockBtreeIndexMoveto(BtCursor*, UnpackedRecord*, 
int*); +SQLITE_PRIVATE void sqlite3StockBtreeCursorDir(BtCursor*, int); +SQLITE_PRIVATE int sqlite3StockBtreeEof(BtCursor*); +SQLITE_PRIVATE i64 sqlite3StockBtreeRowCountEst(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreePrevious(BtCursor*, int); +SQLITE_PRIVATE int sqlite3StockBtreeInsert(BtCursor*, const BtreePayload*, int, int); +SQLITE_PRIVATE int sqlite3StockBtreeDelete(BtCursor*, u8); +SQLITE_PRIVATE int sqlite3StockBtreeIdxDelete(BtCursor*, UnpackedRecord*); +SQLITE_PRIVATE int sqlite3StockBtreePutData(BtCursor*, u32, u32, void*); +SQLITE_PRIVATE void sqlite3StockBtreeIncrblobCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreeCursorHasHint(BtCursor*, unsigned int); +SQLITE_PRIVATE int sqlite3StockBtreeTransferRow(BtCursor*, BtCursor*, i64); +SQLITE_PRIVATE int sqlite3StockBtreeClearTableOfCursor(BtCursor*); +SQLITE_PRIVATE int sqlite3StockBtreeCount(sqlite3*, BtCursor*, i64*); +SQLITE_PRIVATE BtCursor *sqlite3StockBtreeFakeValidCursor(void); + + +/************** End of btreeModules.h ****************************************/ +/************** Continuing where we left off in sqliteInt.h ******************/ /************** Include pcache.h in the middle of sqliteInt.h ****************/ /************** Begin file pcache.h ******************************************/ /* @@ -17859,9 +18031,6 @@ struct Schema { u8 enc; /* Text encoding used by this database */ u16 schemaFlags; /* Flags associated with this schema */ int cache_size; /* Number of pages to use in the cache */ -#ifdef SQLITE_ENABLE_STAT4 - void *pStat4Space; /* Memory for stat4 Index.aSample[] arrays */ -#endif }; /* @@ -17986,47 +18155,11 @@ struct FuncDefHash { }; #define SQLITE_FUNC_HASH(C,L) (((C)+(L))%SQLITE_FUNC_HASH_SZ) -#if defined(SQLITE_USER_AUTHENTICATION) -# warning "The SQLITE_USER_AUTHENTICATION extension is deprecated. \ - See ext/userauth/user-auth.txt for details." 
-#endif -#ifdef SQLITE_USER_AUTHENTICATION -/* -** Information held in the "sqlite3" database connection object and used -** to manage user authentication. -*/ -typedef struct sqlite3_userauth sqlite3_userauth; -struct sqlite3_userauth { - u8 authLevel; /* Current authentication level */ - int nAuthPW; /* Size of the zAuthPW in bytes */ - char *zAuthPW; /* Password used to authenticate */ - char *zAuthUser; /* User name used to authenticate */ -}; - -/* Allowed values for sqlite3_userauth.authLevel */ -#define UAUTH_Unknown 0 /* Authentication not yet checked */ -#define UAUTH_Fail 1 /* User authentication failed */ -#define UAUTH_User 2 /* Authenticated as a normal user */ -#define UAUTH_Admin 3 /* Authenticated as an administrator */ - -/* Functions used only by user authorization logic */ -SQLITE_PRIVATE int sqlite3UserAuthTable(const char*); -SQLITE_PRIVATE int sqlite3UserAuthCheckLogin(sqlite3*,const char*,u8*); -SQLITE_PRIVATE void sqlite3UserAuthInit(sqlite3*); -SQLITE_PRIVATE void sqlite3CryptFunc(sqlite3_context*,int,sqlite3_value**); - -#endif /* SQLITE_USER_AUTHENTICATION */ - /* ** typedef for the authorization callback function. 
*/ -#ifdef SQLITE_USER_AUTHENTICATION - typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, - const char*, const char*); -#else - typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, - const char*); -#endif +typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, + const char*); #ifndef SQLITE_OMIT_DEPRECATED /* This is an extra SQLITE_TRACE macro that indicates "legacy" tracing @@ -18189,9 +18322,6 @@ struct sqlite3 { void (*xUnlockNotify)(void **, int); /* Unlock notify callback */ sqlite3 *pNextBlocked; /* Next in list of all blocked connections */ #endif -#ifdef SQLITE_USER_AUTHENTICATION - sqlite3_userauth auth; /* User authentication information */ -#endif #ifndef SQLITE_OMIT_CONCURRENT /* Return values for sqlite3_commit_status() requests: ** SQLITE_COMMIT_CONFLICT_DB, CONFLICT_FRAME and CONFLICT_PGNO. @@ -18199,70 +18329,16 @@ struct sqlite3 { u32 aCommit[5]; #endif - u64 *aPrepareTime; - u64 *aSchemaTime; -}; - -#define PREPARE_TIME_START 0 -#define PREPARE_TIME_BEGINPARSE 1 -#define PREPARE_TIME_BEGINPRAGMA 2 - -#define PREPARE_TIME_BEGINAUTHCHECK 3 -#define PREPARE_TIME_ENDAUTHCHECK 4 -#define PREPARE_TIME_BEGINLOADSCHEMA 5 -#define PREPARE_TIME_ENDLOADSCHEMA 6 - - -#define PREPARE_TIME_BEGINCACHESIZE 7 -#define PREPARE_TIME_BEGINSETCACHESIZE 8 -#define PREPARE_TIME_ENDSETCACHESIZE 9 -#define PREPARE_TIME_ENDCACHESIZE 10 -#define PREPARE_TIME_ENDPRAGMA 11 -#define PREPARE_TIME_ENDPARSE 12 -#define PREPARE_TIME_FINISH 13 + /* Used as part of testing hctree commits */ + void (*xMtCommit)(void*, int); + void *pMtCommitCtx; -#define PREPARE_TIME_N 14 + /* The sqlite3_hct_journal_validation_hook() callback */ + void *pValidateArg; + int (*xValidate)(void*, i64, const char*, const void*, int, i64); - - -#define SCHEMA_TIME_START 0 -#define SCHEMA_TIME_AFTER_CREATE_1 1 -#define SCHEMA_TIME_AFTER_OPEN_TRANS 2 -#define SCHEMA_TIME_AFTER_GET_META 3 -#define SCHEMA_TIME_AFTER_FIX_ENCODING 4 -#define 
SCHEMA_TIME_AFTER_SETCACHESIZE 5 -#define SCHEMA_TIME_BEGIN_EXEC 6 -#define SCHEMA_TIME_BEFORE_STEP 7 -#define SCHEMA_TIME_BEFORE_PREPARE 8 -#define SCHEMA_TIME_BEFORE_FINALIZE 9 -#define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 - -#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 -#define SCHEMA_TIME_AFTER_STAT1 12 -#define SCHEMA_TIME_AFTER_DEFAULTS 13 - -#define SCHEMA_TIME_AFTER_STAT4_SPACE 14 -#define SCHEMA_TIME_AFTER_STAT4_PREPARE 15 - -#define SCHEMA_TIME_STAT4_GROWUS 16 -#define SCHEMA_TIME_STAT4_Q2_BODYUS 17 -#define SCHEMA_TIME_AFTER_STAT4_Q2 18 - -#define SCHEMA_TIME_AFTER_STAT4 19 - -#define SCHEMA_TIME_END_ANALYZE_LOAD 20 -#define SCHEMA_TIME_FINISH 21 - -#define SCHEMA_TIME_N 22 -#define SCHEMA_TIME_TIMEOUT (500 * 1000) - - - -#define sqlite3PrepareTimeSet(x,y) sqlite3CommitTimeSet(x,y) -SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrepareTime); -SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile); - -#define PREPARE_TIME_TIMEOUT (2 * 1000 * 1000) /* 2 second timeout */ + int bHctMigrate; +}; /* @@ -18429,7 +18505,7 @@ SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile); ** field is used by per-connection app-def functions. */ struct FuncDef { - i16 nArg; /* Number of arguments. -1 means unlimited */ + i8 nArg; /* Number of arguments. 
-1 means unlimited */ u32 funcFlags; /* Some combination of SQLITE_FUNC_* */ void *pUserData; /* User data parameter */ FuncDef *pNext; /* Next function with same name */ @@ -19115,6 +19191,7 @@ struct FKey { struct KeyInfo { u32 nRef; /* Number of references to this KeyInfo object */ u8 enc; /* Text encoding - one of the SQLITE_UTF* values */ + u16 nUniqField; u16 nKeyField; /* Number of key columns in the index */ u16 nAllField; /* Total columns, including key plus others */ sqlite3 *db; /* The database connection */ @@ -19258,7 +19335,7 @@ struct Index { ** expression, or a reference to a VIRTUAL column */ #ifdef SQLITE_ENABLE_STAT4 int nSample; /* Number of elements in aSample[] */ - int nSampleAlloc; /* Number of slots allocated to aSample[] */ + int mxSample; /* Number of slots allocated to aSample[] */ int nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ @@ -19569,7 +19646,7 @@ struct Expr { #define EP_IsTrue 0x10000000 /* Always has boolean value of TRUE */ #define EP_IsFalse 0x20000000 /* Always has boolean value of FALSE */ #define EP_FromDDL 0x40000000 /* Originates from sqlite_schema */ - /* 0x80000000 // Available */ +#define EP_SubtArg 0x80000000 /* Is argument to SQLITE_SUBTYPE function */ /* The EP_Propagate mask is a set of properties that automatically propagate ** upwards into parent nodes. @@ -20125,7 +20202,7 @@ struct Select { ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing ** results. if pDest->iSDParm2 is positive, then it is -** a regsiter holding a Bloom filter for the IN operator +** a register holding a Bloom filter for the IN operator ** that should be populated in addition to the ** pDest->iSDParm table. This SRT is used to ** implement "IN (SELECT ...)". 
@@ -20724,7 +20801,6 @@ struct Sqlite3Config { u8 bUseCis; /* Use covering indices for full-scans */ u8 bSmallMalloc; /* Avoid large memory allocations if true */ u8 bExtraSchemaChecks; /* Verify type,name,tbl_name in schema */ - u8 bUseLongDouble; /* Make use of long double */ #ifdef SQLITE_DEBUG u8 bJsonSelfcheck; /* Double-check JSON parsing */ #endif @@ -21099,15 +21175,6 @@ SQLITE_PRIVATE int sqlite3CorruptPgnoError(int,Pgno); # define SQLITE_ENABLE_FTS3 1 #endif -/* -** The ctype.h header is needed for non-ASCII systems. It is also -** needed by FTS3 when FTS3 is included in the amalgamation. -*/ -#if !defined(SQLITE_ASCII) || \ - (defined(SQLITE_ENABLE_FTS3) && defined(SQLITE_AMALGAMATION)) -# include -#endif - /* ** The following macros mimic the standard library functions toupper(), ** isspace(), isalnum(), isdigit() and isxdigit(), respectively. The @@ -21731,7 +21798,7 @@ SQLITE_PRIVATE int sqlite3GetInt32(const char *, int*); SQLITE_PRIVATE int sqlite3GetUInt32(const char*, u32*); SQLITE_PRIVATE int sqlite3Atoi(const char*); #ifndef SQLITE_OMIT_UTF16 -SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *pData, int nChar); +SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *pData, int nByte, int nChar); #endif SQLITE_PRIVATE int sqlite3Utf8CharLen(const char *pData, int nByte); SQLITE_PRIVATE u32 sqlite3Utf8Read(const u8**); @@ -22314,6 +22381,11 @@ SQLITE_PRIVATE sqlite3_uint64 sqlite3Hwtime(void); # define IS_STMT_SCANSTATUS(db) 0 #endif +#ifdef SQLITE_ENABLE_HCT +SQLITE_PRIVATE int sqlite3IsHct(Btree*); +SQLITE_PRIVATE int sqlite3HctSchemaOp(Btree*, const char*); +#endif + #endif /* SQLITEINT_H */ /************** End of sqliteInt.h *******************************************/ @@ -23189,9 +23261,6 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_UNTESTABLE "UNTESTABLE", #endif -#ifdef SQLITE_USER_AUTHENTICATION - "USER_AUTHENTICATION", -#endif #ifdef SQLITE_USE_ALLOCA "USE_ALLOCA", #endif @@ -23468,7 +23537,6 @@ SQLITE_PRIVATE 
SQLITE_WSD struct Sqlite3Config sqlite3Config = { SQLITE_ALLOW_COVERING_INDEX_SCAN, /* bUseCis */ 0, /* bSmallMalloc */ 1, /* bExtraSchemaChecks */ - sizeof(LONGDOUBLE_TYPE)>8, /* bUseLongDouble */ #ifdef SQLITE_DEBUG 0, /* bJsonSelfcheck */ #endif @@ -24041,7 +24109,7 @@ struct sqlite3_context { int isError; /* Error code returned by the function. */ u8 enc; /* Encoding to use for results */ u8 skipFlag; /* Skip accumulator loading if true */ - u16 argc; /* Number of arguments */ + u8 argc; /* Number of arguments */ sqlite3_value *argv[1]; /* Argument set */ }; @@ -24162,7 +24230,6 @@ struct Vdbe { int nScan; /* Entries in aScan[] */ ScanStatus *aScan; /* Scan definitions for sqlite3_stmt_scanstatus() */ #endif - u64 *aCommitTime; }; /* @@ -24189,9 +24256,11 @@ struct PreUpdate { int iBlobWrite; /* Value returned by preupdate_blobwrite() */ i64 iKey1; /* First key value passed to hook */ i64 iKey2; /* Second key value passed to hook */ + Mem oldipk; /* Memory cell holding "old" IPK value */ Mem *aNew; /* Array of new.* values */ Table *pTab; /* Schema object being updated */ Index *pPk; /* PK index if pTab is WITHOUT ROWID */ + sqlite3_value **apDflt; /* Array of default values, if required */ }; /* @@ -29637,16 +29706,29 @@ SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex *p){ /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use inside assert() statements. +** +** Because these routines raise false-positive alerts in TSAN, disable +** them (make them always return 1) when compiling with TSAN. 
*/ SQLITE_API int sqlite3_mutex_held(sqlite3_mutex *p){ +# if defined(__has_feature) +# if __has_feature(thread_sanitizer) + p = 0; +# endif +# endif assert( p==0 || sqlite3GlobalConfig.mutex.xMutexHeld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p); } SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex *p){ +# if defined(__has_feature) +# if __has_feature(thread_sanitizer) + p = 0; +# endif +# endif assert( p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p); } -#endif +#endif /* NDEBUG */ #endif /* !defined(SQLITE_MUTEX_OMIT) */ @@ -33003,7 +33085,7 @@ SQLITE_API char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){ */ static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ StrAccum acc; /* String accumulator */ - char zMsg[SQLITE_PRINT_BUF_SIZE*10]; /* Complete log message */ + char zMsg[SQLITE_PRINT_BUF_SIZE*3]; /* Complete log message */ sqlite3StrAccumInit(&acc, 0, zMsg, sizeof(zMsg), 0); sqlite3_str_vappendf(&acc, zFormat, ap); @@ -35061,7 +35143,7 @@ static const unsigned char sqlite3Utf8Trans1[] = { c = *(zIn++); \ if( c>=0xc0 ){ \ c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + while( zIn=0xd8 && c<0xdc && z[0]>=0xdc && z[0]<0xe0 ) z += 2; + if( c>=0xd8 && c<0xdc && z<=zEnd && z[0]>=0xdc && z[0]<0xe0 ) z += 2; n++; } return (int)(z-(unsigned char const *)zIn) @@ -36033,6 +36117,8 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en int eValid = 1; /* True exponent is either not used or is well-formed */ int nDigit = 0; /* Number of digits processed */ int eType = 1; /* 1: pure integer, 2+: fractional -1 or less: bad UTF16 */ + double rr[2]; + u64 s2; assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); *pResult = 0.0; /* Default return value, in case of an error */ @@ -36144,68 +36230,41 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en 
e++; } - if( e==0 ){ - *pResult = s; - }else if( sqlite3Config.bUseLongDouble ){ - LONGDOUBLE_TYPE r = (LONGDOUBLE_TYPE)s; - if( e>0 ){ - while( e>=100 ){ e-=100; r *= 1.0e+100L; } - while( e>=10 ){ e-=10; r *= 1.0e+10L; } - while( e>=1 ){ e-=1; r *= 1.0e+01L; } - }else{ - while( e<=-100 ){ e+=100; r *= 1.0e-100L; } - while( e<=-10 ){ e+=10; r *= 1.0e-10L; } - while( e<=-1 ){ e+=1; r *= 1.0e-01L; } - } - assert( r>=0.0 ); - if( r>+1.7976931348623157081452742373e+308L ){ -#ifdef INFINITY - *pResult = +INFINITY; -#else - *pResult = 1.0e308*10.0; + rr[0] = (double)s; + s2 = (u64)rr[0]; +#if defined(_MSC_VER) && _MSC_VER<1700 + if( s2==0x8000000000000000LL ){ s2 = 2*(u64)(0.5*rr[0]); } #endif - }else{ - *pResult = (double)r; + rr[1] = s>=s2 ? (double)(s - s2) : -(double)(s2 - s); + if( e>0 ){ + while( e>=100 ){ + e -= 100; + dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); + } + while( e>=10 ){ + e -= 10; + dekkerMul2(rr, 1.0e+10, 0.0); + } + while( e>=1 ){ + e -= 1; + dekkerMul2(rr, 1.0e+01, 0.0); } }else{ - double rr[2]; - u64 s2; - rr[0] = (double)s; - s2 = (u64)rr[0]; -#if defined(_MSC_VER) && _MSC_VER<1700 - if( s2==0x8000000000000000LL ){ s2 = 2*(u64)(0.5*rr[0]); } -#endif - rr[1] = s>=s2 ? 
(double)(s - s2) : -(double)(s2 - s); - if( e>0 ){ - while( e>=100 ){ - e -= 100; - dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); - } - while( e>=10 ){ - e -= 10; - dekkerMul2(rr, 1.0e+10, 0.0); - } - while( e>=1 ){ - e -= 1; - dekkerMul2(rr, 1.0e+01, 0.0); - } - }else{ - while( e<=-100 ){ - e += 100; - dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); - } - while( e<=-10 ){ - e += 10; - dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); - } - while( e<=-1 ){ - e += 1; - dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); - } + while( e<=-100 ){ + e += 100; + dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); + } + while( e<=-10 ){ + e += 10; + dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); + } + while( e<=-1 ){ + e += 1; + dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); } - *pResult = rr[0]+rr[1]; - if( sqlite3IsNaN(*pResult) ) *pResult = 1e300*1e300; } + *pResult = rr[0]+rr[1]; + if( sqlite3IsNaN(*pResult) ) *pResult = 1e300*1e300; if( sign<0 ) *pResult = -*pResult; assert( !sqlite3IsNaN(*pResult) ); @@ -36526,9 +36585,10 @@ SQLITE_PRIVATE void sqlite3FpDecode(FpDecode *p, double r, int iRound, int mxRou int i; u64 v; int e, exp = 0; + double rr[2]; + p->isSpecial = 0; p->z = p->zBuf; - assert( mxRound>0 ); /* Convert negative numbers to positive. Deal with Infinity, 0.0, and @@ -36556,62 +36616,45 @@ SQLITE_PRIVATE void sqlite3FpDecode(FpDecode *p, double r, int iRound, int mxRou /* Multiply r by powers of ten until it lands somewhere in between ** 1.0e+19 and 1.0e+17. + ** + ** Use Dekker-style double-double computation to increase the + ** precision. 
+ ** + ** The error terms on constants like 1.0e+100 computed using the + ** decimal extension, for example as follows: + ** + ** SELECT decimal_exp(decimal_sub('1.0e+100',decimal(1.0e+100))); */ - if( sqlite3Config.bUseLongDouble ){ - LONGDOUBLE_TYPE rr = r; - if( rr>=1.0e+19 ){ - while( rr>=1.0e+119L ){ exp+=100; rr *= 1.0e-100L; } - while( rr>=1.0e+29L ){ exp+=10; rr *= 1.0e-10L; } - while( rr>=1.0e+19L ){ exp++; rr *= 1.0e-1L; } - }else{ - while( rr<1.0e-97L ){ exp-=100; rr *= 1.0e+100L; } - while( rr<1.0e+07L ){ exp-=10; rr *= 1.0e+10L; } - while( rr<1.0e+17L ){ exp--; rr *= 1.0e+1L; } + rr[0] = r; + rr[1] = 0.0; + if( rr[0]>9.223372036854774784e+18 ){ + while( rr[0]>9.223372036854774784e+118 ){ + exp += 100; + dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); + } + while( rr[0]>9.223372036854774784e+28 ){ + exp += 10; + dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); + } + while( rr[0]>9.223372036854774784e+18 ){ + exp += 1; + dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); } - v = (u64)rr; }else{ - /* If high-precision floating point is not available using "long double", - ** then use Dekker-style double-double computation to increase the - ** precision. 
- ** - ** The error terms on constants like 1.0e+100 computed using the - ** decimal extension, for example as follows: - ** - ** SELECT decimal_exp(decimal_sub('1.0e+100',decimal(1.0e+100))); - */ - double rr[2]; - rr[0] = r; - rr[1] = 0.0; - if( rr[0]>9.223372036854774784e+18 ){ - while( rr[0]>9.223372036854774784e+118 ){ - exp += 100; - dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); - } - while( rr[0]>9.223372036854774784e+28 ){ - exp += 10; - dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); - } - while( rr[0]>9.223372036854774784e+18 ){ - exp += 1; - dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); - } - }else{ - while( rr[0]<9.223372036854774784e-83 ){ - exp -= 100; - dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); - } - while( rr[0]<9.223372036854774784e+07 ){ - exp -= 10; - dekkerMul2(rr, 1.0e+10, 0.0); - } - while( rr[0]<9.22337203685477478e+17 ){ - exp -= 1; - dekkerMul2(rr, 1.0e+01, 0.0); - } + while( rr[0]<9.223372036854774784e-83 ){ + exp -= 100; + dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); + } + while( rr[0]<9.223372036854774784e+07 ){ + exp -= 10; + dekkerMul2(rr, 1.0e+10, 0.0); + } + while( rr[0]<9.22337203685477478e+17 ){ + exp -= 1; + dekkerMul2(rr, 1.0e+01, 0.0); } - v = rr[1]<0.0 ? (u64)rr[0]-(u64)(-rr[1]) : (u64)rr[0]+(u64)rr[1]; } - + v = rr[1]<0.0 ? (u64)rr[0]-(u64)(-rr[1]) : (u64)rr[0]+(u64)rr[1]; /* Extract significant digits. */ i = sizeof(p->zBuf)-1; @@ -37382,104 +37425,6 @@ SQLITE_PRIVATE int sqlite3VListNameToNum(VList *pIn, const char *zName, int nNam return 0; } -/* -** High-resolution hardware timer used for debugging and testing only. -*/ -#if defined(VDBE_PROFILE) \ - || defined(SQLITE_PERFORMANCE_TRACE) \ - || defined(SQLITE_ENABLE_STMT_SCANSTATUS) -/************** Include hwtime.h in the middle of util.c *********************/ -/************** Begin file hwtime.h ******************************************/ -/* -** 2008 May 27 -** -** The author disclaims copyright to this source code. 
In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains inline asm code for retrieving "high-performance" -** counters for x86 and x86_64 class CPUs. -*/ -#ifndef SQLITE_HWTIME_H -#define SQLITE_HWTIME_H - -/* -** The following routine only works on Pentium-class (or newer) processors. -** It uses the RDTSC opcode to read the cycle count value out of the -** processor and returns that value. This can be used for high-res -** profiling. -*/ -#if !defined(__STRICT_ANSI__) && \ - (defined(__GNUC__) || defined(_MSC_VER)) && \ - (defined(i386) || defined(__i386__) || defined(_M_IX86)) - - #if defined(__GNUC__) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned int lo, hi; - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (sqlite_uint64)hi << 32 | lo; - } - - #elif defined(_MSC_VER) - - __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ - __asm { - rdtsc - ret ; return value at EDX:EAX - } - } - - #endif - -#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned int lo, hi; - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (sqlite_uint64)hi << 32 | lo; - } - -#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned long long retval; - unsigned long junk; - __asm__ __volatile__ ("\n\ - 1: mftbu %1\n\ - mftb %L0\n\ - mftbu %0\n\ - cmpw %0,%1\n\ - bne 1b" - : "=r" (retval), "=r" (junk)); - return retval; - } - -#else - - /* - ** asm() is needed for hardware timing support. Without asm(), - ** disable the sqlite3Hwtime() routine. 
- ** - ** sqlite3Hwtime() is only used for some obscure debugging - ** and analysis configurations, not in any deliverable, so this - ** should not be a great loss. - */ -SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } - -#endif - -#endif /* !defined(SQLITE_HWTIME_H) */ - -/************** End of hwtime.h **********************************************/ -/************** Continuing where we left off in util.c ***********************/ -#endif - /************** End of util.c ************************************************/ /************** Begin file hash.c ********************************************/ /* @@ -37870,23 +37815,23 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 100 */ "SetCookie" OpHelp(""), /* 101 */ "ReopenIdx" OpHelp("root=P2 iDb=P3"), /* 102 */ "OpenRead" OpHelp("root=P2 iDb=P3"), - /* 103 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), - /* 104 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), - /* 105 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<>r[P1]"), - /* 107 */ "Add" OpHelp("r[P3]=r[P1]+r[P2]"), - /* 108 */ "Subtract" OpHelp("r[P3]=r[P2]-r[P1]"), - /* 109 */ "Multiply" OpHelp("r[P3]=r[P1]*r[P2]"), - /* 110 */ "Divide" OpHelp("r[P3]=r[P2]/r[P1]"), - /* 111 */ "Remainder" OpHelp("r[P3]=r[P2]%r[P1]"), - /* 112 */ "Concat" OpHelp("r[P3]=r[P2]+r[P1]"), - /* 113 */ "OpenWrite" OpHelp("root=P2 iDb=P3"), + /* 103 */ "OpenWrite" OpHelp("root=P2 iDb=P3"), + /* 104 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), + /* 105 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), + /* 106 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<>r[P1]"), + /* 108 */ "Add" OpHelp("r[P3]=r[P1]+r[P2]"), + /* 109 */ "Subtract" OpHelp("r[P3]=r[P2]-r[P1]"), + /* 110 */ "Multiply" OpHelp("r[P3]=r[P1]*r[P2]"), + /* 111 */ "Divide" OpHelp("r[P3]=r[P2]/r[P1]"), + /* 112 */ "Remainder" OpHelp("r[P3]=r[P2]%r[P1]"), + /* 113 */ "Concat" OpHelp("r[P3]=r[P2]+r[P1]"), /* 114 */ "OpenDup" OpHelp(""), - /* 115 */ "BitNot" OpHelp("r[P2]= ~r[P1]"), - /* 116 */ "OpenAutoindex" OpHelp("nColumn=P2"), + /* 115 
*/ "OpenAutoindex" OpHelp("nColumn=P2"), + /* 116 */ "BitNot" OpHelp("r[P2]= ~r[P1]"), /* 117 */ "OpenEphemeral" OpHelp("nColumn=P2"), - /* 118 */ "String8" OpHelp("r[P2]='P4'"), - /* 119 */ "SorterOpen" OpHelp(""), + /* 118 */ "SorterOpen" OpHelp(""), + /* 119 */ "String8" OpHelp("r[P2]='P4'"), /* 120 */ "SequenceTest" OpHelp("if( cursor[P1].ctr++ ) pc = P2"), /* 121 */ "OpenPseudo" OpHelp("P3 columns in r[P2]"), /* 122 */ "Close" OpHelp(""), @@ -37921,8 +37866,8 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 151 */ "DropTable" OpHelp(""), /* 152 */ "DropIndex" OpHelp(""), /* 153 */ "DropTrigger" OpHelp(""), - /* 154 */ "Real" OpHelp("r[P2]=P4"), - /* 155 */ "IntegrityCk" OpHelp(""), + /* 154 */ "IntegrityCk" OpHelp(""), + /* 155 */ "Real" OpHelp("r[P2]=P4"), /* 156 */ "RowSetAdd" OpHelp("rowset(P1)=r[P2]"), /* 157 */ "Param" OpHelp(""), /* 158 */ "FkCounter" OpHelp("fkctr[P1]+=P2"), @@ -39171,7 +39116,7 @@ SQLITE_PRIVATE int sqlite3KvvfsInit(void){ # endif #else /* !SQLITE_WASI */ # ifndef HAVE_FCHMOD -# define HAVE_FCHMOD +# define HAVE_FCHMOD 1 # endif #endif /* SQLITE_WASI */ @@ -41441,54 +41386,33 @@ static int robust_flock(int fd, int op){ ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ - int rc = SQLITE_OK; - int reserved = 0; +#ifdef SQLITE_DEBUG unixFile *pFile = (unixFile*)id; +#else + UNUSED_PARAMETER(id); +#endif SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); + assert( pFile->eFileLock<=SHARED_LOCK ); - /* Check if a thread in this process holds such a lock */ - if( pFile->eFileLock>SHARED_LOCK ){ - reserved = 1; - } - - /* Otherwise see if some other process holds it. 
*/ - if( !reserved ){ - /* attempt to get the lock */ - int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); - if( !lrc ){ - /* got the lock, unlock it */ - lrc = robust_flock(pFile->h, LOCK_UN); - if ( lrc ) { - int tErrno = errno; - /* unlock failed with an error */ - lrc = SQLITE_IOERR_UNLOCK; - storeLastErrno(pFile, tErrno); - rc = lrc; - } - } else { - int tErrno = errno; - reserved = 1; - /* someone else might have it reserved */ - lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); - if( IS_LOCK_ERROR(lrc) ){ - storeLastErrno(pFile, tErrno); - rc = lrc; - } - } - } - OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); + /* The flock VFS only ever takes exclusive locks (see function flockLock). + ** Therefore, if this connection is holding any lock at all, no other + ** connection may be holding a RESERVED lock. So set *pResOut to 0 + ** in this case. + ** + ** Or, this connection may be holding no lock. In that case, set *pResOut to + ** 0 as well. The caller will then attempt to take an EXCLUSIVE lock on the + ** db in order to roll the hot journal back. If there is another connection + ** holding a lock, that attempt will fail and an SQLITE_BUSY returned to + ** the user. With other VFS, we try to avoid this, in order to allow a reader + ** to proceed while a writer is preparing its transaction. But that won't + ** work with the flock VFS - as it always takes EXCLUSIVE locks - so it is + ** not a problem in this case. 
*/ + *pResOut = 0; -#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS - if( (rc & 0xff) == SQLITE_IOERR ){ - rc = SQLITE_OK; - reserved=1; - } -#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ - *pResOut = reserved; - return rc; + return SQLITE_OK; } /* @@ -42985,6 +42909,11 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ + case SQLITE_FCNTL_NULL_IO: { + osClose(pFile->h); + pFile->h = -1; + return SQLITE_OK; + } case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; @@ -43089,6 +43018,7 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ +#ifndef SQLITE_OMIT_WAL case SQLITE_FCNTL_EXTERNAL_READER: { #if !defined(SQLITE_WASI) && !defined(SQLITE_OMIT_WAL) return unixFcntlExternalReader((unixFile*)id, (int*)pArg); @@ -43097,6 +43027,7 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; #endif } +#endif } return SQLITE_NOTFOUND; } @@ -43129,6 +43060,7 @@ static void setDeviceCharacteristics(unixFile *pFd){ if( pFd->ctrlFlags & UNIXFILE_PSOW ){ pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; } + pFd->deviceCharacteristics |= SQLITE_IOCAP_SUBPAGE_READ; pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } @@ -43179,7 +43111,7 @@ static void setDeviceCharacteristics(unixFile *pFile){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* full bitset of atomics from max sector size and smaller */ - ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + (((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2) | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; @@ -43187,7 +43119,7 @@ static void setDeviceCharacteristics(unixFile *pFile){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* full bitset of atomics from max sector size and smaller */ - 
((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | + (((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2) | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; @@ -51022,6 +50954,11 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } #endif + case SQLITE_FCNTL_NULL_IO: { + (void)osCloseHandle(pFile->h); + pFile->h = NULL; + return SQLITE_OK; + } case SQLITE_FCNTL_TEMPFILENAME: { char *zTFile = 0; int rc = winGetTempname(pFile->pVfs, &zTFile); @@ -51083,7 +51020,7 @@ static int winSectorSize(sqlite3_file *id){ */ static int winDeviceCharacteristics(sqlite3_file *id){ winFile *p = (winFile*)id; - return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | SQLITE_IOCAP_SUBPAGE_READ | ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } @@ -57045,13 +56982,7 @@ static void pcache1Unpin( assert( PAGE_IS_PINNED(pPage) ); if( reuseUnlikely || pGroup->nPurgeable>pGroup->nMaxPage ){ - /* If pcache1.separateCache is set, temporarily set the isBulkLocal flag - ** so that pcache1RemoveFromHash() moves the page buffer to the pFree - ** list instead of sqlite3_free()ing it. */ - u16 isBulkLocal = pPage->isBulkLocal; - pPage->isBulkLocal = (u16)pcache1.separateCache; pcache1RemoveFromHash(pPage, 1); - pPage->isBulkLocal = isBulkLocal; }else{ /* Add the page to the PGroup LRU list. 
*/ PgHdr1 **ppFirst = &pGroup->lru.pLruNext; @@ -57953,14 +57884,17 @@ SQLITE_PRIVATE int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); /* sqlite3_wal_info() data */ SQLITE_PRIVATE int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); -SQLITE_PRIVATE void sqlite3WalSetCommitTime(Wal *pWal, u64 *aCommitTime); - #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* SQLITE_WAL_H */ /************** End of wal.h *************************************************/ /************** Continuing where we left off in pager.c **********************/ +#ifdef SQLITE_ENABLE_HCT +# define IS_HCT(pPager) (pPager->pVfs==0) +#else +# define IS_HCT(pPager) 0 +#endif /******************* NOTES ON THE DESIGN OF THE PAGER ************************ ** @@ -58642,7 +58576,6 @@ struct Pager { Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ char *zWal; /* File name for write-ahead log */ #endif - u64 *aCommitTime; }; /* @@ -58737,20 +58670,28 @@ static const unsigned char aJournalMagic[] = { ** Return true if page pgno can be read directly from the database file ** by the b-tree layer. This is the case if: ** -** * the database file is open, -** * there are no dirty pages in the cache, and -** * the desired page is not currently in the wal file. +** (1) the database file is open +** (2) the VFS for the database is able to do unaligned sub-page reads +** (3) there are no dirty pages in the cache, and +** (4) the desired page is not currently in the wal file. 
*/ SQLITE_PRIVATE int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno){ - if( pPager->fd->pMethods==0 ) return 0; - if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; + assert( pPager!=0 ); + assert( pPager->fd!=0 ); + if( pPager->fd->pMethods==0 ) return 0; /* Case (1) */ + if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; /* Failed (3) */ #ifndef SQLITE_OMIT_WAL if( pPager->pWal ){ u32 iRead = 0; (void)sqlite3WalFindFrame(pPager->pWal, pgno, &iRead); - return iRead==0; + return iRead==0; /* Condition (4) */ } #endif + assert( pPager->fd->pMethods->xDeviceCharacteristics!=0 ); + if( (pPager->fd->pMethods->xDeviceCharacteristics(pPager->fd) + & SQLITE_IOCAP_SUBPAGE_READ)==0 ){ + return 0; /* Case (2) */ + } return 1; } #endif @@ -60990,7 +60931,6 @@ static int pager_playback(Pager *pPager, int isHot){ static int readDbPage(PgHdr *pPg){ Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ int rc = SQLITE_OK; /* Return code */ - u64 t1 = 0; #ifndef SQLITE_OMIT_WAL u32 iFrame = 0; /* Frame of WAL containing pgno */ @@ -61002,9 +60942,6 @@ static int readDbPage(PgHdr *pPg){ rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); if( rc ) return rc; } - if( pPager->aCommitTime ){ - t1 = sqlite3STimeNow(); - } if( iFrame ){ rc = sqlite3WalReadFrame(pPager->pWal, iFrame,pPager->pageSize,pPg->pData); }else @@ -61016,10 +60953,6 @@ static int readDbPage(PgHdr *pPg){ rc = SQLITE_OK; } } - if( pPager->aCommitTime ){ - pPager->aCommitTime[COMMIT_TIME_RELOCATE2_READUS] += (sqlite3STimeNow() - t1); - pPager->aCommitTime[COMMIT_TIME_RELOCATE2_READCOUNT]++; - } if( pPg->pgno==1 ){ if( rc ){ @@ -61211,7 +61144,6 @@ static int pagerWalFrames( pPager->aStat[PAGER_STAT_WRITE] += nList; if( pList->pgno==1 ) pager_write_changecounter(pList); - sqlite3CommitTimeSet(pPager->aCommitTime, COMMIT_TIME_AFTER_CHANGECOUNTER); rc = sqlite3WalFrames(pPager->pWal, pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags ); @@ -64452,6 +64384,7 @@ 
SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zSuper){ */ SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1, u32 *aConflict){ int rc = pPager->errCode; + if( IS_HCT(pPager) ) return SQLITE_OK; assert( assert_pager_state(pPager) ); if( rc==SQLITE_OK ){ assert( pPager->eState==PAGER_WRITER_CACHEMOD @@ -64602,9 +64535,7 @@ SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( } assert( rc==SQLITE_OK ); if( ALWAYS(pList) ){ - sqlite3CommitTimeSet(pPager->aCommitTime, COMMIT_TIME_BEFORE_WALFRAMES); rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); - sqlite3CommitTimeSet(pPager->aCommitTime, COMMIT_TIME_AFTER_WALFRAMES); } sqlite3PagerUnref(pPageOne); if( rc==SQLITE_OK ){ @@ -64778,10 +64709,6 @@ SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( return rc; } -SQLITE_PRIVATE void sqlite3PagerSetCommitTime(Pager *pPager, u64 *aCommitTime){ - pPager->aCommitTime = aCommitTime; - sqlite3WalSetCommitTime(pPager->pWal, aCommitTime); -} /* ** When this function is called, the database file has been completely @@ -65206,6 +65133,10 @@ SQLITE_PRIVATE sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ ** not yet been opened. */ SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){ +#ifdef SQLITE_ENABLE_HCT + static sqlite3_file s = {0}; + if( pPager->pVfs==0 ) return &s; +#endif return pPager->fd; } @@ -65561,6 +65492,7 @@ SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){ ** is unmodified. */ SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ + if( IS_HCT(pPager) ) return 0; assert( assert_pager_state(pPager) ); if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; @@ -65986,7 +65918,7 @@ SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager *pPager){ ** 28: Checksum-2 (second part of checksum for first 24 bytes of header). ** ** Immediately following the wal-header are zero or more frames. 
Each -** frame consists of a 24-byte frame-header followed by a bytes +** frame consists of a 24-byte frame-header followed by bytes ** of page data. The frame-header is six big-endian 32-bit unsigned ** integer values, as follows: ** @@ -66403,19 +66335,6 @@ SQLITE_PRIVATE int sqlite3WalTrace = 0; #define WAL_VERSION1 3007000 /* For "journal_mode=wal" */ #define WAL_VERSION2 3021000 /* For "journal_mode=wal2" */ -#define SQLITE_ENABLE_WAL2NOCKSUM 1 - -#ifdef SQLITE_ENABLE_WAL2NOCKSUM -# undef WAL_VERSION2 -# define WAL_VERSION2 3048000 /* For "journal_mode=wal2" sans checksums */ - -# define isNocksum(pWal) isWalMode2(pWal) -#else -# define isNocksum(pWal) 0 -#endif - - - /* ** Index numbers for various locking bytes. WAL_NREADER is the number @@ -66759,13 +66678,13 @@ struct Wal { #endif #ifdef SQLITE_ENABLE_SNAPSHOT WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ + int bGetSnapshot; /* Transaction opened for sqlite3_get_snapshot() */ #endif int bClosing; /* Set to true at start of sqlite3WalClose() */ int bWal2; /* bWal2 flag passed to WalOpen() */ #ifdef SQLITE_ENABLE_SETLK_TIMEOUT sqlite3 *db; #endif - u64 *aCommitTime; }; /* @@ -67020,14 +66939,7 @@ static int walIndexPage( ){ SEH_INJECT_FAULT; if( pWal->nWiData<=iPage || (*ppPage = pWal->apWiData[iPage])==0 ){ - int rc; - u64 t1; - if( pWal->aCommitTime ) t1 = sqlite3STimeNow(); - rc = walIndexPageRealloc(pWal, iPage, ppPage); - if( pWal->aCommitTime ){ - pWal->aCommitTime[COMMIT_TIME_HASHMAPUS] += sqlite3STimeNow() - t1; - } - return rc; + return walIndexPageRealloc(pWal, iPage, ppPage); } return SQLITE_OK; } @@ -67200,15 +67112,12 @@ static void walEncodeFrame( if( pWal->iReCksum==0 ){ memcpy(&aFrame[8], pWal->hdr.aSalt, 8); - if( isNocksum(pWal)==0 ){ - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); - } + nativeCksum = 
(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); sqlite3Put4byte(&aFrame[16], aCksum[0]); sqlite3Put4byte(&aFrame[20], aCksum[1]); - }else{ memset(&aFrame[8], 0, 16); } @@ -67250,16 +67159,14 @@ static int walDecodeFrame( ** and the frame-data matches the checksum in the last 8 ** bytes of this frame-header. */ - if( isNocksum(pWal)==0 ){ - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); - if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) - || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) - ){ - /* Checksum failed. */ - return 0; - } + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) + ){ + /* Checksum failed. */ + return 0; } /* If we reach this point, the frame is valid. Return the page number @@ -67573,29 +67480,6 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } -/* -** Zero the n byte block indicated by pointer p. n Must be a multiple of -** 8, and p must be aligned to an 8-byte boundary. -*/ -static void zero64(void *p, int n){ -#if defined(__x86_64__) - size_t c = n / sizeof(u64); - void *d = p; - - assert( (n & 0x7)==0 ); - assert( EIGHT_BYTE_ALIGNMENT(p) ); - - __asm__ volatile ( - "rep stosq" - : "+D" (d), "+c" (c) - : "a" (0) - : "memory" - ); -#else - memset(p, 0, n); -#endif -} - /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. 
@@ -67604,7 +67488,6 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ WalHashLoc sLoc; /* Wal-index hash table location */ u32 iExternal; - u64 t; if( isWalMode2(pWal) ){ iExternal = walExternalEncode(iWal, iFrame); @@ -67613,11 +67496,7 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ iExternal = iFrame; } - if( pWal->aCommitTime ) t = sqlite3STimeNow(); rc = walHashGet(pWal, walFramePage(iExternal), &sLoc); - if( pWal->aCommitTime ){ - pWal->aCommitTime[COMMIT_TIME_WALINDEX_HASHGETUS] += sqlite3STimeNow()-t; - } /* Assuming the wal-index file was successfully mapped, populate the ** page number array and hash table entry. @@ -67633,14 +67512,10 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ /* If this is the first entry to be added to this hash-table, zero the ** entire hash table and aPgno[] array before proceeding. */ - if( pWal->aCommitTime ) t = sqlite3STimeNow(); - if( idx==1 && sLoc.aPgno[0]!=0 ){ - /* Special for BEDROCK branch: Zero only the aHash[] part. Not the - ** aPgno[] part of the page. */ - zero64((void*)sLoc.aHash, HASHTABLE_NSLOT * sizeof(sLoc.aHash[0])); - } - if( pWal->aCommitTime ){ - pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; + if( idx==1 ){ + int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); + assert( nByte>=0 ); + memset((void*)sLoc.aPgno, 0, nByte); } /* If the entry in aPgno[] is already set, then the previous writer @@ -67648,42 +67523,21 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** writing one or more dirty pages to the WAL to free up memory). ** Remove the remnants of that writers uncommitted transaction from ** the hash-table before writing any new entries. - ** - ** Special for BEDROCK branch: On this branch we do not assume that - ** the aPgno[] part of each hash-table has been zeroed. 
Therefore, we - ** only need to clear out the remnants of an old writer's transaction if - ** the hash table matches the aPgno[] entry (as it would if a write - ** transaction was interrupted). And, because this makes the test more - ** expensive, we only do the check for the first frame written by each - ** transaction. */ - if( sLoc.aPgno[idx-1] && iFrame-1==walidxGetMxFrame(&pWal->hdr, iWal) ){ - if( pWal->aCommitTime ) t = sqlite3STimeNow(); - nCollide = idx; - for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ - if( sLoc.aHash[iKey]==idx ){ - walCleanupHash(pWal); - } - if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; - } - if( pWal->aCommitTime ){ - pWal->aCommitTime[COMMIT_TIME_WALINDEX_CLEANUPUS]+=sqlite3STimeNow()-t; - } + */ + if( sLoc.aPgno[idx-1] ){ + walCleanupHash(pWal); + assert( !sLoc.aPgno[idx-1] ); } /* Write the aPgno[] array entry and the hash-table slot. */ - if( pWal->aCommitTime ) t = sqlite3STimeNow(); nCollide = idx; for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; } sLoc.aPgno[idx-1] = iPage; AtomicStore(&sLoc.aHash[iKey], (ht_slot)idx); - if( pWal->aCommitTime ){ - pWal->aCommitTime[COMMIT_TIME_WALINDEX_ENTRYUS]+=sqlite3STimeNow()-t; - } #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT - assert( this_should_not_be_enabled ); /* Verify that the number of entries in the hash table exactly equals ** the number of entries in the mapping region. 
*/ @@ -67819,7 +67673,6 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; - memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68948,39 +68801,6 @@ static int walCheckpoint( } } - if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ - /* In wal2 mode, a non-passive checkpoint waits for all readers of - ** the wal file just checkpointed to finish, then zeroes the hash - ** tables associated with that wal file. This is because in some - ** deployments, zeroing the hash tables as they are overwritten within - ** COMMIT commands is a significant performance hit. - ** - ** Currently, both of the "PART" locks are held for the wal file - ** being checkpointed. i.e. if iCkpt==0, then we already hold both - ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an - ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that - ** there are no remaining readers of the (iCkpt==0) wal file. Similar - ** logic, with different locks, is used for (iCkpt==1). - */ - int lockIdx = WAL_READ_LOCK( - iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 - ); - assert( iCkpt==0 || iCkpt==1 ); - rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); - if( rc==SQLITE_OK ){ - int iHash; - for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ - WalHashLoc sLoc; - int nByte; - memset(&sLoc, 0, sizeof(sLoc)); - walHashGet(pWal, iHash, &sLoc); - nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - memset((void*)sLoc.aPgno, 0, nByte); - } - walUnlockExclusive(pWal, lockIdx, 1); - } - } - if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. 
*/ @@ -69101,7 +68921,7 @@ static int walHandleException(Wal *pWal){ /* ** Assert that the Wal.lockMask mask, which indicates the locks held -** by the connenction, is consistent with the Wal.readLock, Wal.writeLock +** by the connection, is consistent with the Wal.readLock, Wal.writeLock ** and Wal.ckptLock variables. To be used as: ** ** assert( walAssertLockmask(pWal) ); @@ -69820,7 +69640,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ u32 mxFrame; /* Wal frame to lock to */ if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame #ifdef SQLITE_ENABLE_SNAPSHOT - && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) + && ((pWal->bGetSnapshot==0 && pWal->pSnapshot==0) || pWal->hdr.mxFrame==0) #endif ){ /* The WAL has been completely backfilled (or it is empty). @@ -71102,7 +70922,6 @@ static int walWriteToLog( sqlite3_int64 iOffset /* Start writing at this offset */ ){ int rc; - u64 t; if( iOffsetiSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ int iFirstAmt = (int)(p->iSyncPoint - iOffset); rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); @@ -71114,13 +70933,7 @@ static int walWriteToLog( rc = sqlite3OsSync(p->pFd, WAL_SYNC_FLAGS(p->syncFlags)); if( iAmt==0 || rc ) return rc; } - if( p->pWal->aCommitTime ){ - t = sqlite3STimeNow(); - } rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); - if( p->pWal->aCommitTime ){ - p->pWal->aCommitTime[COMMIT_TIME_OSWRITE] += (sqlite3STimeNow() - t); - } return rc; } @@ -71150,21 +70963,10 @@ static int walWriteOneFrame( pData = pPage->pData; walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); - - if( isNocksum(p->pWal)==0 ){ - /* Write the header in normal mode */ - rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); - if( rc ) return rc; - } - + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; /* Write the page data */ rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); - - if( isNocksum(p->pWal) ){ - /* Write the header in no-checksum 
mode */ - if( rc ) return rc; - rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); - } return rc; } @@ -71246,9 +71048,6 @@ static int walFrames( WalIndexHdr *pLive; /* Pointer to shared header */ int iApp; int bWal2 = isWalMode2(pWal); - int nFrame = 0; - - int logFlags = 0; assert( pList ); assert( pWal->writeLock ); @@ -71270,8 +71069,6 @@ static int walFrames( return rc; } - sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_RESTARTLOG); - /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. @@ -71287,7 +71084,6 @@ static int walFrames( } #endif - logFlags |= (iFrame==0 ? 0x01 : 0x00); if( iFrame==0 ){ u32 iCkpt = 0; u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ @@ -71341,7 +71137,6 @@ static int walFrames( if( (int)pWal->szPage!=szPage ){ return SQLITE_CORRUPT_BKPT; /* TH3 test case: cov1/corrupt155.test */ } - sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WRITEHDR); /* Setup information needed to write frames into the WAL */ w.pWal = pWal; @@ -71353,7 +71148,6 @@ static int walFrames( szFrame = szPage + WAL_FRAME_HDRSIZE; /* Write all frames into the log file exactly once */ - logFlags |= (iFirst==0 ? 0x00 : 0x02); for(p=pList; p; p=p->pDirty){ int nDbSize; /* 0 normally. Positive == commit flag */ @@ -71392,10 +71186,8 @@ static int walFrames( p->flags |= PGHDR_WAL_APPEND; } - sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WRITEFRAMES); /* Recalculate checksums within the wal file if required. */ - logFlags |= (pWal->iReCksum==0 ? 0x00 : 0x04); if( isCommit && pWal->iReCksum ){ rc = walRewriteChecksums(pWal, iFrame); if( rc ) return rc; @@ -71415,7 +71207,6 @@ static int walFrames( ** sector boundary is synced; the part of the last frame that extends ** past the sector boundary is written after the sync. */ - logFlags |= (WAL_SYNC_FLAGS(sync_flags)==0 ? 
0x00 : 0x08); if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){ int bSync = 1; if( pWal->padToSectorBoundary ){ @@ -71450,8 +71241,6 @@ static int walFrames( pWal->truncateOnCommit = 0; } - sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_BEFORE_WALINDEX); - /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index ** guarantees that there are no other writers, and no data that may @@ -71462,7 +71251,6 @@ static int walFrames( if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; iFrame++; rc = walIndexAppend(pWal, iApp, iFrame, p->pgno); - nFrame++; } assert( pLast!=0 || nExtra==0 ); while( rc==SQLITE_OK && nExtra>0 ){ @@ -71470,9 +71258,6 @@ static int walFrames( nExtra--; rc = walIndexAppend(pWal, iApp, iFrame, pLast->pgno); } - if( pWal->aCommitTime ) pWal->aCommitTime[COMMIT_TIME_NFRAME] = nFrame; - - sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WALINDEX); if( rc==SQLITE_OK ){ /* Update the private copy of the header. */ @@ -71501,11 +71286,6 @@ static int walFrames( } } - sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WALINDEXHDR); - if( pWal->aCommitTime ){ - pWal->aCommitTime[COMMIT_TIME_WALFRAMESFLAGS] = logFlags; - } - WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } @@ -71598,7 +71378,7 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. */ - if( eMode!=SQLITE_CHECKPOINT_PASSIVE && isWalMode2(pWal)==0 ){ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; @@ -71814,7 +71594,20 @@ SQLITE_PRIVATE void sqlite3WalSnapshotOpen( Wal *pWal, sqlite3_snapshot *pSnapshot ){ - pWal->pSnapshot = (WalIndexHdr*)pSnapshot; + if( pSnapshot && ((WalIndexHdr*)pSnapshot)->iVersion==0 ){ + /* iVersion==0 means that this is a call to sqlite3_snapshot_get(). 
In + ** this case set the bGetSnapshot flag so that if the call to + ** sqlite3_snapshot_get() is about to read transaction on this wal + ** file, it does not take read-lock 0 if the wal file has been completely + ** checkpointed. Taking read-lock 0 would work, but then it would be + ** possible for a subsequent writer to destroy the snapshot even while + ** this connection is holding its read-transaction open. This is contrary + ** to user expectations, so we avoid it by not taking read-lock 0. */ + pWal->bGetSnapshot = 1; + }else{ + pWal->pSnapshot = (WalIndexHdr*)pSnapshot; + pWal->bGetSnapshot = 0; + } } /* @@ -71917,12 +71710,6 @@ SQLITE_PRIVATE int sqlite3WalJournalMode(Wal *pWal){ return (isWalMode2(pWal) ? PAGER_JOURNALMODE_WAL2 : PAGER_JOURNALMODE_WAL); } -SQLITE_PRIVATE void sqlite3WalSetCommitTime(Wal *pWal, u64 *aCommitTime){ - if( pWal ){ - pWal->aCommitTime = aCommitTime; - } -} - #endif /* #ifndef SQLITE_OMIT_WAL */ /************** End of wal.c *************************************************/ @@ -72295,6 +72082,7 @@ struct BtLock { ** they often do so without holding sqlite3.mutex. */ struct Btree { + const BtreeMethods *pMethods; sqlite3 *db; /* The database connection holding this btree */ BtShared *pBt; /* Sharable content of this btree */ u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ @@ -72418,8 +72206,6 @@ struct BtShared { BtreePtrmap *pMap; #endif int nPreformatSize; /* Size of last cell written by TransferRow() */ - - u64 *aCommitTime; }; /* @@ -72486,6 +72272,7 @@ struct CellInfo { ** FAULT skipNext holds the cursor fault error code. */ struct BtCursor { + const BtCursorMethods *pMethods; u8 eState; /* One of the CURSOR_XXX constants (see below) */ u8 curFlags; /* zero or more BTCF_* flags defined below */ u8 curPagerFlags; /* Flags to send to sqlite3PagerGet() */ @@ -73015,6 +72802,93 @@ SQLITE_PRIVATE void sqlite3BtreeLeaveCursor(BtCursor *pCur){ ** Including a description of file format and an overview of operation. 
*/ /* #include "btreeInt.h" */ +/************** Include btreeDefine.h in the middle of btree.c ***************/ +/************** Begin file btreeDefine.h *************************************/ +#define sqlite3BtreeNext sqlite3StockBtreeNext +#define sqlite3BtreeCursorHasMoved sqlite3StockBtreeCursorHasMoved +#define sqlite3BtreeClearCursor sqlite3StockBtreeClearCursor +#define sqlite3BtreeCursorRestore sqlite3StockBtreeCursorRestore +#define sqlite3BtreeCursorHintFlags sqlite3StockBtreeCursorHintFlags +#define sqlite3BtreeCloseCursor sqlite3StockBtreeCloseCursor +#define sqlite3BtreeCursorIsValid sqlite3StockBtreeCursorIsValid +#define sqlite3BtreeCursorIsValidNN sqlite3StockBtreeCursorIsValidNN +#define sqlite3BtreeIntegerKey sqlite3StockBtreeIntegerKey +#define sqlite3BtreeCursorPin sqlite3StockBtreeCursorPin +#define sqlite3BtreeCursorUnpin sqlite3StockBtreeCursorUnpin +#define sqlite3BtreePayloadSize sqlite3StockBtreePayloadSize +#define sqlite3BtreeMaxRecordSize sqlite3StockBtreeMaxRecordSize +#define sqlite3BtreePayload sqlite3StockBtreePayload +#define sqlite3BtreePayloadChecked sqlite3StockBtreePayloadChecked +#define sqlite3BtreePayloadFetch sqlite3StockBtreePayloadFetch +#define sqlite3BtreeFirst sqlite3StockBtreeFirst +#define sqlite3BtreeLast sqlite3StockBtreeLast +#define sqlite3BtreeTableMoveto sqlite3StockBtreeTableMoveto +#define sqlite3BtreeIndexMoveto sqlite3StockBtreeIndexMoveto +#define sqlite3BtreeCursorDir sqlite3StockBtreeCursorDir +#define sqlite3BtreeEof sqlite3StockBtreeEof +#define sqlite3BtreeRowCountEst sqlite3StockBtreeRowCountEst +#define sqlite3BtreePrevious sqlite3StockBtreePrevious +#define sqlite3BtreeInsert sqlite3StockBtreeInsert +#define sqlite3BtreeDelete sqlite3StockBtreeDelete +#define sqlite3BtreeIdxDelete sqlite3StockBtreeIdxDelete +#define sqlite3BtreePutData sqlite3StockBtreePutData +#define sqlite3BtreeIncrblobCursor sqlite3StockBtreeIncrblobCursor +#define sqlite3BtreeCursorHasHint sqlite3StockBtreeCursorHasHint 
+#define sqlite3BtreeTransferRow sqlite3StockBtreeTransferRow +#define sqlite3BtreeClearTableOfCursor sqlite3StockBtreeClearTableOfCursor +#define sqlite3BtreeCount sqlite3StockBtreeCount +#define sqlite3BtreeCursor sqlite3StockBtreeCursor +#define sqlite3BtreeSeekCount sqlite3StockBtreeSeekCount +#define sqlite3BtreeLastPage sqlite3StockBtreeLastPage +#define sqlite3BtreeClose sqlite3StockBtreeClose +#define sqlite3BtreeSetCacheSize sqlite3StockBtreeSetCacheSize +#define sqlite3BtreeSetSpillSize sqlite3StockBtreeSetSpillSize +#define sqlite3BtreeSetMmapLimit sqlite3StockBtreeSetMmapLimit +#define sqlite3BtreeSetPagerFlags sqlite3StockBtreeSetPagerFlags +#define sqlite3BtreeSetPageSize sqlite3StockBtreeSetPageSize +#define sqlite3BtreeGetPageSize sqlite3StockBtreeGetPageSize +#define sqlite3BtreeGetReserveNoMutex sqlite3StockBtreeGetReserveNoMutex +#define sqlite3BtreeGetRequestedReserve sqlite3StockBtreeGetRequestedReserve +#define sqlite3BtreeMaxPageCount sqlite3StockBtreeMaxPageCount +#define sqlite3BtreeSecureDelete sqlite3StockBtreeSecureDelete +#define sqlite3BtreeSetAutoVacuum sqlite3StockBtreeSetAutoVacuum +#define sqlite3BtreeGetAutoVacuum sqlite3StockBtreeGetAutoVacuum +#define sqlite3BtreeNewDb sqlite3StockBtreeNewDb +#define sqlite3BtreeBeginTrans sqlite3StockBtreeBeginTrans +#define sqlite3BtreeIncrVacuum sqlite3StockBtreeIncrVacuum +#define sqlite3BtreeCommitPhaseOne sqlite3StockBtreeCommitPhaseOne +#define sqlite3BtreeCommitPhaseTwo sqlite3StockBtreeCommitPhaseTwo +#define sqlite3BtreeCommit sqlite3StockBtreeCommit +#define sqlite3BtreeTripAllCursors sqlite3StockBtreeTripAllCursors +#define sqlite3BtreeRollback sqlite3StockBtreeRollback +#define sqlite3BtreeBeginStmt sqlite3StockBtreeBeginStmt +#define sqlite3BtreeSavepoint sqlite3StockBtreeSavepoint +#define sqlite3BtreeCreateTable sqlite3StockBtreeCreateTable +#define sqlite3BtreeClearTable sqlite3StockBtreeClearTable +#define sqlite3BtreeDropTable sqlite3StockBtreeDropTable +#define 
sqlite3BtreeGetMeta sqlite3StockBtreeGetMeta +#define sqlite3BtreeUpdateMeta sqlite3StockBtreeUpdateMeta +#define sqlite3BtreePragma sqlite3StockBtreePragma +#define sqlite3BtreePager sqlite3StockBtreePager +#define sqlite3BtreeGetFilename sqlite3StockBtreeGetFilename +#define sqlite3BtreeGetJournalname sqlite3StockBtreeGetJournalname +#define sqlite3BtreeTxnState sqlite3StockBtreeTxnState +#define sqlite3BtreeIsInBackup sqlite3StockBtreeIsInBackup +#define sqlite3BtreeSchema sqlite3StockBtreeSchema +#define sqlite3BtreeSchemaLocked sqlite3StockBtreeSchemaLocked +#define sqlite3BtreeIsReadonly sqlite3StockBtreeIsReadonly +#define sqlite3BtreeSetVersion sqlite3StockBtreeSetVersion +#define sqlite3BtreeIntegrityCheck sqlite3StockBtreeIntegrityCheck +#define sqlite3BtreeCheckpoint sqlite3StockBtreeCheckpoint +#define sqlite3BtreeExclusiveLock sqlite3StockBtreeExclusiveLock +#define sqlite3BtreeFakeValidCursor sqlite3StockBtreeFakeValidCursor +#define sqlite3BtreeCursorSize sqlite3StockBtreeCursorSize +#define sqlite3BtreeCursorZero sqlite3StockBtreeCursorZero +#define sqlite3BtreeOpen sqlite3StockBtreeOpen + + +/************** End of btreeDefine.h *****************************************/ +/************** Continuing where we left off in btree.c **********************/ /* ** The header string that appears at the beginning of every @@ -74171,11 +74045,16 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ ** back to where it ought to be if this routine returns true. 
*/ SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ +#ifdef SQLITE_ENABLE_HCT + assert( EIGHT_BYTE_ALIGNMENT(pCur) ); + return (CURSOR_VALID!=pCur->eState); +#else assert( EIGHT_BYTE_ALIGNMENT(pCur) || pCur==sqlite3BtreeFakeValidCursor() ); assert( offsetof(BtCursor, eState)==0 ); assert( sizeof(pCur->eState)==1 ); return CURSOR_VALID != *(u8*)pCur; +#endif } /* @@ -76229,9 +76108,7 @@ SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ BtShared *pBt = p->pBt; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); - sqlite3PrepareTimeSet(p->db->aPrepareTime, PREPARE_TIME_BEGINSETCACHESIZE); sqlite3PagerSetCachesize(pBt->pPager, mxPage); - sqlite3PrepareTimeSet(p->db->aPrepareTime, PREPARE_TIME_ENDSETCACHESIZE); sqlite3BtreeLeave(p); return SQLITE_OK; } @@ -77558,60 +77435,27 @@ static int btreeRelocateRange( if( pEntry->eType==PTRMAP_FREEPAGE ){ Pgno dummy; - u64 t1; - if( pBt->aCommitTime ) t1 = sqlite3STimeNow(); rc = allocateBtreePage(pBt, &pFree, &dummy, iPg, BTALLOC_EXACT); - if( pBt->aCommitTime ){ - pBt->aCommitTime[COMMIT_TIME_RELOCATE2_EXACTUS] += (sqlite3STimeNow() - t1); - } if( pFree ){ assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); sqlite3PcacheDrop(pFree->pDbPage); } assert( rc!=SQLITE_OK || dummy==iPg ); }else if( pnCurrent ){ - u64 t1; btreeGetPage(pBt, iPg, &pPg, 0); assert( sqlite3PagerIswriteable(pPg->pDbPage) ); assert( sqlite3PagerPageRefcount(pPg->pDbPage)==1 ); iNew = ++(*pnCurrent); if( iNew==PENDING_BYTE_PAGE(pBt) ) iNew = ++(*pnCurrent); - if( pBt->aCommitTime ) t1 = sqlite3STimeNow(); rc = relocatePage(pBt, pPg, pEntry->eType, pEntry->parent, iNew, 1); - if( pBt->aCommitTime ){ - pBt->aCommitTime[COMMIT_TIME_RELOCATE2_RELOCATEUS] += (sqlite3STimeNow() - t1); - } releasePageNotNull(pPg); - }else if( pEntry->eType!=0 ){ - u64 t1; - if( pBt->aCommitTime ) t1 = sqlite3STimeNow(); - - /* Allocate a new page from the free-list to move page iPg to. 
- ** Except - if the page allocated is within the range being relocated - ** (i.e. pgno>=iFirst), then discard it and allocate another. */ - do { - rc = allocateBtreePage(pBt, &pFree, &iNew, 0, 0); - if( iNew>=iFirst ){ - assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); - assert( iNew>iPg ); - sqlite3PcacheDrop(pFree->pDbPage); - pMap->aPtr[iNew - pMap->iFirst].eType = 0; - pFree = 0; - } - }while( pFree==0 ); - - if( pBt->aCommitTime ){ - pBt->aCommitTime[COMMIT_TIME_RELOCATE2_ALLOCATEUS] += (sqlite3STimeNow() - t1); - } + }else{ + rc = allocateBtreePage(pBt, &pFree, &iNew, iFirst-1, BTALLOC_LE); assert( rc!=SQLITE_OK || iNewaCommitTime ) t1 = sqlite3STimeNow(); rc = relocatePage(pBt, pPg, pEntry->eType, pEntry->parent,iNew,1); - if( pBt->aCommitTime ){ - pBt->aCommitTime[COMMIT_TIME_RELOCATE2_RELOCATEUS] += (sqlite3STimeNow() - t1); - } releasePage(pPg); } } @@ -77641,8 +77485,6 @@ static int btreeFixUnlocked(Btree *p){ Pgno nPage = btreePagecount(pBt); u32 nFree = get4byte(&p1[36]); - sqlite3CommitTimeSet(p->pBt->aCommitTime, COMMIT_TIME_START_FIXUNLOCKED); - assert( pBt->pMap ); rc = sqlite3PagerUpgradeSnapshot(pPager, pPage1->pDbPage); assert( p1==pPage1->aData ); @@ -77689,7 +77531,6 @@ static int btreeFixUnlocked(Btree *p){ nCurrent = MAX(nPage, nHPage); pBt->nPage = nCurrent; - sqlite3CommitTimeSet(p->pBt->aCommitTime, COMMIT_TIME_START_RELOCATE1); rc = btreeRelocateRange(pBt, pMap->iFirst, iLast, &nCurrent); /* There are now no collisions with the snapshot at the head of the @@ -77706,17 +77547,7 @@ static int btreeFixUnlocked(Btree *p){ nFin--; } nFin = MAX(nFin, nHPage); - if( p->pBt->aCommitTime ){ - p->pBt->aCommitTime[COMMIT_TIME_OTHERWRITERS] = (1+nHPage-pMap->iFirst); - p->pBt->aCommitTime[COMMIT_TIME_RELOCATE1COUNT] = (1+iLast-pMap->iFirst); - p->pBt->aCommitTime[COMMIT_TIME_RELOCATE2COUNT] = (nCurrent - nFin); - } - sqlite3CommitTimeSet( - p->pBt->aCommitTime, COMMIT_TIME_START_RELOCATE2 - ); - sqlite3PagerSetCommitTime(pBt->pPager, 
pBt->aCommitTime); rc = btreeRelocateRange(pBt, nFin+1, nCurrent, 0); - sqlite3PagerSetCommitTime(pBt->pPager, 0); } put4byte(&p1[28], nFin); @@ -77781,12 +77612,9 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ #endif if( rc==SQLITE_OK && ISCONCURRENT && p->db->eConcurrent==CONCURRENT_OPEN ){ rc = btreeFixUnlocked(p); - sqlite3CommitTimeSet(p->pBt->aCommitTime, COMMIT_TIME_AFTER_FIXUNLOCKED); } if( rc==SQLITE_OK ){ - sqlite3PagerSetCommitTime(pBt->pPager, p->pBt->aCommitTime); rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); - sqlite3PagerSetCommitTime(pBt->pPager, 0); } #ifndef SQLITE_OMIT_CONCURRENT if( rc==SQLITE_OK ){ @@ -77891,9 +77719,7 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ BtShared *pBt = p->pBt; assert( pBt->inTransaction==TRANS_WRITE ); assert( pBt->nTransaction>0 ); - sqlite3PagerSetCommitTime(pBt->pPager, p->pBt->aCommitTime); rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); - sqlite3PagerSetCommitTime(pBt->pPager, 0); if( rc!=SQLITE_OK && bCleanup==0 ){ sqlite3BtreeLeave(p); return rc; @@ -78276,6 +78102,25 @@ SQLITE_PRIVATE int sqlite3BtreeCursorSize(void){ return ROUND8(sizeof(BtCursor)); } +#ifdef SQLITE_DEBUG +/* +** Return true if and only if the Btree object will be automatically +** closed with the BtCursor closes. This is used within assert() statements +** only. +*/ +SQLITE_PRIVATE int sqlite3BtreeClosesWithCursor( + Btree *pBtree, /* the btree object */ + BtCursor *pCur /* Corresponding cursor */ +){ + BtShared *pBt = pBtree->pBt; + if( (pBt->openFlags & BTREE_SINGLE)==0 ) return 0; + if( pBt->pCursor!=pCur ) return 0; + if( pCur->pNext!=0 ) return 0; + if( pCur->pBtree!=pBtree ) return 0; + return 1; +} +#endif + /* ** Initialize memory that will be converted into a BtCursor object. 
** @@ -85169,6 +85014,97 @@ SQLITE_API int sqlite3_commit_status( return rc; } +/************** Include btreeUndef.h in the middle of btree.c ****************/ +/************** Begin file btreeUndef.h **************************************/ +#undef sqlite3BtreeNext +#undef sqlite3BtreeCursorHasMoved +#undef sqlite3BtreeClearCursor +#undef sqlite3BtreeCursorRestore +#undef sqlite3BtreeCursorHintFlags +#undef sqlite3BtreeCloseCursor +#undef sqlite3BtreeCursorIsValid +#undef sqlite3BtreeCursorIsValidNN +#undef sqlite3BtreeIntegerKey +#undef sqlite3BtreeCursorPin +#undef sqlite3BtreeCursorUnpin +#undef sqlite3BtreePayloadSize +#undef sqlite3BtreeMaxRecordSize +#undef sqlite3BtreePayload +#undef sqlite3BtreePayloadChecked +#undef sqlite3BtreePayloadFetch +#undef sqlite3BtreeFirst +#undef sqlite3BtreeLast +#undef sqlite3BtreeTableMoveto +#undef sqlite3BtreeIndexMoveto +#undef sqlite3BtreeCursorDir +#undef sqlite3BtreeEof +#undef sqlite3BtreeRowCountEst +#undef sqlite3BtreePrevious +#undef sqlite3BtreeInsert +#undef sqlite3BtreeDelete +#undef sqlite3BtreeIdxDelete +#undef sqlite3BtreePutData +#undef sqlite3BtreeIncrblobCursor +#undef sqlite3BtreeCursorHasHint +#undef sqlite3BtreeTransferRow +#undef sqlite3BtreeClearTableOfCursor +#undef sqlite3BtreeCount +#undef sqlite3BtreeCursor +#undef sqlite3BtreeSeekCount +#undef sqlite3BtreeLastPage +#undef sqlite3BtreeClose +#undef sqlite3BtreeSetCacheSize +#undef sqlite3BtreeSetSpillSize +#undef sqlite3BtreeSetMmapLimit +#undef sqlite3BtreeSetPagerFlags +#undef sqlite3BtreeSetPageSize +#undef sqlite3BtreeGetPageSize +#undef sqlite3BtreeGetReserveNoMutex +#undef sqlite3BtreeGetRequestedReserve +#undef sqlite3BtreeMaxPageCount +#undef sqlite3BtreeSecureDelete +#undef sqlite3BtreeSetAutoVacuum +#undef sqlite3BtreeGetAutoVacuum +#undef sqlite3BtreeNewDb +#undef sqlite3BtreeBeginTrans +#undef sqlite3BtreeIncrVacuum +#undef sqlite3BtreeCommitPhaseOne +#undef sqlite3BtreeCommitPhaseTwo +#undef sqlite3BtreeCommit +#undef 
sqlite3BtreeTripAllCursors +#undef sqlite3BtreeRollback +#undef sqlite3BtreeBeginStmt +#undef sqlite3BtreeSavepoint +#undef sqlite3BtreeCreateTable +#undef sqlite3BtreeClearTable +#undef sqlite3BtreeDropTable +#undef sqlite3BtreeGetMeta +#undef sqlite3BtreeUpdateMeta +#undef sqlite3BtreePragma +#undef sqlite3BtreePager +#undef sqlite3BtreeGetFilename +#undef sqlite3BtreeGetJournalname +#undef sqlite3BtreeTxnState +#undef sqlite3BtreeIsInBackup +#undef sqlite3BtreeSchema +#undef sqlite3BtreeSchemaLocked +#undef sqlite3BtreeIsReadonly +#undef sqlite3BtreeSetVersion +#undef sqlite3BtreeIntegrityCheck +#undef sqlite3BtreeCheckpoint +#undef sqlite3BtreeExclusiveLock +#undef sqlite3BtreeFakeValidCursor +#undef sqlite3BtreeCursorSize +#undef sqlite3BtreeCursorZero +#undef sqlite3BtreeOpen +#ifndef SQLITE_DEBUG +# define sqlite3BtreeSeekCount(X) 0 +#endif + + +/************** End of btreeUndef.h ******************************************/ +/************** Continuing where we left off in btree.c **********************/ + /************** End of btree.c ***********************************************/ /************** Begin file backup.c ******************************************/ /* @@ -85940,6 +85876,681 @@ SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){ #endif /* SQLITE_OMIT_VACUUM */ /************** End of backup.c **********************************************/ +/************** Begin file btwrapper.c ***************************************/ +/* +** 2022 November 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +************************************************************************* +** +*/ + +/* #include "sqliteInt.h" */ + +#ifndef SQLITE_AMALGAMATION +struct BtCursor { + const BtCursorMethods *pMethods; +}; + +struct Btree { + const BtreeMethods *pMethods; +}; +#endif + +SQLITE_PRIVATE int sqlite3HctBtreeCursorSize(void); +SQLITE_PRIVATE int sqlite3HctBtreeOpen(sqlite3_vfs*, const char*, sqlite3*, Btree**, int, int); +SQLITE_PRIVATE int sqlite3HctBtreeSchemaLoaded(Btree*); + +SQLITE_PRIVATE int sqlite3StockBtreeCursorSize(void); +SQLITE_PRIVATE int sqlite3StockBtreeOpen(sqlite3_vfs*, const char*, sqlite3*, Btree**,int,int); + +SQLITE_PRIVATE int sqlite3StockBtreePragma(Btree *p, char **a){ + return SQLITE_NOTFOUND; +} +SQLITE_PRIVATE void sqlite3StockBtreeCursorDir(BtCursor *p, int a){ + /* no-op */ +} + + +SQLITE_PRIVATE int sqlite3StockBtreeIdxDelete(BtCursor *p, UnpackedRecord *pRec){ + int rc = SQLITE_OK; + int res = 0; + + rc = sqlite3BtreeIndexMoveto(p, pRec, &res); + if( rc==SQLITE_OK && res==0 ){ + rc = sqlite3BtreeDelete(p, BTREE_AUXDELETE); + } + + return rc; +} + +#ifndef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3StockBtreeCursorIsValid(BtCursor *pCursor){ + return 1; +} +SQLITE_PRIVATE sqlite3_uint64 sqlite3StockBtreeSeekCount(Btree *p){ + return 0; +} +#endif + +/* BEGIN_HCT_MKBTREEWRAPPER_TCL_CODE */ +/****************************************************************** +** GENERATED CODE - DO NOT EDIT! 
+** +** Code generated by tool/hct_mkbtreewrapper.tcl +*/ +struct BtCursorMethods { + int(*xBtreeNext)(BtCursor*, int); + int(*xBtreeCursorHasMoved)(BtCursor*); + void(*xBtreeClearCursor)(BtCursor*); + int(*xBtreeCursorRestore)(BtCursor*, int*); + void(*xBtreeCursorHintFlags)(BtCursor*, unsigned); + int(*xBtreeCloseCursor)(BtCursor*); + int(*xBtreeCursorIsValid)(BtCursor*); + int(*xBtreeCursorIsValidNN)(BtCursor*); + i64(*xBtreeIntegerKey)(BtCursor*); + void(*xBtreeCursorPin)(BtCursor*); + void(*xBtreeCursorUnpin)(BtCursor*); + u32(*xBtreePayloadSize)(BtCursor*); + sqlite3_int64(*xBtreeMaxRecordSize)(BtCursor*); + int(*xBtreePayload)(BtCursor*, u32, u32, void*); + int(*xBtreePayloadChecked)(BtCursor*, u32, u32, void *); + const void *(*xBtreePayloadFetch)(BtCursor*, u32*); + int(*xBtreeFirst)(BtCursor*, int*); + int(*xBtreeLast)(BtCursor*, int*); + int(*xBtreeTableMoveto)(BtCursor*, i64, int, int*); + int(*xBtreeIndexMoveto)(BtCursor*, UnpackedRecord*, int*); + void(*xBtreeCursorDir)(BtCursor*, int); + int(*xBtreeEof)(BtCursor*); + i64(*xBtreeRowCountEst)(BtCursor*); + int(*xBtreePrevious)(BtCursor*, int); + int(*xBtreeInsert)(BtCursor*, const BtreePayload*, int, int); + int(*xBtreeDelete)(BtCursor*, u8); + int(*xBtreeIdxDelete)(BtCursor*, UnpackedRecord*); + int(*xBtreePutData)(BtCursor*, u32, u32, void*); + void(*xBtreeIncrblobCursor)(BtCursor*); + int(*xBtreeCursorHasHint)(BtCursor*, unsigned int); + int(*xBtreeTransferRow)(BtCursor*, BtCursor*, i64); + int(*xBtreeClearTableOfCursor)(BtCursor*); + int(*xBtreeCount)(sqlite3*, BtCursor*, i64*); +}; +struct BtreeMethods { + BtCursorMethods const *pCsrMethods; + int(*xBtreeCursor)(Btree*, Pgno, int, struct KeyInfo*, BtCursor*); + sqlite3_uint64(*xBtreeSeekCount)(Btree*); + Pgno(*xBtreeLastPage)(Btree*); + int(*xBtreeClose)(Btree*); + int(*xBtreeSetCacheSize)(Btree*, int); + int(*xBtreeSetSpillSize)(Btree*, int); + int(*xBtreeSetMmapLimit)(Btree*, sqlite3_int64); + int(*xBtreeSetPagerFlags)(Btree*, unsigned); + 
int(*xBtreeSetPageSize)(Btree*, int, int, int); + int(*xBtreeGetPageSize)(Btree*); + int(*xBtreeGetReserveNoMutex)(Btree*); + int(*xBtreeGetRequestedReserve)(Btree*); + Pgno(*xBtreeMaxPageCount)(Btree*, Pgno); + int(*xBtreeSecureDelete)(Btree*, int); + int(*xBtreeSetAutoVacuum)(Btree*, int); + int(*xBtreeGetAutoVacuum)(Btree*); + int(*xBtreeNewDb)(Btree*); + int(*xBtreeBeginTrans)(Btree*, int, int*); + int(*xBtreeIncrVacuum)(Btree*); + int(*xBtreeCommitPhaseOne)(Btree*, const char*); + int(*xBtreeCommitPhaseTwo)(Btree*, int); + int(*xBtreeCommit)(Btree*); + int(*xBtreeTripAllCursors)(Btree*, int, int); + int(*xBtreeRollback)(Btree*, int, int); + int(*xBtreeBeginStmt)(Btree*, int); + int(*xBtreeSavepoint)(Btree*, int, int); + int(*xBtreeCreateTable)(Btree*, Pgno*, int); + int(*xBtreeClearTable)(Btree*, int, i64*); + int(*xBtreeDropTable)(Btree*, int, int*); + void(*xBtreeGetMeta)(Btree*, int, u32*); + int(*xBtreeUpdateMeta)(Btree*, int, u32); + int(*xBtreePragma)(Btree*, char**); + Pager *(*xBtreePager)(Btree*); + const char *(*xBtreeGetFilename)(Btree*); + const char *(*xBtreeGetJournalname)(Btree*); + int(*xBtreeTxnState)(Btree*); + int(*xBtreeIsInBackup)(Btree*); + void *(*xBtreeSchema)(Btree*, int, void(*)(void *)); + int(*xBtreeSchemaLocked)(Btree*); + int(*xBtreeIsReadonly)(Btree*); + int(*xBtreeSetVersion)(Btree*, int); + int(*xBtreeIntegrityCheck)(sqlite3*, Btree*, Pgno*, Mem*, int, int, int*, char**); + int(*xBtreeCheckpoint)(Btree*, int, int *, int *); + int(*xBtreeExclusiveLock)(Btree*); +}; +SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor *p, int a){ + return p->pMethods->xBtreeNext(p, a); +} +SQLITE_PRIVATE void sqlite3BtreeClearCursor(BtCursor *p){ + p->pMethods->xBtreeClearCursor(p); +} +SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor *p, int *a){ + return p->pMethods->xBtreeCursorRestore(p, a); +} +SQLITE_PRIVATE void sqlite3BtreeCursorHintFlags(BtCursor *p, unsigned a){ + p->pMethods->xBtreeCursorHintFlags(p, a); +} +SQLITE_PRIVATE int 
sqlite3BtreeCursorIsValid(BtCursor *p){ + return p->pMethods->xBtreeCursorIsValid(p); +} +SQLITE_PRIVATE int sqlite3BtreeCursorIsValidNN(BtCursor *p){ + return p->pMethods->xBtreeCursorIsValidNN(p); +} +SQLITE_PRIVATE i64 sqlite3BtreeIntegerKey(BtCursor *p){ + return p->pMethods->xBtreeIntegerKey(p); +} +SQLITE_PRIVATE void sqlite3BtreeCursorPin(BtCursor *p){ + p->pMethods->xBtreeCursorPin(p); +} +SQLITE_PRIVATE void sqlite3BtreeCursorUnpin(BtCursor *p){ + p->pMethods->xBtreeCursorUnpin(p); +} +SQLITE_PRIVATE u32 sqlite3BtreePayloadSize(BtCursor *p){ + return p->pMethods->xBtreePayloadSize(p); +} +SQLITE_PRIVATE sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor *p){ + return p->pMethods->xBtreeMaxRecordSize(p); +} +SQLITE_PRIVATE int sqlite3BtreePayload(BtCursor *p, u32 a, u32 b, void *c){ + return p->pMethods->xBtreePayload(p, a, b, c); +} +SQLITE_PRIVATE int sqlite3BtreePayloadChecked(BtCursor *p, u32 a, u32 b, void *c){ + return p->pMethods->xBtreePayloadChecked(p, a, b, c); +} +const void * sqlite3BtreePayloadFetch(BtCursor *p, u32 *a){ + return p->pMethods->xBtreePayloadFetch(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *p, int *a){ + return p->pMethods->xBtreeFirst(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *p, int *a){ + return p->pMethods->xBtreeLast(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeTableMoveto(BtCursor *p, i64 a, int b, int *c){ + return p->pMethods->xBtreeTableMoveto(p, a, b, c); +} +SQLITE_PRIVATE int sqlite3BtreeIndexMoveto(BtCursor *p, UnpackedRecord *a, int *b){ + return p->pMethods->xBtreeIndexMoveto(p, a, b); +} +SQLITE_PRIVATE void sqlite3BtreeCursorDir(BtCursor *p, int a){ + p->pMethods->xBtreeCursorDir(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeEof(BtCursor *p){ + return p->pMethods->xBtreeEof(p); +} +SQLITE_PRIVATE i64 sqlite3BtreeRowCountEst(BtCursor *p){ + return p->pMethods->xBtreeRowCountEst(p); +} +SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor *p, int a){ + return p->pMethods->xBtreePrevious(p, a); +} 
+SQLITE_PRIVATE int sqlite3BtreeInsert(BtCursor *p, const BtreePayload *a, int b, int c){ + return p->pMethods->xBtreeInsert(p, a, b, c); +} +SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *p, u8 a){ + return p->pMethods->xBtreeDelete(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeIdxDelete(BtCursor *p, UnpackedRecord *a){ + return p->pMethods->xBtreeIdxDelete(p, a); +} +SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *p, u32 a, u32 b, void *c){ + return p->pMethods->xBtreePutData(p, a, b, c); +} +SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *p){ + p->pMethods->xBtreeIncrblobCursor(p); +} +SQLITE_PRIVATE int sqlite3BtreeCursorHasHint(BtCursor *p, unsigned int a){ + return p->pMethods->xBtreeCursorHasHint(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *p, BtCursor *a, i64 b){ + return p->pMethods->xBtreeTransferRow(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeClearTableOfCursor(BtCursor *p){ + return p->pMethods->xBtreeClearTableOfCursor(p); +} +SQLITE_PRIVATE int sqlite3BtreeCount(sqlite3 *a, BtCursor *p, i64 *b){ + return p->pMethods->xBtreeCount(a, p, b); +} +SQLITE_PRIVATE Pgno sqlite3BtreeLastPage(Btree *p){ + return p->pMethods->xBtreeLastPage(p); +} +SQLITE_PRIVATE int sqlite3BtreeClose(Btree *p){ + return p->pMethods->xBtreeClose(p); +} +SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int a){ + return p->pMethods->xBtreeSetCacheSize(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSetSpillSize(Btree *p, int a){ + return p->pMethods->xBtreeSetSpillSize(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 a){ + return p->pMethods->xBtreeSetMmapLimit(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSetPagerFlags(Btree *p, unsigned a){ + return p->pMethods->xBtreeSetPagerFlags(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int a, int b, int c){ + return p->pMethods->xBtreeSetPageSize(p, a, b, c); +} +SQLITE_PRIVATE int sqlite3BtreeGetPageSize(Btree *p){ + return p->pMethods->xBtreeGetPageSize(p); +} 
+SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p){ + return p->pMethods->xBtreeGetReserveNoMutex(p); +} +SQLITE_PRIVATE int sqlite3BtreeGetRequestedReserve(Btree *p){ + return p->pMethods->xBtreeGetRequestedReserve(p); +} +SQLITE_PRIVATE Pgno sqlite3BtreeMaxPageCount(Btree *p, Pgno a){ + return p->pMethods->xBtreeMaxPageCount(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSecureDelete(Btree *p, int a){ + if( p==0 ) return 0; + return p->pMethods->xBtreeSecureDelete(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSetAutoVacuum(Btree *p, int a){ + return p->pMethods->xBtreeSetAutoVacuum(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeGetAutoVacuum(Btree *p){ + return p->pMethods->xBtreeGetAutoVacuum(p); +} +SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p){ + return p->pMethods->xBtreeNewDb(p); +} +SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int a, int *b){ + return p->pMethods->xBtreeBeginTrans(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){ + return p->pMethods->xBtreeIncrVacuum(p); +} +SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree *p, const char *a){ + return p->pMethods->xBtreeCommitPhaseOne(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int a){ + return p->pMethods->xBtreeCommitPhaseTwo(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeCommit(Btree *p){ + return p->pMethods->xBtreeCommit(p); +} +SQLITE_PRIVATE int sqlite3BtreeTripAllCursors(Btree *p, int a, int b){ + if( p==0 ) return 0; + return p->pMethods->xBtreeTripAllCursors(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int a, int b){ + return p->pMethods->xBtreeRollback(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeBeginStmt(Btree *p, int a){ + return p->pMethods->xBtreeBeginStmt(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeSavepoint(Btree *p, int a, int b){ + if( p==0 ) return 0; + return p->pMethods->xBtreeSavepoint(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeCreateTable(Btree *p, Pgno *a, int b){ + return p->pMethods->xBtreeCreateTable(p, a, b); +} 
+SQLITE_PRIVATE int sqlite3BtreeClearTable(Btree *p, int a, i64 *b){ + return p->pMethods->xBtreeClearTable(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeDropTable(Btree *p, int a, int *b){ + return p->pMethods->xBtreeDropTable(p, a, b); +} +SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *p, int a, u32 *b){ + p->pMethods->xBtreeGetMeta(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree *p, int a, u32 b){ + return p->pMethods->xBtreeUpdateMeta(p, a, b); +} +SQLITE_PRIVATE int sqlite3BtreePragma(Btree *p, char* *a){ + return p->pMethods->xBtreePragma(p, a); +} +Pager * sqlite3BtreePager(Btree *p){ + return p->pMethods->xBtreePager(p); +} +const char * sqlite3BtreeGetFilename(Btree *p){ + return p->pMethods->xBtreeGetFilename(p); +} +const char * sqlite3BtreeGetJournalname(Btree *p){ + return p->pMethods->xBtreeGetJournalname(p); +} +SQLITE_PRIVATE int sqlite3BtreeTxnState(Btree *p){ + if( p==0 ) return 0; + return p->pMethods->xBtreeTxnState(p); +} +SQLITE_PRIVATE int sqlite3BtreeIsInBackup(Btree *p){ + return p->pMethods->xBtreeIsInBackup(p); +} +void * sqlite3BtreeSchema(Btree *p, int a, void (*xFree)(void *)){ + return p->pMethods->xBtreeSchema(p, a, xFree); +} +SQLITE_PRIVATE int sqlite3BtreeSchemaLocked(Btree *p){ + return p->pMethods->xBtreeSchemaLocked(p); +} +SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *p){ + return p->pMethods->xBtreeIsReadonly(p); +} +SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *p, int a){ + return p->pMethods->xBtreeSetVersion(p, a); +} +SQLITE_PRIVATE int sqlite3BtreeIntegrityCheck(sqlite3 *a, Btree *p, Pgno *b, Mem *c, int d, int e, int *f, char* *g){ + return p->pMethods->xBtreeIntegrityCheck(a, p, b, c, d, e, f, g); +} +SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree *p, int a, int *b, int *c){ + if( p==0 ) return 0; + return p->pMethods->xBtreeCheckpoint(p, a, b, c); +} +SQLITE_PRIVATE int sqlite3BtreeExclusiveLock(Btree *p){ + return p->pMethods->xBtreeExclusiveLock(p); +} +static const BtCursorMethods 
hct_btcursor_methods = { + .xBtreeNext = sqlite3HctBtreeNext, + .xBtreeCursorHasMoved = sqlite3HctBtreeCursorHasMoved, + .xBtreeClearCursor = sqlite3HctBtreeClearCursor, + .xBtreeCursorRestore = sqlite3HctBtreeCursorRestore, + .xBtreeCursorHintFlags = sqlite3HctBtreeCursorHintFlags, + .xBtreeCloseCursor = sqlite3HctBtreeCloseCursor, + .xBtreeCursorIsValid = sqlite3HctBtreeCursorIsValid, + .xBtreeCursorIsValidNN = sqlite3HctBtreeCursorIsValidNN, + .xBtreeIntegerKey = sqlite3HctBtreeIntegerKey, + .xBtreeCursorPin = sqlite3HctBtreeCursorPin, + .xBtreeCursorUnpin = sqlite3HctBtreeCursorUnpin, + .xBtreePayloadSize = sqlite3HctBtreePayloadSize, + .xBtreeMaxRecordSize = sqlite3HctBtreeMaxRecordSize, + .xBtreePayload = sqlite3HctBtreePayload, + .xBtreePayloadChecked = sqlite3HctBtreePayloadChecked, + .xBtreePayloadFetch = sqlite3HctBtreePayloadFetch, + .xBtreeFirst = sqlite3HctBtreeFirst, + .xBtreeLast = sqlite3HctBtreeLast, + .xBtreeTableMoveto = sqlite3HctBtreeTableMoveto, + .xBtreeIndexMoveto = sqlite3HctBtreeIndexMoveto, + .xBtreeCursorDir = sqlite3HctBtreeCursorDir, + .xBtreeEof = sqlite3HctBtreeEof, + .xBtreeRowCountEst = sqlite3HctBtreeRowCountEst, + .xBtreePrevious = sqlite3HctBtreePrevious, + .xBtreeInsert = sqlite3HctBtreeInsert, + .xBtreeDelete = sqlite3HctBtreeDelete, + .xBtreeIdxDelete = sqlite3HctBtreeIdxDelete, + .xBtreePutData = sqlite3HctBtreePutData, + .xBtreeIncrblobCursor = sqlite3HctBtreeIncrblobCursor, + .xBtreeCursorHasHint = sqlite3HctBtreeCursorHasHint, + .xBtreeTransferRow = sqlite3HctBtreeTransferRow, + .xBtreeClearTableOfCursor = sqlite3HctBtreeClearTableOfCursor, + .xBtreeCount = sqlite3HctBtreeCount, +}; +static const BtreeMethods hct_btree_methods = { + .pCsrMethods = &hct_btcursor_methods, + .xBtreeCursor = sqlite3HctBtreeCursor, + .xBtreeSeekCount = sqlite3HctBtreeSeekCount, + .xBtreeLastPage = sqlite3HctBtreeLastPage, + .xBtreeClose = sqlite3HctBtreeClose, + .xBtreeSetCacheSize = sqlite3HctBtreeSetCacheSize, + .xBtreeSetSpillSize = 
sqlite3HctBtreeSetSpillSize, + .xBtreeSetMmapLimit = sqlite3HctBtreeSetMmapLimit, + .xBtreeSetPagerFlags = sqlite3HctBtreeSetPagerFlags, + .xBtreeSetPageSize = sqlite3HctBtreeSetPageSize, + .xBtreeGetPageSize = sqlite3HctBtreeGetPageSize, + .xBtreeGetReserveNoMutex = sqlite3HctBtreeGetReserveNoMutex, + .xBtreeGetRequestedReserve = sqlite3HctBtreeGetRequestedReserve, + .xBtreeMaxPageCount = sqlite3HctBtreeMaxPageCount, + .xBtreeSecureDelete = sqlite3HctBtreeSecureDelete, + .xBtreeSetAutoVacuum = sqlite3HctBtreeSetAutoVacuum, + .xBtreeGetAutoVacuum = sqlite3HctBtreeGetAutoVacuum, + .xBtreeNewDb = sqlite3HctBtreeNewDb, + .xBtreeBeginTrans = sqlite3HctBtreeBeginTrans, + .xBtreeIncrVacuum = sqlite3HctBtreeIncrVacuum, + .xBtreeCommitPhaseOne = sqlite3HctBtreeCommitPhaseOne, + .xBtreeCommitPhaseTwo = sqlite3HctBtreeCommitPhaseTwo, + .xBtreeCommit = sqlite3HctBtreeCommit, + .xBtreeTripAllCursors = sqlite3HctBtreeTripAllCursors, + .xBtreeRollback = sqlite3HctBtreeRollback, + .xBtreeBeginStmt = sqlite3HctBtreeBeginStmt, + .xBtreeSavepoint = sqlite3HctBtreeSavepoint, + .xBtreeCreateTable = sqlite3HctBtreeCreateTable, + .xBtreeClearTable = sqlite3HctBtreeClearTable, + .xBtreeDropTable = sqlite3HctBtreeDropTable, + .xBtreeGetMeta = sqlite3HctBtreeGetMeta, + .xBtreeUpdateMeta = sqlite3HctBtreeUpdateMeta, + .xBtreePragma = sqlite3HctBtreePragma, + .xBtreePager = sqlite3HctBtreePager, + .xBtreeGetFilename = sqlite3HctBtreeGetFilename, + .xBtreeGetJournalname = sqlite3HctBtreeGetJournalname, + .xBtreeTxnState = sqlite3HctBtreeTxnState, + .xBtreeIsInBackup = sqlite3HctBtreeIsInBackup, + .xBtreeSchema = sqlite3HctBtreeSchema, + .xBtreeSchemaLocked = sqlite3HctBtreeSchemaLocked, + .xBtreeIsReadonly = sqlite3HctBtreeIsReadonly, + .xBtreeSetVersion = sqlite3HctBtreeSetVersion, + .xBtreeIntegrityCheck = sqlite3HctBtreeIntegrityCheck, + .xBtreeCheckpoint = sqlite3HctBtreeCheckpoint, + .xBtreeExclusiveLock = sqlite3HctBtreeExclusiveLock, +}; + +static const BtCursorMethods 
stock_btcursor_methods = { + .xBtreeNext = sqlite3StockBtreeNext, + .xBtreeCursorHasMoved = sqlite3StockBtreeCursorHasMoved, + .xBtreeClearCursor = sqlite3StockBtreeClearCursor, + .xBtreeCursorRestore = sqlite3StockBtreeCursorRestore, + .xBtreeCursorHintFlags = sqlite3StockBtreeCursorHintFlags, + .xBtreeCloseCursor = sqlite3StockBtreeCloseCursor, + .xBtreeCursorIsValid = sqlite3StockBtreeCursorIsValid, + .xBtreeCursorIsValidNN = sqlite3StockBtreeCursorIsValidNN, + .xBtreeIntegerKey = sqlite3StockBtreeIntegerKey, + .xBtreeCursorPin = sqlite3StockBtreeCursorPin, + .xBtreeCursorUnpin = sqlite3StockBtreeCursorUnpin, + .xBtreePayloadSize = sqlite3StockBtreePayloadSize, + .xBtreeMaxRecordSize = sqlite3StockBtreeMaxRecordSize, + .xBtreePayload = sqlite3StockBtreePayload, + .xBtreePayloadChecked = sqlite3StockBtreePayloadChecked, + .xBtreePayloadFetch = sqlite3StockBtreePayloadFetch, + .xBtreeFirst = sqlite3StockBtreeFirst, + .xBtreeLast = sqlite3StockBtreeLast, + .xBtreeTableMoveto = sqlite3StockBtreeTableMoveto, + .xBtreeIndexMoveto = sqlite3StockBtreeIndexMoveto, + .xBtreeCursorDir = sqlite3StockBtreeCursorDir, + .xBtreeEof = sqlite3StockBtreeEof, + .xBtreeRowCountEst = sqlite3StockBtreeRowCountEst, + .xBtreePrevious = sqlite3StockBtreePrevious, + .xBtreeInsert = sqlite3StockBtreeInsert, + .xBtreeDelete = sqlite3StockBtreeDelete, + .xBtreeIdxDelete = sqlite3StockBtreeIdxDelete, + .xBtreePutData = sqlite3StockBtreePutData, + .xBtreeIncrblobCursor = sqlite3StockBtreeIncrblobCursor, + .xBtreeCursorHasHint = sqlite3StockBtreeCursorHasHint, + .xBtreeTransferRow = sqlite3StockBtreeTransferRow, + .xBtreeClearTableOfCursor = sqlite3StockBtreeClearTableOfCursor, + .xBtreeCount = sqlite3StockBtreeCount, +}; +static const BtreeMethods stock_btree_methods = { + .pCsrMethods = &stock_btcursor_methods, + .xBtreeCursor = sqlite3StockBtreeCursor, + .xBtreeSeekCount = sqlite3StockBtreeSeekCount, + .xBtreeLastPage = sqlite3StockBtreeLastPage, + .xBtreeClose = sqlite3StockBtreeClose, + 
.xBtreeSetCacheSize = sqlite3StockBtreeSetCacheSize, + .xBtreeSetSpillSize = sqlite3StockBtreeSetSpillSize, + .xBtreeSetMmapLimit = sqlite3StockBtreeSetMmapLimit, + .xBtreeSetPagerFlags = sqlite3StockBtreeSetPagerFlags, + .xBtreeSetPageSize = sqlite3StockBtreeSetPageSize, + .xBtreeGetPageSize = sqlite3StockBtreeGetPageSize, + .xBtreeGetReserveNoMutex = sqlite3StockBtreeGetReserveNoMutex, + .xBtreeGetRequestedReserve = sqlite3StockBtreeGetRequestedReserve, + .xBtreeMaxPageCount = sqlite3StockBtreeMaxPageCount, + .xBtreeSecureDelete = sqlite3StockBtreeSecureDelete, + .xBtreeSetAutoVacuum = sqlite3StockBtreeSetAutoVacuum, + .xBtreeGetAutoVacuum = sqlite3StockBtreeGetAutoVacuum, + .xBtreeNewDb = sqlite3StockBtreeNewDb, + .xBtreeBeginTrans = sqlite3StockBtreeBeginTrans, + .xBtreeIncrVacuum = sqlite3StockBtreeIncrVacuum, + .xBtreeCommitPhaseOne = sqlite3StockBtreeCommitPhaseOne, + .xBtreeCommitPhaseTwo = sqlite3StockBtreeCommitPhaseTwo, + .xBtreeCommit = sqlite3StockBtreeCommit, + .xBtreeTripAllCursors = sqlite3StockBtreeTripAllCursors, + .xBtreeRollback = sqlite3StockBtreeRollback, + .xBtreeBeginStmt = sqlite3StockBtreeBeginStmt, + .xBtreeSavepoint = sqlite3StockBtreeSavepoint, + .xBtreeCreateTable = sqlite3StockBtreeCreateTable, + .xBtreeClearTable = sqlite3StockBtreeClearTable, + .xBtreeDropTable = sqlite3StockBtreeDropTable, + .xBtreeGetMeta = sqlite3StockBtreeGetMeta, + .xBtreeUpdateMeta = sqlite3StockBtreeUpdateMeta, + .xBtreePragma = sqlite3StockBtreePragma, + .xBtreePager = sqlite3StockBtreePager, + .xBtreeGetFilename = sqlite3StockBtreeGetFilename, + .xBtreeGetJournalname = sqlite3StockBtreeGetJournalname, + .xBtreeTxnState = sqlite3StockBtreeTxnState, + .xBtreeIsInBackup = sqlite3StockBtreeIsInBackup, + .xBtreeSchema = sqlite3StockBtreeSchema, + .xBtreeSchemaLocked = sqlite3StockBtreeSchemaLocked, + .xBtreeIsReadonly = sqlite3StockBtreeIsReadonly, + .xBtreeSetVersion = sqlite3StockBtreeSetVersion, + .xBtreeIntegrityCheck = sqlite3StockBtreeIntegrityCheck, + 
.xBtreeCheckpoint = sqlite3StockBtreeCheckpoint, + .xBtreeExclusiveLock = sqlite3StockBtreeExclusiveLock, +}; + +/* +** END OF GENERATED CODE +******************************************************************/ +/* END_HCT_MKBTREEWRAPPER_TCL_CODE */ + +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE sqlite3_uint64 sqlite3BtreeSeekCount(Btree *p){ + return p->pMethods->xBtreeSeekCount(p); +} +#endif + +SQLITE_PRIVATE BtCursor *sqlite3BtreeFakeValidCursor(void){ + static BtCursor csr = {0}; + return &csr; +} + +SQLITE_PRIVATE int sqlite3BtreeCursorSize(void){ + return MAX( + sqlite3HctBtreeCursorSize(), + sqlite3StockBtreeCursorSize() + ); +} + +SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor *p){ + memset(p, 0, sqlite3BtreeCursorSize()); +} + +SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ + if( pCur->pMethods==0 ) return 0; + return pCur->pMethods->xBtreeCursorHasMoved(pCur); +} + +SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ + if( pCur->pMethods==0 ) return 0; + return pCur->pMethods->xBtreeCloseCursor(pCur); +} + +SQLITE_PRIVATE int sqlite3BtreeCursor( + Btree *p, /* The btree */ + Pgno iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 
0 read-only */ + struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ + BtCursor *pCur /* Write new cursor here */ +){ + int rc = p->pMethods->xBtreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); + pCur->pMethods = p->pMethods->pCsrMethods; + return rc; +} + +static int btWrapperUseHct( + sqlite3_vfs *pVfs, + const char *zFilename, + int *pbUseHct +){ + int rc = SQLITE_OK; + char *zFull = 0; + char *zPagemap = 0; + int bUseHct = 0; + + if( zFilename && zFilename[0] ){ + int nAlloc = pVfs->mxPathname+2; + int bExists = 0; + + zFull = (char*)sqlite3_malloc(nAlloc); + if( zFull==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + memset(zFull, 0, nAlloc); + rc = pVfs->xFullPathname(pVfs, zFilename, pVfs->mxPathname, zFull); + } + + if( rc==SQLITE_OK ){ + rc = pVfs->xAccess(pVfs, zFull, SQLITE_ACCESS_EXISTS, &bExists); + } + if( rc==SQLITE_OK ){ + zPagemap = sqlite3_mprintf("%s-pagemap", zFull); + if( zPagemap==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else if( bExists ){ + rc = pVfs->xAccess(pVfs, zPagemap, SQLITE_ACCESS_EXISTS, &bUseHct); + }else{ + sqlite3OsDelete(pVfs, zPagemap, 0); + bUseHct = sqlite3_uri_boolean(zFilename, "hctree", 0); + } + } + } + + sqlite3_free(zFull); + sqlite3_free(zPagemap); + *pbUseHct = bUseHct; + return rc; +} + +SQLITE_PRIVATE int sqlite3BtreeOpen( + sqlite3_vfs *pVfs, /* VFS to use for this b-tree */ + const char *zFilename, /* Name of the file containing the BTree database */ + sqlite3 *db, /* Associated database handle */ + Btree **ppBtree, /* Pointer to new Btree object written here */ + int flags, /* Options */ + int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ +){ + Btree *pBtree = 0; + int rc = SQLITE_OK; + int bUseHct = 0; + + rc = btWrapperUseHct(pVfs, zFilename, &bUseHct); + if( rc==SQLITE_OK ){ + if( bUseHct ){ + rc = sqlite3HctBtreeOpen(pVfs, zFilename, db, &pBtree, flags, vfsFlags); + if( rc==SQLITE_OK ) pBtree->pMethods = &hct_btree_methods; + }else{ + rc = sqlite3StockBtreeOpen(pVfs, zFilename, db, &pBtree, flags, vfsFlags); 
+ if( rc==SQLITE_OK ) pBtree->pMethods = &stock_btree_methods; + } + } + *ppBtree = pBtree; + return rc; +} + +SQLITE_PRIVATE int sqlite3IsHct(Btree *pBt){ + return (pBt && pBt->pMethods==&hct_btree_methods); +} + +SQLITE_PRIVATE int sqlite3BtreeSchemaLoaded(Btree *pBt){ + int rc = SQLITE_OK; + if( sqlite3IsHct(pBt) ){ + rc = sqlite3HctBtreeSchemaLoaded(pBt); + } + return rc; +} + + + +/************** End of btwrapper.c *******************************************/ /************** Begin file vdbemem.c *****************************************/ /* ** 2004 May 26 @@ -87477,7 +88088,8 @@ static int valueFromFunction( goto value_from_function_out; } for(i=0; ia[i].pExpr, enc, aff, &apVal[i]); + rc = sqlite3Stat4ValueFromExpr(pCtx->pParse, pList->a[i].pExpr, aff, + &apVal[i]); if( apVal[i]==0 || rc!=SQLITE_OK ) goto value_from_function_out; } } @@ -88013,8 +88625,6 @@ SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ -/* #include "btreeInt.h" */ - /* Forward references */ static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef); static void vdbeFreeOpArray(sqlite3 *, Op *, int); @@ -91000,18 +91610,13 @@ static int vdbeCommit(sqlite3 *db, Vdbe *p){ if( 0==sqlite3Strlen30(sqlite3BtreeGetFilename(db->aDb[0].pBt)) || nTrans<=1 ){ - sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_BEFORE_PHASEONE); for(i=0; rc==SQLITE_OK && inDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ - pBt->pBt->aCommitTime = p->aCommitTime; rc = sqlite3BtreeCommitPhaseOne(pBt, 0); - pBt->pBt->aCommitTime = 0; } } - sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_BEFORE_PHASETWO); - /* Do the commit only if all databases successfully complete phase 1. ** If one of the BtreeCommitPhaseOne() calls fails, this indicates an ** IO error while deleting or truncating a journal file. 
It is unlikely, @@ -91020,13 +91625,9 @@ static int vdbeCommit(sqlite3 *db, Vdbe *p){ for(i=0; rc==SQLITE_OK && inDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ - pBt->pBt->aCommitTime = p->aCommitTime; rc = sqlite3BtreeCommitPhaseTwo(pBt, 0); - pBt->pBt->aCommitTime = 0; } } - - sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_AFTER_PHASETWO); if( rc==SQLITE_OK ){ sqlite3VtabCommit(db); } @@ -91423,9 +92024,7 @@ SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe *p){ ** or hit an 'OR FAIL' constraint and there are no deferred foreign ** key constraints to hold up the transaction. This means a commit ** is required. */ - sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_BEFORE_VDBECOMMIT); rc = vdbeCommit(db, p); - sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_AFTER_VDBECOMMIT); } if( (rc & 0xFF)==SQLITE_BUSY && p->readOnly ){ sqlite3VdbeLeave(p); @@ -92544,7 +93143,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3BlobCompare(const Mem *pB1, const Mem ** We must use separate SQLITE_NOINLINE functions here, since otherwise ** optimizer code movement causes gcov to become very confused. 
*/ -#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) static int SQLITE_NOINLINE doubleLt(double a, double b){ return ar ); - testcase( x==r ); - return (xr); }else{ i64 y; if( r<-9223372036854775808.0 ) return +1; @@ -93457,101 +94049,6 @@ SQLITE_PRIVATE int sqlite3CursorRangeHintExprCheck(Walker *pWalker, Expr *pExpr) } #endif /* SQLITE_ENABLE_CURSOR_HINTS && SQLITE_DEBUG */ -/* #include */ -SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ - u64 i1 = aCommit[COMMIT_TIME_START]; - assert( COMMIT_TIME_START==0 && COMMIT_TIME_FINISH==COMMIT_TIME_N-1 ); - if( aCommit[COMMIT_TIME_FINISH]>(i1+COMMIT_TIME_TIMEOUT) ){ - char *zStr = 0; - int ii; - for(ii=1; ii(i1+PREPARE_TIME_TIMEOUT) ){ - int nByte = nSql; - char *zStr = 0; - int ii; - for(ii=1; ii(i1+SCHEMA_TIME_TIMEOUT) ){ - char *zStr = 0; - int ii; - for(ii=1; iinField; i++){ @@ -93670,6 +94168,13 @@ SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( } sqlite3DbNNFreeNN(db, preupdate.aNew); } + if( preupdate.apDflt ){ + int i; + for(i=0; inCol; i++){ + sqlite3ValueFree(preupdate.apDflt[i]); + } + sqlite3DbFree(db, preupdate.apDflt); + } } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -95298,6 +95803,17 @@ SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt *pStmt, int N){ ** ** The error code stored in database p->db is overwritten with the return ** value in any case. +** +** (tag-20240917-01) If vdbeUnbind(p,(u32)(i-1)) returns SQLITE_OK, +** that means all of the the following will be true: +** +** p!=0 +** p->pVar!=0 +** i>0 +** i<=p->nVar +** +** An assert() is normally added after vdbeUnbind() to help static analyzers +** realize this. 
*/ static int vdbeUnbind(Vdbe *p, unsigned int i){ Mem *pVar; @@ -95355,6 +95871,7 @@ static int bindText( rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ + assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ if( zData!=0 ){ pVar = &p->aVar[i-1]; rc = sqlite3VdbeMemSetStr(pVar, zData, nData, encoding, xDel); @@ -95404,6 +95921,7 @@ SQLITE_API int sqlite3_bind_double(sqlite3_stmt *pStmt, int i, double rValue){ Vdbe *p = (Vdbe *)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ + assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3VdbeMemSetDouble(&p->aVar[i-1], rValue); sqlite3_mutex_leave(p->db->mutex); } @@ -95417,6 +95935,7 @@ SQLITE_API int sqlite3_bind_int64(sqlite3_stmt *pStmt, int i, sqlite_int64 iValu Vdbe *p = (Vdbe *)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ + assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3VdbeMemSetInt64(&p->aVar[i-1], iValue); sqlite3_mutex_leave(p->db->mutex); } @@ -95427,6 +95946,7 @@ SQLITE_API int sqlite3_bind_null(sqlite3_stmt *pStmt, int i){ Vdbe *p = (Vdbe*)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ + assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3_mutex_leave(p->db->mutex); } return rc; @@ -95442,6 +95962,7 @@ SQLITE_API int sqlite3_bind_pointer( Vdbe *p = (Vdbe*)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ + assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3VdbeMemSetPointer(&p->aVar[i-1], pPtr, zPTtype, xDestructor); sqlite3_mutex_leave(p->db->mutex); }else if( xDestructor ){ @@ -95523,6 +96044,7 @@ SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, int n){ Vdbe *p = (Vdbe *)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ + assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ #ifndef SQLITE_OMIT_INCRBLOB sqlite3VdbeMemSetZeroBlob(&p->aVar[i-1], n); #else @@ -95857,37 +96379,64 
@@ SQLITE_API int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppVa goto preupdate_old_out; } - /* If the old.* record has not yet been loaded into memory, do so now. */ - if( p->pUnpacked==0 ){ - u32 nRec; - u8 *aRec; + if( iIdx==p->pTab->iPKey ){ + *ppValue = pMem = &p->oldipk; + sqlite3VdbeMemSetInt64(pMem, p->iKey1); + }else{ - assert( p->pCsr->eCurType==CURTYPE_BTREE ); - nRec = sqlite3BtreePayloadSize(p->pCsr->uc.pCursor); - aRec = sqlite3DbMallocRaw(db, nRec); - if( !aRec ) goto preupdate_old_out; - rc = sqlite3BtreePayload(p->pCsr->uc.pCursor, 0, nRec, aRec); - if( rc==SQLITE_OK ){ - p->pUnpacked = vdbeUnpackRecord(&p->keyinfo, nRec, aRec); - if( !p->pUnpacked ) rc = SQLITE_NOMEM; - } - if( rc!=SQLITE_OK ){ - sqlite3DbFree(db, aRec); - goto preupdate_old_out; + /* If the old.* record has not yet been loaded into memory, do so now. */ + if( p->pUnpacked==0 ){ + u32 nRec; + u8 *aRec; + + assert( p->pCsr->eCurType==CURTYPE_BTREE ); + nRec = sqlite3BtreePayloadSize(p->pCsr->uc.pCursor); + aRec = sqlite3DbMallocRaw(db, nRec); + if( !aRec ) goto preupdate_old_out; + rc = sqlite3BtreePayload(p->pCsr->uc.pCursor, 0, nRec, aRec); + if( rc==SQLITE_OK ){ + p->pUnpacked = vdbeUnpackRecord(&p->keyinfo, nRec, aRec); + if( !p->pUnpacked ) rc = SQLITE_NOMEM; + } + if( rc!=SQLITE_OK ){ + sqlite3DbFree(db, aRec); + goto preupdate_old_out; + } + p->aRecord = aRec; } - p->aRecord = aRec; - } - pMem = *ppValue = &p->pUnpacked->aMem[iIdx]; - if( iIdx==p->pTab->iPKey ){ - sqlite3VdbeMemSetInt64(pMem, p->iKey1); - }else if( iIdx>=p->pUnpacked->nField ){ - *ppValue = (sqlite3_value *)columnNullValue(); - }else if( p->pTab->aCol[iIdx].affinity==SQLITE_AFF_REAL ){ - if( pMem->flags & (MEM_Int|MEM_IntReal) ){ - testcase( pMem->flags & MEM_Int ); - testcase( pMem->flags & MEM_IntReal ); - sqlite3VdbeMemRealify(pMem); + pMem = *ppValue = &p->pUnpacked->aMem[iIdx]; + if( iIdx>=p->pUnpacked->nField ){ + /* This occurs when the table has been extended using ALTER TABLE + ** 
ADD COLUMN. The value to return is the default value of the column. */ + Column *pCol = &p->pTab->aCol[iIdx]; + if( pCol->iDflt>0 ){ + if( p->apDflt==0 ){ + int nByte = sizeof(sqlite3_value*)*p->pTab->nCol; + p->apDflt = (sqlite3_value**)sqlite3DbMallocZero(db, nByte); + if( p->apDflt==0 ) goto preupdate_old_out; + } + if( p->apDflt[iIdx]==0 ){ + sqlite3_value *pVal = 0; + Expr *pDflt; + assert( p->pTab!=0 && IsOrdinaryTable(p->pTab) ); + pDflt = p->pTab->u.tab.pDfltList->a[pCol->iDflt-1].pExpr; + rc = sqlite3ValueFromExpr(db, pDflt, ENC(db), pCol->affinity, &pVal); + if( rc==SQLITE_OK && pVal==0 ){ + rc = SQLITE_CORRUPT_BKPT; + } + p->apDflt[iIdx] = pVal; + } + *ppValue = p->apDflt[iIdx]; + }else{ + *ppValue = (sqlite3_value *)columnNullValue(); + } + }else if( p->pTab->aCol[iIdx].affinity==SQLITE_AFF_REAL ){ + if( pMem->flags & (MEM_Int|MEM_IntReal) ){ + testcase( pMem->flags & MEM_Int ); + testcase( pMem->flags & MEM_IntReal ); + sqlite3VdbeMemRealify(pMem); + } } } @@ -96435,6 +96984,104 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ +/* +** High-resolution hardware timer used for debugging and testing only. +*/ +#if defined(VDBE_PROFILE) \ + || defined(SQLITE_PERFORMANCE_TRACE) \ + || defined(SQLITE_ENABLE_STMT_SCANSTATUS) +/************** Include hwtime.h in the middle of vdbe.c *********************/ +/************** Begin file hwtime.h ******************************************/ +/* +** 2008 May 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** This file contains inline asm code for retrieving "high-performance" +** counters for x86 and x86_64 class CPUs. 
+*/ +#ifndef SQLITE_HWTIME_H +#define SQLITE_HWTIME_H + +/* +** The following routine only works on Pentium-class (or newer) processors. +** It uses the RDTSC opcode to read the cycle count value out of the +** processor and returns that value. This can be used for high-res +** profiling. +*/ +#if !defined(__STRICT_ANSI__) && \ + (defined(__GNUC__) || defined(_MSC_VER)) && \ + (defined(i386) || defined(__i386__) || defined(_M_IX86)) + + #if defined(__GNUC__) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + + #elif defined(_MSC_VER) + + __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ + __asm { + rdtsc + ret ; return value at EDX:EAX + } + } + + #endif + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned long long retval; + unsigned long junk; + __asm__ __volatile__ ("\n\ + 1: mftbu %1\n\ + mftb %L0\n\ + mftbu %0\n\ + cmpw %0,%1\n\ + bne 1b" + : "=r" (retval), "=r" (junk)); + return retval; + } + +#else + + /* + ** asm() is needed for hardware timing support. Without asm(), + ** disable the sqlite3Hwtime() routine. + ** + ** sqlite3Hwtime() is only used for some obscure debugging + ** and analysis configurations, not in any deliverable, so this + ** should not be a great loss. 
+ */ +SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } + +#endif + +#endif /* !defined(SQLITE_HWTIME_H) */ + +/************** End of hwtime.h **********************************************/ +/************** Continuing where we left off in vdbe.c ***********************/ +#endif + /* ** Invoke this macro on memory cells just prior to changing the ** value of the cell. This macro verifies that shallow copies are @@ -99402,7 +100049,9 @@ case OP_Column: { /* ncycle */ pC->payloadSize = sqlite3BtreePayloadSize(pCrsr); pC->aRow = sqlite3BtreePayloadFetch(pCrsr, &pC->szRow); assert( pC->szRow<=pC->payloadSize ); +#if 0 assert( pC->szRow<=65536 ); /* Maximum page size is 64KiB */ +#endif } pC->cacheStatus = p->cacheCtr; if( (aOffset[0] = pC->aRow[0])<0x80 ){ @@ -100369,13 +101018,6 @@ case OP_AutoCommit: { assert( p->bIsReader ); if( desiredAutoCommit!=db->autoCommit ){ - - u64 aCommit[COMMIT_TIME_N]; - memset(aCommit, 0, sizeof(aCommit)); - if( iRollback==0 ){ - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); - } - if( iRollback ){ assert( desiredAutoCommit==1 ); sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); @@ -100400,11 +101042,7 @@ case OP_AutoCommit: { }else{ db->autoCommit = (u8)desiredAutoCommit; } - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_BEFORE_HALT); - p->aCommitTime = aCommit; hrc = sqlite3VdbeHalt(p); - p->aCommitTime = 0; - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_AFTER_HALT); if( (hrc & 0xFF)==SQLITE_BUSY ){ p->pc = (int)(pOp - aOp); db->autoCommit = (u8)(1-desiredAutoCommit); @@ -100420,8 +101058,6 @@ case OP_AutoCommit: { }else{ rc = SQLITE_ERROR; } - sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); - if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -100646,6 +101282,11 @@ case OP_SetCookie: { *(u32*)&pDb->pSchema->schema_cookie = *(u32*)&pOp->p3 - pOp->p5; db->mDbFlags |= DBFLAG_SchemaChange; sqlite3FkClearTriggerCache(db, pOp->p1); +#ifdef 
SQLITE_ENABLE_HCT + if( sqlite3IsHct(pDb->pBt) ){ + rc = sqlite3HctSchemaOp(pDb->pBt, p->zSql); + } +#endif }else if( pOp->p2==BTREE_FILE_FORMAT ){ /* Record changes in the file format */ pDb->pSchema->file_format = pOp->p3; @@ -100992,8 +101633,13 @@ case OP_OpenEphemeral: { /* ncycle */ } } pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); + assert( p->apCsr[pOp->p1]==pCx ); if( rc ){ + assert( !sqlite3BtreeClosesWithCursor(pCx->ub.pBtx, pCx->uc.pCursor) ); sqlite3BtreeClose(pCx->ub.pBtx); + p->apCsr[pOp->p1] = 0; /* Not required; helps with static analysis */ + }else{ + assert( sqlite3BtreeClosesWithCursor(pCx->ub.pBtx, pCx->uc.pCursor) ); } } } @@ -101291,6 +101937,9 @@ case OP_SeekGT: { /* jump0, in3, group, ncycle */ if( (oc & 0x0001)==(OP_SeekLT & 0x0001) ) oc++; } } + sqlite3BtreeCursorDir(pC->uc.pCursor, + (oc==OP_SeekGE || oc==OP_SeekGT) ? BTREE_DIR_FORWARD : BTREE_DIR_REVERSE + ); rc = sqlite3BtreeTableMoveto(pC->uc.pCursor, (u64)iKey, 0, &res); pC->movetoTarget = iKey; /* Used by OP_Delete */ if( rc!=SQLITE_OK ){ @@ -101344,6 +101993,9 @@ case OP_SeekGT: { /* jump0, in3, group, ncycle */ } #endif r.eqSeen = 0; + sqlite3BtreeCursorDir(pC->uc.pCursor, + (oc==OP_SeekGE || oc==OP_SeekGT) ? BTREE_DIR_FORWARD : BTREE_DIR_REVERSE + ); rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &res); if( rc!=SQLITE_OK ){ goto abort_due_to_error; @@ -101765,6 +102417,9 @@ case OP_Found: { /* jump, in3, ncycle */ assert( pC->eCurType==CURTYPE_BTREE ); assert( pC->uc.pCursor!=0 ); assert( pC->isTable==0 ); + sqlite3BtreeCursorDir(pC->uc.pCursor, + pOp->opcode==OP_NoConflict ? 
BTREE_DIR_NONE : BTREE_DIR_FORWARD + ); r.nField = (u16)pOp->p4.i; if( r.nField>0 ){ /* Key values in an array of registers */ @@ -101915,6 +102570,7 @@ case OP_NotExists: /* jump, in3, ncycle */ pCrsr = pC->uc.pCursor; assert( pCrsr!=0 ); res = 0; + sqlite3BtreeCursorDir(pCrsr, 0); rc = sqlite3BtreeTableMoveto(pCrsr, iKey, 0, &res); assert( rc==SQLITE_OK || res==0 ); pC->movetoTarget = iKey; /* Used by OP_Delete */ @@ -102996,7 +103652,6 @@ case OP_SorterInsert: { /* in2 */ case OP_IdxDelete: { VdbeCursor *pC; BtCursor *pCrsr; - int res; UnpackedRecord r; assert( pOp->p3>0 ); @@ -103012,6 +103667,10 @@ case OP_IdxDelete: { r.nField = (u16)pOp->p3; r.default_rc = 0; r.aMem = &aMem[pOp->p2]; +#if 1 + rc = sqlite3BtreeIdxDelete(pCrsr, &r); + if( rc ) goto abort_due_to_error; +#else rc = sqlite3BtreeIndexMoveto(pCrsr, &r, &res); if( rc ) goto abort_due_to_error; if( res==0 ){ @@ -103021,6 +103680,7 @@ case OP_IdxDelete: { rc = sqlite3ReportError(SQLITE_CORRUPT_INDEX, __LINE__, "index corruption"); goto abort_due_to_error; } +#endif assert( pC->deferredMoveto==0 ); pC->cacheStatus = CACHE_STALE; pC->seekResult = 0; @@ -105529,7 +106189,7 @@ case OP_ReleaseReg: { ** As with all opcodes, the meanings of the parameters for OP_Explain ** are subject to change from one release to the next. Applications ** should not attempt to interpret or use any of the information -** contined in the OP_Explain opcode. The information provided by this +** contained in the OP_Explain opcode. The information provided by this ** opcode is intended for testing and debugging use only. 
*/ default: { /* This is really OP_Noop, OP_Explain */ @@ -110625,7 +111285,7 @@ static int lookupName( */ if( cntTab==0 || (cntTab==1 - && ALWAYS(pMatch!=0) + && pMatch!=0 && ALWAYS(pMatch->pSTab!=0) && (pMatch->pSTab->tabFlags & TF_Ephemeral)!=0 && (pTab->tabFlags & TF_Ephemeral)==0) @@ -111258,8 +111918,8 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ /* Resolve function names */ case TK_FUNCTION: { - ExprList *pList = pExpr->x.pList; /* The argument list */ - int n = pList ? pList->nExpr : 0; /* Number of arguments */ + ExprList *pList; /* The argument list */ + int n; /* Number of arguments */ int no_such_func = 0; /* True if no such function exists */ int wrong_num_args = 0; /* True if wrong number of arguments */ int is_agg = 0; /* True if is an aggregate function */ @@ -111272,6 +111932,8 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ #endif assert( !ExprHasProperty(pExpr, EP_xIsSelect|EP_IntValue) ); assert( pExpr->pLeft==0 || pExpr->pLeft->op==TK_ORDER ); + pList = pExpr->x.pList; + n = pList ? pList->nExpr : 0; zId = pExpr->u.zToken; pDef = sqlite3FindFunction(pParse->db, zId, n, enc, 0); if( pDef==0 ){ @@ -111320,6 +111982,24 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ } } #endif + + /* If the function may call sqlite3_value_subtype(), then set the + ** EP_SubtArg flag on all of its argument expressions. This prevents + ** where.c from replacing the expression with a value read from an + ** index on the same expression, which will not have the correct + ** subtype. Also set the flag if the function expression itself is + ** an EP_SubtArg expression. In this case subtypes are required as + ** the function may return a value with a subtype back to its + ** caller using sqlite3_result_value(). 
*/ + if( (pDef->funcFlags & SQLITE_SUBTYPE) + || ExprHasProperty(pExpr, EP_SubtArg) + ){ + int ii; + for(ii=0; iia[ii].pExpr, EP_SubtArg); + } + } + if( pDef->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG) ){ /* For the purposes of the EP_ConstFunc flag, date and time ** functions and other functions that change slowly are considered @@ -112919,7 +113599,7 @@ static int codeCompare( p5 = binaryCompareP5(pLeft, pRight, jumpIfNull); addr = sqlite3VdbeAddOp4(pParse->pVdbe, opcode, in2, dest, in1, (void*)p4, P4_COLLSEQ); - sqlite3VdbeChangeP5(pParse->pVdbe, (u16)p5); + sqlite3VdbeChangeP5(pParse->pVdbe, (u8)p5); return addr; } @@ -116994,6 +117674,59 @@ static int exprCodeInlineFunction( return target; } +/* +** Expression Node callback for sqlite3ExprCanReturnSubtype(). +** +** Only a function call is able to return a subtype. So if the node +** is not a function call, return WRC_Prune immediately. +** +** A function call is able to return a subtype if it has the +** SQLITE_RESULT_SUBTYPE property. +** +** Assume that every function is able to pass-through a subtype from +** one of its argument (using sqlite3_result_value()). Most functions +** are not this way, but we don't have a mechanism to distinguish those +** that are from those that are not, so assume they all work this way. +** That means that if one of its arguments is another function and that +** other function is able to return a subtype, then this function is +** able to return a subtype. +*/ +static int exprNodeCanReturnSubtype(Walker *pWalker, Expr *pExpr){ + int n; + FuncDef *pDef; + sqlite3 *db; + if( pExpr->op!=TK_FUNCTION ){ + return WRC_Prune; + } + assert( ExprUseXList(pExpr) ); + db = pWalker->pParse->db; + n = ALWAYS(pExpr->x.pList) ? 
pExpr->x.pList->nExpr : 0; + pDef = sqlite3FindFunction(db, pExpr->u.zToken, n, ENC(db), 0); + if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_RESULT_SUBTYPE)!=0 ){ + pWalker->eCode = 1; + return WRC_Prune; + } + return WRC_Continue; +} + +/* +** Return TRUE if expression pExpr is able to return a subtype. +** +** A TRUE return does not guarantee that a subtype will be returned. +** It only indicates that a subtype return is possible. False positives +** are acceptable as they only disable an optimization. False negatives, +** on the other hand, can lead to incorrect answers. +*/ +static int sqlite3ExprCanReturnSubtype(Parse *pParse, Expr *pExpr){ + Walker w; + memset(&w, 0, sizeof(w)); + w.pParse = pParse; + w.xExprCallback = exprNodeCanReturnSubtype; + sqlite3WalkExpr(&w, pExpr); + return w.eCode; +} + + /* ** Check to see if pExpr is one of the indexed expressions on pParse->pIdxEpr. ** If it is, then resolve the expression by reading from the index and @@ -117026,6 +117759,17 @@ static SQLITE_NOINLINE int sqlite3IndexedExprLookup( continue; } + + /* Functions that might set a subtype should not be replaced by the + ** value taken from an expression index if they are themselves an + ** argument to another scalar function or aggregate. + ** https://sqlite.org/forum/forumpost/68d284c86b082c3e */ + if( ExprHasProperty(pExpr, EP_SubtArg) + && sqlite3ExprCanReturnSubtype(pParse, pExpr) + ){ + continue; + } + v = pParse->pVdbe; assert( v!=0 ); if( p->bMaybeNullRow ){ @@ -122280,13 +123024,6 @@ static void openStatTable( # define SQLITE_STAT4_SAMPLES 24 #endif -/* -** Assumed number of of samples when loading sqlite_stat4 data. It doesn't -** matter if there are more or fewer samples than this, but is more efficient -** if this estimate turns out to be true. 
-*/ -#define SQLITE_STAT4_EST_SAMPLES SQLITE_STAT4_SAMPLES - /* ** Three SQL functions - stat_init(), stat_push(), and stat_get() - ** share an instance of the following structure to hold their state @@ -123581,9 +124318,6 @@ static void decodeIntArray( #endif if( *z==' ' ) z++; } - if( aOut ){ - for(/* no-op */; iaSample[j]; sqlite3DbFree(db, p->p); } - if( pIdx->nSampleAlloc!=SQLITE_STAT4_EST_SAMPLES ){ - sqlite3DbFree(db, pIdx->aSample); - } + sqlite3DbFree(db, pIdx->aSample); } if( db->pnBytesFreed==0 ){ pIdx->nSample = 0; pIdx->aSample = 0; - pIdx->nSampleAlloc = 0; } #else UNUSED_PARAMETER(db); @@ -123799,110 +124530,8 @@ static Index *findIndexOrPrimaryKey( } /* -** Grow the pIdx->aSample[] array. Return SQLITE_OK if successful, or -** SQLITE_NOMEM otherwise. -*/ -static int growSampleArray(sqlite3 *db, Index *pIdx, int *piOff){ - int nIdxCol = pIdx->nSampleCol; - int nNew = 0; - IndexSample *aNew = 0; - int nByte = 0; - tRowcnt *pSpace; /* Available allocated memory space */ - u8 *pPtr; /* Available memory as a u8 for easier manipulation */ - int i; - u64 t; - - assert( pIdx->nSample==pIdx->nSampleAlloc ); - nNew = SQLITE_STAT4_EST_SAMPLES; - if( pIdx->nSample ){ - nNew = pIdx->nSample*2; - } - - /* Set nByte to the required amount of space */ - nByte = ROUND8(sizeof(IndexSample) * nNew); - nByte += sizeof(tRowcnt) * nIdxCol * 3 * nNew; - nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ - - if( nNew==SQLITE_STAT4_EST_SAMPLES ){ - aNew = (IndexSample*)&((u8*)pIdx->pSchema->pStat4Space)[*piOff]; - *piOff += nByte; - assert( *piOff<=sqlite3_msize(pIdx->pSchema->pStat4Space) ); - }else{ - aNew = (IndexSample*)sqlite3DbMallocRaw(db, nByte); - if( aNew==0 ) return SQLITE_NOMEM_BKPT; - } - - pPtr = (u8*)aNew; - pPtr += ROUND8(nNew*sizeof(pIdx->aSample[0])); - pSpace = (tRowcnt*)pPtr; - - pIdx->aAvgEq = pSpace; pSpace += nIdxCol; - assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); - - if( pIdx->nSample ){ - /* Copy the contents of the anEq[], anLt[], 
anDLt[] arrays for all - ** extant samples to the new location. */ - int nByte = nIdxCol * 3 * sizeof(tRowcnt) * pIdx->nSample; - memcpy(pSpace, pIdx->aSample[0].anEq, nByte); - } - for(i=0; inSample ){ - aNew[i].p = pIdx->aSample[i].p; - aNew[i].n = pIdx->aSample[i].n; - } - } - assert( ((u8*)pSpace)-nByte==(u8*)aNew ); - - if( pIdx->nSample!=SQLITE_STAT4_EST_SAMPLES ){ - sqlite3DbFree(db, pIdx->aSample); - } - pIdx->aSample = aNew; - pIdx->nSampleAlloc = nNew; - return SQLITE_OK; -} - -/* -** Allocate the space that will likely be required for the Index.aSample[] -** arrays populated by loading data from the sqlite_stat4 table. Return -** SQLITE_OK if successful, or SQLITE_NOMEM otherwise. -*/ -static int stat4AllocSpace(sqlite3 *db, const char *zDb){ - int iDb = sqlite3FindDbName(db, zDb); - Schema *pSchema = db->aDb[iDb].pSchema; - int nByte = 0; - HashElem *k; - - assert( iDb>=0 ); - assert( pSchema->pStat4Space==0 ); - for(k=sqliteHashFirst(&pSchema->idxHash); k; k=sqliteHashNext(k)){ - Index *pIdx = sqliteHashData(k); - int nIdxCol; - if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ - nIdxCol = pIdx->nKeyCol; - }else{ - nIdxCol = pIdx->nColumn; - } - nByte += ROUND8(sizeof(IndexSample) * SQLITE_STAT4_EST_SAMPLES); - nByte += sizeof(tRowcnt) * nIdxCol * 3 * SQLITE_STAT4_EST_SAMPLES; - nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ - } - - if( nByte>0 ){ - pSchema->pStat4Space = sqlite3_malloc(nByte); - if( pSchema->pStat4Space==0 ){ - return SQLITE_NOMEM_BKPT; - } - } - - return SQLITE_OK; -} - -/* -** Load the content from the sqlite_stat4 into the relevant Index.aSample[] -** arrays. +** Load the content from either the sqlite_stat4 +** into the relevant Index.aSample[] arrays. 
** ** Arguments zSql1 and zSql2 must point to SQL statements that return ** data equivalent to the following: @@ -123923,16 +124552,69 @@ static int loadStatTbl( char *zSql; /* Text of the SQL statement */ Index *pPrevIdx = 0; /* Previous index in the loop */ IndexSample *pSample; /* A slot in pIdx->aSample[] */ - int iBlockOff = 0; /* Offset into Schema.pStat4Space */ assert( db->lookaside.bDisable ); + zSql = sqlite3MPrintf(db, zSql1, zDb); + if( !zSql ){ + return SQLITE_NOMEM_BKPT; + } + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + sqlite3DbFree(db, zSql); + if( rc ) return rc; - /* Allocate the Schema.pStat4Space block that will be used for the - ** Index.aSample[] arrays populated by this call. */ - rc = stat4AllocSpace(db, zDb); - if( rc!=SQLITE_OK ) return rc; + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + int nIdxCol = 1; /* Number of columns in stat4 records */ - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_SPACE); + char *zIndex; /* Index name */ + Index *pIdx; /* Pointer to the index object */ + int nSample; /* Number of samples */ + i64 nByte; /* Bytes of space required */ + i64 i; /* Bytes of space required */ + tRowcnt *pSpace; /* Available allocated memory space */ + u8 *pPtr; /* Available memory as a u8 for easier manipulation */ + + zIndex = (char *)sqlite3_column_text(pStmt, 0); + if( zIndex==0 ) continue; + nSample = sqlite3_column_int(pStmt, 1); + pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); + assert( pIdx==0 || pIdx->nSample==0 ); + if( pIdx==0 ) continue; + if( pIdx->aSample!=0 ){ + /* The same index appears in sqlite_stat4 under multiple names */ + continue; + } + assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + nIdxCol = pIdx->nKeyCol; + }else{ + nIdxCol = pIdx->nColumn; + } + pIdx->nSampleCol = nIdxCol; + pIdx->mxSample = nSample; + nByte = ROUND8(sizeof(IndexSample) * nSample); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; + nByte 
+= nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + + pIdx->aSample = sqlite3DbMallocZero(db, nByte); + if( pIdx->aSample==0 ){ + sqlite3_finalize(pStmt); + return SQLITE_NOMEM_BKPT; + } + pPtr = (u8*)pIdx->aSample; + pPtr += ROUND8(nSample*sizeof(pIdx->aSample[0])); + pSpace = (tRowcnt*)pPtr; + assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); + pIdx->aAvgEq = pSpace; pSpace += nIdxCol; + pIdx->pTable->tabFlags |= TF_HasStat4; + for(i=0; iaSample[i].anEq = pSpace; pSpace += nIdxCol; + pIdx->aSample[i].anLt = pSpace; pSpace += nIdxCol; + pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; + } + assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); + } + rc = sqlite3_finalize(pStmt); + if( rc ) return rc; zSql = sqlite3MPrintf(db, zSql2, zDb); if( !zSql ){ @@ -123942,41 +124624,27 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_PREPARE); - while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ int nCol = 1; /* Number of columns in index */ - u64 t = sqlite3STimeNow(); zIndex = (char *)sqlite3_column_text(pStmt, 0); if( zIndex==0 ) continue; pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); if( pIdx==0 ) continue; - - if( pIdx->nSample==pIdx->nSampleAlloc ){ - u64 t2; - pIdx->pTable->tabFlags |= TF_HasStat4; - assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); - if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ - pIdx->nSampleCol = pIdx->nKeyCol; - }else{ - pIdx->nSampleCol = pIdx->nColumn; - } - t2 = sqlite3STimeNow(); - if( growSampleArray(db, pIdx, &iBlockOff) ) break; - if( db->aSchemaTime ){ - db->aSchemaTime[SCHEMA_TIME_STAT4_GROWUS] += (sqlite3STimeNow() - t); - } + if( pIdx->nSample>=pIdx->mxSample ){ + /* Too many slots used because the same index appears in + ** sqlite_stat4 using multiple names */ + continue; } - + /* This next condition is true if data has already been loaded 
from + ** the sqlite_stat4 table. */ + nCol = pIdx->nSampleCol; if( pIdx!=pPrevIdx ){ initAvgEq(pPrevIdx); pPrevIdx = pIdx; } - - nCol = pIdx->nSampleCol; pSample = &pIdx->aSample[pIdx->nSample]; decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0); decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); @@ -123999,13 +124667,8 @@ static int loadStatTbl( memcpy(pSample->p, sqlite3_column_blob(pStmt, 4), pSample->n); } pIdx->nSample++; - - if( db->aSchemaTime ){ - db->aSchemaTime[SCHEMA_TIME_STAT4_Q2_BODYUS] += (sqlite3STimeNow() - t); - } } rc = sqlite3_finalize(pStmt); - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -124078,12 +124741,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ pIdx->aSample = 0; #endif } -#ifdef SQLITE_ENABLE_STAT4 - sqlite3_free(pSchema->pStat4Space); - pSchema->pStat4Space = 0; -#endif - - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; @@ -124101,8 +124758,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); - /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -124110,8 +124765,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); - /* Load the statistics from the sqlite_stat4 table. 
*/ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -124126,8 +124779,6 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); - if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -124368,15 +125019,6 @@ static void attachFunc( sqlite3BtreeLeaveAll(db); assert( zErrDyn==0 || rc!=SQLITE_OK ); } -#ifdef SQLITE_USER_AUTHENTICATION - if( rc==SQLITE_OK && !REOPEN_AS_MEMDB(db) ){ - u8 newAuth = 0; - rc = sqlite3UserAuthCheckLogin(db, zName, &newAuth); - if( newAuthauth.authLevel ){ - rc = SQLITE_AUTH_USER; - } - } -#endif if( rc ){ if( ALWAYS(!REOPEN_AS_MEMDB(db)) ){ int iDb = db->nDb - 1; @@ -124874,11 +125516,7 @@ SQLITE_PRIVATE int sqlite3AuthReadCol( int rc; /* Auth callback return code */ if( db->init.busy ) return SQLITE_OK; - rc = db->xAuth(db->pAuthArg, SQLITE_READ, zTab,zCol,zDb,pParse->zAuthContext -#ifdef SQLITE_USER_AUTHENTICATION - ,db->auth.zAuthUser -#endif - ); + rc = db->xAuth(db->pAuthArg, SQLITE_READ, zTab,zCol,zDb,pParse->zAuthContext); if( rc==SQLITE_DENY ){ char *z = sqlite3_mprintf("%s.%s", zTab, zCol); if( db->nDb>2 || iDb!=0 ) z = sqlite3_mprintf("%s.%z", zDb, z); @@ -124985,11 +125623,7 @@ SQLITE_PRIVATE int sqlite3AuthCheck( testcase( zArg3==0 ); testcase( pParse->zAuthContext==0 ); - rc = db->xAuth(db->pAuthArg, code, zArg1, zArg2, zArg3, pParse->zAuthContext -#ifdef SQLITE_USER_AUTHENTICATION - ,db->auth.zAuthUser -#endif - ); + rc = db->xAuth(db->pAuthArg,code,zArg1,zArg2,zArg3,pParse->zAuthContext); if( rc==SQLITE_DENY ){ sqlite3ErrorMsg(pParse, "not authorized"); pParse->rc = SQLITE_AUTH; @@ -125222,17 +125856,6 @@ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ } sqlite3VdbeAddOp0(v, OP_Halt); -#if SQLITE_USER_AUTHENTICATION && !defined(SQLITE_OMIT_SHARED_CACHE) - if( pParse->nTableLock>0 && db->init.busy==0 ){ - sqlite3UserAuthInit(db); - if( db->auth.authLevelrc = SQLITE_AUTH_USER; - return; - } - } -#endif - /* The cookie mask contains one 
bit for each database file open. ** (Bit 0 is for main, bit 1 is for temp, and so forth.) Bits are ** set for each database that is used. Generate code to start a @@ -125361,16 +125984,6 @@ SQLITE_PRIVATE void sqlite3NestedParse(Parse *pParse, const char *zFormat, ...){ pParse->nested--; } -#if SQLITE_USER_AUTHENTICATION -/* -** Return TRUE if zTable is the name of the system table that stores the -** list of users and their access credentials. -*/ -SQLITE_PRIVATE int sqlite3UserAuthTable(const char *zTable){ - return sqlite3_stricmp(zTable, "sqlite_user")==0; -} -#endif - /* ** Locate the in-memory structure that describes a particular database ** table given the name of that table and (optionally) the name of the @@ -125389,13 +126002,6 @@ SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3 *db, const char *zName, const cha /* All mutexes are required for schema access. Make sure we hold them. */ assert( zDatabase!=0 || sqlite3BtreeHoldsAllMutexes(db) ); -#if SQLITE_USER_AUTHENTICATION - /* Only the admin user is allowed to know that the sqlite_user table - ** exists */ - if( db->auth.authLevelnDb; i++){ if( sqlite3StrICmp(zDatabase, db->aDb[i].zDbSName)==0 ) break; @@ -129054,9 +129660,6 @@ SQLITE_PRIVATE void sqlite3CreateIndex( if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 && db->init.busy==0 && pTblName!=0 -#if SQLITE_USER_AUTHENTICATION - && sqlite3UserAuthTable(pTab->zName)==0 -#endif ){ sqlite3ErrorMsg(pParse, "table %s may not be indexed", pTab->zName); goto exit_create_index; @@ -130686,6 +131289,7 @@ SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){ pKey = sqlite3KeyInfoAlloc(pParse->db, nCol, 0); } if( pKey ){ + if( pIdx->onError ) pKey->nUniqField = pIdx->nKeyCol; assert( sqlite3KeyInfoIsWriteable(pKey) ); for(i=0; iazColl[i]; @@ -131348,10 +131952,6 @@ SQLITE_PRIVATE void sqlite3SchemaClear(void *p){ pSchema->iGeneration++; } pSchema->schemaFlags &= ~(DB_SchemaLoaded|DB_ResetWanted); -#ifdef SQLITE_ENABLE_STAT4 - 
sqlite3_free(pSchema->pStat4Space); - pSchema->pStat4Space = 0; -#endif } /* @@ -131456,6 +132056,7 @@ SQLITE_PRIVATE void sqlite3CodeChangeCount(Vdbe *v, int regCounter, const char * ** is for a top-level SQL statement. */ static int vtabIsReadOnly(Parse *pParse, Table *pTab){ + assert( IsVirtual(pTab) ); if( sqlite3GetVTable(pParse->db, pTab)->pMod->pModule->xUpdate==0 ){ return 1; } @@ -134950,7 +135551,13 @@ static void signFunc( ** Implementation of fpdecode(x,y,z) function. ** ** x is a real number that is to be decoded. y is the precision. -** z is the maximum real precision. +** z is the maximum real precision. Return a string that shows the +** results of the sqlite3FpDecode() function. +** +** Used for testing and debugging only, specifically testing and debugging +** of the sqlite3FpDecode() function. This SQL function does not appear +** in production builds. This function is not an API and is subject to +** modification or removal in future versions of SQLite. */ static void fpdecodeFunc( sqlite3_context *context, @@ -134977,6 +135584,82 @@ static void fpdecodeFunc( } #endif /* SQLITE_DEBUG */ +#ifdef SQLITE_DEBUG +/* +** Implementation of parseuri(uri,flags) function. +** +** Required Arguments: +** "uri" The URI to parse. +** "flags" Bitmask of flags, as if to sqlite3_open_v2(). +** +** Additional arguments beyond the first two make calls to +** sqlite3_uri_key() for integers and sqlite3_uri_parameter for +** anything else. +** +** The result is a string showing the results of calling sqlite3ParseUri(). +** +** Used for testing and debugging only, specifically testing and debugging +** of the sqlite3ParseUri() function. This SQL function does not appear +** in production builds. This function is not an API and is subject to +** modification or removal in future versions of SQLite. 
+*/ +static void parseuriFunc( + sqlite3_context *ctx, + int argc, + sqlite3_value **argv +){ + sqlite3_str *pResult; + const char *zVfs; + const char *zUri; + unsigned int flgs; + int rc; + sqlite3_vfs *pVfs = 0; + char *zFile = 0; + char *zErr = 0; + + if( argc<2 ) return; + pVfs = sqlite3_vfs_find(0); + assert( pVfs ); + zVfs = pVfs->zName; + zUri = (const char*)sqlite3_value_text(argv[0]); + if( zUri==0 ) return; + flgs = (unsigned int)sqlite3_value_int(argv[1]); + rc = sqlite3ParseUri(zVfs, zUri, &flgs, &pVfs, &zFile, &zErr); + pResult = sqlite3_str_new(0); + if( pResult ){ + int i; + sqlite3_str_appendf(pResult, "rc=%d", rc); + sqlite3_str_appendf(pResult, ", flags=0x%x", flgs); + sqlite3_str_appendf(pResult, ", vfs=%Q", pVfs ? pVfs->zName: 0); + sqlite3_str_appendf(pResult, ", err=%Q", zErr); + sqlite3_str_appendf(pResult, ", file=%Q", zFile); + if( zFile ){ + const char *z = zFile; + z += sqlite3Strlen30(z)+1; + while( z[0] ){ + sqlite3_str_appendf(pResult, ", %Q", z); + z += sqlite3Strlen30(z)+1; + } + for(i=2; ibHctMigrate ) bUseSeek = 0; sqlite3CompleteInsertion(pParse, pTab, iDataCur, iIdxCur, regIns, aRegIdx, 0, appendFlag, bUseSeek ); @@ -138960,7 +139643,11 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( ** the following conflict logic if it does not. 
*/ VdbeNoopComment((v, "uniqueness check for ROWID")); sqlite3VdbeVerifyAbortable(v, onError); - sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRowidOk, regNewData); + if( db->bHctMigrate ){ + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrRowidOk); + }else{ + sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRowidOk, regNewData); + } VdbeCoverage(v); switch( onError ){ @@ -139176,9 +139863,13 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( /* Check to see if the new index entry will be unique */ sqlite3VdbeVerifyAbortable(v, onError); - addrConflictCk = - sqlite3VdbeAddOp4Int(v, OP_NoConflict, iThisCur, addrUniqueOk, - regIdx, pIdx->nKeyCol); VdbeCoverage(v); + if( db->bHctMigrate ){ + addrConflictCk = sqlite3VdbeAddOp2(v, OP_Goto, 0, addrUniqueOk); + }else{ + addrConflictCk = + sqlite3VdbeAddOp4Int(v, OP_NoConflict, iThisCur, addrUniqueOk, + regIdx, pIdx->nKeyCol); VdbeCoverage(v); + } /* Generate code to handle collisions */ regR = pIdx==pPk ? regIdx : sqlite3GetTempRange(pParse, nPkField); @@ -140111,7 +140802,6 @@ SQLITE_API int sqlite3_exec( int nCol = 0; char **azVals = 0; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_BEFORE_PREPARE); pStmt = 0; rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zLeftover); assert( rc==SQLITE_OK || pStmt==0 ); @@ -140125,7 +140815,6 @@ SQLITE_API int sqlite3_exec( } callbackIsInit = 0; - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_BEFORE_STEP); while( 1 ){ int i; rc = sqlite3_step(pStmt); @@ -140171,7 +140860,6 @@ SQLITE_API int sqlite3_exec( } } - sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_BEFORE_FINALIZE); if( rc!=SQLITE_ROW ){ rc = sqlite3VdbeFinalize((Vdbe *)pStmt); pStmt = 0; @@ -142955,7 +143643,6 @@ SQLITE_PRIVATE void sqlite3Pragma( Vdbe *v = sqlite3GetVdbe(pParse); /* Prepared statement */ const PragmaName *pPragma; /* The pragma */ - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINPRAGMA); if( v==0 ) return; sqlite3VdbeRunOnlyOnce(v); pParse->nMem = 2; @@ -142981,13 +143668,11 @@ 
SQLITE_PRIVATE void sqlite3Pragma( zRight = sqlite3NameFromToken(db, pValue); } - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINAUTHCHECK); assert( pId2 ); zDb = pId2->n>0 ? pDb->zDbSName : 0; if( sqlite3AuthCheck(pParse, SQLITE_PRAGMA, zLeft, zRight, zDb) ){ goto pragma_out; } - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDAUTHCHECK); /* Send an SQLITE_FCNTL_PRAGMA file-control to the underlying VFS ** connection. If it returns SQLITE_OK, then assume that the VFS @@ -143009,7 +143694,10 @@ SQLITE_PRIVATE void sqlite3Pragma( aFcntl[2] = zRight; aFcntl[3] = 0; db->busyHandler.nBusy = 0; - rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl); + rc = sqlite3BtreePragma(pDb->pBt, aFcntl); + if( rc==SQLITE_NOTFOUND ){ + rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl); + } if( rc==SQLITE_OK ){ sqlite3VdbeSetNumCols(v, 1); sqlite3VdbeSetColName(v, 0, COLNAME_NAME, aFcntl[0], SQLITE_TRANSIENT); @@ -143035,12 +143723,10 @@ SQLITE_PRIVATE void sqlite3Pragma( goto pragma_out; } - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINLOADSCHEMA); /* Make sure the database schema is loaded if the pragma requires that */ if( (pPragma->mPragFlg & PragFlg_NeedSchema)!=0 ){ if( sqlite3ReadSchema(pParse) ) goto pragma_out; } - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDLOADSCHEMA); /* Register the result column names for pragmas that return results */ if( (pPragma->mPragFlg & PragFlg_NoColumns)==0 @@ -143400,7 +144086,6 @@ SQLITE_PRIVATE void sqlite3Pragma( */ case PragTyp_CACHE_SIZE: { assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINCACHESIZE); if( !zRight ){ returnSingleInt(v, pDb->pSchema->cache_size); }else{ @@ -143408,7 +144093,6 @@ SQLITE_PRIVATE void sqlite3Pragma( pDb->pSchema->cache_size = size; sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); } - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDCACHESIZE); break; } 
@@ -143681,12 +144365,6 @@ SQLITE_PRIVATE void sqlite3Pragma( ** in auto-commit mode. */ mask &= ~(SQLITE_ForeignKeys); } -#if SQLITE_USER_AUTHENTICATION - if( db->auth.authLevel==UAUTH_User ){ - /* Do not allow non-admin users to modify the schema arbitrarily */ - mask &= ~(SQLITE_WriteSchema); - } -#endif if( sqlite3GetBoolean(zRight, 0) ){ if( (mask & SQLITE_WriteSchema)==0 @@ -144298,11 +144976,12 @@ SQLITE_PRIVATE void sqlite3Pragma( /* Make sure sufficient number of registers have been allocated */ sqlite3TouchRegister(pParse, 8+cnt); + sqlite3VdbeAddOp3(v, OP_Null, 0, 8, 8+cnt); sqlite3ClearTempRegCache(pParse); /* Do the b-tree integrity checks */ sqlite3VdbeAddOp4(v, OP_IntegrityCk, 1, cnt, 8, (char*)aRoot,P4_INTARRAY); - sqlite3VdbeChangeP5(v, (u16)i); + sqlite3VdbeChangeP5(v, (u8)i); addr = sqlite3VdbeAddOp1(v, OP_IsNull, 2); VdbeCoverage(v); sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, sqlite3MPrintf(db, "*** in database %s ***\n", db->aDb[i].zDbSName), @@ -145295,7 +145974,6 @@ SQLITE_PRIVATE void sqlite3Pragma( pragma_out: sqlite3DbFree(db, zLeft); sqlite3DbFree(db, zRight); - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDPRAGMA); } #ifndef SQLITE_OMIT_VIRTUALTABLE /***************************************************************************** @@ -145826,11 +146504,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl int openedTransaction = 0; int mask = ((db->mDbFlags & DBFLAG_EncodingFixed) | ~DBFLAG_EncodingFixed); - u64 aSchemaTime[SCHEMA_TIME_N]; - memset(aSchemaTime, 0, sizeof(aSchemaTime)); - db->aSchemaTime = aSchemaTime; - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_START); - assert( (db->mDbFlags & DBFLAG_SchemaKnownOk)==0 ); assert( iDb>=0 && iDbnDb ); assert( db->aDb[iDb].pSchema ); @@ -145865,8 +146538,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl goto error_out; } - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_CREATE_1); - /* Create a cursor to hold 
the database open */ pDb = &db->aDb[iDb]; @@ -145890,8 +146561,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl openedTransaction = 1; } - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_OPEN_TRANS); - /* Get the database meta information. ** ** Meta values are as follows: @@ -145917,8 +146586,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl } pDb->pSchema->schema_cookie = meta[BTREE_SCHEMA_VERSION-1]; - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_GET_META); - /* If opening a non-empty database, check the text encoding. For the ** main database, set sqlite3.enc to the encoding of the main database. ** For an attached db, it is an error if the encoding is not the same @@ -145934,14 +146601,7 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl #else encoding = SQLITE_UTF8; #endif - if( db->nVdbeActive>0 && encoding!=ENC(db) - && (db->mDbFlags & DBFLAG_Vacuum)==0 - ){ - rc = SQLITE_LOCKED; - goto initone_error_out; - }else{ - sqlite3SetTextEncoding(db, encoding); - } + sqlite3SetTextEncoding(db, encoding); }else{ /* If opening an attached database, the encoding much match ENC(db) */ if( (meta[BTREE_TEXT_ENCODING-1] & 3)!=ENC(db) ){ @@ -145954,8 +146614,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl } pDb->pSchema->enc = ENC(db); - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_FIX_ENCODING); - if( pDb->pSchema->cache_size==0 ){ #ifndef SQLITE_OMIT_DEPRECATED size = sqlite3AbsInt32(meta[BTREE_DEFAULT_CACHE_SIZE-1]); @@ -145967,8 +146625,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); } - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_SETCACHESIZE); - /* ** file_format==1 Version 3.0.0. ** file_format==2 Version 3.1.3. 
// ALTER TABLE ADD COLUMN @@ -146009,7 +146665,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl xAuth = db->xAuth; db->xAuth = 0; #endif - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_BEGIN_EXEC); rc = sqlite3_exec(db, zSql, sqlite3InitCallback, &initData, 0); #ifndef SQLITE_OMIT_AUTHORIZATION db->xAuth = xAuth; @@ -146017,13 +146672,11 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl #endif if( rc==SQLITE_OK ) rc = initData.rc; sqlite3DbFree(db, zSql); - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_BEGIN_ANALYZE_LOAD); #ifndef SQLITE_OMIT_ANALYZE if( rc==SQLITE_OK ){ sqlite3AnalysisLoad(db, iDb); } #endif - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_END_ANALYZE_LOAD); } assert( pDb == &(db->aDb[iDb]) ); if( db->mallocFailed ){ @@ -146057,12 +146710,6 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl sqlite3BtreeLeave(pDb->pBt); error_out: - db->aSchemaTime = 0; - sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_FINISH); - if( rc==SQLITE_OK && iDb==0 ){ - const char *zFile = sqlite3BtreeGetFilename(pDb->pBt); - sqlite3SchemaTimeLog(aSchemaTime, zFile); - } if( rc ){ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ sqlite3OomFault(db); @@ -146126,6 +146773,14 @@ SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse){ }else if( db->noSharedCache ){ db->mDbFlags |= DBFLAG_SchemaKnownOk; } +#ifdef SQLITE_ENABLE_HCT + { + int iDb; + for(iDb=0; rc==SQLITE_OK && iDbnDb; iDb++){ + rc = sqlite3BtreeSchemaLoaded(db->aDb[iDb].pBt); + } + } +#endif } return rc; } @@ -146423,18 +147078,14 @@ static int sqlite3Prepare( } zSqlCopy = sqlite3DbStrNDup(db, zSql, nBytes); if( zSqlCopy ){ - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINPARSE); sqlite3RunParser(&sParse, zSqlCopy); - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDPARSE); sParse.zTail = &zSql[sParse.zTail-zSqlCopy]; sqlite3DbFree(db, zSqlCopy); }else{ sParse.zTail = &zSql[nBytes]; } }else{ - 
sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINPARSE); sqlite3RunParser(&sParse, zSql); - sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDPARSE); } assert( 0==sParse.nQueryLoop ); @@ -146495,12 +147146,6 @@ static int sqlite3LockAndPrepare( ){ int rc; int cnt = 0; - u64 *aPrepareSave = db->aPrepareTime; - - u64 aPrepareTime[PREPARE_TIME_N]; - memset(aPrepareTime, 0, sizeof(aPrepareTime)); - sqlite3PrepareTimeSet(aPrepareTime, PREPARE_TIME_START); - db->aPrepareTime = aPrepareTime; #ifdef SQLITE_ENABLE_API_ARMOR if( ppStmt==0 ) return SQLITE_MISUSE_BKPT; @@ -146526,11 +147171,6 @@ static int sqlite3LockAndPrepare( db->busyHandler.nBusy = 0; sqlite3_mutex_leave(db->mutex); assert( rc==SQLITE_OK || (*ppStmt)==0 ); - - db->aPrepareTime = aPrepareSave; - sqlite3PrepareTimeSet(aPrepareTime, PREPARE_TIME_FINISH); - sqlite3PrepareTimeLog(zSql, nBytes, aPrepareTime); - return rc; } @@ -146663,12 +147303,24 @@ static int sqlite3Prepare16( if( !sqlite3SafetyCheckOk(db)||zSql==0 ){ return SQLITE_MISUSE_BKPT; } + + /* Make sure nBytes is non-negative and correct. It should be the + ** number of bytes until the end of the input buffer or until the first + ** U+0000 character. If the input nBytes is odd, convert it into + ** an even number. If the input nBytes is negative, then the input + ** must be terminated by at least one U+0000 character */ if( nBytes>=0 ){ int sz; const char *z = (const char*)zSql; for(sz=0; szmutex); zSql8 = sqlite3Utf16to8(db, zSql, nBytes, SQLITE_UTF16NATIVE); if( zSql8 ){ @@ -146682,7 +147334,7 @@ static int sqlite3Prepare16( ** the same number of characters into the UTF-16 string. 
*/ int chars_parsed = sqlite3Utf8CharLen(zSql8, (int)(zTail8-zSql8)); - *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, chars_parsed); + *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, nBytes, chars_parsed); } sqlite3DbFree(db, zSql8); rc = sqlite3ApiExit(db, rc); @@ -148267,6 +148919,7 @@ SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){ p->enc = ENC(db); p->db = db; p->nRef = 1; + p->nUniqField = 0; memset(&p[1], 0, nExtra); }else{ return (KeyInfo*)sqlite3OomFault(db); @@ -153569,7 +154222,7 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){ } sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u16)nArg); + sqlite3VdbeChangeP5(v, (u8)nArg); sqlite3VdbeAddOp2(v, OP_Next, pF->iOBTab, iTop+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, iTop); sqlite3ReleaseTempRange(pParse, regAgg, nArg); @@ -153732,7 +154385,7 @@ static void updateAccumulator( } sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u16)nArg); + sqlite3VdbeChangeP5(v, (u8)nArg); sqlite3ReleaseTempRange(pParse, regAgg, nArg); } if( addrNext ){ @@ -157126,7 +157779,7 @@ SQLITE_PRIVATE void sqlite3CodeRowTriggerDirect( ** invocation is disallowed if (a) the sub-program is really a trigger, ** not a foreign key action, and (b) the flag to enable recursive triggers ** is clear. 
*/ - sqlite3VdbeChangeP5(v, (u16)bRecursive); + sqlite3VdbeChangeP5(v, (u8)bRecursive); } } @@ -159171,6 +159824,15 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( sqlite3SetString(pzErrMsg, db,"cannot VACUUM - SQL statements in progress"); return SQLITE_ERROR; /* IMP: R-15610-35227 */ } + if( sqlite3IsHct(db->aDb[iDb].pBt) ){ + if( pOut==0 ){ + /* Silent noop */ + return SQLITE_OK; + } + sqlite3SetString(pzErrMsg, db, "cannot VACUUM - hctree database"); + return SQLITE_ERROR; + } + saved_openFlags = db->openFlags; if( pOut ){ if( sqlite3_value_type(pOut)!=SQLITE_TEXT ){ @@ -160286,6 +160948,7 @@ SQLITE_API int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){ Table *pNew = sParse.pNewTable; Index *pIdx; pTab->aCol = pNew->aCol; + assert( IsOrdinaryTable(pNew) ); sqlite3ExprListDelete(db, pNew->u.tab.pDfltList); pTab->nNVCol = pTab->nCol = pNew->nCol; pTab->tabFlags |= pNew->tabFlags & (TF_WithoutRowid|TF_NoVisibleRowid); @@ -161350,9 +162013,17 @@ SQLITE_PRIVATE int sqlite3WhereExplainBloomFilter( const WhereInfo *pWInfo, /* WHERE clause */ const WhereLevel *pLevel /* Bloom filter on this level */ ); +SQLITE_PRIVATE void sqlite3WhereAddExplainText( + Parse *pParse, /* Parse context */ + int addr, + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +); #else # define sqlite3WhereExplainOneScan(u,v,w,x) 0 # define sqlite3WhereExplainBloomFilter(u,v,w) 0 +# define sqlite3WhereAddExplainText(u,v,w,x,y) #endif /* SQLITE_OMIT_EXPLAIN */ #ifdef SQLITE_ENABLE_STMT_SCANSTATUS SQLITE_PRIVATE void sqlite3WhereAddScanStatus( @@ -161554,38 +162225,38 @@ static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop){ } /* -** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN -** command, or if stmt_scanstatus_v2() stats are enabled, or if SQLITE_DEBUG -** was defined at compile-time. 
If it is not a no-op, a single OP_Explain -** opcode is added to the output to describe the table scan strategy in pLevel. -** -** If an OP_Explain opcode is added to the VM, its address is returned. -** Otherwise, if no OP_Explain is coded, zero is returned. +** This function sets the P4 value of an existing OP_Explain opcode to +** text describing the loop in pLevel. If the OP_Explain opcode already has +** a P4 value, it is freed before it is overwritten. */ -SQLITE_PRIVATE int sqlite3WhereExplainOneScan( +SQLITE_PRIVATE void sqlite3WhereAddExplainText( Parse *pParse, /* Parse context */ + int addr, /* Address of OP_Explain opcode */ SrcList *pTabList, /* Table list this loop refers to */ WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ ){ - int ret = 0; #if !defined(SQLITE_DEBUG) if( sqlite3ParseToplevel(pParse)->explain==2 || IS_STMT_SCANSTATUS(pParse->db) ) #endif { + VdbeOp *pOp = sqlite3VdbeGetOp(pParse->pVdbe, addr); + SrcItem *pItem = &pTabList->a[pLevel->iFrom]; - Vdbe *v = pParse->pVdbe; /* VM being constructed */ sqlite3 *db = pParse->db; /* Database handle */ int isSearch; /* True for a SEARCH. False for SCAN. 
*/ WhereLoop *pLoop; /* The controlling WhereLoop object */ u32 flags; /* Flags that describe this loop */ +#if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_EXPLAIN) char *zMsg; /* Text to add to EQP output */ +#endif StrAccum str; /* EQP output string */ char zBuf[100]; /* Initial space for EQP output string */ + if( db->mallocFailed ) return; + pLoop = pLevel->pWLoop; flags = pLoop->wsFlags; - if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_OR_SUBCLAUSE) ) return 0; isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) @@ -161609,7 +162280,7 @@ SQLITE_PRIVATE int sqlite3WhereExplainOneScan( zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; }else if( flags & WHERE_AUTO_INDEX ){ zFmt = "AUTOMATIC COVERING INDEX"; - }else if( flags & WHERE_IDX_ONLY ){ + }else if( flags & (WHERE_IDX_ONLY|WHERE_EXPRIDX) ){ zFmt = "COVERING INDEX %s"; }else{ zFmt = "INDEX %s"; @@ -161661,11 +162332,50 @@ SQLITE_PRIVATE int sqlite3WhereExplainOneScan( sqlite3_str_append(&str, " (~1 row)", 9); } #endif +#if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_EXPLAIN) zMsg = sqlite3StrAccumFinish(&str); sqlite3ExplainBreakpoint("",zMsg); - ret = sqlite3VdbeAddOp4(v, OP_Explain, sqlite3VdbeCurrentAddr(v), - pParse->addrExplain, pLoop->rRun, - zMsg, P4_DYNAMIC); +#endif + + assert( pOp->opcode==OP_Explain ); + assert( pOp->p4type==P4_DYNAMIC || pOp->p4.z==0 ); + sqlite3DbFree(db, pOp->p4.z); + pOp->p4type = P4_DYNAMIC; + pOp->p4.z = sqlite3StrAccumFinish(&str); + } +} + + +/* +** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN +** command, or if stmt_scanstatus_v2() stats are enabled, or if SQLITE_DEBUG +** was defined at compile-time. If it is not a no-op, a single OP_Explain +** opcode is added to the output to describe the table scan strategy in pLevel. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. 
+*/ +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +){ + int ret = 0; +#if !defined(SQLITE_DEBUG) + if( sqlite3ParseToplevel(pParse)->explain==2 || IS_STMT_SCANSTATUS(pParse->db) ) +#endif + { + if( (pLevel->pWLoop->wsFlags & WHERE_MULTI_OR)==0 + && (wctrlFlags & WHERE_OR_SUBCLAUSE)==0 + ){ + Vdbe *v = pParse->pVdbe; + int addr = sqlite3VdbeCurrentAddr(v); + ret = sqlite3VdbeAddOp3( + v, OP_Explain, addr, pParse->addrExplain, pLevel->pWLoop->rRun + ); + sqlite3WhereAddExplainText(pParse, addr, pTabList, pLevel, wctrlFlags); + } } return ret; } @@ -161764,9 +162474,10 @@ SQLITE_PRIVATE void sqlite3WhereAddScanStatus( } }else{ int addr; + VdbeOp *pOp; assert( pSrclist->a[pLvl->iFrom].fg.isSubquery ); addr = pSrclist->a[pLvl->iFrom].u4.pSubq->addrFillSub; - VdbeOp *pOp = sqlite3VdbeGetOp(v, addr-1); + pOp = sqlite3VdbeGetOp(v, addr-1); assert( sqlite3VdbeDb(v)->mallocFailed || pOp->opcode==OP_InitCoroutine ); assert( sqlite3VdbeDb(v)->mallocFailed || pOp->p2>addr ); sqlite3VdbeScanStatusRange(v, addrExplain, addr, pOp->p2-1); @@ -164566,20 +165277,25 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - - /* Count the number of prefix characters prior to the first wildcard. - ** If the underlying database has a UTF16LE encoding, then only consider - ** ASCII characters. Note that the encoding of z[] is UTF8 - we are - ** dealing with only UTF8 here in this code, but the database engine - ** itself might be processing content using a different encoding. */ + /* Count the number of prefix bytes prior to the first wildcard. + ** or U+fffd character. If the underlying database has a UTF16LE + ** encoding, then only consider ASCII characters. 
Note that the + ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in + ** this code, but the database engine itself might be processing + ** content using a different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; - if( c==wc[3] && z[cnt]!=0 ){ + if( c==wc[3] && z[cnt]>0 && z[cnt]<0x80 ){ cnt++; - }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){ - cnt--; - break; + }else if( c>=0x80 ){ + const u8 *z2 = z+cnt-1; + if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ + cnt--; + break; + }else{ + cnt = (int)(z2-z); + } } } @@ -164591,7 +165307,7 @@ static int isLikeOrGlob( ** range search. The third is because the caller assumes that the pattern ** consists of at least one character after all escapes have been ** removed. */ - if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){ + if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){ Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ @@ -167883,9 +168599,11 @@ static void freeIndexInfo(sqlite3 *db, sqlite3_index_info *pIdxInfo){ ** that this is required. */ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ - sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; int rc; + sqlite3_vtab *pVtab; + assert( IsVirtual(pTab) ); + pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; whereTraceIndexInfoInputs(p, pTab); pParse->db->nSchemaLock++; rc = pVtab->pModule->xBestIndex(pVtab, p); @@ -168828,7 +169546,7 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ ** and Y has additional constraints that might speed the search that X lacks ** but the cost of running X is not more than the cost of running Y. ** -** In other words, return true if the cost relationwship between X and Y +** In other words, return true if the cost relationship between X and Y ** is inverted and needs to be adjusted. 
** ** Case 1: @@ -172446,6 +173164,7 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin( WhereTerm *pTerm, *pEnd; SrcItem *pItem; WhereLoop *pLoop; + Bitmask m1; pLoop = pWInfo->a[i].pWLoop; pItem = &pWInfo->pTabList->a[pLoop->iTab]; if( (pItem->fg.jointype & (JT_LEFT|JT_RIGHT))!=JT_LEFT ) continue; @@ -172472,7 +173191,10 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin( } } if( pTerm drop loop %c not used\n", pLoop->cId)); + WHERETRACE(0xffffffff,("-> omit unused FROM-clause term %c\n",pLoop->cId)); + m1 = MASKBIT(i)-1; + testcase( ((pWInfo->revMask>>1) & ~m1)!=0 ); + pWInfo->revMask = (m1 & pWInfo->revMask) | ((pWInfo->revMask>>1) & ~m1); notReady &= ~pLoop->maskSelf; for(pTerm=pWInfo->sWC.a; pTermprereqAll & pLoop->maskSelf)!=0 ){ @@ -172543,58 +173265,6 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( } } -/* -** Expression Node callback for sqlite3ExprCanReturnSubtype(). -** -** Only a function call is able to return a subtype. So if the node -** is not a function call, return WRC_Prune immediately. -** -** A function call is able to return a subtype if it has the -** SQLITE_RESULT_SUBTYPE property. -** -** Assume that every function is able to pass-through a subtype from -** one of its argument (using sqlite3_result_value()). Most functions -** are not this way, but we don't have a mechanism to distinguish those -** that are from those that are not, so assume they all work this way. -** That means that if one of its arguments is another function and that -** other function is able to return a subtype, then this function is -** able to return a subtype. -*/ -static int exprNodeCanReturnSubtype(Walker *pWalker, Expr *pExpr){ - int n; - FuncDef *pDef; - sqlite3 *db; - if( pExpr->op!=TK_FUNCTION ){ - return WRC_Prune; - } - assert( ExprUseXList(pExpr) ); - db = pWalker->pParse->db; - n = pExpr->x.pList ? 
pExpr->x.pList->nExpr : 0; - pDef = sqlite3FindFunction(db, pExpr->u.zToken, n, ENC(db), 0); - if( pDef==0 || (pDef->funcFlags & SQLITE_RESULT_SUBTYPE)!=0 ){ - pWalker->eCode = 1; - return WRC_Prune; - } - return WRC_Continue; -} - -/* -** Return TRUE if expression pExpr is able to return a subtype. -** -** A TRUE return does not guarantee that a subtype will be returned. -** It only indicates that a subtype return is possible. False positives -** are acceptable as they only disable an optimization. False negatives, -** on the other hand, can lead to incorrect answers. -*/ -static int sqlite3ExprCanReturnSubtype(Parse *pParse, Expr *pExpr){ - Walker w; - memset(&w, 0, sizeof(w)); - w.pParse = pParse; - w.xExprCallback = exprNodeCanReturnSubtype; - sqlite3WalkExpr(&w, pExpr); - return w.eCode; -} - /* ** The index pIdx is used by a query and contains one or more expressions. ** In other words pIdx is an index on an expression. iIdxCur is the cursor @@ -172628,12 +173298,6 @@ static SQLITE_NOINLINE void whereAddIndexedExpr( continue; } if( sqlite3ExprIsConstant(0,pExpr) ) continue; - if( pExpr->op==TK_FUNCTION && sqlite3ExprCanReturnSubtype(pParse,pExpr) ){ - /* Functions that might set a subtype should not be replaced by the - ** value taken from an expression index since the index omits the - ** subtype. https://sqlite.org/forum/forumpost/68d284c86b082c3e */ - continue; - } p = sqlite3DbMallocRaw(pParse->db, sizeof(IndexedExpr)); if( p==0 ) break; p->pIENext = pParse->pIdxEpr; @@ -173740,14 +174404,28 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ pOp->p2 = x; pOp->p1 = pLevel->iIdxCur; OpcodeRewriteTrace(db, k, pOp); - }else{ - /* Unable to translate the table reference into an index - ** reference. Verify that this is harmless - that the - ** table being referenced really is open. - */ + }else if( pLoop->wsFlags & (WHERE_IDX_ONLY|WHERE_EXPRIDX) ){ if( pLoop->wsFlags & WHERE_IDX_ONLY ){ + /* An error. 
pLoop is supposed to be a covering index loop, + ** and yet the VM code refers to a column of the table that + ** is not part of the index. */ sqlite3ErrorMsg(pParse, "internal query planner error"); pParse->rc = SQLITE_INTERNAL; + }else{ + /* The WHERE_EXPRIDX flag is set by the planner when it is likely + ** that pLoop is a covering index loop, but it is not possible + ** to be 100% sure. In this case, any OP_Explain opcode + ** corresponding to this loop describes the index as a "COVERING + ** INDEX". But, pOp proves that pLoop is not actually a covering + ** index loop. So clear the WHERE_EXPRIDX flag and rewrite the + ** text that accompanies the OP_Explain opcode, if any. */ + pLoop->wsFlags &= ~WHERE_EXPRIDX; + sqlite3WhereAddExplainText(pParse, + pLevel->addrBody-1, + pTabList, + pLevel, + pWInfo->wctrlFlags + ); } } }else if( pOp->opcode==OP_Rowid ){ @@ -175455,6 +176133,7 @@ static void windowAggStep( int regArg; int nArg = pWin->bExprArgs ? 0 : windowArgCount(pWin); int i; + int addrIf = 0; assert( bInverse==0 || pWin->eStart!=TK_UNBOUNDED ); @@ -175471,6 +176150,18 @@ static void windowAggStep( } regArg = reg; + if( pWin->pFilter ){ + int regTmp; + assert( ExprUseXList(pWin->pOwner) ); + assert( pWin->bExprArgs || !nArg ||nArg==pWin->pOwner->x.pList->nExpr ); + assert( pWin->bExprArgs || nArg ||pWin->pOwner->x.pList==0 ); + regTmp = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol+nArg,regTmp); + addrIf = sqlite3VdbeAddOp3(v, OP_IfNot, regTmp, 0, 1); + VdbeCoverage(v); + sqlite3ReleaseTempReg(pParse, regTmp); + } + if( pMWin->regStartRowid==0 && (pFunc->funcFlags & SQLITE_FUNC_MINMAX) && (pWin->eStart!=TK_UNBOUNDED) @@ -175490,25 +176181,13 @@ static void windowAggStep( } sqlite3VdbeJumpHere(v, addrIsNull); }else if( pWin->regApp ){ + assert( pWin->pFilter==0 ); assert( pFunc->zName==nth_valueName || pFunc->zName==first_valueName ); assert( bInverse==0 || bInverse==1 ); sqlite3VdbeAddOp2(v, OP_AddImm, 
pWin->regApp+1-bInverse, 1); }else if( pFunc->xSFunc!=noopStepFunc ){ - int addrIf = 0; - if( pWin->pFilter ){ - int regTmp; - assert( ExprUseXList(pWin->pOwner) ); - assert( pWin->bExprArgs || !nArg ||nArg==pWin->pOwner->x.pList->nExpr ); - assert( pWin->bExprArgs || nArg ||pWin->pOwner->x.pList==0 ); - regTmp = sqlite3GetTempReg(pParse); - sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol+nArg,regTmp); - addrIf = sqlite3VdbeAddOp3(v, OP_IfNot, regTmp, 0, 1); - VdbeCoverage(v); - sqlite3ReleaseTempReg(pParse, regTmp); - } - if( pWin->bExprArgs ){ int iOp = sqlite3VdbeCurrentAddr(v); int iEnd; @@ -175535,12 +176214,13 @@ static void windowAggStep( sqlite3VdbeAddOp3(v, bInverse? OP_AggInverse : OP_AggStep, bInverse, regArg, pWin->regAccum); sqlite3VdbeAppendP4(v, pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u16)nArg); + sqlite3VdbeChangeP5(v, (u8)nArg); if( pWin->bExprArgs ){ sqlite3ReleaseTempRange(pParse, regArg, nArg); } - if( addrIf ) sqlite3VdbeJumpHere(v, addrIf); } + + if( addrIf ) sqlite3VdbeJumpHere(v, addrIf); } } @@ -177200,122 +177880,122 @@ static void updateDeleteLimitError( #define TK_GE 59 #define TK_ESCAPE 60 #define TK_COLUMNKW 61 -#define TK_DO 62 -#define TK_FOR 63 -#define TK_IGNORE 64 -#define TK_INITIALLY 65 -#define TK_INSTEAD 66 -#define TK_NO 67 -#define TK_KEY 68 -#define TK_OF 69 -#define TK_OFFSET 70 -#define TK_PRAGMA 71 -#define TK_RAISE 72 -#define TK_RECURSIVE 73 -#define TK_REPLACE 74 -#define TK_RESTRICT 75 -#define TK_ROW 76 -#define TK_ROWS 77 -#define TK_TRIGGER 78 -#define TK_VACUUM 79 -#define TK_VIEW 80 -#define TK_VIRTUAL 81 -#define TK_WITH 82 -#define TK_NULLS 83 -#define TK_FIRST 84 -#define TK_LAST 85 -#define TK_CURRENT 86 -#define TK_FOLLOWING 87 -#define TK_PARTITION 88 -#define TK_PRECEDING 89 -#define TK_RANGE 90 -#define TK_UNBOUNDED 91 -#define TK_EXCLUDE 92 -#define TK_GROUPS 93 -#define TK_OTHERS 94 -#define TK_TIES 95 -#define TK_GENERATED 96 -#define TK_ALWAYS 97 -#define TK_MATERIALIZED 98 -#define 
TK_REINDEX 99 -#define TK_RENAME 100 -#define TK_CTIME_KW 101 -#define TK_ANY 102 -#define TK_BITAND 103 -#define TK_BITOR 104 -#define TK_LSHIFT 105 -#define TK_RSHIFT 106 -#define TK_PLUS 107 -#define TK_MINUS 108 -#define TK_STAR 109 -#define TK_SLASH 110 -#define TK_REM 111 -#define TK_CONCAT 112 -#define TK_PTR 113 -#define TK_COLLATE 114 -#define TK_BITNOT 115 -#define TK_ON 116 -#define TK_INDEXED 117 -#define TK_STRING 118 -#define TK_JOIN_KW 119 -#define TK_CONSTRAINT 120 -#define TK_DEFAULT 121 -#define TK_NULL 122 -#define TK_PRIMARY 123 -#define TK_UNIQUE 124 -#define TK_CHECK 125 -#define TK_REFERENCES 126 -#define TK_AUTOINCR 127 -#define TK_INSERT 128 -#define TK_DELETE 129 -#define TK_UPDATE 130 -#define TK_SET 131 -#define TK_DEFERRABLE 132 -#define TK_FOREIGN 133 -#define TK_DROP 134 -#define TK_UNION 135 -#define TK_ALL 136 -#define TK_EXCEPT 137 -#define TK_INTERSECT 138 -#define TK_SELECT 139 -#define TK_VALUES 140 -#define TK_DISTINCT 141 -#define TK_DOT 142 -#define TK_FROM 143 -#define TK_JOIN 144 -#define TK_USING 145 -#define TK_ORDER 146 -#define TK_GROUP 147 -#define TK_HAVING 148 -#define TK_LIMIT 149 -#define TK_WHERE 150 -#define TK_RETURNING 151 -#define TK_INTO 152 -#define TK_NOTHING 153 -#define TK_FLOAT 154 -#define TK_BLOB 155 -#define TK_INTEGER 156 -#define TK_VARIABLE 157 -#define TK_CASE 158 -#define TK_WHEN 159 -#define TK_THEN 160 -#define TK_ELSE 161 -#define TK_INDEX 162 -#define TK_ALTER 163 -#define TK_ADD 164 -#define TK_WINDOW 165 -#define TK_OVER 166 -#define TK_FILTER 167 -#define TK_COLUMN 168 -#define TK_AGG_FUNCTION 169 -#define TK_AGG_COLUMN 170 -#define TK_TRUEFALSE 171 -#define TK_FUNCTION 172 -#define TK_UPLUS 173 -#define TK_UMINUS 174 -#define TK_TRUTH 175 -#define TK_REGISTER 176 -#define TK_CONCURRENT 177 +#define TK_CONCURRENT 62 +#define TK_DO 63 +#define TK_FOR 64 +#define TK_IGNORE 65 +#define TK_INITIALLY 66 +#define TK_INSTEAD 67 +#define TK_NO 68 +#define TK_KEY 69 +#define TK_OF 70 +#define 
TK_OFFSET 71 +#define TK_PRAGMA 72 +#define TK_RAISE 73 +#define TK_RECURSIVE 74 +#define TK_REPLACE 75 +#define TK_RESTRICT 76 +#define TK_ROW 77 +#define TK_ROWS 78 +#define TK_TRIGGER 79 +#define TK_VACUUM 80 +#define TK_VIEW 81 +#define TK_VIRTUAL 82 +#define TK_WITH 83 +#define TK_NULLS 84 +#define TK_FIRST 85 +#define TK_LAST 86 +#define TK_CURRENT 87 +#define TK_FOLLOWING 88 +#define TK_PARTITION 89 +#define TK_PRECEDING 90 +#define TK_RANGE 91 +#define TK_UNBOUNDED 92 +#define TK_EXCLUDE 93 +#define TK_GROUPS 94 +#define TK_OTHERS 95 +#define TK_TIES 96 +#define TK_GENERATED 97 +#define TK_ALWAYS 98 +#define TK_MATERIALIZED 99 +#define TK_REINDEX 100 +#define TK_RENAME 101 +#define TK_CTIME_KW 102 +#define TK_ANY 103 +#define TK_BITAND 104 +#define TK_BITOR 105 +#define TK_LSHIFT 106 +#define TK_RSHIFT 107 +#define TK_PLUS 108 +#define TK_MINUS 109 +#define TK_STAR 110 +#define TK_SLASH 111 +#define TK_REM 112 +#define TK_CONCAT 113 +#define TK_PTR 114 +#define TK_COLLATE 115 +#define TK_BITNOT 116 +#define TK_ON 117 +#define TK_INDEXED 118 +#define TK_STRING 119 +#define TK_JOIN_KW 120 +#define TK_CONSTRAINT 121 +#define TK_DEFAULT 122 +#define TK_NULL 123 +#define TK_PRIMARY 124 +#define TK_UNIQUE 125 +#define TK_CHECK 126 +#define TK_REFERENCES 127 +#define TK_AUTOINCR 128 +#define TK_INSERT 129 +#define TK_DELETE 130 +#define TK_UPDATE 131 +#define TK_SET 132 +#define TK_DEFERRABLE 133 +#define TK_FOREIGN 134 +#define TK_DROP 135 +#define TK_UNION 136 +#define TK_ALL 137 +#define TK_EXCEPT 138 +#define TK_INTERSECT 139 +#define TK_SELECT 140 +#define TK_VALUES 141 +#define TK_DISTINCT 142 +#define TK_DOT 143 +#define TK_FROM 144 +#define TK_JOIN 145 +#define TK_USING 146 +#define TK_ORDER 147 +#define TK_GROUP 148 +#define TK_HAVING 149 +#define TK_LIMIT 150 +#define TK_WHERE 151 +#define TK_RETURNING 152 +#define TK_INTO 153 +#define TK_NOTHING 154 +#define TK_FLOAT 155 +#define TK_BLOB 156 +#define TK_INTEGER 157 +#define TK_VARIABLE 158 +#define 
TK_CASE 159 +#define TK_WHEN 160 +#define TK_THEN 161 +#define TK_ELSE 162 +#define TK_INDEX 163 +#define TK_ALTER 164 +#define TK_ADD 165 +#define TK_WINDOW 166 +#define TK_OVER 167 +#define TK_FILTER 168 +#define TK_COLUMN 169 +#define TK_AGG_FUNCTION 170 +#define TK_AGG_COLUMN 171 +#define TK_TRUEFALSE 172 +#define TK_FUNCTION 173 +#define TK_UPLUS 174 +#define TK_UMINUS 175 +#define TK_TRUTH 176 +#define TK_REGISTER 177 #define TK_VECTOR 178 #define TK_SELECT_COLUMN 179 #define TK_IF_NULL_ROW 180 @@ -177391,7 +178071,7 @@ static void updateDeleteLimitError( #define YYCODETYPE unsigned short int #define YYNOCODE 323 #define YYACTIONTYPE unsigned short int -#define YYWILDCARD 102 +#define YYWILDCARD 103 #define sqlite3ParserTOKENTYPE Token typedef union { int yyinit; @@ -177528,450 +178208,454 @@ typedef union { ** yy_default[] Default action for each state. ** *********** Begin parsing tables **********************************************/ -#define YY_ACTTAB_COUNT (2176) +#define YY_ACTTAB_COUNT (2212) static const YYACTIONTYPE yy_action[] = { - /* 0 */ 1332, 580, 1311, 580, 379, 580, 1285, 282, 282, 1626, - /* 10 */ 1332, 1259, 1, 1, 586, 2, 1263, 1304, 1283, 417, - /* 20 */ 577, 321, 566, 155, 81, 81, 51, 51, 51, 51, - /* 30 */ 1345, 987, 130, 127, 234, 1153, 1661, 1294, 1661, 988, - /* 40 */ 130, 127, 234, 436, 137, 138, 91, 534, 1232, 1232, + /* 0 */ 130, 127, 234, 130, 127, 234, 574, 574, 574, 580, + /* 10 */ 1294, 1259, 1, 1, 586, 2, 1263, 580, 502, 417, + /* 20 */ 585, 321, 1263, 155, 1546, 1297, 294, 321, 166, 155, + /* 30 */ 1345, 987, 51, 51, 1626, 987, 1345, 1337, 1337, 988, + /* 40 */ 82, 82, 1304, 988, 137, 138, 91, 534, 1232, 1232, /* 50 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 60 */ 1208, 1546, 580, 38, 1285, 288, 288, 1606, 586, 2, - /* 70 */ 1263, 285, 1208, 973, 582, 321, 582, 155, 577, 502, - /* 80 */ 566, 214, 288, 288, 1345, 82, 82, 391, 136, 136, - /* 90 */ 136, 136, 129, 245, 416, 577, 39, 566, 1337, 1337, - /* 100 */ 
264, 231, 283, 134, 134, 134, 134, 133, 133, 132, - /* 110 */ 132, 132, 131, 128, 455, 1151, 307, 1581, 307, 288, - /* 120 */ 288, 7, 561, 417, 1545, 459, 1586, 384, 1586, 548, - /* 130 */ 1208, 535, 577, 1572, 566, 134, 134, 134, 134, 133, - /* 140 */ 133, 132, 132, 132, 131, 128, 455, 245, 137, 138, - /* 150 */ 91, 455, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, - /* 160 */ 136, 136, 136, 136, 130, 127, 234, 233, 1208, 1209, - /* 170 */ 1208, 257, 953, 1297, 515, 512, 511, 182, 441, 459, - /* 180 */ 1208, 1209, 1208, 368, 510, 132, 132, 132, 131, 128, - /* 190 */ 455, 1178, 973, 1178, 134, 134, 134, 134, 133, 133, - /* 200 */ 132, 132, 132, 131, 128, 455, 362, 134, 134, 134, - /* 210 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 133, - /* 220 */ 133, 132, 132, 132, 131, 128, 455, 417, 452, 451, - /* 230 */ 44, 289, 289, 112, 485, 1023, 261, 1237, 1208, 1209, - /* 240 */ 1208, 111, 1239, 44, 577, 1574, 566, 381, 580, 329, - /* 250 */ 1238, 502, 137, 138, 91, 518, 1232, 1232, 1067, 1070, - /* 260 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 357, 465, - /* 270 */ 360, 19, 19, 438, 392, 1240, 388, 1240, 139, 274, - /* 280 */ 291, 376, 521, 371, 520, 262, 430, 320, 571, 348, - /* 290 */ 1296, 367, 1173, 1173, 527, 527, 1509, 1023, 417, 7, - /* 300 */ 320, 571, 487, 544, 422, 1173, 1173, 294, 1173, 1173, - /* 310 */ 296, 134, 134, 134, 134, 133, 133, 132, 132, 132, - /* 320 */ 131, 128, 455, 137, 138, 91, 1632, 1232, 1232, 1067, - /* 330 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 417, - /* 340 */ 1510, 1455, 288, 288, 94, 257, 214, 93, 515, 512, - /* 350 */ 511, 348, 471, 334, 396, 577, 385, 566, 510, 410, - /* 360 */ 182, 543, 386, 502, 137, 138, 91, 417, 1232, 1232, - /* 370 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 380 */ 377, 1599, 134, 134, 134, 134, 133, 133, 132, 132, - /* 390 */ 132, 131, 128, 455, 91, 421, 1232, 1232, 1067, 1070, - /* 400 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 425, 1602, - /* 410 */ 417, 1208, 130, 127, 
234, 44, 579, 1208, 130, 127, - /* 420 */ 234, 476, 350, 134, 134, 134, 134, 133, 133, 132, - /* 430 */ 132, 132, 131, 128, 455, 137, 138, 91, 427, 1232, - /* 440 */ 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, - /* 450 */ 136, 134, 134, 134, 134, 133, 133, 132, 132, 132, - /* 460 */ 131, 128, 455, 580, 452, 451, 1439, 460, 1208, 580, - /* 470 */ 157, 1208, 320, 571, 562, 45, 554, 552, 552, 496, - /* 480 */ 528, 46, 7, 493, 197, 275, 82, 82, 1054, 1054, - /* 490 */ 1068, 1071, 61, 61, 134, 134, 134, 134, 133, 133, - /* 500 */ 132, 132, 132, 131, 128, 455, 468, 1208, 331, 288, - /* 510 */ 288, 1240, 580, 1240, 417, 288, 288, 415, 364, 1208, - /* 520 */ 1209, 1208, 577, 561, 566, 1208, 1209, 1208, 577, 529, - /* 530 */ 566, 382, 563, 580, 417, 51, 51, 432, 516, 137, - /* 540 */ 138, 91, 219, 1232, 1232, 1067, 1070, 1057, 1057, 135, - /* 550 */ 135, 136, 136, 136, 136, 379, 82, 82, 539, 137, - /* 560 */ 138, 91, 1058, 1232, 1232, 1067, 1070, 1057, 1057, 135, - /* 570 */ 135, 136, 136, 136, 136, 1173, 1208, 1209, 1208, 1208, - /* 580 */ 1209, 1208, 536, 108, 320, 571, 551, 580, 1173, 987, - /* 590 */ 580, 1173, 1575, 540, 446, 407, 1208, 988, 134, 134, - /* 600 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, - /* 610 */ 82, 82, 538, 82, 82, 1208, 1209, 1208, 134, 134, - /* 620 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, - /* 630 */ 288, 288, 550, 1153, 1662, 1588, 1662, 383, 417, 574, - /* 640 */ 574, 574, 890, 577, 542, 566, 578, 561, 940, 940, - /* 650 */ 561, 549, 131, 128, 455, 1208, 560, 238, 417, 443, - /* 660 */ 1184, 483, 883, 137, 138, 91, 303, 1232, 1232, 1067, - /* 670 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 108, - /* 680 */ 537, 464, 111, 137, 138, 91, 533, 1232, 1232, 1067, - /* 690 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 464, - /* 700 */ 463, 288, 288, 1248, 1208, 1209, 1208, 1439, 538, 22, - /* 710 */ 22, 427, 1184, 1151, 577, 1208, 566, 580, 232, 1343, - /* 720 */ 1572, 554, 134, 134, 134, 134, 133, 133, 
132, 132, - /* 730 */ 132, 131, 128, 455, 580, 1281, 580, 229, 526, 96, - /* 740 */ 82, 82, 134, 134, 134, 134, 133, 133, 132, 132, - /* 750 */ 132, 131, 128, 455, 288, 288, 580, 19, 19, 19, - /* 760 */ 19, 6, 417, 1208, 1209, 1208, 1191, 577, 48, 566, - /* 770 */ 288, 288, 435, 464, 437, 320, 571, 316, 433, 145, - /* 780 */ 145, 212, 417, 577, 897, 566, 1045, 137, 138, 91, - /* 790 */ 975, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 800 */ 136, 136, 136, 580, 390, 580, 523, 137, 138, 91, - /* 810 */ 6, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 820 */ 136, 136, 136, 1208, 1209, 1208, 19, 19, 19, 19, - /* 830 */ 427, 469, 1573, 948, 381, 209, 555, 555, 947, 580, - /* 840 */ 475, 306, 415, 442, 530, 1556, 134, 134, 134, 134, - /* 850 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 1558, - /* 860 */ 580, 1579, 82, 82, 580, 7, 134, 134, 134, 134, - /* 870 */ 133, 133, 132, 132, 132, 131, 128, 455, 1208, 288, - /* 880 */ 288, 19, 19, 19, 19, 491, 417, 19, 19, 492, - /* 890 */ 1028, 198, 577, 1111, 566, 461, 206, 1111, 207, 317, - /* 900 */ 213, 1208, 556, 1631, 580, 915, 417, 136, 136, 136, - /* 910 */ 136, 137, 138, 91, 40, 1232, 1232, 1067, 1070, 1057, - /* 920 */ 1057, 135, 135, 136, 136, 136, 136, 147, 147, 1515, - /* 930 */ 497, 137, 138, 91, 1228, 1232, 1232, 1067, 1070, 1057, - /* 940 */ 1057, 135, 135, 136, 136, 136, 136, 1515, 1517, 267, - /* 950 */ 340, 1130, 141, 1439, 134, 134, 134, 134, 133, 133, - /* 960 */ 132, 132, 132, 131, 128, 455, 1131, 1109, 1572, 536, - /* 970 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, - /* 980 */ 128, 455, 1132, 477, 1208, 298, 1208, 1209, 1208, 1208, - /* 990 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, - /* 1000 */ 128, 455, 5, 926, 580, 485, 345, 1208, 1044, 1208, - /* 1010 */ 1209, 1208, 337, 927, 339, 478, 50, 580, 125, 417, - /* 1020 */ 3, 1515, 1349, 1129, 434, 1033, 415, 66, 66, 1032, - /* 1030 */ 453, 453, 453, 290, 577, 866, 566, 414, 413, 417, - /* 1040 */ 67, 67, 1572, 1228, 
137, 138, 91, 115, 1232, 1232, - /* 1050 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 1060 */ 580, 1032, 1032, 1034, 137, 138, 91, 537, 1232, 1232, - /* 1070 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 1080 */ 1573, 299, 381, 82, 82, 476, 350, 1228, 1554, 485, - /* 1090 */ 47, 1192, 1208, 1209, 1208, 474, 338, 1208, 1209, 1208, - /* 1100 */ 10, 564, 267, 134, 134, 134, 134, 133, 133, 132, - /* 1110 */ 132, 132, 131, 128, 455, 1208, 1209, 1208, 1572, 974, - /* 1120 */ 449, 507, 580, 134, 134, 134, 134, 133, 133, 132, - /* 1130 */ 132, 132, 131, 128, 455, 580, 288, 288, 1089, 557, - /* 1140 */ 580, 1114, 1114, 499, 580, 21, 21, 580, 485, 577, - /* 1150 */ 261, 566, 417, 1620, 1573, 1439, 381, 215, 82, 82, - /* 1160 */ 260, 259, 258, 82, 82, 302, 49, 53, 53, 1211, - /* 1170 */ 68, 68, 417, 1335, 1335, 1454, 1502, 137, 138, 91, - /* 1180 */ 119, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 1190 */ 136, 136, 136, 906, 324, 450, 1228, 137, 138, 91, - /* 1200 */ 454, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 1210 */ 136, 136, 136, 1453, 377, 1599, 1663, 403, 422, 854, - /* 1220 */ 855, 856, 1286, 423, 304, 519, 498, 973, 1372, 315, - /* 1230 */ 1573, 580, 381, 907, 367, 502, 134, 134, 134, 134, - /* 1240 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 227, - /* 1250 */ 580, 491, 377, 1599, 54, 54, 134, 134, 134, 134, - /* 1260 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 1173, - /* 1270 */ 580, 69, 69, 70, 70, 580, 417, 214, 1211, 227, - /* 1280 */ 1344, 557, 1173, 325, 878, 1173, 573, 423, 1439, 439, - /* 1290 */ 405, 71, 71, 72, 72, 580, 417, 160, 73, 73, - /* 1300 */ 158, 137, 126, 91, 502, 1232, 1232, 1067, 1070, 1057, - /* 1310 */ 1057, 135, 135, 136, 136, 136, 136, 580, 55, 55, - /* 1320 */ 886, 233, 138, 91, 580, 1232, 1232, 1067, 1070, 1057, - /* 1330 */ 1057, 135, 135, 136, 136, 136, 136, 402, 1253, 491, - /* 1340 */ 56, 56, 1130, 424, 184, 456, 973, 57, 57, 1340, - /* 1350 */ 502, 347, 580, 466, 580, 
123, 572, 1131, 4, 447, - /* 1360 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, - /* 1370 */ 128, 455, 575, 1132, 109, 59, 59, 60, 60, 580, - /* 1380 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, - /* 1390 */ 128, 455, 580, 878, 568, 355, 580, 222, 585, 580, - /* 1400 */ 1263, 580, 74, 74, 120, 321, 117, 155, 569, 402, - /* 1410 */ 1152, 161, 1044, 16, 1345, 75, 75, 1254, 44, 76, - /* 1420 */ 76, 482, 20, 20, 77, 77, 1580, 1578, 886, 1033, - /* 1430 */ 7, 7, 415, 1032, 1577, 489, 580, 1044, 7, 305, - /* 1440 */ 1227, 467, 948, 121, 121, 467, 580, 947, 297, 288, - /* 1450 */ 288, 122, 297, 456, 581, 456, 580, 319, 1032, 143, - /* 1460 */ 143, 456, 577, 166, 566, 1032, 1032, 1034, 415, 144, - /* 1470 */ 144, 123, 572, 242, 4, 320, 571, 245, 328, 78, - /* 1480 */ 78, 287, 231, 293, 481, 479, 456, 580, 575, 1254, - /* 1490 */ 1032, 1032, 1034, 1035, 35, 1192, 123, 572, 580, 4, - /* 1500 */ 97, 218, 9, 580, 1148, 580, 404, 580, 300, 459, - /* 1510 */ 62, 62, 580, 575, 23, 375, 580, 323, 580, 221, - /* 1520 */ 1192, 79, 79, 580, 569, 374, 63, 63, 80, 80, - /* 1530 */ 64, 64, 580, 490, 1384, 170, 170, 532, 546, 171, - /* 1540 */ 171, 87, 87, 545, 912, 913, 65, 65, 346, 569, - /* 1550 */ 111, 1383, 580, 1044, 428, 83, 83, 580, 222, 121, - /* 1560 */ 121, 470, 1025, 546, 266, 905, 904, 122, 547, 456, - /* 1570 */ 581, 456, 1561, 893, 1032, 146, 146, 456, 1044, 1096, - /* 1580 */ 84, 84, 990, 991, 121, 121, 524, 123, 572, 341, - /* 1590 */ 4, 244, 122, 1534, 456, 581, 456, 580, 484, 1032, - /* 1600 */ 266, 580, 456, 580, 575, 1036, 1032, 1032, 1034, 1035, - /* 1610 */ 35, 205, 123, 572, 580, 4, 1157, 1108, 580, 1108, - /* 1620 */ 168, 168, 1533, 494, 148, 148, 142, 142, 486, 575, - /* 1630 */ 266, 1032, 1032, 1034, 1035, 35, 1192, 169, 169, 292, - /* 1640 */ 569, 162, 162, 352, 400, 400, 399, 277, 397, 580, - /* 1650 */ 351, 863, 111, 1380, 546, 580, 508, 580, 263, 545, - /* 1660 */ 365, 1192, 111, 356, 239, 569, 327, 359, 580, 1044, - /* 1670 */ 361, 363, 152, 
152, 326, 121, 121, 1328, 151, 151, - /* 1680 */ 149, 149, 893, 122, 1315, 456, 581, 456, 1096, 1314, - /* 1690 */ 1032, 150, 150, 1313, 1044, 580, 165, 1092, 111, 263, - /* 1700 */ 121, 121, 952, 1312, 978, 241, 266, 580, 122, 370, - /* 1710 */ 456, 581, 456, 175, 1036, 1032, 43, 380, 86, 86, - /* 1720 */ 1370, 1393, 1032, 1032, 1034, 1035, 35, 1605, 1196, 458, - /* 1730 */ 88, 88, 292, 240, 946, 1438, 125, 400, 400, 399, - /* 1740 */ 277, 397, 580, 943, 863, 125, 1366, 1032, 1032, 1034, - /* 1750 */ 1035, 35, 1192, 1593, 1107, 580, 1107, 239, 876, 327, - /* 1760 */ 159, 945, 1378, 125, 580, 85, 85, 326, 418, 567, - /* 1770 */ 503, 1443, 1293, 320, 571, 1284, 1272, 1192, 52, 52, - /* 1780 */ 1271, 1273, 1613, 280, 401, 167, 1363, 58, 58, 12, - /* 1790 */ 312, 313, 314, 1425, 224, 237, 295, 462, 241, 333, - /* 1800 */ 336, 343, 344, 301, 349, 488, 175, 1375, 456, 43, - /* 1810 */ 1430, 513, 1429, 228, 1376, 1311, 408, 210, 123, 572, - /* 1820 */ 373, 4, 1506, 1505, 1374, 1373, 240, 1616, 456, 570, - /* 1830 */ 211, 395, 1248, 270, 1553, 575, 1245, 223, 90, 572, - /* 1840 */ 1551, 4, 426, 186, 96, 1511, 220, 92, 235, 1426, - /* 1850 */ 95, 195, 140, 557, 332, 575, 13, 180, 1420, 188, - /* 1860 */ 1413, 418, 335, 472, 190, 191, 320, 571, 473, 192, - /* 1870 */ 193, 569, 506, 247, 109, 1434, 406, 199, 495, 1432, - /* 1880 */ 251, 102, 1431, 480, 409, 14, 501, 1522, 281, 354, - /* 1890 */ 462, 569, 1500, 203, 253, 522, 358, 254, 504, 1274, - /* 1900 */ 1044, 255, 1331, 411, 1330, 1329, 121, 121, 440, 104, - /* 1910 */ 1301, 412, 1322, 1630, 122, 1629, 456, 581, 456, 897, - /* 1920 */ 1044, 1032, 372, 1300, 1299, 1628, 121, 121, 1321, 229, - /* 1930 */ 1598, 444, 531, 310, 122, 445, 456, 581, 456, 311, - /* 1940 */ 559, 1032, 378, 268, 269, 448, 1398, 11, 1487, 389, - /* 1950 */ 116, 318, 110, 1032, 1032, 1034, 1035, 35, 1354, 387, - /* 1960 */ 558, 1353, 216, 1584, 1583, 541, 1397, 393, 42, 394, - /* 1970 */ 583, 1202, 276, 1032, 1032, 1034, 1035, 35, 1196, 458, - /* 
1980 */ 278, 279, 292, 1192, 584, 1538, 172, 400, 400, 399, - /* 1990 */ 277, 397, 156, 1539, 863, 1269, 1264, 1537, 1536, 308, - /* 2000 */ 456, 225, 173, 1192, 226, 174, 850, 239, 457, 327, - /* 2010 */ 123, 572, 89, 4, 217, 322, 419, 326, 185, 420, - /* 2020 */ 154, 236, 1106, 1104, 330, 187, 176, 575, 1227, 189, - /* 2030 */ 929, 243, 342, 246, 1120, 194, 177, 178, 429, 98, - /* 2040 */ 99, 196, 100, 101, 179, 431, 1123, 248, 241, 1119, - /* 2050 */ 249, 163, 24, 250, 266, 353, 175, 1242, 1112, 43, - /* 2060 */ 500, 252, 200, 569, 201, 15, 374, 865, 505, 509, - /* 2070 */ 256, 895, 202, 103, 25, 26, 240, 366, 164, 514, - /* 2080 */ 369, 105, 309, 517, 1189, 908, 106, 525, 107, 1073, - /* 2090 */ 1159, 17, 1044, 1158, 27, 181, 230, 284, 121, 121, - /* 2100 */ 286, 204, 265, 976, 28, 125, 122, 982, 456, 581, - /* 2110 */ 456, 418, 29, 1032, 1175, 30, 320, 571, 31, 1179, - /* 2120 */ 8, 1177, 1182, 32, 1164, 41, 553, 33, 34, 208, - /* 2130 */ 111, 1087, 1074, 1072, 1076, 1128, 271, 113, 565, 114, - /* 2140 */ 462, 118, 1077, 36, 18, 1032, 1032, 1034, 1035, 35, - /* 2150 */ 1037, 877, 1183, 939, 124, 37, 398, 272, 153, 576, - /* 2160 */ 273, 183, 1621, 1198, 1197, 1260, 1260, 1260, 1260, 1260, - /* 2170 */ 1260, 1260, 1260, 1260, 1260, 1192, + /* 60 */ 214, 288, 288, 425, 362, 288, 288, 453, 453, 453, + /* 70 */ 441, 288, 288, 405, 577, 368, 566, 540, 577, 580, + /* 80 */ 566, 1208, 288, 288, 577, 285, 566, 973, 136, 136, + /* 90 */ 136, 136, 129, 245, 491, 577, 329, 566, 275, 245, + /* 100 */ 264, 231, 19, 19, 134, 134, 134, 134, 133, 133, + /* 110 */ 132, 132, 132, 131, 128, 455, 1296, 430, 1606, 586, + /* 120 */ 2, 1263, 460, 385, 417, 459, 321, 357, 155, 360, + /* 130 */ 1111, 459, 1586, 384, 1111, 1345, 134, 134, 134, 134, + /* 140 */ 133, 133, 132, 132, 132, 131, 128, 455, 518, 137, + /* 150 */ 138, 91, 524, 1232, 1232, 1067, 1070, 1057, 1057, 135, + /* 160 */ 135, 136, 136, 136, 136, 580, 438, 1208, 497, 182, + /* 170 */ 288, 288, 274, 291, 376, 521, 371, 
520, 262, 130, + /* 180 */ 127, 234, 233, 577, 367, 566, 407, 1510, 51, 51, + /* 190 */ 1208, 1209, 1208, 1178, 298, 1178, 1285, 1572, 245, 133, + /* 200 */ 133, 132, 132, 132, 131, 128, 455, 973, 1283, 134, + /* 210 */ 134, 134, 134, 133, 133, 132, 132, 132, 131, 128, + /* 220 */ 455, 288, 288, 132, 132, 132, 131, 128, 455, 417, + /* 230 */ 459, 1023, 476, 350, 577, 112, 566, 157, 1228, 44, + /* 240 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, + /* 250 */ 128, 455, 483, 267, 137, 138, 91, 455, 1232, 1232, + /* 260 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, + /* 270 */ 1054, 1054, 1068, 1071, 1208, 1439, 1208, 1209, 1208, 257, + /* 280 */ 580, 139, 515, 512, 511, 348, 527, 527, 1588, 580, + /* 290 */ 383, 7, 510, 487, 1173, 257, 320, 571, 515, 512, + /* 300 */ 511, 417, 1581, 51, 51, 544, 7, 1173, 510, 1574, + /* 310 */ 1173, 381, 82, 82, 134, 134, 134, 134, 133, 133, + /* 320 */ 132, 132, 132, 131, 128, 455, 137, 138, 91, 1632, + /* 330 */ 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, + /* 340 */ 136, 136, 417, 1173, 1173, 1058, 432, 94, 1228, 561, + /* 350 */ 948, 93, 320, 571, 551, 947, 1173, 1173, 535, 1173, + /* 360 */ 1173, 1153, 1661, 543, 1661, 303, 386, 137, 138, 91, + /* 370 */ 1343, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 380 */ 136, 136, 136, 1208, 1209, 1208, 134, 134, 134, 134, + /* 390 */ 133, 133, 132, 132, 132, 131, 128, 455, 973, 421, + /* 400 */ 288, 288, 580, 1586, 548, 288, 288, 466, 136, 136, + /* 410 */ 136, 136, 542, 577, 417, 566, 1153, 1662, 577, 1662, + /* 420 */ 566, 1023, 130, 127, 234, 81, 81, 134, 134, 134, + /* 430 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 137, + /* 440 */ 138, 91, 1151, 1232, 1232, 1067, 1070, 1057, 1057, 135, + /* 450 */ 135, 136, 136, 136, 136, 580, 134, 134, 134, 134, + /* 460 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 282, + /* 470 */ 282, 1208, 580, 261, 1208, 348, 471, 334, 82, 82, + /* 480 */ 1602, 1281, 577, 496, 566, 530, 485, 493, 391, 579, + /* 
490 */ 564, 82, 82, 233, 464, 82, 82, 1151, 379, 134, + /* 500 */ 134, 134, 134, 133, 133, 132, 132, 132, 131, 128, + /* 510 */ 455, 485, 464, 463, 214, 561, 288, 288, 973, 417, + /* 520 */ 288, 288, 396, 364, 560, 288, 288, 410, 316, 577, + /* 530 */ 1208, 566, 561, 577, 1315, 566, 45, 436, 577, 417, + /* 540 */ 566, 443, 422, 516, 137, 138, 91, 219, 1232, 1232, + /* 550 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, + /* 560 */ 1208, 382, 296, 417, 137, 138, 91, 890, 1232, 1232, + /* 570 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, + /* 580 */ 1208, 1209, 1208, 1208, 1209, 1208, 464, 299, 137, 138, + /* 590 */ 91, 485, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, + /* 600 */ 136, 136, 136, 136, 134, 134, 134, 134, 133, 133, + /* 610 */ 132, 132, 132, 131, 128, 455, 283, 427, 96, 1515, + /* 620 */ 1208, 580, 539, 1208, 134, 134, 134, 134, 133, 133, + /* 630 */ 132, 132, 132, 131, 128, 455, 197, 1515, 1517, 1208, + /* 640 */ 1209, 1208, 452, 451, 82, 82, 320, 571, 134, 134, + /* 650 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, + /* 660 */ 1208, 953, 289, 289, 229, 526, 975, 302, 417, 1208, + /* 670 */ 1209, 1208, 883, 198, 1285, 577, 1208, 566, 130, 127, + /* 680 */ 234, 450, 1335, 1335, 582, 46, 582, 331, 417, 1240, + /* 690 */ 227, 1240, 1191, 137, 138, 91, 1455, 1232, 1232, 1067, + /* 700 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 44, + /* 710 */ 232, 1515, 417, 137, 138, 91, 1045, 1232, 1232, 1067, + /* 720 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 1208, + /* 730 */ 1209, 1208, 1208, 1209, 1208, 377, 1599, 137, 138, 91, + /* 740 */ 390, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 750 */ 136, 136, 136, 134, 134, 134, 134, 133, 133, 132, + /* 760 */ 132, 132, 131, 128, 455, 533, 320, 571, 580, 1208, + /* 770 */ 1209, 1208, 580, 134, 134, 134, 134, 133, 133, 132, + /* 780 */ 132, 132, 131, 128, 455, 1208, 1209, 1208, 22, 22, + /* 790 */ 1208, 145, 145, 417, 536, 19, 19, 134, 134, 134, + /* 800 */ 134, 
133, 133, 132, 132, 132, 131, 128, 455, 222, + /* 810 */ 435, 580, 974, 131, 128, 455, 580, 417, 137, 138, + /* 820 */ 91, 1028, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, + /* 830 */ 136, 136, 136, 136, 147, 147, 491, 417, 1208, 66, + /* 840 */ 66, 1129, 137, 138, 91, 554, 1232, 1232, 1067, 1070, + /* 850 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 578, 44, + /* 860 */ 940, 940, 137, 138, 91, 1556, 1232, 1232, 1067, 1070, + /* 870 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 134, 134, + /* 880 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, + /* 890 */ 1558, 182, 108, 537, 1663, 403, 580, 886, 465, 1208, + /* 900 */ 1209, 1208, 134, 134, 134, 134, 133, 133, 132, 132, + /* 910 */ 132, 131, 128, 455, 1439, 1454, 320, 571, 6, 19, + /* 920 */ 19, 538, 134, 134, 134, 134, 133, 133, 132, 132, + /* 930 */ 132, 131, 128, 455, 437, 115, 347, 452, 451, 580, + /* 940 */ 1208, 580, 417, 1372, 315, 1572, 1237, 1208, 1209, 1208, + /* 950 */ 111, 1239, 562, 40, 377, 1599, 1453, 1208, 461, 1238, + /* 960 */ 555, 555, 82, 82, 82, 82, 1572, 137, 138, 91, + /* 970 */ 5, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 980 */ 136, 136, 136, 340, 1240, 433, 1240, 288, 288, 1130, + /* 990 */ 1044, 1439, 209, 48, 580, 377, 1599, 475, 580, 317, + /* 1000 */ 577, 561, 566, 1173, 1131, 528, 886, 1033, 552, 552, + /* 1010 */ 563, 1032, 1349, 7, 50, 1572, 1173, 61, 61, 1173, + /* 1020 */ 1132, 82, 82, 392, 577, 388, 566, 134, 134, 134, + /* 1030 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 1554, + /* 1040 */ 288, 288, 926, 1032, 1032, 1034, 337, 580, 339, 1208, + /* 1050 */ 1209, 1208, 927, 577, 529, 566, 417, 1573, 449, 381, + /* 1060 */ 485, 215, 434, 47, 1208, 427, 1208, 1209, 1208, 427, + /* 1070 */ 67, 67, 1192, 1631, 580, 915, 417, 3, 1573, 502, + /* 1080 */ 381, 137, 138, 91, 119, 1232, 1232, 1067, 1070, 1057, + /* 1090 */ 1057, 135, 135, 136, 136, 136, 136, 82, 82, 580, + /* 1100 */ 491, 137, 138, 91, 1044, 1232, 1232, 1067, 1070, 1057, + /* 1110 */ 1057, 135, 135, 
136, 136, 136, 136, 1439, 1332, 214, + /* 1120 */ 1311, 1033, 19, 19, 1545, 1032, 1314, 1573, 1332, 381, + /* 1130 */ 338, 227, 416, 324, 454, 212, 304, 306, 866, 213, + /* 1140 */ 125, 134, 134, 134, 134, 133, 133, 132, 132, 132, + /* 1150 */ 131, 128, 455, 580, 307, 580, 307, 1032, 1032, 1034, + /* 1160 */ 580, 134, 134, 134, 134, 133, 133, 132, 132, 132, + /* 1170 */ 131, 128, 455, 1208, 1209, 1208, 19, 19, 19, 19, + /* 1180 */ 477, 417, 536, 19, 19, 1439, 1192, 379, 498, 1228, + /* 1190 */ 1579, 442, 554, 206, 7, 1572, 1313, 523, 207, 474, + /* 1200 */ 305, 417, 10, 345, 267, 1109, 137, 126, 91, 502, + /* 1210 */ 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, + /* 1220 */ 136, 136, 906, 580, 948, 550, 446, 138, 91, 947, + /* 1230 */ 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, + /* 1240 */ 136, 136, 1089, 49, 549, 878, 19, 19, 402, 1253, + /* 1250 */ 507, 402, 1152, 1184, 1509, 519, 447, 109, 160, 580, + /* 1260 */ 1580, 556, 557, 907, 7, 1211, 134, 134, 134, 134, + /* 1270 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 261, + /* 1280 */ 1502, 537, 21, 21, 141, 502, 134, 134, 134, 134, + /* 1290 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 1228, + /* 1300 */ 1130, 53, 53, 580, 325, 1184, 417, 1573, 1575, 381, + /* 1310 */ 854, 855, 856, 375, 1578, 1131, 1577, 502, 7, 439, + /* 1320 */ 7, 68, 68, 374, 476, 350, 54, 54, 580, 1254, + /* 1330 */ 1344, 1132, 1254, 91, 6, 1232, 1232, 1067, 1070, 1057, + /* 1340 */ 1057, 135, 135, 136, 136, 136, 136, 456, 422, 580, + /* 1350 */ 557, 69, 69, 568, 580, 878, 580, 123, 572, 580, + /* 1360 */ 4, 580, 1340, 580, 367, 580, 1114, 1114, 499, 158, + /* 1370 */ 414, 413, 70, 70, 575, 1211, 328, 71, 71, 72, + /* 1380 */ 72, 479, 73, 73, 55, 55, 56, 56, 57, 57, + /* 1390 */ 532, 134, 134, 134, 134, 133, 133, 132, 132, 132, + /* 1400 */ 131, 128, 455, 580, 300, 502, 580, 456, 580, 1286, + /* 1410 */ 423, 569, 580, 492, 580, 573, 423, 123, 572, 580, + /* 1420 */ 4, 44, 260, 259, 258, 481, 59, 59, 580, 60, + 
/* 1430 */ 60, 74, 74, 111, 575, 75, 75, 76, 76, 580, + /* 1440 */ 1044, 467, 20, 20, 468, 467, 121, 121, 9, 580, + /* 1450 */ 355, 77, 77, 469, 122, 415, 456, 581, 456, 580, + /* 1460 */ 221, 1032, 143, 143, 415, 297, 478, 1227, 580, 297, + /* 1470 */ 1157, 569, 144, 144, 424, 184, 482, 415, 320, 571, + /* 1480 */ 287, 231, 78, 78, 238, 546, 580, 415, 456, 580, + /* 1490 */ 545, 62, 62, 1032, 1032, 1034, 1035, 35, 123, 572, + /* 1500 */ 1044, 4, 580, 97, 218, 580, 121, 121, 242, 79, + /* 1510 */ 79, 580, 63, 63, 122, 575, 456, 581, 456, 108, + /* 1520 */ 319, 1032, 1192, 580, 489, 80, 80, 580, 64, 64, + /* 1530 */ 1248, 415, 456, 1370, 170, 170, 897, 580, 120, 580, + /* 1540 */ 117, 580, 123, 572, 580, 4, 171, 171, 538, 580, + /* 1550 */ 87, 87, 569, 1032, 1032, 1034, 1035, 35, 38, 575, + /* 1560 */ 65, 65, 83, 83, 146, 146, 546, 84, 84, 580, + /* 1570 */ 23, 547, 168, 168, 161, 1384, 16, 1148, 1383, 404, + /* 1580 */ 580, 1044, 1192, 580, 470, 580, 222, 121, 121, 580, + /* 1590 */ 293, 39, 148, 148, 580, 122, 569, 456, 581, 456, + /* 1600 */ 905, 904, 1032, 142, 142, 1561, 169, 169, 162, 162, + /* 1610 */ 546, 428, 152, 152, 346, 545, 111, 151, 151, 1025, + /* 1620 */ 484, 266, 266, 490, 323, 1044, 580, 244, 580, 341, + /* 1630 */ 580, 121, 121, 893, 1032, 1032, 1034, 1035, 35, 122, + /* 1640 */ 1096, 456, 581, 456, 580, 1534, 1032, 456, 580, 149, + /* 1650 */ 149, 150, 150, 86, 86, 912, 913, 123, 572, 486, + /* 1660 */ 4, 266, 351, 1192, 111, 990, 991, 88, 88, 1036, + /* 1670 */ 1533, 85, 85, 205, 575, 580, 494, 290, 1032, 1032, + /* 1680 */ 1034, 1035, 35, 1605, 1196, 458, 580, 508, 292, 263, + /* 1690 */ 946, 352, 125, 400, 400, 399, 277, 397, 52, 52, + /* 1700 */ 863, 1380, 365, 165, 111, 111, 456, 1192, 356, 58, + /* 1710 */ 58, 569, 1092, 239, 263, 327, 123, 572, 978, 4, + /* 1720 */ 266, 359, 943, 326, 125, 1108, 456, 1108, 1107, 876, + /* 1730 */ 1107, 159, 361, 575, 363, 1328, 90, 572, 1312, 4, + /* 1740 */ 1044, 370, 945, 893, 125, 380, 121, 121, 952, 
1593, + /* 1750 */ 1096, 1393, 1438, 575, 122, 241, 456, 581, 456, 1366, + /* 1760 */ 1378, 1032, 567, 175, 1443, 503, 43, 1293, 1363, 1284, + /* 1770 */ 569, 1272, 1271, 1273, 1613, 280, 167, 312, 313, 1036, + /* 1780 */ 314, 401, 224, 240, 333, 1425, 12, 237, 336, 295, + /* 1790 */ 569, 343, 344, 1032, 1032, 1034, 1035, 35, 1620, 1044, + /* 1800 */ 349, 1430, 1429, 301, 408, 121, 121, 513, 488, 1311, + /* 1810 */ 373, 1506, 228, 122, 1505, 456, 581, 456, 418, 1044, + /* 1820 */ 1032, 1375, 1192, 320, 571, 121, 121, 1616, 570, 1376, + /* 1830 */ 1374, 395, 1248, 122, 186, 456, 581, 456, 210, 559, + /* 1840 */ 1032, 270, 1553, 211, 223, 1373, 1551, 462, 1245, 235, + /* 1850 */ 195, 426, 1032, 1032, 1034, 1035, 35, 92, 95, 558, + /* 1860 */ 96, 1511, 220, 140, 1420, 557, 332, 180, 13, 1426, + /* 1870 */ 188, 1413, 1032, 1032, 1034, 1035, 35, 335, 1196, 458, + /* 1880 */ 472, 1192, 292, 473, 190, 191, 192, 400, 400, 399, + /* 1890 */ 277, 397, 193, 506, 863, 247, 109, 1432, 406, 480, + /* 1900 */ 456, 1192, 1431, 14, 1434, 409, 199, 239, 102, 327, + /* 1910 */ 123, 572, 251, 4, 1500, 501, 495, 326, 1522, 203, + /* 1920 */ 354, 522, 281, 253, 504, 358, 254, 575, 1274, 255, + /* 1930 */ 440, 411, 1331, 1322, 104, 1330, 1329, 897, 1321, 229, + /* 1940 */ 444, 531, 445, 310, 311, 268, 269, 1630, 1598, 241, + /* 1950 */ 1629, 1301, 412, 372, 1300, 1299, 1628, 175, 1584, 1398, + /* 1960 */ 43, 378, 1583, 448, 569, 11, 1487, 389, 1397, 318, + /* 1970 */ 110, 116, 541, 42, 583, 1202, 276, 240, 278, 1354, + /* 1980 */ 279, 387, 584, 1269, 1264, 185, 1353, 216, 393, 394, + /* 1990 */ 419, 420, 172, 1044, 1538, 850, 1539, 156, 308, 121, + /* 2000 */ 121, 1537, 1536, 173, 174, 457, 89, 122, 225, 456, + /* 2010 */ 581, 456, 418, 226, 1032, 217, 236, 320, 571, 322, + /* 2020 */ 154, 1106, 1104, 330, 187, 176, 929, 189, 1227, 243, + /* 2030 */ 246, 342, 1120, 194, 177, 178, 429, 431, 196, 98, + /* 2040 */ 99, 462, 100, 101, 1123, 179, 1032, 1032, 1034, 1035, + /* 2050 */ 35, 248, 
292, 249, 1119, 163, 24, 400, 400, 399, + /* 2060 */ 277, 397, 250, 353, 863, 1112, 266, 200, 500, 1242, + /* 2070 */ 252, 201, 15, 374, 865, 1192, 505, 239, 256, 327, + /* 2080 */ 202, 509, 103, 25, 895, 366, 26, 326, 514, 369, + /* 2090 */ 105, 908, 517, 309, 164, 106, 181, 1189, 525, 230, + /* 2100 */ 27, 1073, 107, 1159, 17, 204, 1158, 284, 286, 976, + /* 2110 */ 1175, 125, 1179, 265, 982, 28, 1177, 8, 1182, 241, + /* 2120 */ 1183, 29, 30, 31, 32, 1164, 41, 175, 208, 553, + /* 2130 */ 43, 111, 33, 113, 114, 1087, 1074, 1072, 1076, 34, + /* 2140 */ 1077, 565, 1128, 118, 271, 36, 18, 240, 1037, 877, + /* 2150 */ 124, 939, 37, 272, 273, 398, 576, 183, 153, 1621, + /* 2160 */ 1198, 1197, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, + /* 2170 */ 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, + /* 2180 */ 1260, 1260, 418, 1260, 1260, 1260, 1260, 320, 571, 1260, + /* 2190 */ 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, + /* 2200 */ 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, + /* 2210 */ 1260, 462, }; static const YYCODETYPE yy_lookahead[] = { - /* 0 */ 225, 195, 227, 195, 195, 195, 195, 241, 242, 217, - /* 10 */ 235, 187, 188, 189, 190, 191, 192, 225, 207, 19, - /* 20 */ 254, 197, 256, 199, 218, 219, 218, 219, 218, 219, - /* 30 */ 206, 31, 277, 278, 279, 22, 23, 218, 25, 39, - /* 40 */ 277, 278, 279, 234, 44, 45, 46, 206, 48, 49, + /* 0 */ 277, 278, 279, 277, 278, 279, 212, 213, 214, 195, + /* 10 */ 218, 187, 188, 189, 190, 191, 192, 195, 195, 19, + /* 20 */ 190, 197, 192, 199, 298, 218, 206, 197, 23, 199, + /* 30 */ 206, 31, 218, 219, 217, 31, 206, 237, 238, 39, + /* 40 */ 218, 219, 225, 39, 44, 45, 46, 206, 48, 49, /* 50 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 60 */ 9, 298, 195, 22, 195, 241, 242, 189, 190, 191, - /* 70 */ 192, 23, 9, 25, 205, 197, 207, 199, 254, 195, - /* 80 */ 256, 195, 241, 242, 206, 218, 219, 281, 56, 57, - /* 90 */ 58, 59, 60, 269, 208, 254, 55, 256, 237, 238, - /* 100 */ 259, 260, 215, 
103, 104, 105, 106, 107, 108, 109, - /* 110 */ 110, 111, 112, 113, 114, 102, 230, 313, 232, 241, - /* 120 */ 242, 317, 255, 19, 240, 301, 318, 319, 318, 319, - /* 130 */ 9, 264, 254, 195, 256, 103, 104, 105, 106, 107, - /* 140 */ 108, 109, 110, 111, 112, 113, 114, 269, 44, 45, - /* 150 */ 46, 114, 48, 49, 50, 51, 52, 53, 54, 55, - /* 160 */ 56, 57, 58, 59, 277, 278, 279, 119, 117, 118, - /* 170 */ 119, 120, 109, 218, 123, 124, 125, 195, 19, 301, - /* 180 */ 117, 118, 119, 24, 133, 109, 110, 111, 112, 113, - /* 190 */ 114, 87, 144, 89, 103, 104, 105, 106, 107, 108, - /* 200 */ 109, 110, 111, 112, 113, 114, 16, 103, 104, 105, - /* 210 */ 106, 107, 108, 109, 110, 111, 112, 113, 114, 107, - /* 220 */ 108, 109, 110, 111, 112, 113, 114, 19, 107, 108, - /* 230 */ 82, 241, 242, 25, 195, 74, 48, 116, 117, 118, - /* 240 */ 119, 25, 121, 82, 254, 307, 256, 309, 195, 195, - /* 250 */ 129, 195, 44, 45, 46, 96, 48, 49, 50, 51, - /* 260 */ 52, 53, 54, 55, 56, 57, 58, 59, 78, 121, - /* 270 */ 80, 218, 219, 114, 251, 154, 253, 156, 70, 120, - /* 280 */ 121, 122, 123, 124, 125, 126, 233, 139, 140, 128, - /* 290 */ 218, 132, 77, 77, 312, 313, 240, 74, 19, 317, - /* 300 */ 139, 140, 195, 88, 116, 90, 90, 206, 93, 93, - /* 310 */ 271, 103, 104, 105, 106, 107, 108, 109, 110, 111, - /* 320 */ 112, 113, 114, 44, 45, 46, 232, 48, 49, 50, - /* 330 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 19, - /* 340 */ 286, 276, 241, 242, 24, 120, 195, 68, 123, 124, - /* 350 */ 125, 128, 129, 130, 203, 254, 221, 256, 133, 208, - /* 360 */ 195, 146, 221, 195, 44, 45, 46, 19, 48, 49, - /* 370 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 380 */ 315, 316, 103, 104, 105, 106, 107, 108, 109, 110, - /* 390 */ 111, 112, 113, 114, 46, 200, 48, 49, 50, 51, - /* 400 */ 52, 53, 54, 55, 56, 57, 58, 59, 240, 195, - /* 410 */ 19, 9, 277, 278, 279, 82, 195, 9, 277, 278, - /* 420 */ 279, 129, 130, 103, 104, 105, 106, 107, 108, 109, - /* 430 */ 110, 111, 112, 113, 114, 44, 45, 46, 195, 48, - /* 440 */ 49, 50, 51, 52, 53, 
54, 55, 56, 57, 58, - /* 450 */ 59, 103, 104, 105, 106, 107, 108, 109, 110, 111, - /* 460 */ 112, 113, 114, 195, 107, 108, 195, 299, 9, 195, - /* 470 */ 25, 9, 139, 140, 206, 73, 195, 312, 313, 284, - /* 480 */ 206, 73, 317, 288, 22, 26, 218, 219, 48, 49, - /* 490 */ 50, 51, 218, 219, 103, 104, 105, 106, 107, 108, - /* 500 */ 109, 110, 111, 112, 113, 114, 246, 9, 265, 241, - /* 510 */ 242, 154, 195, 156, 19, 241, 242, 257, 23, 117, - /* 520 */ 118, 119, 254, 255, 256, 117, 118, 119, 254, 255, - /* 530 */ 256, 195, 264, 195, 19, 218, 219, 266, 23, 44, - /* 540 */ 45, 46, 151, 48, 49, 50, 51, 52, 53, 54, - /* 550 */ 55, 56, 57, 58, 59, 195, 218, 219, 195, 44, - /* 560 */ 45, 46, 122, 48, 49, 50, 51, 52, 53, 54, - /* 570 */ 55, 56, 57, 58, 59, 77, 117, 118, 119, 117, - /* 580 */ 118, 119, 19, 116, 139, 140, 88, 195, 90, 31, - /* 590 */ 195, 93, 311, 255, 234, 206, 9, 39, 103, 104, - /* 600 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, - /* 610 */ 218, 219, 145, 218, 219, 117, 118, 119, 103, 104, - /* 620 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, - /* 630 */ 241, 242, 67, 22, 23, 318, 25, 320, 19, 212, - /* 640 */ 213, 214, 23, 254, 146, 256, 135, 255, 137, 138, - /* 650 */ 255, 86, 112, 113, 114, 9, 264, 15, 19, 264, - /* 660 */ 95, 272, 23, 44, 45, 46, 206, 48, 49, 50, - /* 670 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 116, - /* 680 */ 117, 195, 25, 44, 45, 46, 195, 48, 49, 50, - /* 690 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 213, - /* 700 */ 214, 241, 242, 61, 117, 118, 119, 195, 145, 218, - /* 710 */ 219, 195, 147, 102, 254, 9, 256, 195, 195, 206, - /* 720 */ 195, 195, 103, 104, 105, 106, 107, 108, 109, 110, - /* 730 */ 111, 112, 113, 114, 195, 206, 195, 166, 167, 152, - /* 740 */ 218, 219, 103, 104, 105, 106, 107, 108, 109, 110, - /* 750 */ 111, 112, 113, 114, 241, 242, 195, 218, 219, 218, - /* 760 */ 219, 215, 19, 117, 118, 119, 23, 254, 243, 256, - /* 770 */ 241, 242, 233, 287, 233, 139, 140, 255, 266, 218, - /* 780 */ 219, 265, 19, 254, 127, 256, 
23, 44, 45, 46, - /* 790 */ 144, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 800 */ 57, 58, 59, 195, 195, 195, 109, 44, 45, 46, - /* 810 */ 215, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 820 */ 57, 58, 59, 117, 118, 119, 218, 219, 218, 219, - /* 830 */ 195, 246, 307, 136, 309, 289, 310, 311, 141, 195, - /* 840 */ 294, 233, 257, 233, 206, 195, 103, 104, 105, 106, - /* 850 */ 107, 108, 109, 110, 111, 112, 113, 114, 195, 195, - /* 860 */ 195, 313, 218, 219, 195, 317, 103, 104, 105, 106, - /* 870 */ 107, 108, 109, 110, 111, 112, 113, 114, 9, 241, - /* 880 */ 242, 218, 219, 218, 219, 195, 19, 218, 219, 294, - /* 890 */ 23, 22, 254, 29, 256, 195, 233, 33, 233, 255, - /* 900 */ 265, 9, 233, 23, 195, 25, 19, 56, 57, 58, - /* 910 */ 59, 44, 45, 46, 22, 48, 49, 50, 51, 52, - /* 920 */ 53, 54, 55, 56, 57, 58, 59, 218, 219, 195, - /* 930 */ 66, 44, 45, 46, 9, 48, 49, 50, 51, 52, - /* 940 */ 53, 54, 55, 56, 57, 58, 59, 213, 214, 24, - /* 950 */ 16, 12, 22, 195, 103, 104, 105, 106, 107, 108, - /* 960 */ 109, 110, 111, 112, 113, 114, 27, 11, 195, 19, - /* 970 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 980 */ 113, 114, 43, 130, 9, 295, 117, 118, 119, 9, - /* 990 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 1000 */ 113, 114, 22, 64, 195, 195, 153, 9, 101, 117, - /* 1010 */ 118, 119, 78, 74, 80, 246, 243, 195, 25, 19, - /* 1020 */ 22, 287, 242, 23, 266, 118, 257, 218, 219, 122, - /* 1030 */ 212, 213, 214, 22, 254, 21, 256, 107, 108, 19, - /* 1040 */ 218, 219, 195, 118, 44, 45, 46, 160, 48, 49, - /* 1050 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 1060 */ 195, 154, 155, 156, 44, 45, 46, 117, 48, 49, - /* 1070 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 1080 */ 307, 271, 309, 218, 219, 129, 130, 9, 195, 195, - /* 1090 */ 243, 184, 117, 118, 119, 81, 162, 117, 118, 119, - /* 1100 */ 22, 206, 24, 103, 104, 105, 106, 107, 108, 109, - /* 1110 */ 110, 111, 112, 113, 114, 117, 118, 119, 195, 144, - /* 1120 */ 255, 19, 195, 103, 104, 105, 106, 107, 108, 109, - 
/* 1130 */ 110, 111, 112, 113, 114, 195, 241, 242, 124, 146, - /* 1140 */ 195, 128, 129, 130, 195, 218, 219, 195, 195, 254, - /* 1150 */ 48, 256, 19, 142, 307, 195, 309, 24, 218, 219, - /* 1160 */ 128, 129, 130, 218, 219, 271, 243, 218, 219, 9, - /* 1170 */ 218, 219, 19, 237, 238, 276, 162, 44, 45, 46, - /* 1180 */ 160, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 1190 */ 57, 58, 59, 35, 195, 255, 118, 44, 45, 46, - /* 1200 */ 255, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 1210 */ 57, 58, 59, 276, 315, 316, 304, 305, 116, 7, - /* 1220 */ 8, 9, 210, 211, 271, 67, 266, 25, 262, 263, - /* 1230 */ 307, 195, 309, 75, 132, 195, 103, 104, 105, 106, - /* 1240 */ 107, 108, 109, 110, 111, 112, 113, 114, 195, 25, - /* 1250 */ 195, 195, 315, 316, 218, 219, 103, 104, 105, 106, - /* 1260 */ 107, 108, 109, 110, 111, 112, 113, 114, 195, 77, - /* 1270 */ 195, 218, 219, 218, 219, 195, 19, 195, 118, 25, - /* 1280 */ 240, 146, 90, 195, 9, 93, 210, 211, 195, 131, - /* 1290 */ 208, 218, 219, 218, 219, 195, 19, 22, 218, 219, - /* 1300 */ 165, 44, 45, 46, 195, 48, 49, 50, 51, 52, - /* 1310 */ 53, 54, 55, 56, 57, 58, 59, 195, 218, 219, - /* 1320 */ 9, 119, 45, 46, 195, 48, 49, 50, 51, 52, - /* 1330 */ 53, 54, 55, 56, 57, 58, 59, 22, 23, 195, - /* 1340 */ 218, 219, 12, 302, 303, 9, 144, 218, 219, 240, - /* 1350 */ 195, 295, 195, 272, 195, 19, 20, 27, 22, 266, - /* 1360 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 1370 */ 113, 114, 36, 43, 150, 218, 219, 218, 219, 195, - /* 1380 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 1390 */ 113, 114, 195, 118, 64, 240, 195, 143, 190, 195, - /* 1400 */ 192, 195, 218, 219, 159, 197, 161, 199, 72, 22, - /* 1410 */ 23, 22, 101, 24, 206, 218, 219, 102, 82, 218, - /* 1420 */ 219, 246, 218, 219, 218, 219, 313, 313, 117, 118, - /* 1430 */ 317, 317, 257, 122, 313, 19, 195, 101, 317, 295, - /* 1440 */ 25, 263, 136, 107, 108, 267, 195, 141, 263, 241, - /* 1450 */ 242, 115, 267, 117, 118, 119, 195, 246, 122, 218, - /* 1460 */ 219, 9, 254, 23, 
256, 154, 155, 156, 257, 218, - /* 1470 */ 219, 19, 20, 24, 22, 139, 140, 269, 195, 218, - /* 1480 */ 219, 259, 260, 100, 116, 130, 9, 195, 36, 102, - /* 1490 */ 154, 155, 156, 157, 158, 184, 19, 20, 195, 22, - /* 1500 */ 150, 151, 50, 195, 23, 195, 25, 195, 153, 301, - /* 1510 */ 218, 219, 195, 36, 22, 122, 195, 134, 195, 151, - /* 1520 */ 184, 218, 219, 195, 72, 132, 218, 219, 218, 219, - /* 1530 */ 218, 219, 195, 117, 195, 218, 219, 19, 86, 218, - /* 1540 */ 219, 218, 219, 91, 7, 8, 218, 219, 23, 72, - /* 1550 */ 25, 195, 195, 101, 62, 218, 219, 195, 143, 107, - /* 1560 */ 108, 195, 23, 86, 25, 121, 122, 115, 91, 117, - /* 1570 */ 118, 119, 195, 9, 122, 218, 219, 9, 101, 9, - /* 1580 */ 218, 219, 84, 85, 107, 108, 146, 19, 20, 195, - /* 1590 */ 22, 142, 115, 195, 117, 118, 119, 195, 23, 122, - /* 1600 */ 25, 195, 9, 195, 36, 9, 154, 155, 156, 157, - /* 1610 */ 158, 258, 19, 20, 195, 22, 98, 154, 195, 156, - /* 1620 */ 218, 219, 195, 195, 218, 219, 218, 219, 23, 36, - /* 1630 */ 25, 154, 155, 156, 157, 158, 184, 218, 219, 5, - /* 1640 */ 72, 218, 219, 195, 10, 11, 12, 13, 14, 195, - /* 1650 */ 23, 17, 25, 195, 86, 195, 23, 195, 25, 91, - /* 1660 */ 23, 184, 25, 195, 30, 72, 32, 195, 195, 101, - /* 1670 */ 195, 195, 218, 219, 40, 107, 108, 195, 218, 219, - /* 1680 */ 218, 219, 118, 115, 228, 117, 118, 119, 118, 228, - /* 1690 */ 122, 218, 219, 228, 101, 195, 23, 23, 25, 25, - /* 1700 */ 107, 108, 109, 195, 23, 71, 25, 195, 115, 195, - /* 1710 */ 117, 118, 119, 79, 118, 122, 82, 195, 218, 219, - /* 1720 */ 261, 195, 154, 155, 156, 157, 158, 0, 1, 2, - /* 1730 */ 218, 219, 5, 99, 23, 195, 25, 10, 11, 12, - /* 1740 */ 13, 14, 195, 23, 17, 25, 195, 154, 155, 156, - /* 1750 */ 157, 158, 184, 322, 154, 195, 156, 30, 23, 32, - /* 1760 */ 25, 23, 195, 25, 195, 218, 219, 40, 134, 238, - /* 1770 */ 291, 195, 195, 139, 140, 195, 195, 184, 218, 219, - /* 1780 */ 195, 195, 195, 290, 193, 244, 258, 218, 219, 245, - /* 1790 */ 258, 258, 258, 274, 216, 300, 247, 163, 71, 270, - 
/* 1800 */ 270, 296, 248, 248, 247, 296, 79, 262, 9, 82, - /* 1810 */ 274, 222, 274, 231, 262, 227, 274, 251, 19, 20, - /* 1820 */ 221, 22, 221, 221, 262, 262, 99, 198, 9, 283, - /* 1830 */ 251, 247, 61, 142, 202, 36, 38, 245, 19, 20, - /* 1840 */ 202, 22, 202, 300, 152, 286, 151, 297, 300, 275, - /* 1850 */ 297, 22, 149, 146, 251, 36, 273, 44, 252, 236, - /* 1860 */ 252, 134, 251, 18, 239, 239, 139, 140, 202, 239, - /* 1870 */ 239, 72, 18, 201, 150, 236, 248, 236, 202, 275, - /* 1880 */ 201, 159, 275, 248, 248, 273, 63, 293, 202, 292, - /* 1890 */ 163, 72, 248, 22, 201, 116, 202, 201, 223, 202, - /* 1900 */ 101, 201, 220, 223, 220, 220, 107, 108, 65, 22, - /* 1910 */ 220, 223, 229, 226, 115, 226, 117, 118, 119, 127, - /* 1920 */ 101, 122, 220, 222, 220, 220, 107, 108, 229, 166, - /* 1930 */ 316, 24, 308, 285, 115, 114, 117, 118, 119, 285, - /* 1940 */ 141, 122, 223, 202, 92, 83, 268, 22, 280, 202, - /* 1950 */ 159, 282, 148, 154, 155, 156, 157, 158, 252, 251, - /* 1960 */ 141, 252, 250, 321, 321, 147, 268, 249, 25, 248, - /* 1970 */ 204, 13, 196, 154, 155, 156, 157, 158, 1, 2, - /* 1980 */ 196, 6, 5, 184, 194, 215, 209, 10, 11, 12, - /* 1990 */ 13, 14, 224, 215, 17, 194, 194, 215, 215, 224, - /* 2000 */ 9, 216, 209, 184, 216, 209, 4, 30, 3, 32, - /* 2010 */ 19, 20, 215, 22, 22, 164, 306, 40, 303, 306, - /* 2020 */ 16, 15, 23, 23, 140, 152, 131, 36, 25, 143, - /* 2030 */ 20, 24, 16, 145, 1, 143, 131, 131, 62, 55, - /* 2040 */ 55, 152, 55, 55, 131, 37, 117, 34, 71, 1, - /* 2050 */ 142, 5, 22, 116, 25, 162, 79, 76, 69, 82, - /* 2060 */ 41, 142, 69, 72, 116, 24, 132, 20, 19, 68, - /* 2070 */ 126, 9, 22, 22, 22, 22, 99, 23, 23, 68, - /* 2080 */ 24, 22, 68, 97, 23, 28, 150, 22, 25, 23, - /* 2090 */ 23, 22, 101, 98, 34, 37, 142, 23, 107, 108, - /* 2100 */ 23, 22, 34, 144, 34, 25, 115, 117, 117, 118, - /* 2110 */ 119, 134, 34, 122, 89, 34, 139, 140, 34, 76, - /* 2120 */ 45, 87, 94, 34, 23, 22, 24, 34, 22, 25, - /* 2130 */ 25, 23, 23, 23, 23, 23, 22, 143, 25, 143, - /* 
2140 */ 163, 25, 11, 22, 22, 154, 155, 156, 157, 158, - /* 2150 */ 23, 23, 76, 136, 22, 22, 15, 142, 23, 25, - /* 2160 */ 142, 25, 142, 1, 1, 323, 323, 323, 323, 323, - /* 2170 */ 323, 323, 323, 323, 323, 184, 323, 323, 323, 323, - /* 2180 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, + /* 60 */ 195, 241, 242, 240, 16, 241, 242, 212, 213, 214, + /* 70 */ 19, 241, 242, 208, 254, 24, 256, 255, 254, 195, + /* 80 */ 256, 9, 241, 242, 254, 23, 256, 25, 56, 57, + /* 90 */ 58, 59, 60, 269, 195, 254, 195, 256, 26, 269, + /* 100 */ 259, 260, 218, 219, 104, 105, 106, 107, 108, 109, + /* 110 */ 110, 111, 112, 113, 114, 115, 218, 233, 189, 190, + /* 120 */ 191, 192, 299, 221, 19, 301, 197, 79, 199, 81, + /* 130 */ 29, 301, 318, 319, 33, 206, 104, 105, 106, 107, + /* 140 */ 108, 109, 110, 111, 112, 113, 114, 115, 97, 44, + /* 150 */ 45, 46, 147, 48, 49, 50, 51, 52, 53, 54, + /* 160 */ 55, 56, 57, 58, 59, 195, 115, 9, 67, 195, + /* 170 */ 241, 242, 121, 122, 123, 124, 125, 126, 127, 277, + /* 180 */ 278, 279, 120, 254, 133, 256, 206, 286, 218, 219, + /* 190 */ 118, 119, 120, 88, 295, 90, 195, 195, 269, 108, + /* 200 */ 109, 110, 111, 112, 113, 114, 115, 145, 207, 104, + /* 210 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, + /* 220 */ 115, 241, 242, 110, 111, 112, 113, 114, 115, 19, + /* 230 */ 301, 75, 130, 131, 254, 25, 256, 25, 9, 83, + /* 240 */ 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, + /* 250 */ 114, 115, 272, 24, 44, 45, 46, 115, 48, 49, + /* 260 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 270 */ 48, 49, 50, 51, 9, 195, 118, 119, 120, 121, + /* 280 */ 195, 71, 124, 125, 126, 129, 312, 313, 318, 195, + /* 290 */ 320, 317, 134, 195, 78, 121, 140, 141, 124, 125, + /* 300 */ 126, 19, 313, 218, 219, 89, 317, 91, 134, 307, + /* 310 */ 94, 309, 218, 219, 104, 105, 106, 107, 108, 109, + /* 320 */ 110, 111, 112, 113, 114, 115, 44, 45, 46, 232, + /* 330 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 340 */ 58, 59, 19, 78, 78, 123, 266, 24, 119, 255, + /* 
350 */ 137, 69, 140, 141, 89, 142, 91, 91, 264, 94, + /* 360 */ 94, 22, 23, 147, 25, 206, 221, 44, 45, 46, + /* 370 */ 206, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 380 */ 57, 58, 59, 118, 119, 120, 104, 105, 106, 107, + /* 390 */ 108, 109, 110, 111, 112, 113, 114, 115, 25, 200, + /* 400 */ 241, 242, 195, 318, 319, 241, 242, 272, 56, 57, + /* 410 */ 58, 59, 147, 254, 19, 256, 22, 23, 254, 25, + /* 420 */ 256, 75, 277, 278, 279, 218, 219, 104, 105, 106, + /* 430 */ 107, 108, 109, 110, 111, 112, 113, 114, 115, 44, + /* 440 */ 45, 46, 103, 48, 49, 50, 51, 52, 53, 54, + /* 450 */ 55, 56, 57, 58, 59, 195, 104, 105, 106, 107, + /* 460 */ 108, 109, 110, 111, 112, 113, 114, 115, 195, 241, + /* 470 */ 242, 9, 195, 48, 9, 129, 130, 131, 218, 219, + /* 480 */ 195, 206, 254, 284, 256, 206, 195, 288, 281, 195, + /* 490 */ 206, 218, 219, 120, 195, 218, 219, 103, 195, 104, + /* 500 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, + /* 510 */ 115, 195, 213, 214, 195, 255, 241, 242, 145, 19, + /* 520 */ 241, 242, 203, 23, 264, 241, 242, 208, 255, 254, + /* 530 */ 9, 256, 255, 254, 228, 256, 74, 234, 254, 19, + /* 540 */ 256, 264, 117, 23, 44, 45, 46, 152, 48, 49, + /* 550 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 560 */ 9, 195, 271, 19, 44, 45, 46, 23, 48, 49, + /* 570 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 580 */ 118, 119, 120, 118, 119, 120, 287, 271, 44, 45, + /* 590 */ 46, 195, 48, 49, 50, 51, 52, 53, 54, 55, + /* 600 */ 56, 57, 58, 59, 104, 105, 106, 107, 108, 109, + /* 610 */ 110, 111, 112, 113, 114, 115, 215, 195, 153, 195, + /* 620 */ 9, 195, 195, 9, 104, 105, 106, 107, 108, 109, + /* 630 */ 110, 111, 112, 113, 114, 115, 22, 213, 214, 118, + /* 640 */ 119, 120, 108, 109, 218, 219, 140, 141, 104, 105, + /* 650 */ 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, + /* 660 */ 9, 110, 241, 242, 167, 168, 145, 271, 19, 118, + /* 670 */ 119, 120, 23, 22, 195, 254, 9, 256, 277, 278, + /* 680 */ 279, 255, 237, 238, 205, 74, 207, 265, 19, 155, + /* 690 */ 25, 157, 
23, 44, 45, 46, 276, 48, 49, 50, + /* 700 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 83, + /* 710 */ 195, 287, 19, 44, 45, 46, 23, 48, 49, 50, + /* 720 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 118, + /* 730 */ 119, 120, 118, 119, 120, 315, 316, 44, 45, 46, + /* 740 */ 195, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 750 */ 57, 58, 59, 104, 105, 106, 107, 108, 109, 110, + /* 760 */ 111, 112, 113, 114, 115, 195, 140, 141, 195, 118, + /* 770 */ 119, 120, 195, 104, 105, 106, 107, 108, 109, 110, + /* 780 */ 111, 112, 113, 114, 115, 118, 119, 120, 218, 219, + /* 790 */ 9, 218, 219, 19, 19, 218, 219, 104, 105, 106, + /* 800 */ 107, 108, 109, 110, 111, 112, 113, 114, 115, 144, + /* 810 */ 233, 195, 145, 113, 114, 115, 195, 19, 44, 45, + /* 820 */ 46, 23, 48, 49, 50, 51, 52, 53, 54, 55, + /* 830 */ 56, 57, 58, 59, 218, 219, 195, 19, 9, 218, + /* 840 */ 219, 23, 44, 45, 46, 195, 48, 49, 50, 51, + /* 850 */ 52, 53, 54, 55, 56, 57, 58, 59, 136, 83, + /* 860 */ 138, 139, 44, 45, 46, 195, 48, 49, 50, 51, + /* 870 */ 52, 53, 54, 55, 56, 57, 58, 59, 104, 105, + /* 880 */ 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, + /* 890 */ 195, 195, 117, 118, 304, 305, 195, 9, 122, 118, + /* 900 */ 119, 120, 104, 105, 106, 107, 108, 109, 110, 111, + /* 910 */ 112, 113, 114, 115, 195, 276, 140, 141, 215, 218, + /* 920 */ 219, 146, 104, 105, 106, 107, 108, 109, 110, 111, + /* 930 */ 112, 113, 114, 115, 233, 161, 295, 108, 109, 195, + /* 940 */ 9, 195, 19, 262, 263, 195, 117, 118, 119, 120, + /* 950 */ 25, 122, 206, 22, 315, 316, 276, 9, 195, 130, + /* 960 */ 310, 311, 218, 219, 218, 219, 195, 44, 45, 46, + /* 970 */ 22, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 980 */ 57, 58, 59, 16, 155, 266, 157, 241, 242, 12, + /* 990 */ 102, 195, 289, 243, 195, 315, 316, 294, 195, 255, + /* 1000 */ 254, 255, 256, 78, 27, 206, 118, 119, 312, 313, + /* 1010 */ 264, 123, 242, 317, 243, 195, 91, 218, 219, 94, + /* 1020 */ 43, 218, 219, 251, 254, 253, 256, 104, 105, 106, + /* 1030 */ 107, 108, 109, 110, 111, 112, 113, 
114, 115, 195, + /* 1040 */ 241, 242, 65, 155, 156, 157, 79, 195, 81, 118, + /* 1050 */ 119, 120, 75, 254, 255, 256, 19, 307, 255, 309, + /* 1060 */ 195, 24, 266, 243, 9, 195, 118, 119, 120, 195, + /* 1070 */ 218, 219, 184, 23, 195, 25, 19, 22, 307, 195, + /* 1080 */ 309, 44, 45, 46, 161, 48, 49, 50, 51, 52, + /* 1090 */ 53, 54, 55, 56, 57, 58, 59, 218, 219, 195, + /* 1100 */ 195, 44, 45, 46, 102, 48, 49, 50, 51, 52, + /* 1110 */ 53, 54, 55, 56, 57, 58, 59, 195, 225, 195, + /* 1120 */ 227, 119, 218, 219, 240, 123, 228, 307, 235, 309, + /* 1130 */ 163, 25, 208, 195, 255, 265, 271, 233, 21, 265, + /* 1140 */ 25, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1150 */ 113, 114, 115, 195, 230, 195, 232, 155, 156, 157, + /* 1160 */ 195, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1170 */ 113, 114, 115, 118, 119, 120, 218, 219, 218, 219, + /* 1180 */ 131, 19, 19, 218, 219, 195, 184, 195, 266, 9, + /* 1190 */ 313, 233, 195, 233, 317, 195, 228, 110, 233, 82, + /* 1200 */ 295, 19, 22, 154, 24, 11, 44, 45, 46, 195, + /* 1210 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 1220 */ 58, 59, 35, 195, 137, 68, 234, 45, 46, 142, + /* 1230 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + /* 1240 */ 58, 59, 125, 243, 87, 9, 218, 219, 22, 23, + /* 1250 */ 19, 22, 23, 96, 240, 68, 266, 151, 22, 195, + /* 1260 */ 313, 233, 147, 76, 317, 9, 104, 105, 106, 107, + /* 1270 */ 108, 109, 110, 111, 112, 113, 114, 115, 195, 48, + /* 1280 */ 163, 118, 218, 219, 22, 195, 104, 105, 106, 107, + /* 1290 */ 108, 109, 110, 111, 112, 113, 114, 115, 195, 119, + /* 1300 */ 12, 218, 219, 195, 195, 148, 19, 307, 311, 309, + /* 1310 */ 7, 8, 9, 123, 313, 27, 313, 195, 317, 132, + /* 1320 */ 317, 218, 219, 133, 130, 131, 218, 219, 195, 103, + /* 1330 */ 240, 43, 103, 46, 215, 48, 49, 50, 51, 52, + /* 1340 */ 53, 54, 55, 56, 57, 58, 59, 9, 117, 195, + /* 1350 */ 147, 218, 219, 65, 195, 119, 195, 19, 20, 195, + /* 1360 */ 22, 195, 240, 195, 133, 195, 129, 130, 131, 166, + /* 1370 */ 108, 109, 218, 219, 
36, 119, 195, 218, 219, 218, + /* 1380 */ 219, 131, 218, 219, 218, 219, 218, 219, 218, 219, + /* 1390 */ 19, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1400 */ 113, 114, 115, 195, 154, 195, 195, 9, 195, 210, + /* 1410 */ 211, 73, 195, 294, 195, 210, 211, 19, 20, 195, + /* 1420 */ 22, 83, 129, 130, 131, 117, 218, 219, 195, 218, + /* 1430 */ 219, 218, 219, 25, 36, 218, 219, 218, 219, 195, + /* 1440 */ 102, 263, 218, 219, 246, 267, 108, 109, 50, 195, + /* 1450 */ 240, 218, 219, 246, 116, 257, 118, 119, 120, 195, + /* 1460 */ 152, 123, 218, 219, 257, 263, 246, 25, 195, 267, + /* 1470 */ 99, 73, 218, 219, 302, 303, 246, 257, 140, 141, + /* 1480 */ 259, 260, 218, 219, 15, 87, 195, 257, 9, 195, + /* 1490 */ 92, 218, 219, 155, 156, 157, 158, 159, 19, 20, + /* 1500 */ 102, 22, 195, 151, 152, 195, 108, 109, 24, 218, + /* 1510 */ 219, 195, 218, 219, 116, 36, 118, 119, 120, 117, + /* 1520 */ 246, 123, 184, 195, 19, 218, 219, 195, 218, 219, + /* 1530 */ 61, 257, 9, 261, 218, 219, 128, 195, 160, 195, + /* 1540 */ 162, 195, 19, 20, 195, 22, 218, 219, 146, 195, + /* 1550 */ 218, 219, 73, 155, 156, 157, 158, 159, 22, 36, + /* 1560 */ 218, 219, 218, 219, 218, 219, 87, 218, 219, 195, + /* 1570 */ 22, 92, 218, 219, 22, 195, 24, 23, 195, 25, + /* 1580 */ 195, 102, 184, 195, 195, 195, 144, 108, 109, 195, + /* 1590 */ 101, 55, 218, 219, 195, 116, 73, 118, 119, 120, + /* 1600 */ 122, 123, 123, 218, 219, 195, 218, 219, 218, 219, + /* 1610 */ 87, 63, 218, 219, 23, 92, 25, 218, 219, 23, + /* 1620 */ 23, 25, 25, 118, 135, 102, 195, 143, 195, 195, + /* 1630 */ 195, 108, 109, 9, 155, 156, 157, 158, 159, 116, + /* 1640 */ 9, 118, 119, 120, 195, 195, 123, 9, 195, 218, + /* 1650 */ 219, 218, 219, 218, 219, 7, 8, 19, 20, 23, + /* 1660 */ 22, 25, 23, 184, 25, 85, 86, 218, 219, 9, + /* 1670 */ 195, 218, 219, 258, 36, 195, 195, 22, 155, 156, + /* 1680 */ 157, 158, 159, 0, 1, 2, 195, 23, 5, 25, + /* 1690 */ 23, 195, 25, 10, 11, 12, 13, 14, 218, 219, + /* 1700 */ 17, 195, 23, 23, 25, 25, 9, 184, 
195, 218, + /* 1710 */ 219, 73, 23, 30, 25, 32, 19, 20, 23, 22, + /* 1720 */ 25, 195, 23, 40, 25, 155, 9, 157, 155, 23, + /* 1730 */ 157, 25, 195, 36, 195, 195, 19, 20, 195, 22, + /* 1740 */ 102, 195, 23, 119, 25, 195, 108, 109, 110, 322, + /* 1750 */ 119, 195, 195, 36, 116, 72, 118, 119, 120, 195, + /* 1760 */ 195, 123, 238, 80, 195, 291, 83, 195, 258, 195, + /* 1770 */ 73, 195, 195, 195, 195, 290, 244, 258, 258, 119, + /* 1780 */ 258, 193, 216, 100, 270, 274, 245, 300, 270, 247, + /* 1790 */ 73, 296, 248, 155, 156, 157, 158, 159, 143, 102, + /* 1800 */ 247, 274, 274, 248, 274, 108, 109, 222, 296, 227, + /* 1810 */ 221, 221, 231, 116, 221, 118, 119, 120, 135, 102, + /* 1820 */ 123, 262, 184, 140, 141, 108, 109, 198, 283, 262, + /* 1830 */ 262, 247, 61, 116, 300, 118, 119, 120, 251, 142, + /* 1840 */ 123, 143, 202, 251, 245, 262, 202, 164, 38, 300, + /* 1850 */ 22, 202, 155, 156, 157, 158, 159, 297, 297, 142, + /* 1860 */ 153, 286, 152, 150, 252, 147, 251, 44, 273, 275, + /* 1870 */ 236, 252, 155, 156, 157, 158, 159, 251, 1, 2, + /* 1880 */ 18, 184, 5, 202, 239, 239, 239, 10, 11, 12, + /* 1890 */ 13, 14, 239, 18, 17, 201, 151, 275, 248, 248, + /* 1900 */ 9, 184, 275, 273, 236, 248, 236, 30, 160, 32, + /* 1910 */ 19, 20, 201, 22, 248, 64, 202, 40, 293, 22, + /* 1920 */ 292, 117, 202, 201, 223, 202, 201, 36, 202, 201, + /* 1930 */ 66, 223, 220, 229, 22, 220, 220, 128, 229, 167, + /* 1940 */ 24, 308, 115, 285, 285, 202, 93, 226, 316, 72, + /* 1950 */ 226, 220, 223, 220, 222, 220, 220, 80, 321, 268, + /* 1960 */ 83, 223, 321, 84, 73, 22, 280, 202, 268, 282, + /* 1970 */ 149, 160, 148, 25, 204, 13, 196, 100, 196, 252, + /* 1980 */ 6, 251, 194, 194, 194, 303, 252, 250, 249, 248, + /* 1990 */ 306, 306, 209, 102, 215, 4, 215, 224, 224, 108, + /* 2000 */ 109, 215, 215, 209, 209, 3, 215, 116, 216, 118, + /* 2010 */ 119, 120, 135, 216, 123, 22, 15, 140, 141, 165, + /* 2020 */ 16, 23, 23, 141, 153, 132, 20, 144, 25, 24, + /* 2030 */ 146, 16, 1, 144, 132, 132, 63, 37, 153, 55, 
+ /* 2040 */ 55, 164, 55, 55, 118, 132, 155, 156, 157, 158, + /* 2050 */ 159, 34, 5, 143, 1, 5, 22, 10, 11, 12, + /* 2060 */ 13, 14, 117, 163, 17, 70, 25, 70, 41, 77, + /* 2070 */ 143, 117, 24, 133, 20, 184, 19, 30, 127, 32, + /* 2080 */ 22, 69, 22, 22, 9, 23, 22, 40, 69, 24, + /* 2090 */ 22, 28, 98, 69, 23, 151, 37, 23, 22, 143, + /* 2100 */ 34, 23, 25, 23, 22, 22, 99, 23, 23, 145, + /* 2110 */ 90, 25, 77, 34, 118, 34, 88, 45, 95, 72, + /* 2120 */ 77, 34, 34, 34, 34, 23, 22, 80, 25, 24, + /* 2130 */ 83, 25, 34, 144, 144, 23, 23, 23, 23, 22, + /* 2140 */ 11, 25, 23, 25, 22, 22, 22, 100, 23, 23, + /* 2150 */ 22, 137, 22, 143, 143, 15, 25, 25, 23, 143, + /* 2160 */ 1, 1, 323, 323, 323, 323, 323, 323, 323, 323, + /* 2170 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, + /* 2180 */ 323, 323, 135, 323, 323, 323, 323, 140, 141, 323, /* 2190 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2200 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2210 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, + /* 2210 */ 323, 164, 323, 323, 323, 323, 323, 323, 323, 323, /* 2220 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2230 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2240 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, @@ -177984,120 +178668,123 @@ static const YYCODETYPE yy_lookahead[] = { /* 2310 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2320 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2330 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2340 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2350 */ 323, 323, 187, 187, 187, 187, 187, 187, 187, 187, - /* 2360 */ 187, 187, 187, + /* 2340 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 187, + /* 2350 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, + /* 2360 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, + /* 2370 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, + /* 2380 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, 
+ /* 2390 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, }; #define YY_SHIFT_COUNT (586) #define YY_SHIFT_MIN (0) -#define YY_SHIFT_MAX (2163) +#define YY_SHIFT_MAX (2160) static const unsigned short int yy_shift_ofst[] = { - /* 0 */ 1977, 1727, 1634, 1336, 1336, 333, 161, 1452, 1477, 1568, - /* 10 */ 1991, 1991, 1991, 148, 333, 333, 333, 333, 333, 0, - /* 20 */ 0, 279, 1153, 1991, 1991, 1991, 1991, 1991, 1991, 1991, - /* 30 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 121, 121, - /* 40 */ 498, 498, 51, 402, 408, 706, 706, 445, 445, 445, - /* 50 */ 445, 104, 208, 320, 391, 495, 515, 619, 639, 743, - /* 60 */ 763, 867, 887, 1000, 1020, 1133, 1153, 1153, 1153, 1153, - /* 70 */ 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, - /* 80 */ 1153, 1153, 1153, 1153, 1257, 1153, 1277, 348, 348, 1593, - /* 90 */ 1799, 1819, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, - /* 100 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, - /* 110 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, - /* 120 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, - /* 130 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, - /* 140 */ 1991, 1991, 32, 851, 851, 851, 851, 851, 851, 851, - /* 150 */ 91, 112, 76, 459, 706, 1014, 1102, 706, 706, 357, - /* 160 */ 357, 706, 540, 292, 571, 571, 571, 636, 37, 37, - /* 170 */ 2176, 2176, 159, 159, 159, 587, 462, 462, 462, 462, - /* 180 */ 939, 939, 215, 459, 13, 611, 706, 706, 706, 706, - /* 190 */ 706, 706, 706, 706, 706, 706, 706, 706, 706, 706, - /* 200 */ 706, 706, 706, 706, 706, 563, 216, 216, 706, 956, - /* 210 */ 1192, 1192, 950, 950, 1160, 1160, 1135, 2176, 2176, 2176, - /* 220 */ 2176, 2176, 2176, 2176, 1311, 907, 907, 869, 225, 892, - /* 230 */ 63, 980, 646, 975, 998, 706, 706, 706, 706, 706, - /* 240 */ 706, 706, 706, 706, 706, 223, 706, 706, 706, 706, - /* 250 */ 706, 706, 706, 706, 706, 706, 706, 706, 1158, 1158, - /* 260 */ 1158, 706, 706, 706, 48, 706, 706, 706, 1078, 565, 
- /* 270 */ 706, 1330, 706, 706, 706, 706, 706, 706, 706, 706, - /* 280 */ 1013, 864, 511, 925, 925, 925, 925, 1202, 511, 511, - /* 290 */ 697, 930, 1212, 642, 1368, 1350, 1254, 1350, 1416, 1224, - /* 300 */ 1368, 1368, 1224, 1368, 1254, 1416, 657, 880, 188, 558, - /* 310 */ 558, 558, 467, 467, 467, 467, 993, 993, 1245, 1415, - /* 320 */ 1306, 1389, 1771, 1771, 1691, 1691, 1798, 1798, 1691, 1692, - /* 330 */ 1695, 1829, 1703, 1707, 1813, 1703, 1707, 1845, 1845, 1845, - /* 340 */ 1845, 1691, 1854, 1724, 1695, 1695, 1724, 1829, 1813, 1724, - /* 350 */ 1813, 1724, 1691, 1854, 1722, 1823, 1691, 1854, 1871, 1691, - /* 360 */ 1854, 1691, 1854, 1871, 1779, 1779, 1779, 1843, 1887, 1887, - /* 370 */ 1871, 1779, 1792, 1779, 1843, 1779, 1779, 1763, 1907, 1821, - /* 380 */ 1821, 1871, 1691, 1852, 1852, 1862, 1862, 1703, 1707, 1925, - /* 390 */ 1691, 1791, 1703, 1804, 1818, 1724, 1943, 1958, 1958, 1975, - /* 400 */ 1975, 1975, 2176, 2176, 2176, 2176, 2176, 2176, 2176, 2176, - /* 410 */ 2176, 2176, 2176, 2176, 2176, 2176, 2176, 440, 934, 1315, - /* 420 */ 1387, 190, 1032, 1275, 1481, 1383, 41, 1449, 853, 1355, - /* 430 */ 1525, 1492, 1539, 1575, 1605, 1627, 1633, 1637, 1564, 1444, - /* 440 */ 1537, 1393, 1673, 1440, 1518, 1570, 1674, 1681, 1498, 1711, - /* 450 */ 1720, 1463, 1600, 1735, 1738, 1596, 1011, 2002, 2005, 1992, - /* 460 */ 1851, 2006, 2004, 1999, 2000, 1884, 1873, 1895, 2003, 2003, - /* 470 */ 2007, 1886, 2010, 1888, 2016, 2033, 1892, 1905, 2003, 1906, - /* 480 */ 1976, 2008, 2003, 1889, 1984, 1985, 1987, 1988, 1913, 1929, - /* 490 */ 2013, 1908, 2048, 2046, 2030, 1937, 1893, 1989, 2029, 1993, - /* 500 */ 1981, 2019, 1919, 1948, 2041, 2047, 2049, 1934, 1944, 2050, - /* 510 */ 2001, 2051, 2052, 2054, 2053, 2011, 2062, 2056, 1986, 2057, - /* 520 */ 2059, 2014, 2058, 2055, 2060, 1936, 2065, 2061, 2066, 2063, - /* 530 */ 2067, 2069, 1995, 1954, 2074, 2077, 1990, 2068, 2079, 1959, - /* 540 */ 2080, 2070, 2078, 2081, 2084, 2025, 2043, 2034, 2075, 2076, - /* 550 */ 2028, 
2089, 2101, 2103, 2102, 2104, 2105, 2093, 1994, 1996, - /* 560 */ 2108, 2080, 2109, 2110, 2111, 2106, 2113, 2116, 2112, 2114, - /* 570 */ 2131, 2121, 2122, 2127, 2128, 2132, 2133, 2134, 2017, 2015, - /* 580 */ 2018, 2020, 2136, 2135, 2141, 2162, 2163, + /* 0 */ 1877, 1683, 2047, 1338, 1338, 626, 156, 1398, 1479, 1523, + /* 10 */ 1891, 1891, 1891, 776, 626, 626, 626, 626, 626, 0, + /* 20 */ 0, 282, 1057, 1891, 1891, 1891, 1891, 1891, 1891, 1891, + /* 30 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 829, 829, + /* 40 */ 265, 265, 158, 462, 611, 781, 781, 212, 212, 212, + /* 50 */ 212, 105, 210, 323, 395, 500, 520, 544, 649, 669, + /* 60 */ 693, 798, 774, 818, 923, 1037, 1057, 1057, 1057, 1057, + /* 70 */ 1057, 1057, 1057, 1057, 1057, 1057, 1057, 1057, 1057, 1057, + /* 80 */ 1057, 1057, 1057, 1057, 1162, 1057, 1182, 1287, 1287, 1638, + /* 90 */ 1697, 1717, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, + /* 100 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, + /* 110 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, + /* 120 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, + /* 130 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, + /* 140 */ 1891, 1891, 32, 352, 352, 352, 352, 352, 352, 352, + /* 150 */ 136, 91, 113, 72, 781, 1117, 1231, 781, 781, 534, + /* 160 */ 534, 781, 700, 102, 497, 497, 497, 506, 142, 142, + /* 170 */ 2212, 2212, 51, 51, 51, 465, 614, 614, 614, 614, + /* 180 */ 977, 977, 216, 72, 339, 394, 781, 781, 781, 781, + /* 190 */ 781, 781, 781, 781, 781, 781, 781, 781, 781, 781, + /* 200 */ 781, 781, 781, 781, 781, 775, 925, 925, 781, 1194, + /* 210 */ 266, 266, 1163, 1163, 1256, 1256, 1203, 2212, 2212, 2212, + /* 220 */ 2212, 2212, 2212, 2212, 888, 1002, 1002, 651, 174, 931, + /* 230 */ 551, 948, 521, 667, 1055, 781, 781, 781, 781, 781, + /* 240 */ 781, 781, 781, 781, 781, 346, 781, 781, 781, 781, + /* 250 */ 781, 781, 781, 781, 781, 781, 781, 781, 1187, 1187, + /* 260 */ 1187, 781, 781, 
781, 62, 781, 781, 781, 1180, 1157, + /* 270 */ 781, 1288, 781, 781, 781, 781, 781, 781, 781, 781, + /* 280 */ 1237, 101, 722, 229, 229, 229, 229, 373, 722, 722, + /* 290 */ 1087, 1262, 1303, 1469, 1308, 1352, 665, 1352, 1505, 1106, + /* 300 */ 1308, 1308, 1106, 1308, 665, 1505, 1408, 1050, 425, 4, + /* 310 */ 4, 4, 1402, 1402, 1402, 1402, 1115, 1115, 1378, 1442, + /* 320 */ 213, 1552, 1771, 1771, 1698, 1698, 1810, 1810, 1698, 1707, + /* 330 */ 1710, 1828, 1713, 1718, 1823, 1713, 1718, 1862, 1862, 1862, + /* 340 */ 1862, 1698, 1875, 1745, 1710, 1710, 1745, 1828, 1823, 1745, + /* 350 */ 1823, 1745, 1698, 1875, 1748, 1851, 1698, 1875, 1897, 1698, + /* 360 */ 1875, 1698, 1875, 1897, 1804, 1804, 1804, 1864, 1912, 1912, + /* 370 */ 1897, 1804, 1809, 1804, 1864, 1804, 1804, 1772, 1916, 1827, + /* 380 */ 1827, 1897, 1698, 1853, 1853, 1879, 1879, 1713, 1718, 1943, + /* 390 */ 1698, 1811, 1713, 1821, 1824, 1745, 1948, 1962, 1962, 1974, + /* 400 */ 1974, 1974, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, + /* 410 */ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 222, 967, 1226, + /* 420 */ 1229, 48, 1293, 1236, 1554, 1489, 1536, 1484, 1049, 1250, + /* 430 */ 1591, 1548, 1596, 1597, 1636, 1639, 1664, 1679, 1624, 1478, + /* 440 */ 1648, 1190, 1680, 5, 1371, 1631, 1689, 1695, 1580, 1667, + /* 450 */ 1699, 1570, 1573, 1706, 1719, 1660, 1655, 1991, 2002, 1993, + /* 460 */ 1854, 2001, 2004, 1998, 1999, 1882, 1871, 1893, 2003, 2003, + /* 470 */ 2005, 1883, 2006, 1884, 2015, 2031, 1889, 1902, 2003, 1903, + /* 480 */ 1973, 2000, 2003, 1885, 1984, 1985, 1987, 1988, 1913, 1926, + /* 490 */ 2017, 1910, 2053, 2050, 2034, 1945, 1900, 1995, 2041, 1997, + /* 500 */ 1992, 2027, 1927, 1954, 2048, 2054, 2057, 1940, 1951, 2058, + /* 510 */ 2012, 2060, 2061, 2062, 2064, 2019, 2075, 2065, 1994, 2063, + /* 520 */ 2068, 2024, 2059, 2071, 2066, 1944, 2076, 2074, 2078, 2077, + /* 530 */ 2080, 2082, 2007, 1956, 2084, 2085, 1996, 2079, 2083, 1964, + /* 540 */ 2086, 2081, 2087, 2088, 2089, 2020, 2035, 
2028, 2072, 2043, + /* 550 */ 2023, 2090, 2102, 2104, 2105, 2103, 2106, 2098, 1989, 1990, + /* 560 */ 2112, 2086, 2113, 2114, 2115, 2117, 2116, 2118, 2119, 2122, + /* 570 */ 2129, 2123, 2124, 2125, 2126, 2128, 2130, 2131, 2014, 2010, + /* 580 */ 2011, 2016, 2132, 2135, 2140, 2159, 2160, }; #define YY_REDUCE_COUNT (416) -#define YY_REDUCE_MIN (-245) -#define YY_REDUCE_MAX (1802) +#define YY_REDUCE_MIN (-277) +#define YY_REDUCE_MAX (1797) static const short yy_reduce_ofst[] = { - /* 0 */ -176, -122, 1208, 268, 274, -159, 389, -192, 317, -190, - /* 10 */ -133, 392, 395, 101, 460, 513, 529, 638, 895, 135, - /* 20 */ 141, -237, -113, 53, 539, 541, 608, 610, 338, 663, - /* 30 */ 665, 522, 644, 669, 865, -194, 940, 945, 486, 734, - /* 40 */ -18, 165, -114, 525, 773, 847, 923, -234, -10, -234, - /* 50 */ -10, -245, -245, -245, -245, -245, -245, -245, -245, -245, - /* 60 */ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245, - /* 70 */ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245, - /* 80 */ -245, -245, -245, -245, -245, -245, -245, -245, -245, 491, - /* 90 */ 561, 709, 809, 822, 927, 949, 952, 1036, 1053, 1055, - /* 100 */ 1073, 1075, 1080, 1100, 1122, 1129, 1157, 1159, 1184, 1197, - /* 110 */ 1201, 1204, 1206, 1241, 1251, 1261, 1292, 1303, 1308, 1310, - /* 120 */ 1312, 1317, 1321, 1323, 1328, 1337, 1357, 1362, 1402, 1406, - /* 130 */ 1408, 1419, 1423, 1454, 1460, 1462, 1473, 1500, 1512, 1547, - /* 140 */ 1560, 1569, -245, -245, -245, -245, -245, -245, -245, -245, - /* 150 */ -245, -245, -245, -131, 168, 195, -225, -62, 526, 427, - /* 160 */ 818, 151, -245, 546, 65, 899, 937, 780, -245, -245, - /* 170 */ -245, -245, -208, -208, -208, 54, 39, 810, 894, 953, - /* 180 */ -139, 936, -196, -189, 912, 912, 1082, 243, 516, 635, - /* 190 */ -116, 56, 1040, 1109, 690, 271, 1056, 512, 758, 1144, - /* 200 */ 960, 1155, -191, 360, 1093, 966, 548, 1113, 281, 595, - /* 210 */ 1114, 1121, 1178, 1185, 1012, 1076, 23, 1041, 260, 585, - /* 220 */ 769, 1175, 1222, 1211, 
-181, -45, 72, 107, 94, 214, - /* 230 */ 221, 336, 363, 523, 609, 650, 664, 700, 893, 999, - /* 240 */ 1088, 1283, 1339, 1356, 1366, 1081, 1377, 1394, 1398, 1427, - /* 250 */ 1428, 1448, 1458, 1468, 1472, 1475, 1476, 1482, 1456, 1461, - /* 260 */ 1465, 1508, 1514, 1522, 1459, 1526, 1540, 1551, 1353, 1431, - /* 270 */ 1567, 1531, 1576, 221, 1577, 1580, 1581, 1585, 1586, 1587, - /* 280 */ 1479, 1493, 1541, 1528, 1532, 1533, 1534, 1459, 1541, 1541, - /* 290 */ 1544, 1578, 1591, 1495, 1519, 1529, 1549, 1530, 1505, 1554, - /* 300 */ 1536, 1538, 1555, 1542, 1557, 1509, 1589, 1582, 1588, 1599, - /* 310 */ 1601, 1602, 1545, 1552, 1562, 1563, 1566, 1579, 1546, 1584, - /* 320 */ 1592, 1629, 1543, 1548, 1632, 1638, 1550, 1553, 1640, 1559, - /* 330 */ 1574, 1583, 1606, 1603, 1623, 1608, 1611, 1625, 1626, 1630, - /* 340 */ 1631, 1666, 1672, 1628, 1604, 1607, 1635, 1612, 1639, 1636, - /* 350 */ 1641, 1644, 1676, 1679, 1594, 1597, 1686, 1693, 1675, 1694, - /* 360 */ 1696, 1697, 1700, 1680, 1682, 1684, 1685, 1683, 1687, 1689, - /* 370 */ 1688, 1690, 1701, 1702, 1699, 1704, 1705, 1614, 1624, 1648, - /* 380 */ 1654, 1719, 1741, 1642, 1643, 1678, 1698, 1706, 1708, 1668, - /* 390 */ 1747, 1669, 1709, 1712, 1718, 1721, 1766, 1776, 1784, 1790, - /* 400 */ 1801, 1802, 1710, 1713, 1715, 1777, 1770, 1778, 1782, 1783, - /* 410 */ 1793, 1768, 1775, 1785, 1788, 1797, 1796, + /* 0 */ -176, -71, -170, 746, 799, -159, -20, -186, -30, 85, + /* 10 */ 94, 260, 277, -180, 159, 164, 275, 279, 284, -98, + /* 20 */ 145, -274, 401, -116, 577, 701, 904, 958, -178, 960, + /* 30 */ 965, 273, 744, 1028, 803, 207, 426, 879, 299, 424, + /* 40 */ -26, 696, 924, 750, 771, 820, 1000, 228, 421, 228, + /* 50 */ 421, -277, -277, -277, -277, -277, -277, -277, -277, -277, + /* 60 */ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277, + /* 70 */ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277, + /* 80 */ -277, -277, -277, -277, -277, -277, -277, -277, -277, 570, + /* 90 */ 573, 616, 621, 852, 1064, 
1083, 1103, 1108, 1133, 1154, + /* 100 */ 1159, 1161, 1164, 1166, 1168, 1170, 1208, 1211, 1213, 1217, + /* 110 */ 1219, 1224, 1233, 1244, 1254, 1264, 1273, 1291, 1294, 1307, + /* 120 */ 1310, 1316, 1328, 1332, 1342, 1344, 1346, 1349, 1354, 1374, + /* 130 */ 1385, 1388, 1390, 1394, 1399, 1431, 1433, 1435, 1449, 1453, + /* 140 */ 1480, 1491, -277, -277, -277, -277, -277, -277, -277, -277, + /* 150 */ -277, -277, -277, 479, -177, 199, 893, 2, 650, -206, + /* 160 */ -145, 319, -277, 703, 420, 639, 680, 770, -277, -277, + /* 170 */ -277, -277, -183, -183, -183, -99, 291, 316, 396, 865, + /* 180 */ -200, 445, -11, 1, 590, 590, -135, 422, 870, 874, + /* 190 */ 884, 1014, 1090, 1122, -101, 80, 641, 719, 796, 905, + /* 200 */ 922, 1210, 303, 992, 990, 681, 877, 947, 997, 1119, + /* 210 */ 1001, 1003, 1178, 1202, 1199, 1205, 772, 1172, 1198, 1207, + /* 220 */ 1220, 1230, 1221, 1274, -208, -193, -102, 98, 97, 285, + /* 230 */ 294, 366, 427, 515, 545, 670, 695, 763, 844, 938, + /* 240 */ 1109, 1181, 1380, 1383, 1389, 135, 1410, 1434, 1450, 1475, + /* 250 */ 1481, 1496, 1506, 1513, 1526, 1537, 1539, 1540, 306, 898, + /* 260 */ 968, 1543, 1546, 1550, 1272, 1556, 1557, 1564, 1415, 1427, + /* 270 */ 1565, 1524, 1569, 294, 1572, 1574, 1576, 1577, 1578, 1579, + /* 280 */ 1474, 1485, 1532, 1510, 1519, 1520, 1522, 1272, 1532, 1532, + /* 290 */ 1541, 1566, 1588, 1487, 1511, 1514, 1542, 1518, 1495, 1544, + /* 300 */ 1527, 1528, 1555, 1530, 1553, 1512, 1585, 1581, 1582, 1589, + /* 310 */ 1590, 1593, 1559, 1567, 1568, 1583, 1587, 1592, 1545, 1584, + /* 320 */ 1599, 1629, 1534, 1549, 1640, 1644, 1560, 1561, 1649, 1575, + /* 330 */ 1594, 1595, 1612, 1615, 1634, 1619, 1626, 1645, 1646, 1647, + /* 340 */ 1653, 1681, 1694, 1650, 1622, 1627, 1651, 1630, 1668, 1657, + /* 350 */ 1670, 1666, 1714, 1711, 1625, 1628, 1720, 1722, 1701, 1723, + /* 360 */ 1725, 1726, 1728, 1708, 1712, 1715, 1716, 1704, 1721, 1724, + /* 370 */ 1729, 1731, 1732, 1733, 1709, 1735, 1736, 1632, 1633, 1658, + /* 380 */ 1659, 
1738, 1743, 1637, 1641, 1691, 1700, 1727, 1730, 1686, + /* 390 */ 1765, 1687, 1734, 1737, 1739, 1741, 1770, 1780, 1782, 1788, + /* 400 */ 1789, 1790, 1684, 1685, 1682, 1783, 1779, 1781, 1786, 1787, + /* 410 */ 1794, 1773, 1774, 1792, 1797, 1791, 1795, }; static const YYACTIONTYPE yy_default[] = { /* 0 */ 1667, 1667, 1667, 1495, 1258, 1371, 1258, 1258, 1258, 1258, @@ -178240,6 +178927,7 @@ static const YYCODETYPE yyFallback[] = { 0, /* GE => nothing */ 0, /* ESCAPE => nothing */ 9, /* COLUMNKW => ID */ + 9, /* CONCURRENT => ID */ 9, /* DO => ID */ 9, /* FOR => ID */ 9, /* IGNORE => ID */ @@ -178355,7 +179043,6 @@ static const YYCODETYPE yyFallback[] = { 0, /* UMINUS => nothing */ 0, /* TRUTH => nothing */ 0, /* REGISTER => nothing */ - 0, /* CONCURRENT => nothing */ 0, /* VECTOR => nothing */ 0, /* SELECT_COLUMN => nothing */ 0, /* IF_NULL_ROW => nothing */ @@ -178510,122 +179197,122 @@ static const char *const yyTokenName[] = { /* 59 */ "GE", /* 60 */ "ESCAPE", /* 61 */ "COLUMNKW", - /* 62 */ "DO", - /* 63 */ "FOR", - /* 64 */ "IGNORE", - /* 65 */ "INITIALLY", - /* 66 */ "INSTEAD", - /* 67 */ "NO", - /* 68 */ "KEY", - /* 69 */ "OF", - /* 70 */ "OFFSET", - /* 71 */ "PRAGMA", - /* 72 */ "RAISE", - /* 73 */ "RECURSIVE", - /* 74 */ "REPLACE", - /* 75 */ "RESTRICT", - /* 76 */ "ROW", - /* 77 */ "ROWS", - /* 78 */ "TRIGGER", - /* 79 */ "VACUUM", - /* 80 */ "VIEW", - /* 81 */ "VIRTUAL", - /* 82 */ "WITH", - /* 83 */ "NULLS", - /* 84 */ "FIRST", - /* 85 */ "LAST", - /* 86 */ "CURRENT", - /* 87 */ "FOLLOWING", - /* 88 */ "PARTITION", - /* 89 */ "PRECEDING", - /* 90 */ "RANGE", - /* 91 */ "UNBOUNDED", - /* 92 */ "EXCLUDE", - /* 93 */ "GROUPS", - /* 94 */ "OTHERS", - /* 95 */ "TIES", - /* 96 */ "GENERATED", - /* 97 */ "ALWAYS", - /* 98 */ "MATERIALIZED", - /* 99 */ "REINDEX", - /* 100 */ "RENAME", - /* 101 */ "CTIME_KW", - /* 102 */ "ANY", - /* 103 */ "BITAND", - /* 104 */ "BITOR", - /* 105 */ "LSHIFT", - /* 106 */ "RSHIFT", - /* 107 */ "PLUS", - /* 108 */ "MINUS", - /* 109 
*/ "STAR", - /* 110 */ "SLASH", - /* 111 */ "REM", - /* 112 */ "CONCAT", - /* 113 */ "PTR", - /* 114 */ "COLLATE", - /* 115 */ "BITNOT", - /* 116 */ "ON", - /* 117 */ "INDEXED", - /* 118 */ "STRING", - /* 119 */ "JOIN_KW", - /* 120 */ "CONSTRAINT", - /* 121 */ "DEFAULT", - /* 122 */ "NULL", - /* 123 */ "PRIMARY", - /* 124 */ "UNIQUE", - /* 125 */ "CHECK", - /* 126 */ "REFERENCES", - /* 127 */ "AUTOINCR", - /* 128 */ "INSERT", - /* 129 */ "DELETE", - /* 130 */ "UPDATE", - /* 131 */ "SET", - /* 132 */ "DEFERRABLE", - /* 133 */ "FOREIGN", - /* 134 */ "DROP", - /* 135 */ "UNION", - /* 136 */ "ALL", - /* 137 */ "EXCEPT", - /* 138 */ "INTERSECT", - /* 139 */ "SELECT", - /* 140 */ "VALUES", - /* 141 */ "DISTINCT", - /* 142 */ "DOT", - /* 143 */ "FROM", - /* 144 */ "JOIN", - /* 145 */ "USING", - /* 146 */ "ORDER", - /* 147 */ "GROUP", - /* 148 */ "HAVING", - /* 149 */ "LIMIT", - /* 150 */ "WHERE", - /* 151 */ "RETURNING", - /* 152 */ "INTO", - /* 153 */ "NOTHING", - /* 154 */ "FLOAT", - /* 155 */ "BLOB", - /* 156 */ "INTEGER", - /* 157 */ "VARIABLE", - /* 158 */ "CASE", - /* 159 */ "WHEN", - /* 160 */ "THEN", - /* 161 */ "ELSE", - /* 162 */ "INDEX", - /* 163 */ "ALTER", - /* 164 */ "ADD", - /* 165 */ "WINDOW", - /* 166 */ "OVER", - /* 167 */ "FILTER", - /* 168 */ "COLUMN", - /* 169 */ "AGG_FUNCTION", - /* 170 */ "AGG_COLUMN", - /* 171 */ "TRUEFALSE", - /* 172 */ "FUNCTION", - /* 173 */ "UPLUS", - /* 174 */ "UMINUS", - /* 175 */ "TRUTH", - /* 176 */ "REGISTER", - /* 177 */ "CONCURRENT", + /* 62 */ "CONCURRENT", + /* 63 */ "DO", + /* 64 */ "FOR", + /* 65 */ "IGNORE", + /* 66 */ "INITIALLY", + /* 67 */ "INSTEAD", + /* 68 */ "NO", + /* 69 */ "KEY", + /* 70 */ "OF", + /* 71 */ "OFFSET", + /* 72 */ "PRAGMA", + /* 73 */ "RAISE", + /* 74 */ "RECURSIVE", + /* 75 */ "REPLACE", + /* 76 */ "RESTRICT", + /* 77 */ "ROW", + /* 78 */ "ROWS", + /* 79 */ "TRIGGER", + /* 80 */ "VACUUM", + /* 81 */ "VIEW", + /* 82 */ "VIRTUAL", + /* 83 */ "WITH", + /* 84 */ "NULLS", + /* 85 */ "FIRST", + /* 
86 */ "LAST", + /* 87 */ "CURRENT", + /* 88 */ "FOLLOWING", + /* 89 */ "PARTITION", + /* 90 */ "PRECEDING", + /* 91 */ "RANGE", + /* 92 */ "UNBOUNDED", + /* 93 */ "EXCLUDE", + /* 94 */ "GROUPS", + /* 95 */ "OTHERS", + /* 96 */ "TIES", + /* 97 */ "GENERATED", + /* 98 */ "ALWAYS", + /* 99 */ "MATERIALIZED", + /* 100 */ "REINDEX", + /* 101 */ "RENAME", + /* 102 */ "CTIME_KW", + /* 103 */ "ANY", + /* 104 */ "BITAND", + /* 105 */ "BITOR", + /* 106 */ "LSHIFT", + /* 107 */ "RSHIFT", + /* 108 */ "PLUS", + /* 109 */ "MINUS", + /* 110 */ "STAR", + /* 111 */ "SLASH", + /* 112 */ "REM", + /* 113 */ "CONCAT", + /* 114 */ "PTR", + /* 115 */ "COLLATE", + /* 116 */ "BITNOT", + /* 117 */ "ON", + /* 118 */ "INDEXED", + /* 119 */ "STRING", + /* 120 */ "JOIN_KW", + /* 121 */ "CONSTRAINT", + /* 122 */ "DEFAULT", + /* 123 */ "NULL", + /* 124 */ "PRIMARY", + /* 125 */ "UNIQUE", + /* 126 */ "CHECK", + /* 127 */ "REFERENCES", + /* 128 */ "AUTOINCR", + /* 129 */ "INSERT", + /* 130 */ "DELETE", + /* 131 */ "UPDATE", + /* 132 */ "SET", + /* 133 */ "DEFERRABLE", + /* 134 */ "FOREIGN", + /* 135 */ "DROP", + /* 136 */ "UNION", + /* 137 */ "ALL", + /* 138 */ "EXCEPT", + /* 139 */ "INTERSECT", + /* 140 */ "SELECT", + /* 141 */ "VALUES", + /* 142 */ "DISTINCT", + /* 143 */ "DOT", + /* 144 */ "FROM", + /* 145 */ "JOIN", + /* 146 */ "USING", + /* 147 */ "ORDER", + /* 148 */ "GROUP", + /* 149 */ "HAVING", + /* 150 */ "LIMIT", + /* 151 */ "WHERE", + /* 152 */ "RETURNING", + /* 153 */ "INTO", + /* 154 */ "NOTHING", + /* 155 */ "FLOAT", + /* 156 */ "BLOB", + /* 157 */ "INTEGER", + /* 158 */ "VARIABLE", + /* 159 */ "CASE", + /* 160 */ "WHEN", + /* 161 */ "THEN", + /* 162 */ "ELSE", + /* 163 */ "INDEX", + /* 164 */ "ALTER", + /* 165 */ "ADD", + /* 166 */ "WINDOW", + /* 167 */ "OVER", + /* 168 */ "FILTER", + /* 169 */ "COLUMN", + /* 170 */ "AGG_FUNCTION", + /* 171 */ "AGG_COLUMN", + /* 172 */ "TRUEFALSE", + /* 173 */ "FUNCTION", + /* 174 */ "UPLUS", + /* 175 */ "UMINUS", + /* 176 */ "TRUTH", + /* 177 */ 
"REGISTER", /* 178 */ "VECTOR", /* 179 */ "SELECT_COLUMN", /* 180 */ "IF_NULL_ROW", @@ -180876,7 +181563,11 @@ static YYACTIONTYPE yy_reduce( case 84: /* cmd ::= select */ { SelectDest dest = {SRT_Output, 0, 0, 0, 0, 0, 0}; - sqlite3Select(pParse, yymsp[0].minor.yy637, &dest); + if( (pParse->db->mDbFlags & DBFLAG_EncodingFixed)!=0 + || sqlite3ReadSchema(pParse)==SQLITE_OK + ){ + sqlite3Select(pParse, yymsp[0].minor.yy637, &dest); + } sqlite3SelectDelete(pParse->db, yymsp[0].minor.yy637); } break; @@ -182646,152 +183337,153 @@ const unsigned char ebcdicToAscii[] = { ** is substantially reduced. This is important for embedded applications ** on platforms with limited memory. */ -/* Hash score: 231 */ -/* zKWText[] encodes 1007 bytes of keyword text in 667 bytes */ -/* REINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXPLAINSTEADDATABASELECT */ -/* ABLEFTHENDEFERRABLELSEXCLUDELETEMPORARYISNULLSAVEPOINTERSECT */ -/* IESNOTNULLIKEXCEPTRANSACTIONATURALTERAISEXCLUSIVEXISTS */ -/* CONSTRAINTOFFSETRIGGERANGENERATEDETACHAVINGLOBEGINNEREFERENCES */ -/* UNIQUERYWITHOUTERELEASEATTACHBETWEENOTHINGROUPSCASCADEFAULT */ -/* CASECOLLATECREATECURRENT_DATEIMMEDIATEJOINSERTMATCHPLANALYZE */ -/* PRAGMATERIALIZEDEFERREDISTINCTUPDATEVALUESVIRTUALWAYSWHENWHERE */ -/* CURSIVEABORTAFTERENAMEANDROPARTITIONAUTOINCREMENTCASTCOLUMN */ -/* COMMITCONFLICTCROSSCURRENT_TIMESTAMPRECEDINGFAILASTFILTER */ -/* EPLACEFIRSTFOLLOWINGFROMFULLIMITIFORDERESTRICTOTHERSOVER */ +/* Hash score: 233 */ +/* zKWText[] encodes 1018 bytes of keyword text in 669 bytes */ +/* CONCURRENT_DATEMPORARYREINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXP */ +/* LAINSTEADDATABASELECTABLEFTHENDEFERRABLELSEXCLUDELETEXCEPTIES */ +/* AVEPOINTERSECTRANSACTIONOTNULLSISNULLIKEXCLUSIVEXISTS */ +/* CONSTRAINTOFFSETRIGGERAISEUNIQUERYWITHOUTERANGENERATEDETACH */ +/* AVINGLOBEGINNEREFERENCESATTACHBETWEENATURALTERELEASECASCADE */ +/* FAULTCASECOLLATECREATEIMMEDIATEJOINSERTMATCHPLANALYZEPRAGMA */ +/* TERIALIZEDEFERREDISTINCTUPDATEVALUESVIRTUALWAYSWHENOTHINGROUPS */ 
+/* WHERECURSIVEABORTAFTERENAMEANDROPARTITIONAUTOINCREMENTCAST */ +/* COLUMNCOMMITCONFLICTCROSSCURRENT_TIMESTAMPRECEDINGFAILAST */ +/* FILTEREPLACEFIRSTFOLLOWINGFROMFULLIMITIFORDERESTRICTOTHERSOVER */ /* ETURNINGRIGHTROLLBACKROWSUNBOUNDEDUNIONUSINGVACUUMVIEWINDOWBY */ /* INITIALLYPRIMARY */ -static const char zKWText[666] = { - 'R','E','I','N','D','E','X','E','D','E','S','C','A','P','E','A','C','H', - 'E','C','K','E','Y','B','E','F','O','R','E','I','G','N','O','R','E','G', - 'E','X','P','L','A','I','N','S','T','E','A','D','D','A','T','A','B','A', - 'S','E','L','E','C','T','A','B','L','E','F','T','H','E','N','D','E','F', - 'E','R','R','A','B','L','E','L','S','E','X','C','L','U','D','E','L','E', - 'T','E','M','P','O','R','A','R','Y','I','S','N','U','L','L','S','A','V', - 'E','P','O','I','N','T','E','R','S','E','C','T','I','E','S','N','O','T', - 'N','U','L','L','I','K','E','X','C','E','P','T','R','A','N','S','A','C', - 'T','I','O','N','A','T','U','R','A','L','T','E','R','A','I','S','E','X', - 'C','L','U','S','I','V','E','X','I','S','T','S','C','O','N','S','T','R', - 'A','I','N','T','O','F','F','S','E','T','R','I','G','G','E','R','A','N', - 'G','E','N','E','R','A','T','E','D','E','T','A','C','H','A','V','I','N', - 'G','L','O','B','E','G','I','N','N','E','R','E','F','E','R','E','N','C', - 'E','S','U','N','I','Q','U','E','R','Y','W','I','T','H','O','U','T','E', - 'R','E','L','E','A','S','E','A','T','T','A','C','H','B','E','T','W','E', - 'E','N','O','T','H','I','N','G','R','O','U','P','S','C','A','S','C','A', - 'D','E','F','A','U','L','T','C','A','S','E','C','O','L','L','A','T','E', - 'C','R','E','A','T','E','C','U','R','R','E','N','T','_','D','A','T','E', - 'I','M','M','E','D','I','A','T','E','J','O','I','N','S','E','R','T','M', - 'A','T','C','H','P','L','A','N','A','L','Y','Z','E','P','R','A','G','M', - 'A','T','E','R','I','A','L','I','Z','E','D','E','F','E','R','R','E','D', - 'I','S','T','I','N','C','T','U','P','D','A','T','E','V','A','L','U','E', - 
'S','V','I','R','T','U','A','L','W','A','Y','S','W','H','E','N','W','H', - 'E','R','E','C','U','R','S','I','V','E','A','B','O','R','T','A','F','T', - 'E','R','E','N','A','M','E','A','N','D','R','O','P','A','R','T','I','T', - 'I','O','N','A','U','T','O','I','N','C','R','E','M','E','N','T','C','A', - 'S','T','C','O','L','U','M','N','C','O','M','M','I','T','C','O','N','F', - 'L','I','C','T','C','R','O','S','S','C','U','R','R','E','N','T','_','T', - 'I','M','E','S','T','A','M','P','R','E','C','E','D','I','N','G','F','A', - 'I','L','A','S','T','F','I','L','T','E','R','E','P','L','A','C','E','F', - 'I','R','S','T','F','O','L','L','O','W','I','N','G','F','R','O','M','F', - 'U','L','L','I','M','I','T','I','F','O','R','D','E','R','E','S','T','R', - 'I','C','T','O','T','H','E','R','S','O','V','E','R','E','T','U','R','N', - 'I','N','G','R','I','G','H','T','R','O','L','L','B','A','C','K','R','O', - 'W','S','U','N','B','O','U','N','D','E','D','U','N','I','O','N','U','S', - 'I','N','G','V','A','C','U','U','M','V','I','E','W','I','N','D','O','W', - 'B','Y','I','N','I','T','I','A','L','L','Y','P','R','I','M','A','R','Y', +static const char zKWText[668] = { + 'C','O','N','C','U','R','R','E','N','T','_','D','A','T','E','M','P','O', + 'R','A','R','Y','R','E','I','N','D','E','X','E','D','E','S','C','A','P', + 'E','A','C','H','E','C','K','E','Y','B','E','F','O','R','E','I','G','N', + 'O','R','E','G','E','X','P','L','A','I','N','S','T','E','A','D','D','A', + 'T','A','B','A','S','E','L','E','C','T','A','B','L','E','F','T','H','E', + 'N','D','E','F','E','R','R','A','B','L','E','L','S','E','X','C','L','U', + 'D','E','L','E','T','E','X','C','E','P','T','I','E','S','A','V','E','P', + 'O','I','N','T','E','R','S','E','C','T','R','A','N','S','A','C','T','I', + 'O','N','O','T','N','U','L','L','S','I','S','N','U','L','L','I','K','E', + 'X','C','L','U','S','I','V','E','X','I','S','T','S','C','O','N','S','T', + 'R','A','I','N','T','O','F','F','S','E','T','R','I','G','G','E','R','A', + 
'I','S','E','U','N','I','Q','U','E','R','Y','W','I','T','H','O','U','T', + 'E','R','A','N','G','E','N','E','R','A','T','E','D','E','T','A','C','H', + 'A','V','I','N','G','L','O','B','E','G','I','N','N','E','R','E','F','E', + 'R','E','N','C','E','S','A','T','T','A','C','H','B','E','T','W','E','E', + 'N','A','T','U','R','A','L','T','E','R','E','L','E','A','S','E','C','A', + 'S','C','A','D','E','F','A','U','L','T','C','A','S','E','C','O','L','L', + 'A','T','E','C','R','E','A','T','E','I','M','M','E','D','I','A','T','E', + 'J','O','I','N','S','E','R','T','M','A','T','C','H','P','L','A','N','A', + 'L','Y','Z','E','P','R','A','G','M','A','T','E','R','I','A','L','I','Z', + 'E','D','E','F','E','R','R','E','D','I','S','T','I','N','C','T','U','P', + 'D','A','T','E','V','A','L','U','E','S','V','I','R','T','U','A','L','W', + 'A','Y','S','W','H','E','N','O','T','H','I','N','G','R','O','U','P','S', + 'W','H','E','R','E','C','U','R','S','I','V','E','A','B','O','R','T','A', + 'F','T','E','R','E','N','A','M','E','A','N','D','R','O','P','A','R','T', + 'I','T','I','O','N','A','U','T','O','I','N','C','R','E','M','E','N','T', + 'C','A','S','T','C','O','L','U','M','N','C','O','M','M','I','T','C','O', + 'N','F','L','I','C','T','C','R','O','S','S','C','U','R','R','E','N','T', + '_','T','I','M','E','S','T','A','M','P','R','E','C','E','D','I','N','G', + 'F','A','I','L','A','S','T','F','I','L','T','E','R','E','P','L','A','C', + 'E','F','I','R','S','T','F','O','L','L','O','W','I','N','G','F','R','O', + 'M','F','U','L','L','I','M','I','T','I','F','O','R','D','E','R','E','S', + 'T','R','I','C','T','O','T','H','E','R','S','O','V','E','R','E','T','U', + 'R','N','I','N','G','R','I','G','H','T','R','O','L','L','B','A','C','K', + 'R','O','W','S','U','N','B','O','U','N','D','E','D','U','N','I','O','N', + 'U','S','I','N','G','V','A','C','U','U','M','V','I','E','W','I','N','D', + 'O','W','B','Y','I','N','I','T','I','A','L','L','Y','P','R','I','M','A', + 'R','Y', }; /* aKWHash[i] is the hash value for 
the i-th keyword */ static const unsigned char aKWHash[127] = { - 84, 92, 134, 82, 105, 29, 0, 0, 94, 0, 85, 72, 0, - 53, 35, 86, 15, 0, 42, 97, 54, 89, 135, 19, 0, 0, - 140, 0, 40, 129, 0, 22, 107, 0, 9, 0, 0, 123, 80, - 0, 78, 6, 0, 65, 103, 147, 0, 136, 115, 0, 0, 48, - 0, 90, 24, 0, 17, 0, 27, 70, 23, 26, 5, 60, 142, - 110, 122, 0, 73, 91, 71, 145, 61, 120, 74, 0, 49, 0, - 11, 41, 0, 113, 0, 0, 0, 109, 10, 111, 116, 125, 14, - 50, 124, 0, 100, 0, 18, 121, 144, 56, 130, 139, 88, 83, - 37, 30, 126, 0, 0, 108, 51, 131, 128, 0, 34, 0, 0, - 132, 0, 98, 38, 39, 0, 20, 45, 117, 93, + 82, 90, 135, 80, 106, 4, 0, 0, 92, 0, 83, 96, 0, + 52, 34, 84, 20, 0, 37, 95, 53, 87, 136, 24, 0, 0, + 141, 0, 46, 130, 0, 27, 108, 0, 14, 0, 0, 124, 79, + 0, 77, 11, 0, 57, 104, 148, 0, 137, 116, 0, 0, 47, + 0, 88, 29, 0, 22, 0, 32, 69, 28, 31, 10, 65, 143, + 111, 123, 0, 97, 89, 70, 146, 66, 121, 98, 0, 48, 0, + 16, 33, 0, 114, 0, 0, 0, 110, 15, 112, 117, 126, 19, + 49, 125, 0, 101, 0, 23, 122, 145, 61, 131, 140, 86, 81, + 41, 5, 127, 0, 0, 109, 50, 132, 129, 0, 36, 0, 0, + 133, 0, 99, 42, 44, 0, 25, 71, 118, 91, }; /* aKWNext[] forms the hash collision chain. If aKWHash[i]==0 ** then the i-th keyword has no more hash collisions. Otherwise, ** the next keyword with the same hash is aKWHash[i]-1. 
*/ -static const unsigned char aKWNext[148] = {0, - 0, 0, 0, 0, 4, 0, 43, 0, 0, 106, 114, 0, 0, - 0, 2, 0, 0, 143, 0, 0, 0, 13, 0, 0, 0, 0, - 141, 0, 0, 119, 52, 0, 0, 137, 12, 0, 0, 62, 0, - 138, 0, 133, 0, 0, 36, 0, 0, 28, 77, 0, 0, 0, - 0, 59, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 69, 0, 0, 0, 0, 0, 146, 3, 0, 58, 0, 1, - 75, 0, 0, 0, 31, 0, 0, 0, 0, 0, 127, 0, 104, - 0, 64, 66, 63, 0, 0, 0, 0, 0, 46, 0, 16, 8, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 101, 0, - 112, 21, 7, 67, 0, 79, 96, 118, 0, 0, 68, 0, 0, - 99, 44, 0, 55, 0, 76, 0, 95, 32, 33, 57, 25, 0, - 102, 0, 0, 87, +static const unsigned char aKWNext[149] = {0, + 0, 0, 0, 0, 120, 0, 0, 0, 0, 9, 0, 38, 0, + 0, 107, 115, 0, 0, 0, 7, 0, 0, 144, 0, 0, 0, + 18, 0, 0, 0, 0, 142, 0, 17, 0, 138, 134, 0, 0, + 0, 0, 67, 0, 0, 51, 139, 3, 76, 1, 0, 0, 0, + 64, 0, 0, 0, 0, 0, 73, 0, 55, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 40, 0, 0, 0, 63, 0, 6, 74, + 0, 0, 45, 0, 0, 0, 0, 0, 128, 0, 105, 0, 56, + 58, 68, 0, 0, 0, 147, 8, 0, 0, 0, 72, 0, 21, + 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 102, + 0, 113, 26, 12, 59, 0, 78, 94, 119, 0, 0, 60, 0, + 0, 100, 39, 0, 54, 0, 75, 0, 93, 43, 35, 62, 30, + 0, 103, 0, 0, 85, }; /* aKWLen[i] is the length (in bytes) of the i-th keyword */ -static const unsigned char aKWLen[148] = {0, - 7, 7, 5, 4, 6, 4, 5, 3, 6, 7, 3, 6, 6, - 7, 7, 3, 8, 2, 6, 5, 4, 4, 3, 10, 4, 7, - 6, 9, 4, 2, 6, 5, 9, 9, 4, 7, 3, 2, 4, - 4, 6, 11, 6, 2, 7, 5, 5, 9, 6, 10, 4, 6, - 2, 3, 7, 5, 9, 6, 6, 4, 5, 5, 10, 6, 5, - 7, 4, 5, 7, 6, 7, 7, 6, 5, 7, 3, 7, 4, - 7, 6, 12, 9, 4, 6, 5, 4, 7, 6, 12, 8, 8, - 2, 6, 6, 7, 6, 4, 5, 9, 5, 5, 6, 3, 4, - 9, 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, 9, - 4, 4, 6, 7, 5, 9, 4, 4, 5, 2, 5, 8, 6, - 4, 9, 5, 8, 4, 3, 9, 5, 5, 6, 4, 6, 2, - 2, 9, 3, 7, +static const unsigned char aKWLen[149] = {0, + 10, 12, 9, 4, 2, 7, 7, 5, 4, 6, 4, 5, 3, + 6, 7, 3, 6, 6, 7, 7, 3, 8, 2, 6, 5, 4, + 4, 3, 10, 4, 7, 6, 6, 4, 9, 9, 11, 6, 2, + 7, 3, 2, 5, 4, 6, 4, 9, 6, 10, 4, 6, 2, + 3, 7, 5, 6, 5, 7, 4, 5, 5, 9, 6, 6, 
4, + 5, 5, 10, 6, 7, 7, 5, 7, 7, 3, 7, 4, 7, + 6, 9, 4, 6, 5, 4, 7, 6, 12, 8, 8, 2, 6, + 6, 7, 6, 4, 7, 6, 5, 5, 9, 5, 5, 6, 3, + 4, 9, 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, + 9, 4, 4, 6, 7, 5, 9, 4, 4, 5, 2, 5, 8, + 6, 4, 9, 5, 8, 4, 3, 9, 5, 5, 6, 4, 6, + 2, 2, 9, 3, 7, }; /* aKWOffset[i] is the index into zKWText[] of the start of ** the text for the i-th keyword. */ -static const unsigned short int aKWOffset[148] = {0, - 0, 2, 2, 8, 9, 14, 16, 20, 23, 25, 25, 29, 33, - 36, 41, 46, 48, 53, 54, 59, 62, 65, 67, 69, 78, 81, - 86, 90, 90, 94, 99, 101, 105, 111, 119, 123, 123, 123, 126, - 129, 132, 137, 142, 146, 147, 152, 156, 160, 168, 174, 181, 184, - 184, 187, 189, 195, 198, 206, 211, 216, 219, 222, 226, 236, 239, - 244, 244, 248, 252, 259, 265, 271, 277, 277, 283, 284, 288, 295, - 299, 306, 312, 324, 333, 335, 341, 346, 348, 355, 359, 370, 377, - 378, 385, 391, 397, 402, 408, 412, 415, 424, 429, 433, 439, 441, - 444, 453, 455, 457, 466, 470, 476, 482, 490, 495, 495, 495, 511, - 520, 523, 527, 532, 539, 544, 553, 557, 560, 565, 567, 571, 579, - 585, 588, 597, 602, 610, 610, 614, 623, 628, 633, 639, 642, 645, - 648, 650, 655, 659, +static const unsigned short int aKWOffset[149] = {0, + 0, 3, 13, 13, 17, 22, 24, 24, 30, 31, 36, 38, 42, + 45, 47, 47, 51, 55, 58, 63, 68, 70, 75, 76, 81, 84, + 87, 89, 91, 100, 103, 108, 113, 118, 121, 127, 135, 140, 144, + 145, 145, 145, 148, 148, 153, 158, 161, 169, 175, 182, 185, 185, + 188, 190, 196, 201, 204, 209, 209, 213, 217, 220, 228, 233, 238, + 241, 244, 248, 258, 264, 270, 275, 279, 286, 287, 291, 298, 302, + 309, 315, 324, 326, 332, 337, 339, 346, 350, 361, 368, 369, 376, + 382, 388, 393, 399, 402, 408, 408, 414, 417, 426, 431, 435, 441, + 443, 446, 455, 457, 459, 468, 472, 478, 484, 492, 497, 497, 497, + 513, 522, 525, 529, 534, 541, 546, 555, 559, 562, 567, 569, 573, + 581, 587, 590, 599, 604, 612, 612, 616, 625, 630, 635, 641, 644, + 647, 650, 652, 657, 661, }; /* aKWCode[i] is the parser symbol code for the i-th keyword 
*/ -static const unsigned char aKWCode[148] = {0, +static const unsigned char aKWCode[149] = {0, + TK_CONCURRENT, TK_CTIME_KW, TK_TEMP, TK_TEMP, TK_OR, TK_REINDEX, TK_INDEXED, TK_INDEX, TK_DESC, TK_ESCAPE, TK_EACH, TK_CHECK, TK_KEY, TK_BEFORE, TK_FOREIGN, TK_FOR, TK_IGNORE, TK_LIKE_KW, TK_EXPLAIN, TK_INSTEAD, TK_ADD, TK_DATABASE, TK_AS, TK_SELECT, TK_TABLE, TK_JOIN_KW, TK_THEN, TK_END, TK_DEFERRABLE, TK_ELSE, - TK_EXCLUDE, TK_DELETE, TK_TEMP, TK_TEMP, TK_OR, - TK_ISNULL, TK_NULLS, TK_SAVEPOINT, TK_INTERSECT, TK_TIES, - TK_NOTNULL, TK_NOT, TK_NO, TK_NULL, TK_LIKE_KW, - TK_EXCEPT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_JOIN_KW, - TK_ALTER, TK_RAISE, TK_EXCLUSIVE, TK_EXISTS, TK_CONSTRAINT, - TK_INTO, TK_OFFSET, TK_OF, TK_SET, TK_TRIGGER, + TK_EXCLUDE, TK_DELETE, TK_EXCEPT, TK_TIES, TK_SAVEPOINT, + TK_INTERSECT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_NOTNULL, + TK_NOT, TK_NO, TK_NULLS, TK_NULL, TK_ISNULL, + TK_LIKE_KW, TK_EXCLUSIVE, TK_EXISTS, TK_CONSTRAINT, TK_INTO, + TK_OFFSET, TK_OF, TK_SET, TK_TRIGGER, TK_RAISE, + TK_UNIQUE, TK_QUERY, TK_WITHOUT, TK_WITH, TK_JOIN_KW, TK_RANGE, TK_GENERATED, TK_DETACH, TK_HAVING, TK_LIKE_KW, - TK_BEGIN, TK_JOIN_KW, TK_REFERENCES, TK_UNIQUE, TK_QUERY, - TK_WITHOUT, TK_WITH, TK_JOIN_KW, TK_RELEASE, TK_ATTACH, - TK_BETWEEN, TK_NOTHING, TK_GROUPS, TK_GROUP, TK_CASCADE, - TK_ASC, TK_DEFAULT, TK_CASE, TK_COLLATE, TK_CREATE, - TK_CTIME_KW, TK_IMMEDIATE, TK_JOIN, TK_INSERT, TK_MATCH, - TK_PLAN, TK_ANALYZE, TK_PRAGMA, TK_MATERIALIZED, TK_DEFERRED, - TK_DISTINCT, TK_IS, TK_UPDATE, TK_VALUES, TK_VIRTUAL, - TK_ALWAYS, TK_WHEN, TK_WHERE, TK_RECURSIVE, TK_ABORT, - TK_AFTER, TK_RENAME, TK_AND, TK_DROP, TK_PARTITION, - TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, TK_COLUMNKW, - TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, TK_CTIME_KW, - TK_CURRENT, TK_PRECEDING, TK_FAIL, TK_LAST, TK_FILTER, - TK_REPLACE, TK_FIRST, TK_FOLLOWING, TK_FROM, TK_JOIN_KW, - TK_LIMIT, TK_IF, TK_ORDER, TK_RESTRICT, TK_OTHERS, - TK_OVER, TK_RETURNING, TK_JOIN_KW, TK_ROLLBACK, TK_ROWS, 
- TK_ROW, TK_UNBOUNDED, TK_UNION, TK_USING, TK_VACUUM, - TK_VIEW, TK_WINDOW, TK_DO, TK_BY, TK_INITIALLY, - TK_ALL, TK_PRIMARY, + TK_BEGIN, TK_JOIN_KW, TK_REFERENCES, TK_ATTACH, TK_BETWEEN, + TK_JOIN_KW, TK_ALTER, TK_RELEASE, TK_CASCADE, TK_ASC, + TK_DEFAULT, TK_CASE, TK_COLLATE, TK_CREATE, TK_IMMEDIATE, + TK_JOIN, TK_INSERT, TK_MATCH, TK_PLAN, TK_ANALYZE, + TK_PRAGMA, TK_MATERIALIZED, TK_DEFERRED, TK_DISTINCT, TK_IS, + TK_UPDATE, TK_VALUES, TK_VIRTUAL, TK_ALWAYS, TK_WHEN, + TK_NOTHING, TK_GROUPS, TK_GROUP, TK_WHERE, TK_RECURSIVE, + TK_ABORT, TK_AFTER, TK_RENAME, TK_AND, TK_DROP, + TK_PARTITION, TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, + TK_COLUMNKW, TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, + TK_CTIME_KW, TK_CURRENT, TK_PRECEDING, TK_FAIL, TK_LAST, + TK_FILTER, TK_REPLACE, TK_FIRST, TK_FOLLOWING, TK_FROM, + TK_JOIN_KW, TK_LIMIT, TK_IF, TK_ORDER, TK_RESTRICT, + TK_OTHERS, TK_OVER, TK_RETURNING, TK_JOIN_KW, TK_ROLLBACK, + TK_ROWS, TK_ROW, TK_UNBOUNDED, TK_UNION, TK_USING, + TK_VACUUM, TK_VIEW, TK_WINDOW, TK_DO, TK_BY, + TK_INITIALLY, TK_ALL, TK_PRIMARY, }; /* Hash table decoded: ** 0: INSERT @@ -182885,7 +183577,7 @@ static const unsigned char aKWCode[148] = {0, ** 88: CURRENT AFTER ALTER ** 89: FULL FAIL CONFLICT ** 90: EXPLAIN -** 91: CONSTRAINT +** 91: CONSTRAINT CONCURRENT ** 92: FROM ALWAYS ** 93: ** 94: ABORT @@ -182946,153 +183638,154 @@ static int keywordCode(const char *z, int n, int *pType){ while( j=2 ) keywordCode((char*)z, n, &id); return id; } -#define SQLITE_N_KEYWORD 147 +#define SQLITE_N_KEYWORD 148 SQLITE_API int sqlite3_keyword_name(int i,const char **pzName,int *pnName){ if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR; i++; @@ -184336,6 +185029,9 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = { #ifdef SQLITE_EXTRA_AUTOEXT SQLITE_EXTRA_AUTOEXT, #endif +#ifdef SQLITE_ENABLE_HCT + sqlite3HctVtabInit, +#endif }; #ifndef SQLITE_AMALGAMATION @@ -184409,32 +185105,6 @@ SQLITE_API char *sqlite3_temp_directory = 0; */ SQLITE_API char 
*sqlite3_data_directory = 0; -/* -** Determine whether or not high-precision (long double) floating point -** math works correctly on CPU currently running. -*/ -static SQLITE_NOINLINE int hasHighPrecisionDouble(int rc){ - if( sizeof(LONGDOUBLE_TYPE)<=8 ){ - /* If the size of "long double" is not more than 8, then - ** high-precision math is not possible. */ - return 0; - }else{ - /* Just because sizeof(long double)>8 does not mean that the underlying - ** hardware actually supports high-precision floating point. For example, - ** clearing the 0x100 bit in the floating-point control word on Intel - ** processors will make long double work like double, even though long - ** double takes up more space. The only way to determine if long double - ** actually works is to run an experiment. */ - LONGDOUBLE_TYPE a, b, c; - rc++; - a = 1.0+rc*0.1; - b = 1.0e+18+rc*25.0; - c = a+b; - return b!=c; - } -} - - /* ** Initialize SQLite. ** @@ -184629,13 +185299,6 @@ SQLITE_API int sqlite3_initialize(void){ rc = SQLITE_EXTRA_INIT(0); } #endif - - /* Experimentally determine if high-precision floating point is - ** available. */ -#ifndef SQLITE_OMIT_WSD - sqlite3Config.bUseLongDouble = hasHighPrecisionDouble(rc); -#endif - return rc; } @@ -185706,10 +186369,6 @@ SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ sqlite3Error(db, SQLITE_OK); /* Deallocates any cached error strings. 
*/ sqlite3ValueFree(db->pErr); sqlite3CloseExtensions(db); -#if SQLITE_USER_AUTHENTICATION - sqlite3_free(db->auth.zAuthUser); - sqlite3_free(db->auth.zAuthPW); -#endif db->eOpenState = SQLITE_STATE_ERROR; @@ -187144,8 +187803,8 @@ static const int aHardLimit[] = { #if SQLITE_MAX_VDBE_OP<40 # error SQLITE_MAX_VDBE_OP must be at least 40 #endif -#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>32767 -# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 32767 +#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>127 +# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 127 #endif #if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125 # error SQLITE_MAX_ATTACHED must be between 0 and 125 @@ -187212,8 +187871,8 @@ SQLITE_API int sqlite3_limit(sqlite3 *db, int limitId, int newLimit){ if( newLimit>=0 ){ /* IMP: R-52476-28732 */ if( newLimit>aHardLimit[limitId] ){ newLimit = aHardLimit[limitId]; /* IMP: R-51463-25634 */ - }else if( newLimit<1 && limitId==SQLITE_LIMIT_LENGTH ){ - newLimit = 1; + }else if( newLimitaLimit[limitId] = newLimit; } @@ -187732,6 +188391,7 @@ static int openDatabase( if( ((1<<(flags&7)) & 0x46)==0 ){ rc = SQLITE_MISUSE_BKPT; /* IMP: R-18321-05872 */ }else{ + if( zFilename==0 ) zFilename = ":memory:"; rc = sqlite3ParseUri(zVfs, zFilename, &flags, &db->pVfs, &zOpen, &zErrMsg); } if( rc!=SQLITE_OK ){ @@ -188886,24 +189546,21 @@ SQLITE_API int sqlite3_test_control(int op, ...){ *pI2 = sqlite3LogEst(*pU64); break; } - -#if !defined(SQLITE_OMIT_WSD) - /* sqlite3_test_control(SQLITE_TESTCTRL_USELONGDOUBLE, int X); + /* sqlite3_test_control(SQLITE_TESTCTRL_HCT_MTCOMMIT, + ** sqlite3 *db, + ** void(*xMtCommit)(void*, int), + ** void *pCtx + ** ); ** - ** X<0 Make no changes to the bUseLongDouble. Just report value. - ** X==0 Disable bUseLongDouble - ** X==1 Enable bUseLongDouble - ** X>=2 Set bUseLongDouble to its default value for this platform + ** Install xMtCommit hook on "main" hct database. 
*/ - case SQLITE_TESTCTRL_USELONGDOUBLE: { - int b = va_arg(ap, int); - if( b>=2 ) b = hasHighPrecisionDouble(b); - if( b>=0 ) sqlite3Config.bUseLongDouble = b>0; - rc = sqlite3Config.bUseLongDouble!=0; + case SQLITE_TESTCTRL_HCT_MTCOMMIT: { + typedef void (*mt_commit_hook)(void*,int); + sqlite3 *db = va_arg(ap, sqlite3*); + db->xMtCommit = va_arg(ap, mt_commit_hook); + db->pMtCommitCtx = va_arg(ap, void*); break; - } -#endif - + }; #if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_WSD) /* sqlite3_test_control(SQLITE_TESTCTRL_TUNE, id, *piValue) @@ -189212,7 +189869,11 @@ SQLITE_API int sqlite3_snapshot_get( if( iDb==0 || iDb>1 ){ Btree *pBt = db->aDb[iDb].pBt; if( SQLITE_TXN_WRITE!=sqlite3BtreeTxnState(pBt) ){ + Pager *pPager = sqlite3BtreePager(pBt); + i64 dummy = 0; + sqlite3PagerSnapshotOpen(pPager, (sqlite3_snapshot*)&dummy); rc = sqlite3BtreeBeginTrans(pBt, 0, 0); + sqlite3PagerSnapshotOpen(pPager, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerSnapshotGet(sqlite3BtreePager(pBt), ppSnapshot); } @@ -193031,10 +193692,15 @@ static int fts3PoslistPhraseMerge( if( *p1==POS_COLUMN ){ p1++; p1 += fts3GetVarint32(p1, &iCol1); + /* iCol1==0 indicates corruption. Column 0 does not have a POS_COLUMN + ** entry, so this is actually end-of-doclist. */ + if( iCol1==0 ) return 0; } if( *p2==POS_COLUMN ){ p2++; p2 += fts3GetVarint32(p2, &iCol2); + /* As above, iCol2==0 indicates corruption. */ + if( iCol2==0 ) return 0; } while( 1 ){ @@ -196205,7 +196871,7 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ nTmp += p->pRight->pPhrase->doclist.nList; } nTmp += p->pPhrase->doclist.nList; - aTmp = sqlite3_malloc64(nTmp*2); + aTmp = sqlite3_malloc64(nTmp*2 + FTS3_VARINT_MAX); if( !aTmp ){ *pRc = SQLITE_NOMEM; res = 0; @@ -196856,7 +197522,7 @@ SQLITE_PRIVATE int sqlite3Fts3Corrupt(){ } #endif -#if !SQLITE_CORE +#if !defined(SQLITE_CORE) /* ** Initialize API pointer table, if required. 
*/ @@ -221147,7 +221813,7 @@ SQLITE_API int sqlite3_rtree_query_callback( ); } -#if !SQLITE_CORE +#ifndef SQLITE_CORE #ifdef _WIN32 __declspec(dllexport) #endif @@ -221738,7 +222404,7 @@ SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ return rc; } -#if !SQLITE_CORE +#ifndef SQLITE_CORE #ifdef _WIN32 __declspec(dllexport) #endif @@ -222996,6 +223662,27 @@ struct RbuFrame { u32 iWalFrame; }; +#ifndef UNUSED_PARAMETER +/* +** The following macros are used to suppress compiler warnings and to +** make it clear to human readers when a function parameter is deliberately +** left unused within the body of a function. This usually happens when +** a function is called via a function pointer. For example the +** implementation of an SQL aggregate step callback may not use the +** parameter indicating the number of arguments passed to the aggregate, +** if it knows that this is enforced elsewhere. +** +** When a function parameter is not used at all within the body of a function, +** it is generally named "NotUsed" or "NotUsed2" to make things even clearer. +** However, these macros may also be used to suppress warnings related to +** parameters that may or may not be used depending on compilation options. +** For example those parameters only used in assert() statements. In these +** cases the parameters are named as per the usual conventions. +*/ +#define UNUSED_PARAMETER(x) (void)(x) +#define UNUSED_PARAMETER2(x,y) UNUSED_PARAMETER(x),UNUSED_PARAMETER(y) +#endif + /* ** RBU handle. 
** @@ -223047,7 +223734,7 @@ struct sqlite3rbu { int rc; /* Value returned by last rbu_step() call */ char *zErrmsg; /* Error message if rc!=SQLITE_OK */ int nStep; /* Rows processed for current object */ - int nProgress; /* Rows processed for all objects */ + sqlite3_int64 nProgress; /* Rows processed for all objects */ RbuObjIter objiter; /* Iterator for skipping through tbl/idx */ const char *zVfsName; /* Name of automatically created rbu vfs */ rbu_file *pTargetFd; /* File handle open on target db */ @@ -223164,7 +223851,7 @@ static unsigned int rbuDeltaGetInt(const char **pz, int *pLen){ v = (v<<6) + c; } z--; - *pLen -= z - zStart; + *pLen -= (int)(z - zStart); *pz = (char*)z; return v; } @@ -223349,6 +224036,7 @@ static void rbuFossilDeltaFunc( char *aOut; assert( argc==2 ); + UNUSED_PARAMETER(argc); nOrig = sqlite3_value_bytes(argv[0]); aOrig = (const char*)sqlite3_value_blob(argv[0]); @@ -224928,13 +225616,13 @@ static char *rbuObjIterGetIndexWhere(sqlite3rbu *p, RbuObjIter *pIter){ else if( c==')' ){ nParen--; if( nParen==0 ){ - int nSpan = &zSql[i] - pIter->aIdxCol[iIdxCol].zSpan; + int nSpan = (int)(&zSql[i] - pIter->aIdxCol[iIdxCol].zSpan); pIter->aIdxCol[iIdxCol++].nSpan = nSpan; i++; break; } }else if( c==',' && nParen==1 ){ - int nSpan = &zSql[i] - pIter->aIdxCol[iIdxCol].zSpan; + int nSpan = (int)(&zSql[i] - pIter->aIdxCol[iIdxCol].zSpan); pIter->aIdxCol[iIdxCol++].nSpan = nSpan; pIter->aIdxCol[iIdxCol].zSpan = &zSql[i+1]; }else if( c=='"' || c=='\'' || c=='`' ){ @@ -225624,6 +226312,8 @@ static void rbuFileSuffix3(const char *zBase, char *z){ for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} if( z[i]=='.' 
&& sz>i+4 ) memmove(&z[i+1], &z[sz-3], 4); } +#else + UNUSED_PARAMETER2(zBase,z); #endif } @@ -226208,7 +226898,7 @@ static void rbuSaveState(sqlite3rbu *p, int eStage){ "(%d, %Q), " "(%d, %Q), " "(%d, %d), " - "(%d, %d), " + "(%d, %lld), " "(%d, %lld), " "(%d, %lld), " "(%d, %lld), " @@ -226566,6 +227256,7 @@ static void rbuIndexCntFunc( sqlite3 *db = (rbuIsVacuum(p) ? p->dbRbu : p->dbMain); assert( nVal==1 ); + UNUSED_PARAMETER(nVal); rc = prepareFreeAndCollectError(db, &pStmt, &zErrmsg, sqlite3_mprintf("SELECT count(*) FROM sqlite_schema " @@ -226841,7 +227532,7 @@ SQLITE_API sqlite3rbu *sqlite3rbu_vacuum( ){ if( zTarget==0 ){ return rbuMisuseError(); } if( zState ){ - int n = strlen(zState); + size_t n = strlen(zState); if( n>=7 && 0==memcmp("-vactmp", &zState[n-7], 7) ){ return rbuMisuseError(); } @@ -227058,6 +227749,7 @@ SQLITE_API int sqlite3rbu_savestate(sqlite3rbu *p){ */ static int xDefaultRename(void *pArg, const char *zOld, const char *zNew){ int rc = SQLITE_OK; + UNUSED_PARAMETER(pArg); #if defined(_WIN32_WCE) { LPWSTR zWideOld; @@ -227962,6 +228654,9 @@ static int rbuVfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ ** No-op. */ static int rbuVfsGetLastError(sqlite3_vfs *pVfs, int a, char *b){ + UNUSED_PARAMETER(pVfs); + UNUSED_PARAMETER(a); + UNUSED_PARAMETER(b); return 0; } @@ -229018,7 +229713,13 @@ SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3 *db){ return SQLITE_OK; } ** ** The data field of sqlite_dbpage table can be updated. The new ** value must be a BLOB which is the correct page size, otherwise the -** update fails. Rows may not be deleted or inserted. +** update fails. INSERT operations also work, and operate as if they +** where REPLACE. The size of the database can be extended by INSERT-ing +** new pages on the end. +** +** Rows may not be deleted. However, doing an INSERT to page number N +** with NULL page data causes the N-th page and all subsequent pages to be +** deleted and the database to be truncated. 
*/ /* #include "sqliteInt.h" ** Requires access to internal data structures ** */ @@ -229041,6 +229742,8 @@ struct DbpageCursor { struct DbpageTable { sqlite3_vtab base; /* Base class. Must be first */ sqlite3 *db; /* The database */ + int iDbTrunc; /* Database to truncate */ + Pgno pgnoTrunc; /* Size to truncate to */ }; /* Columns */ @@ -229049,7 +229752,6 @@ struct DbpageTable { #define DBPAGE_COLUMN_SCHEMA 2 - /* ** Connect to or create a dbpagevfs virtual table. */ @@ -229311,11 +230013,11 @@ static int dbpageUpdate( DbPage *pDbPage = 0; int rc = SQLITE_OK; char *zErr = 0; - const char *zSchema; int iDb; Btree *pBt; Pager *pPager; int szPage; + int isInsert; (void)pRowid; if( pTab->db->flags & SQLITE_Defensive ){ @@ -229326,21 +230028,29 @@ static int dbpageUpdate( zErr = "cannot delete"; goto update_fail; } - pgno = sqlite3_value_int(argv[0]); - if( sqlite3_value_type(argv[0])==SQLITE_NULL - || (Pgno)sqlite3_value_int(argv[1])!=pgno - ){ - zErr = "cannot insert"; - goto update_fail; + if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ + pgno = (Pgno)sqlite3_value_int(argv[2]); + isInsert = 1; + }else{ + pgno = sqlite3_value_int(argv[0]); + if( (Pgno)sqlite3_value_int(argv[1])!=pgno ){ + zErr = "cannot insert"; + goto update_fail; + } + isInsert = 0; } - zSchema = (const char*)sqlite3_value_text(argv[4]); - iDb = ALWAYS(zSchema) ? 
sqlite3FindDbName(pTab->db, zSchema) : -1; - if( NEVER(iDb<0) ){ - zErr = "no such schema"; - goto update_fail; + if( sqlite3_value_type(argv[4])==SQLITE_NULL ){ + iDb = 0; + }else{ + const char *zSchema = (const char*)sqlite3_value_text(argv[4]); + iDb = sqlite3FindDbName(pTab->db, zSchema); + if( iDb<0 ){ + zErr = "no such schema"; + goto update_fail; + } } pBt = pTab->db->aDb[iDb].pBt; - if( NEVER(pgno<1) || NEVER(pBt==0) || NEVER(pgno>sqlite3BtreeLastPage(pBt)) ){ + if( pgno<1 || NEVER(pBt==0) ){ zErr = "bad page number"; goto update_fail; } @@ -229348,18 +230058,25 @@ static int dbpageUpdate( if( sqlite3_value_type(argv[3])!=SQLITE_BLOB || sqlite3_value_bytes(argv[3])!=szPage ){ - zErr = "bad page value"; - goto update_fail; + if( sqlite3_value_type(argv[3])==SQLITE_NULL && isInsert && pgno>1 ){ + /* "INSERT INTO dbpage($PGNO,NULL)" causes page number $PGNO and + ** all subsequent pages to be deleted. */ + pTab->iDbTrunc = iDb; + pgno--; + pTab->pgnoTrunc = pgno; + }else{ + zErr = "bad page value"; + goto update_fail; + } } pPager = sqlite3BtreePager(pBt); rc = sqlite3PagerGet(pPager, pgno, (DbPage**)&pDbPage, 0); if( rc==SQLITE_OK ){ const void *pData = sqlite3_value_blob(argv[3]); - assert( pData!=0 || pTab->db->mallocFailed ); - if( pData - && (rc = sqlite3PagerWrite(pDbPage))==SQLITE_OK - ){ - memcpy(sqlite3PagerGetData(pDbPage), pData, szPage); + if( (rc = sqlite3PagerWrite(pDbPage))==SQLITE_OK && pData ){ + unsigned char *aPage = sqlite3PagerGetData(pDbPage); + memcpy(aPage, pData, szPage); + pTab->pgnoTrunc = 0; } } sqlite3PagerUnref(pDbPage); @@ -229383,9 +230100,31 @@ static int dbpageBegin(sqlite3_vtab *pVtab){ Btree *pBt = db->aDb[i].pBt; if( pBt ) (void)sqlite3BtreeBeginTrans(pBt, 1, 0); } + pTab->pgnoTrunc = 0; return SQLITE_OK; } +/* Invoke sqlite3PagerTruncate() as necessary, just prior to COMMIT +*/ +static int dbpageSync(sqlite3_vtab *pVtab){ + DbpageTable *pTab = (DbpageTable *)pVtab; + if( pTab->pgnoTrunc>0 ){ + Btree *pBt = 
pTab->db->aDb[pTab->iDbTrunc].pBt; + Pager *pPager = sqlite3BtreePager(pBt); + sqlite3PagerTruncateImage(pPager, pTab->pgnoTrunc); + } + pTab->pgnoTrunc = 0; + return SQLITE_OK; +} + +/* Cancel any pending truncate. +*/ +static int dbpageRollbackTo(sqlite3_vtab *pVtab, int notUsed1){ + DbpageTable *pTab = (DbpageTable *)pVtab; + pTab->pgnoTrunc = 0; + (void)notUsed1; + return SQLITE_OK; +} /* ** Invoke this routine to register the "dbpage" virtual table module @@ -229407,14 +230146,14 @@ SQLITE_PRIVATE int sqlite3DbpageRegister(sqlite3 *db){ dbpageRowid, /* xRowid - read data */ dbpageUpdate, /* xUpdate */ dbpageBegin, /* xBegin */ - 0, /* xSync */ + dbpageSync, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindMethod */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ - 0, /* xRollbackTo */ + dbpageRollbackTo, /* xRollbackTo */ 0, /* xShadowName */ 0 /* xIntegrity */ }; @@ -229509,6 +230248,10 @@ struct SessionBuffer { ** input data. Input data may be supplied either as a single large buffer ** (e.g. sqlite3changeset_start()) or using a stream function (e.g. ** sqlite3changeset_start_strm()). +** +** bNoDiscard: +** If true, then the only time data is discarded is as a result of explicit +** sessionDiscardData() calls. Not within every sessionInputBuffer() call. */ struct SessionInput { int bNoDiscard; /* If true, do not discard in InputBuffer() */ @@ -231193,16 +231936,19 @@ static void sessionPreupdateOneChange( for(i=0; i<(pTab->nCol-pTab->bRowid); i++){ sqlite3_value *p = 0; if( op!=SQLITE_INSERT ){ - TESTONLY(int trc = ) pSession->hook.xOld(pSession->hook.pCtx, i, &p); - assert( trc==SQLITE_OK ); + /* This may fail if the column has a non-NULL default and was added + ** using ALTER TABLE ADD COLUMN after this record was created. 
*/ + rc = pSession->hook.xOld(pSession->hook.pCtx, i, &p); }else if( pTab->abPK[i] ){ TESTONLY(int trc = ) pSession->hook.xNew(pSession->hook.pCtx, i, &p); assert( trc==SQLITE_OK ); } - /* This may fail if SQLite value p contains a utf-16 string that must - ** be converted to utf-8 and an OOM error occurs while doing so. */ - rc = sessionSerializeValue(0, p, &nByte); + if( rc==SQLITE_OK ){ + /* This may fail if SQLite value p contains a utf-16 string that must + ** be converted to utf-8 and an OOM error occurs while doing so. */ + rc = sessionSerializeValue(0, p, &nByte); + } if( rc!=SQLITE_OK ) goto error_out; } if( pTab->bRowid ){ @@ -234583,15 +235329,21 @@ static int sessionChangesetApply( int nTab = 0; /* Result of sqlite3Strlen30(zTab) */ SessionApplyCtx sApply; /* changeset_apply() context object */ int bPatchset; + u64 savedFlag = db->flags & SQLITE_FkNoAction; assert( xConflict!=0 ); + sqlite3_mutex_enter(sqlite3_db_mutex(db)); + if( flags & SQLITE_CHANGESETAPPLY_FKNOACTION ){ + db->flags |= ((u64)SQLITE_FkNoAction); + db->aDb[0].pSchema->schema_cookie -= 32; + } + pIter->in.bNoDiscard = 1; memset(&sApply, 0, sizeof(sApply)); sApply.bRebase = (ppRebase && pnRebase); sApply.bInvertConstraints = !!(flags & SQLITE_CHANGESETAPPLY_INVERT); sApply.bIgnoreNoop = !!(flags & SQLITE_CHANGESETAPPLY_IGNORENOOP); - sqlite3_mutex_enter(sqlite3_db_mutex(db)); if( (flags & SQLITE_CHANGESETAPPLY_NOSAVEPOINT)==0 ){ rc = sqlite3_exec(db, "SAVEPOINT changeset_apply", 0, 0, 0); } @@ -234753,6 +235505,12 @@ static int sessionChangesetApply( sqlite3_free((char*)sApply.azCol); /* cast works around VC++ bug */ sqlite3_free((char*)sApply.constraints.aBuf); sqlite3_free((char*)sApply.rebase.aBuf); + + if( (flags & SQLITE_CHANGESETAPPLY_FKNOACTION) && savedFlag==0 ){ + assert( db->flags & SQLITE_FkNoAction ); + db->flags &= ~((u64)SQLITE_FkNoAction); + db->aDb[0].pSchema->schema_cookie -= 32; + } sqlite3_mutex_leave(sqlite3_db_mutex(db)); return rc; } @@ -234781,12 +235539,6 @@ 
SQLITE_API int sqlite3changeset_apply_v2( sqlite3_changeset_iter *pIter; /* Iterator to skip through changeset */ int bInv = !!(flags & SQLITE_CHANGESETAPPLY_INVERT); int rc = sessionChangesetStart(&pIter, 0, 0, nChangeset, pChangeset, bInv, 1); - u64 savedFlag = db->flags & SQLITE_FkNoAction; - - if( flags & SQLITE_CHANGESETAPPLY_FKNOACTION ){ - db->flags |= ((u64)SQLITE_FkNoAction); - db->aDb[0].pSchema->schema_cookie -= 32; - } if( rc==SQLITE_OK ){ rc = sessionChangesetApply( @@ -234794,11 +235546,6 @@ SQLITE_API int sqlite3changeset_apply_v2( ); } - if( (flags & SQLITE_CHANGESETAPPLY_FKNOACTION) && savedFlag==0 ){ - assert( db->flags & SQLITE_FkNoAction ); - db->flags &= ~((u64)SQLITE_FkNoAction); - db->aDb[0].pSchema->schema_cookie -= 32; - } return rc; } @@ -235119,6 +235866,9 @@ static int sessionChangesetExtendRecord( sessionAppendBlob(pOut, aRec, nRec, &rc); if( rc==SQLITE_OK && pTab->pDfltStmt==0 ){ rc = sessionPrepareDfltStmt(pGrp->db, pTab, &pTab->pDfltStmt); + if( rc==SQLITE_OK && SQLITE_ROW!=sqlite3_step(pTab->pDfltStmt) ){ + rc = sqlite3_errcode(pGrp->db); + } } for(ii=nCol; rc==SQLITE_OK && iinCol; ii++){ int eType = sqlite3_column_type(pTab->pDfltStmt, ii); @@ -235135,6 +235885,7 @@ static int sessionChangesetExtendRecord( } if( SQLITE_OK==sessionBufferGrow(pOut, 8, &rc) ){ sessionPutI64(&pOut->aBuf[pOut->nBuf], iVal); + pOut->nBuf += 8; } break; } @@ -235274,6 +236025,8 @@ static int sessionOneChangeToHash( u8 *aRec = &pIter->in.aData[pIter->in.iCurrent + 2]; int nRec = (pIter->in.iNext - pIter->in.iCurrent) - 2; + assert( nRec>0 ); + /* Ensure that only changesets, or only patchsets, but not a mixture ** of both, are being combined. It is an error to try to combine a ** changeset and a patchset. 
*/ @@ -235351,6 +236104,7 @@ static int sessionChangesetToHash( int nRec; int rc = SQLITE_OK; + pIter->in.bNoDiscard = 1; while( SQLITE_ROW==(sessionChangesetNext(pIter, &aRec, &nRec, 0)) ){ rc = sessionOneChangeToHash(pGrp, pIter, bRebase); if( rc!=SQLITE_OK ) break; @@ -235983,7 +236737,27 @@ SQLITE_API int sqlite3session_config(int op, void *pArg){ /************** End of sqlite3session.c **************************************/ /************** Begin file fts5.c ********************************************/ - +/* +** This, the "fts5.c" source file, is a composite file that is itself +** assembled from the following files: +** +** fts5.h +** fts5Int.h +** fts5parse.h <--- Generated from fts5parse.y by Lemon +** fts5parse.c <--- Generated from fts5parse.y by Lemon +** fts5_aux.c +** fts5_buffer.c +** fts5_config.c +** fts5_expr.c +** fts5_hash.c +** fts5_index.c +** fts5_main.c +** fts5_storage.c +** fts5_tokenize.c +** fts5_unicode2.c +** fts5_varint.c +** fts5_vocab.c +*/ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) @@ -235993,6 +236767,12 @@ SQLITE_API int sqlite3session_config(int op, void *pArg){ # undef NDEBUG #endif +#ifdef HAVE_STDINT_H +/* #include */ +#endif +#ifdef HAVE_INTTYPES_H +/* #include */ +#endif /* ** 2014 May 31 ** @@ -236391,7 +237171,6 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting -** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** @@ -236960,6 +237739,7 @@ struct Fts5Config { int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */ + int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ @@ -236998,9 +237778,10 @@ struct Fts5Config { #define FTS5_CURRENT_VERSION 4 #define FTS5_CURRENT_VERSION_SECUREDELETE 5 -#define FTS5_CONTENT_NORMAL 0 -#define FTS5_CONTENT_NONE 1 -#define FTS5_CONTENT_EXTERNAL 2 +#define FTS5_CONTENT_NORMAL 0 +#define FTS5_CONTENT_NONE 1 +#define FTS5_CONTENT_EXTERNAL 2 +#define FTS5_CONTENT_UNINDEXED 3 #define FTS5_DETAIL_FULL 0 #define FTS5_DETAIL_NONE 1 @@ -237372,16 +238153,13 @@ static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); static int sqlite3Fts5FlushToDisk(Fts5Table*); -static int sqlite3Fts5ExtractText( - Fts5Config *pConfig, - sqlite3_value *pVal, /* Value to extract text from */ - int bContent, /* Loaded from content table */ - int *pbResetTokenizer, /* OUT: True if ClearLocale() required */ - const char **ppText, /* OUT: Pointer to text buffer */ - int *pnText /* OUT: Size of (*ppText) in bytes */ -); - static void sqlite3Fts5ClearLocale(Fts5Config *pConfig); +static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc); + +static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal); +static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal, + const char **ppText, int *pnText, const char **ppLoc, int *pnLoc +); /* ** End of interface to code in fts5.c. 
@@ -237463,7 +238241,7 @@ static int sqlite3Fts5DropAll(Fts5Config*); static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int); -static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*); +static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*); static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg); @@ -240670,6 +241448,7 @@ static int fts5ConfigParseSpecial( ){ int rc = SQLITE_OK; int nCmd = (int)strlen(zCmd); + if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; const char *p; @@ -240789,6 +241568,16 @@ static int fts5ConfigParseSpecial( return rc; } + if( sqlite3_strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){ + if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ + *pzErr = sqlite3_mprintf("malformed contentless_delete=... directive"); + rc = SQLITE_ERROR; + }else{ + pConfig->bContentlessUnindexed = (zArg[0]=='1'); + } + return rc; + } + if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ if( pConfig->zContentRowid ){ *pzErr = sqlite3_mprintf("multiple content_rowid=... 
directives"); @@ -240906,7 +241695,8 @@ static int fts5ConfigParseColumn( Fts5Config *p, char *zCol, char *zArg, - char **pzErr + char **pzErr, + int *pbUnindexed ){ int rc = SQLITE_OK; if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) @@ -240917,6 +241707,7 @@ static int fts5ConfigParseColumn( }else if( zArg ){ if( 0==sqlite3_stricmp(zArg, "unindexed") ){ p->abUnindexed[p->nCol] = 1; + *pbUnindexed = 1; }else{ *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); rc = SQLITE_ERROR; @@ -240937,11 +241728,26 @@ static int fts5ConfigMakeExprlist(Fts5Config *p){ sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); if( p->eContent!=FTS5_CONTENT_NONE ){ + assert( p->eContent==FTS5_CONTENT_EXTERNAL + || p->eContent==FTS5_CONTENT_NORMAL + || p->eContent==FTS5_CONTENT_UNINDEXED + ); for(i=0; inCol; i++){ if( p->eContent==FTS5_CONTENT_EXTERNAL ){ sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); - }else{ + }else if( p->eContent==FTS5_CONTENT_NORMAL || p->abUnindexed[i] ){ sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); + } + } + } + if( p->eContent==FTS5_CONTENT_NORMAL && p->bLocale ){ + for(i=0; inCol; i++){ + if( p->abUnindexed[i]==0 ){ + sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i); + }else{ + sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); } } } @@ -240975,6 +241781,7 @@ static int sqlite3Fts5ConfigParse( Fts5Config *pRet; /* New object to return */ int i; sqlite3_int64 nByte; + int bUnindexed = 0; /* True if there are one or more UNINDEXED */ *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; @@ -241034,7 +241841,7 @@ static int sqlite3Fts5ConfigParse( pzErr ); }else{ - rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); + rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed); zOne = 0; } } @@ -241066,6 +241873,19 @@ static int sqlite3Fts5ConfigParse( rc = SQLITE_ERROR; } + /* We only 
allow contentless_unindexed=1 if the table is actually a + ** contentless one. + */ + if( rc==SQLITE_OK + && pRet->bContentlessUnindexed + && pRet->eContent!=FTS5_CONTENT_NONE + ){ + *pzErr = sqlite3_mprintf( + "contentless_unindexed=1 requires a contentless table" + ); + rc = SQLITE_ERROR; + } + /* If no zContent option was specified, fill in the default values. */ if( rc==SQLITE_OK && pRet->zContent==0 ){ const char *zTail = 0; @@ -241074,6 +241894,9 @@ static int sqlite3Fts5ConfigParse( ); if( pRet->eContent==FTS5_CONTENT_NORMAL ){ zTail = "content"; + }else if( bUnindexed && pRet->bContentlessUnindexed ){ + pRet->eContent = FTS5_CONTENT_UNINDEXED; + zTail = "content"; }else if( pRet->bColumnsize ){ zTail = "docsize"; } @@ -246615,7 +247438,7 @@ static i64 fts5IndexDataVersion(Fts5Index *p){ if( p->pDataVersion==0 ){ p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) - ); + ); if( p->rc ) return 0; } @@ -250247,6 +251070,11 @@ static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){ nBest = nPercent; } } + + /* If pLvl is already the input level to an ongoing merge, look no + ** further for a merge candidate. The caller should be allowed to + ** continue merging from pLvl first. */ + if( pLvl->nMerge ) break; } } return iRet; @@ -254171,7 +254999,7 @@ static int fts5structConnectMethod( /* ** We must have a single struct=? constraint that will be passed through -** into the xFilter method. If there is no valid stmt=? constraint, +** into the xFilter method. If there is no valid struct=? constraint, ** then return an SQLITE_CONSTRAINT error. */ static int fts5structBestIndexMethod( @@ -254513,8 +255341,17 @@ struct Fts5Global { Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ Fts5Cursor *pCsr; /* First in list of all open cursors */ + u32 aLocaleHdr[4]; }; +/* +** Size of header on fts5_locale() values. 
And macro to access a buffer +** containing a copy of the header from an Fts5Config pointer. +*/ +#define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) +#define FTS5_LOCALE_HDR(pConfig) ((const u8*)(pConfig->pGlobal->aLocaleHdr)) + + /* ** Each auxiliary function registered with the FTS5 module is represented ** by an object of the following type. All such objects are stored as part @@ -254677,12 +255514,6 @@ struct Fts5Cursor { #define BitFlagAllTest(x,y) (((x) & (y))==(y)) #define BitFlagTest(x,y) (((x) & (y))!=0) -/* -** The subtype value and header bytes used by fts5_locale(). -*/ -#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L') -#define FTS5_LOCALE_HEADER "\x00\xE0\xB2\xEB" - /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -254759,10 +255590,16 @@ static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){ #endif /* -** Return true if pTab is a contentless table. +** Return true if pTab is a contentless table. If parameter bIncludeUnindexed +** is true, this includes contentless tables that store UNINDEXED columns +** only. */ -static int fts5IsContentless(Fts5FullTable *pTab){ - return pTab->p.pConfig->eContent==FTS5_CONTENT_NONE; +static int fts5IsContentless(Fts5FullTable *pTab, int bIncludeUnindexed){ + int eContent = pTab->p.pConfig->eContent; + return ( + eContent==FTS5_CONTENT_NONE + || (bIncludeUnindexed && eContent==FTS5_CONTENT_UNINDEXED) + ); } /* @@ -255053,6 +255890,7 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ if( p->usable==0 || iCol<0 ){ /* As there exists an unusable MATCH constraint this is an ** unusable plan. Return SQLITE_CONSTRAINT. */ + idxStr[iIdxStr] = 0; return SQLITE_CONSTRAINT; }else{ if( iCol==nCol+1 ){ @@ -255686,7 +256524,7 @@ static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ ** valid until after the final call to sqlite3Fts5Tokenize() that will use ** the locale. 
*/ -static void fts5SetLocale( +static void sqlite3Fts5SetLocale( Fts5Config *pConfig, const char *zLocale, int nLocale @@ -255697,127 +256535,74 @@ static void fts5SetLocale( } /* -** Clear any locale configured by an earlier call to fts5SetLocale() or -** sqlite3Fts5ExtractText(). +** Clear any locale configured by an earlier call to sqlite3Fts5SetLocale(). */ static void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ - fts5SetLocale(pConfig, 0, 0); + sqlite3Fts5SetLocale(pConfig, 0, 0); } /* -** This function is used to extract utf-8 text from an sqlite3_value. This -** is usually done in order to tokenize it. For example, when: -** -** * a value is written to an fts5 table, -** * a value is deleted from an FTS5_CONTENT_NORMAL table, -** * a value containing a query expression is passed to xFilter() -** -** and so on. -** -** This function handles 2 cases: -** -** 1) Ordinary values. The text can be extracted from these using -** sqlite3_value_text(). -** -** 2) Combination text/locale blobs created by fts5_locale(). There -** are several cases for these: -** -** * Blobs tagged with FTS5_LOCALE_SUBTYPE. -** * Blobs read from the content table of a locale=1 external-content -** table, and -** * Blobs read from the content table of a locale=1 regular -** content table. -** -** The first two cases above should have the 4 byte FTS5_LOCALE_HEADER -** header. It is an error if a blob with the subtype or a blob read -** from the content table of an external content table does not have -** the required header. A blob read from the content table of a regular -** locale=1 table does not have the header. This is to save space. -** -** If successful, SQLITE_OK is returned and output parameters (*ppText) -** and (*pnText) are set to point to a buffer containing the extracted utf-8 -** text and its length in bytes, respectively. The buffer is not -** nul-terminated. It has the same lifetime as the sqlite3_value object -** from which it is extracted. 
-** -** Parameter bContent must be true if the value was read from an indexed -** column (i.e. not UNINDEXED) of the on disk content. -** -** If pbResetTokenizer is not NULL and if case (2) is used, then -** fts5SetLocale() is called to ensure subsequent sqlite3Fts5Tokenize() calls -** use the locale. In this case (*pbResetTokenizer) is set to true before -** returning, to indicate that the caller must call sqlite3Fts5ClearLocale() -** to clear the locale after tokenizing the text. +** Return true if the value passed as the only argument is an +** fts5_locale() value. */ -static int sqlite3Fts5ExtractText( - Fts5Config *pConfig, - sqlite3_value *pVal, /* Value to extract text from */ - int bContent, /* True if indexed table content */ - int *pbResetTokenizer, /* OUT: True if xSetLocale(NULL) required */ - const char **ppText, /* OUT: Pointer to text buffer */ - int *pnText /* OUT: Size of (*ppText) in bytes */ -){ - const char *pText = 0; - int nText = 0; - int rc = SQLITE_OK; - int bDecodeBlob = 0; - - assert( pbResetTokenizer==0 || *pbResetTokenizer==0 ); - assert( bContent==0 || pConfig->eContent!=FTS5_CONTENT_NONE ); - assert( bContent==0 || sqlite3_value_subtype(pVal)==0 ); - +static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){ + int ret = 0; if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ - if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE - || (bContent && pConfig->bLocale) - ){ - bDecodeBlob = 1; - } - } - - if( bDecodeBlob ){ - const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + /* Call sqlite3_value_bytes() after sqlite3_value_blob() in this case. + ** If the blob was created using zeroblob(), then sqlite3_value_blob() + ** may call malloc(). If this malloc() fails, then the values returned + ** by both value_blob() and value_bytes() will be 0. If value_bytes() were + ** called first, then the NULL pointer returned by value_blob() might + ** be dereferenced. 
*/ const u8 *pBlob = sqlite3_value_blob(pVal); int nBlob = sqlite3_value_bytes(pVal); - - /* Unless this blob was read from the %_content table of an - ** FTS5_CONTENT_NORMAL table, it should have the 4 byte fts5_locale() - ** header. Check for this. If it is not found, return an error. */ - if( (!bContent || pConfig->eContent!=FTS5_CONTENT_NORMAL) ){ - if( nBlobFTS5_LOCALE_HDR_SIZE + && 0==memcmp(pBlob, FTS5_LOCALE_HDR(pConfig), FTS5_LOCALE_HDR_SIZE) + ){ + ret = 1; } + } + return ret; +} - if( rc==SQLITE_OK ){ - int nLocale = 0; +/* +** Value pVal is guaranteed to be an fts5_locale() value, according to +** sqlite3Fts5IsLocaleValue(). This function extracts the text and locale +** from the value and returns them separately. +** +** If successful, SQLITE_OK is returned and (*ppText) and (*ppLoc) set +** to point to buffers containing the text and locale, as utf-8, +** respectively. In this case output parameters (*pnText) and (*pnLoc) are +** set to the sizes in bytes of these two buffers. +** +** Or, if an error occurs, then an SQLite error code is returned. The final +** value of the four output parameters is undefined in this case. 
+*/ +static int sqlite3Fts5DecodeLocaleValue( + sqlite3_value *pVal, + const char **ppText, + int *pnText, + const char **ppLoc, + int *pnLoc +){ + const char *p = sqlite3_value_blob(pVal); + int n = sqlite3_value_bytes(pVal); + int nLoc = 0; - for(nLocale=0; nLocaleFTS5_LOCALE_HDR_SIZE ); - if( pbResetTokenizer ){ - fts5SetLocale(pConfig, (const char*)pBlob, nLocale); - *pbResetTokenizer = 1; - } - } + for(nLoc=FTS5_LOCALE_HDR_SIZE; p[nLoc]; nLoc++){ + if( nLoc==(n-1) ){ + return SQLITE_MISMATCH; } - - }else{ - pText = (const char*)sqlite3_value_text(pVal); - nText = sqlite3_value_bytes(pVal); } + *ppLoc = &p[FTS5_LOCALE_HDR_SIZE]; + *pnLoc = nLoc - FTS5_LOCALE_HDR_SIZE; - *ppText = pText; - *pnText = nText; - return rc; + *ppText = &p[nLoc+1]; + *pnText = n - nLoc - 1; + return SQLITE_OK; } /* @@ -255826,8 +256611,8 @@ static int sqlite3Fts5ExtractText( ** the text of the expression, and sets output variable (*pzText) to ** point to a nul-terminated buffer containing the expression. ** -** If pVal was an fts5_locale() value, then fts5SetLocale() is called to -** set the tokenizer to use the specified locale. +** If pVal was an fts5_locale() value, then sqlite3Fts5SetLocale() is called +** to set the tokenizer to use the specified locale. 
** ** If output variable (*pbFreeAndReset) is set to true, then the caller ** is required to (a) call sqlite3Fts5ClearLocale() to reset the tokenizer @@ -255839,24 +256624,22 @@ static int fts5ExtractExprText( char **pzText, /* OUT: nul-terminated buffer of text */ int *pbFreeAndReset /* OUT: Free (*pzText) and clear locale */ ){ - const char *zText = 0; - int nText = 0; int rc = SQLITE_OK; - int bReset = 0; - *pbFreeAndReset = 0; - rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, &bReset, &zText, &nText); - if( rc==SQLITE_OK ){ - if( bReset ){ - *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, zText); - if( rc!=SQLITE_OK ){ - sqlite3Fts5ClearLocale(pConfig); - }else{ - *pbFreeAndReset = 1; - } - }else{ - *pzText = (char*)zText; + if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + const char *pText = 0; + int nText = 0; + const char *pLoc = 0; + int nLoc = 0; + rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); + *pzText = sqlite3Fts5Mprintf(&rc, "%.*s", nText, pText); + if( rc==SQLITE_OK ){ + sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); } + *pbFreeAndReset = 1; + }else{ + *pzText = (char*)sqlite3_value_text(pVal); + *pbFreeAndReset = 0; } return rc; @@ -256208,7 +256991,7 @@ static int fts5SpecialInsert( } bLoadConfig = 1; }else if( 0==sqlite3_stricmp("rebuild", zCmd) ){ - if( pConfig->eContent==FTS5_CONTENT_NONE ){ + if( fts5IsContentless(pTab, 1) ){ fts5SetVtabError(pTab, "'rebuild' may not be used with a contentless fts5 table" ); @@ -256277,7 +257060,7 @@ static void fts5StorageInsert( ){ int rc = *pRc; if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid); + rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, 0, apVal, piRowid); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); @@ -256285,6 +257068,67 @@ static void fts5StorageInsert( *pRc = rc; } +/* +** +** This function is called when the user attempts an UPDATE on a contentless +** table. 
Parameter bRowidModified is true if the UPDATE statement modifies +** the rowid value. Parameter apVal[] contains the new values for each user +** defined column of the fts5 table. pConfig is the configuration object of the +** table being updated (guaranteed to be contentless). The contentless_delete=1 +** and contentless_unindexed=1 options may or may not be set. +** +** This function returns SQLITE_OK if the UPDATE can go ahead, or an SQLite +** error code if it cannot. In this case an error message is also loaded into +** pConfig. Output parameter (*pbContent) is set to true if the caller should +** update the %_content table only - not the FTS index or any other shadow +** table. This occurs when an UPDATE modifies only UNINDEXED columns of the +** table. +** +** An UPDATE may proceed if: +** +** * The only columns modified are UNINDEXED columns, or +** +** * The contentless_delete=1 option was specified and all of the indexed +** columns (not a subset) have been modified. +*/ +static int fts5ContentlessUpdate( + Fts5Config *pConfig, + sqlite3_value **apVal, + int bRowidModified, + int *pbContent +){ + int ii; + int bSeenIndex = 0; /* Have seen modified indexed column */ + int bSeenIndexNC = 0; /* Have seen unmodified indexed column */ + int rc = SQLITE_OK; + + for(ii=0; iinCol; ii++){ + if( pConfig->abUnindexed[ii]==0 ){ + if( sqlite3_value_nochange(apVal[ii]) ){ + bSeenIndexNC++; + }else{ + bSeenIndex++; + } + } + } + + if( bSeenIndex==0 && bRowidModified==0 ){ + *pbContent = 1; + }else{ + if( bSeenIndexNC || pConfig->bContentlessDelete==0 ){ + rc = SQLITE_ERROR; + sqlite3Fts5ConfigErrmsg(pConfig, + (pConfig->bContentlessDelete ? + "%s a subset of columns on fts5 contentless-delete table: %s" : + "%s contentless fts5 table: %s") + , "cannot UPDATE", pConfig->zName + ); + } + } + + return rc; +} + /* ** This function is the implementation of the xUpdate callback used by ** FTS3 virtual tables. 
It is invoked by SQLite each time a row is to be @@ -256371,44 +257215,34 @@ static int fts5UpdateMethod( assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); assert( nArg!=1 || eType0==SQLITE_INTEGER ); - /* Filter out attempts to run UPDATE or DELETE on contentless tables. - ** This is not suported. Except - they are both supported if the CREATE - ** VIRTUAL TABLE statement contained "contentless_delete=1". */ - if( eType0==SQLITE_INTEGER - && pConfig->eContent==FTS5_CONTENT_NONE - && pConfig->bContentlessDelete==0 - ){ - pTab->p.base.zErrMsg = sqlite3_mprintf( - "cannot %s contentless fts5 table: %s", - (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName - ); - rc = SQLITE_ERROR; - } - /* DELETE */ - else if( nArg==1 ){ - i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); - bUpdateOrDelete = 1; + if( nArg==1 ){ + /* It is only possible to DELETE from a contentless table if the + ** contentless_delete=1 flag is set. */ + if( fts5IsContentless(pTab, 1) && pConfig->bContentlessDelete==0 ){ + fts5SetVtabError(pTab, + "cannot DELETE from contentless fts5 table: %s", pConfig->zName + ); + rc = SQLITE_ERROR; + }else{ + i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); + bUpdateOrDelete = 1; + } } /* INSERT or UPDATE */ else{ int eType1 = sqlite3_value_numeric_type(apVal[1]); - /* Ensure that no fts5_locale() values are written to locale=0 tables. - ** And that no blobs except fts5_locale() blobs are written to indexed - ** (i.e. not UNINDEXED) columns of locale=1 tables. 
*/ - int ii; - for(ii=0; iinCol; ii++){ - if( sqlite3_value_type(apVal[ii+2])==SQLITE_BLOB ){ - int bSub = (sqlite3_value_subtype(apVal[ii+2])==FTS5_LOCALE_SUBTYPE); - if( (pConfig->bLocale && !bSub && pConfig->abUnindexed[ii]==0) - || (pConfig->bLocale==0 && bSub) - ){ - if( pConfig->bLocale==0 ){ - fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); - } + /* It is an error to write an fts5_locale() value to a table without + ** the locale=1 option. */ + if( pConfig->bLocale==0 ){ + int ii; + for(ii=0; iinCol; ii++){ + sqlite3_value *pVal = apVal[ii+2]; + if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); rc = SQLITE_MISMATCH; goto update_out; } @@ -256428,35 +257262,55 @@ static int fts5UpdateMethod( /* UPDATE */ else{ + Fts5Storage *pStorage = pTab->pStorage; i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */ i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */ + int bContent = 0; /* Content only update */ + + /* If this is a contentless table (including contentless_unindexed=1 + ** tables), check if the UPDATE may proceed. 
*/ + if( fts5IsContentless(pTab, 1) ){ + rc = fts5ContentlessUpdate(pConfig, &apVal[2], iOld!=iNew, &bContent); + if( rc!=SQLITE_OK ) goto update_out; + } + if( eType1!=SQLITE_INTEGER ){ rc = SQLITE_MISMATCH; }else if( iOld!=iNew ){ + assert( bContent==0 ); if( eConflict==SQLITE_REPLACE ){ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); + rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); + rc = sqlite3Fts5StorageDelete(pStorage, iNew, 0, 0); } fts5StorageInsert(&rc, pTab, apVal, pRowid); }else{ - rc = sqlite3Fts5StorageFindDeleteRow(pTab->pStorage, iOld); + rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageContentInsert(pTab->pStorage,apVal,pRowid); + rc = sqlite3Fts5StorageContentInsert(pStorage, 0, apVal, pRowid); } if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); + rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 0); } if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid); + rc = sqlite3Fts5StorageIndexInsert(pStorage, apVal, *pRowid); } } + }else if( bContent ){ + /* This occurs when an UPDATE on a contentless table affects *only* + ** UNINDEXED columns. This is a no-op for contentless_unindexed=0 + ** tables, or a write to the %_content table only for =1 tables. 
*/ + assert( fts5IsContentless(pTab, 1) ); + rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5StorageContentInsert(pStorage, 1, apVal, pRowid); + } }else{ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); + rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); fts5StorageInsert(&rc, pTab, apVal, pRowid); } bUpdateOrDelete = 1; - sqlite3Fts5StorageReleaseDeleteRow(pTab->pStorage); + sqlite3Fts5StorageReleaseDeleteRow(pStorage); } } @@ -256570,11 +257424,11 @@ static int fts5ApiTokenize_v2( Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int rc = SQLITE_OK; - fts5SetLocale(pTab->pConfig, pLoc, nLoc); + sqlite3Fts5SetLocale(pTab->pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken ); - fts5SetLocale(pTab->pConfig, 0, 0); + sqlite3Fts5SetLocale(pTab->pConfig, 0, 0); return rc; } @@ -256602,6 +257456,49 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); } +/* +** Argument pStmt is an SQL statement of the type used by Fts5Cursor. This +** function extracts the text value of column iCol of the current row. +** Additionally, if there is an associated locale, it invokes +** sqlite3Fts5SetLocale() to configure the tokenizer. In all cases the caller +** should invoke sqlite3Fts5ClearLocale() to clear the locale at some point +** after this function returns. +** +** If successful, (*ppText) is set to point to a buffer containing the text +** value as utf-8 and SQLITE_OK returned. (*pnText) is set to the size of that +** buffer in bytes. It is not guaranteed to be nul-terminated. If an error +** occurs, an SQLite error code is returned. The final values of the two +** output parameters are undefined in this case. 
+*/ +static int fts5TextFromStmt( + Fts5Config *pConfig, + sqlite3_stmt *pStmt, + int iCol, + const char **ppText, + int *pnText +){ + sqlite3_value *pVal = sqlite3_column_value(pStmt, iCol+1); + const char *pLoc = 0; + int nLoc = 0; + int rc = SQLITE_OK; + + if( pConfig->bLocale + && pConfig->eContent==FTS5_CONTENT_EXTERNAL + && sqlite3Fts5IsLocaleValue(pConfig, pVal) + ){ + rc = sqlite3Fts5DecodeLocaleValue(pVal, ppText, pnText, &pLoc, &nLoc); + }else{ + *ppText = (const char*)sqlite3_value_text(pVal); + *pnText = sqlite3_value_bytes(pVal); + if( pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL ){ + pLoc = (const char*)sqlite3_column_text(pStmt, iCol+1+pConfig->nCol); + nLoc = sqlite3_column_bytes(pStmt, iCol+1+pConfig->nCol); + } + } + sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); + return rc; +} + static int fts5ApiColumnText( Fts5Context *pCtx, int iCol, @@ -256615,16 +257512,14 @@ static int fts5ApiColumnText( assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL ); if( iCol<0 || iCol>=pTab->pConfig->nCol ){ rc = SQLITE_RANGE; - }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) ){ + }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab), 0) ){ *pz = 0; *pn = 0; }else{ rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ - Fts5Config *pConfig = pTab->pConfig; - int bContent = (pConfig->abUnindexed[iCol]==0); - sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); - sqlite3Fts5ExtractText(pConfig, pVal, bContent, 0, pz, pn); + rc = fts5TextFromStmt(pTab->pConfig, pCsr->pStmt, iCol, pz, pn); + sqlite3Fts5ClearLocale(pTab->pConfig); } } return rc; @@ -256650,7 +257545,7 @@ static int fts5CsrPoslist( if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){ rc = SQLITE_RANGE; }else if( pConfig->eDetail!=FTS5_DETAIL_FULL - && pConfig->eContent==FTS5_CONTENT_NONE + && fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) ){ *pa = 0; *pn = 0; @@ -256666,17 +257561,15 @@ static int fts5CsrPoslist( rc = fts5SeekCursor(pCsr, 0); } 
for(i=0; inCol && rc==SQLITE_OK; i++){ - sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1); const char *z = 0; int n = 0; - int bReset = 0; - rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n); + rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprPopulatePoslists( pConfig, pCsr->pExpr, aPopulator, i, z, n ); } - if( bReset ) sqlite3Fts5ClearLocale(pConfig); + sqlite3Fts5ClearLocale(pConfig); } sqlite3_free(aPopulator); @@ -256848,7 +257741,7 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ if( pConfig->bColumnsize ){ i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); - }else if( pConfig->zContent==0 ){ + }else if( !pConfig->zContent || pConfig->eContent==FTS5_CONTENT_UNINDEXED ){ int i; for(i=0; inCol; i++){ if( pConfig->abUnindexed[i]==0 ){ @@ -256862,17 +257755,14 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ if( pConfig->abUnindexed[i]==0 ){ const char *z = 0; int n = 0; - int bReset = 0; - sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1); - pCsr->aColumnSize[i] = 0; - rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n); + rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX, z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb ); - if( bReset ) sqlite3Fts5ClearLocale(pConfig); } + sqlite3Fts5ClearLocale(pConfig); } } } @@ -257139,42 +258029,19 @@ static int fts5ApiColumnLocale( rc = SQLITE_RANGE; }else if( pConfig->abUnindexed[iCol]==0 - && pConfig->eContent!=FTS5_CONTENT_NONE + && 0==fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) && pConfig->bLocale ){ rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ - /* Load the value into pVal. 
pVal is a locale/text pair iff: - ** - ** 1) It is an SQLITE_BLOB, and - ** 2) Either the subtype is FTS5_LOCALE_SUBTYPE, or else the - ** value was loaded from an FTS5_CONTENT_NORMAL table, and - ** 3) It does not begin with an 0x00 byte. - */ - sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); - if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ - const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); - int nBlob = sqlite3_value_bytes(pVal); - if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ - const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; - if( nBlobpStmt, iCol, &zDummy, &nDummy); + if( rc==SQLITE_OK ){ + *pzLocale = pConfig->t.pLocale; + *pnLocale = pConfig->t.nLocale; } + sqlite3Fts5ClearLocale(pConfig); } } @@ -257395,57 +258262,6 @@ static int fts5PoslistBlob(sqlite3_context *pCtx, Fts5Cursor *pCsr){ return rc; } -/* -** Value pVal was read from column iCol of the FTS5 table. This function -** returns it to the owner of pCtx via a call to an sqlite3_result_xxx() -** function. This function deals with the same cases as -** sqlite3Fts5ExtractText(): -** -** 1) Ordinary values. These can be returned using sqlite3_result_value(). -** -** 2) Blobs from fts5_locale(). The text is extracted from these and -** returned via sqlite3_result_text(). The locale is discarded. 
-*/ -static void fts5ExtractValueFromColumn( - sqlite3_context *pCtx, - Fts5Config *pConfig, - int iCol, - sqlite3_value *pVal -){ - assert( pConfig->eContent!=FTS5_CONTENT_NONE ); - - if( pConfig->bLocale - && sqlite3_value_type(pVal)==SQLITE_BLOB - && pConfig->abUnindexed[iCol]==0 - ){ - const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; - const u8 *pBlob = sqlite3_value_blob(pVal); - int nBlob = sqlite3_value_bytes(pVal); - int ii; - - if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ - if( nBlobbContentlessDelete ){ - fts5ResultError(pCtx, "cannot UPDATE a subset of " - "columns on fts5 contentless-delete table: %s", pConfig->zName - ); - } - }else if( bNochange==0 || pConfig->eContent!=FTS5_CONTENT_NORMAL ){ + if( !sqlite3_vtab_nochange(pCtx) && pConfig->eContent!=FTS5_CONTENT_NONE ){ pConfig->pzErrmsg = &pTab->p.base.zErrMsg; rc = fts5SeekCursor(pCsr, 1); if( rc==SQLITE_OK ){ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); - fts5ExtractValueFromColumn(pCtx, pConfig, iCol, pVal); + if( pConfig->bLocale + && pConfig->eContent==FTS5_CONTENT_EXTERNAL + && sqlite3Fts5IsLocaleValue(pConfig, pVal) + ){ + const char *z = 0; + int n = 0; + rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &z, &n); + if( rc==SQLITE_OK ){ + sqlite3_result_text(pCtx, z, n, SQLITE_TRANSIENT); + } + sqlite3Fts5ClearLocale(pConfig); + }else{ + sqlite3_result_value(pCtx, pVal); + } } + pConfig->pzErrmsg = 0; } } @@ -258026,7 +258847,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-11-15 19:25:39 ed829bf2b069a48c644ae5706399dad7486e5abb87dc1225764038ac258ea4dc", -1, SQLITE_TRANSIENT); } /* @@ -258065,13 +258886,12 @@ static void fts5LocaleFunc( if( zLocale==0 || zLocale[0]=='\0' ){ sqlite3_result_text(pCtx, zText, nText, SQLITE_TRANSIENT); }else{ + Fts5Global 
*p = (Fts5Global*)sqlite3_user_data(pCtx); u8 *pBlob = 0; u8 *pCsr = 0; int nBlob = 0; - const int nHdr = 4; - assert( sizeof(FTS5_LOCALE_HEADER)==nHdr+1 ); - nBlob = nHdr + nLocale + 1 + nText; + nBlob = FTS5_LOCALE_HDR_SIZE + nLocale + 1 + nText; pBlob = (u8*)sqlite3_malloc(nBlob); if( pBlob==0 ){ sqlite3_result_error_nomem(pCtx); @@ -258079,8 +258899,8 @@ static void fts5LocaleFunc( } pCsr = pBlob; - memcpy(pCsr, FTS5_LOCALE_HEADER, nHdr); - pCsr += nHdr; + memcpy(pCsr, (const u8*)p->aLocaleHdr, FTS5_LOCALE_HDR_SIZE); + pCsr += FTS5_LOCALE_HDR_SIZE; memcpy(pCsr, zLocale, nLocale); pCsr += nLocale; (*pCsr++) = 0x00; @@ -258088,7 +258908,6 @@ static void fts5LocaleFunc( assert( &pCsr[nText]==&pBlob[nBlob] ); sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free); - sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE); } } @@ -258190,6 +259009,16 @@ static int fts5Init(sqlite3 *db){ pGlobal->api.xFindTokenizer = fts5FindTokenizer; pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; + + /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector. + ** The constants below were generated randomly. 
*/ + sqlite3_randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr); + pGlobal->aLocaleHdr[0] ^= 0xF924976D; + pGlobal->aLocaleHdr[1] ^= 0x16596E13; + pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA; + pGlobal->aLocaleHdr[3] ^= 0x9B03A67F; + assert( sizeof(pGlobal->aLocaleHdr)==16 ); + rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); @@ -258416,20 +259245,35 @@ static int fts5StorageGetStmt( case FTS5_STMT_INSERT_CONTENT: case FTS5_STMT_REPLACE_CONTENT: { - int nCol = pC->nCol + 1; - char *zBind; + char *zBind = 0; int i; - zBind = sqlite3_malloc64(1 + nCol*2); - if( zBind ){ - for(i=0; ieContent==FTS5_CONTENT_NORMAL + || pC->eContent==FTS5_CONTENT_UNINDEXED + ); + + /* Add bindings for the "c*" columns - those that store the actual + ** table content. If eContent==NORMAL, then there is one binding + ** for each column. Or, if eContent==UNINDEXED, then there are only + ** bindings for the UNINDEXED columns. */ + for(i=0; rc==SQLITE_OK && i<(pC->nCol+1); i++){ + if( !i || pC->eContent==FTS5_CONTENT_NORMAL || pC->abUnindexed[i-1] ){ + zBind = sqlite3Fts5Mprintf(&rc, "%z%s?%d", zBind, zBind?",":"",i+1); } - zBind[i*2-1] = '\0'; - zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind); - sqlite3_free(zBind); } + + /* Add bindings for any "l*" columns. Only non-UNINDEXED columns + ** require these. 
*/ + if( pC->bLocale && pC->eContent==FTS5_CONTENT_NORMAL ){ + for(i=0; rc==SQLITE_OK && inCol; i++){ + if( pC->abUnindexed[i]==0 ){ + zBind = sqlite3Fts5Mprintf(&rc, "%z,?%d", zBind, pC->nCol+i+2); + } + } + } + + zSql = sqlite3Fts5Mprintf(&rc, azStmt[eStmt], pC->zDb, pC->zName,zBind); + sqlite3_free(zBind); break; } @@ -258615,9 +259459,11 @@ static int sqlite3Fts5StorageOpen( p->pIndex = pIndex; if( bCreate ){ - if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL + || pConfig->eContent==FTS5_CONTENT_UNINDEXED + ){ int nDefn = 32 + pConfig->nCol*10; - char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10); + char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 20); if( zDefn==0 ){ rc = SQLITE_NOMEM; }else{ @@ -258626,8 +259472,20 @@ static int sqlite3Fts5StorageOpen( sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); iOff = (int)strlen(zDefn); for(i=0; inCol; i++){ - sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); - iOff += (int)strlen(&zDefn[iOff]); + if( pConfig->eContent==FTS5_CONTENT_NORMAL + || pConfig->abUnindexed[i] + ){ + sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); + iOff += (int)strlen(&zDefn[iOff]); + } + } + if( pConfig->bLocale ){ + for(i=0; inCol; i++){ + if( pConfig->abUnindexed[i]==0 ){ + sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i); + iOff += (int)strlen(&zDefn[iOff]); + } + } } rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); } @@ -258780,7 +259638,8 @@ static int fts5StorageDeleteFromIndex( sqlite3_value *pVal = 0; const char *pText = 0; int nText = 0; - int bReset = 0; + const char *pLoc = 0; + int nLoc = 0; assert( pSeek==0 || apVal==0 ); assert( pSeek!=0 || apVal!=0 ); @@ -258790,10 +259649,19 @@ static int fts5StorageDeleteFromIndex( pVal = apVal[iCol-1]; } - rc = sqlite3Fts5ExtractText( - pConfig, pVal, pSeek!=0, &bReset, &pText, &nText - ); + if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + rc = 
sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); + }else{ + pText = (const char*)sqlite3_value_text(pVal); + nText = sqlite3_value_bytes(pVal); + if( pConfig->bLocale && pSeek ){ + pLoc = (const char*)sqlite3_column_text(pSeek, iCol + pConfig->nCol); + nLoc = sqlite3_column_bytes(pSeek, iCol + pConfig->nCol); + } + } + if( rc==SQLITE_OK ){ + sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageInsertCallback @@ -258802,7 +259670,7 @@ static int fts5StorageDeleteFromIndex( if( rc==SQLITE_OK && p->aTotalSize[iCol-1]<0 ){ rc = FTS5_CORRUPT; } - if( bReset ) sqlite3Fts5ClearLocale(pConfig); + sqlite3Fts5ClearLocale(pConfig); } } } @@ -258847,7 +259715,9 @@ static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){ int rc = SQLITE_OK; assert( p->pConfig->bContentlessDelete ); - assert( p->pConfig->eContent==FTS5_CONTENT_NONE ); + assert( p->pConfig->eContent==FTS5_CONTENT_NONE + || p->pConfig->eContent==FTS5_CONTENT_UNINDEXED + ); /* Look up the origin of the document in the %_docsize table. Store ** this in stack variable iOrigin. 
*/ @@ -258971,6 +259841,12 @@ static int sqlite3Fts5StorageDelete( if( rc==SQLITE_OK ){ if( p->pConfig->bContentlessDelete ){ rc = fts5StorageContentlessDelete(p, iDel); + if( rc==SQLITE_OK + && bSaveRow + && p->pConfig->eContent==FTS5_CONTENT_UNINDEXED + ){ + rc = sqlite3Fts5StorageFindDeleteRow(p, iDel); + } }else{ rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow); } @@ -258987,7 +259863,9 @@ static int sqlite3Fts5StorageDelete( } /* Delete the %_content record */ - if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL + || pConfig->eContent==FTS5_CONTENT_UNINDEXED + ){ if( rc==SQLITE_OK ){ rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); } @@ -259019,8 +259897,13 @@ static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ ); if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = fts5ExecPrintf(pConfig->db, 0, - "DELETE FROM %Q.'%q_docsize';", - pConfig->zDb, pConfig->zName + "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName + ); + } + + if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_UNINDEXED ){ + rc = fts5ExecPrintf(pConfig->db, 0, + "DELETE FROM %Q.'%q_content';", pConfig->zDb, pConfig->zName ); } @@ -259061,20 +259944,35 @@ static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ - int bReset = 0; /* True if tokenizer locale must be reset */ int nText = 0; /* Size of pText in bytes */ const char *pText = 0; /* Pointer to buffer containing text value */ + int nLoc = 0; /* Size of pLoc in bytes */ + const char *pLoc = 0; /* Pointer to buffer containing text value */ + sqlite3_value *pVal = sqlite3_column_value(pScan, ctx.iCol+1); + if( pConfig->eContent==FTS5_CONTENT_EXTERNAL + && sqlite3Fts5IsLocaleValue(pConfig, pVal) + ){ + rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); + }else{ + pText = (const char*)sqlite3_value_text(pVal); + nText = 
sqlite3_value_bytes(pVal); + if( pConfig->bLocale ){ + int iCol = ctx.iCol + 1 + pConfig->nCol; + pLoc = (const char*)sqlite3_column_text(pScan, iCol); + nLoc = sqlite3_column_bytes(pScan, iCol); + } + } - rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &pText, &nText); if( rc==SQLITE_OK ){ + sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageInsertCallback ); - if( bReset ) sqlite3Fts5ClearLocale(pConfig); + sqlite3Fts5ClearLocale(pConfig); } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); @@ -259141,6 +260039,7 @@ static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ */ static int sqlite3Fts5StorageContentInsert( Fts5Storage *p, + int bReplace, /* True to use REPLACE instead of INSERT */ sqlite3_value **apVal, i64 *piRowid ){ @@ -259148,7 +260047,9 @@ static int sqlite3Fts5StorageContentInsert( int rc = SQLITE_OK; /* Insert the new row into the %_content table. */ - if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ + if( pConfig->eContent!=FTS5_CONTENT_NORMAL + && pConfig->eContent!=FTS5_CONTENT_UNINDEXED + ){ if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ *piRowid = sqlite3_value_int64(apVal[1]); }else{ @@ -259157,33 +260058,52 @@ static int sqlite3Fts5StorageContentInsert( }else{ sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ int i; /* Counter variable */ - rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0); - for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ - sqlite3_value *pVal = apVal[i]; - if( sqlite3_value_nochange(pVal) && p->pSavedRow ){ - /* This is an UPDATE statement, and column (i-2) was not modified. - ** Retrieve the value from Fts5Storage.pSavedRow instead. 
*/ - pVal = sqlite3_column_value(p->pSavedRow, i-1); - }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){ - assert( pConfig->bLocale ); - assert( i>1 ); - if( pConfig->abUnindexed[i-2] ){ - /* At attempt to insert an fts5_locale() value into an UNINDEXED - ** column. Strip the locale away and just bind the text. */ + + assert( FTS5_STMT_INSERT_CONTENT+1==FTS5_STMT_REPLACE_CONTENT ); + assert( bReplace==0 || bReplace==1 ); + rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT+bReplace, &pInsert, 0); + if( pInsert ) sqlite3_clear_bindings(pInsert); + + /* Bind the rowid value */ + sqlite3_bind_value(pInsert, 1, apVal[1]); + + /* Loop through values for user-defined columns. i=2 is the leftmost + ** user-defined column. As is column 1 of pSavedRow. */ + for(i=2; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ + int bUnindexed = pConfig->abUnindexed[i-2]; + if( pConfig->eContent==FTS5_CONTENT_NORMAL || bUnindexed ){ + sqlite3_value *pVal = apVal[i]; + + if( sqlite3_value_nochange(pVal) && p->pSavedRow ){ + /* This is an UPDATE statement, and user-defined column (i-2) was not + ** modified. Retrieve the value from Fts5Storage.pSavedRow. 
*/ + pVal = sqlite3_column_value(p->pSavedRow, i-1); + if( pConfig->bLocale && bUnindexed==0 ){ + sqlite3_bind_value(pInsert, pConfig->nCol + i, + sqlite3_column_value(p->pSavedRow, pConfig->nCol + i - 1) + ); + } + }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ const char *pText = 0; + const char *pLoc = 0; int nText = 0; - rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText); - sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); - }else{ - const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); - int nBlob = sqlite3_value_bytes(pVal); - assert( nBlob>4 ); - sqlite3_bind_blob(pInsert, i, pBlob+4, nBlob-4, SQLITE_TRANSIENT); + int nLoc = 0; + assert( pConfig->bLocale ); + + rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); + if( rc==SQLITE_OK ){ + sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + if( bUnindexed==0 ){ + int iLoc = pConfig->nCol + i; + sqlite3_bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT); + } + } + + continue; } - continue; - } - rc = sqlite3_bind_value(pInsert, i, pVal); + rc = sqlite3_bind_value(pInsert, i, pVal); + } } if( rc==SQLITE_OK ){ sqlite3_step(pInsert); @@ -259218,23 +260138,37 @@ static int sqlite3Fts5StorageIndexInsert( for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ - int bReset = 0; /* True if tokenizer locale must be reset */ int nText = 0; /* Size of pText in bytes */ const char *pText = 0; /* Pointer to buffer containing text value */ + int nLoc = 0; /* Size of pText in bytes */ + const char *pLoc = 0; /* Pointer to buffer containing text value */ + sqlite3_value *pVal = apVal[ctx.iCol+2]; - int bDisk = 0; if( p->pSavedRow && sqlite3_value_nochange(pVal) ){ pVal = sqlite3_column_value(p->pSavedRow, ctx.iCol+1); - bDisk = 1; + if( pConfig->eContent==FTS5_CONTENT_NORMAL && pConfig->bLocale ){ + int iCol = ctx.iCol + 1 + pConfig->nCol; + pLoc = (const char*)sqlite3_column_text(p->pSavedRow, 
iCol); + nLoc = sqlite3_column_bytes(p->pSavedRow, iCol); + } + }else{ + pVal = apVal[ctx.iCol+2]; + } + + if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); + }else{ + pText = (const char*)sqlite3_value_text(pVal); + nText = sqlite3_value_bytes(pVal); } - rc = sqlite3Fts5ExtractText(pConfig, pVal, bDisk, &bReset, &pText,&nText); + if( rc==SQLITE_OK ){ - assert( bReset==0 || pConfig->bLocale ); + sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageInsertCallback ); - if( bReset ) sqlite3Fts5ClearLocale(pConfig); + sqlite3Fts5ClearLocale(pConfig); } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); @@ -259399,37 +260333,61 @@ static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ rc = sqlite3Fts5TermsetNew(&ctx.pTermset); } for(i=0; rc==SQLITE_OK && inCol; i++){ - if( pConfig->abUnindexed[i] ) continue; - ctx.iCol = i; - ctx.szCol = 0; - if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ - rc = sqlite3Fts5TermsetNew(&ctx.pTermset); - } - if( rc==SQLITE_OK ){ - int bReset = 0; /* True if tokenizer locale must be reset */ - int nText = 0; /* Size of pText in bytes */ - const char *pText = 0; /* Pointer to buffer containing text value */ + if( pConfig->abUnindexed[i]==0 ){ + const char *pText = 0; + int nText = 0; + const char *pLoc = 0; + int nLoc = 0; + sqlite3_value *pVal = sqlite3_column_value(pScan, i+1); + + if( pConfig->eContent==FTS5_CONTENT_EXTERNAL + && sqlite3Fts5IsLocaleValue(pConfig, pVal) + ){ + rc = sqlite3Fts5DecodeLocaleValue( + pVal, &pText, &nText, &pLoc, &nLoc + ); + }else{ + if( pConfig->eContent==FTS5_CONTENT_NORMAL && pConfig->bLocale ){ + int iCol = i + 1 + pConfig->nCol; + pLoc = (const char*)sqlite3_column_text(pScan, iCol); + nLoc = sqlite3_column_bytes(pScan, iCol); + } + pText = (const char*)sqlite3_value_text(pVal); + nText = sqlite3_value_bytes(pVal); + } 
+ + ctx.iCol = i; + ctx.szCol = 0; + + if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + rc = sqlite3Fts5TermsetNew(&ctx.pTermset); + } - rc = sqlite3Fts5ExtractText(pConfig, - sqlite3_column_value(pScan, i+1), 1, &bReset, &pText, &nText - ); if( rc==SQLITE_OK ){ + sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageIntegrityCallback ); - if( bReset ) sqlite3Fts5ClearLocale(pConfig); + sqlite3Fts5ClearLocale(pConfig); + } + + /* If this is not a columnsize=0 database, check that the number + ** of tokens in the value matches the aColSize[] value read from + ** the %_docsize table. */ + if( rc==SQLITE_OK + && pConfig->bColumnsize + && ctx.szCol!=aColSize[i] + ){ + rc = FTS5_CORRUPT; + } + aTotalSize[i] += ctx.szCol; + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + sqlite3Fts5TermsetFree(ctx.pTermset); + ctx.pTermset = 0; } - } - if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ - rc = FTS5_CORRUPT; - } - aTotalSize[i] += ctx.szCol; - if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ - sqlite3Fts5TermsetFree(ctx.pTermset); - ctx.pTermset = 0; } } sqlite3Fts5TermsetFree(ctx.pTermset); @@ -259855,7 +260813,7 @@ static const unsigned char sqlite3Utf8Trans1[] = { c = *(zIn++); \ if( c>=0xc0 ){ \ c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + while( zIn=zEof ) return SQLITE_OK; READ_UTF8(zIn, zEof, iCode); - if( iCode==0 ) return SQLITE_OK; if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); }while( iCode==0 ); WRITE_UTF8(zOut, iCode); @@ -261042,8 +262000,11 @@ static int fts5TriTokenize( /* Read characters from the input up until the first non-diacritic */ do { iNext = zIn - (const unsigned char*)pText; + if( zIn>=zEof ){ + iCode = 0; + break; + } READ_UTF8(zIn, zEof, iCode); - if( iCode==0 ) break; if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); }while( iCode==0 ); @@ -263080,7 
+264041,7 @@ static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ } - +/* Here ends the fts5.c composite file. */ #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ /************** End of fts5.c ************************************************/ @@ -263434,6 +264395,21268 @@ SQLITE_API int sqlite3_stmt_init( #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_STMTVTAB) */ /************** End of stmt.c ************************************************/ -/* Return the source-id for this library */ -SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } +/************** Begin file hct_pman.c ****************************************/ +/* +** 2022 April 10 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + + +/************** Include hctInt.h in the middle of hct_pman.c *****************/ +/************** Begin file hctInt.h ******************************************/ + +/* #include */ +/************** Include sqlite3hct.h in the middle of hctInt.h ***************/ +/************** Begin file sqlite3hct.h **************************************/ +/* +** 2023 May 16 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + + + +#ifndef SQLITE3HCT_H +#define SQLITE3HCT_H + +/* +** Make sure we can call this stuff from C++. 
+*/ +#if 0 +extern "C" { +#endif + +#define SQLITE_HCT_JOURNAL_HASHSIZE 16 + +/* +** Initialize the main database for replication. +*/ +SQLITE_API int sqlite3_hct_journal_init(sqlite3 *db); + +/* +** Write a transaction into the database. +*/ +SQLITE_API int sqlite3_hct_journal_write( + sqlite3 *db, /* Write to "main" db of this handle */ + sqlite3_int64 iCid, + const char *zSchema, + const void *pData, int nData, + sqlite3_int64 iSchemaCid +); + +SQLITE_API int sqlite3_hct_journal_truncate(sqlite3 *db, sqlite3_int64 iMinCid); + +/* +** Candidate values for second arg to sqlite3_hct_journal_setmode() +*/ +#define SQLITE_HCT_JOURNAL_MODE_FOLLOWER 0 +#define SQLITE_HCT_JOURNAL_MODE_LEADER 1 + +/* +** Query the LEADER/FOLLOWER setting of the db passed as the only argument. +*/ +SQLITE_API int sqlite3_hct_journal_mode(sqlite3 *db); + +/* +** Set the LEADER/FOLLOWER setting of the db passed as the first argument. +** Return SQLITE_OK if successful. Otherwise, return an SQLite error code +** and leave an English language error message (accessible using +** sqlite3_errmsg()) in the database handle. +*/ +SQLITE_API int sqlite3_hct_journal_setmode(sqlite3 *db, int eMode); + +/* +** Rollback transactions that follow the first hole in the journal. +*/ +SQLITE_API int sqlite3_hct_journal_rollback(sqlite3 *db, sqlite3_int64 iCid); + +/* +** Special values that may be passed as second argument to +** sqlite3_hct_journal_rollback(). +*/ +#define SQLITE_HCT_ROLLBACK_MAXIMUM 0 +#define SQLITE_HCT_ROLLBACK_PRESERVE -1 + +/* +** Set output variable (*piCid) to the CID of the newest available +** database snapshot. Return SQLITE_OK if successful, or an SQLite +** error code if something goes wrong. +*/ +SQLITE_API int sqlite3_hct_journal_snapshot(sqlite3 *db, sqlite3_int64 *piCid); + +/* +** Register a custom validation callback with the database handle. 
+*/ +SQLITE_API int sqlite3_hct_journal_hook( + sqlite3 *db, + void *pArg, + int(*xValidate)( + void *pCopyOfArg, + sqlite3_int64 iCid, + const char *zSchema, + const void *pData, int nData, + sqlite3_int64 iSchemaCid + ) +); + +/* +** Both arguments are assumed to point to SQLITE_HCT_JOURNAL_HASHSIZE +** byte buffers. This function updates the hash stored in buffer pHash +** based on the contents of buffer pData. +*/ +SQLITE_API void sqlite3_hct_journal_hash(void *pHash, const void *pData); + +/* +** It is assumed that buffer pHash points to a buffer +** SQLITE_HCT_JOURNAL_HASHSIZE bytes in size. This function populates this +** buffer with a hash based on the remaining arguments. +*/ +SQLITE_API void sqlite3_hct_journal_hashentry( + void *pHash, /* OUT: Hash of other arguments */ + sqlite3_int64 iCid, + const char *zSchema, + const void *pData, int nData, + sqlite3_int64 iSchemaCid +); + +SQLITE_API void sqlite3_hct_migrate_mode(sqlite3 *db, int bActivate); + +#if 0 +} +#endif +#endif /* SQLITE3HCT_H */ + +/************** End of sqlite3hct.h ******************************************/ +/************** Continuing where we left off in hctInt.h *********************/ + +typedef sqlite3_int64 i64; +typedef unsigned char u8; +typedef unsigned int u32; + +/* +** Primitives for atomic load and store. 
+*/ +#define HctAtomicStore(PTR,VAL) __atomic_store_n((PTR),(VAL), __ATOMIC_SEQ_CST) +#define HctAtomicLoad(PTR) __atomic_load_n((PTR), __ATOMIC_SEQ_CST) + +#define HctCASBool(PTR,OLD,NEW) \ + (int)__sync_bool_compare_and_swap((PTR),(OLD),(NEW)) + + +/* +*/ +typedef struct HctConfig HctConfig; +struct HctConfig { + int nDbFile; /* Number of files (hct_file.c) */ + int nPageSet; /* Used by hct_pman.c */ + int nPageScan; /* Used by hct_pman.c */ + int szLogChunk; /* Used by hctree.c */ + int nTryBeforeUnevict; + int bQuiescentIntegrityCheck; /* PRAGMA hct_quiescent_integrity_check */ + int pgsz; + sqlite3 *db; +}; + +#define HCT_TID_MASK ((((u64)0x00FFFFFF) << 32)|0xFFFFFFFF) +#define HCT_PGNO_MASK ((u64)0xFFFFFFFF) + +#define HCT_MAX_NDBFILE 128 + +#define HCT_DEFAULT_NDBFILE 1 +#define HCT_DEFAULT_NPAGESET 256 +#define HCT_DEFAULT_NTRYBEFOREUNEVICT 100 +#define HCT_DEFAULT_NPAGESCAN 1024 +#define HCT_DEFAULT_SZLOGCHUNK 16384 +#define HCT_DEFAULT_PAGESIZE 4096 + + + +/************** Include hctTMapInt.h in the middle of hctInt.h ***************/ +/************** Begin file hctTMapInt.h **************************************/ +/* +** 2021 February 24 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This header file describes the transaction map implementation. It +** serves two tasks: +** +** * Provides the transaction map itself, a mapping from 56-bit TID values +** to a combination of a CID value (also 56 bits) and some flags. +** +** * Provides the read-lock system required by readers to ensure that old +** database pages and other resources are not reused before they +** are guaranteed to be finished with them. 
+*/ + +/* +*/ + +/* #define HCT_TMAP_PAGESIZE 1024 */ + +#define HCT_TMAP_PGSZBITS 10 +#define HCT_TMAP_PAGESIZE (1 << HCT_TMAP_PGSZBITS) + +#define HCT_TMAP_ENTRYSLOT(iEntry) \ + (((iEntry) >> 3) + (((iEntry) & 0x07) << (HCT_TMAP_PGSZBITS-3))) +// #define HCT_TMAP_ENTRYSLOT(iEntry) (((iEntry) >> 3) + (((iEntry) & 0x07) << 10)) + +/* +** Transaction state - stored in the MSB of the 8-byte transaction map entry. +*/ +#define HCT_TMAP_WRITING (((u64)0x00) << 56) +#define HCT_TMAP_VALIDATING (((u64)0x01) << 56) +#define HCT_TMAP_ROLLBACK (((u64)0x02) << 56) +#define HCT_TMAP_COMMITTED (((u64)0x03) << 56) + +#define HCT_TMAP_STATE_MASK (((u64)0x07) << 56) +#define HCT_TMAP_CID_MASK ~(((u64)0xFF) << 56) + +/* +** There is a single object of this type for each distinct database +** opened within the process. All connections to said database have +** a pointer to the same HctTMapServer object. +*/ +typedef struct HctTMapServer HctTMapServer; + +/* +** Each separate database connection holds a handle of this type for +** the lifetime of the connection. Obtained and later released using +** functions: +** +** sqlite3HctTMapServerNew() +** sqlite3HctTMapServerFree() +*/ +typedef struct HctTMapClient HctTMapClient; + +/* +*/ +typedef struct HctTMap HctTMap; + +/* +** A transaction-map object. +** +** iMinTid: +** This, and all smaller TID values have been finalized (fully committed +** or rolled back). The client may not query the map for any TID values +** less than or equal to this one. 
+** +** iMinCid: +** This and all smaller CID values were committed +*/ +struct HctTMap { + /* Snapshot locking values */ +#if 0 + u64 iMinCid; /* This + all smaller CIDs fully committed */ + u64 iMinTid; /* This + all smaller TIDs fully committed */ +#endif + + /* Transaction map */ + u64 iFirstTid; /* TID corresponding to aaMap[0][0] */ + int nMap; /* Number of mapping pages in aaMap[] */ + u64 **aaMap; /* Array of u64[HCT_TMAP_PAGESIZE] arrays */ +}; + +/* +** Create or delete a tmap server object. +*/ +SQLITE_PRIVATE int sqlite3HctTMapServerNew(u64 iFirstTid, u64 iLastTid, HctTMapServer **pp); +SQLITE_PRIVATE void sqlite3HctTMapServerFree(HctTMapServer *p); + +/* +** Connect/disconnect a tmap client object. +*/ +SQLITE_PRIVATE int sqlite3HctTMapClientNew(HctTMapServer*, HctConfig*, HctTMapClient**); +SQLITE_PRIVATE void sqlite3HctTMapClientFree(HctTMapClient *pClient); + +/* +** Obtain, update or release a reference to a transaction map object. +*/ +SQLITE_PRIVATE int sqlite3HctTMapBegin(HctTMapClient *p, u64 iSnapshot, HctTMap **ppMap); +SQLITE_PRIVATE int sqlite3HctTMapUpdate(HctTMapClient *p, HctTMap **ppMap); +SQLITE_PRIVATE int sqlite3HctTMapEnd(HctTMapClient *p, u64 iCID); + +/* +** Return a TID value for which: +** +** 1. the transactions associated with it and all smaller TID values +** have been finalized (marked as committed or rolled back), and +** +** 2. the transactions associated with it and all smaller TID values +** are included in the snapshots accessed by all current and future +** readers. +** +** All physical and logical pages freed by transactions with TIDs equal to +** or smaller than the returned value may now be reused without disturbing +** current or future readers. 
+*/ +SQLITE_PRIVATE u64 sqlite3HctTMapSafeTID(HctTMapClient*); + +SQLITE_PRIVATE int sqlite3HctTMapNewTID(HctTMapClient *p, u64 iTid, HctTMap **ppMap); + +/* +** Return TID value T for which all transactions with tid values less than or +** equal to T were finished (marked as committed or rolled back), last +** time sqlite3HctTMapBegin() was called. +*/ +SQLITE_PRIVATE u64 sqlite3HctTMapCommitedTID(HctTMapClient*); + +SQLITE_PRIVATE i64 sqlite3HctTMapStats(sqlite3 *db, int iStat, const char **pzStat); + +SQLITE_PRIVATE void sqlite3HctTMapScan(HctTMapClient*); + + +/* +** The following API is used when recovering a replication-enabled database. +** In that case, a new HctTMap object must be created during recovery to +** reflect the contents of the sqlite_hct_journal table. +*/ +SQLITE_PRIVATE int sqlite3HctTMapRecoverySet(HctTMapClient*, u64 iTid, u64 iCid); +SQLITE_PRIVATE void sqlite3HctTMapRecoveryFinish(HctTMapClient*, int rc); + +SQLITE_PRIVATE int sqlite3HctTMapServerSet(HctTMapServer *pServer, u64 iTid, u64 iCid); + + + + +/************** End of hctTMapInt.h ******************************************/ +/************** Continuing where we left off in hctInt.h *********************/ +/************** Include hctFileInt.h in the middle of hctInt.h ***************/ +/************** Begin file hctFileInt.h **************************************/ +/* +** 2023 January 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +************************************************************************* +** +*/ + +typedef struct HctFileServer HctFileServer; +typedef struct HctFile HctFile; + +SQLITE_PRIVATE HctFile *sqlite3HctFileOpen( + int *pRc, + const char *zFile, + HctConfig *pConfig +); +SQLITE_PRIVATE void sqlite3HctFileClose(HctFile *pFile); + +/* +** If the database has not yet been created on disk, create it. Or, if +** the db has already been created, then this function is a no-op. +*/ +SQLITE_PRIVATE int sqlite3HctFileNewDb(HctFile *pFile); + +/* +** Return true if the db has not yet been created on disk. Or false +** if it already has. +*/ +SQLITE_PRIVATE int sqlite3HctFileIsNewDb(HctFile *pFile); + +SQLITE_PRIVATE u32 sqlite3HctFileMaxpage(HctFile *pFile); + +typedef struct HctFilePage HctFilePage; +struct HctFilePage { + u8 *aOld; /* Current buffer, or NULL */ + u8 *aNew; /* New buffer (to be populated) */ + + /* Used internally by hct_file.c. Mostly... */ + u32 iPg; /* logical page number */ + u32 iNewPg; /* New physical page number */ + u32 iOldPg; /* Original physical page number */ + HctFile *pFile; +}; + +/* +** Allocate logical root page numbers. And free the same (required if the +** transaction is rolled back). +*/ +SQLITE_PRIVATE int sqlite3HctFileRootPgno(HctFile *pFile, u32 *piRoot); +SQLITE_PRIVATE int sqlite3HctFileRootFree(HctFile *pFile, u32 iRoot); +SQLITE_PRIVATE int sqlite3HctFileRootNew(HctFile *pFile, u32 iRoot, HctFilePage*); + + +SQLITE_PRIVATE int sqlite3HctFilePageNew(HctFile *pFile, HctFilePage *pPg); + +/* +** Obtain a read-only reference to logical page iPg. +*/ +SQLITE_PRIVATE int sqlite3HctFilePageGet(HctFile *pFile, u32 iPg, HctFilePage *pPg); + +/* +** If the page is not already writable (if pPg->aNew==0), make it writable. +** This involves allocating a new physical page and setting pPg->aNew +** to point to the buffer. +*/ +SQLITE_PRIVATE int sqlite3HctFilePageWrite(HctFilePage *pPg); + +/* +** This is a no-op if the page is not writable. 
+** +** If the page is already writable, reverse this so that it will not be +** written out when PageRelease() or PageCommit() is called. This reclaims +** the physical page that was allocated by the earlier PageWrite() call +** and sets pPg->aNew to NULL. +*/ +SQLITE_PRIVATE void sqlite3HctFilePageUnwrite(HctFilePage *pPg); + +/* +** This is a no-op if the page is not writable. +** +** Commit the new version of the page to disk (i.e. set the page-map entry +** so that the logical page number now maps to the new version of the page +** in pPg->aNew). Then make pPg a non-writable reference to the logical +** page (so that pPg->aOld points to the new version of the page and +** pPg->aNew is NULL). +*/ +SQLITE_PRIVATE int sqlite3HctFilePageCommit(HctFilePage *pPg); + +/* +** Evict the page from the data structure - i.e. set the LOGICAL_EVICTED +** flag for it. This operation fails if the LOGICAL_EVICTED flag has +** already been set, or if the page has been written since it was read. +*/ +SQLITE_PRIVATE int sqlite3HctFilePageEvict(HctFilePage *pPg, int bIrrevocable); + +SQLITE_PRIVATE void sqlite3HctFilePageUnevict(HctFilePage *pPg); + +SQLITE_PRIVATE int sqlite3HctFilePageIsEvicted(HctFile *pFile, u32 iPgno); +SQLITE_PRIVATE int sqlite3HctFilePageIsFree(HctFile *pFile, u32 iPgno, int bLogical); + +/* +** Release a page reference obtained via an earlier call to +** sqlite3HctFilePageGet() or sqlite3HctFilePageNew(). After this call +** pPg->aOld is NULL. +** +** If the page is writable, it is committed (see sqlite3HctFilePageCommit) +** before the reference is released. 
+*/ +SQLITE_PRIVATE int sqlite3HctFilePageRelease(HctFilePage *pPg); + + +SQLITE_PRIVATE int sqlite3HctFilePageGetPhysical(HctFile *pFile, u32 iPg, HctFilePage *pPg); +SQLITE_PRIVATE int sqlite3HctFilePageNewPhysical(HctFile *pFile, HctFilePage *pPg); + +SQLITE_PRIVATE u64 sqlite3HctFileAllocateTransid(HctFile *pFile); +SQLITE_PRIVATE u64 sqlite3HctFileAllocateCID(HctFile *pFile, int); +SQLITE_PRIVATE u64 sqlite3HctFileGetSnapshotid(HctFile *pFile); + +SQLITE_PRIVATE void sqlite3HctFileSetCID(HctFile *pFile, u64); + +/* +** Increment the global write-count by nIncr, and return the final value. +*/ +SQLITE_PRIVATE u64 sqlite3HctFileIncrWriteCount(HctFile *pFile, int nIncr); + +SQLITE_PRIVATE HctTMapClient *sqlite3HctFileTMapClient(HctFile*); + +SQLITE_PRIVATE int sqlite3HctFilePgsz(HctFile *pFile); +SQLITE_PRIVATE int sqlite3HctFileVtabInit(sqlite3 *db); + +SQLITE_PRIVATE u64 sqlite3HctFileSafeTID(HctFile*); +SQLITE_PRIVATE u32 sqlite3HctFilePageRangeAlloc(HctFile*, int bLogical, int nPg); + +SQLITE_PRIVATE int sqlite3HctFileClearInUse(HctFilePage *pPg, int bReuseNow); +SQLITE_PRIVATE int sqlite3HctFileClearPhysInUse(HctFile *pFile, u32 pgno, int bReuseNow); + +SQLITE_PRIVATE void sqlite3HctFileDebugPrint(HctFile *pFile, const char *zFmt, ...); + +SQLITE_PRIVATE char *sqlite3HctFileLogFile(HctFile *pFile); +SQLITE_PRIVATE int sqlite3HctFileStartRecovery(HctFile *pFile, int iStage); +SQLITE_PRIVATE int sqlite3HctFileFinishRecovery(HctFile *pFile, int iStage, int rc); +SQLITE_PRIVATE int sqlite3HctFileRecoverFreelists( + HctFile *pFile, /* File to recover freelists for */ + int nRoot, i64 *aRoot, /* Array of root page numbers */ + int nPhys, i64 *aPhys /* Sorted array of phys. 
pages to preserve */ +); + +SQLITE_PRIVATE int sqlite3HctFileFindLogs(HctFile*, void*, int(*)(void*, const char*)); + +SQLITE_PRIVATE u32 sqlite3HctFilePageMapping(HctFile *pFile, u32 iLogical, int *pbEvicted); + +SQLITE_PRIVATE void sqlite3HctFileICArrays(HctFile*, u8**, u32*, u8**, u32*); +SQLITE_PRIVATE int sqlite3HctFileTreeFree(HctFile *, u32, int); +SQLITE_PRIVATE int sqlite3HctFilePageClearIsRoot(HctFile*, u32); +SQLITE_PRIVATE int sqlite3HctFilePageClearInUse(HctFile *pFile, u32 iPg, int bLogic); + +/************** Include hctPManInt.h in the middle of hctFileInt.h ***********/ +/************** Begin file hctPManInt.h **************************************/ +/* +** 2022 March 20 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +*/ + +/* +** There is a single object of this type for each distinct database opened +** by the process. Allocated and later freed using the following functions. +** +** sqlite3HctPManServerNew() +** sqlite3HctPManServerFree() +** +** Once an HctPManServer object has been created, it is configured with +** the set of free logical and physical pages, which the caller presumably +** discovers by scanning the page-map. +*/ +typedef struct HctPManServer HctPManServer; +typedef struct HctFile HctFile; + +SQLITE_PRIVATE HctPManServer *sqlite3HctPManServerNew( + int *pRc, /* IN/OUT: Error code */ + HctFileServer *pFileServer /* Associated file-server object */ +); +SQLITE_PRIVATE void sqlite3HctPManServerFree(HctPManServer*); + +/* +** This function is called multiple times while scanning the page-map +** during initialization. To load the initial set of free physical and +** logical pages. 
+*/ +SQLITE_PRIVATE void sqlite3HctPManServerInit( + int *pRc, HctPManServer*, u64 iTid, u32 iPg, int bLogical +); + +/* +** Each separate database connection holds a handle of this type for +** the lifetime of the connection. +*/ +typedef struct HctPManClient HctPManClient; + +SQLITE_PRIVATE HctPManClient *sqlite3HctPManClientNew( + int *pRc, + HctConfig*, + HctPManServer*, + HctFile* +); +SQLITE_PRIVATE void sqlite3HctPManClientFree(HctPManClient*); + +/* +** Allocate a new logical or physical page. +*/ +SQLITE_PRIVATE u32 sqlite3HctPManAllocPg( + int *pRc, /* IN/OUT: Error code */ + HctPManClient *p, /* page-manager client handle */ + HctFile *pFile, + int bLogical +); + +/* +** Mark a logical or physical page as no longer in use. Parameter iTid +** is the transaction-id associated with the transaction that freed the +** page. The page may be reused once all clients are accessing a +** snapshot that includes this transaction. In other words, once the +** snapshot id of all readers is greater than or equal to the commit id +** that maps to transaction id iTid. +** +** Sometimes this function is called with iTid==0, to indicate that the +** page in question may be reused immediately. +*/ +SQLITE_PRIVATE void sqlite3HctPManFreePg( + int *pRc, /* IN/OUT: Error code */ + HctPManClient *p, /* page-manager client handle */ + i64 iTid, /* Associated TID value */ + u32 iPg, /* Page number */ + int bLogical /* True for logical, false for physical */ +); + +SQLITE_PRIVATE void sqlite3HctPManClientHandoff(HctPManClient *p); + +SQLITE_PRIVATE void sqlite3HctPManServerReset(HctPManServer *pServer); + +SQLITE_PRIVATE int sqlite3HctPManVtabInit(sqlite3 *db); + +/* +** Mark an entire tree of logical and physical pages as free. The iTid +** parameter works just as it does for sqlite3HctPManFreePg(). +** +** SQLITE_OK is returned if successful, or an error code (e.g. SQLITE_NOMEM) +** otherwise. 
+*/ +SQLITE_PRIVATE int sqlite3HctPManFreeTree(HctPManClient *p, HctFile*, u32 iRoot, u64 iTid); + +SQLITE_PRIVATE int sqlite3HctPManServerInitRoot(int *pRc, HctPManServer*, u64, HctFile*, u32); +SQLITE_PRIVATE i64 sqlite3HctPManStats(sqlite3 *db, int iStat, const char **pzStat); + +/************** End of hctPManInt.h ******************************************/ +/************** Continuing where we left off in hctFileInt.h *****************/ +SQLITE_PRIVATE HctPManClient *sqlite3HctFilePManClient(HctFile*); + +SQLITE_PRIVATE int sqlite3HctFileRootArray(HctFile*, u32**, int*); + +/* Interface used by hct_stats virtual table */ +SQLITE_PRIVATE i64 sqlite3HctFileStats(sqlite3*, int, const char**); + +/* +** Return the total number of physical page allocations made during +** the entire lifetime of this object. +*/ +SQLITE_PRIVATE u64 sqlite3HctFileWriteCount(HctFile *pFile); + +/* +** Return the number of files used to store data within the database (the +** value to return for "PRAGMA hct_ndbfile"). Before returning, set output +** parameter *pbFixed if the database has been created and the number +** of files is therefore fixed, or clear it if the db has yet to be created. +*/ +SQLITE_PRIVATE int sqlite3HctFileNFile(HctFile *pFile, int *pbFixed); + +SQLITE_PRIVATE void sqlite3HctFileSetJrnlPtr(HctFile *pFile, void *pPtr, void(*xDel)(void*)); +SQLITE_PRIVATE void *sqlite3HctFileGetJrnlPtr(HctFile *pFile); + +SQLITE_PRIVATE int sqlite3HctIoerr(int rc); + + +/************** End of hctFileInt.h ******************************************/ +/************** Continuing where we left off in hctInt.h *********************/ + +#ifdef SQLITE_DEBUG +# define SQLITE_LOCKED_ERR(x,y) sqlite3HctLockedErr(x,y) +SQLITE_PRIVATE int sqlite3HctLockedErr(u32 pgno, const char *zReason); +#else +# define SQLITE_LOCKED_ERR(x,y) SQLITE_LOCKED +#endif + +#define HCT_TREE_SCHEMAOP_ROOT 3 + +/* +** Growable buffer type used for various things. 
+*/ +typedef struct HctBuffer HctBuffer; +struct HctBuffer { + u8 *aBuf; + int nBuf; + int nAlloc; +}; +SQLITE_PRIVATE int sqlite3HctBufferGrow(HctBuffer *pBuf, int nSize); +SQLITE_PRIVATE void sqlite3HctBufferFree(HctBuffer *pBuf); + + + +/************************************************************************* +** Interface to code in hct_tree.c +*/ +typedef struct HctTree HctTree; +typedef struct HctTreeCsr HctTreeCsr; + +SQLITE_PRIVATE int sqlite3HctTreeNew(HctTree **ppTree); +SQLITE_PRIVATE void sqlite3HctTreeFree(HctTree *pTree); + +SQLITE_PRIVATE int sqlite3HctTreeInsert(HctTreeCsr*, UnpackedRecord*, i64, int, const u8*,int); +SQLITE_PRIVATE int sqlite3HctTreeAppend(HctTreeCsr*, KeyInfo*, i64, int, const u8*,int); +SQLITE_PRIVATE int sqlite3HctTreeDelete(HctTreeCsr *pCsr); +SQLITE_PRIVATE int sqlite3HctTreeDeleteKey(HctTreeCsr *, UnpackedRecord *, i64, int,const u8*); + +/* +** These functions are used to open and close transactions and nested +** sub-transactions. +** +** The Begin() function is used to open transactions and sub-transactions. +** A successful call to Begin() ensures that there are at least iLevel +** nested transactions open. To open a top-level transaction, pass iLevel=1. +** To open a sub-transaction within the top-level transaction, iLevel=2. +** Passing iLevel=0 is a no-op. +** +** Release() is used to commit transactions and sub-transactions. A +** successful call to Release() ensures that there are at most iLevel +** nested transactions open. To commit a top-level transaction, pass iLevel=0. +** To commit all sub-transactions inside the main transaction, pass iLevel=1. +** +** Function RollbackTo() is used to roll back transactions and +** sub-transactions. A successful call to RollbackTo() restores the database +** to the state it was in when the iLevel'th nested sub-transaction (if any) +** was first opened. And then closes transactions to ensure that there are +** at most iLevel nested transactions open. 
Passing iLevel=0 rolls back and +** closes the top-level transaction. iLevel=1 also rolls back the top-level +** transaction, but leaves it open. iLevel=2 rolls back the sub-transaction +** nested directly inside the top-level transaction (and leaves it open). +*/ +SQLITE_PRIVATE int sqlite3HctTreeBegin(HctTree *pTree, int iStmt); +SQLITE_PRIVATE int sqlite3HctTreeRelease(HctTree *pTree, int iStmt); +SQLITE_PRIVATE int sqlite3HctTreeRollbackTo(HctTree *pTree, int iStmt); + +SQLITE_PRIVATE int sqlite3HctTreeClearOne(HctTree *pTree, u32 iRoot, i64 *pnRow); + +SQLITE_PRIVATE int sqlite3HctTreeCsrOpen(HctTree *pTree, u32 iRoot, HctTreeCsr **ppCsr); +SQLITE_PRIVATE int sqlite3HctTreeCsrClose(HctTreeCsr *pCsr); + +SQLITE_PRIVATE int sqlite3HctTreeCsrNext(HctTreeCsr *pCsr); +SQLITE_PRIVATE int sqlite3HctTreeCsrPrev(HctTreeCsr *pCsr); +SQLITE_PRIVATE int sqlite3HctTreeCsrEof(HctTreeCsr *pCsr); + +SQLITE_PRIVATE int sqlite3HctTreeCsrSeek(HctTreeCsr*, UnpackedRecord*, i64 iKey, int *pRes); +SQLITE_PRIVATE int sqlite3HctTreeCsrFirst(HctTreeCsr *pCsr); +SQLITE_PRIVATE int sqlite3HctTreeCsrLast(HctTreeCsr *pCsr); + +SQLITE_PRIVATE int sqlite3HctTreeCsrKey(HctTreeCsr *pCsr, i64 *piKey); +SQLITE_PRIVATE int sqlite3HctTreeCsrData(HctTreeCsr *pCsr, int *pnData, const u8 **paData); +SQLITE_PRIVATE int sqlite3HctTreeCsrIsDelete(HctTreeCsr *pCsr); + +SQLITE_PRIVATE void sqlite3HctTreeCsrPin(HctTreeCsr *pCsr); +SQLITE_PRIVATE void sqlite3HctTreeCsrUnpin(HctTreeCsr *pCsr); + +SQLITE_PRIVATE int sqlite3HctTreeCsrHasMoved(HctTreeCsr *pCsr); +SQLITE_PRIVATE int sqlite3HctTreeCsrRestore(HctTreeCsr *pCsr, int *pIsDifferent); +SQLITE_PRIVATE void sqlite3HctTreeCsrClear(HctTreeCsr *pCsr); + +SQLITE_PRIVATE u32 sqlite3HctTreeCsrRoot(HctTreeCsr *pCsr); + + +/* +** Iterate through non-empty tables/indexes within an HctTree structure. Used +** when flushing contents to disk. 
+** +** If parameter bSchemaOp is false, then no callback is issued for the table +** with root page number HCT_TREE_SCHEMAOP_ROOT. If bSchemaOp is non-zero, +** then HCT_TREE_SCHEMAOP_ROOT is treated like any other table. +*/ + +SQLITE_PRIVATE int sqlite3HctTreeForeach( + HctTree *pTree, + int bSchemOp, + void *pCtx, + int (*x)(void *, u32, KeyInfo*) +); +SQLITE_PRIVATE void sqlite3HctTreeClear(HctTree *pTree); + +SQLITE_PRIVATE void sqlite3HctTreeCsrIncrblob(HctTreeCsr *pCsr); +SQLITE_PRIVATE int sqlite3HctTreeCsrReseek(HctTreeCsr *pCsr, int*); + +SQLITE_PRIVATE int sqlite3HctTreeUpdateMeta(HctTree*, const u8*, int); + +/************************************************************************* +** Interface to code in hct_database.c +*/ +typedef struct HctDatabase HctDatabase; +typedef struct HctDbCsr HctDbCsr; + +typedef struct HctJournal HctJournal; + +SQLITE_PRIVATE HctDatabase *sqlite3HctDbFind(sqlite3*, int); +SQLITE_PRIVATE int sqlite3HctDetectJournals(sqlite3 *db); + +SQLITE_PRIVATE HctDatabase *sqlite3HctDbOpen(int*, const char *zFile, HctConfig*); +SQLITE_PRIVATE void sqlite3HctDbClose(HctDatabase *pDb); + +SQLITE_PRIVATE int sqlite3HctDbRootNew(HctDatabase *p, u32 *piRoot); +SQLITE_PRIVATE int sqlite3HctDbRootFree(HctDatabase *p, u32 iRoot); + +SQLITE_PRIVATE int sqlite3HctDbRootInit(HctDatabase *p, int bIndex, u32 iRoot); +SQLITE_PRIVATE void sqlite3HctDbRootPageInit(int bIndex, u8 *aPage, int szPage); +SQLITE_PRIVATE int sqlite3HctDbGetMeta(HctDatabase *p, u8 *aBuf, int nBuf); + +SQLITE_PRIVATE int sqlite3HctDbInsert( + HctDatabase *pDb, + u32 iRoot, + UnpackedRecord *pRec, i64 iKey, + int bDel, int nData, const u8 *aData, + int *pnRetry +); +SQLITE_PRIVATE int sqlite3HctDbInsertFlush(HctDatabase *pDb, int *pnRetry); +SQLITE_PRIVATE int sqlite3HctDbStartRead(HctDatabase*,HctJournal*); +SQLITE_PRIVATE int sqlite3HctDbStartWrite(HctDatabase*, u64*); +SQLITE_PRIVATE int sqlite3HctDbEndWrite(HctDatabase*, u64, int); +SQLITE_PRIVATE int 
sqlite3HctDbEndRead(HctDatabase*); +SQLITE_PRIVATE int sqlite3HctDbValidate(sqlite3*, HctDatabase*, u64 *piCid, int*); + +SQLITE_PRIVATE i64 sqlite3HctDbTid(HctDatabase *); + +SQLITE_PRIVATE void sqlite3HctDbRollbackMode(HctDatabase*,int); + +SQLITE_PRIVATE int sqlite3HctDbCsrOpen(HctDatabase*, struct KeyInfo*, u32 iRoot, HctDbCsr**); +SQLITE_PRIVATE void sqlite3HctDbCsrClose(HctDbCsr *pCsr); + +SQLITE_PRIVATE void sqlite3HctDbCsrNosnap(HctDbCsr *pCsr, int bNosnap); + +SQLITE_PRIVATE void sqlite3HctDbCsrDir(HctDbCsr*, int eDir); +SQLITE_PRIVATE int sqlite3HctDbCsrSeek(HctDbCsr*, UnpackedRecord*, i64 iKey, int *pRes); + +SQLITE_PRIVATE int sqlite3HctDbCsrEof(HctDbCsr*); +SQLITE_PRIVATE int sqlite3HctDbCsrFirst(HctDbCsr*); +SQLITE_PRIVATE int sqlite3HctDbCsrLast(HctDbCsr*); +SQLITE_PRIVATE int sqlite3HctDbCsrNext(HctDbCsr*); +SQLITE_PRIVATE int sqlite3HctDbCsrPrev(HctDbCsr*); +SQLITE_PRIVATE void sqlite3HctDbCsrClear(HctDbCsr*); + +SQLITE_PRIVATE void sqlite3HctDbCsrKey(HctDbCsr*, i64 *piKey); +SQLITE_PRIVATE int sqlite3HctDbCsrData(HctDbCsr *pCsr, int *pnData, const u8 **paData); +SQLITE_PRIVATE int sqlite3HctDbCsrLoadAndDecode(HctDbCsr *pCsr, UnpackedRecord **ppRec); + +SQLITE_PRIVATE int sqlite3HctDbIsIndex(HctDatabase *pDb, u32 iRoot, int *pbIndex); + +SQLITE_PRIVATE int sqlite3HctDbStartRecovery(HctDatabase *pDb, int iStage); +SQLITE_PRIVATE int sqlite3HctDbFinishRecovery(HctDatabase *db, int iStage, int rc); +SQLITE_PRIVATE void sqlite3HctDbRecoverTid(HctDatabase *db, u64 iTid); + +SQLITE_PRIVATE char *sqlite3HctDbLogFile(HctDatabase*); + +SQLITE_PRIVATE i64 sqlite3HctDbNCasFail(HctDatabase*); + +SQLITE_PRIVATE char *sqlite3HctDbIntegrityCheck(HctDatabase*, u32 *aRoot,Mem*,int nRoot, int*); +SQLITE_PRIVATE i64 sqlite3HctDbStats(sqlite3 *db, int iStat, const char **pzStat); + +SQLITE_PRIVATE int sqlite3HctDbCsrRollbackSeek(HctDbCsr*, UnpackedRecord*, i64, int *pOp); + +SQLITE_PRIVATE void sqlite3HctDbSetSavePhysical( + HctDatabase *pDb, + int (*xSave)(void*, i64 
iPhys), + void *pSave +); + +SQLITE_PRIVATE char *sqlite3HctDbRecordToText(sqlite3 *db, const u8 *aRec, int nRec); + +SQLITE_PRIVATE void sqlite3HctDbTMapScan(HctDatabase *pDb); + +SQLITE_PRIVATE void sqlite3HctDbTransIsConcurrent(HctDatabase *pDb, int bConcurrent); + +SQLITE_PRIVATE HctFile *sqlite3HctDbFile(HctDatabase *pDb); + +SQLITE_PRIVATE int sqlite3HctDbWalkTree( + HctFile *pFile, /* File tree resides in */ + u32 iRoot, /* Root page of tree */ + int (*x)(void*, u32, u32), /* Callback function */ + void *pCtx /* First argument to pass to x() */ +); + +SQLITE_PRIVATE int sqlite3HctDbPagesize(HctDatabase *pDb); + +SQLITE_PRIVATE void sqlite3HctDbRecordTrim(UnpackedRecord *pRec); + +/* +** This function returns the current snapshot-id. It may only be called +** when a read transaction is active. +*/ +SQLITE_PRIVATE i64 sqlite3HctDbSnapshotId(HctDatabase *pDb); + +SQLITE_PRIVATE int sqlite3HctDbCsrFindLastWrite( + HctDbCsr *pCsr, /* Cursor to seek */ + UnpackedRecord *pRec, /* Key for index/without rowid tables */ + i64 iKey, /* Key for intkey tables */ + u64 *piCid /* Last CID to write to this key */ +); + +SQLITE_PRIVATE void sqlite3HctDbJrnlWriteCid(HctDatabase *pDb, u64 iVal); + +/************************************************************************* +** Interface to code in hct_file.c +*/ + +/************************************************************************* +** Interface to code in hct_record.c +*/ +SQLITE_PRIVATE int sqlite3HctSerializeRecord( + UnpackedRecord *pRec, /* Record to serialize */ + u8 **ppRec, /* OUT: buffer containing serialization */ + int *pnRec /* OUT: size of (*ppRec) in bytes */ +); + +/************************************************************************* +** Interface to code in hct_stats.c +*/ +SQLITE_PRIVATE int sqlite3HctStatsInit(sqlite3*); + +/************************************************************************* +** Utility functions: +*/ +SQLITE_PRIVATE void *sqlite3HctMalloc(int *pRc, i64 nByte); + 
+/************************************************************************* +** hctree.c: +**/ + +/************** Include hctJrnlInt.h in the middle of hctInt.h ***************/ +/************** Begin file hctJrnlInt.h **************************************/ +/* +** 2023 January 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +*/ + +typedef struct HctJournal HctJournal; + +/* +** If schema pSchema contains the special tables sqlite_hct_journal and +** sqlite_hct_baseline, allocate a new HctJournal object, set (*pp) +** to point to it and return SQLITE_OK. Or, if neither table can be +** found, set (*pp) to NULL and return SQLITE_OK. +** +** If only one of the required tables is found (SQLITE_CORRUPT), or if an +** OOM error occurs (SQLITE_NOMEM), return an SQLite error code. The final +** value of (*pp) is NULL in this case. +*/ +SQLITE_PRIVATE int sqlite3HctJournalNewIf(Schema*, HctTree*, HctDatabase*, HctJournal **pp); + +SQLITE_PRIVATE void sqlite3HctJournalClose(HctJournal*); + + +SQLITE_PRIVATE int sqlite3HctJrnlLog( + HctJournal *pJrnl, + sqlite3 *db, + Schema *pSchema, + u64 iCid, + u64 iTid, + int *pbCustomValid +); + +/* +** This is called as part of stage 1 recovery (the bit after the upper layer +** has loaded the database schema). The recovery mutex is held, so the client +** has exclusive access to the database on disk. +*/ +SQLITE_PRIVATE int sqlite3HctJrnlRecovery(HctJournal *pJrnl, HctDatabase *pDb); + +SQLITE_PRIVATE int sqlite3HctJrnlSavePhysical(sqlite3 *db, HctJournal *pJrnl, + int (*xSave)(void*, i64 iPhys), void *pSave +); + +/* +** Register the hct_journal_entry() SQL user-function with the database +** handle. 
For decoding the "data" column of the sqlite_hct_journal table. +*/ +SQLITE_PRIVATE int sqlite3HctJrnlInit(sqlite3 *db); + +/* +** Return non-zero if (1) argument pJrnl is not NULL, and either (2a) argument +** iTable is the logical root page of either the journal or baseline table +** represented by pJrnl, or (2b) the connection is in follower mode. +** +** Before returning, set output variable (*pbNosnap) to non-zero if condition +** (2a) was true. To indicate that the table does not use snapshots - all +** committed rows are visible. +*/ +SQLITE_PRIVATE int sqlite3HctJournalIsReadonly(HctJournal *pJrnl, u64 iTable, int *pbNosnap); + +SQLITE_PRIVATE int sqlite3HctJrnlRollbackEntry(HctJournal *pJrnl, i64 iTid); + +SQLITE_PRIVATE int sqlite3HctJrnlWriteEmpty(HctJournal *Jrnl, u64 iCid, u64 iTid, sqlite3 *db); + +SQLITE_PRIVATE u64 sqlite3HctJrnlWriteTid(HctJournal *pJrnl, u64 *piCid); + +SQLITE_PRIVATE u64 sqlite3HctJournalSnapshot(HctJournal *pJrnl); + +SQLITE_PRIVATE void sqlite3HctJournalFixSchema(HctJournal *pJrnl, sqlite3*, void *pSchema); + +SQLITE_PRIVATE void sqlite3HctJournalSchemaVersion(HctJournal *pJrnl, u32 *pSchemaVersion); + +SQLITE_PRIVATE void sqlite3HctJrnlInvokeHook(HctJournal *pJrnl, sqlite3 *db); + +/************** End of hctJrnlInt.h ******************************************/ +/************** Continuing where we left off in hctInt.h *********************/ +SQLITE_PRIVATE HctJournal *sqlite3HctJrnlFind(sqlite3*); + +SQLITE_PRIVATE int sqlite3HctBtreeIsNewTable(Btree *pBt, u64 iRoot); +SQLITE_PRIVATE u64 sqlite3HctBtreeSnapshotId(Btree *pBt); + +SQLITE_PRIVATE i64 sqlite3HctMainStats(sqlite3 *db, int iStat, const char **pzStat); + + + +/************** End of hctInt.h **********************************************/ +/************** Continuing where we left off in hct_pman.c *******************/ + +typedef struct HctPManPageset HctPManPageset; +typedef struct HctPManTree HctPManTree; + +#define PAGESET_INIT_SIZE 1000 + +typedef struct 
HctPManFreePg HctPManFreePg;
+typedef struct HctPManFreePgSet HctPManFreePgSet;
+
+struct HctPManFreePg {
+  i64 pgno;                       /* The free page number */
+  i64 iTid;                       /* TID of transaction that freed page (0 is used for pages queued at the front of the buffer) */
+};
+
+struct HctPManFreePgSet {
+  HctPManFreePg *aPg;             /* Page buffer, used as a circular buffer */
+  int nAlloc;                     /* Allocated size of aPg[], in entries */
+  int iFirst;                     /* Index of first entry in aPg[]; entries wrap modulo nAlloc */
+  int nPg;                        /* Number of valid pages in aPg[] */
+};
+
+
+
+
+/****************************************************************/
+
+/*
+** A basket of free page ids - a pageset - is represented by an instance
+** of the following type.
+**
+** nAlloc:
+**   Allocated size of aPg[] array, in entries (not bytes).
+**
+** nPg:
+**   Number of valid entries in aPg[].
+**
+** aPg:
+**   Array of free logical or physical page ids.
+**
+** iMaxTid:
+**   When a page is freed, it is associated with a TID. Such that the page
+**   may be reused once it is guaranteed that all current and future readers
+**   include in their snapshots all transactions with TID values less than
+**   the associated TID. The maximum of all these values for pages in the
+**   page set is stored in this variable.
+**
+** pNext:
+**   Used to link the HctPManServer.aList[] lists together.
+*/
+struct HctPManPageset {
+  i64 iMaxTid;                    /* Max associated TID of aPg[] entries */
+  int nAlloc;                     /* Allocated size of aPg[] array */
+  int nPg;                        /* Number of valid entries in aPg[] */
+  u32 *aPg;                       /* Array of page numbers */
+  HctPManPageset *pNext;          /* Next in list */
+};
+
+/*
+** A tree of free logical and physical pages.
+*/
+struct HctPManTree {
+  u32 iRoot;                      /* Logical root of free tree */
+  i64 iTid;                       /* Associated TID value */
+};
+
+/*
+** Indexes into the HctPManServer.aList[] and HctPManClient.aPgSet[]
+** arrays. These match the values of the bLogical arguments (0 for
+** physical, 1 for logical) that the functions in this file use to index
+** those arrays.
+*/
+#define PAGESET_PHYSICAL 0
+#define PAGESET_LOGICAL  1
+
+/*
+** aList[]:
+**   aList[0].pHead is a pointer to the first element of a singly-linked
+**   list of pagesets containing free physical page ids. aList[0].pTail
+**   always points to the last element of this list. The list is sorted
+**   in order of HctPManPageset.iMaxTid values.
+**
+**   aList[1] is similar, but for logical page ids.
+**
+** aTree[]:
+**   Array of nTree tree structures to eventually walk and free
+*/
+struct HctPManServer {
+  sqlite3_mutex *pMutex;          /* Mutex to protect this object */
+  HctFileServer *pFileServer;     /* Associated file-server object */
+  struct HctPManServerList {
+    HctPManPageset *pHead;
+    HctPManPageset *pTail;
+  } aList[2];
+
+  int nTree;
+  HctPManTree *aTree;
+};
+
+/*
+** Event counters used by the hctstats virtual table.
+**
+** nMutex is incremented each time hctPManMutexEnter() is called, and
+** nMutexBlock each time that call could not obtain the mutex with
+** sqlite3_mutex_try() and so had to wait in sqlite3_mutex_enter().
+*/
+typedef struct HctPManStats HctPManStats;
+struct HctPManStats {
+  i64 nMutex;
+  i64 nMutexBlock;
+};
+
+/*
+** aPgSet[]:
+**   Two circular buffers of free pages held by this client - physical
+**   page ids in aPgSet[0] and logical page ids in aPgSet[1]. Each entry
+**   carries the TID it was freed with. Entries added with TID 0 are
+**   inserted at the front of the buffer, all others are appended to the
+**   end. Pages are handed back to the server object in HctPManPageset
+**   batches by hctPManHandback().
+**
+** stats:
+**   Mutex counters, updated by hctPManMutexEnter().
+*/
+struct HctPManClient {
+  HctConfig *pConfig;
+  HctPManServer *pServer;
+  HctFile *pFile;
+
+  HctPManFreePgSet aPgSet[2];     /* Free physical and logical pages */
+
+  HctPManStats stats;
+};
+
+static void hctPManMutexEnter(HctPManClient *pClient){
+  sqlite3_mutex *pMutex = pClient->pServer->pMutex;
+  pClient->stats.nMutex++;        /* Count every attempt to take the mutex */
+  if( sqlite3_mutex_try(pMutex)!=SQLITE_OK ){
+    pClient->stats.nMutexBlock++; /* Non-blocking attempt failed - count it, then block */
+    sqlite3_mutex_enter(pMutex);
+  }
+}
+
+
+#define ENTER_PMAN_MUTEX(pClient) hctPManMutexEnter(pClient)
+#define LEAVE_PMAN_MUTEX(pClient) sqlite3_mutex_leave(pClient->pServer->pMutex)
+
+/*
+** Utility malloc function for hct. Allocate nByte bytes of zeroed memory.
+*/ +SQLITE_PRIVATE void *sqlite3HctMalloc(int *pRc, i64 nByte){ + void *pRet = 0; + assert( nByte!=0 ); + if( *pRc==SQLITE_OK ){ + pRet = sqlite3MallocZero(nByte); + if( pRet==0 ){ + *pRc = SQLITE_NOMEM_BKPT; + } + } + return pRet; +} + + +/* +** Allocate and return a new HctPManServer object. +*/ +SQLITE_PRIVATE HctPManServer *sqlite3HctPManServerNew( + int *pRc, + HctFileServer *pFileServer +){ + int rc = *pRc; + HctPManServer *pRet = 0; + pRet = sqlite3HctMalloc(&rc, sizeof(*pRet)); + if( pRet ){ + pRet->pFileServer = pFileServer; + pRet->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); + if( pRet->pMutex==0 ){ + rc = SQLITE_NOMEM_BKPT; + } + } + + if( rc!=SQLITE_OK ){ + sqlite3HctPManServerFree(pRet); + pRet = 0; + } + *pRc = rc; + return pRet; +} + + +SQLITE_PRIVATE void sqlite3HctPManServerReset(HctPManServer *pServer){ + int ii = 0; + for(ii=0; ii<2; ii++){ + HctPManPageset *pNext = pServer->aList[ii].pHead; + while( pNext ){ + HctPManPageset *pDel = pNext; + pNext = pNext->pNext; + sqlite3_free(pDel); + } + memset(&pServer->aList[ii], 0, sizeof(struct HctPManServerList)); + } +} + +/* +** Free an HctPManServer object allocated by an earlier call to +** sqlite3HctPManServerNew(). +*/ +SQLITE_PRIVATE void sqlite3HctPManServerFree(HctPManServer *pServer){ + if( pServer ){ + sqlite3HctPManServerReset(pServer); + sqlite3_mutex_free(pServer->pMutex); + sqlite3_free(pServer->aTree); + sqlite3_free(pServer); + } +} + +/* +** Allocate and return a pointer to a new pageset object with enough +** space for up to nAlloc page ids. +*/ +static HctPManPageset *hctPManPagesetNew(int *pRc, int nAlloc){ + const int nByte = sizeof(HctPManPageset) + nAlloc*sizeof(u32); + HctPManPageset *pRet = 0; + + pRet = (HctPManPageset*)sqlite3HctMalloc(pRc, nByte); + if( pRet ){ + pRet->aPg = (u32*)&pRet[1]; + pRet->nAlloc = nAlloc; + } + + return pRet; +} + +/* +** Add page iPg directly to the list of free pages managed by server pServer. 
+** iPg may be either a logical (if bLogical==1) or a physical (if bLogical==0) +** page id. It is available for reuse immediately. +** +** This function is not threadsafe. It is only called during initialization, +** when there is only one thread that may be accessing object pServer. +*/ +SQLITE_PRIVATE void sqlite3HctPManServerInit( + int *pRc, + HctPManServer *pServer, + u64 iTid, + u32 iPg, + int bLogical +){ + struct HctPManServerList *p = &pServer->aList[bLogical]; + assert( bLogical==0 || bLogical==1 ); + + if( p->pHead==0 || p->pHead->nPg==p->pHead->nAlloc ){ + HctPManPageset *pNew = hctPManPagesetNew(pRc, PAGESET_INIT_SIZE); + if( pNew==0 ) return; + pNew->pNext = p->pHead; + pNew->iMaxTid = iTid; + p->pHead = pNew; + if( p->pTail==0 ) p->pTail = pNew; + } + p->pHead->aPg[p->pHead->nPg++] = iPg; +} + +/* +** Allocate a new page-manager client. +*/ +SQLITE_PRIVATE HctPManClient *sqlite3HctPManClientNew( + int *pRc, /* IN/OUT: Error code */ + HctConfig *pConfig, /* Connection configuration object */ + HctPManServer *pServer, /* Page-manager server to connect to */ + HctFile *pFile /* File object */ +){ + HctPManClient *pClient = 0; + pClient = (HctPManClient*)sqlite3HctMalloc(pRc, sizeof(HctPManClient)); + if( pClient ){ + pClient->pConfig = pConfig; + pClient->pServer = pServer; + pClient->pFile = pFile; + } + return pClient; +} + +/* +** Hand off a page-set object to the server passed as the first argument. 
+*/ +static void hctPManServerHandoff( + HctPManServer *p, /* Server object */ + HctPManPageset *pPageSet, /* Pageset to pass to the server */ + int bLogical, /* True for logical, false for physical ids */ + int bUsable /* Page ids are immediately usable */ +){ + if( pPageSet ){ + struct HctPManServerList *pList = &p->aList[bLogical]; + if( bUsable ){ + pPageSet->pNext = pList->pHead; + pList->pHead = pPageSet; + if( pList->pTail==0 ) pList->pTail = pPageSet; + }else{ + pPageSet->pNext = 0; + if( pList->pTail==0 ){ + pList->pTail = pList->pHead = pPageSet; + }else{ + pList->pTail->pNext = pPageSet; + pList->pTail = pPageSet; + } + } + } +} + +/* +** +*/ +static int hctPManHandback( + HctPManClient *pClient, /* Client to hand pages back from */ + int bLogical, /* True for logical pages, false for phys. */ + int nPg /* Number of pages to hand back */ +){ + u64 iSafeTid = sqlite3HctFileSafeTID(pClient->pFile); + const int nPageSet = pClient->pConfig->nPageSet; + HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; + int nRem = nPg; + int rc = SQLITE_OK; + + HctPManPageset *pList = 0; + + assert( bLogical==0 || bLogical==1 ); + assert( nPg<=pSet->nPg ); + + while( nRem>0 ){ + int ii = 0; + HctPManPageset *pNew = 0; + int nCopy = MIN(nRem, nPageSet); + + nRem -= nCopy; + pNew = hctPManPagesetNew(&rc, nCopy); + if( !pNew ) break; + for(ii=0; iiiFirst + ii) % pSet->nAlloc; + pNew->aPg[pNew->nPg++] = (u32)(pSet->aPg[iPg].pgno); + pNew->iMaxTid = pSet->aPg[iPg].iTid; + } + pSet->iFirst = (pSet->iFirst+nCopy) % pSet->nAlloc; + pSet->nPg -= nCopy; + + pNew->pNext = pList; + pList = pNew; + } + assert( pList || nPg==0 || rc!=SQLITE_OK ); + + ENTER_PMAN_MUTEX(pClient); + while( pList ){ + int bSafe = (pList->iMaxTid<=iSafeTid); + HctPManPageset *pNext = pList->pNext; + pList->pNext = 0; + hctPManServerHandoff(pClient->pServer, pList, bLogical, bSafe); + pList = pNext; + } + LEAVE_PMAN_MUTEX(pClient); + + return rc; +} + +/* +** Free a page-manager client. 
+*/ +SQLITE_PRIVATE void sqlite3HctPManClientFree(HctPManClient *pClient){ + if( pClient ){ + /* Return all pages to the server object */ + hctPManHandback(pClient, 0, pClient->aPgSet[0].nPg); + hctPManHandback(pClient, 1, pClient->aPgSet[1].nPg); + + /* Free allocations */ + sqlite3_free(pClient->aPgSet[0].aPg); + sqlite3_free(pClient->aPgSet[1].aPg); + sqlite3_free(pClient); + } +} + + +typedef struct FreeTreeCtx FreeTreeCtx; +struct FreeTreeCtx { + HctFile *pFile; + HctPManClient *pPManClient; +}; + +static int pmanFreeTreeCb(void *pCtx, u32 iLogic, u32 iPhys){ + FreeTreeCtx *p = (FreeTreeCtx*)pCtx; + int rc = SQLITE_OK; + + if( iLogic && !sqlite3HctFilePageIsFree(p->pFile, iLogic, 1) ){ + rc = sqlite3HctFilePageClearInUse(p->pFile, iLogic, 1); + sqlite3HctPManFreePg(&rc, p->pPManClient, 0, iLogic, 1); + } + if( iPhys && !sqlite3HctFilePageIsFree(p->pFile, iPhys, 0) && rc==SQLITE_OK ){ + rc = sqlite3HctFilePageClearInUse(p->pFile, iPhys, 0); + sqlite3HctPManFreePg(&rc, p->pPManClient, 0, iPhys, 0); + } + + return rc; +} + +static int hctPManFreeTreeNow( + HctPManClient *p, + HctFile *pFile, + u32 iRoot +){ + int rc = SQLITE_OK; + FreeTreeCtx ctx; + ctx.pPManClient = p; + ctx.pFile = pFile; + rc = sqlite3HctDbWalkTree(pFile, iRoot, pmanFreeTreeCb, (void*)&ctx); + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageClearIsRoot(pFile, iRoot); + } + return rc; +} + +#if 0 +static void pman_debug( + HctPManClient *pClient, + const char *zOp, + int bLogical, + u32 iPg, + i64 iTid +){ + printf("pman: (%p) %s %s page %d - tid=%lld\n", pClient, + zOp, bLogical ? "LOGICAL" : "PHYSICAL", (int)iPg, iTid + ); + fflush(stdout); +} + +static void pman_debug_new_pageset( + HctPManPageset *pPageSet, + int bLogical, + u64 iSafeTid, + u64 iServerTid +){ + printf( + "pman: new %s pageset - safetid=%lld servertid=%lld\n", + bLogical ? 
"LOGICAL" : "PHYSICAL", iSafeTid, iServerTid + ); + fflush(stdout); +} +#else + +# define pman_debug(a,b,c,d,e) +# define pman_debug_new_pageset(a,b,c,d) + +#endif + +/* +** Ensure that the circular buffer identified by bLogical has at least +** nPg free slots in it. +*/ +static int hctPManMakeSpace( + HctPManClient *pClient, + int bLogical, + int nPg +){ + int rc = SQLITE_OK; + HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; + + if( (pSet->nAlloc-pSet->nPg)nPg + nPg; + int nByte = nNew * sizeof(HctPManFreePg); + HctPManFreePg *aNew = (HctPManFreePg*)sqlite3_realloc(pSet->aPg, nByte); + + if( aNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + pSet->aPg = aNew; + if( (pSet->iFirst + pSet->nPg)>pSet->nAlloc ){ + int nExtra = nNew - pSet->nAlloc; + int nStart = pSet->nPg - (pSet->nAlloc - pSet->iFirst); + + if( nExtra>=nStart ){ + memcpy(&aNew[pSet->nAlloc], aNew, nStart*sizeof(HctPManFreePg)); + }else{ + memcpy(&aNew[pSet->nAlloc], aNew, nExtra*sizeof(HctPManFreePg)); + memmove(aNew, &aNew[nExtra], (nStart-nExtra)*sizeof(HctPManFreePg)); + } + } + pSet->nAlloc = nNew; + } + } + + return rc; +} + +static void hctPManAddFree( + HctPManClient *pClient, + int bLogical, + i64 iPg, + i64 iTid +){ + HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; + int iIdx = 0; + + assert( pSet->nPgnAlloc ); + if( iTid==0 ){ + if( pSet->iFirst==0 ) pSet->iFirst = pSet->nAlloc; + pSet->iFirst--; + iIdx = pSet->iFirst; + }else{ + iIdx = (pSet->iFirst + pSet->nPg) % pSet->nAlloc; + } + + pSet->nPg++; + pSet->aPg[iIdx].pgno = iPg; + pSet->aPg[iIdx].iTid = iTid; +} + + +/* +** Allocate a new logical or physical page. 
+*/
+/*
+** The page is taken from, in order of preference:
+**
+**   1. The client's own buffer, if its first entry is usable.
+**   2. A usable pageset obtained from the server object.
+**   3. A range of HctConfig.nPageSet new pages allocated from the file.
+**
+** Before (2) or (3), any free tree queued on the server whose TID is at
+** or below the safe TID is walked and its pages freed.
+**
+** Returns the page number, or 0 if an error occurs. In the error case
+** (*pRc) is set to an SQLite error code. (*pRc) is not modified on
+** success.
+*/
+SQLITE_PRIVATE u32 sqlite3HctPManAllocPg(
+  int *pRc,                       /* IN/OUT: Error code */
+  HctPManClient *pClient,         /* page-manager client handle */
+  HctFile *pFile,
+  int bLogical
+){
+  HctPManServer *p = pClient->pServer;
+  u64 iSafeTid = sqlite3HctFileSafeTID(pFile);
+  HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical];
+  u32 iRoot = 0;
+  HctPManPageset *pPgset = 0;
+  int rc = SQLITE_OK;
+
+  /* Check if the client has a usable page already. If so, return early. */
+  if( pSet->nPg>0 && pSet->aPg[pSet->iFirst].iTid<=iSafeTid ){
+    u32 pgno = pSet->aPg[pSet->iFirst].pgno;
+
+    pman_debug(pClient, "alloc", bLogical, pgno, pSet->aPg[pSet->iFirst].iTid);
+
+    pSet->iFirst = (pSet->iFirst+1) % pSet->nAlloc;
+    pSet->nPg--;
+    return pgno;
+  }
+
+  do{
+    iRoot = 0;
+
+    /* Attempt to allocate a page from the page-manager server. */
+    ENTER_PMAN_MUTEX(pClient);
+    if( p->nTree>0 && p->aTree[0].iTid<=iSafeTid ){
+      /* A tree structure that can be traversed to find free pages. */
+      iRoot = p->aTree[0].iRoot;
+      p->nTree--;
+      memmove(&p->aTree[0], &p->aTree[1], (p->nTree)*sizeof(HctPManTree));
+    }else{
+      struct HctPManServerList *pList = &p->aList[bLogical];
+      if( pList->pHead && pList->pHead->iMaxTid<=iSafeTid ){
+        /* A page-set object full of usable pages */
+        pPgset = pList->pHead;
+        pList->pHead = pList->pHead->pNext;
+        if( pList->pHead==0 ) pList->pTail = 0;
+      }
+    }
+    LEAVE_PMAN_MUTEX(pClient);
+
+    /* If a free tree structure was found, iterate through it, freeing
+    ** all of its physical and logical pages. Then retry the above.
+    */
+    if( iRoot ){
+      rc = hctPManFreeTreeNow(pClient, pFile, iRoot);
+    }
+
+    /* Stop as soon as freeing a tree fails. Otherwise the error could be
+    ** overwritten by a later iteration and never reported. */
+  }while( iRoot && rc==SQLITE_OK );
+
+  if( rc==SQLITE_OK ){
+    int ii;
+    if( pPgset ){
+      /* Move the contents of the pageset into the client's buffer */
+      pman_debug_new_pageset(pPgset, bLogical, iSafeTid, pPgset->iMaxTid);
+      rc = hctPManMakeSpace(pClient, bLogical, pPgset->nPg);
+      if( rc==SQLITE_OK ){
+        for(ii=pPgset->nPg-1; ii>=0; ii--){
+          hctPManAddFree(pClient, bLogical, pPgset->aPg[ii], 0);
+        }
+      }
+    }else{
+      /* Nothing usable on the server. Allocate new pages from the file. */
+      const int nPageSet = pClient->pConfig->nPageSet;
+      rc = hctPManMakeSpace(pClient, bLogical, nPageSet);
+      if( rc==SQLITE_OK ){
+        u32 iPg = sqlite3HctFilePageRangeAlloc(pFile, bLogical, nPageSet);
+        pman_debug_new_pageset(0, bLogical, iSafeTid, -1);
+        for(ii=nPageSet-1; ii>=0; ii--){
+          hctPManAddFree(pClient, bLogical, iPg+ii, 0);
+        }
+      }
+    }
+  }
+  sqlite3_free(pPgset);
+
+  if( rc==SQLITE_OK ){
+    /* The client buffer now starts with a usable page, so this call
+    ** returns via the early-return branch at the top of the function. */
+    assert( pSet->nPg>0 && pSet->aPg[pSet->iFirst].iTid<=iSafeTid );
+    return sqlite3HctPManAllocPg(pRc, pClient, pFile, bLogical);
+  }
+
+  /* An error has occurred. Return 0. */
+  *pRc = rc;
+  return 0;
+}
+
+/*
+** Free a physical or logical page.
+*/ +SQLITE_PRIVATE void sqlite3HctPManFreePg( + int *pRc, /* IN/OUT: Error code */ + HctPManClient *pClient, /* page-manager client handle */ + i64 iTid, /* Associated TID value */ + u32 iPg, /* Page number */ + int bLogical /* True for logical, false for physical */ +){ + int rc = SQLITE_OK; + pman_debug(pClient, "free", bLogical, iPg, iTid); + assert( iPg>0 ); + rc = hctPManMakeSpace(pClient, bLogical, 1); + if( rc==SQLITE_OK ){ + hctPManAddFree(pClient, bLogical, iPg, iTid); + } +} + +SQLITE_PRIVATE void sqlite3HctPManClientHandoff(HctPManClient *pClient){ + hctPManHandback(pClient, 0, pClient->aPgSet[0].nPg); + hctPManHandback(pClient, 1, pClient->aPgSet[1].nPg); +} + +SQLITE_PRIVATE int sqlite3HctPManFreeTree( + HctPManClient *p, + HctFile *pFile, + u32 iRoot, + u64 iTid +){ + int rc = SQLITE_OK; + if( iTid==0 ){ + rc = hctPManFreeTreeNow(p, pFile, iRoot); + }else{ + HctPManServer *pServer = p->pServer; + int nNew; + HctPManTree *aNew; + + ENTER_PMAN_MUTEX(p); + nNew = pServer->nTree + 1; + aNew = (HctPManTree*)sqlite3_realloc( + pServer->aTree, nNew*sizeof(HctPManTree) + ); + if( aNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + aNew[pServer->nTree].iRoot = iRoot; + aNew[pServer->nTree].iTid = iTid; + pServer->nTree++; + pServer->aTree = aNew; + } + LEAVE_PMAN_MUTEX(p); + } + return rc; +} + +typedef struct InitRootCtx InitRootCtx; +struct InitRootCtx { + HctFile *pFile; + HctPManServer *pServer; + u64 iTid; + u64 iRoot; /* Logical root page of this tree */ +}; + +static int pmanInitRootCb(void *pCtx, u32 iLogic, u32 iPhys){ + InitRootCtx *p = (InitRootCtx*)pCtx; + int rc = SQLITE_OK; + + if( iLogic && !sqlite3HctFilePageIsFree(p->pFile, iLogic, 1) ){ + rc = sqlite3HctFilePageClearInUse(p->pFile, iLogic, 1); + if( iLogiciRoot ){ + sqlite3HctPManServerInit(&rc, p->pServer, p->iTid, iLogic, 1); + } + } + if( iPhys && !sqlite3HctFilePageIsFree(p->pFile, iPhys, 0) && rc==SQLITE_OK ){ + rc = sqlite3HctFilePageClearInUse(p->pFile, iPhys, 0); + if( iPhysiRoot ){ + 
sqlite3HctPManServerInit(&rc, p->pServer, p->iTid, iPhys, 0); + } + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctPManServerInitRoot( + int *pRc, + HctPManServer *pServer, + u64 iTid, + HctFile *pFile, + u32 iRoot +){ + int rc = SQLITE_OK; + InitRootCtx ctx; + ctx.pServer = pServer; + ctx.pFile = pFile; + ctx.iTid = iTid; + ctx.iRoot = iRoot; + rc = sqlite3HctDbWalkTree(pFile, iRoot, pmanInitRootCb, (void*)&ctx); + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageClearIsRoot(pFile, iRoot); + } + return rc; +} + +/************************************************************************* +** Beginning of vtab implemetation. +*************************************************************************/ + +#define HCT_PMAN_SCHEMA \ +" CREATE TABLE hctpman(" \ +" type TEXT," \ +" location TEXT," \ +" pgno INTEGER," \ +" tid INTEGER" \ +" );" + +typedef struct pman_vtab pman_vtab; +typedef struct pman_cursor pman_cursor; +typedef struct HctPmanRow HctPmanRow; + +/* +** Virtual table type for "hctpman". +*/ +struct pman_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; +}; + +/* +** Virtual cursor type for "hctpman". +*/ +struct pman_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + int nRow; + int iRow; + HctPmanRow *aRow; +}; + +/* +** Values to return for a single row of the hctpman table. +*/ +struct HctPmanRow { + u8 eType; /* HCT_PMAN_TYPE_* value */ + u8 eLoc; /* HCT_PMAN_LOC_* value */ + u32 pgno; /* Page number */ + i64 iTid; /* Associated TID */ +}; + +#define HCT_PMAN_TYPE_PHYSICAL 0 +#define HCT_PMAN_TYPE_LOGICAL 1 + +#define HCT_PMAN_LOC_USE 0 +#define HCT_PMAN_LOC_ACC 1 +#define HCT_PMAN_LOC_SERVER 2 + +/* +** This xConnect() method is invoked to create a new hctpman virtual table. 
+*/ +static int pmanConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + pman_vtab *pNew; + int rc; + + rc = sqlite3_declare_vtab(db, HCT_PMAN_SCHEMA); + pNew = (pman_vtab*)sqlite3HctMalloc(&rc, sizeof(*pNew)); + if( pNew ){ + pNew->db = db; + } + + *ppVtab = (sqlite3_vtab*)pNew; + return rc; +} + +/* +** This method is the destructor for pman_vtab objects. +*/ +static int pmanDisconnect(sqlite3_vtab *pVtab){ + pman_vtab *p = (pman_vtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new pman_cursor object. +*/ +static int pmanOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + pman_cursor *pCur; + pCur = sqlite3MallocZero(sizeof(*pCur)); + if( pCur==0 ) return SQLITE_NOMEM; + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Destructor for a pman_cursor. +*/ +static int pmanClose(sqlite3_vtab_cursor *cur){ + pman_cursor *pCur = (pman_cursor*)cur; + sqlite3_free(pCur->aRow); + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last row of output. +*/ +static int pmanEof(sqlite3_vtab_cursor *cur){ + pman_cursor *pCur = (pman_cursor*)cur; + return pCur->iRow>=pCur->nRow; +} + +/* +** Advance a pman_cursor to its next row of output. +*/ +static int pmanNext(sqlite3_vtab_cursor *cur){ + pman_cursor *pCur = (pman_cursor*)cur; + pCur->iRow++; + return SQLITE_OK; +} + +/* +** Return values of columns for the row at which the pgmap_cursor +** is currently pointing. 
+*/ +static int pmanColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + const char *aType[] = {"physical", "logical"}; + const char *aLoc[] = {"use", "acc", "server"}; + pman_cursor *pCur = (pman_cursor*)cur; + + HctPmanRow *pRow = &pCur->aRow[pCur->iRow]; + switch( i ){ + case 0: { /* type */ + sqlite3_result_text(ctx, aType[pRow->eType], -1, SQLITE_STATIC); + break; + } + case 1: { /* location */ + sqlite3_result_text(ctx, aLoc[pRow->eLoc], -1, SQLITE_STATIC); + break; + } + case 2: { /* pgno */ + sqlite3_result_int64(ctx, pRow->pgno); + break; + } + case 3: { /* tid */ + sqlite3_result_int64(ctx, pRow->iTid); + break; + } + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. In this implementation, the +** rowid is the same as the slotno value. +*/ +static int pmanRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + pman_cursor *pCur = (pman_cursor*)cur; + *pRowid = pCur->iRow+1; + return SQLITE_OK; +} + +static int hctPagesetSize(HctPManPageset *pPageset){ + return pPageset ? pPageset->nPg : 0; +} + +static void hctPagesetRows( + pman_cursor *pCur, + HctPManPageset *pPageset, + u8 eType, + u8 eLoc +){ + if( pPageset ){ + int ii; + for(ii=0; iinPg; ii++){ + HctPmanRow *pRow = &pCur->aRow[pCur->nRow++]; + pRow->eType = eType; + pRow->eLoc = eLoc; + pRow->pgno = pPageset->aPg[ii]; + pRow->iTid = pPageset->iMaxTid; + } + } +} + +/* +** This method is called to "rewind" the pman_cursor object back +** to the first row of output. This method is always called at least +** once prior to any call to pmanColumn() or pmanRowid() or +** pmanEof(). 
+*/ +static int pmanFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + pman_cursor *pCur = (pman_cursor*)pVtabCursor; + pman_vtab *pTab = (pman_vtab*)(pCur->base.pVtab); + HctPManClient *pClient = 0; + int nRow = 0; + int ii = 0; + HctPManPageset *pSet = 0; + int rc = SQLITE_OK; + + pCur->iRow = 0; + pCur->nRow = 0; + sqlite3_free(pCur->aRow); + pCur->aRow = 0; + + pClient = sqlite3HctFilePManClient( + sqlite3HctDbFile(sqlite3HctDbFind(pTab->db, 0)) + ); + + ENTER_PMAN_MUTEX(pClient); + for(ii=0; ii<2; ii++){ + nRow += pClient->aPgSet[ii].nPg; + for(pSet=pClient->pServer->aList[ii].pHead; pSet; pSet=pSet->pNext){ + nRow += hctPagesetSize(pSet); + } + } + pCur->aRow = sqlite3HctMalloc(&rc, sizeof(HctPmanRow) * nRow); + if( pCur->aRow ){ + for(ii=0; ii<2; ii++){ + int i2; + HctPManFreePgSet *pPgSet = &pClient->aPgSet[ii]; + for(i2=0; i2nPg; i2++){ + HctPmanRow *pRow = &pCur->aRow[pCur->nRow++]; + int idx = (pPgSet->iFirst + i2) % pPgSet->nAlloc; + pRow->eType = ii; + pRow->eLoc = HCT_PMAN_LOC_USE; + pRow->pgno = pPgSet->aPg[idx].pgno; + pRow->iTid = pPgSet->aPg[idx].iTid; + } + for(pSet=pClient->pServer->aList[ii].pHead; pSet; pSet=pSet->pNext){ + hctPagesetRows(pCur, pSet, ii, HCT_PMAN_LOC_SERVER); + } + } + } + LEAVE_PMAN_MUTEX(pClient); + + return rc; +} + +/* +** SQLite will invoke this method one or more times while planning a query +** that uses the virtual table. This routine needs to create +** a query plan for each invocation and compute an estimated cost for that +** plan. 
+*/ +static int pmanBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + pIdxInfo->estimatedCost = (double)10; + pIdxInfo->estimatedRows = 10; + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3HctPManVtabInit(sqlite3 *db){ + static sqlite3_module pmanModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ pmanConnect, + /* xBestIndex */ pmanBestIndex, + /* xDisconnect */ pmanDisconnect, + /* xDestroy */ 0, + /* xOpen */ pmanOpen, + /* xClose */ pmanClose, + /* xFilter */ pmanFilter, + /* xNext */ pmanNext, + /* xEof */ pmanEof, + /* xColumn */ pmanColumn, + /* xRowid */ pmanRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 + }; + + return sqlite3_create_module(db, "hctpman", &pmanModule, 0); +} + + +SQLITE_PRIVATE i64 sqlite3HctPManStats(sqlite3 *db, int iStat, const char **pzStat){ + HctPManClient *pClient = 0; + i64 iVal = -1; + + pClient = sqlite3HctFilePManClient(sqlite3HctDbFile(sqlite3HctDbFind(db, 0))); + switch( iStat ){ + case 0: + *pzStat = "mutex_attempt"; + iVal = pClient->stats.nMutex; + break; + case 1: + *pzStat = "mutex_block"; + iVal = pClient->stats.nMutexBlock; + break; + default: + break; + } + + return iVal; +} + + + +/************** End of hct_pman.c ********************************************/ +/************** Begin file hctree.c ******************************************/ +/* +** 2004 April 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +************************************************************************* +** +*/ + +/* #include "sqliteInt.h" */ +/* #include "hctInt.h" */ + +/* #include */ +/* #include */ +/* #include */ +/* #include */ /* NOTE(review): the header name of the directive below is missing in this copy of the file - restore it from upstream. */ +#include + +#ifdef SQLITE_ENABLE_HCT + +typedef struct BtSchemaOp BtSchemaOp; + +typedef struct HBtree HBtree; +typedef struct HBtCursor HBtCursor; +typedef struct HctLogFile HctLogFile; +typedef struct HctMainStats HctMainStats; + + +/* +** An object to help with writing a log file. Data is staged in aBuf[] +** and written to the file descriptor by the hctLogFileXXX() functions. +*/ +struct HctLogFile { + int fd; /* File descriptor open on log file */ + char *zLogFile; /* Full path to log file */ + u8 *aBuf; /* malloc'd buffer for writing log file */ + int nBuf; /* Size of aBuf[] in bytes */ + i64 iFileOff; /* Current write offset in file */ + int iBufferOff; /* Current write offset in buffer */ +}; + +/* Counters embedded in each HBtree as HBtree.stats. NOTE(review): what each counter measures is not visible in this part of the file. */ struct HctMainStats { + i64 nRetry; + i64 nRetryKey; + i64 nKeyOp; +}; + +/* +** aSchemaOp[]: +** Array of nSchemaOp BtSchemaOp structures. Each such structure represents +** a new table or index created by the current transaction. +** aSchemaOp[x].iSavepoint contains the open savepoint count when the table +** with root page aSchemaOp[x].pgnoRoot was created. NOTE(review): presumably +** this is the value of db->nSavepoint at that point - confirm. +** +** eTrans: +** Set to SQLITE_TXN_NONE, READ or WRITE to indicate the type of +** transaction that is open.
This is set by the following functions: +** +** sqlite3HctBtreeBeginTrans() +** sqlite3HctBtreeCommitPhaseTwo() +** sqlite3HctBtreeRollback() +*/ +struct HBtree { + BtreeMethods *pMethods; + + HctConfig config; /* Configuration for this connection */ + HctTree *pHctTree; /* In-memory part of database */ + HctDatabase *pHctDb; /* On-disk part of db, if any */ + void *pSchema; /* Memory from sqlite3HctBtreeSchema() */ + void(*xSchemaFree)(void*); /* Function to free pSchema */ + int eTrans; /* SQLITE_TXN_NONE, READ or WRITE */ + HBtCursor *pCsrList; /* List of all open cursors */ + + int nSchemaOp; /* Number of entries in aSchemaOp[] */ + BtSchemaOp *aSchemaOp; /* Schema ops of the current transaction */ + int nRollbackOp; + + int openFlags; /* "flags" value passed to sqlite3HctBtreeOpen() */ + HctLogFile *pLog; /* Object for writing to log file */ + u32 iNextRoot; /* Next root page to allocate if pHctDb==0 */ + u32 aMeta[SQLITE_N_BTREE_META]; /* 16 database meta values */ + int eMetaState; /* One of the HCT_METASTATE_* values */ + + int bRecoveryDone; /* Set by hctAttemptRecovery() once it returns SQLITE_OK */ +#if 0 + u64 iJrnlRoot; /* Root of sqlite_hct_journal */ + u64 iBaseRoot; /* Root of sqlite_hct_baseline */ +#endif + HctJournal *pHctJrnl; /* Journal object, if any (see hctDetectJournals()) */ + + Pager *pFakePager; /* 4096-byte allocation made by sqlite3HctBtreeOpen() */ + HctMainStats stats; +}; + +/* +** Another candidate value for HBtree.eTrans. Must be different from +** SQLITE_TXN_NONE, SQLITE_TXN_READ and SQLITE_TXN_WRITE. +*/ +#define SQLITE_TXN_ERROR 4 + +/* +** Candidate values for HBtree.eMetaState. +*/ +#define HCT_METASTATE_NONE 0 +#define HCT_METASTATE_READ 1 + +/* +** A schema op. 
+*/ +struct BtSchemaOp { + int iSavepoint; + int eSchemaOp; + u32 pgnoRoot; +}; + +/* +** Candidate values for BtSchemaOp.eSchemaOp +*/ +#define HCT_SCHEMAOP_DROP 1 +#define HCT_SCHEMAOP_CREATE_INTKEY 2 +#define HCT_SCHEMAOP_CREATE_INDEX 3 + + +struct HBtCursor { + BtCursorMethods *pMethods; + + HBtree *pBtree; + HctTreeCsr *pHctTreeCsr; + HctDbCsr *pHctDbCsr; + int bUseTree; /* 1 if tree-csr is current entry, else 0 */ + int eDir; /* One of BTREE_DIR_NONE, FORWARD, REVERSE */ + + int isLast; /* Csr has not moved since BtreeLast() */ + + KeyInfo *pKeyInfo; /* For non-intkey tables */ + int errCode; + int wrFlag; /* Value of wrFlag when cursor opened */ + HBtCursor *pCsrNext; /* Next element in Btree.pCsrList list */ +}; + + +#ifdef SQLITE_TEST +SQLITE_PRIVATE BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; +#endif + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Enable or disable the shared pager and schema features. +** +** This routine has no effect on existing database connections. +** The shared cache setting effects only future calls to +** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2(). +*/ +SQLITE_API int sqlite3_enable_shared_cache(int enable){ + sqlite3GlobalConfig.sharedCacheEnabled = enable; + return SQLITE_OK; +} +#endif + + +/* +** Return an reset the seek counter for a Btree object. +*/ +SQLITE_PRIVATE sqlite3_uint64 sqlite3HctBtreeSeekCount(Btree *pBt){ + assert( 0 ); + return 0; +} + +/* +** Clear the current cursor position. +*/ +SQLITE_PRIVATE void sqlite3HctBtreeClearCursor(BtCursor *pCur){ + HBtCursor *pCsr = (HBtCursor*)pCur; + sqlite3HctDbCsrClear(pCsr->pHctDbCsr); + sqlite3HctTreeCsrClear(pCsr->pHctTreeCsr); +} + +/* +** Determine whether or not a cursor has moved from the position where +** it was last placed, or has been invalidated for any other reason. +** Cursors can move when the row they are pointing at is deleted out +** from under them, for example. Cursor might also move if a btree +** is rebalanced. 
+** +** Calling this routine with a NULL cursor pointer returns false. +** +** Use the separate sqlite3HctBtreeCursorRestore() routine to restore a cursor +** back to where it ought to be if this routine returns true. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCursorHasMoved(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + return sqlite3HctTreeCsrHasMoved(pCur->pHctTreeCsr); +} + +/* +** Return a pointer to a fake BtCursor object that will always answer +** false to the sqlite3HctBtreeCursorHasMoved() routine above. The fake +** cursor returned must not be used with any other Btree interface. +*/ +#if 0 +SQLITE_PRIVATE BtCursor *sqlite3HctBtreeFakeValidCursor(void){ + static BtCursor csr = {0,0,0}; + return &csr; +} +#endif + +/* +** This routine restores a cursor back to its original position after it +** has been moved by some outside activity (such as a btree rebalance or +** a row having been deleted out from under the cursor). +** +** On success, the *pDifferentRow parameter is false if the cursor is left +** pointing at exactly the same row. *pDifferntRow is the row the cursor +** was pointing to has been deleted, forcing the cursor to point to some +** nearby row. +** +** This routine should only be called for a cursor that just returned +** TRUE from sqlite3HctBtreeCursorHasMoved(). +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCursorRestore(BtCursor *pCursor, int *pDifferentRow){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + return sqlite3HctTreeCsrRestore(pCur->pHctTreeCsr, pDifferentRow); +} + +/* +** Return the size of the database file in pages. If there is any kind of +** error, return ((unsigned int)-1). +*/ +SQLITE_PRIVATE Pgno sqlite3HctBtreeLastPage(Btree *p){ + return 0xFFFFFFFF; +} + +/* +** Provide flag hints to the cursor. 
+*/ +SQLITE_PRIVATE void sqlite3HctBtreeCursorHintFlags(BtCursor *pCur, unsigned x){ + /* no-op */ + assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 ); +} + +typedef struct RecoverCsr RecoverCsr; +struct RecoverCsr { + HctDbCsr *pCsr; /* Cursor to read from database on disk */ + HctTreeCsr *pTreeCsr; /* Cursor to write to in-memory tree */ + UnpackedRecord *pRec; /* Used to seek both cursors */ + KeyInfo *pKeyInfo; +}; + +static void hctRecoverCursorClose(HBtree *p, RecoverCsr *pCsr){ + sqlite3HctDbCsrClose(pCsr->pCsr); + sqlite3HctTreeCsrClose(pCsr->pTreeCsr); + sqlite3DbFree(p->config.db, pCsr->pRec); + sqlite3KeyInfoUnref(pCsr->pKeyInfo); + memset(pCsr, 0, sizeof(RecoverCsr)); +} + +static int hctFindKeyInfo(HBtree *p, u32 iRoot, KeyInfo **ppKeyInfo){ + Schema *pSchema = (Schema*)p->pSchema; + int rc = SQLITE_OK; + HashElem *pE = 0; + KeyInfo *pKeyInfo = 0; + + /* Search the database schema for an index with root page iRoot. If + ** one is found, extract a KeyInfo reference. */ + for(pE=sqliteHashFirst(&pSchema->tblHash); pE; pE=sqliteHashNext(pE)){ + Index *pIdx = 0; + Table *pTab = (Table*)sqliteHashData(pE); + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->tnum==iRoot ){ + Parse sParse; + Parse *pSave = 0; + memset(&sParse, 0, sizeof(sParse)); + sParse.db = p->config.db; + pSave = sParse.db->pParse; + sParse.db->pParse = &sParse; + pKeyInfo = sqlite3KeyInfoOfIndex(&sParse, pIdx); + sParse.db->pParse = pSave; + rc = sParse.rc; + sqlite3DbFree(sParse.db, sParse.zErrMsg); + break; + } + } + if( pTab->tnum==iRoot ) break; + } + + *ppKeyInfo = pKeyInfo; + return rc; +} + +/* +** +*/ +static int hctRecoverCursorOpen( + HBtree *p, + u32 iRoot, + RecoverCsr *pCsr +){ + int rc = SQLITE_OK; + memset(pCsr, 0, sizeof(RecoverCsr)); + + rc = hctFindKeyInfo(p, iRoot, &pCsr->pKeyInfo); + assert( rc==SQLITE_OK || pCsr->pKeyInfo==0 ); + if( pCsr->pKeyInfo ){ + pCsr->pRec = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo); + if( pCsr->pRec==0 ) rc = 
SQLITE_NOMEM_BKPT; + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbCsrOpen(p->pHctDb, pCsr->pKeyInfo, iRoot, &pCsr->pCsr); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctTreeCsrOpen(p->pHctTree, iRoot, &pCsr->pTreeCsr); + } + + return rc; +} + +#if 1 +# define hctRecoverDebug(v,w,x,y,z) +#else +static void hctRecoverDebug( + RecoverCsr *p, + const char *zType, + i64 iKey, + const u8 *aKey, + int nKey +){ + if( p->pRec==0 ){ + printf("recover-%s: %lld\n", zType, iKey); + }else{ + char *zText = sqlite3HctDbRecordToText(0, aKey, nKey); + printf("recover-%s: %s\n", zType, zText); + sqlite3_free(zText); + } + fflush(stdout); +} +#endif + +/* +** This object is used to read a log file from disk. It is manipulated using +** the following API: +** +** hctLogReaderOpen() +** hctLogReaderNext() +** hctLogReaderClose() +** +** Log file format consists of an 8-byte TID value followed by one or more +** records. Each record is: +** +** * 32-bit root page number, +** * 32-bit size of key field (nKey), +** * if( nKey==0 ) 64-bit rowid key, +** * if( nKey!=0 ) nKey byte blob key. 
+*/ +typedef struct HctLogReader HctLogReader; +struct HctLogReader { + u8 *aFile; /* Buffer containing log file contents */ + int nFile; /* Size of aFile[] in bytes */ + int iFile; /* Offset of next record in aFile[] */ + + i64 iTid; /* TID value for log file */ + int bEof; /* True if reader has hit EOF */ + + /* Valid only if bEof==0 */ + i64 iRoot; /* Root page for current entry */ + i64 iKey; /* Integer key for current entry (aKey==0) */ + int nKey; /* Size of aKey[] buffer */ + u8 *aKey; /* Blob key for current entry */ +}; + +static void hctLogReaderNext(HctLogReader *pReader){ + u32 aInt[2]; + + if( (pReader->iFile + sizeof(aInt))>pReader->nFile ){ + pReader->bEof = 1; + }else{ + memcpy(aInt, &pReader->aFile[pReader->iFile], sizeof(aInt)); + pReader->iRoot = (i64)aInt[0]; + if( pReader->iRoot==0 ){ + pReader->bEof = 1; + }else{ + pReader->nKey = (int)aInt[1]; + pReader->iFile += sizeof(aInt); + if( pReader->nKey==0 ){ + pReader->aKey = 0; + if( pReader->iFile+sizeof(i64)>pReader->nFile ){ + pReader->bEof = 1; + }else{ + memcpy(&pReader->iKey, &pReader->aFile[pReader->iFile], sizeof(i64)); + pReader->iFile += sizeof(i64); + } + }else{ + pReader->iKey = 0; + if( pReader->iFile+pReader->nKey>pReader->nFile ){ + pReader->bEof = 1; + }else{ + pReader->aKey = &pReader->aFile[pReader->iFile]; + pReader->iFile += pReader->nKey; + } + } + } + } +} + +static void hctLogReaderClose(HctLogReader *pReader){ + sqlite3_free(pReader->aFile); + memset(pReader, 0, sizeof(*pReader)); +} + +static int hctLogReaderOpen(const char *zFile, HctLogReader *pReader){ + int rc = SQLITE_OK; + int fd = -1; + + memset(pReader, 0, sizeof(*pReader)); + fd = open(zFile, O_RDONLY); + if( fd<0 ){ + rc = sqlite3HctIoerr(SQLITE_IOERR); + }else{ + struct stat sStat; + + memset(&sStat, 0, sizeof(sStat)); + fstat(fd, &sStat); + pReader->nFile = (int)sStat.st_size; + pReader->aFile = (u8*)sqlite3HctMalloc(&rc, pReader->nFile + 8); + if( pReader->aFile ){ + int nRead = read(fd, pReader->aFile, 
pReader->nFile); + if( nRead!=pReader->nFile ){ + rc = sqlite3HctIoerr(SQLITE_IOERR); + }else{ + memcpy(&pReader->iTid, pReader->aFile, sizeof(i64)); + pReader->iFile = sizeof(i64); + if( pReader->iTid==0 ){ + pReader->bEof = 1; + }else{ + hctLogReaderNext(pReader); + } + } + } + + close(fd); + } + + return rc; +} + + +static int btreeFlushData(HBtree *p, int bRollback); + +static int hctRecoverOne(void *pCtx, const char *zFile){ + HBtree *p = (HBtree*)pCtx; + int rc = SQLITE_OK; + u32 iPrevRoot = 0; + RecoverCsr csr; + HctLogReader rdr; + + memset(&csr, 0, sizeof(csr)); + rc = hctLogReaderOpen(zFile, &rdr); + if( rc==SQLITE_OK && rdr.bEof==0 ){ + + assert( rdr.iTid!=0 ); + sqlite3HctDbRollbackMode(p->pHctDb, 2); + sqlite3HctDbRecoverTid(p->pHctDb, rdr.iTid); + for(/* no-op */; rdr.bEof==0; hctLogReaderNext(&rdr)){ + int op = 0; + + if( rdr.iRoot!=iPrevRoot ){ + iPrevRoot = rdr.iRoot; + hctRecoverCursorClose(p, &csr); + rc = hctRecoverCursorOpen(p, rdr.iRoot, &csr); + } + + if( rdr.nKey ){ + sqlite3VdbeRecordUnpack(csr.pKeyInfo, rdr.nKey, rdr.aKey, csr.pRec); + } + rc = sqlite3HctDbCsrRollbackSeek(csr.pCsr, csr.pRec, rdr.iKey, &op); + + if( rc==SQLITE_OK && op!=0 ){ + HctTreeCsr *pTCsr = csr.pTreeCsr; + if( op<0 ){ + /* rollback requires deleting the key */ + hctRecoverDebug(&csr, "delete", rdr.iKey, rdr.aKey, rdr.nKey); + rc = sqlite3HctTreeDeleteKey( + pTCsr, csr.pRec, rdr.iKey, rdr.nKey, rdr.aKey + ); + }else if( op>0 ){ + const u8 *aOld = 0; + int nOld = 0; + rc = sqlite3HctDbCsrData(csr.pCsr, &nOld, &aOld); + if( rc==SQLITE_OK ){ + hctRecoverDebug(&csr, "insert", rdr.iKey, aOld, nOld); + rc = sqlite3HctTreeInsert(pTCsr, csr.pRec, rdr.iKey, nOld, aOld, 0); + } + } + } + } + hctRecoverCursorClose(p, &csr); + + if( rc==SQLITE_OK ){ + rc = btreeFlushData(p, 0); + } + sqlite3HctDbRollbackMode(p->pHctDb, 0); + if( rc==SQLITE_OK && p->pHctJrnl ){ + rc = sqlite3HctJrnlRollbackEntry(p->pHctJrnl, rdr.iTid); + } + sqlite3HctDbRecoverTid(p->pHctDb, 0); + } + + if( 
rc==SQLITE_OK ){ + /* TODO!!! */ + unlink(zFile); + } + hctLogReaderClose(&rdr); + return rc; +} + +static int hctRecoverLogs(HBtree *p){ + HctFile *pFile = sqlite3HctDbFile(p->pHctDb); + return sqlite3HctFileFindLogs(pFile, (void*)p, hctRecoverOne); +} + + +/* +** Free a pLog object and close the associated log file handle. If parameter +** bUnlink is true, also unlink() the log file. +*/ +static void hctLogFileClose(HctLogFile *pLog, int bUnlink){ + if( pLog ){ + close(pLog->fd); + if( bUnlink ) unlink(pLog->zLogFile); + sqlite3_free(pLog->zLogFile); + sqlite3_free(pLog->aBuf); + sqlite3_free(pLog); + } +} + +/* +** Open a log file object. +*/ +static int hctLogFileOpen(char *zLogFile, int nBuf, HctLogFile **ppLog){ + int rc = SQLITE_OK; + HctLogFile *pLog; + + pLog = (HctLogFile*)sqlite3HctMalloc(&rc, sizeof(HctLogFile)); + if( pLog ){ + pLog->zLogFile = zLogFile; + pLog->fd = open(zLogFile, O_CREAT|O_RDWR, 0644); + if( pLog->fd<0 ){ + rc = SQLITE_CANTOPEN_BKPT; + }else{ + pLog->nBuf = nBuf; + pLog->aBuf = sqlite3HctMalloc(&rc, nBuf); + } + } + + if( rc!=SQLITE_OK ){ + hctLogFileClose(pLog, 0); + pLog = 0; + } + + *ppLog = pLog; + return rc; +} + +static int hctLogFileWrite(HctLogFile *pLog, const void *aData, int nData){ + int nRem = nData; + const u8 *aRem = (u8*)aData; + + assert( pLog->iBufferOff<=pLog->nBuf ); + while( 1 ){ + + int nCopy = MIN(pLog->nBuf - pLog->iBufferOff, nRem); + if( nCopy>0 ){ + memcpy(&pLog->aBuf[pLog->iBufferOff], aRem, nCopy); + pLog->iBufferOff += nCopy; + nRem -= nCopy; + if( nRem==0 ) break; + aRem += nCopy; + } + + if( write(pLog->fd, pLog->aBuf, pLog->nBuf)!=pLog->nBuf ){ + return sqlite3HctIoerr(SQLITE_IOERR_WRITE); + } + pLog->iFileOff += pLog->nBuf; + pLog->iBufferOff = 0; + } + + return SQLITE_OK; +} + + +static void hctLogFileRestart(HctLogFile *pLog){ + memset(pLog->aBuf, 0, 8); + lseek(pLog->fd, 0, SEEK_SET); + pLog->iFileOff = 0; + pLog->iBufferOff = 8; +} + + +static int hctLogFileWriteTid(HctLogFile *pLog, u64 iTid){ + 
lseek(pLog->fd, 0, SEEK_SET); + if( write(pLog->fd, &iTid, sizeof(iTid))!=sizeof(iTid) ){ + return sqlite3HctIoerr(SQLITE_IOERR_WRITE); + } + return SQLITE_OK; +} + +static int hctLogFileFinish(HctLogFile *pLog, u64 iTid){ + int rc = SQLITE_OK; + int bDone = 0; + if( pLog->iFileOff==0 ){ + bDone = 1; + memcpy(pLog->aBuf, &iTid, sizeof(iTid)); + } + if( rc==SQLITE_OK ){ + static const u8 aZero[8] = {0,0,0,0, 0,0,0,0}; + rc = hctLogFileWrite(pLog, aZero, sizeof(aZero)); + if( rc==SQLITE_OK ){ + assert( pLog->iBufferOff>0 ); + if( write(pLog->fd, pLog->aBuf, pLog->iBufferOff)!=pLog->iBufferOff ){ + rc = sqlite3HctIoerr(SQLITE_IOERR_WRITE); + } + } + } + if( bDone==0 && rc==SQLITE_OK ){ + rc = hctLogFileWriteTid(pLog, iTid); + } + return rc; +} + +static int btreeLogFileZero(HctLogFile *pLog){ + return hctLogFileWriteTid(pLog, 0); +} + + +/* +** Open a database file. +** +** zFilename is the name of the database file. If zFilename is NULL +** then an ephemeral database is created. The ephemeral database might +** be exclusively in memory, or it might use a disk-based memory cache. +** Either way, the ephemeral database will be automatically deleted +** when sqlite3HctBtreeClose() is called. +** +** If zFilename is ":memory:" then an in-memory database is created +** that is automatically destroyed when it is closed. +** +** The "flags" parameter is a bitmask that might contain bits like +** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. +** +** If the database is already opened in the same database connection +** and we are in shared cache mode, then the open will fail with an +** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared +** objects in the same database connection since doing so will lead +** to problems with locking. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeOpen( + sqlite3_vfs *pVfs, /* VFS to use for this b-tree */ + const char *zFilename, /* Name of the file containing the BTree database */ + sqlite3 *db, /* Associated database handle */ + Btree **ppBtree, /* Pointer to new Btree object written here */ + int flags, /* Options */ + int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ +){ + int rc = SQLITE_OK; + HBtree *pNew; + + assert( (flags & BTREE_SINGLE)==0 && zFilename && zFilename[0] ); + + pNew = (HBtree*)sqlite3_malloc(sizeof(HBtree)); + if( pNew ){ + memset(pNew, 0, sizeof(HBtree)); + pNew->iNextRoot = 2; + pNew->config.db = db; + pNew->openFlags = flags; + pNew->config.nDbFile = HCT_DEFAULT_NDBFILE; + pNew->config.nPageSet = HCT_DEFAULT_NPAGESET; + pNew->config.nTryBeforeUnevict = HCT_DEFAULT_NTRYBEFOREUNEVICT; + pNew->config.nPageScan = HCT_DEFAULT_NPAGESCAN; + pNew->config.szLogChunk = HCT_DEFAULT_SZLOGCHUNK; + pNew->config.pgsz = HCT_DEFAULT_PAGESIZE; + rc = sqlite3HctTreeNew(&pNew->pHctTree); + pNew->pFakePager = (Pager*)sqlite3HctMalloc(&rc, 4096); + }else{ + rc = SQLITE_NOMEM; + } + + if( rc==SQLITE_OK && zFilename && zFilename[0] ){ + pNew->pHctDb = sqlite3HctDbOpen(&rc, zFilename, &pNew->config); + } + + if( rc!=SQLITE_OK ){ + sqlite3HctBtreeClose((Btree*)pNew); + pNew = 0; + } + *ppBtree = (Btree*)pNew; + return rc; +} + +/* +** Close an open database and invalidate all cursors. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeClose(Btree *pBt){ + HBtree *const p = (HBtree*)pBt; + if( p ){ + while(p->pCsrList){ + sqlite3HctBtreeCloseCursor((BtCursor*)p->pCsrList); + } + hctLogFileClose(p->pLog, 1); + sqlite3HctBtreeRollback((Btree*)p, SQLITE_OK, 0); + sqlite3HctBtreeCommit((Btree*)p); + if( p->xSchemaFree ){ + p->xSchemaFree(p->pSchema); + } + sqlite3_free(p->pSchema); + sqlite3HctJournalClose(p->pHctJrnl); + sqlite3HctTreeFree(p->pHctTree); + sqlite3HctDbClose(p->pHctDb); + sqlite3_free(p->aSchemaOp); + sqlite3_free(p->pFakePager); + sqlite3_free(p); + } + return SQLITE_OK; +} + +/* +** Change the "soft" limit on the number of pages in the cache. +** Unused and unmodified pages will be recycled when the number of +** pages in the cache exceeds this soft limit. But the size of the +** cache is allowed to grow larger than this limit if it contains +** dirty pages or pages still in active use. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeSetCacheSize(Btree *p, int mxPage){ + /* no-op in hct */ + return SQLITE_OK; +} + +/* +** Change the "spill" limit on the number of pages in the cache. +** If the number of pages exceeds this limit during a write transaction, +** the pager might attempt to "spill" pages to the journal early in +** order to free up memory. +** +** The value returned is the current spill size. If zero is passed +** as an argument, no changes are made to the spill size setting, so +** using mxPage of 0 is a way to query the current spill size. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeSetSpillSize(Btree *p, int mxPage){ + return 1024; +} + +#if SQLITE_MAX_MMAP_SIZE>0 +/* +** Change the limit on the amount of the database file that may be +** memory mapped. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ + /* assert( 0 ); */ + return SQLITE_OK; +} +#endif /* SQLITE_MAX_MMAP_SIZE>0 */ + +/* +** Change the way data is synced to disk in order to increase or decrease +** how well the database resists damage due to OS crashes and power +** failures. Level 1 is the same as asynchronous (no syncs() occur and +** there is a high probability of damage) Level 2 is the default. There +** is a very low but non-zero probability of damage. Level 3 reduces the +** probability of damage to near zero but with a write performance reduction. +*/ +#ifndef SQLITE_OMIT_PAGER_PRAGMAS +SQLITE_PRIVATE int sqlite3HctBtreeSetPagerFlags( + Btree *p, /* The btree to set the safety level on */ + unsigned pgFlags /* Various PAGER_* flags */ +){ + /* HCT - does this need fixing? */ + return SQLITE_OK; +} +#endif + +/* +** Change the default pages size and the number of reserved bytes per page. +** Or, if the page size has already been fixed, return SQLITE_READONLY +** without changing anything. +** +** The page size must be a power of 2 between 512 and 65536. If the page +** size supplied does not meet this constraint then the page size is not +** changed. +** +** Page sizes are constrained to be a power of two so that the region +** of the database file used for locking (beginning at PENDING_BYTE, +** the first byte past the 1GB boundary, 0x40000000) needs to occur +** at the beginning of a page. +** +** If parameter nReserve is less than zero, then the number of reserved +** bytes per page is left unchanged. +** +** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size +** and autovacuum mode can no longer be changed. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeSetPageSize(Btree *pBt, int pgsz, int nReserve, int iFix){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_READONLY; + if( p->pHctDb && pgsz>=512 && pgsz<=32768 && 0==(pgsz & (pgsz-1)) ){ + int orig = sqlite3HctDbPagesize(p->pHctDb); + if( orig==0 ){ + p->config.pgsz = pgsz; + rc = SQLITE_OK; + } + } + return rc; +} + +/* +** Return the currently defined page size +*/ +SQLITE_PRIVATE int sqlite3HctBtreeGetPageSize(Btree *pBt){ + HBtree *const p = (HBtree*)pBt; + int pgsz = 1024; + if( p->pHctDb ){ + pgsz = sqlite3HctDbPagesize(p->pHctDb); + if( pgsz==0 ){ + pgsz = p->config.pgsz; + } + } + p->config.pgsz = pgsz; + return pgsz; +} + +/* +** This function is similar to sqlite3HctBtreeGetReserve(), except that it +** may only be called if it is guaranteed that the b-tree mutex is already +** held. +** +** This is useful in one special case in the backup API code where it is +** known that the shared b-tree mutex is held, but the mutex on the +** database handle that owns *p is not. In this case if sqlite3HctBtreeEnter() +** were to be called, it might collide with some other operation on the +** database handle that owns *p, causing undefined behavior. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeGetReserveNoMutex(Btree *p){ + assert( 0 ); + return 0; +} + +/* +** Return the number of bytes of space at the end of every page that +** are intentually left unused. This is the "reserved" space that is +** sometimes used by extensions. +** +** The value returned is the larger of the current reserve size and +** the latest reserve size requested by SQLITE_FILECTRL_RESERVE_BYTES. +** The amount of reserve can only grow - never shrink. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeGetRequestedReserve(Btree *p){ + return 0; +} + + +/* +** Set the maximum page count for a database if mxPage is positive. +** No changes are made if mxPage is 0 or negative. +** Regardless of the value of mxPage, return the maximum page count. 
+*/ +SQLITE_PRIVATE Pgno sqlite3HctBtreeMaxPageCount(Btree *p, Pgno mxPage){ + return 0xFFFFFFFF; +} + +/* +** Change the values for the BTS_SECURE_DELETE and BTS_OVERWRITE flags: +** +** newFlag==0 Both BTS_SECURE_DELETE and BTS_OVERWRITE are cleared +** newFlag==1 BTS_SECURE_DELETE set and BTS_OVERWRITE is cleared +** newFlag==2 BTS_SECURE_DELETE cleared and BTS_OVERWRITE is set +** newFlag==(-1) No changes +** +** This routine acts as a query if newFlag is less than zero +** +** With BTS_OVERWRITE set, deleted content is overwritten by zeros, but +** freelist leaf pages are not written back to the database. Thus in-page +** deleted content is cleared, but freelist deleted content is not. +** +** With BTS_SECURE_DELETE, operation is like BTS_OVERWRITE with the addition +** that freelist leaf pages are written back into the database, increasing +** the amount of disk I/O. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeSecureDelete(Btree *p, int newFlag){ + return 0; +} + +/* +** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' +** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it +** is disabled. The default value for the auto-vacuum property is +** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeSetAutoVacuum(Btree *p, int autoVacuum){ + return SQLITE_OK; +} + +/* +** Return the value of the 'auto-vacuum' property. If auto-vacuum is +** enabled 1 is returned. Otherwise 0. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeGetAutoVacuum(Btree *p){ + /* hct is never in auto-vacuum mode */ + return 0; +} + +/* +** Initialize the first page of the database file (creating a database +** consisting of a single page and no schema objects). Return SQLITE_OK +** if successful, or an SQLite error code otherwise. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeNewDb(Btree *p){ + int rc = SQLITE_OK; + assert( 0 ); + return rc; +} + +static int hctDetectJournals(HBtree *p){ + int rc = SQLITE_OK; + if( p->pHctJrnl==0 ){ + rc = sqlite3HctJournalNewIf( + (Schema*)p->pSchema, p->pHctTree, p->pHctDb, &p->pHctJrnl + ); + } + return rc; +} + +/* +** This is called by sqlite3_hct_journal_init() after the journal and +** baseline tables have been created in the database to initialize the +** journal sub-system. +** +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. +*/ +SQLITE_PRIVATE int sqlite3HctDetectJournals(sqlite3 *db){ + HBtree *p = (HBtree*)db->aDb[0].pBt; + int rc = hctDetectJournals(p); + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbStartRead(p->pHctDb, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctJrnlRecovery(p->pHctJrnl, p->pHctDb); + } + sqlite3HctDbEndRead(p->pHctDb); + return rc; +} + +typedef struct HctFreelistCtx HctFreelistCtx; +struct HctFreelistCtx { + /* Physical pages that need to be preserved for log and journal rollback */ + int nAlloc; + int nPg; + i64 *aPg; + + /* Root pages in the current schema */ + int nRootAlloc; + int nRoot; + i64 *aRoot; + + HBtree *p; +}; + +static int hctTopDownMerge( + i64 *aB, + int iBegin1, int iEnd1, + int iBegin2, int iEnd2, + i64 *aA +){ + int i = iBegin1; + int j = iBegin2; + int k; + for(k=iBegin1; i=iEnd2 || aA[i]<=aA[j]) ){ + if( j1 ){ + int iMid = (iEnd + iBegin) / 2; + int i1 = hctTopDownSplitMerge(aA, iBegin, iMid, aB); + int i2 = hctTopDownSplitMerge(aA, iMid, iEnd, aB); + return hctTopDownMerge(aB, iBegin, i1, iMid, i2, aA); + } + return iEnd; +} + +/* +** Sort the array of aPg[] page numbers in ascending order. Discard +** any duplicates. 
+*/ +static void hctFreelistSort(int *pRc, HctFreelistCtx *p){ + if( *pRc==SQLITE_OK && p->nPg>1 ){ + i64 *aWork = (i64*)sqlite3HctMalloc(pRc, p->nPg * sizeof(i64)); + if( aWork ){ + memcpy(aWork, p->aPg, p->nPg * sizeof(i64)); + p->nPg = hctTopDownSplitMerge(p->aPg, 0, p->nPg, aWork); + sqlite3_free(aWork); +#ifdef SQLITE_DEBUG + { + int ii; + for(ii=1; iinPg; ii++){ + assert( p->aPg[ii]>p->aPg[ii-1] ); + } + } +#endif + } + } +} + +static int hctSavePhysical(void *pCtx, i64 iPhys){ + HctFreelistCtx *p = (HctFreelistCtx*)pCtx; + if( p->nPg==p->nAlloc ){ + int nNew = (p->nPg>0) ? p->nPg * 4 : 64; + i64 *aNew = (i64*)sqlite3_realloc(p->aPg, nNew*sizeof(i64));; + if( aNew==0 ) return SQLITE_NOMEM; + p->aPg = aNew; + p->nAlloc = nNew; + } + p->aPg[p->nPg++] = iPhys; + return SQLITE_OK; +} + +static int hctScanOne(void *pCtx, const char *zFile){ + HctFreelistCtx *p = (HctFreelistCtx*)pCtx; + int rc = SQLITE_OK; + HctLogReader rdr; + + sqlite3HctDbSetSavePhysical(p->p->pHctDb, hctSavePhysical, pCtx); + + rc = hctLogReaderOpen(zFile, &rdr); + if( rc==SQLITE_OK && rdr.bEof==0 ){ + u32 iPrevRoot =0; + RecoverCsr csr; + memset(&csr, 0, sizeof(csr)); + sqlite3HctDbRecoverTid(p->p->pHctDb, rdr.iTid); + for(/* no-op */; rc==SQLITE_OK && rdr.bEof==0; hctLogReaderNext(&rdr)){ + + if( rdr.iRoot!=iPrevRoot ){ + hctRecoverCursorClose(p->p, &csr); + rc = hctRecoverCursorOpen(p->p, rdr.iRoot, &csr); + } + + if( rc==SQLITE_OK ){ + int dummy = 0; + if( rdr.nKey ){ + sqlite3VdbeRecordUnpack(csr.pKeyInfo, rdr.nKey, rdr.aKey, csr.pRec); + } + rc = sqlite3HctDbCsrRollbackSeek(csr.pCsr, csr.pRec, rdr.iKey, &dummy); + } + } + + hctRecoverCursorClose(p->p, &csr); + } + + sqlite3HctDbSetSavePhysical(p->p->pHctDb, 0, 0); + hctLogReaderClose(&rdr); + return rc; +} + +static void hctRootpageAdd(int *pRc, HctFreelistCtx *pCtx, i64 iRoot){ + if( *pRc==SQLITE_OK ){ + if( pCtx->nRoot==pCtx->nRootAlloc ){ + int nNew = (pCtx->nRoot>0) ? 
pCtx->nRoot * 4 : 64; + i64 *aNew = (i64*)sqlite3_realloc(pCtx->aRoot, nNew*sizeof(i64));; + if( aNew==0 ){ + *pRc = SQLITE_NOMEM; + return; + } + pCtx->aRoot = aNew; + pCtx->nRootAlloc = nNew; + } + + pCtx->aRoot[pCtx->nRoot++] = iRoot; + } +} + +/* +** Assemble a list of the root pages in the current schema in the +** pCtx->aRoot[] array. +*/ +static void hctRootpageList(int *pRc, HctFreelistCtx *pCtx){ + Schema *pSchema = (Schema*)pCtx->p->pSchema; + HashElem *pE = 0; + for(pE=sqliteHashFirst(&pSchema->tblHash); pE; pE=sqliteHashNext(pE)){ + Table *pTab = (Table*)sqliteHashData(pE); + Index *pIdx = 0; + hctRootpageAdd(pRc, pCtx, pTab->tnum); + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + hctRootpageAdd(pRc, pCtx, pIdx->tnum); + } + } +} + +/* +** This is called as part of recovery, before any log files are rolled back, +** to rebuild the free-page list (or, if you like, to initialize the +** page-manager). This involves the following: +** +** 1) Scanning the sqlite_hct_journal table, if any, from the first hole +** to the last entry to determine the list of physical database pages +** that will be required if sqlite3_hct_journal_rollback() is called. +** +** 2) Scanning each log file that will be rolled back, accumulating a +** list of the physical database pages that will be required to find +** the "old" values required to roll them back. +** +** 3) Scanning the page map, checking for pages with the PHYSICAL_IN_USE +** flag clear. Each such page is added to the free-page list. If the +** page was one of those found in the scans in steps (1) or (2), then +** it is not available for reuse until after tid $TID, and all previous +** tids, have been committed. Otherwise, it is available for reuse +** immediately. +** +** $TID is set to the TID of the next transaction that will be written +** to this database (page-map entry TRANSID_EOF+1). +** +** This is a complicated procedure. 
+*/ +static int hctRecoverFreeList(HBtree *p){ + HctFreelistCtx ctx; + HctFile *pFile = sqlite3HctDbFile(p->pHctDb); + int rc = SQLITE_OK; + + memset(&ctx, 0, sizeof(ctx)); + ctx.p = p; + + /* If this is a replication database, scan all journal entries that may + ** be rolled back using a call to sqlite3_hct_journal_rollback(). Record + ** the set of physical pages that may be required by this call in the + ** ctx.aPg[] array. */ + if( p->pHctJrnl ){ + void *pCtx = (void*)&ctx; + rc = sqlite3HctJrnlSavePhysical( + p->config.db, p->pHctJrnl, hctSavePhysical, pCtx + ); + } + + /* Also scan any log files, adding the list of physical pages that must + ** be preserved to the ctx.aPg[] array. */ + if( rc==SQLITE_OK ){ + sqlite3HctDbRollbackMode(p->pHctDb, 2); + rc = sqlite3HctFileFindLogs(pFile, (void*)&ctx, hctScanOne); + sqlite3HctDbRollbackMode(p->pHctDb, 0); + } + + /* Sort the list of physical page numbers accumulated above. */ + hctFreelistSort(&rc, &ctx); + + /* Assemble a list of root pages. */ + hctRootpageList(&rc, &ctx); + + /* Scan the page-map, taking into account the physical pages that must + ** be preserved, and the set of root pages in the current db schema. */ + if( rc==SQLITE_OK ){ + rc = sqlite3HctFileRecoverFreelists( + pFile, ctx.nRoot, ctx.aRoot, ctx.nPg, ctx.aPg + ); + } + + sqlite3_free(ctx.aPg); + sqlite3_free(ctx.aRoot); + return rc; +} + +static int hctAttemptRecovery(HBtree *p){ + int rc = SQLITE_OK; + if( p->bRecoveryDone==0 ){ + HctFile *pFile = sqlite3HctDbFile(p->pHctDb); + if( p->pHctDb && sqlite3HctFileStartRecovery(pFile, 0) ){ + p->bRecoveryDone = 1; + rc = hctRecoverFreeList(p); + + if( rc==SQLITE_OK ){ + rc = hctRecoverLogs(p); + } + + if( rc==SQLITE_OK && p->pHctJrnl ){ + sqlite3HctDbRollbackMode(p->pHctDb, 0); + rc = sqlite3HctJrnlRecovery(p->pHctJrnl, p->pHctDb); + } + rc = sqlite3HctDbFinishRecovery(p->pHctDb, 0, rc); + } + + p->bRecoveryDone = (rc==SQLITE_OK); + } + + return rc; +} + +/* +** Attempt to start a new transaction. 
A write-transaction +** is started if the second argument is nonzero, otherwise a read- +** transaction. If the second argument is 2 or more and exclusive +** transaction is started, meaning that no other process is allowed +** to access the database. A preexisting transaction may not be +** upgraded to exclusive by calling this routine a second time - the +** exclusivity flag only works for a new transaction. +** +** A write-transaction must be started before attempting any +** changes to the database. None of the following routines +** will work unless a transaction is started first: +** +** sqlite3HctBtreeCreateTable() +** sqlite3HctBtreeCreateIndex() +** sqlite3HctBtreeClearTable() +** sqlite3HctBtreeDropTable() +** sqlite3HctBtreeInsert() +** sqlite3HctBtreeDelete() +** sqlite3HctBtreeUpdateMeta() +*/ +SQLITE_PRIVATE int sqlite3HctBtreeBeginTrans(Btree *pBt, int wrflag, int *pSchemaVersion){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + int req = wrflag ? SQLITE_TXN_WRITE : SQLITE_TXN_READ; + + assert( wrflag==0 || p->pHctDb==0 || pSchemaVersion ); + + if( p->eTrans==SQLITE_TXN_ERROR ) return SQLITE_BUSY_SNAPSHOT; + + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbStartRead(p->pHctDb, p->pHctJrnl); + } + + if( rc==SQLITE_OK && pSchemaVersion ){ + sqlite3HctBtreeGetMeta((Btree*)p, 1, (u32*)pSchemaVersion); + sqlite3HctDbTransIsConcurrent(p->pHctDb, p->config.db->eConcurrent); + } + + if( rc==SQLITE_OK && wrflag ){ + rc = sqlite3HctTreeBegin(p->pHctTree, 1 + p->config.db->nSavepoint); + } + if( rc==SQLITE_OK && p->eTranseTrans = req; + } + return rc; +} + +/* +** This is called just after the schema is loaded for b-tree pBt. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeSchemaLoaded(Btree *pBt){ + int rc = SQLITE_OK; + HBtree *const p = (HBtree*)pBt; + if( p->bRecoveryDone==0 ){ + rc = hctDetectJournals(p); + if( rc==SQLITE_OK ){ + rc = hctAttemptRecovery(p); + } + if( rc==SQLITE_OK ){ + sqlite3HctDbEndRead(p->pHctDb); + } + } + if( rc==SQLITE_OK && p->pHctJrnl ){ + sqlite3HctJournalFixSchema(p->pHctJrnl, p->config.db, p->pSchema); + } + return rc; +} + +/* +** A write-transaction must be opened before calling this function. +** It performs a single unit of work towards an incremental vacuum. +** +** If the incremental vacuum is finished after this function has run, +** SQLITE_DONE is returned. If it is not finished, but no error occurred, +** SQLITE_OK is returned. Otherwise an SQLite error code. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeIncrVacuum(Btree *p){ + return SQLITE_DONE; +} + +/* +** This routine does the first phase of a two-phase commit. This routine +** causes a rollback journal to be created (if it does not already exist) +** and populated with enough information so that if a power loss occurs +** the database can be restored to its original state by playing back +** the journal. Then the contents of the journal are flushed out to +** the disk. After the journal is safely on oxide, the changes to the +** database are written into the database file and flushed to oxide. +** At the end of this call, the rollback journal still exists on the +** disk and we are still holding all locks, so the transaction has not +** committed. See sqlite3HctBtreeCommitPhaseTwo() for the second phase of the +** commit process. +** +** This call is a no-op if no write-transaction is currently active on pBt. +** +** Otherwise, sync the database file for the btree pBt. zSuperJrnl points to +** the name of a super-journal file that should be written into the +** individual journal file, or is NULL, indicating no super-journal file +** (single database transaction). 
+** +** When this is called, the super-journal should already have been +** created, populated with this journal pointer and synced to disk. +** +** Once this is routine has returned, the only thing required to commit +** the write-transaction for this database file is to delete the journal. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ + /* Everything happens in sqlite3HctBtreeCommitPhaseTwo() */ + return SQLITE_OK; +} + +typedef struct FlushOneCtx FlushOneCtx; +struct FlushOneCtx { + HBtree *p; + int bRollback; +}; + +static int btreeFlushOneToDisk(void *pCtx, u32 iRoot, KeyInfo *pKeyInfo){ + FlushOneCtx *pFC = (FlushOneCtx*)pCtx; + HBtree *p = pFC->p; + int iRollbackDir = pFC->bRollback ? -1 : 1; + + HctDatabase *pDb = p->pHctDb; + HctTreeCsr *pCsr = 0; + int rc; + UnpackedRecord *pRec = 0; + + if( pKeyInfo ){ + pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( pRec==0 ) return SQLITE_NOMEM_BKPT; + } + + rc = sqlite3HctTreeCsrOpen(p->pHctTree, iRoot, &pCsr); + if( rc==SQLITE_OK ){ + for(rc=sqlite3HctTreeCsrFirst(pCsr); rc==SQLITE_OK ; /* no-op */){ + int nRetry = 0; + int ii; + i64 iKey = 0; + int nData = 0; + int bDel = 0; + const u8 *aData = 0; + sqlite3HctTreeCsrKey(pCsr, &iKey); + sqlite3HctTreeCsrData(pCsr, &nData, &aData); + bDel = sqlite3HctTreeCsrIsDelete(pCsr); + if( pRec ) sqlite3VdbeRecordUnpack(pKeyInfo, nData, aData, pRec); + rc = sqlite3HctDbInsert(pDb, iRoot, pRec, iKey, bDel,nData,aData,&nRetry); + p->nRollbackOp += (iRollbackDir * (1 - nRetry)); + if( rc ) break; + p->stats.nKeyOp++; + + if( pFC->bRollback && p->nRollbackOp==0 ){ + assert( nRetry==0 ); + rc = sqlite3HctDbInsertFlush(pDb, &nRetry); + if( rc ) break; + if( nRetry==0 ){ + rc = SQLITE_DONE; + break; + } + p->nRollbackOp = nRetry; + if( sqlite3HctTreeCsrEof(pCsr) ){ + sqlite3HctTreeCsrLast(pCsr); + } + } + + if( nRetry==0 ){ + sqlite3HctTreeCsrNext(pCsr); + if( sqlite3HctTreeCsrEof(pCsr) ){ + rc = sqlite3HctDbInsertFlush(pDb, &nRetry); 
+ if( nRetry ){ + sqlite3HctTreeCsrLast(pCsr); + assert( sqlite3HctTreeCsrEof(pCsr)==0 ); + p->nRollbackOp -= (iRollbackDir * nRetry); + }else{ + /* Done - the table has been successfully flushed to disk */ + break; + } + } + }else{ + p->stats.nRetry++; + p->stats.nRetryKey += nRetry; + } + for(ii=1; iidb, pRec); + } + return rc; +} + +static int btreeLogIntkey(HctLogFile *pLog, u32 iRoot, i64 iRowid){ + u8 aBuf[16]; + memcpy(&aBuf[0], &iRoot, sizeof(u32)); + memset(&aBuf[4], 0, sizeof(u32)); + memcpy(&aBuf[8], &iRowid, sizeof(i64)); + return hctLogFileWrite(pLog, aBuf, sizeof(aBuf)); +} + +static int btreeLogIndex( + HctLogFile *pLog, + u32 iRoot, + const u8 *aData, int nData +){ + if( hctLogFileWrite(pLog, &iRoot, sizeof(iRoot)) + || hctLogFileWrite(pLog, &nData, sizeof(nData)) + || hctLogFileWrite(pLog, aData, nData) + ){ + return sqlite3HctIoerr(SQLITE_IOERR_WRITE); + } + return SQLITE_OK; +} + +static int btreeLogOneToDisk(void *pCtx, u32 iRoot, KeyInfo *pKeyInfo){ + HBtree *p = (HBtree*)pCtx; + HctTreeCsr *pCsr = 0; + int rc; + + rc = sqlite3HctTreeCsrOpen(p->pHctTree, iRoot, &pCsr); + if( rc==SQLITE_OK ){ + for(rc=sqlite3HctTreeCsrFirst(pCsr); + rc==SQLITE_OK && sqlite3HctTreeCsrEof(pCsr)==0; + rc=sqlite3HctTreeCsrNext(pCsr) + ){ + if( pKeyInfo ){ + int nData = 0; + const u8 *aData = 0; + sqlite3HctTreeCsrData(pCsr, &nData, &aData); + rc = btreeLogIndex(p->pLog, iRoot, aData, nData); + }else{ + i64 iRowid = 0; + sqlite3HctTreeCsrKey(pCsr, &iRowid); + rc = btreeLogIntkey(p->pLog, iRoot, iRowid); + } + + if( rc!=SQLITE_OK ) break; + } + sqlite3HctTreeCsrClose(pCsr); + } + + return rc; +} + +static int btreeFlushData(HBtree *p, int bRollback){ + int rc = SQLITE_OK; + + if( bRollback ) sqlite3HctDbRollbackMode(p->pHctDb, 1); + if( bRollback && p->nRollbackOp==0 ){ + rc = SQLITE_DONE; + } + + if( rc==SQLITE_OK ){ + FlushOneCtx ctx; + ctx.p = p; + ctx.bRollback = bRollback; + rc = sqlite3HctTreeForeach(p->pHctTree, 0, (void*)&ctx,btreeFlushOneToDisk); + } + if( 
bRollback ) sqlite3HctDbRollbackMode(p->pHctDb, 0); + return rc; +} + +static int btreeWriteLog(HBtree *p){ + int rc = SQLITE_OK; + + if( p->pLog==0 ){ + char *zLog = sqlite3HctDbLogFile(p->pHctDb); + if( zLog==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + rc = hctLogFileOpen(zLog, p->config.szLogChunk, &p->pLog); + } + } + + if( rc==SQLITE_OK ){ + hctLogFileRestart(p->pLog); + rc = sqlite3HctTreeForeach(p->pHctTree, 0, (void*)p, btreeLogOneToDisk); + } + + return rc; +} + +/* +** Flush the contents of Btree.pHctTree to Btree.pHctDb. +*/ +static int btreeFlushToDisk(HBtree *p){ + int i; + int rc = SQLITE_OK; + int rcok = SQLITE_OK; + u64 iTid = 0; + u64 iCid = 0; + int bTmapScan = 0; + int bCustomValid = 0; /* True if xValidate() was invoked */ + + /* Write a log file for this transaction. The TID field is still set + ** to zero at this point. */ + if( p->config.db->bHctMigrate==0 ){ + rc = btreeWriteLog(p); + } + + if( rc==SQLITE_OK ){ + /* Obtain the TID for this transaction. */ + iTid = sqlite3HctJrnlWriteTid(p->pHctJrnl, &iCid); + if( iTid==0 ){ + sqlite3HctDbStartWrite(p->pHctDb, &iTid); + } + + /* Invoke the SQLITE_TESTCTRL_HCT_MTCOMMIT hook, if applicable */ + if( p->config.db->xMtCommit ){ + p->config.db->xMtCommit(p->config.db->pMtCommitCtx, 0); + } + + assert( iTid>0 ); + if( p->pLog ) rc = hctLogFileFinish(p->pLog, iTid); + } + + /* Initialize the root pages of any new tables or indexes created by this + ** transaction. At this point the logical root page numbers have been + ** assigned by the page-manager, but there is no mapped physical page, + ** and the LOGICAL_IN_USE and LOGICAL_IS_ROOT flags are not yet set + ** for the page. This allocates and populates the physical root page, + ** and sets the two flags on the logical page slot. + ** + ** If the current transaction does not commit (i.e. failed validiation), + ** then the new tree is returned to the page-manage to be recycled + ** immediately. 
Or, if a crash occurs, then recovery will see the + ** LOGICAL_IS_ROOT flag on a root page that is not in the sqlite_schema + ** table and free the pages then. */ + for(i=0; rc==SQLITE_OK && inSchemaOp; i++){ + BtSchemaOp *pOp = &p->aSchemaOp[i]; + assert( + pOp->eSchemaOp==HCT_SCHEMAOP_DROP + || pOp->eSchemaOp==HCT_SCHEMAOP_CREATE_INTKEY + || pOp->eSchemaOp==HCT_SCHEMAOP_CREATE_INDEX + ); + if( pOp->eSchemaOp!=HCT_SCHEMAOP_DROP ){ + int bIndex = (pOp->eSchemaOp==HCT_SCHEMAOP_CREATE_INDEX); + rc = sqlite3HctDbRootInit(p->pHctDb, bIndex, pOp->pgnoRoot); + } + } + + /* Write all the new database entries to the database. Any write/write + ** conflicts are detected here - SQLITE_BUSY is returned in that case. */ + p->nRollbackOp = 0; + if( rc==SQLITE_OK ){ + rc = btreeFlushData(p, 0); + } + + /* Assuming the data has been flushed to disk without error or a + ** write/write conflict, allocate a CID and validate the transaction. */ + if( rc==SQLITE_OK ){ + /* Invoke the SQLITE_TESTCTRL_HCT_MTCOMMIT hook, if applicable */ + if( p->config.db->xMtCommit ){ + p->config.db->xMtCommit(p->config.db->pMtCommitCtx, 1); + } + + /* Validate the transaction */ + rc = sqlite3HctDbValidate(p->config.db, p->pHctDb, &iCid, &bTmapScan); + + /* If validation passed and this database is configured for replication, + ** write the journal entry and invoke the custom validation hook */ + if( rc==SQLITE_OK && p->pHctJrnl ){ + rc = sqlite3HctJrnlLog( + p->pHctJrnl, + p->config.db, + (Schema*)p->pSchema, + iCid, iTid, &bCustomValid + ); + } + } + + /* If conflicts have been detected, roll back the transaction */ + assert( rc!=SQLITE_BUSY ); + if( rc==SQLITE_BUSY_SNAPSHOT ){ + rcok = SQLITE_BUSY_SNAPSHOT; + rc = btreeFlushData(p, 1); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + if( iCid>0 && p->pHctJrnl ){ + rc = sqlite3HctJrnlWriteEmpty(p->pHctJrnl, iCid, iTid, + (bCustomValid ? 
0 : p->config.db) + ); + } + } + + for(i=0; rc==SQLITE_OK && inSchemaOp; i++){ + BtSchemaOp *pOp = &p->aSchemaOp[i]; + if( (rcok==SQLITE_OK && pOp->eSchemaOp==HCT_SCHEMAOP_DROP) + || (rcok!=SQLITE_OK && pOp->eSchemaOp!=HCT_SCHEMAOP_DROP) + ){ + HctFile *pFile = sqlite3HctDbFile(p->pHctDb); + rc = sqlite3HctFileTreeFree(pFile, pOp->pgnoRoot, rcok!=SQLITE_OK); + } + } + + /* Zero the log file and set the entry in the transaction-map to + ** finish the transaction. */ + if( rc==SQLITE_OK && p->pLog ){ + rc = btreeLogFileZero(p->pLog); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbEndWrite(p->pHctDb, iCid, rcok!=SQLITE_OK); + } + assert( rc==SQLITE_OK ); + if( bTmapScan ){ + sqlite3HctDbTMapScan(p->pHctDb); + } + + sqlite3HctJrnlInvokeHook(p->pHctJrnl, p->config.db); + return (rc==SQLITE_OK ? rcok : rc); +} + +static void hctEndTransaction(HBtree *p){ + if( p->eTrans>SQLITE_TXN_NONE + && p->pCsrList==0 + && p->config.db->nVdbeRead<=1 + ){ + if( p->pHctDb ){ + sqlite3HctDbEndRead(p->pHctDb); + } + p->eTrans = SQLITE_TXN_NONE; + p->eMetaState = HCT_METASTATE_NONE; + } +} + + +static int hctBtreeMigrateInsert( + HBtCursor *pCur, + UnpackedRecord *pRec, + i64 iKey, + int nData, + const u8 *aData +){ + int rc = SQLITE_OK; + HBtree *p = pCur->pBtree; + int nRetry = 0; + + if( 0==sqlite3HctDbTid(p->pHctDb) ){ + i64 iDummy = 0; + rc = sqlite3HctDbStartWrite(p->pHctDb, &iDummy); + if( rc!=SQLITE_OK ) return rc; + } + + rc = sqlite3HctDbInsert( + p->pHctDb, + sqlite3HctTreeCsrRoot(pCur->pHctTreeCsr), + pRec, iKey, 0, nData, aData, &nRetry + ); + if( nRetry>0 ){ + rc = SQLITE_ABORT; + } + + return rc; +} + +static int hctBtreeMigrateCommit(HBtree *p){ + int rc = SQLITE_OK; + i64 iCid = 0; + int bTmapScan = 0; + int nRetry = 0; + + rc = sqlite3HctDbInsertFlush(p->pHctDb, &nRetry); + if( nRetry>0 ){ + rc = SQLITE_ABORT; + } + + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbValidate(p->config.db, p->pHctDb, &iCid, &bTmapScan); + } + + if( rc==SQLITE_OK ){ + rc = 
sqlite3HctDbEndWrite(p->pHctDb, iCid, 0); + } + + if( bTmapScan ){ + sqlite3HctDbTMapScan(p->pHctDb); + } + + return rc; +} + +#define BT_IS_MIGRATE(pBt) (pBt->config.db->bHctMigrate) +#define CSR_IS_MIGRATE(pCsr) (pCsr->pBtree->config.db->bHctMigrate) + +/* +** Commit the transaction currently in progress. +** +** This routine implements the second phase of a 2-phase commit. The +** sqlite3HctBtreeCommitPhaseOne() routine does the first phase and should +** be invoked prior to calling this routine. The sqlite3HctBtreeCommitPhaseOne() +** routine did all the work of writing information out to disk and flushing the +** contents so that they are written onto the disk platter. All this +** routine has to do is delete or truncate or zero the header in the +** the rollback journal (which causes the transaction to commit) and +** drop locks. +** +** Normally, if an error occurs while the pager layer is attempting to +** finalize the underlying journal file, this function returns an error and +** the upper layer will attempt a rollback. However, if the second argument +** is non-zero then this b-tree transaction is part of a multi-file +** transaction. In this case, the transaction has already been committed +** (by deleting a super-journal file) and the caller will ignore this +** functions return code. So, even if an error occurs in the pager layer, +** reset the b-tree objects internal state to indicate that the write +** transaction has been closed. This is quite safe, as the pager will have +** transitioned to the error state. +** +** This will release the write lock on the database file. If there +** are no active cursors, it also releases the read lock. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseTwo(Btree *pBt, int bCleanup){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + + if( p->eTrans==SQLITE_TXN_ERROR ) return SQLITE_BUSY_SNAPSHOT; + + if( BT_IS_MIGRATE(p) ){ + rc = hctBtreeMigrateCommit(p); + }else{ + if( p->eTrans==SQLITE_TXN_WRITE ){ + if( p->pCsrList ){ + /* Cannot commit with open cursors in hctree */ + return SQLITE_LOCKED; + } + + sqlite3HctTreeRelease(p->pHctTree, 0); + if( p->pHctDb ){ + rc = btreeFlushToDisk(p); + sqlite3HctTreeClear(p->pHctTree); + p->nSchemaOp = 0; + } + p->eTrans = SQLITE_TXN_READ; + } + } + + if( rc==SQLITE_OK ){ + hctEndTransaction(p); + }else{ + p->eTrans = SQLITE_TXN_ERROR; + } + return rc; +} + +/* +** Do both phases of a commit. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCommit(Btree *pBt){ + int rc; + HBtree *const p = (HBtree*)pBt; + rc = sqlite3HctBtreeCommitPhaseOne((Btree*)p, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3HctBtreeCommitPhaseTwo((Btree*)p, 0); + } + return rc; +} + +/* +** This routine sets the state to CURSOR_FAULT and the error +** code to errCode for every cursor on any BtShared that pBtree +** references. Or if the writeOnly flag is set to 1, then only +** trip write cursors and leave read cursors unchanged. +** +** Every cursor is a candidate to be tripped, including cursors +** that belong to other database connections that happen to be +** sharing the cache with pBtree. +** +** This routine gets called when a rollback occurs. If the writeOnly +** flag is true, then only write-cursors need be tripped - read-only +** cursors save their current positions so that they may continue +** following the rollback. Or, if writeOnly is false, all cursors are +** tripped. In general, writeOnly is false if the transaction being +** rolled back modified the database schema. In this case b-tree root +** pages may be moved or deleted from the database altogether, making +** it unsafe for read cursors to continue. 
+** +** If the writeOnly flag is true and an error is encountered while +** saving the current position of a read-only cursor, all cursors, +** including all read-cursors are tripped. +** +** SQLITE_OK is returned if successful, or if an error occurs while +** saving a cursor position, an SQLite error code. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeTripAllCursors(Btree *pBt, int errCode, int writeOnly){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + if( p ){ + HBtCursor *pCur; + for(pCur=p->pCsrList; pCur; pCur=pCur->pCsrNext){ + if( writeOnly==0 || pCur->wrFlag ){ + sqlite3HctTreeCsrClose(pCur->pHctTreeCsr); + pCur->pHctTreeCsr = 0; + pCur->errCode = errCode; + } + } + } + return rc; +} + +/* +** Rollback the transaction in progress. +** +** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). +** Only write cursors are tripped if writeOnly is true but all cursors are +** tripped if writeOnly is false. Any attempt to use +** a tripped cursor will result in an error. +** +** This will release the write lock on the database file. If there +** are no active cursors, it also releases the read lock. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeRollback(Btree *pBt, int tripCode, int writeOnly){ + HBtree *const p = (HBtree*)pBt; + + assert( SQLITE_TXN_ERROR==4 && SQLITE_TXN_WRITE==2 ); + assert( SQLITE_TXN_READ==1 && SQLITE_TXN_NONE==0 ); + assert( p->eTrans!=SQLITE_TXN_ERROR || p->pCsrList==0 ); + + if( p->eTrans>=SQLITE_TXN_WRITE ){ + sqlite3HctTreeRollbackTo(p->pHctTree, 0); + if( p->pHctDb ){ + sqlite3HctTreeClear(p->pHctTree); + } + p->eTrans = SQLITE_TXN_READ; + p->nSchemaOp = 0; + } + hctEndTransaction(p); + return SQLITE_OK; +} + +/* +** Start a statement subtransaction. The subtransaction can be rolled +** back independently of the main transaction. You must start a transaction +** before starting a subtransaction. The subtransaction is ended automatically +** if the main transaction commits or rolls back. 
+** +** Statement subtransactions are used around individual SQL statements +** that are contained within a BEGIN...COMMIT block. If a constraint +** error occurs within the statement, the effect of that one statement +** can be rolled back without having to rollback the entire transaction. +** +** A statement sub-transaction is implemented as an anonymous savepoint. The +** value passed as the second parameter is the total number of savepoints, +** including the new anonymous savepoint, open on the B-Tree. i.e. if there +** are no active savepoints and no other statement-transactions open, +** iStatement is 1. This anonymous savepoint can be released or rolled back +** using the sqlite3HctBtreeSavepoint() function. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeBeginStmt(Btree *pBt, int iStatement){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + assert( p->eTrans!=SQLITE_TXN_ERROR ); + rc = sqlite3HctTreeBegin(p->pHctTree, iStatement+1); + return rc; +} + +static int btreeRollbackRoot(HBtree *p, int iSavepoint){ + int i; + int rc = SQLITE_OK; + for(i=p->nSchemaOp-1; rc==SQLITE_OK && i>=0; i--){ + if( p->aSchemaOp[i].iSavepoint<=iSavepoint ) break; + rc = sqlite3HctDbRootFree(p->pHctDb, p->aSchemaOp[i].pgnoRoot); + } + p->nSchemaOp = i+1; + return rc; +} + +/* +** The second argument to this function, op, is always SAVEPOINT_ROLLBACK +** or SAVEPOINT_RELEASE. This function either releases or rolls back the +** savepoint identified by parameter iSavepoint, depending on the value +** of op. +** +** Normally, iSavepoint is greater than or equal to zero. However, if op is +** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the +** contents of the entire transaction are rolled back. This is different +** from a normal transaction rollback, as no locks are released and the +** transaction remains open. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeSavepoint(Btree *pBt, int op, int iSavepoint){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + if( p && p->eTrans==SQLITE_TXN_WRITE ){ + int i; + assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE ); + if( op==SAVEPOINT_RELEASE ){ + for(i=0; inSchemaOp; i++){ + if( p->aSchemaOp[i].iSavepoint>iSavepoint ){ + p->aSchemaOp[i].iSavepoint = iSavepoint; + } + } + sqlite3HctTreeRelease(p->pHctTree, iSavepoint+1); + }else{ + sqlite3HctTreeRollbackTo(p->pHctTree, iSavepoint+2); + btreeRollbackRoot(p, iSavepoint); + p->eMetaState = HCT_METASTATE_NONE; + } + } + return rc; +} + +SQLITE_PRIVATE int sqlite3HctBtreeIsNewTable(Btree *pBt, u64 iRoot){ + HBtree *const p = (HBtree*)pBt; + int ii; + for(ii=0; iinSchemaOp && p->aSchemaOp[ii].pgnoRoot!=iRoot; ii++); + return iinSchemaOp; +} + +SQLITE_PRIVATE u64 sqlite3HctBtreeSnapshotId(Btree *pBt){ + HBtree *const p = (HBtree*)pBt; + return sqlite3HctDbSnapshotId(p->pHctDb); +} + +/* +** Open a new cursor +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCursor( + Btree *pBt, /* The btree */ + Pgno iTable, /* Root page of table to open */ + int wrFlag, /* 1 to write. 0 read-only */ + struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ + BtCursor *pCursor /* Write new cursor here */ +){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + int bNosnap = 0; + int bReadonly = sqlite3HctJournalIsReadonly(p->pHctJrnl, iTable, &bNosnap); + + assert( p->eTrans!=SQLITE_TXN_NONE ); + assert( p->eTrans!=SQLITE_TXN_ERROR ); + assert( pCur->pHctTreeCsr==0 ); + assert( BT_IS_MIGRATE(p)==0 || wrFlag ); + + /* If this is an attempt to open a read/write cursor on either the + ** sqlite_hct_journal or sqlite_hct_baseline tables, return an error + ** immediately. 
*/ + if( wrFlag && bReadonly ){ + return SQLITE_READONLY; + } + + pCur->pKeyInfo = pKeyInfo; + rc = sqlite3HctTreeCsrOpen(p->pHctTree, iTable, &pCur->pHctTreeCsr); + if( rc==SQLITE_OK && p->pHctDb ){ + int ii; + for(ii=0; iinSchemaOp && p->aSchemaOp[ii].pgnoRoot!=iTable; ii++); + if( ii==p->nSchemaOp ){ + rc = sqlite3HctDbCsrOpen(p->pHctDb, pKeyInfo, iTable, &pCur->pHctDbCsr); + sqlite3HctDbCsrNosnap(pCur->pHctDbCsr, bNosnap); + } + } + if( rc==SQLITE_OK ){ + pCur->pCsrNext = p->pCsrList; + pCur->pBtree = p; + pCur->wrFlag = wrFlag; + p->pCsrList = pCur; + }else{ + sqlite3HctTreeCsrClose(pCur->pHctTreeCsr); + pCur->pHctTreeCsr = 0; + pCur->pKeyInfo = 0; + } + + return rc; +} + +/* +** Return the size of a BtCursor object in bytes. +** +** This interfaces is needed so that users of cursors can preallocate +** sufficient storage to hold a cursor. The BtCursor object is opaque +** to users so they cannot do the sizeof() themselves - they must call +** this routine. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCursorSize(void){ + return ROUND8(sizeof(HBtCursor)); +} + +/* +** Initialize memory that will be converted into a BtCursor object. +** +** The simple approach here would be to memset() the entire object +** to zero. But it turns out that the apPage[] and aiIdx[] arrays +** do not need to be zeroed and they are large, so we can save a lot +** of run-time by skipping the initialization of those elements. +*/ +SQLITE_PRIVATE void sqlite3HctBtreeCursorZero(BtCursor *p){ + /* hct takes the simple approach mentioned above */ + memset(p, 0, sizeof(HBtCursor)); +} + +/* +** Close a cursor. The read lock on the database file is released +** when the last cursor is closed. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeCloseCursor(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + HBtree *const pBtree = pCur->pBtree; + if( pBtree ){ + HBtCursor **pp; + sqlite3HctTreeCsrClose(pCur->pHctTreeCsr); + sqlite3HctDbCsrClose(pCur->pHctDbCsr); + for(pp=&pBtree->pCsrList; *pp!=pCur; pp=&(*pp)->pCsrNext); + *pp = pCur->pCsrNext; + pCur->pHctTreeCsr = 0; + pCur->pBtree = 0; + pCur->pCsrNext = 0; + if( (pBtree->openFlags & BTREE_SINGLE) && pBtree->pCsrList==0 ){ + sqlite3HctBtreeClose((Btree*)pBtree); + } + } + return SQLITE_OK; +} + +/* +** Return true if the given BtCursor is valid. A valid cursor is one +** that is currently pointing to a row in a (non-empty) table. +** This is a verification routine is used only within assert() statements. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCursorIsValid(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + return pCur && ( + !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) + || !sqlite3HctDbCsrEof(pCur->pHctDbCsr) + ); +} +SQLITE_PRIVATE int sqlite3HctBtreeCursorIsValidNN(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + return ( + !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) + || !sqlite3HctDbCsrEof(pCur->pHctDbCsr) + ); +} + +/* +** Return the value of the integer key or "rowid" for a table btree. +** This routine is only valid for a cursor that is pointing into a +** ordinary table btree. If the cursor points to an index btree or +** is invalid, the result of this routine is undefined. +*/ +SQLITE_PRIVATE i64 sqlite3HctBtreeIntegerKey(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + i64 iKey; + if( pCur->bUseTree ){ + sqlite3HctTreeCsrKey(pCur->pHctTreeCsr, &iKey); + }else{ + sqlite3HctDbCsrKey(pCur->pHctDbCsr, &iKey); + } + return iKey; +} + +/* +** Pin or unpin a cursor. 
+*/ +SQLITE_PRIVATE void sqlite3HctBtreeCursorPin(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + sqlite3HctTreeCsrPin(pCur->pHctTreeCsr); +} +SQLITE_PRIVATE void sqlite3HctBtreeCursorUnpin(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + sqlite3HctTreeCsrUnpin(pCur->pHctTreeCsr); +} + +#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC +/* +** Return the offset into the database file for the start of the +** payload to which the cursor is pointing. +*/ +SQLITE_PRIVATE i64 sqlite3HctBtreeOffset(BtCursor *pCur){ + assert( 0 ); + return 0; +} +#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */ + +/* +** Return the number of bytes of payload for the entry that pCur is +** currently pointing to. For table btrees, this will be the amount +** of data. For index btrees, this will be the size of the key. +** +** The caller must guarantee that the cursor is pointing to a non-NULL +** valid entry. In other words, the calling procedure must guarantee +** that the cursor has Cursor.eState==CURSOR_VALID. +*/ +SQLITE_PRIVATE u32 sqlite3HctBtreePayloadSize(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + int nData; + if( pCur->bUseTree ){ + sqlite3HctTreeCsrData(pCur->pHctTreeCsr, &nData, 0); + }else{ + sqlite3HctDbCsrData(pCur->pHctDbCsr, &nData, 0); + } + return nData; +} + +/* +** Return an upper bound on the size of any record for the table +** that the cursor is pointing into. +** +** This is an optimization. Everything will still work if this +** routine always returns 2147483647 (which is the largest record +** that SQLite can handle) or more. But returning a smaller value might +** prevent large memory allocations when trying to interpret a +** corrupt datrabase. +** +** The current implementation merely returns the size of the underlying +** database file. 
+*/ +SQLITE_PRIVATE sqlite3_int64 sqlite3HctBtreeMaxRecordSize(BtCursor *pCur){ + assert( 0 ); + return 0x7FFFFFFF; +} + +/* +** Read part of the payload for the row at which that cursor pCur is currently +** pointing. "amt" bytes will be transferred into pBuf[]. The transfer +** begins at "offset". +** +** pCur can be pointing to either a table or an index b-tree. +** If pointing to a table btree, then the content section is read. If +** pCur is pointing to an index b-tree then the key section is read. +** +** For sqlite3HctBtreePayload(), the caller must ensure that pCur is pointing +** to a valid row in the table. For sqlite3HctBtreePayloadChecked(), the +** cursor might be invalid or might need to be restored before being read. +** +** Return SQLITE_OK on success or an error code if anything goes +** wrong. An error is returned if "offset+amt" is larger than +** the available payload. +*/ +SQLITE_PRIVATE int sqlite3HctBtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ + u32 n = 0; + const u8 *p = 0; + + p = (const u8*)sqlite3HctBtreePayloadFetch(pCur, &n); + assert( offset+amt<=n ); + memcpy(pBuf, &p[offset], amt); + + return SQLITE_OK; +} + + +static int btreeSetUseTree(HBtCursor *pCur){ + int rc = SQLITE_OK; + int bTreeEof = sqlite3HctTreeCsrEof(pCur->pHctTreeCsr); + int bDbEof = sqlite3HctDbCsrEof(pCur->pHctDbCsr); + + assert( pCur->eDir==BTREE_DIR_FORWARD || pCur->eDir==BTREE_DIR_REVERSE ); + assert( pCur->pHctTreeCsr ); + + if( bTreeEof ){ + pCur->bUseTree = 0; + }else if( bDbEof ){ + pCur->bUseTree = 1; + }else if( pCur->pKeyInfo==0 ){ + i64 iKeyTree; + i64 iKeyDb; + + sqlite3HctTreeCsrKey(pCur->pHctTreeCsr, &iKeyTree); + sqlite3HctDbCsrKey(pCur->pHctDbCsr, &iKeyDb); + + if( iKeyTree==iKeyDb ){ + pCur->bUseTree = 2; + }else{ + pCur->bUseTree = (iKeyTree < iKeyDb); + if( pCur->eDir==BTREE_DIR_REVERSE ) pCur->bUseTree = !pCur->bUseTree; + } + }else{ + UnpackedRecord *pKeyDb = 0; + const u8 *aKeyTree = 0; + int nKeyTree = 0; + + rc = 
sqlite3HctDbCsrLoadAndDecode(pCur->pHctDbCsr, &pKeyDb); + if( rc==SQLITE_OK ){ + int res; + int nSave = pKeyDb->nField; + sqlite3HctDbRecordTrim(pKeyDb); + sqlite3HctTreeCsrData(pCur->pHctTreeCsr, &nKeyTree, &aKeyTree); + res = sqlite3VdbeRecordCompare(nKeyTree, aKeyTree, pKeyDb); + pKeyDb->nField = nSave; + if( res==0 ){ + pCur->bUseTree = 2; + }else{ + pCur->bUseTree = (res<0); + if( pCur->eDir==BTREE_DIR_REVERSE ) pCur->bUseTree = !pCur->bUseTree; + } + } + } + + return rc; +} + +static int hctReseekBlobCsr(HBtCursor *pCsr){ + int rc = SQLITE_OK; + assert( pCsr->pKeyInfo==0 ); + if( sqlite3HctTreeCsrHasMoved(pCsr->pHctTreeCsr) ){ + int res = 0; + rc = sqlite3HctTreeCsrReseek(pCsr->pHctTreeCsr, &res); + if( rc==SQLITE_OK && res==0 ){ + pCsr->bUseTree = 1; + } + } + return rc; +} + +/* +** This variant of sqlite3HctBtreePayload() works even if the cursor has not +** in the CURSOR_VALID state. It is only used by the sqlite3_blob_read() +** interface. +*/ +#ifndef SQLITE_OMIT_INCRBLOB +SQLITE_PRIVATE int sqlite3HctBtreePayloadChecked( + BtCursor *pCur, + u32 offset, + u32 amt, + void *pBuf +){ + HBtCursor *pCsr = (HBtCursor*)pCur; + int rc = SQLITE_OK; + rc = hctReseekBlobCsr(pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3HctBtreePayload(pCur, offset, amt, pBuf); + } + return rc; +} +#endif /* SQLITE_OMIT_INCRBLOB */ + +/* +** For the entry that cursor pCur is point to, return as +** many bytes of the key or data as are available on the local +** b-tree page. Write the number of available bytes into *pAmt. +** +** The pointer returned is ephemeral. The key/data may move +** or be destroyed on the next call to any Btree routine, +** including calls from other threads against the same cache. +** Hence, a mutex on the BtShared should be held prior to calling +** this routine. +** +** These routines is used to get quick access to key and data +** in the common case where no overflow pages are used. 
+**
+** The bytes come from the tree cursor when pCur->bUseTree is non-zero and
+** from the database cursor otherwise.
+*/
+SQLITE_PRIVATE const void *sqlite3HctBtreePayloadFetch(BtCursor *pCursor, u32 *pAmt){
+  HBtCursor *const pCur = (HBtCursor*)pCursor;
+  const u8 *aData;
+  int nData;
+  if( pCur->bUseTree ){
+    sqlite3HctTreeCsrData(pCur->pHctTreeCsr, &nData, &aData);
+  }else{
+    sqlite3HctDbCsrData(pCur->pHctDbCsr, &nData, &aData);
+  }
+  *pAmt = (u32)nData;
+  return aData;
+}
+
+/* Move the cursor to the first entry in the table.  Return SQLITE_OK
+** on success.  Set *pRes to 0 if the cursor actually points to something
+** or set *pRes to 1 if the table is empty.
+**
+** Both underlying cursors are rewound, the scan direction is set to
+** BTREE_DIR_FORWARD, and if the first visible entry is a delete marker in
+** the tree the cursor is stepped forward past it.  *pRes is written only
+** when the positioning steps succeed.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeFirst(BtCursor *pCursor, int *pRes){
+  HBtCursor *const pCur = (HBtCursor*)pCursor;
+  int rc = SQLITE_OK;
+
+  sqlite3HctTreeCsrFirst(pCur->pHctTreeCsr);
+  if( pCur->pHctDbCsr ){
+    rc = sqlite3HctDbCsrFirst(pCur->pHctDbCsr);
+  }
+  if( rc==SQLITE_OK ){
+    pCur->eDir = BTREE_DIR_FORWARD;
+    /* btreeSetUseTree() can fail while decoding an index record; do not
+    ** carry on with a stale bUseTree value if it does. */
+    rc = btreeSetUseTree(pCur);
+  }
+  if( rc==SQLITE_OK ){
+    if( pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){
+      rc = sqlite3HctBtreeNext((BtCursor*)pCur, 0);
+      if( rc==SQLITE_DONE ) rc = SQLITE_OK;   /* ran off the end: empty */
+    }
+    *pRes = sqlite3HctBtreeEof((BtCursor*)pCur);
+  }
+
+  return rc;
+}
+
+/* Move the cursor to the last entry in the table.  Return SQLITE_OK
+** on success.  Set *pRes to 0 if the cursor actually points to something
+** or set *pRes to 1 if the table is empty. 
+**
+** If the cursor is already flagged as sitting on the last entry
+** (pCur->isLast) no repositioning is done and *pRes is set to 0.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeLast(BtCursor *pCursor, int *pRes){
+  int rc = SQLITE_OK;
+  HBtCursor *const pCur = (HBtCursor*)pCursor;
+
+  if( pCur->isLast ){
+    /* Fast path.  isLast is only ever set below, when the cursor is left on
+    ** a live tree entry, so the table is not empty.  Report that explicitly
+    ** instead of leaving *pRes unwritten. */
+    *pRes = 0;
+    return SQLITE_OK;
+  }
+
+  sqlite3HctTreeCsrLast(pCur->pHctTreeCsr);
+  if( pCur->pHctDbCsr ){
+    rc = sqlite3HctDbCsrLast(pCur->pHctDbCsr);
+  }
+  if( rc==SQLITE_OK ){
+    int bTreeEof = sqlite3HctTreeCsrEof(pCur->pHctTreeCsr);
+    int bDbEof = sqlite3HctDbCsrEof(pCur->pHctDbCsr);
+    *pRes = (bTreeEof && bDbEof);
+    pCur->eDir = BTREE_DIR_REVERSE;
+    /* Propagate a record-decoding failure rather than ignoring it. */
+    rc = btreeSetUseTree(pCur);
+    if( rc==SQLITE_OK && pCur->bUseTree ){
+      if( sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){
+        /* Last tree entry is a delete marker: step back to a live entry. */
+        rc = sqlite3HctBtreePrevious((BtCursor*)pCur, 0);
+        if( rc==SQLITE_DONE ){
+          *pRes = sqlite3HctBtreeEof((BtCursor*)pCur);
+          rc = SQLITE_OK;
+        }
+      }else{
+        pCur->isLast = 1;
+      }
+    }
+  }
+
+  return rc;
+}
+
+/* Move the cursor so that it points to an entry near the key
+** specified by pIdxKey or intKey.   Return a success code.
+**
+** For INTKEY tables, the intKey parameter is used.  pIdxKey
+** must be NULL.  For index tables, pIdxKey is used and intKey
+** is ignored.
+**
+** If an exact match is not found, then the cursor is always
+** left pointing at a leaf page which would hold the entry if it
+** were present.  The cursor might point to an entry that comes
+** before or after the key.
+**
+** An integer is written into *pRes which is the result of
+** comparing the key with the entry to which the cursor is
+** pointing.  The meaning of the integer written into
+** *pRes is as follows:
+**
+**     *pRes<0      The cursor is left pointing at an entry that
+**                  is smaller than intKey/pIdxKey or if the table is empty
+**                  and the cursor is therefore left pointing to nothing.
+**
+**     *pRes==0     The cursor is left pointing at an entry that
+**                  exactly matches intKey/pIdxKey.
+**
+**     *pRes>0      The cursor is left pointing at an entry that
+**                  is larger than intKey/pIdxKey. 
+** +** For index tables, the pIdxKey->eqSeen field is set to 1 if there +** exists an entry in the table that exactly matches pIdxKey. +*/ +static int hctBtreeMovetoUnpacked( + HBtCursor *pCur, /* The cursor to be moved */ + UnpackedRecord *pIdxKey, /* Unpacked index key */ + i64 intKey, /* The table key */ + int biasRight, /* If true, bias the search to the high end */ + int *pRes /* Write search results here */ +){ + int rc = SQLITE_OK; + int res1 = 0; + int res2 = -1; + + pCur->isLast = 0; + rc = sqlite3HctTreeCsrSeek(pCur->pHctTreeCsr, pIdxKey, intKey, &res1); + if( rc==SQLITE_OK && pCur->pHctDbCsr ){ + rc = sqlite3HctDbCsrSeek(pCur->pHctDbCsr, pIdxKey, intKey, &res2); + } + + if( pCur->eDir==BTREE_DIR_NONE ){ + if( res1==0 || pCur->pHctDbCsr==0 ){ + *pRes = res1; + pCur->bUseTree = 1; + if( sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){ + *pRes = -1; + } + }else{ + pCur->bUseTree = 0; + *pRes = res2; + } + }else{ + if( pCur->eDir==BTREE_DIR_FORWARD ){ + if( rc==SQLITE_OK && res2<0 && !sqlite3HctDbCsrEof(pCur->pHctDbCsr) ){ + rc = sqlite3HctDbCsrNext(pCur->pHctDbCsr); + } + if( rc==SQLITE_OK && res1<0 && !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) ){ + rc = sqlite3HctTreeCsrNext(pCur->pHctTreeCsr); + } + + if( res1==0 || (res2==0 && pCur->pHctDbCsr) ){ + *pRes = 0; + }else if( sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) + && sqlite3HctDbCsrEof(pCur->pHctDbCsr) + ){ + *pRes = -1; + }else{ + *pRes = +1; + } + }else{ + assert( pCur->eDir==BTREE_DIR_REVERSE ); + assert( res2<=0 ); + if( rc==SQLITE_OK && res1>0 && !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) ){ + rc = sqlite3HctTreeCsrPrev(pCur->pHctTreeCsr); + } + if( res1==0 || res2==0 ){ + *pRes = 0; + }else{ + *pRes = -1; + } + } + + btreeSetUseTree(pCur); + if( pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){ + if( pCur->eDir==BTREE_DIR_FORWARD ){ + rc = sqlite3HctBtreeNext((BtCursor*)pCur, 0); + if( rc==SQLITE_DONE ){ + /* Cursor points at EOF. *pRes must be -ve in this case. 
*/ + rc = SQLITE_OK; + *pRes = -1; + }else if( pIdxKey==0 ){ + *pRes = 1; + }else{ + u32 nKey; + const void *a = sqlite3HctBtreePayloadFetch((BtCursor*)pCur, &nKey); + *pRes = sqlite3VdbeRecordCompareWithSkip(nKey, a, pIdxKey, 0); + } + }else{ + rc = sqlite3HctBtreePrevious((BtCursor*)pCur, 0); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + *pRes = -1; + } + } + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctBtreeTableMoveto( + BtCursor *pCursor, /* The cursor to be moved */ + i64 intKey, /* The table key */ + int biasRight, /* If true, bias the search to the high end */ + int *pRes /* Write search results here */ +){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + assert( CSR_IS_MIGRATE(pCur)==0 ); + if( pCur->isLast && sqlite3HctBtreeIntegerKey(pCursor)eDir = eDir; + if( pCur->pHctDbCsr ){ + sqlite3HctDbCsrDir(pCur->pHctDbCsr, eDir); + } +} + +/* +** Return TRUE if the cursor is not pointing at an entry of the table. +** +** TRUE will be returned after a call to sqlite3HctBtreeNext() moves +** past the last entry in the table or sqlite3HctBtreePrev() moves past +** the first entry. TRUE is also returned if the table is empty. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeEof(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries + ** have been deleted? This API will need to change to return an error code + ** as well as the boolean result value. + */ + return ( + sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) + && sqlite3HctDbCsrEof(pCur->pHctDbCsr) + ); +} + +/* +** Return an estimate for the number of rows in the table that pCur is +** pointing to. Return a negative number if no estimate is currently +** available. +*/ +SQLITE_PRIVATE i64 sqlite3HctBtreeRowCountEst(BtCursor *pCur){ + /* TODO: Fix this so that it returns a meaningful value. */ + return -1; +} + +/* +** Advance the cursor to the next entry in the database. 
+** Return value:
+**
+**     SQLITE_OK        success
+**     SQLITE_DONE      cursor is already pointing at the last element
+**     otherwise        some kind of error occurred
+**
+** The main entry point is sqlite3HctBtreeNext().  That routine is optimized
+** for the common case of merely incrementing the cell counter BtCursor.aiIdx
+** to the next cell on the current page.  The (slower) btreeNext() helper
+** routine is called when it is necessary to move to a different page or
+** to restore the cursor.
+**
+** If bit 0x01 of the F argument in sqlite3HctBtreeNext(C,F) is 1, then the
+** cursor corresponds to an SQL index and this routine could have been
+** skipped if the SQL index had been a unique index.  The F argument
+** is a hint to the implement.  SQLite btree implementation does not use
+** this hint, but COMDB2 does.
+**
+** NOTE: the two paragraphs above are inherited from the stock b-tree.  The
+** routine below ignores the flags argument.  It restores the cursor, then
+** repeatedly advances the tree cursor and/or the database cursor (both when
+** bUseTree==2) until the selected entry is not a delete marker in the tree
+** or both cursors are at EOF.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeNext(BtCursor *pCursor, int flags){
+  HBtCursor *const pCur = (HBtCursor*)pCursor;
+  int rc = SQLITE_OK;
+  int bDummy;
+
+  assert( pCur->isLast==0 );
+  rc = sqlite3HctBtreeCursorRestore((BtCursor*)pCur, &bDummy);
+  if( rc!=SQLITE_OK ) return rc;
+
+  if( sqlite3HctBtreeEof((BtCursor*)pCur) ){
+    rc = SQLITE_DONE;
+  }else{
+    assert( pCur->eDir==BTREE_DIR_FORWARD );
+    do{
+      if( pCur->bUseTree ){
+        rc = sqlite3HctTreeCsrNext(pCur->pHctTreeCsr);
+      }
+      if( rc==SQLITE_OK && (pCur->bUseTree==0 || pCur->bUseTree==2) ){
+        rc = sqlite3HctDbCsrNext(pCur->pHctDbCsr);
+      }
+      if( rc==SQLITE_OK ){
+        if( sqlite3HctBtreeEof((BtCursor*)pCur) ){
+          rc = SQLITE_DONE;
+        }else{
+          /* A failure to decode the database record must stop the scan;
+          ** previously the return value was discarded here. */
+          rc = btreeSetUseTree(pCur);
+        }
+      }
+    }while( rc==SQLITE_OK
+         && pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr)
+    );
+  }
+  return rc;
+}
+
+/*
+** Step the cursor back to the previous entry in the database.
+** Return values:
+**
+**     SQLITE_OK     success
+**     SQLITE_DONE   the cursor is already on the first element of the table
+**     otherwise     some kind of error occurred
+**
+** The main entry point is sqlite3HctBtreePrevious(). 
That routine is optimized +** for the common case of merely decrementing the cell counter BtCursor.aiIdx +** to the previous cell on the current page. The (slower) btreePrevious() +** helper routine is called when it is necessary to move to a different page +** or to restore the cursor. +** +** If bit 0x01 of the F argument to sqlite3HctBtreePrevious(C,F) is 1, then +** the cursor corresponds to an SQL index and this routine could have been +** skipped if the SQL index had been a unique index. The F argument is a +** hint to the implement. The native SQLite btree implementation does not +** use this hint, but COMDB2 does. +*/ +SQLITE_PRIVATE int sqlite3HctBtreePrevious(BtCursor *pCursor, int flags){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + int rc = SQLITE_OK; + int bDummy; + assert( pCur->eDir==BTREE_DIR_REVERSE ); + + pCur->isLast = 0; + rc = sqlite3HctBtreeCursorRestore((BtCursor*)pCur, &bDummy); + if( rc!=SQLITE_OK ) return rc; + + do{ + if( pCur->bUseTree ){ + rc = sqlite3HctTreeCsrPrev(pCur->pHctTreeCsr); + } + if( rc==SQLITE_OK && (pCur->bUseTree==0 || pCur->bUseTree==2) ){ + rc = sqlite3HctDbCsrPrev(pCur->pHctDbCsr); + } + if( rc==SQLITE_OK ){ + if( sqlite3HctBtreeEof((BtCursor*)pCur) ){ + rc = SQLITE_DONE; + }else{ + btreeSetUseTree(pCur); + } + } + }while( rc==SQLITE_OK + && pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) + ); + return rc; +} + +static void hctBtreeClearIsLast(HBtree *pBt, HBtCursor *pExcept){ + HBtCursor *p; + for(p=pBt->pCsrList; p; p=p->pCsrNext){ + if( p!=pExcept ) p->isLast = 0; + } +} + +/* +** Insert a new record into the BTree. The content of the new record +** is described by the pX object. The pCur cursor is used only to +** define what table the record should be inserted into, and is left +** pointing at a random location. +** +** For a table btree (used for rowid tables), only the pX.nKey value of +** the key is used. The pX.pKey value must be NULL. 
The pX.nKey is the
+** rowid or INTEGER PRIMARY KEY of the row.  The pX.nData,pData,nZero fields
+** hold the content of the row.
+**
+** For an index btree (used for indexes and WITHOUT ROWID tables), the
+** key is an arbitrary byte sequence stored in pX.pKey,nKey.  The
+** pX.pData,nData,nZero fields must be zero.
+**
+** If the seekResult parameter is non-zero, then a successful call to
+** MovetoUnpacked() to seek cursor pCur to (pKey,nKey) has already
+** been performed.  In other words, if seekResult!=0 then the cursor
+** is currently pointing to a cell that will be adjacent to the cell
+** to be inserted.  If seekResult<0 then pCur points to a cell that is
+** smaller than (pKey,nKey).  If seekResult>0 then pCur points to a cell
+** that is larger than (pKey,nKey).
+**
+** If seekResult==0, that means pCur is pointing at some unknown location.
+** In that case, this routine must seek the cursor to the correct insertion
+** point for (pKey,nKey) before doing the insertion.  For index btrees,
+** if pX->nMem is non-zero, then pX->aMem contains pointers to the unpacked
+** key values and pX->aMem can be used instead of pX->pKey to avoid having
+** to decode the key. 
+**
+** In this implementation the write always goes to the tree cursor
+** (pCur->pHctTreeCsr), or to hctBtreeMigrateInsert() when
+** CSR_IS_MIGRATE(pCur) is true.  The isLast flag of every other cursor on
+** the same Btree is cleared first.  When this cursor is flagged isLast and
+** seekResult<0 the row is appended with sqlite3HctTreeAppend(); otherwise
+** sqlite3HctTreeInsert() is used and isLast is cleared.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeInsert(
+  BtCursor *pCursor,             /* Insert data into the table of this cursor */
+  const BtreePayload *pX,        /* Content of the row to be inserted */
+  int flags,                     /* True if this is likely an append */
+  int seekResult                 /* Result of prior MovetoUnpacked() call */
+){
+  HBtCursor *const pCur = (HBtCursor*)pCursor;
+  HctTreeCsr *pTreeCsr = pCur->pHctTreeCsr;
+  int rc = SQLITE_OK;
+  UnpackedRecord r;              /* stack record used when pX->aMem is given */
+  UnpackedRecord *pRec = 0;      /* &r, a heap record, or 0 for intkey rows */
+  const u8 *aData;
+  int nData;
+  int nZero;
+  i64 iKey = 0;
+  int bMigrate = pCur->pBtree->config.db->bHctMigrate;  /* NOTE(review): not referenced again in this function as shown */
+
+  hctBtreeClearIsLast(pCur->pBtree, pCur);
+  if( pX->pKey ){
+    /* Index / WITHOUT ROWID entry: the key bytes are the payload. */
+    aData = pX->pKey;
+    nData = pX->nKey;
+    nZero = 0;
+    if( pX->nMem ){
+      /* Caller supplied already-decoded fields; wrap them without copying. */
+      memset(&r, 0, sizeof(r));
+      r.pKeyInfo = pCur->pKeyInfo;
+      r.aMem = pX->aMem;
+      r.nField = pX->nMem;
+      pRec = &r;
+    }else{
+      pRec = sqlite3VdbeAllocUnpackedRecord(pCur->pKeyInfo);
+      if( pRec==0 ) return SQLITE_NOMEM_BKPT;
+      sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nData, aData, pRec);
+    }
+    iKey = 0;
+  }else{
+    /* Rowid table entry: pX->nKey is the rowid. */
+    aData = pX->pData;
+    nData = pX->nData;
+    nZero = pX->nZero;
+    iKey = pX->nKey;
+  }
+
+  if( CSR_IS_MIGRATE(pCur) ){
+    assert( nZero==0 );
+    rc = hctBtreeMigrateInsert(pCur, pRec, iKey, nData, aData);
+  }else{
+    if( pCur->isLast && seekResult<0 ){
+      rc = sqlite3HctTreeAppend(
+          pTreeCsr, pCur->pKeyInfo, iKey, nData, aData, nZero
+      );
+    }else{
+      rc = sqlite3HctTreeInsert(pTreeCsr, pRec, iKey, nData, aData, nZero);
+      pCur->isLast = 0;
+    }
+  }
+
+  /* Only a heap-allocated record is freed; &r lives on the stack. */
+  if( pRec && pRec!=&r ){
+    sqlite3DbFree(pCur->pKeyInfo->db, pRec);
+  }
+  return rc;
+}
+/*
+** If the Btree has a journal (p->pHctJrnl), append the text zSql as a new
+** row in the tree rooted at HCT_TREE_SCHEMAOP_ROOT.  The rowid used is one
+** more than the current last key of that tree, or 1 if the tree is empty.
+** Without a journal this is a no-op that returns SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3HctSchemaOp(Btree *pBt, const char *zSql){
+  int rc = SQLITE_OK;
+  HBtree *const p = (HBtree*)pBt;
+  if( p->pHctJrnl ){
+    HctTreeCsr *pCsr = 0;
+
+    rc = sqlite3HctTreeCsrOpen(p->pHctTree, HCT_TREE_SCHEMAOP_ROOT, &pCsr);
+    if( rc==SQLITE_OK ){
+      int nSql = sqlite3Strlen30(zSql);
+      i64 iRowid = 1;
+      sqlite3HctTreeCsrLast(pCsr);
+      if( sqlite3HctTreeCsrEof(pCsr)==0 ){
+        sqlite3HctTreeCsrKey(pCsr, &iRowid);
+        iRowid++;
+      }
+
+      rc = sqlite3HctTreeInsert(pCsr, 0, iRowid, nSql, (const u8*)zSql, 0);
+      sqlite3HctTreeCsrClose(pCsr);
+    }
+  }
+  return rc;
+}
+
+/*
+** Delete the entry that the cursor is pointing to.
+**
+** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then
+** the cursor is left pointing at an arbitrary location after the delete.
+** But if that bit is set, then the cursor is left in a state such that
+** the next call to BtreeNext() or BtreePrev() moves it to the same row
+** as it would have been on if the call to BtreeDelete() had been omitted.
+**
+** The BTREE_AUXDELETE bit of flags indicates that is one of several deletes
+** associated with a single table entry and its indexes.  Only one of those
+** deletes is considered the "primary" delete.  The primary delete occurs
+** on a cursor that is not a BTREE_FORDELETE cursor.  All but one delete
+** operation on non-FORDELETE cursors is tagged with the AUXDELETE flag.
+** The BTREE_AUXDELETE bit is a hint that is not used by this implementation,
+** but which might be used by alternative storage engines. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeDelete(BtCursor *pCursor, u8 flags){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + int rc = SQLITE_OK; + + hctBtreeClearIsLast(pCur->pBtree, 0); + if( pCur->pHctDbCsr==0 ){ + rc = sqlite3HctTreeDelete(pCur->pHctTreeCsr); + }else if( pCur->pKeyInfo==0 ){ + i64 iKey = sqlite3HctBtreeIntegerKey((BtCursor*)pCur); + rc = sqlite3HctTreeDeleteKey(pCur->pHctTreeCsr, 0, iKey, 0, 0); + }else{ + u32 nKey; + const u8 *aKey = (u8*)sqlite3HctBtreePayloadFetch((BtCursor*)pCur, &nKey); + UnpackedRecord *pRec = sqlite3VdbeAllocUnpackedRecord(pCur->pKeyInfo); + + if( pRec==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nKey, aKey, pRec); + rc = sqlite3HctTreeDeleteKey(pCur->pHctTreeCsr, pRec, 0, nKey, aKey); + sqlite3DbFree(pCur->pBtree->config.db, pRec); + } + } + return rc; +} + +SQLITE_PRIVATE int sqlite3HctBtreeIdxDelete(BtCursor *pCursor, UnpackedRecord *pKey){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + int rc = SQLITE_OK; + + hctBtreeClearIsLast(pCur->pBtree, 0); + if( pCur->pHctDbCsr ){ + u8 *aRec = 0; + int nRec = 0; + rc = sqlite3HctSerializeRecord(pKey, &aRec, &nRec); + if( rc==SQLITE_OK ){ + rc = sqlite3HctTreeDeleteKey(pCur->pHctTreeCsr, pKey, 0, nRec, aRec); + sqlite3_free(aRec); + } + }else{ + int res = 0; + rc = sqlite3HctTreeCsrSeek(pCur->pHctTreeCsr, pKey, 0, &res); + if( res==0 ){ + rc = sqlite3HctTreeDelete(pCur->pHctTreeCsr); + } + } + return rc; +} + +static int hctreeAddNewSchemaOp(HBtree *p, u32 iRoot, int eOp){ + BtSchemaOp *aSchemaOp; + + /* Grow the Btree.aSchemaOp array */ + assert( p->pHctDb ); + aSchemaOp = (BtSchemaOp*)sqlite3_realloc( + p->aSchemaOp, sizeof(BtSchemaOp)*(p->nSchemaOp+1) + ); + if( aSchemaOp==0 ) return SQLITE_NOMEM_BKPT; + + p->aSchemaOp = aSchemaOp; + p->aSchemaOp[p->nSchemaOp].pgnoRoot = iRoot; + p->aSchemaOp[p->nSchemaOp].iSavepoint = p->config.db->nSavepoint; + p->aSchemaOp[p->nSchemaOp].eSchemaOp = eOp; + p->nSchemaOp++; + + return SQLITE_OK; +} + 
+static int hctreeAddNewRoot(HBtree *p, u32 iRoot, int bIndex){  /* record creation of root iRoot as a schema op */
+  int eOp = bIndex ? HCT_SCHEMAOP_CREATE_INDEX : HCT_SCHEMAOP_CREATE_INTKEY;
+  return hctreeAddNewSchemaOp(p, iRoot, eOp);
+}
+
+/*
+** Create a new BTree table.  Write into *piTable the page
+** number for the root page of the new table.
+**
+** The type of table is determined by the flags parameter.  Only the
+** following values of flags are currently in use.  Other values for
+** flags might not work:
+**
+**     BTREE_INTKEY|BTREE_LEAFDATA     Used for SQL tables with rowid keys
+**     BTREE_ZERODATA                  Used for SQL indices
+**
+** When the Btree has a database (p->pHctDb) the root is obtained from
+** sqlite3HctDbRootNew() and a matching CREATE schema op is recorded.
+** Otherwise the next value of p->iNextRoot is handed out.  *piTable is
+** written even on error, in which case it may be 0.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeCreateTable(Btree *pBt, Pgno *piTable, int flags){
+  HBtree *const p = (HBtree*)pBt;
+  Pgno iNew = 0;
+  int rc = SQLITE_OK;
+  if( p->pHctDb ){
+    rc = sqlite3HctDbRootNew(p->pHctDb, &iNew);
+    if( rc==SQLITE_OK ){
+      /* Anything without BTREE_INTKEY is treated as an index root. */
+      rc = hctreeAddNewRoot(p, iNew, (flags & BTREE_INTKEY)==0);
+    }
+  }else{
+    iNew = p->iNextRoot++;
+  }
+  *piTable = iNew;
+  return rc;
+}
+
+/*
+** Delete all information from a single table in the database.  iTable is
+** the page number of the root of the table.  After this routine returns,
+** the root page is empty, but still exists.
+**
+** This routine will fail with SQLITE_LOCKED if there are any open
+** read cursors on the table.  Open write cursors are moved to the
+** root of the table.
+**
+** If pnChange is not NULL, then table iTable must be an intkey table. The
+** integer value pointed to by pnChange is incremented by the number of
+** entries in the table. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeClearTable(Btree *pBt, int iTable, i64 *pnChange){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + KeyInfo *pKeyInfo = 0; + + rc = hctFindKeyInfo(p, iTable, &pKeyInfo); + if( rc==SQLITE_OK ){ + i64 nChange = 0; + BtCursor *pCsr = 0; + HctTreeCsr *pTreeCsr = 0; + UnpackedRecord *pRec = 0; + + if( pKeyInfo ){ + pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( pRec==0 ) rc = SQLITE_NOMEM_BKPT; + } + pCsr = (BtCursor*)sqlite3HctMalloc(&rc, sizeof(HBtCursor)); + if( rc==SQLITE_OK ){ + rc = sqlite3HctBtreeCursor(pBt, iTable, 0, pKeyInfo, pCsr); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctTreeCsrOpen(p->pHctTree, iTable, &pTreeCsr); + } + + if( rc==SQLITE_OK ){ + int res = 0; + rc = sqlite3HctBtreeFirst(pCsr, &res); + if( res==0 ){ + while( rc==SQLITE_OK ){ + nChange++; + if( pKeyInfo ){ + const u8 *aData = 0; + u32 nData = 0; + aData = (const u8*)sqlite3HctBtreePayloadFetch(pCsr, &nData); + sqlite3VdbeRecordUnpack(pKeyInfo, nData, aData, pRec); + rc = sqlite3HctTreeDeleteKey(pTreeCsr, pRec, 0, nData, aData); + }else{ + i64 iKey = sqlite3HctBtreeIntegerKey((BtCursor*)pCsr); + rc = sqlite3HctTreeDeleteKey(pTreeCsr, 0, iKey, 0, 0); + } + rc = sqlite3HctBtreeNext(pCsr, 0); + } + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + } + if( pnChange ) *pnChange = nChange; + + sqlite3KeyInfoUnref(pKeyInfo); + sqlite3HctBtreeCloseCursor(pCsr); + sqlite3HctTreeCsrClose(pTreeCsr); + sqlite3DbFree(p->config.db, pRec); + sqlite3_free(pCsr); + } + return rc; +} + +/* +** Delete all information from the single table that pCur is open on. +** +** This routine only work for pCur on an ephemeral table. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeClearTableOfCursor(BtCursor *pCursor){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + return sqlite3HctTreeClearOne( + pCur->pBtree->pHctTree, sqlite3HctTreeCsrRoot(pCur->pHctTreeCsr), 0 + ); +} + +/* +** Drop the table with root page iTable. Set (*piMoved) to 0 before +** returning. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeDropTable(Btree *pBt, int iTable, int *piMoved){ + HBtree *const p = (HBtree*)pBt; + *piMoved = 0; + return hctreeAddNewSchemaOp(p, iTable, HCT_SCHEMAOP_DROP); +} + + +/* +** This function may only be called if the b-tree connection already +** has a read or write transaction open on the database. +** +** Read the meta-information out of a database file. Meta[0] +** is the number of free pages currently in the database. Meta[1] +** through meta[15] are available for use by higher layers. Meta[0] +** is read-only, the others are read/write. +** +** The schema layer numbers meta values differently. At the schema +** layer (and the SetCookie and ReadCookie opcodes) the number of +** free pages is not visible. So Cookie[0] is the same as Meta[1]. +** +** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead +** of reading the value out of the header, it instead loads the "DataVersion" +** from the pager. The BTREE_DATA_VERSION value is not actually stored in the +** database file. It is a number computed by the pager. But its access +** pattern is the same as header meta values, and so it is convenient to +** read it from this routine. +*/ +SQLITE_PRIVATE void sqlite3HctBtreeGetMeta(Btree *pBt, int idx, u32 *pMeta){ + HBtree *const p = (HBtree*)pBt; + + assert( idx>=0 && idxpHctDb ); + if( idx==BTREE_DATA_VERSION ){ + /* TODO: Fix this so that the data_version does not change when the + ** database is written by the current connection. 
*/ + i64 iSnapshot = sqlite3HctDbSnapshotId(p->pHctDb); + *pMeta = (u32)iSnapshot; + }else{ + if( p->eMetaState==HCT_METASTATE_NONE ){ + int rc = SQLITE_OK; + if( p->eTrans==SQLITE_TXN_NONE ){ + rc = sqlite3HctDbGetMeta( + p->pHctDb, (u8*)p->aMeta, SQLITE_N_BTREE_META*4 + ); + }else{ + int res = 0; + HBtCursor csr; + BtCursor *pCsr = (BtCursor*)&csr; + memset(&csr, 0, sizeof(csr)); + + sqlite3HctBtreeCursor(pBt, 2, 0, 0, pCsr); + rc = sqlite3HctBtreeTableMoveto(pCsr, 0, 0, &res); + assert( rc==SQLITE_OK ); + if( rc==SQLITE_OK && res==0 ){ + const void *aMeta = 0; + u32 nMeta = 0; + aMeta = sqlite3HctBtreePayloadFetch(pCsr, &nMeta); + memcpy(p->aMeta, aMeta, MAX(nMeta, SQLITE_N_BTREE_META*4)); + } + sqlite3HctBtreeCloseCursor(pCsr); + } + sqlite3HctJournalSchemaVersion( + p->pHctJrnl, &p->aMeta[BTREE_SCHEMA_VERSION] + ); + } + *pMeta = p->aMeta[idx]; + } +} + +/* +** Write meta-information back into the database. Meta[0] is +** read-only and may not be written. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeUpdateMeta(Btree *pBt, int idx, u32 iMeta){ + HBtree *const p = (HBtree*)pBt; + u32 dummy; + sqlite3HctBtreeGetMeta((Btree*)p, 0, &dummy); + p->aMeta[idx] = iMeta; + return sqlite3HctTreeUpdateMeta( + p->pHctTree, (u8*)p->aMeta, SQLITE_N_BTREE_META*4 + ); +} + +static char *hctDbMPrintf(int *pRc, const char *zFormat, ...){ + char *zRet = 0; + if( *pRc==SQLITE_OK ){ + va_list ap; + va_start(ap, zFormat); + zRet = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + if( !zRet ) *pRc = SQLITE_NOMEM_BKPT; + } + return zRet; +} + +SQLITE_PRIVATE int sqlite3HctBtreePragma(Btree *pBt, char **aFnctl){ + HBtree *const p = (HBtree*)pBt; + int rc = SQLITE_OK; + const char *zLeft = aFnctl[1]; + const char *zRight = aFnctl[2]; + char *zRet = 0; + + if( 0==sqlite3_stricmp("hct_ndbfile", zLeft) ){ + HctFile *pFile = sqlite3HctDbFile(p->pHctDb); + int iCurrent = 0; + int bFixed = 0; + if( zRight ){ + int iVal = sqlite3Atoi(zRight); + if( iVal<1 || iVal>HCT_MAX_NDBFILE ){ + rc = SQLITE_RANGE; 
+ }else{ + p->config.nDbFile = iVal; + } + } + if( rc==SQLITE_OK ){ + iCurrent = sqlite3HctFileNFile(pFile, &bFixed); + if( bFixed==0 ) iCurrent = p->config.nDbFile; + zRet = hctDbMPrintf(&rc, "%d", iCurrent); + } + } + + else if( 0==sqlite3_stricmp("hct_try_before_unevict", zLeft) ){ + int iVal = 0; + if( zRight ){ + iVal = sqlite3Atoi(zRight); + } + if( iVal>0 ){ + p->config.nTryBeforeUnevict = iVal; + } + zRet = hctDbMPrintf(&rc, "%d", p->config.nTryBeforeUnevict); + } + else if( 0==sqlite3_stricmp("hct_npageset", zLeft) ){ + int iVal = 0; + if( zRight ){ + iVal = sqlite3Atoi(zRight); + } + if( iVal>0 ){ + p->config.nPageSet = iVal; + } + zRet = hctDbMPrintf(&rc, "%d", p->config.nPageSet); + } + else if( 0==sqlite3_stricmp("hct_ncasfail", zLeft) ){ + zRet = hctDbMPrintf(&rc, "%lld", sqlite3HctDbNCasFail(p->pHctDb)); + } + else if( p->pHctDb && 0==sqlite3_stricmp("hct_npagescan", zLeft) ){ + int iVal = 0; + if( zRight ){ + iVal = sqlite3Atoi(zRight); + } + if( iVal>0 ){ + p->config.nPageScan = iVal; + } + zRet = hctDbMPrintf(&rc, "%d", p->config.nPageScan); + } + else if( 0==sqlite3_stricmp("hct_quiescent_integrity_check", zLeft) ){ + int iVal = 0; + if( zRight ){ + iVal = sqlite3Atoi(zRight); + } + if( iVal>0 ){ + p->config.bQuiescentIntegrityCheck = (iVal==0 ? 0 : 1); + } + zRet = hctDbMPrintf(&rc, "%d", p->config.bQuiescentIntegrityCheck); + }else{ + rc = SQLITE_NOTFOUND; + } + + aFnctl[0] = zRet; + return rc; +} + +/* +** The first argument, pCur, is a cursor opened on some b-tree. Count the +** number of entries in the b-tree and write the result to *pnEntry. +** +** SQLITE_OK is returned if the operation is successfully executed. +** Otherwise, if an error is encountered (i.e. an IO error or database +** corruption) an SQLite error code is returned. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeCount(sqlite3 *db, BtCursor *pCursor, i64 *pnEntry){ + HBtCursor *const pCur = (HBtCursor*)pCursor; + i64 nEntry = 0; + int dummy = 0; + int rc; + for(rc = sqlite3HctBtreeFirst((BtCursor*)pCur, &dummy); + rc==SQLITE_OK && 0==sqlite3HctBtreeEof((BtCursor*)pCur); + rc = sqlite3HctBtreeNext((BtCursor*)pCur, 0) + ){ + nEntry++; + } + *pnEntry = nEntry; + return SQLITE_OK; +} + +/* +** Return the pager associated with a BTree. This routine is used for +** testing and debugging only. +*/ +SQLITE_PRIVATE Pager *sqlite3HctBtreePager(Btree *pBt){ + HBtree *const p = (HBtree*)pBt; + return p->pFakePager; +} + +#ifndef SQLITE_OMIT_INTEGRITY_CHECK +/* +** This routine does a complete check of the given BTree file. aRoot[] is +** an array of pages numbers were each page number is the root page of +** a table. nRoot is the number of entries in aRoot. +** +** A read-only or read-write transaction must be opened before calling +** this function. +** +** Write the number of error seen in *pnErr. Except for some memory +** allocation errors, an error message held in memory obtained from +** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is +** returned. If a memory allocation error occurs, NULL is returned. +** +** If the first entry in aRoot[] is 0, that indicates that the list of +** root pages is incomplete. This is a "partial integrity-check". This +** happens when performing an integrity check on a single table. The +** zero is skipped, of course. But in addition, the freelist checks +** and the checks to make sure every page is referenced are also skipped, +** since obviously it is not possible to know which pages are covered by +** the unverified btrees. Except, if aRoot[1] is 1, then the freelist +** checks are still performed. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreeIntegrityCheck( + sqlite3 *db, /* Database connection that is running the check */ + Btree *pBt, /* The btree to be checked */ + Pgno *aRoot, /* An array of root pages numbers for individual trees */ + Mem *aCnt, + int nRoot, /* Number of entries in aRoot[] */ + int mxErr, /* Stop reporting errors after this many */ + int *pnErr, /* Write number of errors seen to this variable */ + char **pzErr +){ + HBtree *const p = (HBtree*)pBt; + char *zRet = 0; /* Return value */ + *pnErr = 0; + int ii; + for(ii=0; iiconfig.bQuiescentIntegrityCheck && nRoot>0 && aRoot[0]!=0 ){ + zRet = sqlite3HctDbIntegrityCheck(p->pHctDb, aRoot, aCnt, nRoot, pnErr); + assert( zRet==0 || (*pnErr)>0 ); + } + *pzErr = zRet; + return 0; +} +#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ + +/* +** Return the full pathname of the underlying database file. Return +** an empty string if the database is in-memory or a TEMP database. +** +** The pager filename is invariant as long as the pager is +** open so it is safe to access without the BtShared mutex. +*/ +SQLITE_PRIVATE const char *sqlite3HctBtreeGetFilename(Btree *p){ + return 0; +} + +/* +** Return the pathname of the journal file for this database. The return +** value of this routine is the same regardless of whether the journal file +** has been created or not. +** +** The pager journal filename is invariant as long as the pager is +** open so it is safe to access without the BtShared mutex. +*/ +SQLITE_PRIVATE const char *sqlite3HctBtreeGetJournalname(Btree *p){ + return 0; +} + +/* +** Return one of SQLITE_TXN_NONE, SQLITE_TXN_READ, or SQLITE_TXN_WRITE +** to describe the current transaction state of Btree p. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeTxnState(Btree *pBt){ + HBtree *const p = (HBtree*)pBt; + return p ? p->eTrans : SQLITE_TXN_NONE; +} + +#ifndef SQLITE_OMIT_WAL +/* +** Run a checkpoint on the Btree passed as the first argument. 
+**
+** Return SQLITE_LOCKED if this or any other connection has an open
+** transaction on the shared-cache the argument Btree is connected to.
+**
+** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
+**
+** NOTE: this implementation is a no-op.  It ignores every argument, leaves
+** *pnLog and *pnCkpt unwritten, and always returns SQLITE_OK.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){
+  return SQLITE_OK;
+}
+#endif
+
+/*
+** Return true if there is currently a backup running on Btree p.
+**
+** This implementation always reports that no backup is running.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeIsInBackup(Btree *p){
+  return 0;
+}
+
+/*
+** This function returns a pointer to a blob of memory associated with
+** a single shared-btree. The memory is used by client code for its own
+** purposes (for example, to store a high-level schema associated with
+** the shared-btree). The btree layer manages reference counting issues.
+**
+** The first time this is called on a shared-btree, nBytes bytes of memory
+** are allocated, zeroed, and returned to the caller. For each subsequent
+** call the nBytes parameter is ignored and a pointer to the same blob
+** of memory returned.
+**
+** If the nBytes parameter is 0 and the blob of memory has not yet been
+** allocated, a null pointer is returned. If the blob has already been
+** allocated, it is returned as normal.
+**
+** Just before the shared-btree is closed, the function passed as the
+** xFree argument when the memory allocation was made is invoked on the
+** blob of allocated memory. The xFree function should not call sqlite3_free()
+** on the memory, the btree layer does that. 
+*/ +SQLITE_PRIVATE void *sqlite3HctBtreeSchema(Btree *pBt, int nBytes, void(*xFree)(void *)){ + HBtree *const p = (HBtree*)pBt; + void *pRet = 0; + if( p->pSchema ){ + pRet = p->pSchema; + }else if( nBytes>0 ){ + pRet = p->pSchema = sqlite3_malloc(nBytes); + if( pRet ){ + memset(pRet, 0, nBytes); + p->xSchemaFree = xFree; + } + } + return pRet; +} + +/* +** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared +** btree as the argument handle holds an exclusive lock on the +** sqlite_schema table. Otherwise SQLITE_OK. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeSchemaLocked(Btree *p){ + return SQLITE_OK; +} + +SQLITE_PRIVATE HctDatabase *sqlite3HctDbFind(sqlite3 *db, int iDb){ + Btree *pBt = db->aDb[iDb].pBt; + return sqlite3IsHct(pBt) ? ((HBtree*)pBt)->pHctDb : 0; +} +SQLITE_PRIVATE HctJournal *sqlite3HctJrnlFind(sqlite3 *db){ + Btree *pBt = db->aDb[0].pBt; + return sqlite3IsHct(pBt) ? ((HBtree*)pBt)->pHctJrnl : 0; +} + +#ifndef SQLITE_OMIT_SHARED_CACHE +/* +** Obtain a lock on the table whose root page is iTab. The +** lock is a write lock if isWritelock is true or a read lock +** if it is false. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ + int rc = SQLITE_OK; + assert( 0 ); + return rc; +} +#endif + +#ifndef SQLITE_OMIT_INCRBLOB +/* +** Argument pCur must be a cursor opened for writing on an +** INTKEY table currently pointing at a valid table entry. +** This function modifies the data stored as part of that entry. +** +** Only the data content may only be modified, it is not possible to +** change the length of the data stored. If this function is called with +** parameters that attempt to write past the end of the existing data, +** no modifications are made and SQLITE_CORRUPT is returned. 
+*/ +SQLITE_PRIVATE int sqlite3HctBtreePutData(BtCursor *pCur, u32 offset, u32 amt, void *z){ + HBtCursor *pCsr = (HBtCursor*)pCur; + int rc = SQLITE_OK; + + if( pCsr->wrFlag==0 ){ + rc = SQLITE_READONLY; + }else{ + rc = hctReseekBlobCsr(pCsr); + } + if( rc==SQLITE_OK ){ + u32 nData = 0; + const void *aData = sqlite3HctBtreePayloadFetch(pCur, &nData); + if( offset+amt>nData ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + u8 *aBuf = (u8*)sqlite3_malloc(nData+1); + if( aBuf ){ + BtreePayload payload; + memcpy(aBuf, aData, nData); + memcpy(&aBuf[offset], z, amt); + + memset(&payload, 0, sizeof(payload)); + payload.nKey = sqlite3HctBtreeIntegerKey(pCur); + payload.pData = (const void*)aBuf; + payload.nData = nData; + rc = sqlite3HctBtreeInsert(pCur, &payload, 0, 0); + if( rc==SQLITE_OK ){ + int dummy = 0; + rc = sqlite3HctBtreeTableMoveto(pCur, payload.nKey, 0, &dummy); + assert( dummy==0 ); + } + sqlite3_free(aBuf); + }else{ + rc = SQLITE_NOMEM; + } + } + } + + return rc; +} + +/* +** Mark this cursor as an incremental blob cursor. +*/ +SQLITE_PRIVATE void sqlite3HctBtreeIncrblobCursor(BtCursor *pCur){ + HBtCursor *pCsr = (HBtCursor*)pCur; + sqlite3HctTreeCsrIncrblob(pCsr->pHctTreeCsr); +} +#endif + +/* +** Set both the "read version" (single byte at byte offset 18) and +** "write version" (single byte at byte offset 19) fields in the database +** header to iVersion. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeSetVersion(Btree *pBtree, int iVersion){ + assert( 0 ); + return SQLITE_OK; +} + +/* +** Return true if the cursor has a hint specified. This routine is +** only used from within assert() statements +*/ +SQLITE_PRIVATE int sqlite3HctBtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){ + return 0; +} + +/* +** Return true if the given Btree is read-only. +*/ +SQLITE_PRIVATE int sqlite3HctBtreeIsReadonly(Btree *p){ + return 0; +} + +#if !defined(SQLITE_OMIT_SHARED_CACHE) +/* +** Return true if the Btree passed as the only argument is sharable. 
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeSharable(Btree *p){
+  assert( 0 );   /* Not expected to be called for this btree variant */
+  return 0;
+}
+
+/*
+** Return the number of connections to the BtShared object accessed by
+** the Btree handle passed as the only argument. For private caches
+** this is always 1. For shared caches it may be 1 or greater.
+**
+** This variant asserts if reached; without assert() it returns 1.
+*/
+SQLITE_PRIVATE int sqlite3HctBtreeConnectionCount(Btree *p){
+  assert( 0 );
+  return 1;
+}
+#endif
+
+SQLITE_PRIVATE int sqlite3HctBtreeExclusiveLock(Btree *p){   /* Stub: ignores p */
+  return SQLITE_OK;
+}
+
+SQLITE_PRIVATE int sqlite3HctBtreeTransferRow(BtCursor *p1, BtCursor *p2, i64 iKey){   /* Stub: not expected to be called */
+  assert( 0 );
+  return SQLITE_LOCKED;   /* Reached only when assert() is compiled out */
+}
+
+SQLITE_PRIVATE int sqlite3HctLockedErr(u32 pgno, const char *zReason){   /* Both arguments are currently unused */
+  return SQLITE_LOCKED;
+}
+
+SQLITE_PRIVATE i64 sqlite3HctMainStats(sqlite3 *db, int iStat, const char **pzStat){   /* Fetch statistic number iStat of database 0: name via *pzStat, value as the result */
+  Btree *pBt = db->aDb[0].pBt;   /* Only database 0 is consulted */
+
+  i64 iRet = 0;
+
+  if( sqlite3IsHct(pBt) ){
+    HBtree *pHct = (HBtree*)pBt;
+    switch( iStat ){
+      case 0:
+        *pzStat = "nretry";
+        iRet = pHct->stats.nRetry;
+        break;
+      case 1:
+        *pzStat = "nretrykey";
+        iRet = pHct->stats.nRetryKey;
+        break;
+      case 2:
+        *pzStat = "nkeyop";
+        iRet = pHct->stats.nKeyOp;
+        break;
+    }
+  }
+
+  return iRet;   /* 0, with *pzStat left untouched, if db 0 is not HCT or iStat is not 0..2 */
+}
+
+
+#endif /* SQLITE_ENABLE_HCT */
+
+/************** End of hctree.c **********************************************/
+/************** Begin file hct_tree.c ****************************************/
+/*
+** 2020 September 24
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give. 
+**
+*************************************************************************
+*/
+
+
+/* #include "hctInt.h" */
+/* #include */
+/* #include */
+
+#define HCT_TREE_MAX_DEPTH 60   /* Number of slots in HctTreeCsr.apNode[] */
+
+typedef struct HctTreeNode HctTreeNode;
+typedef struct HctTreeRoot HctTreeRoot;
+
+struct HctTree {
+  int nRootHash;                  /* Number of buckets in apRootHash[] */
+  int nRootEntry;                 /* Number of roots stored in the hash table */
+  HctTreeRoot **apRootHash;       /* Hash table of roots, keyed on iRoot, chained by pHashNext */
+  HctTreeNode *pRollback;         /* List of rollback list items */
+  HctTreeNode **apStmt;           /* Array of open statement transactions */
+  int nStmt;                      /* Allocated size of apStmt[] */
+  int iStmt;                      /* Current entry in apStmt (-1 == none) */
+};
+
+/*
+** pReseek:
+**   Set to non-NULL if the cursor was disrupted by a write. The cursor
+**   should be seeked to the key in node pReseek. The special value
+**   TREE_RESEEK_EOF records that the cursor was not on any entry.
+*/
+struct HctTreeCsr {
+  HctTree *pTree;                 /* Tree object this cursor belongs to */
+  HctTreeRoot *pRoot;             /* Table/index root the cursor is open on */
+  u8 bPin;                        /* True if cursor is pinned */
+  u8 eIncrblob;                   /* Incrblob cursor state (TREE_INCRBLOB_xxx) */
+  i64 iSeekRowid;                 /* Last rowid value seeked to */
+  int iSkip;                      /* -ve -> skip Prev(), +ve -> skip Next() */
+  int iNode;                      /* Current depth (index into apNode[]), or -1 for no entry */
+  HctTreeNode *apNode[HCT_TREE_MAX_DEPTH];   /* Path from the root node (apNode[0]) down to the current node */
+  HctTreeNode *pReseek;           /* See above */
+  HctTreeCsr *pCsrNext;           /* Next item in HctTreeRoot.pCsrList list */
+};
+
+#define TREE_INCRBLOB_NONE  0
+#define TREE_INCRBLOB_READY 1
+#define TREE_INCRBLOB_ABORT 2
+
+struct HctTreeNode {
+  i64 iKey;                       /* 64-bit key for this node */
+  u8 bBlack;                      /* 1 for black node, 0 for red node */
+  u8 nRef;                        /* Number of pointers to this node */
+  u8 bDelete;                     /* True if this is a delete key */
+  int nData;                      /* Size of aData[] in bytes */
+  u8 *aData;                      /* Pointer to associated data (or NULL) */
+  u32 iRoot;                      /* Root id of table this node belongs to */
+  HctTreeNode *pLeft;             /* Left child in tree */
+  HctTreeNode *pRight;            /* Right child in tree */
+
+  /* Rollback list related variables */
+  HctTreeNode *pPrev;             /* Previous entry in rollback list */
+  HctTreeNode *pClobber;          /* If non-NULL, entry this one clobbered */
+};
+
+static HctTreeNode hctTreeGlobalEofNode;   /* Sentinel object; only its address is used */
+#define TREE_RESEEK_EOF (&hctTreeGlobalEofNode)
+
+/*
+** 
pCsrCache: +** List of unused cursor objects for this table/index. +*/ +struct HctTreeRoot { + u32 iRoot; /* Name of this tree structure */ + KeyInfo *pKeyInfo; + HctTreeNode *pNode; /* Root node of tree (or NULL) */ + HctTreeRoot *pHashNext; /* Next entry in hash-chain */ + HctTreeCsr *pCsrList; /* Cursors open on this tree */ + HctTreeCsr *pCsrCache; /* Cache of unused cursor objects */ +}; + +/* +** Allocate and return nByte bytes of zeroed memory. +*/ +static void *hctMallocZero(int nByte){ + void *pNew = sqlite3_malloc(nByte); + if( pNew ){ + memset(pNew, 0, nByte); + } + return pNew; +} + +SQLITE_PRIVATE int sqlite3HctTreeNew(HctTree **ppTree){ + HctTree *pNew; + int rc = SQLITE_OK; + + pNew = (HctTree*)hctMallocZero(sizeof(HctTree)); + if( pNew ){ + pNew->apRootHash = (HctTreeRoot**)hctMallocZero(sizeof(HctTreeRoot*)*16); + pNew->nRootHash = 16; + } + if( pNew==0 || pNew->apRootHash==0 ){ + sqlite3_free(pNew); + rc = SQLITE_NOMEM_BKPT; + } + + *ppTree = pNew; + return rc; +} + +static void treeNodeUnref(HctTreeNode *pNode){ + if( pNode!=TREE_RESEEK_EOF ){ + assert( pNode->nRef>0 ); + pNode->nRef--; + if( pNode->nRef==0 ){ + sqlite3_free(pNode); + } + } +} + +static void hctTreeFreeNode(HctTreeNode *pNode){ + if( pNode ){ + hctTreeFreeNode(pNode->pLeft); + hctTreeFreeNode(pNode->pRight); + assert( pNode->nRef==1 ); + treeNodeUnref(pNode); + } +} + +SQLITE_PRIVATE void sqlite3HctTreeFree(HctTree *pTree){ + if( pTree ){ + int i; + sqlite3HctTreeRelease(pTree, 0); + assert( pTree->pRollback==0 ); + for(i=0; inRootHash; i++){ + while( pTree->apRootHash[i] ){ + HctTreeRoot *p = pTree->apRootHash[i]; + HctTreeCsr *pCsr = p->pCsrCache; + sqlite3KeyInfoUnref(p->pKeyInfo); + pTree->apRootHash[i] = p->pHashNext; + while( pCsr ){ + HctTreeCsr *pNext = pCsr->pCsrNext; + sqlite3_free(pCsr); + pCsr = pNext; + } + hctTreeFreeNode(p->pNode); + sqlite3_free(p); + } + } + sqlite3_free(pTree->apRootHash); + sqlite3_free(pTree->apStmt); + sqlite3_free(pTree); + } +} + +#ifdef 
SQLITE_DEBUG
+/* #include */
+static void hct_print_subtree2(HctTreeNode *pNode, char *aPrefix){   /* Debug helper: print the sub-tree at pNode to stdout, one node per line; aPrefix is the L/R path so far */
+  if( pNode ){
+    int n = strlen(aPrefix);
+    fprintf(stdout, "%-8s %s k=%lld\n",
+        aPrefix, pNode->bBlack ? "BLACK" : "RED ", pNode->iKey
+    );
+    aPrefix[n] = 'L';   /* Extend the path for the left child ... */
+    hct_print_subtree2(pNode->pLeft, aPrefix);
+    aPrefix[n] = 'R';   /* ... then for the right child ... */
+    hct_print_subtree2(pNode->pRight, aPrefix);
+    aPrefix[n] = '\0';  /* ... and restore the caller's prefix */
+  }
+}
+static void hct_print_subtree(HctTreeNode *pNode){   /* Debug helper: print a whole sub-tree using a local 64-byte path buffer */
+  if( pNode ){
+    char aPrefix[64];
+    memset(aPrefix, 0, sizeof(aPrefix));
+    hct_print_subtree2(pNode, aPrefix);
+    fflush(stdout);
+  }
+}
+
+/*
+** To be used as:
+**
+**     assert( hct_tree_check(pTree) )
+**
+** An assert() fails if any of the following tree properties are violated:
+**
+**   1. Root node must be black.
+**   2. A red node may not have a red parent.
+**   3. Every path from root to NULL passes through the same number
+**      of black nodes.
+**
+** hct_tree_check_subtree() checks properties 2 and 3 for the sub-tree at
+** pNode. nDepth is the number of black nodes above pNode, nExpect the
+** black-depth every path is expected to have.
+*/
+static void hct_tree_check_subtree(HctTreeNode *pNode, int nDepth, int nExpect){
+  if( pNode ){
+    int nThisDepth = nDepth;
+    if( pNode->bBlack ){
+      nThisDepth++;
+    }else{
+      /* Property 2 - red parents have black children */
+      assert( pNode->pLeft==0 || pNode->pLeft->bBlack );
+      assert( pNode->pRight==0 || pNode->pRight->bBlack );
+    }
+
+    /* Property 3 - Every path from root to NULL has same black-depth */
+    assert( (pNode->pLeft && pNode->pRight) || nThisDepth==nExpect );
+
+    hct_tree_check_subtree(pNode->pLeft, nThisDepth, nExpect);
+    hct_tree_check_subtree(pNode->pRight, nThisDepth, nExpect);
+  }
+  hct_print_subtree(0);     /* no-op - just to avoid a warning */
+}
+static int hct_tree_check(HctTreeRoot *pRoot){   /* Always returns 1 so that it can be wrapped in assert() */
+  if( 0 && pRoot->pNode ){   /* The leading "0 &&" currently disables the whole check */
+    int nBlack = 0;
+    HctTreeNode *pNode = 0;
+    assert( pRoot->pNode->bBlack );        /* 1. Root is black */
+
+    /* Calculate the expected number of black nodes between root and NULL.
+    ** The left-most path is used as the reference. */
+    for(pNode=pRoot->pNode; pNode; pNode=pNode->pLeft){
+      if( pNode->bBlack ) nBlack++;
+    }
+
+    hct_tree_check_subtree(pRoot->pNode, 0, nBlack);
+  }
+  return 1;
+}
+#endif
+
+static HctTreeRoot *hctTreeFindRoot(HctTree *pTree, u32 iRoot){   /* Return the root object for id iRoot, creating it if required. Returns 0 if an allocation fails. */
+  HctTreeRoot *pNew = 0;
+
+  /* Search the hash table for an existing root. Return immediately if
+  ** one is found. */
+  HctTreeRoot *pRoot;
+  for(pRoot = pTree->apRootHash[iRoot % pTree->nRootHash];
+      pRoot;
+      pRoot=pRoot->pHashNext
+  ){
+    if( pRoot->iRoot==iRoot ) return pRoot;
+  }
+
+  /* If the hash table needs to grow, do that now. The table is doubled
+  ** whenever adding one more entry would fill more than half of it. */
+  if( (pTree->nRootEntry+1)*2 > pTree->nRootHash ){
+    int ii;
+    int nOld = pTree->nRootHash;
+    int nNew = nOld ? nOld*2 : 16;
+    HctTreeRoot **apNew = (HctTreeRoot**)sqlite3_realloc(
+        pTree->apRootHash, nNew*sizeof(HctTreeRoot*)
+    );
+    if( apNew==0 ) return 0;
+    memset(&apNew[nOld], 0, (nNew-nOld)*sizeof(HctTreeRoot*));
+
+    for(ii=0; iipHashNext;   /* NOTE(review): this line is damaged - the loop bound and the start of the per-bucket loop are missing. What follows re-links entry p into bucket (iRoot % nNew). Restore from upstream. */
+        int iHash = p->iRoot % nNew;
+        p->pHashNext = apNew[iHash];
+        apNew[iHash] = p;
+        p = pNext;
+      }
+    }
+
+    pTree->apRootHash = apNew;
+    pTree->nRootHash = nNew;
+  }
+
+  /* Allocate a new root and add it to the hash table */
+  pNew = hctMallocZero(sizeof(HctTreeRoot));
+  if( pNew ){
+    int iHash = iRoot % pTree->nRootHash;
+    pNew->iRoot = iRoot;
+    pNew->pHashNext = pTree->apRootHash[iHash];
+    pTree->apRootHash[iHash] = pNew;
+    pTree->nRootEntry++;
+  }
+
+  return pNew;
+}
+
+static void leftRotate(HctTreeNode **pp){   /* Rotate left about *pp: the right child takes the place of *pp, which becomes its left child */
+  HctTreeNode *pG = *pp;
+  HctTreeNode *pRight = pG->pRight;
+
+  pG->pRight = pRight->pLeft;
+  pRight->pLeft = pG;
+  *pp = pRight;
+}
+
+static void rightRotate(HctTreeNode **pp){   /* Mirror image of leftRotate(): the left child takes the place of *pp */
+  HctTreeNode *pG = *pp;
+  HctTreeNode *pLeft = pG->pLeft;
+
+  pG->pLeft = pLeft->pRight;
+  pLeft->pRight = pG;
+  *pp = pLeft;
+}
+
+static HctTreeNode **hctTreeFindPointer(HctTreeCsr *pCsr, int iNode){   /* Return the address of the link (root pointer or parent's child pointer) that refers to pCsr->apNode[iNode] */
+  HctTreeNode **pp;
+  if( iNode==0 ){
+    assert( pCsr->apNode[0]==pCsr->pRoot->pNode );
+    pp = &pCsr->pRoot->pNode;
+  }else{
+    HctTreeNode *pParent = pCsr->apNode[iNode-1];
+    if( pParent->pLeft==pCsr->apNode[iNode] ){
+      pp = &pParent->pLeft;
+    }else{
+      assert( pParent->pRight==pCsr->apNode[iNode] );
+      pp = &pParent->pRight;
+    }
+  }
+  return pp;
+}
+
+static void hctTreeFixInsert(
+  HctTree *pTree,
+  HctTreeCsr *pCsr,               /* apNode[iNode] is the (red) parent of pX */
+  HctTreeNode *pX                 /* Red node whose parent is also red */
+){   /* Repair a red/red violation between pX and its parent by recolouring or rotating */
+  HctTreeNode *pP = pCsr->apNode[pCsr->iNode];      /* Parent of pX */
+  HctTreeNode *pG = pCsr->apNode[pCsr->iNode-1];    /* Grandparent of pX */
+  HctTreeNode *pU;                                  /* Uncle of pX (sibling of pP), may be NULL */
+
+  assert( pCsr->iNode>=1 );
+
+  if( pG->pLeft==pP ){
+    pU = pG->pRight;
+  }else{
+    pU = pG->pLeft;
+  }
+
+  if( pU && pU->bBlack==0 ){
+    /* Uncle of X is red. Recolour, and if the grandparent is not the root
+    ** and now clashes with its own red parent, repeat two levels up. */
+    pP->bBlack = 1;
+    pU->bBlack = 1;
+    if( pCsr->iNode>1 ){
+      pG->bBlack = 0;
+      if( pCsr->apNode[pCsr->iNode-2]->bBlack==0 ){
+        pCsr->iNode -= 2;
+        hctTreeFixInsert(pTree, pCsr, pG);
+      }
+    }
+  }else{
+    /* Uncle of X is black (or absent). iCase bit 1 is set if pP is the
+    ** right child of pG, bit 0 if pX is the right child of pP. */
+    int iCase = ((pG->pRight==pP) ? 2 : 0) + (pP->pRight==pX ? 1 : 0);
+    HctTreeNode **ppG = hctTreeFindPointer(pCsr, pCsr->iNode-1);
+
+    switch( iCase ){
+      case 1: /* left/right */
+        leftRotate(&pG->pLeft);
+        pP = pX;
+        /* fall-through */
+      case 0: /* left/left */
+        rightRotate(ppG);
+        pP->bBlack = 1;
+        pG->bBlack = 0;
+        break;
+      case 2: /* right/left */
+        rightRotate(&pG->pRight);
+        pP = pX;
+        /* fall-through */
+      case 3: /* right/right */
+        leftRotate(ppG);
+        pP->bBlack = 1;
+        pG->bBlack = 0;
+        break;
+      default:
+        assert( 0 );
+    }
+  }
+}
+
+static int hctSaveCursors(
+  HctTreeRoot *pRoot,             /* Save cursors open on this root */
+  HctTreeCsr *pExcept,            /* Cursor to leave alone (may be 0) */
+  int bAbortBlob,                 /* True to abort matching incrblob cursors */
+  i64 iRowid                      /* Rowid used to match incrblob cursors */
+){   /* Record the position of each cursor in pRoot->pCsrList so that it can be re-sought after the tree changes */
+  int rc = SQLITE_OK;
+  HctTreeCsr *pCsr;
+  for(pCsr=pRoot->pCsrList; pCsr; pCsr=pCsr->pCsrNext){
+    if( pCsr!=pExcept && pCsr->pReseek==0 ){
+      if( pCsr->iNode>=0 ){
+        if( pCsr->bPin ){
+          return SQLITE_CONSTRAINT_PINNED;   /* Returns at once; cursors visited earlier stay saved */
+        }
+        pCsr->pReseek = pCsr->apNode[pCsr->iNode];
+        pCsr->pReseek->nRef++;               /* The saved position holds a reference on the node */
+      }else{
+        pCsr->pReseek = TREE_RESEEK_EOF;
+      }
+    }
+    if( bAbortBlob
+     && pCsr->eIncrblob==TREE_INCRBLOB_READY
+     && pCsr->iSeekRowid==iRowid
+    ){
+      pCsr->eIncrblob = TREE_INCRBLOB_ABORT;
+    }
+  }
+  return rc;
+}
+
+
+static int hctTreeCsrSeekInt(
+  HctTreeCsr *pCsr,
+  i64 iKey,
+  int *pRes
+){   /* Seek pCsr to integer key iKey, recording the path walked in apNode[] */
+  int rc = SQLITE_OK;             /* Return code */
+  int res = -1;                   /* Value to return via *pRes */
+  HctTreeNode *pNode = pCsr->pRoot->pNode;
+  pCsr->iNode = -1;
+  while( pNode ){
+    i64 iNodeKey = pNode->iKey;
+    pCsr->apNode[++pCsr->iNode] = pNode;
+    if( iNodeKey==iKey ){
+      res = 0;
+      break;
+    }
+    if( iKeypLeft;   /* NOTE(review): damaged line - the comparison against iNodeKey and the first statements of the "go left" branch are missing. Restore from upstream. */
+    }else{
+      res = -1;
+      pNode = pNode->pRight;
+    }
+    assert( pCsr->iNodepRoot->pNode;   /* NOTE(review): damaged line - the end of hctTreeCsrSeekInt() and the signature and local declarations of hctTreeCsrSeekUnpacked() are missing here. The code below uses pRec, pRes, res and rc from that lost text. Restore from upstream. */
+  pCsr->iNode = -1;
+  while( pNode ){
+    pCsr->apNode[++pCsr->iNode] = pNode;
+    res = sqlite3VdbeRecordCompare(pNode->nData, pNode->aData, pRec);
+    if( res==0 ) break;
+    if( res>0 ){
+      /* pRec is smaller than this node's key. Go left. */
+      pNode = pNode->pLeft;
+    }else{
+      /* pRec is larger than this node's key. Go right. */
+      pNode = pNode->pRight;
+    }
+    assert( pCsr->iNodepRoot->pKeyInfo==0 ){   /* NOTE(review): damaged line - the end of the assert(), the end of the loop and the start of this if() are missing. Restore from upstream. */
+    pCsr->pRoot->pKeyInfo = sqlite3KeyInfoRef(pRec->pKeyInfo);   /* Remember the key description on the root the first time one is seen */
+  }
+
+  if( pRes ) *pRes = res;
+  return rc;
+}
+
+static int hctTreeCsrSeekPacked(
+  HctTreeCsr *pCsr,
+  int nKey,                       /* Size of aKey[] in bytes */
+  const u8 *aKey,                 /* Serialized record to seek to */
+  int *pRes
+){   /* Seek using a serialized key: unpack it with the root's KeyInfo, seek, free the unpacked record. Returns SQLITE_NOMEM if the record cannot be allocated. */
+  int rc;
+  KeyInfo *pKeyInfo = pCsr->pRoot->pKeyInfo;
+  UnpackedRecord *pRec;
+
+  assert( pKeyInfo );
+  pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo);
+  if( pRec ){
+    sqlite3VdbeRecordUnpack(pKeyInfo, nKey, aKey, pRec);
+    rc = hctTreeCsrSeekUnpacked(pCsr, pRec, pRes);
+    sqlite3DbFree(pKeyInfo->db, pRec);
+  }else{
+    rc = SQLITE_NOMEM;
+  }
+  return rc;
+}
+
+
+static int hctRestoreCursor(HctTreeCsr *pCsr, int *pRes){   /* If a position was saved in pReseek, seek back to it and drop the saved reference; otherwise set *pRes to 0 */
+  int rc = SQLITE_OK;
+  HctTreeNode *pReseek = pCsr->pReseek;
+  if( pReseek ){
+    if( pReseek!=TREE_RESEEK_EOF ){
+      if( pCsr->pRoot->pKeyInfo ){
+        rc = hctTreeCsrSeekPacked(pCsr, pReseek->nData, pReseek->aData, pRes);
+      }else{
+        rc = hctTreeCsrSeekInt(pCsr, pReseek->iKey, pRes);
+      }
+      treeNodeUnref(pReseek);
+    }
+    pCsr->pReseek = 0;   /* For TREE_RESEEK_EOF nothing is sought and *pRes is not written */
+  }else{
+    *pRes = 0;
+  }
+  return rc;
+}
+
+static void hctRestoreDiscard(HctTreeCsr *pCsr){   /* Throw away any saved position (leaving the cursor on no entry) and clear the skip flag */
+  if( pCsr->pReseek ){
+    treeNodeUnref(pCsr->pReseek);
+    pCsr->pReseek = 0;
+    pCsr->iNode = -1;
+  }
+  pCsr->iSkip = 0;
+}
+
+static int treeInsertNode(
+  HctTree *pTree,
+  int bRollback,                  /* True if this insert is part of a rollback */
+  UnpackedRecord *pKey,           /* Key for index trees (0 for integer keys) */
+  i64 iKey,                       /* Key for integer-key trees */
+  HctTreeNode *pNew               /* Node to link into the tree pNew->iRoot */
+){   /* Link pNew into its tree, replacing any existing node with the same key */
+  HctTreeRoot *pRoot = hctTreeFindRoot(pTree, pNew->iRoot);   /* NOTE(review): hctTreeFindRoot() returns 0 on allocation failure; pRoot is dereferenced below without a check */
+  UnpackedRecord *pFree = 0;
+  int res = 0;
+  HctTreeCsr csr;                 /* Temporary cursor; never added to pRoot->pCsrList */
+  memset(&csr, 0, sizeof(csr));
+  csr.pRoot = pRoot;
+  csr.pTree = pTree;
+
+  /* Special case. If this insert is to effect a rollback on an index
+  ** tree, pKey will still be NULL. In this case construct a pKey value
+  ** with which to do the seek. */
+  if( pRoot->pKeyInfo && pKey==0 ){
+    assert( bRollback );
+    pFree = sqlite3VdbeAllocUnpackedRecord(pRoot->pKeyInfo);
+    if( pFree==0 ){
+      return SQLITE_NOMEM;
+    }
+    sqlite3VdbeRecordUnpack(pRoot->pKeyInfo, pNew->nData, pNew->aData, pFree);
+    pKey = pFree;
+  }
+
+  sqlite3HctTreeCsrSeek(&csr, pKey, iKey, &res);   /* The result code of the seek is not checked */
+  if( csr.iNode<0 ){
+    assert( pRoot->pNode==0 );
+    pRoot->pNode = pNew;
+  }else{
+    HctTreeNode *pNode = csr.apNode[csr.iNode];
+    if( res==0 ){
+      /* Exact match: pNew takes over the position and colour of pNode */
+      pNew->pLeft = pNode->pLeft;
+      pNew->pRight = pNode->pRight;
+      pNew->bBlack = pNode->bBlack;
+      *(hctTreeFindPointer(&csr, csr.iNode)) = pNew;
+      if( bRollback==0 && pTree->iStmt>=0 ){
+        pNew->pClobber = pNode;   /* Keep the replaced node (and its reference) for rollback */
+        assert( pNew->iKey==pNode->iKey );
+      }else{
+        treeNodeUnref(pNode);
+      }
+    }else{
+      if( res<0 ){
+        assert( pNode->pRight==0 );
+        pNode->pRight = pNew;
+      }else{
+        assert( pNode->pLeft==0 );
+        pNode->pLeft = pNew;
+      }
+      if( pNode->bBlack==0 ){     /* New nodes are red (zeroed bBlack): a red parent needs fixing */
+        hctTreeFixInsert(pTree, &csr, pNew);
+      }
+    }
+  }
+  pNew->nRef++;                   /* Reference held by the tree itself */
+
+  /* Root node is always black */
+  pRoot->pNode->bBlack = 1;
+  assert( hct_tree_check(pRoot) );
+  if( pFree ){
+    sqlite3DbFree(pFree->pKeyInfo->db, pFree);
+  }
+  return SQLITE_OK;
+}
+
+static HctTreeNode *treeNewNode2(
+  HctTree *pTree,
+  HctTreeRoot *pRoot,             /* Root the new node will belong to */
+  i64 iKey,
+  int bDelete,                    /* Value for HctTreeNode.bDelete */
+  int nData,                      /* Bytes to copy from aData[] */
+  const u8 *aData,
+  int nZero                       /* Zero bytes following the copied data */
+){   /* Allocate a node with (nData+nZero) bytes of payload stored directly after the structure. Returns 0 on allocation failure. */
+  HctTreeNode *pNew;
+
+  pNew = (HctTreeNode*)hctMallocZero(sizeof(HctTreeNode) + nData + nZero);
+  if( pNew ){
+    pNew->iKey = iKey;
+    pNew->nData = nData + nZero;
+    pNew->iRoot = pRoot->iRoot;
+    pNew->bDelete = bDelete;
+    if( (nData+nZero)>0 ){
+      pNew->aData = (u8*)&pNew[1];
+      memcpy(pNew->aData, aData, nData);   /* The nZero tail is already zero from hctMallocZero() */
+    }
+
+    if( pTree->iStmt>0 ){
+      /* Inside a transaction: push the node on the rollback list, which
+      ** holds one reference to it. */
+      pNew->pPrev = pTree->pRollback;
+      pTree->pRollback = pNew;
+      pNew->nRef = 1;
+    }
+  }
+
+  return pNew;
+}
+
+/*
+** Allocate a new tree node. Link it into the rollback list. This is
+** treeNewNode2() using the tree and root of cursor pCsr.
+*/
+static HctTreeNode *treeNewNode(
+  HctTreeCsr *pCsr,
+  i64 iKey,
+  int bDelete,
+  int nData,
+  const u8 *aData,
+  int nZero
+){
+  return treeNewNode2(
+      pCsr->pTree, pCsr->pRoot, iKey, bDelete, nData, aData, nZero
+  );
+}
+
+static int treeInsert(
+  HctTreeCsr *pCsr,
+  UnpackedRecord *pKey,
+  i64 iKey,
+  int bDelete,                    /* True to insert a delete key */
+  int nData,
+  const u8 *aData,
+  int nZero
+){   /* Shared implementation of sqlite3HctTreeInsert() and sqlite3HctTreeDeleteKey() */
+  HctTree *pTree = pCsr->pTree;
+  HctTreeNode *pNew;
+  int rc = SQLITE_OK;
+
+  assert( bDelete==0 || pKey || (aData==0 && nData==0 && nZero==0) );
+
+  pNew = treeNewNode(pCsr, iKey, bDelete, nData, aData, nZero);
+  if( pNew==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    int nSave = 0;
+    int bPinSave = pCsr->bPin;
+    if( pKey ){
+      nSave = pKey->nField;       /* nField is restored before returning */
+      sqlite3HctDbRecordTrim(pKey);
+    }
+    pCsr->bPin = 0;               /* So that saving this cursor cannot fail with PINNED */
+    rc = hctSaveCursors(pCsr->pRoot, 0, (pCsr->eIncrblob==0), iKey);
+    if( rc==SQLITE_OK && bPinSave ){
+      int dummy;
+      rc = hctRestoreCursor(pCsr, &dummy);
+    }
+    pCsr->bPin = bPinSave;
+    if( rc==SQLITE_OK ){
+      rc = treeInsertNode(pTree, pTree->iStmt<=0, pKey, iKey, pNew);
+    }
+    if( pKey ) pKey->nField = nSave;
+  }
+
+  return rc;
+}
+
+SQLITE_PRIVATE int sqlite3HctTreeUpdateMeta(
+  HctTree *pTree,
+  const u8 *aMeta,                /* Meta data */
+  int nMeta                       /* Size of meta data in bytes */
+){   /* Store aMeta[] as the entry with key 0 in the tree with root id 2 */
+  HctTreeRoot *pRoot = hctTreeFindRoot(pTree, 2);   /* NOTE(review): may be 0 on allocation failure; treeNewNode2() dereferences it */
+  HctTreeNode *pNew = treeNewNode2(pTree, pRoot, 0, 0, nMeta, aMeta, 0);   /* NOTE(review): may be 0 on allocation failure; treeInsertNode() dereferences it */
+  treeInsertNode(pTree, pTree->iStmt<=0, 0, 0, pNew);   /* NOTE(review): result code is discarded; SQLITE_OK is returned regardless */
+  return SQLITE_OK;
+}
+
+/*
+** This function is like sqlite3HctTreeInsert(), except that:
+**
+**   1) the new key is always larger than any existing key in the
+**      tree, and
+**
+**   2) unless the tree is empty, 
cursor pCsr is guaranteed to point to the +** largest record in it, and +** +** 3) before returning, this function leaves cursor pCsr pointing to the +** new entry. +*/ +SQLITE_PRIVATE int sqlite3HctTreeAppend( + HctTreeCsr *pCsr, + KeyInfo *pKeyInfo, + i64 iKey, + int nData, + const u8 *aData, + int nZero +){ + HctTreeRoot *pRoot = pCsr->pRoot; + int rc = SQLITE_OK; + + assert( pCsr->pTree->iStmt>0 ); + + if( pKeyInfo && pRoot->pKeyInfo==0 ){ + pRoot->pKeyInfo = sqlite3KeyInfoRef(pKeyInfo); + } + + rc = hctSaveCursors(pRoot, pCsr, pCsr->eIncrblob==0, iKey); + if( rc==SQLITE_OK ){ + HctTreeNode *pNew = treeNewNode(pCsr, iKey, 0, nData, aData, nZero); + if( pNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + pNew->nRef++; + if( pRoot->pNode==0 ){ + pRoot->pNode = pNew; + pCsr->apNode[0] = pNew; + pCsr->iNode = 0; + }else{ + HctTreeNode *pParent = pCsr->apNode[pCsr->iNode]; + + assert( pCsr->iNode>=0 ); + assert( pParent->pRight==0 ); + pParent->pRight = pNew; + + if( pParent->bBlack==0 ){ + hctTreeFixInsert(pCsr->pTree, pCsr, pNew); + sqlite3HctTreeCsrLast(pCsr); + }else{ + pCsr->apNode[++pCsr->iNode] = pNew; + } + } + + /* Root node is always black */ + pRoot->pNode->bBlack = 1; + assert( hct_tree_check(pRoot) ); + } + } + + return rc; +} + +#if 0 +static void debug_write_op( + HctTreeCsr *pCsr, + const char *zOp, + UnpackedRecord *pKey, + i64 iKey, + int nData, + const u8 *aData +){ + printf("%s(%d) ", zOp, (int)pCsr->pRoot->iRoot); + if( pKey ){ + char *z = sqlite3HctDbRecordToText(0, aData, nData); + printf("[%s]\n", z); + }else{ + printf("%lld\n", iKey); + } + fflush(stdout); +} +#else +# define debug_write_op(r,s,w,x,y,z) +#endif + +SQLITE_PRIVATE int sqlite3HctTreeInsert( + HctTreeCsr *pCsr, + UnpackedRecord *pKey, + i64 iKey, + int nData, + const u8 *aData, + int nZero +){ + assert( pKey==0 || iKey==0 ); + debug_write_op(pCsr, "INSERT", pKey, iKey, nData, aData); + return treeInsert(pCsr, pKey, iKey, 0, nData, aData, nZero); +} + +SQLITE_PRIVATE int 
sqlite3HctTreeDeleteKey( + HctTreeCsr *pCsr, + UnpackedRecord *pKey, + i64 iKey, + int nData, + const u8 *aData +){ + debug_write_op(pCsr, "DELETE", pKey, iKey, nData, aData); + return treeInsert(pCsr, pKey, iKey, 1, nData, aData, 0); +} + +/* +** Cursor pCsr currently points at a double-black node. Fix it. +*/ +static void hctTreeFixDelete(HctTreeCsr *pCsr){ + assert( pCsr->iNode>0 || pCsr->pRoot->pNode->bBlack ); + if( pCsr->iNode>0 ){ + HctTreeNode *pDB; /* The double-black */ + HctTreeNode *pP; /* Parent of pDB */ + HctTreeNode *pS; /* Sibling of pDB */ + + pDB = pCsr->apNode[pCsr->iNode]; + pP = pCsr->apNode[pCsr->iNode-1]; + pS = pP->pLeft==pDB ? pP->pRight : pP->pLeft; + + if( pS->bBlack ){ + HctTreeNode *pR = 0; + if( pS->pLeft && pS->pLeft->bBlack==0 ){ + pR = pS->pLeft; + }else if( pS->pRight && pS->pRight->bBlack==0 ){ + pR = pS->pRight; + } + + if( pR ){ + /* Sibling is black, pR is a red child */ + HctTreeNode **ppP = hctTreeFindPointer(pCsr, pCsr->iNode-1); + int iCase = ((pP->pRight==pS) ? 2 : 0) + (pS->pRight==pR ? 1 : 0); + switch( iCase ){ + case 0: /* Left/Left */ + pR->bBlack = 1; + pS->bBlack = pP->bBlack; + rightRotate(ppP); + pP->bBlack = 1; + break; + case 1: /* Left/Right */ + leftRotate(&pP->pLeft); + rightRotate(ppP); + pR->bBlack = pP->bBlack; + pP->bBlack = 1; + break; + case 2: /* Right/Left */ + rightRotate(&pP->pRight); + leftRotate(ppP); + pR->bBlack = pP->bBlack; + pP->bBlack = 1; + break; + case 3: /* Right/Right */ + pR->bBlack = 1; + pS->bBlack = pP->bBlack; + leftRotate(ppP); + pP->bBlack = 1; + break; + } + }else{ + /* Sibling is black, with no red children. */ + pS->bBlack = 0; + if( pP->bBlack ){ + pCsr->iNode--; + hctTreeFixDelete(pCsr); + }else{ + pP->bBlack = 1; + } + } + }else{ + HctTreeNode **ppP = hctTreeFindPointer(pCsr, pCsr->iNode-1); + + /* Sibling is red. Because it is the red sibling of a double-black, it + ** must have children on both sides. And because it is red, both those + ** children must be black. 
*/ + assert( pS->pLeft->bBlack && pS->pRight->bBlack ); + + if( pS==pP->pLeft ){ + rightRotate(ppP); + }else{ + leftRotate(ppP); + } + pS->bBlack = 1; + pP->bBlack = 0; + pCsr->apNode[pCsr->iNode-1] = pS; + pCsr->apNode[pCsr->iNode] = pP; + pCsr->apNode[pCsr->iNode+1] = pDB; + pCsr->iNode++; + hctTreeFixDelete(pCsr); + } + } +} + +static int treeDelete(HctTreeCsr *pCsr, int bRollback){ + HctTreeNode *pDel = pCsr->apNode[pCsr->iNode]; + HctTreeNode *pU = 0; + HctTreeNode *pReseek = 0; + int rc; + + /* Save the positions of all cursors on this table */ + rc = hctSaveCursors(pCsr->pRoot, pCsr, 0, 0); + if( rc ) return rc; + + assert( pCsr->pReseek==0 ); + assert( pCsr->iNode>=0 ); +#if 0 + fprintf(stdout, "deleting %lld\n", iKey); + hct_print_subtree(pCsr->pRoot->pNode); +#endif + + if( bRollback==0 ){ + HctTreeNode *pEntry = hctMallocZero(sizeof(*pEntry)); + if( pEntry==0 ) return SQLITE_NOMEM; + pEntry->iKey = pDel->iKey; + pEntry->pClobber = pDel; + pEntry->pPrev = pCsr->pTree->pRollback; + pEntry->nRef = 1; + pEntry->iRoot = pCsr->pRoot->iRoot; + pDel->nRef++; + pCsr->pTree->pRollback = pEntry; + pReseek = pDel; + pReseek->nRef++; + } + + /* If node pDel has two children, swap it with its immediate successor + ** in the tree. This node is guaranteed to have pNode->pLeft==0. */ + if( pDel->pLeft && pDel->pRight ){ + int iDel = pCsr->iNode; + HctTreeNode *pSwap; + sqlite3HctTreeCsrNext(pCsr); + pSwap = pCsr->apNode[pCsr->iNode]; + SWAP(HctTreeNode*, pSwap->pLeft, pDel->pLeft); + SWAP(HctTreeNode*, pSwap->pRight, pDel->pRight); + SWAP(int, pSwap->bBlack, pDel->bBlack); + *hctTreeFindPointer(pCsr, iDel) = pSwap; + pCsr->apNode[iDel] = pSwap; + *hctTreeFindPointer(pCsr, pCsr->iNode) = pDel; + pCsr->apNode[pCsr->iNode] = pDel; + + assert( pDel->pLeft==0 ); + assert( hct_tree_check(pCsr->pRoot) ); + } + + assert( pCsr->apNode[pCsr->iNode]==pDel ); + assert( pDel->pLeft==0 || pDel->pRight==0 ); + + pU = pDel->pLeft ? 
pDel->pLeft : pDel->pRight; + *hctTreeFindPointer(pCsr, pCsr->iNode) = pU; + if( pDel->bBlack==0 || (pU && pU->bBlack==0) || pCsr->pRoot->pNode==0 ){ + /* Simple case. If either pDel or its child pU are red, then + ** replacing the pDel with the child and ensuring the child is + ** colored black is enough. No change in black-height for the + ** children of pU. */ + if( pU ) pU->bBlack = 1; + }else{ + pCsr->apNode[pCsr->iNode] = pU; + hctTreeFixDelete(pCsr); + } + + treeNodeUnref(pDel); + assert( pCsr->pReseek==0 ); + pCsr->pReseek = pReseek; + +#if 0 + fprintf(stdout, "finished deleting %lld\n", iKey); + hct_print_subtree(pCsr->pRoot->pNode); +#endif + assert( hct_tree_check(pCsr->pRoot) ); + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3HctTreeDelete(HctTreeCsr *pCsr){ + int rc; + assert( pCsr->pReseek==0 ); + rc = treeDelete(pCsr, 0); + return rc; +} + +SQLITE_PRIVATE int sqlite3HctTreeBegin(HctTree *pTree, int iStmt){ + if( iStmt>pTree->iStmt ){ + int ii; + if( pTree->nStmt<=iStmt ){ + int nNew = iStmt+16; + HctTreeNode **apNew = (HctTreeNode**)hctMallocZero(nNew*sizeof(*apNew)); + if( apNew==0 ) return SQLITE_NOMEM; + if( pTree->apStmt ){ + memcpy(apNew, pTree->apStmt, pTree->nStmt*sizeof(*apNew)); + sqlite3_free(pTree->apStmt); + } + pTree->apStmt = apNew; + pTree->nStmt = nNew; + } + for(ii=pTree->iStmt+1; ii<=iStmt; ii++){ + pTree->apStmt[ii] = pTree->pRollback; + } + pTree->iStmt = iStmt; + } + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3HctTreeRelease(HctTree *pTree, int iStmt){ + if( iStmtiStmt ){ + if( iStmt==0 ){ + HctTreeNode *pStop = pTree->apStmt[iStmt+1]; + HctTreeNode *pNode; + HctTreeNode *pPrev; + for(pNode=pTree->pRollback; pNode!=pStop; pNode=pPrev){ + pPrev = pNode->pPrev; + if( pNode->pClobber ) treeNodeUnref(pNode->pClobber); + treeNodeUnref(pNode); + } + pTree->pRollback = pStop; + } + pTree->iStmt = iStmt; + } + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3HctTreeRollbackTo(HctTree *pTree, int iStmt){ + int rc = SQLITE_OK; + 
if( iStmt<=pTree->iStmt ){ + HctTreeNode *pStop = pTree->apStmt[iStmt]; + HctTreeNode *pNode; + HctTreeNode *pPrev; + for(pNode=pTree->pRollback; pNode!=pStop; pNode=pPrev){ + KeyInfo *pKeyInfo = 0; + UnpackedRecord *pRec = 0; + HctTreeRoot *pRoot = hctTreeFindRoot(pTree, pNode->iRoot); + + pPrev = pNode->pPrev; + + if( (pKeyInfo = pRoot->pKeyInfo) ){ + pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( pRec==0 ){ + rc = SQLITE_NOMEM; + pStop = pNode; + break; + } + sqlite3VdbeRecordUnpack(pKeyInfo, pNode->nData, pNode->aData, pRec); + } + + if( pNode->pClobber ){ + HctTreeNode *pClobber = pNode->pClobber; + assert( pNode->iKey==pNode->pClobber->iKey ); + pClobber->pLeft = pClobber->pRight = 0; + pClobber->bBlack = 0; + if( (rc = hctSaveCursors(pRoot, 0, 0, 0)) + || (rc = treeInsertNode(pTree, 1, pRec, pNode->iKey, pClobber)) ){ + pStop = pNode; + break; + } + treeNodeUnref(pClobber); + }else{ + HctTreeCsr csr; + int res; + memset(&csr, 0, sizeof(csr)); + csr.pRoot = pRoot; + csr.pTree = pTree; + sqlite3HctTreeCsrSeek(&csr, pRec, pNode->iKey, &res); + if( res==0 ) treeDelete(&csr, 1); + } + if( pRec ) sqlite3DbFree(pKeyInfo->db, pRec); + treeNodeUnref(pNode); + } + pTree->pRollback = pStop; + pTree->iStmt = iStmt; + } + return rc; +} + +/* +** Clear the contents of the entire tree. 
+*/ +SQLITE_PRIVATE void sqlite3HctTreeClear(HctTree *pTree){ + HctTreeRoot **pp; + HctTreeRoot **pEnd = &pTree->apRootHash[pTree->nRootHash]; + for(pp=pTree->apRootHash; pppHashNext){ + hctSaveCursors(p, 0, 0, 0); + hctTreeFreeNode(p->pNode); + p->pNode = 0; + sqlite3KeyInfoUnref(p->pKeyInfo); + p->pKeyInfo = 0; + } + } +} + +SQLITE_PRIVATE int sqlite3HctTreeClearOne(HctTree *pTree, u32 iRoot, i64 *pnRow){ + HctTreeCsr csr; + int rc = SQLITE_OK; + int nRow = 0; + + memset(&csr, 0, sizeof(csr)); + csr.pTree = pTree; + csr.pRoot = hctTreeFindRoot(pTree, iRoot); + csr.iNode = -1; + rc = hctSaveCursors(csr.pRoot, 0, 0, 0); + if( rc ) return rc; + while( rc==SQLITE_OK && csr.pRoot->pNode ){ + sqlite3HctTreeCsrFirst(&csr); + rc = sqlite3HctTreeDelete(&csr); + nRow++; + hctRestoreDiscard(&csr); + } + if( pnRow ) *pnRow = nRow; + return rc; +} + +SQLITE_PRIVATE int sqlite3HctTreeCsrOpen(HctTree *pTree, u32 iRoot, HctTreeCsr **ppCsr){ + int rc = SQLITE_OK; + HctTreeCsr *pNew = 0; + HctTreeRoot *pRoot = hctTreeFindRoot(pTree, iRoot); + if( pRoot==0 ){ + rc = SQLITE_NOMEM; + }else{ + if( pRoot->pCsrCache ){ + pNew = pRoot->pCsrCache; + pRoot->pCsrCache = pNew->pCsrNext; + pNew->pCsrNext = 0; + assert( pNew->pTree==pTree ); + assert( pNew->pRoot==pRoot ); + assert( pNew->iNode==-1 ); + assert( pNew->eIncrblob==TREE_INCRBLOB_NONE ); + }else{ + pNew = (HctTreeCsr*)hctMallocZero(sizeof(HctTreeCsr)); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + pNew->pTree = pTree; + pNew->pRoot = pRoot; + pNew->iNode = -1; + } + } + pNew->pCsrNext = pRoot->pCsrList; + pRoot->pCsrList = pNew; + } + *ppCsr = pNew; + return rc; +} + +SQLITE_PRIVATE int sqlite3HctTreeCsrClose(HctTreeCsr *pCsr){ + if( pCsr ){ + HctTreeCsr **pp; + for(pp=&pCsr->pRoot->pCsrList; *pp!=pCsr; pp=&(*pp)->pCsrNext); + *pp = pCsr->pCsrNext; + if( pCsr->pReseek ){ + treeNodeUnref(pCsr->pReseek); + pCsr->pReseek = 0; + } + pCsr->pCsrNext = pCsr->pRoot->pCsrCache; + pCsr->pRoot->pCsrCache = pCsr; + pCsr->iSkip = 0; + 
pCsr->bPin = 0;
+    pCsr->iNode = -1;
+    pCsr->eIncrblob = TREE_INCRBLOB_NONE;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Seek cursor pCsr to the entry identified by pRec (if pRec is not NULL)
+** or by integer key iKey (if pRec is NULL). Any saved cursor position is
+** discarded first, and iKey is remembered in pCsr->iSeekRowid.
+**
+** An integer is written into *pRes which is the result of
+** comparing the key with the entry to which the cursor is
+** pointing. The meaning of the integer written into
+** *pRes is as follows:
+**
+**   *pRes<0    The cursor is left pointing at an entry that
+**              is smaller than iKey/pRec. Or, the table is empty
+**              and the cursor is therefore left pointing at nothing.
+**
+**   *pRes==0   The cursor is left pointing at an entry that
+**              exactly matches iKey/pRec.
+**
+**   *pRes>0    The cursor is left pointing at an entry that
+**              is larger than iKey/pRec.
+**
+** The return value is whatever hctTreeCsrSeekUnpacked() or
+** hctTreeCsrSeekInt() returns.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrSeek(
+  HctTreeCsr *pCsr,
+  UnpackedRecord *pRec,
+  i64 iKey,
+  int *pRes
+){
+  hctRestoreDiscard(pCsr);
+  pCsr->iSeekRowid = iKey;
+  if( pRec ){
+    return hctTreeCsrSeekUnpacked(pCsr, pRec, pRes);
+  }
+  return hctTreeCsrSeekInt(pCsr, iKey, pRes);
+}
+
+/*
+** Move the cursor to EOF.
+*/
+SQLITE_PRIVATE void sqlite3HctTreeCsrClear(HctTreeCsr *pCsr){
+  hctRestoreDiscard(pCsr);
+  pCsr->iNode = -1;
+}
+/*
+** Advance the cursor to the next entry in key order.
+**
+** pCsr->apNode[0..iNode] holds the path from the root of the tree to the
+** current node. If the current node has a right child, the successor is
+** the left-most node of that subtree. Otherwise it is the nearest ancestor
+** that is reached from its left child, or EOF (iNode==-1) if there is no
+** such ancestor. A cursor already at EOF is left at EOF.
+**
+** If pCsr->iSkip>0 the pending skip is consumed and the cursor is not
+** moved. Likewise, if hctRestoreCursor() reports res>0 the cursor stays
+** where the restore left it (presumably already on a larger entry - TODO
+** confirm against hctRestoreCursor()). Any non-zero return from
+** hctRestoreCursor() is reported as SQLITE_NOMEM.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrNext(HctTreeCsr *pCsr){
+  int iNode;
+  int res = 0;
+
+  assert( pCsr->pReseek==0 || pCsr->iSkip==0 );
+  if( pCsr->iSkip>0 ){
+    pCsr->iSkip = 0;
+    return SQLITE_OK;
+  }
+  if( hctRestoreCursor(pCsr, &res) ) return SQLITE_NOMEM;
+  if( res>0 ) return SQLITE_OK;
+
+  iNode = pCsr->iNode;
+  if( iNode>=0 ){
+    HctTreeNode *pNode = pCsr->apNode[iNode];
+    assert( iNode>=0 );
+    if( pNode->pRight ){
+      /* Descend: right child, then as far left as possible. */
+      pNode = pNode->pRight;
+      while( pNode ){
+        iNode++;
+        pCsr->apNode[iNode] = pNode;
+        pNode = pNode->pLeft;
+      }
+    }else{
+      /* Ascend until we arrive at a parent from its left child. */
+      while( (--iNode)>=0 ){
+        HctTreeNode *pParent = pCsr->apNode[iNode];
+        assert( pNode==pParent->pLeft || pNode==pParent->pRight );
+        if( pNode==pParent->pLeft ) break;
+        pNode = pParent;
+      }
+    }
+    pCsr->iNode = iNode;
+  }
+  return SQLITE_OK;
+}
+/*
+** Move the cursor to the previous entry in key order. This is the mirror
+** image of sqlite3HctTreeCsrNext(): a negative iSkip is consumed instead of
+** a positive one, res<0 from hctRestoreCursor() leaves the cursor where it
+** is, and the roles of pLeft and pRight are swapped.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrPrev(HctTreeCsr *pCsr){
+  int iNode;
+  int res = 0;
+
+  assert( pCsr->pReseek==0 || pCsr->iSkip==0 );
+  if( pCsr->iSkip<0 ){
+    pCsr->iSkip = 0;
+    return SQLITE_OK;
+  }
+  if( hctRestoreCursor(pCsr, &res) ) return SQLITE_NOMEM;
+  if( res<0 ) return SQLITE_OK;
+
+  iNode = pCsr->iNode;
+  if( iNode>=0 ){
+    HctTreeNode *pNode = pCsr->apNode[iNode];
+    assert( iNode>=0 );
+    if( pNode->pLeft ){
+      /* Descend: left child, then as far right as possible. */
+      pNode = pNode->pLeft;
+      while( pNode ){
+        iNode++;
+        pCsr->apNode[iNode] = pNode;
+        pNode = pNode->pRight;
+      }
+    }else{
+      /* Ascend until we arrive at a parent from its right child. */
+      while( (--iNode)>=0 ){
+        HctTreeNode *pParent = pCsr->apNode[iNode];
+        assert( pNode==pParent->pLeft || pNode==pParent->pRight );
+        if( pNode==pParent->pRight ) break;
+        pNode = pParent;
+      }
+    }
+    pCsr->iNode = iNode;
+  }
+  return SQLITE_OK;
+}
+
+/*
+** Return false if cursor points to a valid entry, or true otherwise.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrEof(HctTreeCsr *pCsr){
+  return (pCsr->iNode<0);
+}
+/*
+** Move the cursor to the first (bLast==0) or last (bLast!=0) entry of its
+** tree by walking from the root along pLeft or pRight links respectively,
+** recording the path in pCsr->apNode[]. Any saved position is discarded.
+** If the tree is empty the cursor is left at EOF (iNode==-1).
+**
+** NOTE(review): the assert() line inside the loop lost text in this copy
+** of the patch (the span between '<' and '>' was stripped). It is
+** reproduced verbatim - restore it from upstream before applying.
+*/
+static void hctTreeCursorEnd(HctTreeCsr *pCsr, int bLast){
+  int iNode = -1;
+  HctTreeNode *pNode = pCsr->pRoot->pNode;
+
+  hctRestoreDiscard(pCsr);
+  while( pNode ){
+    iNode++;
+    assert( iNodeapNode[iNode] = pNode;
+    pNode = (bLast ? pNode->pRight : pNode->pLeft);
+  }
+  pCsr->iNode = iNode;
+}
+/* Move the cursor to the smallest entry, or to EOF if the tree is empty. */
+SQLITE_PRIVATE int sqlite3HctTreeCsrFirst(HctTreeCsr *pCsr){
+  hctTreeCursorEnd(pCsr, 0);
+  return SQLITE_OK;
+}
+/* Move the cursor to the largest entry, or to EOF if the tree is empty. */
+SQLITE_PRIVATE int sqlite3HctTreeCsrLast(HctTreeCsr *pCsr){
+  hctTreeCursorEnd(pCsr, 1);
+  return SQLITE_OK;
+}
+/*
+** Write the integer key of the current entry to *piKey. The cursor must
+** point at a valid entry and must not have a pending reseek (see asserts).
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrKey(HctTreeCsr *pCsr, i64 *piKey){
+  assert( pCsr->iNode>=0 );
+  assert( pCsr->pReseek==0 );
+  *piKey = pCsr->apNode[pCsr->iNode]->iKey;
+  return SQLITE_OK;
+}
+/*
+** Write the size of the current entry's data to *pnData and, if paData is
+** not NULL, a pointer to the data to *paData. The pointer refers to the
+** node's own aData field; nothing is copied.
+**
+** NOTE(review): apNode[pCsr->iNode] is read in the initializer, before the
+** assert( pCsr->iNode>=0 ) below has run - callers must never invoke this
+** on a cursor at EOF.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrData(HctTreeCsr *pCsr, int *pnData, const u8 **paData){
+  HctTreeNode *pNode = pCsr->apNode[pCsr->iNode];
+  assert( pCsr->pReseek==0 );
+  assert( pCsr->iNode>=0 );
+  *pnData = pNode->nData;
+  if( paData ) *paData = pNode->aData;
+  return SQLITE_OK;
+}
+
+/*
+** Return non-zero if the cursor is pointing to a delete key. Return zero
+** if it is pointing to an insert or to EOF.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrIsDelete(HctTreeCsr *pCsr){
+  assert( pCsr->pReseek==0 );
+  return (pCsr->iNode>=0 && pCsr->apNode[pCsr->iNode]->bDelete);
+}
+/* Set (Pin) or clear (Unpin) the cursor's bPin flag. */
+SQLITE_PRIVATE void sqlite3HctTreeCsrPin(HctTreeCsr *pCsr){
+  pCsr->bPin = 1;
+}
+SQLITE_PRIVATE void sqlite3HctTreeCsrUnpin(HctTreeCsr *pCsr){
+  pCsr->bPin = 0;
+}
+/*
+** Mark the cursor as being used for incremental blob access: eIncrblob
+** moves from TREE_INCRBLOB_NONE to TREE_INCRBLOB_READY. Any other state is
+** left unchanged.
+*/
+SQLITE_PRIVATE void sqlite3HctTreeCsrIncrblob(HctTreeCsr *pCsr){
+  if( pCsr->eIncrblob==TREE_INCRBLOB_NONE ){
+    pCsr->eIncrblob = TREE_INCRBLOB_READY;
+  }
+}
+/* Return true if pCsr is not NULL and has a saved position (pReseek). */
+SQLITE_PRIVATE int sqlite3HctTreeCsrHasMoved(HctTreeCsr *pCsr){
+  return pCsr && pCsr->pReseek!=0;
+}
+/*
+** Re-seek an incremental-blob cursor that has a saved position to the
+** rowid recorded by the last sqlite3HctTreeCsrSeek() (iSeekRowid). Returns
+** SQLITE_ABORT without seeking if the cursor is in the
+** TREE_INCRBLOB_ABORT state.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrReseek(HctTreeCsr *pCsr, int *pRes){
+  assert(
+      pCsr->eIncrblob==TREE_INCRBLOB_READY
+   || pCsr->eIncrblob==TREE_INCRBLOB_ABORT
+  );
+  assert( pCsr->pReseek );
+  if( pCsr->eIncrblob==TREE_INCRBLOB_ABORT ) return SQLITE_ABORT;
+  return sqlite3HctTreeCsrSeek(pCsr, 0, pCsr->iSeekRowid, pRes);
+}
+
+/*
+** If the cursor has a saved position, restore it with hctRestoreCursor(),
+** which stores its result in pCsr->iSkip. *pIsDifferent is always set to
+** the value of pCsr->iSkip on return. Returns the hctRestoreCursor()
+** result, or SQLITE_OK if there was nothing to restore.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeCsrRestore(HctTreeCsr *pCsr, int *pIsDifferent){
+  int rc = SQLITE_OK;
+  if( pCsr->pReseek ){
+    assert( pCsr->iSkip==0 );
+    rc = hctRestoreCursor(pCsr, &pCsr->iSkip);
+  }
+  *pIsDifferent = pCsr->iSkip;
+  return rc;
+}
+/* Return the root id (iRoot) of the tree this cursor is open on. */
+SQLITE_PRIVATE u32 sqlite3HctTreeCsrRoot(HctTreeCsr *pCsr){
+  return pCsr->pRoot->iRoot;
+}
+/*
+** Invoke callback x(pCtx, iRoot, pKeyInfo) once for each root in the tree's
+** root hash table that currently has at least one node. The root with id
+** HCT_TREE_SCHEMAOP_ROOT is skipped unless bSchemaOp is true. Iteration
+** stops at, and this function returns, the first value other than SQLITE_OK
+** returned by the callback.
+**
+** NOTE(review): the outer loop header lost text in this copy of the patch
+** (the span between '<' and '>' was stripped). It is reproduced verbatim.
+*/
+SQLITE_PRIVATE int sqlite3HctTreeForeach(
+  HctTree *pTree,
+  int bSchemaOp,
+  void *pCtx,
+  int (*x)(void *, u32, KeyInfo*)
+){
+  int i;
+  int rc = SQLITE_OK;
+  for(i=0; rc==SQLITE_OK && inRootHash; i++){
+    HctTreeRoot *p;
+    for(p=pTree->apRootHash[i]; rc==SQLITE_OK && p; p=p->pHashNext){
+      if( p->pNode && (bSchemaOp || p->iRoot!=HCT_TREE_SCHEMAOP_ROOT) ){
+        rc = x(pCtx, p->iRoot, p->pKeyInfo);
+      }
+    }
+  }
+  return rc;
+}
+
+
+
+/************** End of hct_tree.c ********************************************/
+/************** Begin file hct_file.c ****************************************/
+/*
+** 2020 October 13
+**
+** The author disclaims copyright to this source code.
In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+*/
+
+
+/* #include "hctInt.h" */
+/* #include */
+/* #include */
+/* #include */
+
+/* #include */
+/* #include */
+#include
+/* #include */
+/* #include */
+
+/* #include */
+/* #include */
+
+#define HCT_DEFAULT_PAGESIZE 4096
+
+/*
+** The database file is extended and managed in chunks of
+** HCT_DEFAULT_PAGEPERCHUNK pages. Since pages are normally 4096 bytes, this
+** is 2MiB by default. But, if the file is mmap()ed 2MiB at a time, we
+** quickly reach the system limit for number of mappings (on Linux, this is
+** kernel parameter vm.max_map_count - 65530 by default). So, each mapping
+** is made for HCT_MMAP_QUANTA times this amount. Since we need mappings for
+** both the database file and page-map, this means we can mmap() a database of:
+**
+**     32765 * 1024*512*4096 bytes
+**
+** or around 64TiB.
+**
+** NOTE(review): the operands of the #include directives above (and the
+** single bare "#include") were stripped from this copy of the patch.
+*/
+#define HCT_DEFAULT_PAGEPERCHUNK 512
+#define HCT_MMAP_QUANTA          1024
+
+#define HCT_HEADER_PAGESIZE      4096
+
+#define HCT_LOCK_OFFSET (1024*1024)
+#define HCT_LOCK_SIZE   1
+
+
+/*
+** Pagemap slots used for special purposes.
+*/
+#define HCT_ROOTPAGE_SCHEMA       1
+#define HCT_ROOTPAGE_META         2
+
+#define HCT_PAGEMAP_LOGICAL_EOF   3
+#define HCT_PAGEMAP_PHYSICAL_EOF  4
+
+#define HCT_PAGEMAP_TRANSID_EOF   16
+/* Flag bits. Each occupies one bit of the top byte (bits 56..63) of a
+** 64-bit page-map slot value. */
+#define HCT_PMF_LOGICAL_EVICTED    (((u64)0x00000001)<<56)
+#define HCT_PMF_LOGICAL_IRREVICTED (((u64)0x00000002)<<56)
+#define HCT_PMF_PHYSICAL_IN_USE    (((u64)0x00000004)<<56)
+#define HCT_PMF_LOGICAL_IN_USE     (((u64)0x00000008)<<56)
+#define HCT_PMF_LOGICAL_IS_ROOT    (((u64)0x00000010)<<56)
+
+#define HCT_FIRST_LOGICAL  33
+
+/*
+** Masks for use with pagemap values.
+*/
+#define HCT_PAGEMAP_FMASK (((u64)0xFF) << 56)     /* Flags MASK */
+#define HCT_PAGEMAP_VMASK (~HCT_PAGEMAP_FMASK)    /* Value MASK */
+/* Assert that iPg is a positive page number that fits in 48 bits. */
+#define assert_pgno_ok(iPg) assert( ((u64)iPg)<((u64)1<<48) && iPg>0 )
+
+typedef struct HctFileServer HctFileServer;
+typedef struct HctMapping HctMapping;
+typedef struct HctMappingChunk HctMappingChunk;
+
+/*
+** Global variables for this module.
+**
+** pServerList:
+**   Linked list of distinct files opened by this process. Access to this
+**   variable is protected by SQLITE_MUTEX_STATIC_VFS1.
+**
+** nCASFailCnt/nCASFailReset:
+**   These are used to inject CAS instruction failures for testing purposes.
+**   Set by the sqlite3_hct_cas_failure() API. They are not threadsafe.
+**
+** nProcFailCnt
+**   These are used to inject process failures (i.e. abort() calls) for
+**   testing purposes. Set by the sqlite3_hct_proc_failure() API. Not
+**   threadsafe.
+*/
+static struct HctFileGlobalVars {
+  HctFileServer *pServerList;
+
+  int nCASFailCnt;
+  int nCASFailReset;
+
+  int nProcFailCnt;
+} g;
+/*
+** Test hooks. sqlite3_hct_cas_failure() arms CAS-failure injection: the
+** nCASFailCnt'th check fails, after which the countdown restarts from
+** nCASFailReset. sqlite3_hct_proc_failure() arms a countdown after which
+** inject_cas_failure() calls abort(). A value of 0 or less disables the
+** corresponding injection.
+*/
+SQLITE_API void sqlite3_hct_cas_failure(int nCASFailCnt, int nCASFailReset){
+  g.nCASFailCnt = nCASFailCnt;
+  g.nCASFailReset = nCASFailReset;
+}
+SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt){
+  g.nProcFailCnt = nProcFailCnt;
+}
+
+/*
+** This is called to check if a CAS fault should be injected. It returns
+** true if a fault should be injected, or false otherwise.
+**
+** When a CAS fault is injected the counter is re-armed from
+** g.nCASFailReset. If no CAS fault is injected by this call and a process
+** failure is armed (g.nProcFailCnt>0), that counter is decremented and
+** abort() is called when it reaches zero.
+*/
+static int inject_cas_failure(void){
+  if( g.nCASFailCnt>0 ){
+    if( (--g.nCASFailCnt)==0 ){
+      g.nCASFailCnt = g.nCASFailReset;
+      return 1;
+    }
+  }
+  if( g.nProcFailCnt>0 ){
+    if( (--g.nProcFailCnt)==0 ){
+      abort();
+    }
+  }
+  return 0;
+}
+
+/*
+** nRef:
+**   Number of references to this object held by the system. The
+**   HctFileServer object may hold one reference, each HctFile may
+**   also hold one.
+**
+** iLogPPC:
+**   Log2 of number of pages-per-chunk. e.g. if there are 512 pages
+**   on each mapping chunk, this value is set to 9.
+**
+** aPagemap/nPagemap:
+**   Mapping of the current page-map file.
+**
+** NOTE(review): the HctMapping struct below has no fields named iLogPPC,
+** aPagemap or nPagemap. mapShift is what hctFileAllocateMapping() sets to
+** hctLog2(nPagePerChunk), and the per-chunk page-map pointers live in
+** aChunk[].aMap - this header comment looks stale and should be updated.
+*/
+struct HctMappingChunk {
+  void *pData;                    /* Mapping of chunk in data file */
+  u64 *aMap;                      /* Mapping of chunk in map file */
+};
+struct HctMapping {
+  int nRef;                       /* Number of pointers to this array */
+  int szPage;                     /* Size of pages in bytes */
+  int nChunk;                     /* Size of aChunk[] array */
+  u32 mapShift;                   /* (id-1)>>mapShift is the aChunk[] index */
+  u32 mapMask;                    /* (id-1)&mapMask is the index within chunk */
+  HctMappingChunk *aChunk;        /* Array of database chunk mappings */
+};
+
+/*
+** eInitState:
+**   Set to one of the HCT_INIT_XXX constants defined below. See comments
+**   above those constants for details.
+**
+** iNextFileId:
+**   Used to allocate unique ids to each HctFile associated with this
+**   HctFileServer object. These ids are used for debugging, and also
+**   to generate log file names.
+*/
+struct HctFileServer {
+  sqlite3_mutex *pMutex;          /* Mutex to protect this object */
+  HctFile *pFileList;
+
+  u64 iCommitId;                  /* CID value */
+  u64 nWriteCount;                /* Write count */
+
+  int iNextFileId;
+  char *zPath;                    /* Path to database (aFdDb[0]) */
+  char *zDir;                     /* Directory component of zPath */
+  int fdMap;                      /* Read/write file descriptor for page-map */
+  int nFdDb;                      /* Number of valid entries in aFdDb[] */
+  int aFdDb[HCT_MAX_NDBFILE];     /* Data file descriptors; [0] holds the header */
+
+  int szPage;                     /* Page size for database */
+  int nPagePerChunk;              /* Pages (and page-map slots) per chunk */
+  HctMapping *pMapping;           /* Mapping of pagemap and db pages */
+
+  int bReadOnlyMap;               /* True for a read-only mapping of db file */
+
+  HctTMapServer *pTMapServer;     /* Transaction map server */
+  HctPManServer *pPManServer;     /* Page manager server */
+  int eInitState;
+
+  void *pJrnlPtr;
+  void(*xJrnlDel)(void*);
+
+  i64 st_dev;                     /* File identification 1 */
+  i64 st_ino;                     /* File identification 2 */
+  HctFileServer *pServerNext;     /* Next object in g.pServerList list */
+};
+
+/*
+** System initialization state:
+**
+**   HCT_INIT_NONE:
+**     No initialization has been done.
+**
+**   HCT_INIT_RECOVER1:
+**     The sqlite_schema table (root page 1) has been recovered.
And the
+**     page-map scanned to initialize the page-manager.
+**
+**   HCT_INIT_RECOVER2:
+**     Other tables (apart from sqlite_schema) have been recovered.
+**     Initialization has finished.
+*/
+#define HCT_INIT_NONE     0
+#define HCT_INIT_RECOVER1 1
+#define HCT_INIT_RECOVER2 2
+
+/*
+** Event counters used by the hctstats virtual table.
+*/
+typedef struct HctFileStats HctFileStats;
+struct HctFileStats {
+  i64 nCasAttempt;                /* CAS operations attempted on page-map slots */
+  i64 nCasFail;                   /* ... of which this many failed */
+  i64 nIncrAttempt;               /* CAS attempts made by atomic increments */
+  i64 nIncrFail;                  /* ... of which this many failed */
+  i64 nMutex;                     /* Times the server mutex was requested */
+  i64 nMutexBlock;                /* ... of which sqlite3_mutex_try() failed */
+};
+
+/*
+** iCurrentTid:
+**   Most recent value returned by sqlite3HctFileAllocateTransid(). This
+**   is the current TID while the upper layer is writing the database, and
+**   meaningless at other times. Used by this object as the "current TID"
+**   when freeing a page.
+**
+** nPageAlloc:
+**   The total number of physical page allocations requested by the upper
+**   layer in the lifetime of this object.
+*/
+struct HctFile {
+  HctConfig *pConfig;             /* Connection configuration object */
+  HctFileServer *pServer;         /* Connection to global db object */
+  HctFile *pFileNext;             /* Next handle opened on same file */
+  int iFileId;                    /* Id used for debugging output */
+  int eInitState;
+
+  HctTMapClient *pTMapClient;     /* Transaction map client object */
+  HctPManClient *pPManClient;     /* Page manager client object */
+
+  u64 iCurrentTid;
+  u64 nPageAlloc;
+
+  /* Copies of HctFileServer variables */
+  int szPage;
+  HctMapping *pMapping;
+
+  /* Event counters used by the hctstats virtual table */
+  HctFileStats stats;
+};
+/*
+** NOTE(review): text was stripped from this copy of the patch between the
+** '<' in the hctLog2() loop condition and a later '>'. As a result the
+** tail of hctLog2() and the head of the function that follows it are
+** missing, and the two are fused together below. The second function is
+** presumably the hctBoolCompareAndSwap64() wrapper that is called elsewhere
+** in this file: it returns 0 without attempting the CAS when an injected
+** CAS failure fires, and otherwise returns HctCASBool(pPtr, iOld, iNew).
+** The fragment is reproduced verbatim - restore it from upstream.
+*/
+static int hctLog2(int n){
+  int i;
+  assert( (n & (n-1))==0 );
+  for(i=0; (1<0 ){
+    if( (--g.nCASFailCnt)==0 ){
+      g.nCASFailCnt = g.nCASFailReset;
+      return 0;
+    }
+  }
+#endif
+  return HctCASBool(pPtr, iOld, iNew);
+}
+
+/*
+** Allocate and return a new HctMapping object with enough space for
+** nChunk chunks.
+**
+** This is a no-op returning NULL if *pRc is not SQLITE_OK on entry. The
+** new object has nRef==1 and a zeroed aChunk[] array stored in the same
+** allocation. If pOld is not NULL, its mapShift, mapMask, szPage and
+** existing aChunk[] entries are copied into the new object; pOld itself
+** is neither modified nor released. On allocation failure *pRc is set to
+** SQLITE_NOMEM and NULL is returned.
+*/
+static HctMapping *hctMappingNew(int *pRc, HctMapping *pOld, int nChunk){
+  HctMapping *pNew = 0;
+  if( *pRc==SQLITE_OK ){
+    int nByte = sizeof(HctMapping) + nChunk*sizeof(HctMappingChunk);
+    pNew = (HctMapping*)sqlite3MallocZero(nByte);
+    if( pNew ){
+      pNew->aChunk = (HctMappingChunk*)&pNew[1];
+      pNew->nRef = 1;
+      pNew->nChunk = nChunk;
+      if( pOld ){
+        assert( nChunk>pOld->nChunk );
+        pNew->mapShift = pOld->mapShift;
+        pNew->mapMask = pOld->mapMask;
+        pNew->szPage = pOld->szPage;
+        memcpy(pNew->aChunk,pOld->aChunk,pOld->nChunk*sizeof(HctMappingChunk));
+      }
+    }else{
+      *pRc = SQLITE_NOMEM_BKPT;
+    }
+  }
+  return pNew;
+}
+/*
+** Drop one reference to mapping object p (NULL is a no-op) and free the
+** object when the count reaches zero. Only the HctMapping allocation is
+** freed here; the mmap()ed regions that aChunk[] points into are not
+** unmapped by this function.
+*/
+static void hctMappingUnref(HctMapping *p){
+  if( p ){
+    p->nRef--;
+    if( p->nRef==0 ){
+      sqlite3_free(p);
+    }
+  }
+}
+
+/* Return a pointer to page-map slot iSlot. Slot numbers are 1-based. */
+static u64 *hctPagemapPtr(HctMapping *p, u32 iSlot){
+  return &(p->aChunk[(iSlot-1) >> p->mapShift].aMap[(iSlot-1) & p->mapMask]);
+}
+/* Return a pointer to the mapping of physical page iPhys (1-based). */
+static void *hctPagePtr(HctMapping *p, u32 iPhys){
+  return &((u8*)(p->aChunk[(iPhys-1) >> p->mapShift].pData))[
+    ((iPhys-1) & p->mapMask) * p->szPage
+  ];
+}
+
+/*
+** Buffer aBuf[] is p->szPage bytes in size. This function writes the
+** contents of said buffer to physical database page iPhys.
+**
+** Chunks are distributed round-robin over the p->nFdDb data files: chunk
+** iChunk lives in file (iChunk % nFdDb), as chunk (iChunk / nFdDb) of that
+** file. Returns SQLITE_OK if pwrite() wrote the whole page, or SQLITE_ERROR
+** otherwise (including on a short write).
+*/
+static int hctPageWriteToDisk(HctFileServer *p, u64 iPhys, u8 *aBuf){
+  i64 iChunk = ((iPhys-1) / p->nPagePerChunk);
+  int iFd = iChunk % p->nFdDb;
+  i64 iOff = p->szPage * (
+      ((iChunk / p->nFdDb) * p->nPagePerChunk)
+      + (iPhys-1) % p->nPagePerChunk
+  );
+  ssize_t res;
+  assert_pgno_ok( iPhys );
+  res = pwrite(p->aFdDb[iFd], aBuf, p->szPage, iOff);
+  return (res==p->szPage ? SQLITE_OK : SQLITE_ERROR);
+}
+/* Store iNew in page-map slot iSlot with a plain (non-atomic) write. */
+static void hctFilePagemapSetDirect(HctMapping *p, u32 iSlot, u64 iNew){
+  u64 *pPtr = hctPagemapPtr(p, iSlot);
+  *pPtr = iNew;
+}
+/* OR the bits in mask into page-map slot iSlot using a plain, non-atomic
+** read-modify-write. */
+static void hctFilePagemapSetFlag(HctMapping *p, u32 iSlot, u64 mask){
+  u64 *pPtr = hctPagemapPtr(p, iSlot);
+  *pPtr = *pPtr | mask;
+}
+
+/*
+** Use a CAS instruction to set the value of page-map slot iSlot.
Return true +** if the slot is successfully set to value iNew, or false otherwise. +*/ +static int hctFilePagemapSet(HctFile *pFile, u32 iSlot, u64 iOld, u64 iNew){ + u64 *pPtr = hctPagemapPtr(pFile->pMapping, iSlot); + pFile->stats.nCasAttempt++; + if( hctBoolCompareAndSwap64(pPtr, iOld, iNew) ) return 1; + pFile->stats.nCasFail++; + return 0; +} + + +static u64 hctFilePagemapGet(HctMapping *p, u32 iSlot){ + return HctAtomicLoad( hctPagemapPtr(p, iSlot) ); +} + +static u64 hctFilePagemapGetSafe(HctMapping *p, u32 iSlot){ + if( ((iSlot-1)>>p->mapShift)>=p->nChunk ){ + return 0; + } + return hctFilePagemapGet(p, iSlot); +} + +static u64 hctFileAtomicIncr(HctFile *pFile, u64 *pPtr, int nIncr){ + u64 iOld; + while( 1 ){ + iOld = HctAtomicLoad(pPtr); + pFile->stats.nIncrAttempt++; + if( hctBoolCompareAndSwap64(pPtr, iOld, iOld+nIncr) ) return iOld+nIncr; + pFile->stats.nIncrFail++; + } +} + +/* +** Increment the value in slot iSlot by nIncr. Return the new value. +*/ +static u64 hctFilePagemapIncr(HctFile *pFile, u32 iSlot, int nIncr){ + u64 *pPtr = hctPagemapPtr(pFile->pMapping, iSlot); + u64 iOld; + while( 1 ){ + iOld = HctAtomicLoad(pPtr); + pFile->stats.nIncrAttempt++; + if( hctBoolCompareAndSwap64(pPtr, iOld, iOld+nIncr) ) return iOld+nIncr; + pFile->stats.nIncrFail++; + } +} + +/* +** Set the physical page id mapped from logical page iLogical to physical +** page id iNew. Return 1 if successful, or 0 if the operation fails. The +** operation fails if either: +** +** * the LOGICAL_EVICTED flag is already set for the logical page, or +** * the current physical page id to which the logical page is mapped +** is not equal to parameter iOld. 
+**
+** It also fails (returns 0) if a CAS failure is injected for testing. On
+** success the LOGICAL_IN_USE flag is set on the slot, and all other flag
+** bits keep the values they had when the slot was read.
+*/
+static int hctFilePagemapSetLogical(
+  HctFile *pFile,                 /* Use mapping object of this file */
+  u32 iLogical,                   /* Logical page to set the physical id for */
+  u64 iOld,                       /* Old physical page id */
+  u64 iNew                        /* New physical page id */
+){
+  HctMapping *p = pFile->pMapping;
+  while( 1 ){
+    u64 i1 = hctFilePagemapGet(p, iLogical);
+    u64 iOld1 = (iOld & HCT_PAGEMAP_VMASK) | (i1 & HCT_PAGEMAP_FMASK);
+    u64 iNew1 = (iNew & HCT_PAGEMAP_VMASK) | (i1 & HCT_PAGEMAP_FMASK);
+
+    iNew1 |= HCT_PMF_LOGICAL_IN_USE;
+
+    /* If a CAS instruction failure injection is scheduled, return 0
+    ** to the caller. */
+    if( inject_cas_failure() ) return 0;
+
+    /* This operation fails if LOGICAL_EVICTED has been set. The flag is
+    ** cleared in the expected value, so the CAS below can never match a
+    ** slot that currently has it set. */
+    iOld1 &= ~HCT_PMF_LOGICAL_EVICTED;
+    iNew1 &= ~HCT_PMF_LOGICAL_EVICTED;
+
+    if( hctFilePagemapSet(pFile, iLogical, iOld1, iNew1) ){
+      return 1;
+    }
+    /* The CAS failed. If the value read above already differed from the
+    ** expected value (wrong physical page, or EVICTED set) give up.
+    ** Otherwise the slot changed after it was read, or the CAS failed for
+    ** some other reason - go around again with a fresh read. */
+    if( i1!=iOld1 ) return 0;
+  }
+
+  assert( !"unreachable" );
+  return 0;
+}
+
+/*
+** Set the EVICTED or IRREVICTED flag on page iLogical.
+**
+** If bIrrevocable is zero, LOGICAL_EVICTED is set; this requires that it is
+** not already set. If bIrrevocable is non-zero, LOGICAL_IRREVICTED is set;
+** this requires that LOGICAL_EVICTED is already set. In both cases the slot
+** must have LOGICAL_IN_USE set and must currently map to physical page
+** iOldPg. Returns 1 if the flag was set, or 0 if a precondition does not
+** hold or a CAS failure is injected. A CAS that fails because the slot
+** changed concurrently is retried after re-checking the preconditions.
+*/
+static int hctFileSetEvicted(
+  HctFile *pFile,
+  u32 iLogical,
+  u32 iOldPg,
+  int bIrrevocable
+){
+  u64 *pPtr = hctPagemapPtr(pFile->pMapping, iLogical);
+  while( 1 ){
+    u64 iOld = HctAtomicLoad(pPtr);
+    u64 iNew = iOld | (
+      bIrrevocable ? HCT_PMF_LOGICAL_IRREVICTED : HCT_PMF_LOGICAL_EVICTED
+    );
+
+    /* Fail if either the current physical page mapped to logical page iLogical
+    ** is not iOldPg, or if the LOGICAL_EVICTED flag has already been set. */
+    if( (iOld & HCT_PAGEMAP_VMASK)!=iOldPg
+     || ((iOld & HCT_PMF_LOGICAL_EVICTED) && !bIrrevocable)
+     || ((iOld & HCT_PMF_LOGICAL_EVICTED)==0 && bIrrevocable)
+     || ((iOld & HCT_PMF_LOGICAL_IN_USE)==0)
+    ){
+      return 0;
+    }
+    if( inject_cas_failure() ) return 0;
+
+    pFile->stats.nCasAttempt++;
+    if( hctBoolCAS64(pPtr, iOld, iNew) ) return 1;
+    pFile->stats.nCasFail++;
+  }
+
+  assert( !"unreachable" );
+  return 0;
+}
+
+/*
+** Clear the LOGICAL_EVICTED flag from page-map entry iLogical.
This will
+** fail if the LOGICAL_IRREVICTED flag is already set. Return 1 if the
+** flag is successfully cleared, or 0 otherwise.
+**
+** 0 is also returned if a CAS failure is injected for testing. A CAS that
+** fails because the slot changed concurrently is retried.
+*/
+static int hctFileClearEvicted(HctFile *pFile, u32 iLogical){
+  u64 *pPtr = hctPagemapPtr(pFile->pMapping, iLogical);
+  while( 1 ){
+    u64 iOld = HctAtomicLoad(pPtr);
+    u64 iNew = iOld & ~HCT_PMF_LOGICAL_EVICTED;
+
+    if( (iOld & HCT_PMF_LOGICAL_IRREVICTED) ) return 0;
+    if( inject_cas_failure() ) return 0;
+
+    pFile->stats.nCasAttempt++;
+    if( hctBoolCAS64(pPtr, iOld, iNew) ) return 1;
+    pFile->stats.nCasFail++;
+  }
+
+  assert( !"unreachable" );
+  return 0;
+}
+/*
+** Atomically clear everything in page-map slot iSlot except the
+** PHYSICAL_IN_USE flag, which keeps its current value. The CAS is retried
+** until it succeeds.
+*/
+static void hctFilePagemapZeroValue(HctFile *pFile, u32 iSlot){
+  while( 1 ){
+    u64 i1 = hctFilePagemapGet(pFile->pMapping, iSlot);
+    u64 i2 = (i1 & HCT_PMF_PHYSICAL_IN_USE);
+    if( hctFilePagemapSet(pFile, iSlot, i1, i2) ) return;
+  }
+}
+
+/*
+** Open a file descriptor for read/write access on the filename formed by
+** concatenating arguments zFile and zPost (e.g. "test.db" and "-pagemap").
+** Return the file descriptor if successful.
+**
+** The file is created (mode 0644) if it does not exist. This function is
+** a no-op returning -1 if *pRc is not SQLITE_OK on entry. On failure -1 is
+** returned and *pRc is set to SQLITE_NOMEM or SQLITE_CANTOPEN.
+*/
+static int hctFileOpen(int *pRc, const char *zFile, const char *zPost){
+  int fd = -1;
+  if( *pRc==SQLITE_OK ){
+    char *zPath = sqlite3_mprintf("%s%s", zFile, zPost);
+    if( zPath==0 ){
+      *pRc = SQLITE_NOMEM_BKPT;
+    }else{
+      while( fd<0 ){
+        fd = open(zPath, O_CREAT|O_RDWR, 0644);
+        if( fd<0 ){
+          *pRc = SQLITE_CANTOPEN_BKPT;
+          break;
+        }
+        if( fd<3 ){
+          /* Do not use any file-descriptor with values 0, 1 or 2. Using
+          ** these means that stray calls to printf() etc. may corrupt the
+          ** database.
+          **
+          ** The /dev/null descriptor opened below is deliberately left
+          ** open (fd is simply reset to -1): it occupies the low-numbered
+          ** slot so that the retry at the top of the loop is handed a
+          ** different descriptor. */
+          close(fd);
+          fd = open("/dev/null", O_RDONLY, 0644);
+          if( fd<0 ){
+            *pRc = SQLITE_CANTOPEN_BKPT;
+            break;
+          }
+          fd = -1;
+        }
+      }
+      sqlite3_free(zPath);
+    }
+  }
+  return fd;
+}
+
+/*
+** Take an exclusive POSIX lock on the file-descriptor passed as the
+** second argument.
+**
+** The lock covers HCT_LOCK_SIZE byte(s) at offset HCT_LOCK_OFFSET and is
+** requested with F_SETLK, so this call does not block. If the lock cannot
+** be obtained, the pid of the holder is queried with F_GETLK and logged
+** together with zFile (which is used for that message only), and *pRc is
+** set to SQLITE_BUSY. This is a no-op if *pRc is not SQLITE_OK on entry.
+*/
+static void hctFileLock(int *pRc, int fd, const char *zFile){
+  if( *pRc==SQLITE_OK ){
+    int res;
+    struct flock l;
+    memset(&l, 0, sizeof(l));
+    l.l_type = F_WRLCK;
+    l.l_whence = SEEK_SET;
+    l.l_start = HCT_LOCK_OFFSET;
+    l.l_len = HCT_LOCK_SIZE;
+    res = fcntl(fd, F_SETLK, &l);
+    if( res!=0 ){
+      fcntl(fd, F_GETLK, &l);   /* result unchecked: used for logging only */
+      sqlite3_log(SQLITE_BUSY, "hct file \"%s\" locked by process %lld",
+          zFile, (i64)l.l_pid
+      );
+      *pRc = SQLITE_BUSY;
+    }
+  }
+}
+
+/*
+** Argument fd is an open file-handle. Return the size of the file in bytes.
+**
+** This function is a no-op (returns 0) if *pRc is other than SQLITE_OK
+** when it is called. If an error occurs, *pRc is set to an SQLite error
+** code before returning.
+*/
+static i64 hctFileSize(int *pRc, int fd){
+  i64 szRet = 0;
+  if( *pRc==SQLITE_OK ){
+    struct stat sStat;
+    if( fstat(fd, &sStat) ){
+      *pRc = sqlite3HctIoerr(SQLITE_IOERR_FSTAT);
+    }else{
+      szRet = (i64)(sStat.st_size);
+    }
+  }
+  return szRet;
+}
+/*
+** Set the size of file fd to sz bytes using ftruncate(). This is a no-op
+** if *pRc is not SQLITE_OK on entry. If ftruncate() fails, *pRc is set to
+** an SQLite error code. The final value of *pRc is also returned.
+*/
+static int hctFileTruncate(int *pRc, int fd, i64 sz){
+  if( *pRc==SQLITE_OK ){
+    int res = ftruncate(fd, (off_t)sz);
+    if( res ){
+      *pRc = sqlite3HctIoerr(SQLITE_IOERR_TRUNCATE);
+    }
+  }
+  return *pRc;
+}
+/* Delete file zFile if *pRc is SQLITE_OK. The result of unlink() is
+** ignored, so *pRc is never modified by this function. */
+static void hctFileUnlink(int *pRc, const char *zFile){
+  if( *pRc==SQLITE_OK ) unlink(zFile);
+}
+
+
+/*
+** This function is a no-op if (*pRc) is set to other than SQLITE_OK
+** when it is called.
+**
+** Otherwise, argument fd is assumed to be an open file-descriptor. This
+** function attempts to map and return a pointer to a region nByte bytes in
+** size at offset iOff of the open file. The mapping is read-only if parameter
+** bRO is non-zero, or read/write if it is zero.
+**
+** If an error occurs, NULL is returned and (*pRc) set to an SQLite error
+** code.
+**
+** Note that the local variable "flags" below holds the mmap() protection
+** bits (PROT_xxx), not the mmap() flags argument.
+*/
+static void *hctFileMmap(int *pRc, int fd, i64 nByte, i64 iOff, int bRO){
+  void *pRet = 0;
+  if( *pRc==SQLITE_OK ){
+    const int flags = PROT_READ | (bRO ?
0 : PROT_WRITE);
+    pRet = mmap(0, nByte, flags, MAP_SHARED, fd, iOff);
+    if( pRet==MAP_FAILED ){
+      pRet = 0;
+      *pRc = sqlite3HctIoerr(SQLITE_IOERR_MMAP);
+    }
+  }
+  return pRet;
+}
+
+/* Unmap the nByte byte region at pMap. A NULL pMap is a no-op. */
+static void hctFileMunmap(void *pMap, i64 nByte){
+  if( pMap ) munmap(pMap, nByte);
+}
+
+/*
+** Return a copy of nul-terminated string zIn in memory obtained from
+** sqlite3_malloc(). This is a no-op returning NULL if *pRc is not
+** SQLITE_OK on entry. If the allocation fails, NULL is returned and *pRc
+** is set to SQLITE_NOMEM.
+*/
+static char *hctStrdup(int *pRc, const char *zIn){
+  char *zRet = 0;
+  if( *pRc==SQLITE_OK ){
+    zRet = sqlite3_mprintf("%s", zIn);
+    if( zRet==0 ) *pRc = SQLITE_NOMEM_BKPT;
+  }
+  return zRet;
+}
+
+
+/*
+** Given local path zFile, return the associated canonical path in a buffer
+** obtained from sqlite3_malloc(). It is the responsibility of the caller
+** to eventually free this buffer using sqlite3_free().
+**
+** If realpath() fails, NULL is returned and *pRc is set to SQLITE_CANTOPEN.
+*/
+static char *fileGetFullPath(int *pRc, const char *zFile){
+  char *zRet = 0;
+  if( *pRc==SQLITE_OK ){
+    char *zFree = realpath(zFile, 0);
+    if( zFree==0 ){
+      *pRc = SQLITE_CANTOPEN_BKPT;
+    }else{
+      /* realpath() returns malloc() memory; copy it into an sqlite3_malloc()
+      ** buffer so that the caller can release it with sqlite3_free(). */
+      zRet = hctStrdup(pRc, zFree);
+      free(zFree);
+    }
+  }
+  return zRet;
+}
+
+/*
+** Invoke xLog(pCtx, zFull) once for each file in directory pServer->zDir
+** whose name is the database file name followed by "-log-" and at least
+** one more character. zFull is the path of the log file, formed from zDir
+** and the directory entry name; it is only valid for the duration of the
+** callback.
+**
+** Iteration stops at the first callback that returns other than SQLITE_OK,
+** and that value is returned. SQLITE_NOMEM is returned if a path cannot be
+** allocated. If the directory cannot be opened, no callbacks are made and
+** SQLITE_OK is returned.
+*/
+static int hctFileFindLogs(
+  HctFileServer *pServer,
+  void *pCtx,
+  int (*xLog)(void*, const char*)
+){
+  DIR *d;
+  const char *zName = &pServer->zPath[strlen(pServer->zDir)];
+  int nName = strlen(zName);
+  int rc = SQLITE_OK;
+
+  d = opendir(pServer->zDir);
+  if (d) {
+    struct dirent *dir;
+    while( rc==SQLITE_OK && (dir = readdir(d))!=NULL ){
+      const char *zFile = (const char*)dir->d_name;
+      int nFile = strlen(zFile);
+      if( nFile>(nName+5)
+       && memcmp(zFile, zName, nName)==0
+       && memcmp(&zFile[nName], "-log-", 5)==0
+      ){
+        char *zFull = sqlite3_mprintf("%s/%s", pServer->zDir, zFile);
+        if( zFull==0 ){
+          /* Never hand a NULL path to the callback - the callbacks used
+          ** with this function pass it straight to unlink() or open(). */
+          rc = SQLITE_NOMEM_BKPT;
+        }else{
+          rc = xLog(pCtx, zFull);
+          sqlite3_free(zFull);
+        }
+      }
+    }
+    closedir(d);
+  }
+
+  return rc;
+}
+
+/* hctFileFindLogs() callback: delete log file zFile. pDummy is unused. */
+static int hctFileServerInitUnlinkLog(void *pDummy, const char *zFile){
+  int rc = SQLITE_OK;
+  hctFileUnlink(&rc, zFile);
+  return rc;
+}
+
+/*
+** Parse the database header that pHdr points to. See the body for the
+** layout: a 32 byte magic string, then the page size and the number of
+** data files as native ints at byte offsets 32 and 36.
+*/
+static void hctFileReadHdr(
+  int *pRc,
+  void *pHdr,
+  int *pszPage,
+  int *pnDbFile
+){
+  *pszPage = 0;
+  if( *pRc==SQLITE_OK ){
+    /*            12345678901234567890123456789012 */
+    int szPage = 0;
+    int nDbFile
= 0;
+    char *zHdr = "Hctree database version 00000001";
+    u8 aEmpty[32] = {0};
+
+    assert( strlen(zHdr)==32 );
+    if( memcmp(zHdr, pHdr, 32)==0 ){
+      /* Populated header: the page size must be a power of two between 512
+      ** and 32768, and the data-file count between 1 and HCT_MAX_NDBFILE. */
+      memcpy(&szPage, &((u8*)pHdr)[32], sizeof(int));
+      if( szPage<512 || szPage>32768 || (szPage & (szPage-1))!=0 ){
+        *pRc = SQLITE_CANTOPEN_BKPT;
+        return;
+      }
+
+      memcpy(&nDbFile, &((u8*)pHdr)[36], sizeof(int));
+      if( nDbFile<1 || nDbFile>HCT_MAX_NDBFILE ){
+        *pRc = SQLITE_CANTOPEN_BKPT;
+        return;
+      }
+    }else if( memcmp(aEmpty, pHdr, 32)==0 ){
+      /* no-op */   /* all-zero magic: db not yet created, report szPage==0 */
+    }else{
+      *pRc = SQLITE_CANTOPEN_BKPT;
+    }
+
+    /* NOTE(review): *pnDbFile is only written on this path. It is left
+    ** untouched by the early returns above and when *pRc is not SQLITE_OK
+    ** on entry, so callers must initialize it themselves. */
+    *pszPage = szPage;
+    *pnDbFile = nDbFile;
+  }
+}
+/*
+** Return a pointer to the mapping of data chunk iChunk. A new mmap() of
+** HCT_MMAP_QUANTA chunks is made when the chunk is the first of a quanta
+** within its data file. Otherwise the pointer is derived from the mapping
+** of the previous chunk of the same file (pMap->aChunk[iChunk - nFdDb]),
+** which must already be populated.
+*/
+static void *hctFileMmapDbChunk(
+  int *pRc,
+  HctFileServer *p,
+  HctMapping *pMap,
+  int iChunk
+){
+  void *pRet = 0;
+  i64 szChunk = p->nPagePerChunk * p->szPage;
+  int iFd = iChunk % p->nFdDb;
+  int iChunkOfFile = (iChunk / p->nFdDb);
+
+  if( (iChunkOfFile % HCT_MMAP_QUANTA)==0 ){
+    i64 iOff = szChunk * iChunkOfFile;
+    pRet = hctFileMmap(
+        pRc, p->aFdDb[iFd], szChunk*HCT_MMAP_QUANTA, iOff, p->bReadOnlyMap
+    );
+  }else{
+    pRet = (void*)(((u8*)pMap->aChunk[iChunk - p->nFdDb].pData) + szChunk);
+  }
+
+  return pRet;
+}
+/*
+** As hctFileMmapDbChunk(), but for the page-map file. There is a single
+** page-map file, so the previous chunk is simply aChunk[iChunk-1], and the
+** mapping is always read/write.
+*/
+static void *hctFileMmapPagemapChunk(
+  int *pRc,
+  HctFileServer *p,
+  HctMapping *pMap,
+  int iChunk
+){
+  void *pRet = 0;
+  i64 szChunk = p->nPagePerChunk * sizeof(u64);
+
+  if( (iChunk % HCT_MMAP_QUANTA)==0 ){
+    pRet = hctFileMmap(
+        pRc, p->fdMap, szChunk*HCT_MMAP_QUANTA, (szChunk*iChunk), 0
+    );
+  }else{
+    pRet = (void*)(((u8*)(pMap->aChunk[iChunk-1].aMap)) + szChunk);
+  }
+
+  return pRet;
+}
+/*
+** Open the second and subsequent data files so that, on success, nDbFile
+** descriptors are available in p->aFdDb[]. On entry exactly one is open
+** (see the assert). If anything fails, the descriptors opened here are
+** closed again and p->nFdDb is reset to 1. *pRc is both input and output.
+**
+** NOTE(review): both for(...) headers below lost text in this copy of the
+** patch (the span between a '<' and the following '>' was stripped),
+** including the code that builds the per-file name suffix z. They are
+** reproduced verbatim - restore them from upstream before applying.
+*/
+static void hctFileOpenDataFiles(
+  int *pRc,
+  HctFileServer *p,
+  int nDbFile
+){
+  int ii;
+  int rc = *pRc;
+  assert( p->nFdDb==1 );
+  for(ii=1; iiaFdDb[ii] = hctFileOpen(&rc, p->zPath, z);
+    sqlite3_free(z);
+    if( rc==SQLITE_OK ) p->nFdDb = ii+1;
+  }
+
+  if( rc!=SQLITE_OK ){
+    for(ii=1; iinFdDb; ii++){
+      if( p->aFdDb[ii]>0 ) close(p->aFdDb[ii]);
+      p->aFdDb[ii] = -1;
+    }
+    p->nFdDb = 1;
+  }
+  *pRc = rc;
+}
+/* Return iVal rounded up to the next multiple of nQuanta. */
+static i64 round_up(i64 iVal, i64 nQuanta){
+  return ((iVal + nQuanta - 1) / nQuanta) * nQuanta;
+}
+/*
+** Allocate a fresh HctMapping with nChunk chunks, store it in p->pMapping,
+** and populate it by mapping the page-map file with one mmap() call and
+** each data file with one mmap() call. The mapped sizes are rounded up to
+** a multiple of HCT_MMAP_QUANTA chunks. *pRc is both input and output.
+**
+** NOTE(review): p->pMapping is assigned before the mmap() calls, so after
+** a failure it may refer to a partially populated mapping (or be NULL).
+**
+** NOTE(review): four lines below lost text in this copy of the patch (the
+** span between a '<' and the following '>' was stripped): the mapMask
+** assignment and three for(...) headers. They are reproduced verbatim -
+** restore them from upstream before applying.
+*/
+static void hctFileAllocateMapping(
+  int *pRc,
+  HctFileServer *p,
+  int nChunk
+){
+  i64 szChunkPagemap = p->nPagePerChunk * sizeof(u64);
+  i64 szChunkData = p->nPagePerChunk * p->szPage;
+  int rc = *pRc;
+  HctMapping *pMapping = 0;
+  int iFd = 0;
+  int i = 0;
+
+  p->pMapping = pMapping = hctMappingNew(&rc, 0, nChunk);
+  if( rc==SQLITE_OK ){
+    pMapping->mapShift = hctLog2(p->nPagePerChunk);
+    pMapping->mapMask = (1<mapShift)-1;
+    pMapping->szPage = p->szPage;
+  }
+
+  /* Map all chunks of the pagemap file using a single call to mmap() */
+  {
+    int nAll = round_up(nChunk, HCT_MMAP_QUANTA);
+    u8 *pMap = (u8*)hctFileMmap(&rc, p->fdMap, nAll*szChunkPagemap,0,0);
+    for(i=0; rc==SQLITE_OK && iaChunk[i].aMap = (u64*)&pMap[i * szChunkPagemap];
+    }
+  }
+
+  /* Map all chunks of the data files. One call to mmap() for each file. */
+  for(iFd=0; iFdnFdDb && rc==SQLITE_OK; iFd++){
+    int nFileChunk = (nChunk / p->nFdDb) + (iFd < (nChunk % p->nFdDb));
+    i64 n = round_up(nFileChunk, HCT_MMAP_QUANTA) * szChunkData;
+    u8 *pMap = (u8*)hctFileMmap(&rc, p->aFdDb[iFd], n, 0, p->bReadOnlyMap);
+    for(i=0; inFdDb;
+      pMapping->aChunk[iChunk].pData = &pMap[i*szChunkData];
+    }
+  }
+
+  *pRc = rc;
+}
+/* Growable array of transaction ids, filled in by
+** hctFileServerInitUncommitted(). */
+typedef struct Uncommitted Uncommitted;
+struct Uncommitted {
+  int nAlloc;                     /* Allocated size of aTid[] in entries */
+  int nTid;                       /* Number of entries in use */
+  i64 *aTid;                      /* Array from sqlite3_realloc() */
+};
+/*
+** hctFileFindLogs() callback. pCtx points to an Uncommitted object. The
+** first 8 bytes of log file zFile are read as a transaction id and, if it
+** is greater than zero, appended to the array. A file that cannot be
+** opened is silently ignored. Returns SQLITE_OK, or SQLITE_NOMEM if the
+** array cannot be grown.
+**
+** NOTE(review): the return value of read() is not checked. iTid is zeroed
+** first, so an empty file or a read error is ignored, but a short read of
+** between 1 and 7 bytes would yield a partial value - confirm that log
+** files are always at least 8 bytes long.
+*/
+static int hctFileServerInitUncommitted(void *pCtx, const char *zFile){
+  int fd;
+  Uncommitted *p = (Uncommitted*)pCtx;
+
+  fd = open(zFile, O_RDONLY);
+  if( fd>=0 ){
+    i64 iTid = 0;
+    read(fd, &iTid, sizeof(iTid));
+    close(fd);
+    if( iTid>0 ){
+      if( p->nTid==p->nAlloc ){
+        int nNew = p->nTid ?
p->nTid*4 : 64;   /* grow x4; the first allocation is 64 entries */
+        i64 *aNew = sqlite3_realloc(p->aTid, nNew*sizeof(i64));
+        if( aNew==0 ){
+          return SQLITE_NOMEM;
+        }else{
+          p->aTid = aNew;
+          p->nAlloc = nNew;
+        }
+      }
+      p->aTid[p->nTid++] = iTid;
+    }
+  }
+  return SQLITE_OK;
+}
+/*
+** Initialize server object p for database file zFile. The caller must hold
+** p->pMutex. This is a no-op returning SQLITE_OK if p->zPath is already
+** set, i.e. if the object has been initialized before. pConfig is not used
+** by the part of the function visible here.
+*/
+static int hctFileServerInit(
+  HctFileServer *p,
+  HctConfig *pConfig,
+  const char *zFile
+){
+  int rc = SQLITE_OK;
+  assert( sqlite3_mutex_held(p->pMutex) );
+  if( p->zPath==0 ){
+    i64 szHdr;                    /* Size of header file */
+    i64 szMap;                    /* Size of pagemap file */
+    int nChunk = 0;               /* Number of chunks in database */
+    int szPage = 0;
+    int nDbFile = 0;
+
+    Uncommitted unc;
+    memset(&unc, 0, sizeof(unc));
+
+    /* Open the data and page-map files
+    **
+    ** NOTE(review): only the page-map descriptor is opened here. p->aFdDb[0]
+    ** is used below without being opened in this function - presumably the
+    ** caller opens it before calling; confirm. */
+    p->fdMap = hctFileOpen(&rc, zFile, "-pagemap");
+    p->zPath = fileGetFullPath(&rc, zFile);
+
+    if( rc==SQLITE_OK ){
+      /* zDir is zPath up to and including its last '/' character */
+      int n = strlen(p->zPath);
+      while( p->zPath[n-1]!='/' && n>1 ) n--;
+      p->zDir = sqlite3_mprintf("%.*s", n, p->zPath);
+      if( p->zDir==0 ) rc = SQLITE_NOMEM_BKPT;
+    }
+
+    /* Initialize the page-manager */
+    p->pPManServer = sqlite3HctPManServerNew(&rc, p);
+
+    /* If the header file is zero bytes in size, or is not yet populated,
+    ** then the database is empty, regardless of the contents of the
+    ** *-data or *-pagemap file. Truncate the pagemap and data files to
+    ** zero bytes in size to make sure of this.
+    **
+    ** Alternatively, if the header file is the right size, try to read it.
+    */
+    szHdr = hctFileSize(&rc, p->aFdDb[0]);
+    if( rc==SQLITE_OK ){
+      void *pHdr = 0;
+      if( szHdr==0 ){
+        szHdr = HCT_HEADER_PAGESIZE*2;
+        hctFileTruncate(&rc, p->aFdDb[0], szHdr);
+      }else if( szHdr<(HCT_HEADER_PAGESIZE*2) ){
+        rc = SQLITE_CANTOPEN_BKPT;
+      }
+
+      /* Map the header read-only (last argument) just long enough to
+      ** parse it. Both calls are no-ops if rc is already an error. */
+      pHdr = hctFileMmap(&rc, p->aFdDb[0], HCT_HEADER_PAGESIZE*2, 0, 1);
+      hctFileReadHdr(&rc, pHdr, &szPage, &nDbFile);
+      if( rc==SQLITE_OK && szPage==0 ){
+        /* Header not populated: reset the page-map to one zeroed chunk,
+        ** the data file to just the header area, and delete any logs. */
+        hctFileTruncate(&rc, p->fdMap, 0);
+        hctFileTruncate(&rc, p->fdMap, HCT_DEFAULT_PAGEPERCHUNK*sizeof(i64));
+        szHdr = HCT_HEADER_PAGESIZE*2;
+        hctFileTruncate(&rc, p->aFdDb[0], szHdr);
+        if( rc==SQLITE_OK ){
+          rc = hctFileFindLogs(p, 0, hctFileServerInitUnlinkLog);
+        }
+      }else{
+        /* Existing database: collect the transaction ids found in log
+        ** files into unc, then open the remaining data files. */
+        if( rc==SQLITE_OK ){
+          rc = hctFileFindLogs(p, (void*)&unc, hctFileServerInitUncommitted);
+        }
+        hctFileOpenDataFiles(&rc, p, nDbFile);
+      }
+      hctFileMunmap(pHdr, HCT_HEADER_PAGESIZE*2);
+    }
+    p->nPagePerChunk = HCT_DEFAULT_PAGEPERCHUNK;
+
+    assert( szPage==0 || rc==SQLITE_OK );
+    if( szPage>0 ){
+      i64 szChunkPagemap = p->nPagePerChunk * sizeof(u64);
+
+      p->szPage = szPage;
+      szMap = hctFileSize(&rc, p->fdMap);
+      if( rc==SQLITE_OK ){
+        /* NOTE(review): the condition on the next line lost text in this
+        ** copy of the patch (the span between '<' and '>' was stripped).
+        ** It is reproduced verbatim - restore it from upstream. */
+        if( szMapnFdDb==1 && szHdr!=p->szPage*(szMap/sizeof(u64)))
+        ){
+          rc = SQLITE_CANTOPEN_BKPT;
+        }else{
+          nChunk = szMap / szChunkPagemap;
+        }
+      }
+
+      hctFileAllocateMapping(&rc, p, nChunk);
+    }
+
+    /* Initialize CID value */
+    p->iCommitId = 5;
+
+    /* Allocate a transaction map server */
+    if( rc==SQLITE_OK && p->pTMapServer==0 ){
+      u64 iFirst = 0;             /* First tid that will be written in tmap */
+      u64 iLast = 0;              /* Last such tid */
+      int ii;                     /* To iterate through unc.aTid[] */
+
+      if( p->pMapping ){
+        iFirst = hctFilePagemapGet(p->pMapping, HCT_PAGEMAP_TRANSID_EOF);
+        iFirst = (iFirst & HCT_TID_MASK) + 1;
+      }else{
+        iFirst = 1;
+      }
+
+      /* NOTE(review): the loop header on the second line below also lost
+      ** text to the same stripping; reproduced verbatim. */
+      iLast = iFirst;
+      for(ii=0; ii=iLast ) iLast = iThis+1;
+      }
+
+      /* Allocate the tmap-server object. Set all entries between iFirst and
+      ** iLast to (HCT_TMAP_COMMITTED, cid=1).
Ensuring that the contents of
+      ** these transactions are visible to all readers.
+      **
+      ** Then go back and set the entry for all tid values in unc.aTid[] to
+      ** (HCT_TMAP_ROLLBACK, 0) - not visible to any readers.
+      **
+      ** NOTE(review): the for(...) line below lost text in this copy of the
+      ** patch (the span between '<' and '>' was stripped), including the
+      ** name of the function that is called with HCT_TMAP_ROLLBACK. It is
+      ** reproduced verbatim - restore it from upstream. */
+      rc = sqlite3HctTMapServerNew(iFirst, iLast, &p->pTMapServer);
+      for(ii=0; iipTMapServer, iThis, HCT_TMAP_ROLLBACK);
+      }
+    }
+    sqlite3_free(unc.aTid);
+  }
+  return rc;
+}
+
+/*
+** This is called as part of initializing a new database on disk. Mutex
+** HctFileServer.pMutex must be held to call this function. It writes a
+** new, empty, root page to physical page iPhys, to be used for either
+** HCT_ROOTPAGE_SCHEMA or HCT_ROOTPAGE_META.
+**
+** The page image is built in a temporary buffer. If the database mapping
+** is read-only (p->bReadOnlyMap) it is written with hctPageWriteToDisk(),
+** otherwise it is copied directly into the mapping.
+**
+** SQLITE_OK is returned if successful, or an SQLite error code otherwise.
+*/
+static int hctFileInitSystemRoot(HctFileServer *p, u64 iPhys){
+  int rc = SQLITE_OK;
+  u8 *aBuf = sqlite3_malloc(p->szPage);
+
+  assert( sqlite3_mutex_held(p->pMutex) );
+  if( aBuf==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    sqlite3HctDbRootPageInit(0, aBuf, p->szPage);
+
+    if( p->bReadOnlyMap ){
+      rc = hctPageWriteToDisk(p, iPhys, aBuf);
+    }else{
+      u8 *a = (u8*)hctPagePtr(p->pMapping, iPhys);
+      memcpy(a, aBuf, p->szPage);
+    }
+    sqlite3_free(aBuf);
+  }
+  return rc;
+}
+/*
+** Write the database header to the start of the first data file. The
+** caller must hold p->pMutex. The header is HCT_HEADER_PAGESIZE bytes:
+** the 32 byte magic string, then p->szPage and p->nFdDb as native ints at
+** byte offsets 32 and 36, and zeroes after that. This is the layout that
+** hctFileReadHdr() parses. As in hctFileInitSystemRoot(), pwrite() is used
+** if the mapping is read-only, or a memcpy() into the mapping otherwise.
+**
+** Returns SQLITE_OK, SQLITE_NOMEM, or SQLITE_ERROR if pwrite() does not
+** write the whole header.
+*/
+static int hctFileInitHdr(HctFileServer *p){
+  int rc = SQLITE_OK;
+  u8 *aBuf = sqlite3_malloc(HCT_HEADER_PAGESIZE);
+  assert( sqlite3_mutex_held(p->pMutex) );
+  if( aBuf==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    char *zHdr = "Hctree database version 00000001";
+    assert( strlen(zHdr)==32 );
+    memset(aBuf, 0, HCT_HEADER_PAGESIZE);
+    memcpy(aBuf, zHdr, 32);
+    memcpy(&aBuf[32], &p->szPage, sizeof(int));
+    memcpy(&aBuf[36], &p->nFdDb, sizeof(int));
+    if( p->bReadOnlyMap ){
+      ssize_t res = pwrite(p->aFdDb[0], aBuf, HCT_HEADER_PAGESIZE, 0);
+      rc = (res==HCT_HEADER_PAGESIZE ? SQLITE_OK : SQLITE_ERROR);
+    }else{
+      memcpy(p->pMapping->aChunk[0].pData, aBuf, HCT_HEADER_PAGESIZE);
+    }
+  }
+  sqlite3_free(aBuf);   /* harmless no-op if the allocation failed */
+  return rc;
+}
+
+
+/*
+** This is called each time a new snapshot is opened.
If HctFile.szPage is +** still set to 0, then: +** +** a) this is the first snapshot opened by connection pFile, and +** b) the database had not been created when pFile was opened. +** +** In this case the server-mutex is taken, and if the db has still not been +** created (HctFileServer.szPage==0), then it is created on disk under the +** cover of the mutex. +*/ +SQLITE_PRIVATE int sqlite3HctFileNewDb(HctFile *pFile){ + int rc = SQLITE_OK; + if( pFile->szPage==0 ){ + HctFileServer *p = pFile->pServer; + sqlite3_mutex_enter(p->pMutex); + if( p->szPage==0 ){ + HctConfig *pConfig = pFile->pConfig; + HctMapping *pMapping = 0; + int szPage = pConfig->pgsz; + int nDbFile = pConfig->nDbFile; + + p->szPage = szPage; + hctFileTruncate(&rc, p->fdMap, p->nPagePerChunk * sizeof(u64)); + hctFileTruncate(&rc, p->aFdDb[0], p->nPagePerChunk * szPage); + + hctFileAllocateMapping(&rc, p, 1); + pMapping = p->pMapping; + + assert( nDbFile>=1 && nDbFile<=HCT_MAX_NDBFILE ); + hctFileOpenDataFiles(&rc, p, nDbFile); + + /* 1. Make logical page 1 an empty intkey root page (SQLite uses this + ** as the root of sqlite_schema). + ** + ** 2. Set the initial values of the largest logical and physical page + ** ids allocated fields. + */ + + /* Set the initial values of the largest logical and physical page + ** ids allocated fields. These will be used when the set of free pages + ** is recovered in sqlite3HctFileRecoverFreelists(). 
*/ + if( rc==SQLITE_OK ){ + const int nPageSet = pConfig->nPageSet; + hctFilePagemapSetDirect(pMapping, HCT_PAGEMAP_LOGICAL_EOF, nPageSet); + hctFilePagemapSetDirect(pMapping, HCT_PAGEMAP_PHYSICAL_EOF, nPageSet); + } + + if( rc==SQLITE_OK ){ + const u64 f = HCT_PMF_LOGICAL_IN_USE | HCT_PMF_LOGICAL_IS_ROOT; + u64 aRoot[] = { + HCT_ROOTPAGE_SCHEMA, + HCT_ROOTPAGE_META, + }; + int ii = 0; + u64 iPhys1 = 1 + (((HCT_HEADER_PAGESIZE*2)+szPage-1) / szPage); + + for(ii=0; iipMapping); + p->pMapping = 0; + } + } + + if( rc==SQLITE_OK ){ + pFile->szPage = p->szPage; + pFile->pMapping = p->pMapping; + pFile->pMapping->nRef++; + pFile->eInitState = p->eInitState; + } + sqlite3_mutex_leave(p->pMutex); + } + return rc; +} + + +/* +** Return true if the db has not yet been created on disk. Or false +** if it already has. +*/ +SQLITE_PRIVATE int sqlite3HctFileIsNewDb(HctFile *pFile){ + int bRet = 0; + if( pFile->szPage==0 ){ + HctFileServer *p = pFile->pServer; + sqlite3_mutex_enter(p->pMutex); + if( p->szPage==0 ){ + bRet = 1; + } + sqlite3_mutex_leave(p->pMutex); + } + return bRet; +} + +static sqlite3_int64 current_time(){ + struct timeval sNow; + gettimeofday(&sNow, 0); + return (sqlite3_int64)sNow.tv_sec*1000 + sNow.tv_usec/1000; +} + +static void hctFileEnterServerMutex(HctFile *pFile){ + sqlite3_mutex *pMutex = pFile->pServer->pMutex; + pFile->stats.nMutex++; + if( sqlite3_mutex_try(pMutex)!=SQLITE_OK ){ + pFile->stats.nMutexBlock++; + sqlite3_mutex_enter(pMutex); + } +} + +/* +** This is called to ensure that the mapping currently held by client +** pFile contains at least nChunk chunks. 
+*/ +static int hctFileGrowMapping(HctFile *pFile, int nChunk){ + int rc = SQLITE_OK; + if( pFile->pMapping->nChunkpServer; + HctMapping *pOld; + hctFileEnterServerMutex(pFile); + hctMappingUnref(pFile->pMapping); + pFile->pMapping = 0; + pOld = p->pMapping; + nOld = pOld->nChunk; + if( nOldnPagePerChunk*p->szPage; + i64 szChunkMap = p->nPagePerChunk*sizeof(u64); + int i; + + /* Grow the mapping file */ + hctFileTruncate(&rc, p->fdMap, nChunk*szChunkMap); + + for(i=nOld; iaChunk[i]; + + /* Grow the data file */ + int iFd = (i % p->nFdDb); + i64 sz = ((i / p->nFdDb) + 1) * szChunkData; + hctFileTruncate(&rc, p->aFdDb[iFd], sz); + + /* Map the new chunks of both the data and mapping files. */ + pChunk->aMap = hctFileMmapPagemapChunk(&rc, p, pNew, i); + pChunk->pData = hctFileMmapDbChunk(&rc, p, pNew, i); + } + + if( rc==SQLITE_OK ){ + p->pMapping = pNew; + hctMappingUnref(pOld); + }else{ + hctMappingUnref(pNew); + } + } + } + pFile->pMapping = p->pMapping; + pFile->pMapping->nRef++; + sqlite3_mutex_leave(p->pMutex); + } + return rc; +} + +/* +** Grow the mapping so that it is at least large enough to have an entry +** for slot iSlot. Return SQLITE_OK if successful (or if the mapping does +** not need to grow), or an SQLite error code otherwise. 
+*/ +static int hctFileGrowMappingForSlot(HctFile *pFile, u32 iSlot){ + assert( iSlot>0 ); + return hctFileGrowMapping(pFile, 1 + ((iSlot-1) / HCT_DEFAULT_PAGEPERCHUNK)); +} + + +static int hctFileServerFind(HctFile *pFile, const char *zFile){ + int rc = SQLITE_OK; + struct stat sStat; + HctFileServer *pServer = 0; + sqlite3_mutex *pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_VFS1); + + memset(&sStat, 0, sizeof(sStat)); + + /* Take the VFS1 mutex that protects the globals in this file */ + sqlite3_mutex_enter(pMutex); + + /* Search for an existing HctFileServer already open on this database */ + if( 0==stat(zFile, &sStat) ){ + for(pServer=g.pServerList; pServer; pServer=pServer->pServerNext){ + if( pServer->st_ino==(i64)sStat.st_ino + && pServer->st_dev==(i64)sStat.st_dev + ){ + break; + } + } + } + + if( pServer==0 ){ + int fd = hctFileOpen(&rc, zFile, ""); + if( rc==SQLITE_OK ){ + assert( fd>0 ); + hctFileLock(&rc, fd, zFile); + pServer = (HctFileServer*)sqlite3HctMalloc(&rc, sizeof(*pServer)); + if( pServer==0 ){ + close(fd); + }else{ + int ii; + for(ii=0; iiaFdDb[ii] = -1; + } + fstat(fd, &sStat); + pServer->st_dev = (i64)sStat.st_dev; + pServer->st_ino = (i64)sStat.st_ino; + pServer->pServerNext = g.pServerList; + pServer->aFdDb[0] = fd; + pServer->nFdDb = 1; + pServer->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); + /* pServer->bReadOnlyMap = 1; */ + g.pServerList = pServer; + } + } + } + + if( rc==SQLITE_OK ){ + pFile->pServer = pServer; + pFile->pFileNext = pServer->pFileList; + pServer->pFileList = pFile; + } + + /* Release the global mutex */ + sqlite3_mutex_leave(pMutex); + + return rc; +} + + +/* +** Open a connection to the database zFile. 
+*/ +SQLITE_PRIVATE HctFile *sqlite3HctFileOpen(int *pRc, const char *zFile, HctConfig *pConfig){ + int rc = *pRc; + HctFile *pNew; + + pNew = (HctFile*)sqlite3HctMalloc(&rc, sizeof(*pNew)); + if( pNew ){ + pNew->pConfig = pConfig; + rc = hctFileServerFind(pNew, zFile); + if( rc==SQLITE_OK ){ + HctFileServer *pServer = pNew->pServer; + + sqlite3_mutex_enter(pServer->pMutex); + rc = hctFileServerInit(pServer, pConfig, zFile); + assert( rc==SQLITE_OK ); + if( rc==SQLITE_OK && pServer->szPage>0 ){ + pNew->szPage = pServer->szPage; + pNew->pMapping = pServer->pMapping; + pNew->pMapping->nRef++; + } + pNew->eInitState = pServer->eInitState; + pNew->iFileId = pServer->iNextFileId++; + sqlite3_mutex_leave(pServer->pMutex); + + if( rc==SQLITE_OK ){ + sqlite3HctTMapClientNew( + pServer->pTMapServer, pConfig, &pNew->pTMapClient + ); + } + if( rc==SQLITE_OK ){ + pNew->pPManClient = sqlite3HctPManClientNew( + &rc, pConfig, pServer->pPManServer, pNew + ); + } + }else{ + sqlite3_free(pNew); + pNew = 0; + } + + if( rc!=SQLITE_OK ){ + sqlite3HctFileClose(pNew); + pNew = 0; + } + } + assert( (rc==SQLITE_OK)==(pNew!=0) ); + *pRc = rc; + return pNew; +} + +SQLITE_PRIVATE HctTMapClient *sqlite3HctFileTMapClient(HctFile *pFile){ + return pFile->pTMapClient; +} +SQLITE_PRIVATE HctPManClient *sqlite3HctFilePManClient(HctFile *pFile){ + return pFile->pPManClient; +} + +SQLITE_PRIVATE void sqlite3HctFileClose(HctFile *pFile){ + if( pFile ){ + HctFileServer *pDel = 0; + HctFile **pp; + HctFileServer *pServer = pFile->pServer; + + /* Release the page-manager client */ + sqlite3HctPManClientFree(pFile->pPManClient); + pFile->pPManClient = 0; + + /* Release the transaction map client */ + sqlite3HctTMapClientFree(pFile->pTMapClient); + pFile->pTMapClient = 0; + + /* Release the reference to the HctMapping object, if any */ + hctMappingUnref(pFile->pMapping); + pFile->pMapping = 0; + + /* Remove this object from the HctFileServer.pFileList list. 
If this + ** means there are no longer any connections to this server object, + ** remove the HctFileServer object itself from the global list. In + ** this case leave stack variable pDel set to point to the + ** HctFileServer. */ + sqlite3_mutex_enter( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_VFS1) ); + for(pp=&pServer->pFileList; *pp!=pFile; pp=&(*pp)->pFileNext); + *pp = pFile->pFileNext; + if( pServer->pFileList==0 ){ + HctFileServer **ppS; + pDel = pServer; + for(ppS=&g.pServerList; *ppS!=pServer; ppS=&(*ppS)->pServerNext); + *ppS = pServer->pServerNext; + } + sqlite3_mutex_leave( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_VFS1) ); + + /* It if was removed from the global list, clean up the HctFileServer + ** object. */ + if( pDel ){ + int szChunkData = pDel->nPagePerChunk*pDel->szPage; + int szChunkMap = pDel->nPagePerChunk*sizeof(u64); + int i; + HctMapping *pMapping = pDel->pMapping; + + sqlite3HctTMapServerFree(pDel->pTMapServer); + pDel->pTMapServer = 0; + + sqlite3HctPManServerFree(pDel->pPManServer); + pDel->pPManServer = 0; + + if( pMapping ){ + pDel->pMapping = 0; + for(i=0; inChunk; i++){ + HctMappingChunk *pChunk = &pMapping->aChunk[i]; + if( pChunk->aMap ) hctFileMunmap(pChunk->aMap, szChunkMap); + if( pChunk->pData ) hctFileMunmap(pChunk->pData, szChunkData); + } + hctMappingUnref(pMapping); + } + + /* Close the data files and the mapping file. */ + for(i=0; inFdDb; i++){ + if( pDel->aFdDb[i]>0 ) close(pDel->aFdDb[i]); + } + if( pDel->fdMap ) close(pDel->fdMap); + + if( pDel->xJrnlDel ){ + pDel->xJrnlDel(pDel->pJrnlPtr); + } + sqlite3_free(pDel->zDir); + sqlite3_free(pDel->zPath); + sqlite3_mutex_free(pDel->pMutex); + sqlite3_free(pDel); + } + + /* Finally, free the HctFile object */ + sqlite3_free(pFile); + } +} + +SQLITE_PRIVATE u32 sqlite3HctFileMaxpage(HctFile *pFile){ + u64 iVal = hctFilePagemapGet(pFile->pMapping, HCT_PAGEMAP_PHYSICAL_EOF); + return (iVal & 0xFFFFFFFF); +} + +/* +** Set the flags in mask within page-map slot iSlot. 
+*/ +static int hctFileSetFlag(HctFile *pFile, u32 iSlot, u64 mask){ + int rc = hctFileGrowMappingForSlot(pFile, iSlot); + if( rc==SQLITE_OK ){ + HctMapping *pMapping = pFile->pMapping; + while( 1 ){ + u64 iVal = hctFilePagemapGet(pMapping, iSlot); + if( hctFilePagemapSet(pFile, iSlot, iVal, iVal | mask) ) break; + } + } + return rc; +} + +/* +** Clear the flags in mask within page-map slot iSlot. +*/ +static int hctFileClearFlag(HctFile *pFile, u32 iSlot, u64 mask){ + int rc = hctFileGrowMappingForSlot(pFile, iSlot); + if( rc==SQLITE_OK ){ + HctMapping *pMapping = pFile->pMapping; + while( 1 ){ + u64 iVal = hctFilePagemapGet(pMapping, iSlot); + if( hctFilePagemapSet(pFile, iSlot, iVal, iVal & ~mask) ) break; + } + } + return rc; +} + + +SQLITE_PRIVATE int sqlite3HctFileRootFree(HctFile *pFile, u32 iRoot){ + /* TODO - do something with freed root-page */ + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3HctFilePageClearIsRoot(HctFile *pFile, u32 iRoot){ + return hctFileClearFlag(pFile, iRoot, HCT_PMF_LOGICAL_IS_ROOT); +} +SQLITE_PRIVATE int sqlite3HctFilePageClearInUse(HctFile *pFile, u32 iPg, int bLogic){ + u64 flag = bLogic ? HCT_PMF_LOGICAL_IN_USE : HCT_PMF_PHYSICAL_IN_USE; + return hctFileClearFlag(pFile, iPg, flag); +} + +SQLITE_PRIVATE int sqlite3HctFileTreeFree(HctFile *pFile, u32 iRoot, int bImmediate){ + u64 iTid = bImmediate ? 0 : pFile->iCurrentTid; + return sqlite3HctPManFreeTree(pFile->pPManClient, pFile, iRoot, iTid); +} + +static int hctFilePagemapGetGrow(HctFile *pFile, u32 iPg, u64 *piVal){ + int rc = hctFileGrowMapping(pFile, 1+(iPg>>pFile->pMapping->mapShift)); + if( rc==SQLITE_OK ){ + *piVal = hctFilePagemapGet(pFile->pMapping, iPg); + } + return rc; +} + +/* +** Obtain the lower 32-bits of the value currently stored in slot iSlot. 
+*/ +static int hctFilePagemapGetGrow32(HctFile *pFile, u32 iSlot, u32 *piVal){ + int rc; + u64 val = 0; + rc = hctFilePagemapGetGrow(pFile, iSlot, &val); + *piVal = (u32)(val & 0xFFFFFFFF); + return rc; +} + + +static int hctFilePagemapPtr(HctFile *pFile, u32 iPg, u8 **paData){ + int rc = hctFileGrowMapping(pFile, 1+(iPg>>pFile->pMapping->mapShift)); + if( rc==SQLITE_OK ){ + *paData = hctPagePtr(pFile->pMapping, iPg); + } + return rc; +} + +SQLITE_PRIVATE int sqlite3HctFilePageGet(HctFile *pFile, u32 iPg, HctFilePage *pPg){ + int rc; + assert( iPg!=0 ); + memset(pPg, 0, sizeof(*pPg)); + pPg->pFile = pFile; + pPg->iPg = iPg; + rc = hctFilePagemapGetGrow32(pFile, iPg, &pPg->iOldPg); + if( rc==SQLITE_OK ){ + u32 iPhys = pPg->iOldPg; + assert( iPhys!=0 ); + rc = hctFilePagemapPtr(pFile, iPhys, &pPg->aOld); + } + return rc; +} + +SQLITE_PRIVATE u32 sqlite3HctFilePageMapping(HctFile *pFile, u32 iLogical, int *pbEvicted){ + u64 val = hctFilePagemapGet(pFile->pMapping, iLogical); + *pbEvicted = (val & HCT_PMF_LOGICAL_EVICTED) ? 1 : 0; + return (u32)(val & 0xFFFFFFFF); +} + +/* +** Obtain a reference to physical page iPg. +*/ +SQLITE_PRIVATE int sqlite3HctFilePageGetPhysical(HctFile *pFile, u32 iPg, HctFilePage *pPg){ + u32 iVal; + int rc; + assert( iPg!=0 ); + memset(pPg, 0, sizeof(*pPg)); + rc = hctFilePagemapGetGrow32(pFile, iPg, &iVal); + if( rc==SQLITE_OK ){ + pPg->iOldPg = iPg; + pPg->aOld = (u8*)hctPagePtr(pFile->pMapping, iPg); + } + return rc; +} + +static u32 hctFileAllocPg(int *pRc, HctFile *pFile, int bLogical){ + int rc = *pRc; + u32 iRet = 0; + + if( bLogical==0 ) pFile->nPageAlloc++; + iRet = sqlite3HctPManAllocPg(&rc, pFile->pPManClient, pFile, bLogical); + if( rc==SQLITE_OK ){ + rc = hctFileGrowMappingForSlot(pFile, iRet); + if( rc!=SQLITE_OK ){ + /* TODO: Something about this resource leak */ + iRet = 0; + } + } + + *pRc = rc; + return iRet; +} + +/* +** This function makes the page object pPg writable if it is not already +** so. 
Specifically, it allocates a new physical page and sets the +** following variables accordingly: +** +** HctFilePage.iNewPg +** HctFilePage.aNew +** +** The PHYSICAL_IN_USE flag is set on the new physical page allocated +** here. +*/ +static void hctFilePageMakeWritable(int *pRc, HctFilePage *pPg){ + if( pPg->aNew==0 ){ + HctFile *pFile = pPg->pFile; + u32 iNewPg = hctFileAllocPg(pRc, pFile, 0); + if( iNewPg ){ + hctFileSetFlag(pPg->pFile, iNewPg, HCT_PMF_PHYSICAL_IN_USE); + pPg->iNewPg = iNewPg; + + if( pFile->pServer->bReadOnlyMap ){ + pPg->aNew = (u8*)sqlite3_malloc(pFile->szPage); + /* todo: handle oom here */ + }else{ + pPg->aNew = (u8*)hctPagePtr(pPg->pFile->pMapping, iNewPg); + } + } + } +} + + +#if 0 +static void debug_printf(const char *zFmt, ...){ + va_list ap; + va_start(ap, zFmt); + vprintf(zFmt, ap); + va_end(ap); +} + +static void debug_slot_value(HctFile *pFile, u32 iSlot){ + u64 iVal = hctFilePagemapGet(pFile->pMapping, iSlot); + printf("[flags=%02x val=%lld]", (u32)(iVal>>56), iVal & HCT_PAGEMAP_VMASK); +} + +#define DEBUG_PAGE_MUTEX_ENTER(pPg) \ + sqlite3_mutex_enter(pPg->pFile->pServer->pMutex) + +#define DEBUG_PAGE_MUTEX_LEAVE(pPg) \ + fflush(stdout); sqlite3_mutex_leave(pPg->pFile->pServer->pMutex) + +#define DEBUG_PRINTF(...) debug_printf(__VA_ARGS__) +#define DEBUG_SLOT_VALUE(pFile, iSlot) debug_slot_value(pFile, iSlot) + +SQLITE_PRIVATE void sqlite3HctFileDebugPrint(HctFile *pFile, const char *zFmt, ...){ + va_list ap; + sqlite3_mutex_enter(pFile->pServer->pMutex); + printf("f=%d: ", pFile->iFileId); + va_start(ap, zFmt); + vprintf(zFmt, ap); + va_end(ap); + sqlite3_mutex_leave(pFile->pServer->pMutex); +} + +#else +# define DEBUG_PAGE_MUTEX_ENTER(x) +# define DEBUG_PAGE_MUTEX_LEAVE(x) +# define DEBUG_PRINTF(...) 
+# define DEBUG_SLOT_VALUE(x,y) +SQLITE_PRIVATE void sqlite3HctFileDebugPrint(HctFile *pFile, const char *zFmt, ...){ } +#endif + +void hctFileFreePg( + int *pRc, + HctFile *pFile, + i64 iTid, /* Associated TID value */ + u32 iPg, /* Page number */ + int bLogical /* True for logical, false for physical */ +){ + if( pFile->eInitState>=HCT_INIT_RECOVER1 ){ + sqlite3HctPManFreePg(pRc, pFile->pPManClient, iTid, iPg, bLogical); + } +} + + +static int hctFilePageFlush(HctFilePage *pPg){ + int rc = SQLITE_OK; + if( pPg->aNew ){ + u32 iOld = pPg->iOldPg; + + DEBUG_PAGE_MUTEX_ENTER(pPg); + DEBUG_PRINTF("f=%d: Flushing page %d orig=", pPg->pFile->iFileId, pPg->iPg); + DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); + DEBUG_PRINTF(" (ioldpg=%d) (inewpg=%d)", pPg->iOldPg, pPg->iNewPg); + + DEBUG_PRINTF("\n"); + DEBUG_PAGE_MUTEX_LEAVE(pPg); + + if( pPg->pFile->pServer->bReadOnlyMap ){ + rc = hctPageWriteToDisk(pPg->pFile->pServer, pPg->iNewPg, pPg->aNew); + } + + if( rc==SQLITE_OK ){ + if( !hctFilePagemapSetLogical(pPg->pFile, pPg->iPg, iOld, pPg->iNewPg) ){ + rc = SQLITE_LOCKED_ERR(pPg->iPg, "flush"); + }else{ + if( iOld ){ + u64 iTid = pPg->pFile->iCurrentTid; + hctFileFreePg(&rc, pPg->pFile, iTid, iOld, 0); + hctFileClearFlag(pPg->pFile, iOld, HCT_PMF_PHYSICAL_IN_USE); + } + pPg->iOldPg = pPg->iNewPg; + if( pPg->pFile->pServer->bReadOnlyMap ){ + sqlite3_free(pPg->aNew); + pPg->aOld = hctPagePtr(pPg->pFile->pMapping, pPg->iOldPg); + }else{ + pPg->aOld = pPg->aNew; + } + pPg->aNew = 0; + pPg->iNewPg = 0; + } + } + + DEBUG_PAGE_MUTEX_ENTER(pPg); + DEBUG_PRINTF("f=%d:", pPg->pFile->iFileId); + + DEBUG_PRINTF(" rc=%d final=", rc); + DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); + DEBUG_PRINTF("%s\n", rc==SQLITE_LOCKED ? 
" SQLITE_LOCKED" : ""); + DEBUG_PAGE_MUTEX_LEAVE(pPg); + } + return rc; +} + +SQLITE_PRIVATE int sqlite3HctFilePageCommit(HctFilePage *pPg){ + assert( pPg->iPg ); + return hctFilePageFlush(pPg); +} + +SQLITE_PRIVATE int sqlite3HctFilePageEvict(HctFilePage *pPg, int bIrrevocable){ + int ret; + + DEBUG_PAGE_MUTEX_ENTER(pPg); + DEBUG_PRINTF("f=%d: Evicting page %d (irrecocable=%d) orig=", + pPg->pFile->iFileId, pPg->iPg, bIrrevocable + ); + DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); + + ret = hctFileSetEvicted(pPg->pFile, pPg->iPg, pPg->iOldPg, bIrrevocable); + ret = (ret ? SQLITE_OK : SQLITE_LOCKED_ERR(pPg->iPg, "evict")); + + DEBUG_PRINTF(" rc=%d final=", ret); + DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); + DEBUG_PRINTF("%s\n", ret==SQLITE_LOCKED ? " SQLITE_LOCKED" : ""); + DEBUG_PAGE_MUTEX_LEAVE(pPg); + return ret; +} + +SQLITE_PRIVATE void sqlite3HctFilePageUnevict(HctFilePage *pPg){ + DEBUG_PAGE_MUTEX_ENTER(pPg); + DEBUG_PRINTF("f=%d: Unevicting page %d orig=", pPg->pFile->iFileId, pPg->iPg); + DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); + + hctFileClearEvicted(pPg->pFile, pPg->iPg); + + DEBUG_PRINTF(" final="); + DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); + DEBUG_PRINTF("\n"); + DEBUG_PAGE_MUTEX_LEAVE(pPg); +} + +SQLITE_PRIVATE int sqlite3HctFilePageIsEvicted(HctFile *pFile, u32 iPgno){ + u64 val; + int rc = hctFilePagemapGetGrow(pFile, iPgno, &val); + return (rc || (val & HCT_PMF_LOGICAL_EVICTED)!=0); +} + +SQLITE_PRIVATE int sqlite3HctFilePageIsFree(HctFile *pFile, u32 iPgno, int bLogical){ + u64 iVal = hctFilePagemapGet(pFile->pMapping, iPgno); + u64 mask = (bLogical ? HCT_PMF_LOGICAL_IN_USE : HCT_PMF_PHYSICAL_IN_USE); + return (iVal & mask) ? 
0 : 1; +} + +SQLITE_PRIVATE int sqlite3HctFilePageRelease(HctFilePage *pPg){ + int rc = SQLITE_OK; + if( pPg->iPg ){ + rc = hctFilePageFlush(pPg); + }else if( pPg->aNew && pPg->pFile->pServer->bReadOnlyMap ){ + rc = hctPageWriteToDisk(pPg->pFile->pServer, pPg->iNewPg, pPg->aNew); + sqlite3_free(pPg->aNew); + } + memset(pPg, 0, sizeof(*pPg)); + return rc; +} + + + +/* +** Allocate a new physical page and set (*pPg) to refer to it. The new +** physical page number is available in HctFilePage.iNewPg. +*/ +SQLITE_PRIVATE int sqlite3HctFilePageNewPhysical(HctFile *pFile, HctFilePage *pPg){ + int rc = SQLITE_OK; + memset(pPg, 0, sizeof(*pPg)); + pPg->pFile = pFile; + hctFilePageMakeWritable(&rc, pPg); + return rc; +} + +/* +** Allocate a new logical page. If parameter iPg is zero, then a new +** logical page number is allocated. Otherwise, it must be a logical page +** number obtained by an earlier call to sqlite3HctFileRootPgno(). +*/ +SQLITE_PRIVATE int sqlite3HctFilePageNew(HctFile *pFile, HctFilePage *pPg){ + int rc = SQLITE_OK; /* Return code */ + u32 iLPg = hctFileAllocPg(&rc, pFile, 1); + if( rc==SQLITE_OK ){ + memset(pPg, 0, sizeof(*pPg)); + pPg->pFile = pFile; + pPg->iPg = iLPg; + hctFilePagemapZeroValue(pFile, iLPg); + hctFilePageMakeWritable(&rc, pPg); + } + + return rc; +} + +/* +** Allocate a new logical root page number. +*/ +SQLITE_PRIVATE int sqlite3HctFileRootPgno(HctFile *pFile, u32 *piRoot){ + int rc = SQLITE_OK; + u32 iRoot = hctFileAllocPg(&rc, pFile, 1); + if( rc==SQLITE_OK ){ + hctFilePagemapZeroValue(pFile, iRoot); + *piRoot = iRoot; + } + return rc; +} + +/* +** Parameter iRoot is a root page number previously obtained from +** sqlite3HctFileRootPgno(). This function allocates a physical +** page to go with the logical one. 
+*/ +SQLITE_PRIVATE int sqlite3HctFileRootNew(HctFile *pFile, u32 iRoot, HctFilePage *pPg){ + int rc = SQLITE_OK; /* Return code */ + + memset(pPg, 0, sizeof(*pPg)); + pPg->pFile = pFile; + pPg->iPg = iRoot; + hctFilePageMakeWritable(&rc, pPg); + + /* Set the LOGICAL_IN_USE and LOGICAL_IS_ROOT flags on page iRoot. At + ** the same time, set the mapping to 0. Take care not to clear the + ** PHYSICAL_IN_USE flag while doing so, in case there is a physical + ** page with page number iRoot currently in use somewhere. */ + while( rc==SQLITE_OK ){ + u64 i1 = hctFilePagemapGet(pFile->pMapping, iRoot); + u64 i2 = (i1 & HCT_PMF_PHYSICAL_IN_USE); + i2 |= (HCT_PMF_LOGICAL_IS_ROOT|HCT_PMF_LOGICAL_IN_USE); + if( hctFilePagemapSet(pFile, iRoot, i1, i2) ) break; + } + + return rc; +} + +SQLITE_PRIVATE void sqlite3HctFilePageUnwrite(HctFilePage *pPg){ + int rc = SQLITE_OK; + if( pPg->aNew ){ + hctFileClearFlag(pPg->pFile, pPg->iNewPg, HCT_PMF_PHYSICAL_IN_USE); + hctFileFreePg(&rc, pPg->pFile, 0, pPg->iNewPg, 0); + if( pPg->pFile->pServer->bReadOnlyMap ){ + sqlite3_free(pPg->aNew); + } + pPg->iNewPg = 0; + pPg->aNew = 0; + if( pPg->iOldPg==0 ){ + assert( pPg->aOld==0 ); + hctFileFreePg(&rc, pPg->pFile, 0, pPg->iPg, 1); + pPg->iPg = 0; + } + } +} + +SQLITE_PRIVATE int sqlite3HctFilePageWrite(HctFilePage *pPg){ + int rc = SQLITE_OK; /* Return code */ + hctFilePageMakeWritable(&rc, pPg); + return rc; +} + +SQLITE_PRIVATE u64 sqlite3HctFileAllocateTransid(HctFile *pFile){ + u64 iVal = hctFilePagemapIncr(pFile, HCT_PAGEMAP_TRANSID_EOF, 1); + pFile->iCurrentTid = (iVal & HCT_TID_MASK); + return pFile->iCurrentTid; +} +SQLITE_PRIVATE u64 sqlite3HctFileAllocateCID(HctFile *pFile, int nWrite){ + assert( nWrite>0 ); + return hctFileAtomicIncr(pFile, &pFile->pServer->iCommitId, nWrite); +} + +SQLITE_PRIVATE void sqlite3HctFileSetCID(HctFile *pFile, u64 iVal){ + HctAtomicStore(&pFile->pServer->iCommitId, iVal); +} + +SQLITE_PRIVATE u64 sqlite3HctFileIncrWriteCount(HctFile *pFile, int nIncr){ + 
return hctFileAtomicIncr(pFile, &pFile->pServer->nWriteCount, nIncr); +} + +SQLITE_PRIVATE u64 sqlite3HctFileGetSnapshotid(HctFile *pFile){ + return HctAtomicLoad( &pFile->pServer->iCommitId ); +} + +SQLITE_PRIVATE int sqlite3HctFilePgsz(HctFile *pFile){ + return pFile->szPage; +} + +SQLITE_PRIVATE void sqlite3HctFileSetJrnlPtr( + HctFile *pFile, + void *pPtr, + void(*xDel)(void*) +){ + assert( pFile->pServer->pJrnlPtr==0 ); + assert( pFile->pServer->xJrnlDel==0 ); + pFile->pServer->pJrnlPtr = pPtr; + pFile->pServer->xJrnlDel = xDel; +} + +SQLITE_PRIVATE void *sqlite3HctFileGetJrnlPtr(HctFile *pFile){ + return pFile->pServer->pJrnlPtr; +} + +/* +** Return the current "safe" TID value. +*/ +SQLITE_PRIVATE u64 sqlite3HctFileSafeTID(HctFile *pFile){ + return sqlite3HctTMapSafeTID(pFile->pTMapClient); +} + +/* +** Allocate a block of nPg physical or logical page ids from the +** end of the current range. +*/ +SQLITE_PRIVATE u32 sqlite3HctFilePageRangeAlloc(HctFile *pFile, int bLogical, int nPg){ + u32 iSlot = HCT_PAGEMAP_PHYSICAL_EOF - bLogical; + u64 iNew = 0; + + assert( bLogical==0 || iSlot==HCT_PAGEMAP_LOGICAL_EOF ); + assert( bLogical!=0 || iSlot==HCT_PAGEMAP_PHYSICAL_EOF ); + + /* Increment the selected slot by nPg. The returned value, iNew, is the + ** new value of the slot - the last page in the range allocated. */ + iNew = hctFilePagemapIncr(pFile, iSlot, nPg); + + /* Return the first page number in the range of nPg allocated */ + return (iNew+1 - nPg); +} + +/* +** This function is called by the upper layer to clear the: +** +** * LOGICAL_IN_USE flag on the specified page id, and the +** * PHYSICAL_IN_USE flag on currently mapped physical page id. +** +** If parameter bReuseNow is true, then the page was never properly linked +** into a list, and so the logical and physical page ids can be reused +** immediately. Otherwise, they are handled as if freed by the current +** transaction. 
+*/ +SQLITE_PRIVATE int sqlite3HctFileClearInUse(HctFilePage *pPg, int bReuseNow){ + int rc = SQLITE_OK; + if( pPg->pFile ){ + u64 iTid = pPg->pFile->iCurrentTid; + u32 iPhysPg = pPg->iOldPg; + + assert( pPg->iPg>0 ); + assert( pPg->iOldPg>0 ); + +#ifdef SQLITE_DEBUG + if( bReuseNow==0 ){ + u64 iVal = hctFilePagemapGet(pPg->pFile->pMapping, pPg->iPg); + assert( iVal & HCT_PMF_LOGICAL_EVICTED ); + } +#endif + + hctFileClearFlag(pPg->pFile, pPg->iPg, HCT_PMF_LOGICAL_IN_USE); + hctFileClearFlag(pPg->pFile, iPhysPg, HCT_PMF_PHYSICAL_IN_USE); + hctFileFreePg(&rc, pPg->pFile, iTid, pPg->iPg, 1); + hctFileFreePg(&rc, pPg->pFile, iTid, iPhysPg, 0); + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctFileClearPhysInUse(HctFile *pFile, u32 pgno, int bReuseNow){ + u64 iTid = pFile->iCurrentTid; + int rc = SQLITE_OK; + + hctFileClearFlag(pFile, pgno, HCT_PMF_PHYSICAL_IN_USE); + hctFileFreePg(&rc, pFile, iTid, pgno, 0); + return rc; +} + +SQLITE_PRIVATE char *sqlite3HctFileLogFile(HctFile *pFile){ + char *zRet = 0; + HctFileServer *pServer = pFile->pServer; + sqlite3_mutex_enter(pServer->pMutex); + zRet = sqlite3_mprintf("%s-log-%d", pServer->zPath, pFile->iFileId); + sqlite3_mutex_leave(pServer->pMutex); + return zRet; +} + +SQLITE_PRIVATE int sqlite3HctFileStartRecovery(HctFile *pFile, int iStage){ + int bRet = 0; + if( pFile->eInitState==iStage ){ + HctFileServer *pServer = pFile->pServer; + sqlite3_mutex_enter(pServer->pMutex); + if( pServer->eInitState==iStage ){ + bRet = 1; + }else{ + pFile->eInitState = pServer->eInitState; + sqlite3_mutex_leave(pServer->pMutex); + } + } + return bRet; +} + +SQLITE_PRIVATE int sqlite3HctFileFinishRecovery(HctFile *pFile, int iStage, int rc){ + HctFileServer *pServer = pFile->pServer; + if( rc==SQLITE_OK ){ + pFile->eInitState = iStage+1; + pServer->eInitState = iStage+1; + } + sqlite3HctPManClientHandoff(pFile->pPManClient); + sqlite3_mutex_leave(pFile->pServer->pMutex); + return rc; +} + +SQLITE_PRIVATE int 
sqlite3HctFileRecoverFreelists( + HctFile *pFile, /* File to recover freelists for */ + int nRoot, i64 *aRoot, /* Array of root page numbers */ + int nPhys, i64 *aPhys /* Sorted array of phys. pages to preserve */ +){ + int rc = SQLITE_OK; + HctFileServer *pServer = pFile->pServer; + HctPManServer *pPManServer = pServer->pPManServer; + HctMapping *pMapping = pServer->pMapping; + u64 iSafeTid = hctFilePagemapGet(pMapping, HCT_PAGEMAP_TRANSID_EOF); + u64 nPg1 = hctFilePagemapGet(pMapping, HCT_PAGEMAP_PHYSICAL_EOF); + u64 nPg2 = hctFilePagemapGet(pMapping, HCT_PAGEMAP_LOGICAL_EOF); + u32 iPg; + u32 nPg; + u32 iPhysOff = ((HCT_HEADER_PAGESIZE*2)+pServer->szPage-1)/pServer->szPage; + + int iPhys = 0; + + nPg1 = nPg1 & HCT_PAGEMAP_VMASK; + nPg2 = nPg2 & HCT_PAGEMAP_VMASK; + + /* TODO: Really - page-manager must be empty at this point. Should assert() + ** that instead of making this call. */ + sqlite3HctPManServerReset(pPManServer); + + nPg = MAX((nPg1 & 0xFFFFFFFF), (nPg2 & 0xFFFFFFFF)); + for(iPg=1; iPg<=nPg; iPg++){ + u64 iVal = hctFilePagemapGetSafe(pMapping, iPg); + + if( (iVal & HCT_PMF_LOGICAL_IS_ROOT) && iPg>=3 ){ + int ii; + for(ii=0; iiiPhysOff) + ){ + /* Check if page iPg is one that must be preserved. 
*/ + u64 iTid = iSafeTid; + while( iPhys=HCT_FIRST_LOGICAL + ){ + sqlite3HctPManServerInit(&rc, pPManServer, iSafeTid, iPg, 1); + } + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctFileFindLogs( + HctFile *pFile, + void *pCtx, + int (*xLog)(void*, const char*) +){ + return hctFileFindLogs(pFile->pServer, pCtx, xLog); +} + +SQLITE_PRIVATE int sqlite3HctFileRootArray( + HctFile *pFile, + u32 **paiRoot, + int *pnRoot +){ + int nAlloc = 0; + int nRoot = 0; + u32 *aRoot = 0; + u32 nLogic = 0; + int ii; + int rc; + + rc = hctFilePagemapGetGrow32(pFile, HCT_PAGEMAP_LOGICAL_EOF, &nLogic); + for(ii=1; rc==SQLITE_OK && ii<=nLogic; ii++){ + u64 val; + rc = hctFilePagemapGetGrow(pFile, ii, &val); + if( rc==SQLITE_OK && (val & HCT_PMF_LOGICAL_IS_ROOT) ){ + if( nRoot>=nAlloc ){ + int nNew = (nAlloc ? nAlloc*2 : 16); + u32 *aNew = (u32*)sqlite3_realloc(aRoot, nNew*sizeof(u32)); + if( aNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + nAlloc = nNew; + aRoot = aNew; + } + } + + if( rc==SQLITE_OK ){ + aRoot[nRoot++] = ii; + } + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(aRoot); + aRoot = 0; + nRoot = 0; + } + *paiRoot = aRoot; + *pnRoot = nRoot; + return rc; +} + +SQLITE_PRIVATE u64 sqlite3HctFileWriteCount(HctFile *pFile){ + return pFile->nPageAlloc; +} + +SQLITE_PRIVATE void sqlite3HctFileICArrays( + HctFile *pFile, + u8 **paLogic, u32 *pnLogic, + u8 **paPhys, u32 *pnPhys +){ + int rc = SQLITE_OK; + u32 nLogic = 0; + u32 nPhys = 0; + u8 *aLogic = 0; + u8 *aPhys = 0; + u32 ii; + + rc = hctFilePagemapGetGrow32(pFile, HCT_PAGEMAP_LOGICAL_EOF, &nLogic); + if( rc==SQLITE_OK ){ + rc = hctFilePagemapGetGrow32(pFile, HCT_PAGEMAP_PHYSICAL_EOF, &nPhys); + } + + if( rc==SQLITE_OK ){ + aLogic = (u8*)sqlite3HctMalloc(&rc, (nLogic + nPhys) * sizeof(u8)); + if( aLogic ){ + aPhys = &aLogic[nLogic]; + } + } + + for(ii=1; ii<=nLogic && rc==SQLITE_OK; ii++){ + u64 val; + rc = hctFilePagemapGetGrow(pFile, ii, &val); + if( rc==SQLITE_OK && (val & HCT_PMF_LOGICAL_IN_USE)==0 ){ + aLogic[ii-1] = 1; 
+ } + } + for(ii=1; ii<=nPhys && rc==SQLITE_OK; ii++){ + u64 val; + rc = hctFilePagemapGetGrow(pFile, ii, &val); + if( rc==SQLITE_OK && (val & HCT_PMF_PHYSICAL_IN_USE)==0 ){ + aPhys[ii-1] = 1; + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_free(aLogic); + aLogic = aPhys = 0; + nLogic = nPhys = 0; + } + + *paLogic = aLogic; + *paPhys = aPhys; + *pnLogic = nLogic; + *pnPhys = nPhys; +} + +SQLITE_PRIVATE i64 sqlite3HctFileStats(sqlite3 *db, int iStat, const char **pzStat){ + i64 iVal = -1; + HctFile *pFile = sqlite3HctDbFile(sqlite3HctDbFind(db, 0)); + + switch( iStat ){ + case 0: + *pzStat = "cas_attempt"; + iVal = pFile->stats.nCasAttempt; + break; + case 1: + *pzStat = "cas_fail"; + iVal = pFile->stats.nCasFail; + break; + case 2: + *pzStat = "incr_attempt"; + iVal = pFile->stats.nIncrAttempt; + break; + case 3: + *pzStat = "incr_fail"; + iVal = pFile->stats.nIncrFail; + break; + case 4: + *pzStat = "mutex_attempt"; + iVal = pFile->stats.nMutex; + break; + case 5: + *pzStat = "mutex_block"; + iVal = pFile->stats.nMutexBlock; + break; + default: + break; + } + + return iVal; +} + +SQLITE_PRIVATE int sqlite3HctFileNFile(HctFile *pFile, int *pbFixed){ + int iRet = 0; + HctFileServer *p = pFile->pServer; + sqlite3_mutex_enter(p->pMutex); + iRet = p->nFdDb; + *pbFixed = (p->szPage>0); + sqlite3_mutex_leave(p->pMutex); + return iRet; +} + +/************************************************************************* +** Beginning of vtab implemetation. +*************************************************************************/ + +#define HCT_PGMAP_SCHEMA \ +" CREATE TABLE hct_pgmap(" \ +" slot INTEGER," \ +" value INTEGER," \ +" comment TEXT," \ +" physical_in_use BOOLEAN," \ +" logical_in_use BOOLEAN," \ +" logical_evicted BOOLEAN," \ +" logical_irrevicted BOOLEAN,"\ +" logical_is_root BOOLEAN" \ +" );" + +/* +** Virtual table type for "hctpgmap". 
+*/ +typedef struct pgmap_vtab pgmap_vtab; +struct pgmap_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; +}; + +/* +** Virtual cursor type for "hctpgmap". +*/ +typedef struct pgmap_cursor pgmap_cursor; +struct pgmap_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + HctFile *pFile; /* Database to report on */ + u64 iMaxSlotno; /* Maximum page number for this scan */ + u64 slotno; /* The page-number/rowid value */ + u64 iVal; /* Value read from pagemap */ +}; + +/* +** The pgmapConnect() method is invoked to create a new +** template virtual table. +** +** Think of this routine as the constructor for pgmap_vtab objects. +** +** All this routine needs to do is: +** +** (1) Allocate the pgmap_vtab object and initialize all fields. +** +** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the +** result set of queries against the virtual table will look like. +*/ +static int pgmapConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + pgmap_vtab *pNew; + int rc; + + rc = sqlite3_declare_vtab(db, HCT_PGMAP_SCHEMA); + pNew = (pgmap_vtab*)sqlite3HctMalloc(&rc, sizeof(*pNew)); + if( pNew ){ + pNew->db = db; + } + + *ppVtab = (sqlite3_vtab*)pNew; + return rc; +} + +/* +** This method is the destructor for pgmap_vtab objects. +*/ +static int pgmapDisconnect(sqlite3_vtab *pVtab){ + pgmap_vtab *p = (pgmap_vtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new pgmap_cursor object. +*/ +static int pgmapOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + pgmap_cursor *pCur; + pCur = sqlite3MallocZero(sizeof(*pCur)); + if( pCur==0 ) return SQLITE_NOMEM; + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Destructor for a pgmap_cursor. 
+*/ +static int pgmapClose(sqlite3_vtab_cursor *cur){ + pgmap_cursor *pCur = (pgmap_cursor*)cur; + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output, i.e. if slotno is greater than iMaxSlotno. +*/ +static int pgmapEof(sqlite3_vtab_cursor *cur){ + pgmap_cursor *pCur = (pgmap_cursor*)cur; + return pCur->slotno>pCur->iMaxSlotno; +} + +static int pgmapLoadSlot(pgmap_cursor *pCur){ /* Read the pagemap entry for pCur->slotno into pCur->iVal and return the rc from hctFilePagemapGetGrow(). */ + return hctFilePagemapGetGrow( + pCur->pFile, pCur->slotno, &pCur->iVal + ); +} + +/* +** Advance a pgmap_cursor to its next row of output. The pagemap entry +** for the new slot is loaded unless the cursor has moved past iMaxSlotno. +*/ +static int pgmapNext(sqlite3_vtab_cursor *cur){ + pgmap_cursor *pCur = (pgmap_cursor*)cur; + pCur->slotno++; + return pgmapEof(cur) ? SQLITE_OK : pgmapLoadSlot(pCur); +} + +static void pgmapGetComment(sqlite3_context *ctx, i64 iSlot){ /* If iSlot is one of the well-known slots listed in the switch below, set the result of ctx to its symbolic name. For any other slot no result is set by this function. */ + const char *zText = 0; + + switch( iSlot ){ + case HCT_ROOTPAGE_SCHEMA: + zText = "ROOTPAGE_SCHEMA"; + break; + case HCT_ROOTPAGE_META: + zText = "ROOTPAGE_META"; + break; + case HCT_PAGEMAP_LOGICAL_EOF: + zText = "LOGICAL_EOF"; + break; + case HCT_PAGEMAP_PHYSICAL_EOF: + zText = "PHYSICAL_EOF"; + break; + case HCT_PAGEMAP_TRANSID_EOF: + zText = "TRANSID_EOF"; + break; + } + + if( zText ){ + sqlite3_result_text(ctx, zText, -1, SQLITE_TRANSIENT); + } +} + +/* +** Return values of columns for the row at which the pgmap_cursor +** is currently pointing. 
+*/ +static int pgmapColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + pgmap_cursor *pCur = (pgmap_cursor*)cur; + switch( i ){ /* Column numbers follow the order of HCT_PGMAP_SCHEMA; any other value of i sets no result */ + case 0: { /* slot */ + sqlite3_result_int64(ctx, pCur->slotno); + break; + } + case 1: { /* value: low 32 bits of the pagemap entry */ + sqlite3_result_int64(ctx, (pCur->iVal & 0xFFFFFFFF)); + break; + } + case 2: { /* comment: symbolic name of the slot, if it has one */ + pgmapGetComment(ctx, pCur->slotno); + break; + } + case 3: { /* physical_in_use */ + sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_PHYSICAL_IN_USE)?1:0); + break; + } + case 4: { /* logical_in_use */ + sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_IN_USE)?1:0); + break; + } + case 5: { /* logical_evicted */ + sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_EVICTED)?1:0); + break; + } + case 6: { /* logical_irrevicted */ + sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_IRREVICTED)?1:0); + break; + } + case 7: { /* logical_is_root */ + sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_IS_ROOT)?1:0); + break; + } + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. In this implementation, the +** rowid is the same as the slotno value. +*/ +static int pgmapRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + pgmap_cursor *pCur = (pgmap_cursor*)cur; + *pRowid = pCur->slotno; + return SQLITE_OK; +} + +/* +** This method is called to "rewind" the pgmap_cursor object back +** to the first row of output. This method is always called at least +** once prior to any call to pgmapColumn() or pgmapRowid() or +** pgmapEof(). 
+*/ +static int pgmapFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + pgmap_cursor *pCur = (pgmap_cursor*)pVtabCursor; + pgmap_vtab *pTab = (pgmap_vtab*)(pCur->base.pVtab); + int rc; + u64 max1; + u64 max2; + + pCur->pFile = sqlite3HctDbFile(sqlite3HctDbFind(pTab->db, 0)); + pCur->slotno = 1; /* Every scan starts at slot 1; idxNum, idxStr, argc and argv are not used */ + max1 = hctFilePagemapGet(pCur->pFile->pMapping, HCT_PAGEMAP_PHYSICAL_EOF); + max2 = hctFilePagemapGet(pCur->pFile->pMapping, HCT_PAGEMAP_LOGICAL_EOF); + max1 &= HCT_PGNO_MASK; + max2 &= HCT_PGNO_MASK; + pCur->iMaxSlotno = max1>max2 ? max1 : max2; /* Scan up to the larger of the two EOF page numbers */ + rc = pgmapLoadSlot(pCur); + return rc; +} + +/* +** SQLite will invoke this method one or more times while planning a query +** that uses the virtual table. This routine needs to create +** a query plan for each invocation and compute an estimated cost for that +** plan. +** +** This implementation ignores all constraints and always reports the same +** fixed cost and row estimate. +*/ +static int pgmapBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + pIdxInfo->estimatedCost = (double)10; + pIdxInfo->estimatedRows = 10; + return SQLITE_OK; +} + +/* +** This function is the implementation of the xUpdate callback used by +** hctpgmap virtual tables. It is invoked by SQLite each time a row is +** to be inserted, updated or deleted. +** +** A delete specifies a single argument - the rowid of the row to remove. +** +** Update and insert operations pass: +** +** 1. The "old" rowid (for an UPDATE), or NULL (for an INSERT). +** 2. The "new" rowid. +** 3. Values for each of the 8 columns. 
+** +** Specifically: +** +** apVal[2]: slot +** apVal[3]: value +** apVal[4]: comment +** apVal[5]: physical_in_use +** apVal[6]: logical_in_use +** apVal[7]: logical_evicted +** apVal[8]: logical_irrevicted +** apVal[9]: logical_is_root +** +** Only UPDATE is implemented. A DELETE (nVal==1) or an INSERT (apVal[0] +** is not an integer) fails with SQLITE_CONSTRAINT. The slot written is +** the old rowid, apVal[0]; the new rowid and the new slot and comment +** values are not read. The new pagemap entry is built from the value +** column masked with HCT_PAGEMAP_VMASK plus one flag bit per boolean +** column, and is stored with AtomicStore(). +** +** NOTE(review): iSlot is narrowed from a 64-bit rowid to u32 and is not +** range-checked before hctPagemapPtr() is called - confirm that is safe. +*/ +static int pgmapUpdate( + sqlite3_vtab *pVtab, + int nVal, + sqlite3_value **apVal, + sqlite3_int64 *piRowid +){ + pgmap_vtab *p = (pgmap_vtab*)pVtab; + HctFile *pFile = sqlite3HctDbFile(sqlite3HctDbFind(p->db, 0)); + u32 iSlot = 0; + u64 val = 0; + u64 *pPtr = 0; + + i64 iValue = 0; + int bPhysicalInUse = 0; + int bLogicalInUse = 0; + int bLogicalEvicted = 0; + int bLogicalIrrevicted = 0; + int bLogicalIsRoot = 0; + + if( nVal==1 || sqlite3_value_type(apVal[0])!=SQLITE_INTEGER ){ + return SQLITE_CONSTRAINT; + } + iSlot = sqlite3_value_int64(apVal[0]); + + iValue = sqlite3_value_int64(apVal[3]); + bPhysicalInUse = sqlite3_value_int(apVal[5]); + bLogicalInUse = sqlite3_value_int(apVal[6]); + bLogicalEvicted = sqlite3_value_int(apVal[7]); + bLogicalIrrevicted = sqlite3_value_int(apVal[8]); + bLogicalIsRoot = sqlite3_value_int(apVal[9]); + + val = iValue & HCT_PAGEMAP_VMASK; + val |= (bPhysicalInUse ? HCT_PMF_PHYSICAL_IN_USE : 0); + val |= (bLogicalInUse ? HCT_PMF_LOGICAL_IN_USE : 0); + val |= (bLogicalEvicted ? HCT_PMF_LOGICAL_EVICTED : 0); + val |= (bLogicalIrrevicted ? HCT_PMF_LOGICAL_IRREVICTED : 0); + val |= (bLogicalIsRoot ? 
HCT_PMF_LOGICAL_IS_ROOT : 0); + + pPtr = hctPagemapPtr(pFile->pMapping, iSlot); + AtomicStore(pPtr, val); + + *piRowid = iSlot; + return SQLITE_OK; +} + +SQLITE_PRIVATE int sqlite3HctFileVtabInit(sqlite3 *db){ /* Register the hctpgmap module with database handle db and return the rc from sqlite3_create_module(). Only xConnect is supplied (xCreate is 0). */ + static sqlite3_module pgmapModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ pgmapConnect, + /* xBestIndex */ pgmapBestIndex, + /* xDisconnect */ pgmapDisconnect, + /* xDestroy */ 0, + /* xOpen */ pgmapOpen, + /* xClose */ pgmapClose, + /* xFilter */ pgmapFilter, + /* xNext */ pgmapNext, + /* xEof */ pgmapEof, + /* xColumn */ pgmapColumn, + /* xRowid */ pgmapRowid, + /* xUpdate */ pgmapUpdate, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 + }; + + return sqlite3_create_module(db, "hctpgmap", &pgmapModule, 0); +} + +SQLITE_PRIVATE int sqlite3HctIoerr(int rc){ /* Log rc and errno via sqlite3_log(), then terminate the process with abort(). The final return statement is never reached. */ + sqlite3_log(rc, "sqlite3HctIoerr() - rc=%d errno=%d\n", rc, (int)errno); + assert( 0 ); + abort(); + return rc; +} + + +/************** End of hct_file.c ********************************************/ +/************** Begin file hct_database.c ************************************/ +/* +** 2020 October 13 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +************************************************************************* +*/ + +/* #include "hctInt.h" */ +/* #include "vdbeInt.h" */ +/* #include */ +/* #include */ + +typedef struct HctDatabase HctDatabase; +typedef struct HctDbIndexEntry HctDbIndexEntry; +typedef struct HctDbIndexLeaf HctDbIndexLeaf; +typedef struct HctDbIndexNode HctDbIndexNode; +typedef struct HctDbIndexNodeEntry HctDbIndexNodeEntry; +typedef struct HctDbIndexNodeHdr HctDbIndexNodeHdr; +typedef struct HctDbIntkeyEntry HctDbIntkeyEntry; +typedef struct HctDbIntkeyLeaf HctDbIntkeyLeaf; +typedef struct HctDbIntkeyNodeEntry HctDbIntkeyNodeEntry; +typedef struct HctDbIntkeyNode HctDbIntkeyNode; +typedef struct HctDbKey HctDbKey; +typedef struct HctDbLeaf HctDbLeaf; +typedef struct HctDbLeafHdr HctDbLeafHdr; +typedef struct HctDbWriter HctDbWriter; +typedef struct HctDbPageHdr HctDbPageHdr; +typedef struct HctDbHistoryFan HctDbHistoryFan; +typedef struct HctDbRangeCsr HctDbRangeCsr; + +typedef struct HctCsrIntkeyOp HctCsrIntkeyOp; +typedef struct HctCsrIndexOp HctCsrIndexOp; + +typedef struct HctDbPageArray HctDbPageArray; + +struct HctCsrIntkeyOp { /* One element of the list headed by CsrIntkey.pOpList. NOTE(review): presumably records an integer key range (iFirst..iLast) visited by a cursor and the logical/physical page it was read from - confirm against the code that fills it in. */ + HctCsrIntkeyOp *pNextOp; + i64 iFirst; + i64 iLast; + + u32 iLogical; + u32 iPhysical; +}; + +struct HctCsrIndexOp { /* Index-tree counterpart of HctCsrIntkeyOp: the two keys are byte buffers (pFirst/nFirst and pLast/nLast) instead of integers. */ + HctCsrIndexOp *pNextOp; + u8 *pFirst; + int nFirst; + u8 *pLast; + int nLast; + + u32 iLogical; + u32 iPhysical; +}; + +struct CsrIntkey { /* Embedded in HctDbCsr as member intkey */ + HctCsrIntkeyOp *pOpList; + HctCsrIntkeyOp *pCurrentOp; +}; +struct CsrIndex { /* Embedded in HctDbCsr as member index */ + HctCsrIndexOp *pOpList; + HctCsrIndexOp *pCurrentOp; +}; + +struct HctDbKey { + i64 iKey; /* Integer key value */ + UnpackedRecord *pKey; /* Index key value */ + HctBuffer buf; /* Buffer for pKey data (if required) */ +}; + +/* +** eRange: +** Set to one of the HCT_RANGE_* constants defined below. 
+*/ +struct HctDbRangeCsr { + HctDbKey lowkey; + HctDbKey highkey; + u64 iRangeTid; /* The range TID that was followed here */ + + int eRange; /* HCT_RANGE_* constant */ + int iCell; /* NOTE(review): presumably the current cell within pg - confirm */ + HctFilePage pg; /* Page for this range; a fan page if eRange==HCT_RANGE_FAN */ +}; + +#define HCT_RANGE_FOLLOW 0 /* Follow range-pointers only */ +#define HCT_RANGE_MERGE 1 /* Merge in data + follow range-pointers */ +#define HCT_RANGE_FAN 2 /* HctDbRangeCsr.pg is a fan page */ + +#define IS_HCT_MIGRATE(pDb) (pDb->pConfig->db->bHctMigrate) /* Evaluates to the bHctMigrate flag of the db handle reached through pDb->pConfig. The argument is not parenthesised, so pass a plain identifier. */ + +/* +** iRoot: +** Logical root page of tree structure that this cursor is open on. +** +** pKeyInfo: +** NULL for cursors open on intkey trees, otherwise points to the +** KeyInfo used to compare keys in the open index tree. For cursors +** opened by the user, this is set when the cursor is opened within +** sqlite3HctDbCsrOpen() and never modified. +** +** pRec: +** UnpackedRecord structure suitable for use with pKeyInfo. This is +** allocated the first time it is required and then retained for +** the lifetime of the HctDbCsr structure. +** +** eDir: +** One of BTREE_DIR_NONE, BTREE_DIR_FORWARD or BTREE_DIR_REVERSE. 
+** +** pIntkeyOps: +*/ +struct HctDbCsr { + HctDatabase *pDb; /* Database that owns this cursor */ + u32 iRoot; /* Root page cursor is opened on */ + KeyInfo *pKeyInfo; /* NULL for intkey trees, else KeyInfo of the index */ + UnpackedRecord *pRec; /* Allocated on first use, for use with pKeyInfo */ + int eDir; /* Direction cursor will step after Seek() */ + int bNosnap; /* The "no-snapshot" flag */ + + u8 *aRecord; /* Record in allocated memory */ + int nRecord; /* Size of aRecord[] in bytes */ + HctBuffer rec; /* Buffer used to manage aRecord[] */ + + struct CsrIntkey intkey; + struct CsrIndex index; + HctDbCsr *pNextScanner; /* NOTE(review): presumably the next cursor in HctDatabase.pScannerList - confirm */ + + int iCell; /* Current cell within page */ + HctFilePage pg; /* Current leaf page */ + + int nRange; + int nRangeAlloc; + HctDbRangeCsr *aRange; +}; + +#define HCTDB_MAX_DIRTY (HCTDB_MAX_PAGEARRAY-2) +// #define HCTDB_MAX_DIRTY (HCTDB_STATIC_PAGEARRAY-2) +#define HCTDB_MAX_PAGEARRAY 2048 /* Entries in HctDbPageArray.aDyn[] once grown */ +#define HCTDB_STATIC_PAGEARRAY (8+2) /* Entries in HctDbPageArray.aStatic[] */ + +#define HCTDB_APPEND_MODE_THRESHOLD 5 + + +#define LARGEST_TID ((((u64)1)<<56)-1) /* All of the low 56 bits set */ +#define HCT_TID_ROLLBACK_OVERRIDE (((u64)0x01) << 56) /* Bit 56, the first bit above LARGEST_TID */ + + +struct HctDbPageArray { + int nPg; /* Number of entries of aPg[] in use */ + HctFilePage *aPg; /* Points at aStatic[] or, once grown, at aDyn[] */ + HctFilePage aStatic[HCTDB_STATIC_PAGEARRAY]; + HctFilePage *aDyn; /* Heap array, or 0 if not grown */ + int nDyn; /* 0, or HCTDB_MAX_PAGEARRAY once grown */ +}; + +typedef struct HctDbOverflow HctDbOverflow; +typedef struct HctDbOverflowArray HctDbOverflowArray; + +struct HctDbOverflow { + u32 pgno; + int nOvfl; +}; + +struct HctDbOverflowArray { + int nEntry; + int nAlloc; + HctDbOverflow *aOvfl; +}; + +typedef struct HctDbFPKey HctDbFPKey; +struct HctDbFPKey { + i64 iKey; + u8 *aKey; + HctBuffer buf; +}; + +/* +** +** iHeight: +** The height of the list that this writer is writing to. 0 for leaves, +** 1 for the parents of leaves, etc. +** +** aWritePg/nWritePg: +** +** nWriteKey: +** Number of hctDbInsert() calls since last flush - i.e. how many have to +** be retried if we hit a CAS failure and have to redo this write operation. 
+** +** iWriteFpKey/aWriteFpKey: +** These two variables store the fence-post key for the peer page of +** the rightmost page in the aWritePg[] array - aWritePg[nWritePg-1]. +** For intkey tables, iWriteFpKey is the 64-bit integer key value. For +** index tables, aWriteFpKey points to a buffer containing the FP key, +** and iWriteFpKey its size in bytes. The buffer is allocated with +** sqlite3_malloc(). +** +** If there is no peer page and writing to an intkey list, iWriteFpKey +** is set to LARGEST_INT64. If writing to an index list, aWriteFpKey is +** set to NULL and iWriteFpKey to 0. +** +** discardpg: +** Pages to the right of writepg[0] that will be removed from the list +** if the CAS instruction for this write succeeds. +** +** bAppend: +** True if the writer is in append mode. +** +** bDoCleanup: +** True if hctDbInsert() has been called since the most recent +** hctDbWriterCleanup(). +*/ +struct HctDbWriter { + int iHeight; /* Height to write at (0==leaves) */ + HctDbPageArray writepg; /* Pages being written (aWritePg/nWritePg in the comment above) */ + int nWriteKey; /* Number of new keys in writepg array */ + + int bAppend; /* Writer is in "append" mode */ + HctDbFPKey fp; /* Fence-Post key (iWriteFpKey/aWriteFpKey in the comment above) */ + + HctDbCsr writecsr; /* Used to find target page while writing */ + HctDbPageArray discardpg; /* Pages removed from the list if the CAS succeeds */ + HctFilePage fanpg; + + int bDoCleanup; /* hctDbInsert() called since last hctDbWriterCleanup() */ + int nEvictLocked; + u32 iEvictLockedPgno; + + HctDbOverflowArray delOvfl; /* Overflow chains to free on write */ + HctDbOverflowArray insOvfl; /* Overflow chains to free on don't-write */ + + int nOverflow; + + int nMigrateKey; +}; + +/* +** This is used by the rebalance operation implemented by hctDbBalance(). +** The first step of that operation is to assemble an array of these +** structures - one for each cell that will be distributed between the +** output pages. +** +** nByte: +** Total bytes of space required by cell on new page. This includes +** the header entry and the data stored in the cell area. +** +** aEntry: +** Pointer to buffer containing cell entry. 
Or NULL to indicate that +** the HctDbCellSz structure corresponds to a new cell being written +** (that is not on any input page). +** +** aCell: +** Only valid if (aEntry!=0). Pointer to buffer containing leaf-page +** portion of cell. +*/ +typedef struct HctDbCellSz HctDbCellSz; +struct HctDbCellSz { + int nByte; /* Size of cell in bytes */ + u8 *aEntry; /* Buffer containing cell entry */ + u8 *aCell; /* Buffer containing cell body */ +}; + +typedef struct HctBalance HctBalance; +struct HctBalance { + u8 *aPg[3]; /* NOTE(review): presumably page-sized scratch buffers for hctDbBalance() - confirm */ + int nSzAlloc; /* Allocated size of aSz[] array */ + HctDbCellSz *aSz; /* aSz[] array */ +}; + +/* +** Given the database page-size as an argument, this macro evaluates to the +** maximum number of cells that may fit on any page with variable sized +** entries (an index leaf or node, or intkey leaf page). The value is the +** page size divided by 8. +*/ +#define MAX_CELLS_PER_PAGE(pgsz) ((pgsz) / 8) + +/* +** This structure, an instance of which is part of each HctDatabase object, +** holds counters collected for the hctstats structure. +*/ +typedef struct HctDatabaseStats HctDatabaseStats; +struct HctDatabaseStats { + i64 nBalanceIntkey; + i64 nBalanceIndex; + i64 nBalanceSingle; + i64 nTMapLookup; /* Incremented by hctDbTMapLookup() each time a map entry is loaded */ + i64 nUpdateInPlace; + i64 nInternalRetry; +}; + +/* +** pScannerList: +** Linked list of cursors used by the current transaction. If this turns +** out to be a write transaction, this list is used to detect read/write +** conflicts. +** +** iJrnlWriteCid: +** This value is set within calls to sqlite3_hct_journal_write(). The CID +** of the journal entry being written to the db. 
+*/ +struct HctDatabase { + HctFile *pFile; /* File object, as returned by sqlite3HctDbFile() */ + HctConfig *pConfig; + i64 nCasFail; /* Number of CAS collisions so far */ + int pgsz; /* Page size in bytes */ + + u8 *aTmp; /* Temp buffer pgsz bytes in size */ + HctBalance *pBalance; /* Space for hctDbBalance() */ + + HctDbCsr *pScannerList; + + u64 iJrnlWriteCid; + + HctTMap *pTmap; /* Transaction map (non-NULL if trans open) */ + u64 iSnapshotId; /* Snapshot id for reading */ + u64 iLocalMinTid; /* TIDs at or below this are accepted by hctDbTidIsVisible() without a map lookup */ + HctDbWriter pa; + HctDbCsr rbackcsr; /* Used to find old values during rollback */ + u64 iTid; /* Transaction id for writing */ + u64 nWriteCount; /* Write-count at start of commit */ + + int eMode; /* HCT_MODE_XXX constant */ + int bConcurrent; /* Collect validation information */ + + int (*xSavePhysical)(void*, i64); /* If set, called by hctDbGetPhysical() with each page number fetched */ + void *pSavePhysical; /* First argument passed to xSavePhysical */ + + HctDatabaseStats stats; +}; + +/* +** Values for HctDatabase.eMode. Note that the value 2 is not used. +*/ +#define HCT_MODE_NORMAL 0 +#define HCT_MODE_ROLLBACK 1 +#define HCT_MODE_VALIDATE 3 + + +/* +** 8-byte database page header. Described in fileformat.wiki. +*/ +struct HctDbPageHdr { + u8 hdrFlags; /* HCT_PAGETYPE_* value, optionally ORed with HCT_PAGETYPE_LEFTMOST */ + u8 nHeight; /* 0 for leaves, 1 for parents etc. */ + u16 nEntry; /* Number of entries on the page */ + u32 iPeerPg; /* Peer page number, read via hctPagePeer() */ +}; + +/* +** Page types. These are the values that may appear in the page-type +** field of a page header. +*/ +#define HCT_PAGETYPE_INTKEY 0x01 +#define HCT_PAGETYPE_INDEX 0x03 +#define HCT_PAGETYPE_OVERFLOW 0x05 +#define HCT_PAGETYPE_HISTORY 0x06 + +#define HCT_PAGETYPE_MASK 0x07 + +/* +** Page types may be ORed with the following: +*/ +#define HCT_PAGETYPE_LEFTMOST 0x80 + +#define hctPagetype(p) (((HctDbPageHdr*)(p))->hdrFlags&HCT_PAGETYPE_MASK) +#define hctIsLeftmost(p) (((HctDbPageHdr*)(p))->hdrFlags&HCT_PAGETYPE_LEFTMOST) +#define hctPageheight(p) (((HctDbPageHdr*)(p))->nHeight) +#define hctPagenentry(p) (((HctDbPageHdr*)(p))->nEntry) +#define hctPagePeer(p) (((HctDbPageHdr*)(p))->iPeerPg) /* Each of the five macros above reads one HctDbPageHdr field from page buffer p */ + +/* +** 16-byte leaf page header. Used by both index and intkey leaf pages. +** Described in fileformat.wiki. 
+*/ +struct HctDbLeafHdr { + u16 nFreeGap; /* Size of free-space region, in bytes */ + u16 nFreeBytes; /* Total free bytes on page */ + u32 unused; /* NOTE(review): the three fields here total 8 bytes; the 16 bytes quoted above presumably include the preceding HctDbPageHdr - confirm */ +}; + +struct HctDbLeaf { /* Common start of every leaf page: page header followed by leaf header */ + HctDbPageHdr pg; + HctDbLeafHdr hdr; +}; + + +struct HctDbIntkeyEntry { + u32 nSize; /* 0: Total size of data (local+overflow) */ + u16 iOff; /* 4: Offset of record within this page */ + u8 flags; /* 6: Flags (see below) */ + u8 unused; /* 7: */ + i64 iKey; /* 8: Integer key value */ +}; + +struct HctDbIndexEntry { + u32 nSize; /* 0: Total size of data (local+overflow) */ + u16 iOff; /* 4: Offset of record within this page */ + u8 flags; /* 6: Flags (see below) */ + u8 unused; /* 7: */ +}; + +struct HctDbIndexNodeEntry { /* Same leading fields as HctDbIndexEntry, followed by the child page number */ + u32 nSize; + u16 iOff; + u8 flags; + u8 unused; + u32 iChildPg; +}; + +struct HctDbIntkeyNodeEntry { + i64 iKey; /* Value of FP key on page iChild */ + u32 iChildPg; /* Child page */ + u32 unused; +}; + +struct HctDbIntkeyNode { + HctDbPageHdr pg; + HctDbIntkeyNodeEntry aEntry[0]; /* Entries follow the header directly */ +}; + +struct HctDbIntkeyLeaf { + HctDbPageHdr pg; + HctDbLeafHdr hdr; + HctDbIntkeyEntry aEntry[0]; /* Entries follow the two headers directly */ +}; + +struct HctDbIndexLeaf { + HctDbPageHdr pg; + HctDbLeafHdr hdr; + HctDbIndexEntry aEntry[0]; /* Entries follow the two headers directly */ +}; + +struct HctDbIndexNodeHdr { + u16 nFreeGap; /* Size of free-space region, in bytes */ + u16 nFreeBytes; /* Total free bytes on page */ +}; + +struct HctDbIndexNode { + HctDbPageHdr pg; + HctDbIndexNodeHdr hdr; + HctDbIndexNodeEntry aEntry[0]; /* Entries follow the two headers directly */ +}; + +/* +** History fanout page. +** +** iSplit0: +** The index of a key in page aPgOld1[0]. This key is the first that +** should be considered in aPgOld1[0]. Implying that no key equal to +** or greater than this from pgOld0 should be considered. +*/ +struct HctDbHistoryFan { + HctDbPageHdr pg; + + u64 iRangeTid0; + u64 iFollowTid0; + u32 pgOld0; + + int iSplit0; + + u64 iRangeTid1; + u32 aPgOld1[0]; +}; + +/* +** Structure for reading/writing cells from and to pages. 
+*/ +typedef struct HctDbCell HctDbCell; +struct HctDbCell { + u64 iTid; + u64 iRangeTid; + u32 iRangeOld; + u32 iOvfl; + const u8 *aPayload; +}; + +#if 1 +__attribute__ ((noinline)) +static void hctMemcpy(void *a, const void *b, size_t c){ /* memcpy() wrapper that does nothing when c is zero */ + if( c ) memcpy(a, b, c); +} +#else +# define hctMemcpy memcpy +#endif + + + +/* +** Flags for HctDbIntkeyEntry.flags. The size noted beside each flag is the +** number of bytes hctDbOffset() adds for it. +*/ +#define HCTDB_HAS_TID 0x01 /* 8 bytes */ +#define HCTDB_HAS_OVFL 0x04 /* 4 bytes */ +#define HCTDB_HAS_RANGETID 0x08 /* 8 bytes */ +#define HCTDB_HAS_RANGEOLD 0x10 /* 4 bytes */ + +#define HCTDB_MAX_EXTRA_CELL_DATA (8+4+8+4) /* Sum of the four sizes above */ + +SQLITE_PRIVATE int sqlite3HctBufferGrow(HctBuffer *pBuf, int nSize){ /* Ensure pBuf has at least nSize bytes allocated, reallocating if required. Returns SQLITE_OK, or SQLITE_NOMEM_BKPT if the reallocation fails, in which case pBuf is left as it was. */ + int rc = SQLITE_OK; + if( nSize>pBuf->nAlloc ){ + u8 *aNew = sqlite3_realloc(pBuf->aBuf, nSize); + if( aNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + pBuf->aBuf = aNew; + pBuf->nAlloc = nSize; + } + } + return rc; +} + +SQLITE_PRIVATE void sqlite3HctBufferFree(HctBuffer *pBuf){ /* Free the allocation and zero the whole HctBuffer so that it may be reused */ + sqlite3_free(pBuf->aBuf); + memset(pBuf, 0, sizeof(HctBuffer)); +} + +static int hctBufferSet(HctBuffer *pBuf, const u8 *aData, int nData){ /* Grow pBuf to at least nData bytes and copy aData[] to its start. Returns the rc from sqlite3HctBufferGrow(). */ + int rc = sqlite3HctBufferGrow(pBuf, nData); + if( rc==SQLITE_OK ){ + hctMemcpy(pBuf->aBuf, aData, nData); + } + return rc; +} + + +#ifdef SQLITE_DEBUG +static int hctSqliteBusy(int iLine){ /* Debug builds route every HCT_SQLITE_BUSY through this function, passing the source line; iLine is not otherwise used */ + return SQLITE_BUSY_SNAPSHOT; +} +# define HCT_SQLITE_BUSY hctSqliteBusy(__LINE__) +#else +# define HCT_SQLITE_BUSY SQLITE_BUSY_SNAPSHOT +#endif /* SQLITE_DEBUG */ + +static u64 hctDbTMapLookup(HctDatabase *pDb, u64 iTid, u64 *peState){ /* Look up transaction iTid in the transaction map. The state is stored in *peState and the CID bits of the map entry are returned (0 on the two branches that do not read the map). If iTid lies beyond the mapped range the map is refreshed with sqlite3HctTMapUpdate() and the lookup retried. NOTE(review): the condition of the second branch is damaged in this text; it presumably compares iTid against pTmap->iFirstTid - restore it from the original source. */ + u64 iVal = 0; + HctTMap *pTmap = pDb->pTmap; + if( iTid==LARGEST_TID ){ + *peState = HCT_TMAP_ROLLBACK; + }else if( iTidiFirstTid ){ + *peState = HCT_TMAP_COMMITTED; + }else{ + int iMap = (iTid - pTmap->iFirstTid) / HCT_TMAP_PAGESIZE; + + if( iMap>=pTmap->nMap ){ + HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(pDb->pFile); + sqlite3HctTMapUpdate(pTMapClient, &pDb->pTmap); + assert( iTid<(pDb->pTmap->nMap*HCT_TMAP_PAGESIZE)+pDb->pTmap->iFirstTid ); + return 
hctDbTMapLookup(pDb, iTid, peState); + } + + { + int iOff = (iTid - pTmap->iFirstTid) % HCT_TMAP_PAGESIZE; + iOff = HCT_TMAP_ENTRYSLOT(iOff); + iVal = AtomicLoad(&pTmap->aaMap[iMap][iOff]); + pDb->stats.nTMapLookup++; + } + + *peState = (iVal & HCT_TMAP_STATE_MASK); + } + return (iVal & HCT_TMAP_CID_MASK); +} + + +static void print_out_tmap(HctDatabase *pDb, int nLimit){ /* Debugging aid: print the state and CID found by hctDbTMapLookup() for TIDs counted up from pTmap->iFirstTid. NOTE(review): the loop header and the declarations of eState and iTid are damaged in this text - restore them from the original source. */ + int ii; + + for(ii=0; iipTmap->iFirstTid + ii; + u64 iCid = hctDbTMapLookup(pDb, iTid, &eState); + + printf("tid=%d -> (%s, %d)\n", (int)iTid, + eState==HCT_TMAP_WRITING ? "WRITING" : + eState==HCT_TMAP_VALIDATING ? "VALIDATING" : + eState==HCT_TMAP_ROLLBACK ? "ROLLBACK" : + eState==HCT_TMAP_COMMITTED ? "COMMITTED" : "???", + (int)iCid + ); + } + +} + +static void hctDbPageArrayReset(HctDbPageArray *pArray){ /* Free any heap array and return pArray to the empty state in which aPg points at aStatic[] */ + sqlite3_free(pArray->aDyn); + pArray->nPg = 0; + pArray->aPg = pArray->aStatic; + pArray->aDyn = 0; + pArray->nDyn = 0; +} + +static int hctDbPageArrayGrow(HctDbPageArray *pArray){ /* Switch pArray from aStatic[] to a zeroed heap array of HCTDB_MAX_PAGEARRAY entries, copying all HCTDB_STATIC_PAGEARRAY static entries across. May only be called while aDyn is 0. Returns SQLITE_NOMEM_BKPT if the allocation fails. */ + assert( pArray->aDyn==0 ); + pArray->aDyn = sqlite3MallocZero(sizeof(HctFilePage) * HCTDB_MAX_PAGEARRAY); + if( pArray->aDyn==0 ){ + return SQLITE_NOMEM_BKPT; + } + pArray->nDyn = HCTDB_MAX_PAGEARRAY; + pArray->aPg = pArray->aDyn; + hctMemcpy(pArray->aPg, pArray->aStatic, + sizeof(HctFilePage)*HCTDB_STATIC_PAGEARRAY + ); + return SQLITE_OK; +} + +/* +** Grow the dynamic arrays used by the writer, if necessary. Both writepg +** and discardpg are grown together, the first time either of them holds +** (HCTDB_STATIC_PAGEARRAY-2) or more pages. +** +** NOTE(review): if growing discardpg fails after writepg was grown, later +** calls skip the grow because writepg.aDyn is already set - confirm that +** callers treat the error as fatal. +*/ +static int hctDbWriterGrow(HctDbWriter *pWriter){ + int rc = SQLITE_OK; + if( pWriter->writepg.aDyn==0 ){ + if( pWriter->writepg.nPg>=(HCTDB_STATIC_PAGEARRAY-2) + || pWriter->discardpg.nPg>=(HCTDB_STATIC_PAGEARRAY-2) + ){ + rc = hctDbPageArrayGrow(&pWriter->writepg); + if( rc==SQLITE_OK ){ + rc = hctDbPageArrayGrow(&pWriter->discardpg); + } + } + } + return rc; +} + +SQLITE_PRIVATE HctDatabase *sqlite3HctDbOpen( + int *pRc, + const char *zFile, + HctConfig *pConfig +){ /* Allocate a new HctDatabase and open file zFile for it. On error the partly built object is closed, NULL is returned and the error code is left in *pRc. NOTE(review): the incoming *pRc is handed to sqlite3HctMalloc(), which presumably allocates nothing if it is already an error - confirm. */ + int rc = *pRc; + HctDatabase *pNew = 0; + + pNew = (HctDatabase*)sqlite3HctMalloc(&rc, sizeof(*pNew)); + if( pNew ){ + pNew->pFile = 
sqlite3HctFileOpen(&rc, zFile, pConfig); + pNew->pConfig = pConfig; + if( pNew->pFile ) pNew->pgsz = sqlite3HctFilePgsz(pNew->pFile); + } + + if( rc!=SQLITE_OK ){ + sqlite3HctDbClose(pNew); + pNew = 0; + } + + *pRc = rc; + return pNew; +} + +SQLITE_PRIVATE int sqlite3HctDbPagesize(HctDatabase *pDb){ /* Return the cached page size in bytes */ + return pDb->pgsz; +} + + +SQLITE_PRIVATE void sqlite3HctDbClose(HctDatabase *p){ /* Free p->aTmp and p->pBalance, close p->pFile, then free p itself. Passing NULL is a no-op. */ + if( p ){ + sqlite3_free(p->aTmp); + sqlite3HctFileClose(p->pFile); + p->pFile = 0; + sqlite3_free(p->pBalance); + sqlite3_free(p); + } +} + +SQLITE_PRIVATE HctFile *sqlite3HctDbFile(HctDatabase *pDb){ /* Return the HctFile that pDb was opened on */ + return pDb->pFile; +} + +SQLITE_PRIVATE int sqlite3HctDbRootNew(HctDatabase *p, u32 *piRoot){ /* Thin wrapper: forwards to sqlite3HctFileRootPgno() on the underlying file */ + return sqlite3HctFileRootPgno(p->pFile, piRoot); +} + +SQLITE_PRIVATE int sqlite3HctDbRootFree(HctDatabase *p, u32 iRoot){ /* Thin wrapper: forwards to sqlite3HctFileRootFree() on the underlying file */ + return sqlite3HctFileRootFree(p->pFile, iRoot); +} + +SQLITE_PRIVATE void sqlite3HctDbRootPageInit( + int bIndex, /* True for an index, false for intkey */ + u8 *aPage, /* Buffer to initialize */ + int szPage /* Size of aPage[] in bytes */ +){ /* Zero aPage[] and format it as an empty leftmost leaf of the requested type, with all space after the HctDbLeaf header counted as free */ + HctDbLeaf *pLeaf = (HctDbLeaf*)aPage; + memset(aPage, 0, szPage); + if( bIndex ){ + pLeaf->pg.hdrFlags = HCT_PAGETYPE_INDEX | HCT_PAGETYPE_LEFTMOST; + }else{ + pLeaf->pg.hdrFlags = HCT_PAGETYPE_INTKEY | HCT_PAGETYPE_LEFTMOST; + } + pLeaf->hdr.nFreeBytes = szPage - sizeof(HctDbLeaf); + pLeaf->hdr.nFreeGap = pLeaf->hdr.nFreeBytes; +} + +static void hctDbRootPageInit( + int bIndex, /* True for an index, false for intkey */ + int nHeight, /* Initial height */ + u32 iChildPg, /* Child page number */ + u8 *aPage, /* Buffer to initialize */ + int szPage /* Size of aPage[] in bytes */ +){ /* Zero aPage[] and format it as a leftmost root page. With nHeight==0 the result is an empty leaf; with nHeight>0 it is an internal node at that height holding a single entry that points to iChildPg. */ + HctDbPageHdr *pPg = (HctDbPageHdr*)aPage; + memset(aPage, 0, szPage); + if( bIndex ){ + pPg->hdrFlags = HCT_PAGETYPE_INDEX | HCT_PAGETYPE_LEFTMOST; + }else{ + pPg->hdrFlags = HCT_PAGETYPE_INTKEY | HCT_PAGETYPE_LEFTMOST; + } + if( nHeight>0 ){ + pPg->nHeight = nHeight; + pPg->nEntry = 1; + if( bIndex ){ + HctDbIndexNode *pNode = (HctDbIndexNode*)pPg; + 
pNode->aEntry[0].iChildPg = iChildPg; + pNode->hdr.nFreeBytes = + szPage - sizeof(HctDbIndexNode) - sizeof(HctDbIndexNodeEntry); + pNode->hdr.nFreeGap = pNode->hdr.nFreeBytes; + }else{ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pPg; + pNode->aEntry[0].iKey = SMALLEST_INT64; + pNode->aEntry[0].iChildPg = iChildPg; + } + }else{ + HctDbLeaf *pLeaf = (HctDbLeaf*)pPg; + pLeaf->hdr.nFreeBytes = szPage - sizeof(HctDbLeaf); + pLeaf->hdr.nFreeGap = pLeaf->hdr.nFreeBytes; + } +} + +/* +** Open a read transaction, if one is not already open. A transaction is +** treated as open when pDb->iSnapshotId is non-zero, in which case this +** function does nothing and returns SQLITE_OK. +** +** Otherwise, the page-size temp buffer is allocated if it does not exist +** yet, the transaction map is opened, and iSnapshotId is set from the +** journal snapshot or, if that is zero, from the file snapshot id. +** +** NOTE(review): a failure of sqlite3HctTMapBegin() is only caught by the +** assert() marked todo; in builds without assert() the snapshot id is +** still set in that case. +*/ +SQLITE_PRIVATE int sqlite3HctDbStartRead(HctDatabase *pDb, HctJournal *pJrnl){ + int rc = SQLITE_OK; + + assert( (pDb->iSnapshotId==0)==(pDb->pTmap==0) ); + assert( pDb->iSnapshotId!=0 || pDb->bConcurrent==0 ); + if( pDb->iSnapshotId==0 && SQLITE_OK==(rc=sqlite3HctFileNewDb(pDb->pFile)) ){ + if( pDb->aTmp==0 ){ + pDb->pgsz = sqlite3HctFilePgsz(pDb->pFile); + pDb->aTmp = (u8*)sqlite3HctMalloc(&rc, pDb->pgsz); + } + if( rc==SQLITE_OK ){ + u64 iSnapshot = 0; + HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(pDb->pFile); + + iSnapshot = sqlite3HctJournalSnapshot(pJrnl); + rc = sqlite3HctTMapBegin(pTMapClient, iSnapshot, &pDb->pTmap); + assert( rc==SQLITE_OK ); /* todo */ + + iSnapshot = sqlite3HctJournalSnapshot(pJrnl); + if( iSnapshot==0 ){ + iSnapshot = sqlite3HctFileGetSnapshotid(pDb->pFile); + } + pDb->iSnapshotId = iSnapshot; + pDb->iLocalMinTid = sqlite3HctTMapCommitedTID(pTMapClient); + assert( pDb->iSnapshotId>0 ); + } + } + + return rc; +} + +static u64 hctGetU64(const u8 *a){ /* Copy sizeof(u64) bytes from a[] into a u64, in native byte order */ + u64 ret; + hctMemcpy(&ret, a, sizeof(u64)); + return ret; +} +static u32 hctGetU32(const u8 *a){ /* Copy sizeof(u32) bytes from a[] into a u32, in native byte order */ + u32 ret; + hctMemcpy(&ret, a, sizeof(u32)); + return ret; +} + +static void hctPutU32(u8 *a, u32 val){ /* Copy the bytes of val to a[], in native byte order */ + hctMemcpy(a, &val, sizeof(u32)); +} + +/* +** Return true if TID iTid maps to a commit-id visible to the current +** client. Or false otherwise. 
+*/ +static int hctDbTidIsVisible(HctDatabase *pDb, u64 iTid, int bNosnap){ /* If bNosnap is non-zero, a committed TID is visible even when its CID is newer than the snapshot of this client */ + + if( (iTid & HCT_TID_MASK)<=pDb->iLocalMinTid ) return 1; + while( 1 ){ /* Repeats the map lookup for as long as the TID is in VALIDATING state with a CID inside the snapshot and is not the TID of this client */ + u64 eState = 0; + u64 iCid = hctDbTMapLookup(pDb, (iTid & HCT_TID_MASK), &eState); + if( iTid & HCT_TID_ROLLBACK_OVERRIDE ){ + eState = HCT_TMAP_COMMITTED; + } + if( eState==HCT_TMAP_WRITING || eState==HCT_TMAP_ROLLBACK ){ + return 0; + } + if( eState==HCT_TMAP_COMMITTED ){ + if( bNosnap==0 && iCid>pDb->iSnapshotId ){ + return 0; + } + return 1; + } + assert( eState==HCT_TMAP_VALIDATING ); + if( iCid>pDb->iSnapshotId || iTid==pDb->iTid ){ + return 0; + } + } + + assert( 0 ); /* Not reached: the loop above is only left by return */ + return 0; +} + +/* +** This is called when writing keys to the database as part of committing +** a transaction. One of the writes will clobber a key associated with +** transaction-id iTid. This function returns true if this represents +** a write/write conflict and the transaction should be rolled back, or +** false if the write should proceed. +** +** As written, the result is false if iTid is the TID of this client, is +** at or below iLocalMinTid or is LARGEST_TID, if it is committed with a +** CID inside the snapshot, or if its CID equals iJrnlWriteCid. It is true +** in every other case. +*/ +static int hctDbTidIsConflict(HctDatabase *pDb, u64 iTid){ + if( iTid==pDb->iTid || iTid<=pDb->iLocalMinTid || iTid==LARGEST_TID ){ + return 0; + }else{ + u64 eState = 0; + u64 iCid = hctDbTMapLookup(pDb, iTid & HCT_TID_MASK, &eState); + + /* This should only be called while writing or validating. */ + assert( pDb->iTid ); + if( iTid & HCT_TID_ROLLBACK_OVERRIDE ){ + eState = HCT_TMAP_COMMITTED; + } + + if( eState==HCT_TMAP_COMMITTED && iCid<=pDb->iSnapshotId ) return 0; + if( iCid==pDb->iJrnlWriteCid ) return 0; + return 1; /* NOTE(review): unconditional, so every statement below it in this block is unreachable */ + + if( eState==HCT_TMAP_WRITING || eState==HCT_TMAP_VALIDATING ) return 1; + + /* It's tempting to return 0 here - how can a key that has been rolled + ** back be a conflict? The problem is that the previous version of the + ** key - the one before this rolled back version - may be a write/write + ** conflict. Ideally, this code would check that and return accordingly. 
*/ + if( eState==HCT_TMAP_ROLLBACK ) return 1; + + assert( eState==HCT_TMAP_COMMITTED ); + return (iCid > pDb->iSnapshotId); + } +} + + +static int hctDbOffset(int iOff, int flags){ /* Return iOff plus the number of bytes taken by the optional fields selected by flags (TID, overflow page, range TID, old range page). aVal[] is indexed by the low five flag bits; the asserts below check the table against the flag values. */ + static const int aVal[] = { + 0+0+0+0+0, 0+0+0+0+8, 0+0+0+0+0, 0+0+0+0+8, + 0+0+4+0+0, 0+0+4+0+8, 0+0+4+0+0, 0+0+4+0+8, + 0+8+0+0+0, 0+8+0+0+8, 0+8+0+0+0, 0+8+0+0+8, + 0+8+4+0+0, 0+8+4+0+8, 0+8+4+0+0, 0+8+4+0+8, + + 4+0+0+0+0, 4+0+0+0+8, 4+0+0+0+0, 4+0+0+0+8, + 4+0+4+0+0, 4+0+4+0+8, 4+0+4+0+0, 4+0+4+0+8, + 4+8+0+0+0, 4+8+0+0+8, 4+8+0+0+0, 4+8+0+0+8, + 4+8+4+0+0, 4+8+4+0+8, 4+8+4+0+0, 4+8+4+0+8, + }; + + assert( HCTDB_HAS_RANGEOLD==0x10 ); /* +4 */ + assert( HCTDB_HAS_RANGETID==0x08 ); /* +8 */ + assert( HCTDB_HAS_OVFL==0x04 ); /* +4 */ + assert( HCTDB_HAS_TID==0x01 ); /* +8 */ + + assert( aVal[ flags & 0x1F ]==( + ((flags & HCTDB_HAS_TID) ? 8 : 0) + + ((flags & HCTDB_HAS_RANGETID) ? 8 : 0) + + ((flags & HCTDB_HAS_RANGEOLD) ? 4 : 0) + + ((flags & HCTDB_HAS_OVFL) ? 4 : 0) + )); + + return iOff + aVal[ flags&0x1F ]; +} + + +/* +** Wrapper around sqlite3HctFilePageGetPhysical() that also invokes the +** xSavePhysical callback, if one is configured. If the page is fetched +** but the callback fails, the error code of the callback is returned. +** +** NOTE(review): pPg is not released on that path - confirm that callers +** release it when an error is returned. +*/ +static int hctDbGetPhysical(HctDatabase *pDb, u32 iPg, HctFilePage *pPg){ + int rc = sqlite3HctFilePageGetPhysical(pDb->pFile, iPg, pPg); + if( rc==SQLITE_OK && pDb->xSavePhysical ){ + rc = pDb->xSavePhysical(pDb->pSavePhysical, (i64)iPg); + } + return rc; +} + +/* +** Load the meta-data record from the database and store it in buffer aBuf +** (size nBuf bytes). The meta-data record is stored with rowid=0 int the +** intkey table with root-page=2. 
+*/ +SQLITE_PRIVATE int sqlite3HctDbGetMeta(HctDatabase *pDb, u8 *aBuf, int nBuf){ /* aBuf is zeroed first, so it stays all zeroes if page 2 has no entry or if the chain of old versions ends before a visible one is found. Returns SQLITE_OK or the error from a page fetch. NOTE(review): the two break statements that leave the loop early do so without calling sqlite3HctFilePageRelease() on pg - confirm that no release is required there. */ + HctFilePage pg; + int rc; + + assert( pDb->iSnapshotId ); + memset(aBuf, 0, nBuf); + rc = sqlite3HctFilePageGet(pDb->pFile, 2, &pg); + while( rc==SQLITE_OK ){ /* Each pass looks at one version of the record; if its TID is not visible the old page it points to is loaded and examined next */ + HctDbIntkeyLeaf *pLeaf = (HctDbIntkeyLeaf*)pg.aOld; + int iOff; + u8 flags; + + if( pLeaf->pg.nEntry==0 ){ + break; + } + + assert( pLeaf->pg.nEntry==1 ); + assert( pLeaf->aEntry[0].iKey==0 ); + assert( pLeaf->aEntry[0].nSize==nBuf ); + iOff = pLeaf->aEntry[0].iOff; + flags = pLeaf->aEntry[0].flags; + + assert( flags==HCTDB_HAS_TID + || flags==(HCTDB_HAS_RANGEOLD|HCTDB_HAS_RANGETID|HCTDB_HAS_TID) + ); + if( (flags & HCTDB_HAS_RANGEOLD) + && 0==hctDbTidIsVisible(pDb, hctGetU64(&pg.aOld[iOff]), 0) + ){ + u32 iOld = hctGetU32(&pg.aOld[iOff+8+8]); /* Old page number is stored after the 8-byte TID and 8-byte range TID */ + if( iOld==0 ) break; + sqlite3HctFilePageRelease(&pg); + rc = hctDbGetPhysical(pDb, iOld, &pg); + }else{ + iOff = hctDbOffset(iOff, pLeaf->aEntry[0].flags ); + hctMemcpy(aBuf, &pg.aOld[iOff], nBuf); + sqlite3HctFilePageRelease(&pg); + break; + } + } + + return rc; +} + +SQLITE_PRIVATE void sqlite3HctDbTransIsConcurrent(HctDatabase *pDb, int eConcurrent){ /* Set pDb->bConcurrent to 1 if eConcurrent is non-zero, otherwise to 0 */ + pDb->bConcurrent = (eConcurrent!=0); +} + +static int hctDbValidateMeta(HctDatabase *pDb){ /* Return HCT_SQLITE_BUSY if the first entry on logical page 2 carries a TID that hctDbTidIsConflict() reports as a conflict. Otherwise return SQLITE_OK, or the error from the page fetch. NOTE(review): unlike sqlite3HctDbGetMeta(), nEntry is not checked before aEntry[0] is read - confirm that page 2 cannot be empty here. */ + int rc = SQLITE_OK; + HctFilePage pg; + + assert( pDb->iSnapshotId>0 ); + rc = sqlite3HctFilePageGet(pDb->pFile, 2, &pg); + if( rc==SQLITE_OK ){ + HctDbIntkeyEntry *p = &((HctDbIntkeyLeaf*)pg.aOld)->aEntry[0]; + if( p->flags & HCTDB_HAS_TID ){ + u64 iTid = hctGetU64(&pg.aOld[p->iOff]); + if( hctDbTidIsConflict(pDb, iTid) ) rc = HCT_SQLITE_BUSY; + } + sqlite3HctFilePageRelease(&pg); + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctDbRootInit(HctDatabase *p, int bIndex, u32 iRoot){ /* Create root page iRoot with sqlite3HctFileRootNew(), format it as an empty leaf of the requested type and release it. Returns the first error encountered. */ + HctFilePage pg; + int rc = SQLITE_OK; + + rc = sqlite3HctFileRootNew(p->pFile, iRoot, &pg); + if( rc==SQLITE_OK ){ + sqlite3HctDbRootPageInit(bIndex, pg.aNew, p->pgsz); + rc = sqlite3HctFilePageRelease(&pg); + } + return rc; +} + +static i64 
hctDbIntkeyFPKey(const void *aPg){ /* Return the key of the first entry on intkey page aPg, which may be a leaf (nHeight==0) or an internal node */ + if( ((HctDbPageHdr*)aPg)->nHeight==0 ){ + return ((HctDbIntkeyLeaf*)aPg)->aEntry[0].iKey; + } + return ((HctDbIntkeyNode*)aPg)->aEntry[0].iKey; +} + + +static i64 hctDbGetIntkey(const u8 *aTarget, int iCell){ /* Return the integer key of cell iCell on intkey leaf page aTarget. iCell must be a valid cell index. */ + assert( hctPagetype(aTarget)==HCT_PAGETYPE_INTKEY ); + assert( hctPageheight(aTarget)==0 ); + assert( iCell>=0 && iCell<((HctDbIntkeyLeaf*)aTarget)->pg.nEntry ); + + return ((HctDbIntkeyLeaf*)aTarget)->aEntry[iCell].iKey; +} + +#if 0 /* Disabled: the function below is not compiled */ +static i64 hctDbGetIntkeyFromPhys( + int *pRc, + HctDatabase *pDb, + u32 iPhys, + int iCell +){ + i64 iRet = 0; + int rc = *pRc; + if( rc==SQLITE_OK ){ + HctFilePage pg; + rc = sqlite3HctFilePageGetPhysical(pDb->pFile, iPhys, &pg); + if( rc==SQLITE_OK ){ + iRet = hctDbGetIntkey(pg.aOld, iCell); + sqlite3HctFilePageRelease(&pg); + } + } + *pRc = rc; + return iRet; +} +#endif + + +/* +** Buffer aPg contains an intkey leaf page. +** +** This function searches the leaf page for key iKey. If found, it returns +** the index of the matching key within the page and sets output variable +** (*pbExact) to 1. If there is no match for key iKey, this function returns +** the index of the smallest key on the page that is larger than iKey, or +** (nEntry) if all keys on the page are smaller than iKey. (*pbExact) is +** set to 0 before returning in this case. 
+*/ +static int hctDbIntkeyLeafSearch( + const u8 *aPg, + i64 iKey, + int *pbExact +){ + const HctDbIntkeyLeaf *pLeaf = (const HctDbIntkeyLeaf*)aPg; + int i1 = 0; + int i2 = pLeaf->pg.nEntry; + + assert( hctPagetype(aPg)==HCT_PAGETYPE_INTKEY ); + assert( pLeaf->pg.nHeight==0 ); + while( i2>i1 ){ + int iTest = (i1+i2)/2; + i64 iPgKey = pLeaf->aEntry[iTest].iKey; + if( iPgKey==iKey ){ + *pbExact = 1; + return iTest; + }else if( iPgKey=0 ); + assert( i2==pLeaf->pg.nEntry || iKeyaEntry[i2].iKey ); + assert( i2==0 || iKey>pLeaf->aEntry[i2-1].iKey ); + + *pbExact = 0; + return i2; +} + +static int hctDbIntkeyLocalsize(int pgsz, int nSize){ + const int nMax = ( + pgsz - + sizeof(HctDbIntkeyLeaf) - + sizeof(HctDbIntkeyEntry) - + (HCTDB_MAX_EXTRA_CELL_DATA - sizeof(u32)) + ); + + int nLocal; + if( nSize (nMax-sizeof(u32)) ){ + nLocal = nMin; + } + } + + return nLocal; +} + +static int hctDbIndexLocalsize(int pgsz, int nSize){ + int nLocal; + int nMax = pgsz/4; + if( nSizenMax ){ + nLocal = nMin; + } + } + return nLocal; +} + +static int hctDbLocalsize(const u8 *aPg, int pgsz, int nSize){ + if( hctPagetype(aPg)==HCT_PAGETYPE_INTKEY ){ + return hctDbIntkeyLocalsize(pgsz, nSize); + } + return hctDbIndexLocalsize(pgsz, nSize); +} + +static int hctDbIntkeyEntrySize(HctDbIntkeyEntry *pEntry, int pgsz){ + int sz = hctDbIntkeyLocalsize(pgsz, pEntry->nSize) + + hctDbOffset(0, pEntry->flags); + return sz; +} + +static int hctDbIndexEntrySize(HctDbIndexEntry *pEntry, int pgsz){ + int sz = hctDbIndexLocalsize(pgsz, pEntry->nSize) + + hctDbOffset(0, pEntry->flags); + return sz; +} + +static int hctDbIndexNodeEntrySize(HctDbIndexNodeEntry *pEntry, int pgsz){ + return hctDbIndexLocalsize(pgsz, pEntry->nSize) + + ((pEntry->flags & HCTDB_HAS_OVFL) ? 4 : 0); +} + +/* +** The pointer passed as the first argument is a pointer to a buffer +** containing a page that uses variable sized records. That is, an +** intkey leaf page, or an index leaf or node page. 
This function +** returns the number of bytes of record-area space consumed by +** entry iEntry on the page. +*/ +static int hctDbPageRecordSize(void *aPg, int pgsz, int iEntry){ + int eType = hctPagetype(aPg); + if( eType==HCT_PAGETYPE_INTKEY ){ + assert( hctPageheight(aPg)==0 ); + return hctDbIntkeyEntrySize(&((HctDbIntkeyLeaf*)aPg)->aEntry[iEntry], pgsz); + }else if( hctPageheight(aPg)==0 ){ + return hctDbIndexEntrySize(&((HctDbIndexLeaf*)aPg)->aEntry[iEntry], pgsz); + } + return hctDbIndexNodeEntrySize(&((HctDbIndexNode*)aPg)->aEntry[iEntry], pgsz); +} +static int hctDbPageEntrySize(void *aPg){ + int eType = hctPagetype(aPg); + if( eType==HCT_PAGETYPE_INTKEY ){ + assert( hctPageheight(aPg)==0 ); + return sizeof(HctDbIntkeyEntry); + }else if( hctPageheight(aPg)==0 ){ + return sizeof(HctDbIndexEntry); + } + return sizeof(HctDbIndexNodeEntry); +} + +/* +** The buffer passed as the first argument contains a page that is +** guaranteed to be either an intkey leaf, or an index leaf or node. +** This function returns a pointer to HctDbIndexEntry structure +** associated with page entry iEntry. +*/ +static HctDbIndexEntry *hctDbEntryEntry(const void *aPg, int iEntry){ + int iOff; + + assert( (hctPagetype(aPg)==HCT_PAGETYPE_INTKEY && hctPageheight(aPg)==0) + || (hctPagetype(aPg)==HCT_PAGETYPE_INDEX) + ); + + if( hctPagetype(aPg)==HCT_PAGETYPE_INTKEY ){ + iOff = sizeof(HctDbIntkeyLeaf) + iEntry*sizeof(HctDbIntkeyEntry); + }else if( hctPageheight(aPg)==0 ){ + iOff = sizeof(HctDbIndexLeaf) + iEntry*sizeof(HctDbIndexEntry); + }else{ + iOff = sizeof(HctDbIndexNode) + iEntry*sizeof(HctDbIndexNodeEntry); + } + + return (HctDbIndexEntry*)&((u8*)aPg)[iOff]; +} + +/* +** Argument aPg[] is a buffer containing either an index tree page, or an +** intkey leaf page. This function locates the record associated with +** cell iCell on the page, and populates output variables *pnData and +** *paData with the size and a pointer to a buffer containing the record, +** respectively. 
+** +** If the record in cell iCell does not overflow the page, (*paData) is +** set to point into the body of the page itself. If the record does +** overflow the page, then buffer pBuf is used to store the record and +** (*paData) is set to point to the buffer's allocation. In this case +** it is the responsibility of the caller to eventually release the buffer. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. +*/ +static int hctDbLoadRecord( + HctDatabase *pDb, + HctBuffer *pBuf, + const u8 *aPg, + int iCell, + int *pnData, + const u8 **paData +){ + int rc = SQLITE_OK; + HctDbIndexEntry *p = hctDbEntryEntry(aPg, iCell); + + *pnData = p->nSize; + if( paData ){ + if( p->flags & HCTDB_HAS_OVFL ){ + rc = sqlite3HctBufferGrow(pBuf, p->nSize); + *paData = pBuf->aBuf; + if( rc==SQLITE_OK ){ + u32 pgOvfl; + int nLocal = hctDbLocalsize(aPg, pDb->pgsz, p->nSize); + + int iOff = hctDbOffset(p->iOff, p->flags); + hctMemcpy(pBuf->aBuf, &aPg[iOff], nLocal); + pgOvfl = hctGetU32(&aPg[iOff-sizeof(u32)]); + iOff = nLocal; + + while( rc==SQLITE_OK && iOffnSize ){ + HctFilePage ovfl; + rc = hctDbGetPhysical(pDb, pgOvfl, &ovfl); + if( rc==SQLITE_OK ){ + int nCopy = MIN(pDb->pgsz-8, p->nSize-iOff); + hctMemcpy(&pBuf->aBuf[iOff],&ovfl.aOld[sizeof(HctDbPageHdr)],nCopy); + iOff += nCopy; + pgOvfl = ((HctDbPageHdr*)ovfl.aOld)->iPeerPg; + sqlite3HctFilePageRelease(&ovfl); + } + } + } + }else{ + int iOff = hctDbOffset(p->iOff, p->flags); + *paData = &aPg[iOff]; + } + } + + return rc; +} + +/* +** Buffer aPg[] contains either an index page or an intkey leaf (i.e. a page +** that contains variable length records). This function loads the record +** associated with cell iCell on the page, and populates output object +** pFP with the results. +** +** SQLITE_OK is returned if successful, or an SQLite error code otherwise. 
+*/ +static int hctDbLoadRecordFP( + HctDatabase *pDb, /* Database handle */ + const u8 *aPg, /* Page to load record from */ + int iCell, /* Cell to load */ + HctDbFPKey *pFP /* Populate this structure with record */ +){ + const u8 *aKey = 0; + int nKey = 0; + int rc = SQLITE_OK; + + rc = hctDbLoadRecord(pDb, &pFP->buf, aPg, iCell, &nKey, &aKey); + if( rc==SQLITE_OK ){ + if( aKey!=pFP->buf.aBuf ){ + rc = sqlite3HctBufferGrow(&pFP->buf, nKey); + if( rc==SQLITE_OK ){ + hctMemcpy(pFP->buf.aBuf, aKey, nKey); + } + } + pFP->iKey = nKey; + pFP->aKey = pFP->buf.aBuf; + } + + return rc; +} + +/* +** Buffer aPg[] contains a history fan page. +** +** This page searches the page, returning the index of the entry that +** points to the page with the largest key that is less than or equal +** to parameter pKey/iKey. +*/ +static int hctDbFanSearch( + int *pRc, + HctDatabase *pDb, + const u8 *aPg, + UnpackedRecord *pKey, + i64 iKey +){ + HctDbHistoryFan *pFan = (HctDbHistoryFan*)aPg; + int rc = *pRc; + int i1 = 0; + int i2 = pFan->pg.nEntry-1; + HctBuffer buf = {0, 0, 0}; + + assert( hctPagetype(aPg)==HCT_PAGETYPE_HISTORY ); + + while( rc==SQLITE_OK && i2>i1 ){ + HctFilePage pg; + int iTest = (i1+i2)/2; + + rc = hctDbGetPhysical(pDb, pFan->aPgOld1[iTest], &pg); + while( rc==SQLITE_OK && hctPagetype(pg.aOld)==HCT_PAGETYPE_HISTORY ){ + HctDbHistoryFan *pFan = (HctDbHistoryFan*)pg.aOld; + rc = hctDbGetPhysical(pDb, pFan->pgOld0, &pg); + } + if( rc==SQLITE_OK ){ + int iCell = (iTest==0 ? 
pFan->iSplit0 : 0); + + assert( pKey || hctPagetype(pg.aOld)==HCT_PAGETYPE_INTKEY ); + assert( pKey==0 || hctPagetype(pg.aOld)==HCT_PAGETYPE_INDEX ); + + if( pKey==0 ){ + i64 iPgKey = hctDbGetIntkey(pg.aOld, iCell); + if( iPgKey==iKey ){ + i1 = i2 = iTest+1; + }else if( iPgKeypKeyInfo->db, pRec); + } +} + +static UnpackedRecord *hctDbAllocateUnpacked(int *pRc, KeyInfo *pKeyInfo){ + UnpackedRecord *pRet = 0; + if( *pRc==SQLITE_OK ){ + pRet = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); + if( pRet==0 ) *pRc = SQLITE_NOMEM_BKPT; + } + return pRet; +} + +SQLITE_PRIVATE void sqlite3HctDbRecordTrim(UnpackedRecord *pRec){ + if( pRec && pRec->pKeyInfo->nUniqField ){ + int ii; + u16 nUniqField = pRec->pKeyInfo->nUniqField; + for(ii=0; iiaMem[ii].flags & MEM_Null ){ + return; + } + } + pRec->nField = nUniqField; + } +} + + +/* +** This function returns the current snapshot-id. It may only be called +** when a read transaction is active. +*/ +SQLITE_PRIVATE i64 sqlite3HctDbSnapshotId(HctDatabase *pDb){ + assert( pDb->iSnapshotId>0 ); + return pDb->iSnapshotId; +} + +/* +** Load the key belonging to cell iCell on page aPg[] into structure (*pKey). +*/ +static void hctDbGetKey( + int *pRc, + HctDatabase *pDb, + KeyInfo *pKeyInfo, + int bDup, + const u8 *aPg, + int iCell, + HctDbKey *pKey +){ + int rc = *pRc; + + if( rc==SQLITE_OK ){ + assert( hctPageheight(aPg)==0 ); + assert( iCell>=0 && iCelliKey = hctDbGetIntkey(aPg, iCell); + }else{ + const u8 *aRec = 0; + int nRec = 0; + rc = hctDbLoadRecord(pDb, &pKey->buf, aPg, iCell, &nRec, &aRec); + if( aRec!=pKey->buf.aBuf && bDup && rc==SQLITE_OK ){ + rc = hctBufferSet(&pKey->buf, aRec, nRec); + aRec = pKey->buf.aBuf; + } + pKey->pKey = hctDbAllocateUnpacked(&rc, pKeyInfo); + if( rc==SQLITE_OK ){ + sqlite3VdbeRecordUnpack(pKeyInfo, nRec, aRec, pKey->pKey); + } + if( rc==SQLITE_OK ){ + sqlite3HctDbRecordTrim(pKey->pKey); + } + } + } + *pRc = rc; +} + +/* +** Retrieve the key from iCell of physical page iPhys. 
iPhys may be an +** intkey or index leaf page. Populate structure (*pKey) with the key +** value before returning. +*/ +static void hctDbGetKeyFromPage( + int *pRc, + HctDatabase *pDb, + KeyInfo *pKeyInfo, + int bLogical, /* True for logical, false for physical */ + u32 iPg, + int iCell, + HctDbKey *pKey +){ + int rc = *pRc; + + if( rc==SQLITE_OK ){ + HctFilePage pg; + if( bLogical ){ + rc = sqlite3HctFilePageGet(pDb->pFile, iPg, &pg); + }else{ + rc = hctDbGetPhysical(pDb, iPg, &pg); + while( rc==SQLITE_OK && hctPagetype(pg.aOld)==HCT_PAGETYPE_HISTORY ){ + HctDbHistoryFan *pFan = (HctDbHistoryFan*)pg.aOld; + rc = hctDbGetPhysical(pDb, pFan->pgOld0, &pg); + } + } + if( rc==SQLITE_OK ){ + hctDbGetKey(&rc, pDb, pKeyInfo, 1, pg.aOld, iCell, pKey); + sqlite3HctFilePageRelease(&pg); + } + } + *pRc = rc; +} + +/* static RecordCompare find_record_compare((UnpackedRecord*, RecordCompare); */ +#define find_record_compare(pRec, xCompare) ( \ + (xCompare) ? (xCompare) : sqlite3VdbeFindCompare(pRec) \ +) + + +static int hctDbIndexSearch( + HctDatabase *pDb, + const u8 *aPg, + RecordCompare xCompare, + UnpackedRecord *pRec, + int *piPos, + int *pbExact +){ + int rc = SQLITE_OK; + HctBuffer buf; + int i1 = 0; + int i2 = ((HctDbPageHdr*)aPg)->nEntry; + + if( pRec ) xCompare = find_record_compare(pRec, xCompare); + memset(&buf, 0, sizeof(buf)); + + while( i2>i1 ){ + int iTest = (i1+i2)/2; + int res; + int nRec = 0; + const u8 *aRec = 0; + + rc = hctDbLoadRecord(pDb, &buf, aPg, iTest, &nRec, &aRec); + if( rc!=SQLITE_OK ) break; + if( nRec==0 ){ + res = -1; + }else{ + res = xCompare(nRec, aRec, pRec); + } + + if( res==0 ){ + *pbExact = 1; + *piPos = iTest; + sqlite3HctBufferFree(&buf); + return SQLITE_OK; + }else if( res<0 ){ + i1 = iTest+1; + }else{ + i2 = iTest; + } + } + + assert( i1==i2 && i2>=0 ); + sqlite3HctBufferFree(&buf); + *pbExact = 0; + *piPos = i2; + return rc; +} + + +/* +** The first argument is a pointer to an intkey internal node page. 
+** +** This function searches the node page for key iKey. If found, it returns +** the index of the matching key within the page and sets output variable +** (*pbExact) to 1. If there is no match for key iKey, this function returns +** the index of the smallest key on the page that is larger than iKey, or +** (nEntry) if all keys on the page are smaller than iKey. (*pbExact) is +** set to 0 before returning in this case. +*/ +static int hctDbIntkeyNodeSearch( + void *aPg, + i64 iKey, + int *pbExact +){ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)aPg; + int i1 = 0; + int i2 = pNode->pg.nEntry; + + assert( hctPagetype(pNode)==HCT_PAGETYPE_INTKEY && pNode->pg.nHeight>0 ); + while( i2>i1 ){ + int iTest = (i1+i2)/2; + i64 iPgKey = pNode->aEntry[iTest].iKey; + if( iPgKey==iKey ){ + *pbExact = 1; + return iTest; + }else if( iPgKey=0 ); + assert( i2==pNode->pg.nEntry || iKeyaEntry[i2].iKey ); + assert( i2==0 || iKey>pNode->aEntry[i2-1].iKey ); + + *pbExact = 0; + return i2; +} + +/* +** Set (*bGe) to true if (pRec >= (FP-key for aPg)). +*/ +static int hctDbCompareFPKey( + HctDatabase *pDb, + UnpackedRecord *pRec, + const u8 *aPg, + int *pbGe +){ + const u8 *aFP = 0; + int nFP = 0; + int res; + int rc; + HctBuffer buf = {0,0,0}; + + rc = hctDbLoadRecord(pDb, &buf, aPg, 0, &nFP, &aFP); + if( rc==SQLITE_OK ){ + res = sqlite3VdbeRecordCompare(nFP, aFP, pRec); + sqlite3HctBufferFree(&buf); + *pbGe = (res<=0); + } + return rc; +} + +static int hctDbCsrGoLeft(HctDbCsr*); + +/* +** Seek the cursor within its tree. This only seeks within the tree, it does +** not follow any old-data pointers. 
+*/ +int hctDbCsrSeek( + HctDbCsr *pCsr, /* Cursor to seek */ + HctDbFPKey *pFP, + int iHeight, /* Height to seek at (0==leaf, 1==parent) */ + RecordCompare xCompare, + UnpackedRecord *pRec, /* Key for index/without rowid tables */ + i64 iKey, /* Key for intkey tables */ + int *pbExact +){ + HctFile *pFile = pCsr->pDb->pFile; + u32 iPg = pCsr->iRoot; + int rc = SQLITE_OK; + + HctFilePage par; + memset(&par, 0, sizeof(par)); + int iPar = 0; + + if( pRec ) xCompare = find_record_compare(pRec, xCompare); + while( rc==SQLITE_OK ){ + if( iPg ) rc = sqlite3HctFilePageGet(pFile, iPg, &pCsr->pg); + if( rc==SQLITE_OK ){ + HctDbPageHdr *pHdr = (HctDbPageHdr*)pCsr->pg.aOld; + int i2 = pHdr->nEntry-1; + int bExact; + if( pHdr->nHeight==0 ){ + if( pRec ){ + rc = hctDbIndexSearch( + pCsr->pDb, pCsr->pg.aOld, xCompare, pRec, &i2, &bExact + ); + }else{ + i2 = hctDbIntkeyLeafSearch(pCsr->pg.aOld, iKey, &bExact); + } + if( bExact==0 ) i2--; + }else{ + if( pRec ){ + HctDbIndexNode *pNode = (HctDbIndexNode*)pCsr->pg.aOld; + rc = hctDbIndexSearch( + pCsr->pDb, pCsr->pg.aOld, xCompare, pRec, &i2, &bExact + ); + i2 -= !bExact; + iPg = pNode->aEntry[i2].iChildPg; + assert( iPg ); + }else{ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pCsr->pg.aOld; + i2 = hctDbIntkeyNodeSearch(pNode, iKey, &bExact); + assert( i2==pHdr->nEntry || iKey<=pNode->aEntry[i2].iKey ); + assert( i2==pHdr->nEntry || bExact==(iKey==pNode->aEntry[i2].iKey) ); + assert( i2nEntry || bExact==0 ); + i2 -= !bExact; + iPg = pNode->aEntry[i2].iChildPg; + assert( iPg ); + } + + /* Avoid following a pointer to an EVICTED page */ + if( pHdr->nHeight!=iHeight ){ + while( sqlite3HctFilePageIsEvicted(pFile, iPg) ){ + i2--; + if( i2<0 ){ + rc = hctDbCsrGoLeft(pCsr); + if( rc!=SQLITE_OK ) break; + i2 = pCsr->iCell; + } + + bExact = 0; + if( pRec ){ + HctDbIndexNode *pNode = (HctDbIndexNode*)pCsr->pg.aOld; + iPg = pNode->aEntry[i2].iChildPg; + }else{ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pCsr->pg.aOld; + iPg = 
pNode->aEntry[i2].iChildPg; + } + } + } + } + + + /* Test if it is necessary to skip to the peer node. */ + if( i2>=0 && i2==pHdr->nEntry-1 && pHdr->iPeerPg!=0 ){ + HctFilePage peer; + rc = sqlite3HctFilePageGet(pFile, pHdr->iPeerPg, &peer); + if( rc==SQLITE_OK ){ + int bGotoPeer; + if( pRec ){ + rc = hctDbCompareFPKey(pCsr->pDb, pRec, peer.aOld, &bGotoPeer); + }else{ + i64 iFP = hctDbIntkeyFPKey(peer.aOld); + bGotoPeer = (iFP<=iKey); + } + if( bGotoPeer ){ + SWAP(HctFilePage, pCsr->pg, peer); + sqlite3HctFilePageRelease(&peer); + assert( pCsr->pg.aOld ); + iPg = 0; + continue; + } + sqlite3HctFilePageRelease(&peer); + } + } + + if( pHdr->nHeight==iHeight ){ + pCsr->iCell = i2; + if( pbExact ) *pbExact = bExact; + + /* If parameter pFP was not NULL and there is a parent page stored + ** in variable par, try to load the FP key from that page. This + ** is used when seeking a cursor for writing. */ + if( pFP && par.aOld ){ + i64 iPeer = ((HctDbPageHdr*)pCsr->pg.aOld)->iPeerPg; + if( pRec ){ + HctDbIndexNode *pPar = (HctDbIndexNode*)par.aOld; + if( (iPar+1)pg.nEntry + && pPar->aEntry[iPar+1].iChildPg==iPeer + ){ + rc = hctDbLoadRecordFP(pCsr->pDb, par.aOld, iPar+1, pFP); + } + }else{ + HctDbIntkeyNode *pPar = (HctDbIntkeyNode*)par.aOld; + if( (iPar+1)pg.nEntry + && pPar->aEntry[iPar+1].iChildPg==iPeer + ){ + pFP->iKey = pPar->aEntry[iPar+1].iKey; + } + } + } + + break; + } + + if( pFP && pHdr->nHeight==iHeight+1 ){ + par = pCsr->pg; + iPar = i2; + memset(&pCsr->pg, 0, sizeof(HctFilePage)); + }else{ + sqlite3HctFilePageRelease(&pCsr->pg); + } + assert( rc!=SQLITE_OK || iPg!=0 ); + } + } + + if( pFP ) sqlite3HctFilePageRelease(&par); + return rc; +} + +SQLITE_PRIVATE void sqlite3HctDbCsrDir(HctDbCsr *pCsr, int eDir){ + pCsr->eDir = eDir; +} + +static int hctDbCellOffset(const u8 *aPage, int iCell, u8 *pFlags){ + HctDbPageHdr *pHdr = (HctDbPageHdr*)aPage; + int iRet; + if( hctPagetype(pHdr)==HCT_PAGETYPE_INTKEY ){ + HctDbIntkeyEntry *pEntry = 
&((HctDbIntkeyLeaf*)pHdr)->aEntry[iCell]; + *pFlags = pEntry->flags; + iRet = pEntry->iOff; + }else if( hctPageheight(pHdr)>0 ){ + HctDbIndexNodeEntry *pEntry = &((HctDbIndexNode*)pHdr)->aEntry[iCell]; + *pFlags = pEntry->flags; + iRet = pEntry->iOff; + }else{ + HctDbIndexEntry *pEntry = &((HctDbIndexLeaf*)pHdr)->aEntry[iCell]; + *pFlags = pEntry->flags; + iRet = pEntry->iOff; + } + return iRet; +} + +/* +** If the cursor is open on an index tree, ensure that the UnpackedRecord +** structure is allocated. Return SQLITE_NOMEM if an OOM is encountered +** while attempting to allocate said structure, or SQLITE_OK otherwise. +*/ +static int hctDbCsrAllocateUnpacked(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + if( pCsr->pKeyInfo && pCsr->pRec==0 ){ + pCsr->pRec = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo); + if( pCsr->pRec==0 ){ + rc = SQLITE_NOMEM_BKPT; + } + } + return rc; +} + +static const u8 *hctDbCsrPageAndCellIdx( + HctDbCsr *pCsr, + int iIdx, + int *piCell +){ + const u8 *aPg = 0; + int iCell = 0; + + if( iIdx<0 ){ + aPg = pCsr->pg.aOld; + iCell = pCsr->iCell; + }else{ + aPg = pCsr->aRange[iIdx].pg.aOld; + iCell = pCsr->aRange[iIdx].iCell; + } + *piCell = iCell; + return aPg; +} + +/* +** Return a pointer to the current page accessed by the cursor. Before +** returning, also set output variable (*piCell) to the index of the +** current cell within the page. 
+*/ +static const u8 *hctDbCsrPageAndCell(HctDbCsr *pCsr, int *piCell){ + const u8 *aPg = 0; + int iCell = 0; + if( pCsr->nRange ){ + aPg = pCsr->aRange[pCsr->nRange-1].pg.aOld; + iCell = pCsr->aRange[pCsr->nRange-1].iCell; + }else{ + aPg = pCsr->pg.aOld; + iCell = pCsr->iCell; + } + + *piCell = iCell; + return aPg; +} + +static void hctDbFreeKeyContents(HctDbKey *pKey){ + hctDbFreeUnpacked(pKey->pKey); + sqlite3HctBufferFree(&pKey->buf); +} + +static void hctDbCsrAscendRange(HctDbCsr *pCsr){ + HctDbRangeCsr *pLast = &pCsr->aRange[--pCsr->nRange]; + assert( pCsr->nRange>=0 ); + hctDbFreeKeyContents(&pLast->highkey); + hctDbFreeKeyContents(&pLast->lowkey); + sqlite3HctFilePageRelease(&pLast->pg); +} + +static void hctDbCsrReset(HctDbCsr *pCsr){ + sqlite3HctFilePageRelease(&pCsr->pg); + pCsr->iCell = -1; + while( pCsr->nRange>0 ){ + hctDbCsrAscendRange(pCsr); + } +} + +static void hctDbFreeCsr(HctDbCsr *pCsr){ + hctDbCsrReset(pCsr); + while( pCsr->intkey.pOpList ){ + HctCsrIntkeyOp *pOp = pCsr->intkey.pOpList; + pCsr->intkey.pOpList = pOp->pNextOp; + sqlite3_free(pOp); + } + while( pCsr->index.pOpList ){ + HctCsrIndexOp *pOp = pCsr->index.pOpList; + pCsr->index.pOpList = pOp->pNextOp; + if( pOp->pLast!=pOp->pFirst ){ + sqlite3_free(pOp->pLast); + } + sqlite3_free(pOp->pFirst); + sqlite3_free(pOp); + } + if( pCsr->pRec ) sqlite3DbFree(pCsr->pKeyInfo->db, pCsr->pRec); + sqlite3KeyInfoUnref(pCsr->pKeyInfo); + sqlite3HctBufferFree(&pCsr->rec); + sqlite3_free(pCsr->aRange); + pCsr->aRange = 0; + pCsr->nRangeAlloc = 0; + sqlite3_free(pCsr); +} + +static void hctDbCsrCleanup(HctDbCsr *pCsr){ + hctDbCsrReset(pCsr); + if( pCsr->pKeyInfo ){ + sqlite3DbFree(pCsr->pKeyInfo->db, pCsr->pRec); + sqlite3KeyInfoUnref(pCsr->pKeyInfo); + pCsr->pKeyInfo = 0; + pCsr->pRec = 0; + } + sqlite3_free(pCsr->aRange); + pCsr->aRange = 0; + pCsr->nRangeAlloc = 0; + sqlite3HctBufferFree(&pCsr->rec); + pCsr->iRoot = 0; +} + +static int hctDbCsrScanStart(HctDbCsr *pCsr, UnpackedRecord *pRec, i64 
iKey){ + int rc = SQLITE_OK; + + if( pCsr->pDb->bConcurrent ){ + if( pCsr->pDb->iTid==0 ){ + if( pCsr->pKeyInfo==0 ){ + HctCsrIntkeyOp *pOp = 0; + pOp = sqlite3MallocZero(sizeof(HctCsrIntkeyOp)); + if( pOp==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + assert( pCsr->intkey.pCurrentOp==0 ); + pOp->iFirst = pOp->iLast = iKey; + pCsr->intkey.pCurrentOp = pOp; + pOp->iLogical = pCsr->pg.iPg; + pOp->iPhysical = pCsr->pg.iOldPg; + } + }else{ + HctCsrIndexOp *pOp = 0; + pOp = sqlite3MallocZero(sizeof(HctCsrIndexOp)); + if( pOp==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + if( pRec ){ + rc = sqlite3HctSerializeRecord(pRec, &pOp->pFirst, &pOp->nFirst); + pOp->pLast = pOp->pFirst; + pOp->nLast = pOp->nFirst; + pOp->iLogical = pCsr->pg.iPg; + pOp->iPhysical = pCsr->pg.iOldPg; + } + assert( pCsr->index.pCurrentOp==0 ); + pCsr->index.pCurrentOp = pOp; + } + } + } + } + + return rc; +} + +static int hctDbCsrScanFinish(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + if( pCsr->pDb->bConcurrent ){ + if( pCsr->pKeyInfo==0 ){ + HctCsrIntkeyOp *pOp = pCsr->intkey.pCurrentOp; + pCsr->intkey.pCurrentOp = 0; + if( pOp ){ + HctCsrIntkeyOp *pPrev = pCsr->intkey.pOpList; + + if( pCsr->eDir!=BTREE_DIR_NONE ){ + i64 iVal = 0; + if( sqlite3HctDbCsrEof(pCsr) ){ + if( pCsr->eDir==BTREE_DIR_FORWARD ){ + iVal = LARGEST_INT64; + }else{ + iVal = SMALLEST_INT64; + } + pOp->iLogical = pOp->iPhysical = 0; + }else{ + sqlite3HctDbCsrKey(pCsr, &iVal); + if( pCsr->pg.iPg!=pOp->iLogical ){ + pOp->iLogical = pOp->iPhysical = 0; + } + } + + if( iVal>=pOp->iFirst ){ + pOp->iLast = iVal; + }else{ + pOp->iLast = pOp->iFirst; + pOp->iFirst = iVal; + } + } + + if( pPrev && pOp->iLast<=pPrev->iLast && pOp->iFirst>=pPrev->iFirst ){ + pPrev->iLogical = pPrev->iPhysical = 0; + sqlite3_free(pOp); + }else{ + pOp->pNextOp = pPrev; + pCsr->intkey.pOpList = pOp; + } + } + }else{ + HctCsrIndexOp *pOp = pCsr->index.pCurrentOp; + pCsr->index.pCurrentOp = 0; + if( pOp ){ + if( pCsr->eDir!=BTREE_DIR_NONE ){ + int nKey = 0; + u8 *aCopy = 0; + 
if( !sqlite3HctDbCsrEof(pCsr) ){ + const u8 *aKey = 0; + rc = sqlite3HctDbCsrData(pCsr, &nKey, &aKey); + if( rc==SQLITE_OK ){ + aCopy = sqlite3_malloc(nKey); + if( aCopy==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + hctMemcpy(aCopy, aKey, nKey); + } + } + if( pCsr->pg.iPg!=pOp->iLogical ){ + pOp->iLogical = pOp->iPhysical = 0; + } + }else{ + pOp->iLogical = pOp->iPhysical = 0; + } + + if( pCsr->eDir==BTREE_DIR_FORWARD ){ + pOp->pLast = aCopy; + pOp->nLast = nKey; + }else{ + pOp->pFirst = aCopy; + pOp->nFirst = nKey; + } + } + + pOp->pNextOp = pCsr->index.pOpList; + pCsr->index.pOpList = pOp; + } + } + } + + return rc; +} + +static int hctDbCsrFirst(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + + /* Starting at the root of the tree structure, follow the left-most + ** pointers to find the left-most node in the list of leaves. */ + u32 iPg = pCsr->iRoot; + HctFile *pFile = pCsr->pDb->pFile; + HctFilePage pg; + while( 1 ){ + HctDbPageHdr *pPg; + rc = sqlite3HctFilePageGet(pFile, iPg, &pg); + if( rc!=SQLITE_OK ) break; + pPg = (HctDbPageHdr*)pg.aOld; + if( pPg->nHeight==0 ){ + break; + }else if( hctPagetype(pPg)==HCT_PAGETYPE_INTKEY ){ + iPg = ((HctDbIntkeyNode*)pPg)->aEntry[0].iChildPg; + }else{ + iPg = ((HctDbIndexNode*)pPg)->aEntry[0].iChildPg; + } + sqlite3HctFilePageRelease(&pg); + } + hctMemcpy(&pCsr->pg, &pg, sizeof(pg)); + if( ((HctDbPageHdr*)pCsr->pg.aOld)->nEntry>0 ){ + pCsr->iCell = 0; + }else{ + pCsr->iCell = -1; + } + return rc; +} + +static int hctDbCsrFirstValid(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + + rc = hctDbCsrFirst(pCsr); + + /* Skip forward to the first visible entry, if any. 
*/ + if( rc==SQLITE_OK ){ + pCsr->iCell = -1; + rc = sqlite3HctDbCsrNext(pCsr); + } + + return rc; +} + +static int hctDbCellPut( + u8 *aBuf, + HctDbCell *pCell, + int nLocal +){ + int iOff = 0; + if( pCell->iTid ){ + hctMemcpy(&aBuf[iOff], &pCell->iTid, sizeof(u64)); + iOff += sizeof(u64); + } + if( pCell->iRangeTid ){ + hctMemcpy(&aBuf[iOff], &pCell->iRangeTid, sizeof(u64)); + iOff += sizeof(u64); + } + if( pCell->iRangeOld ){ + hctMemcpy(&aBuf[iOff], &pCell->iRangeOld, sizeof(u32)); + iOff += sizeof(u32); + } + if( pCell->iOvfl ){ + hctMemcpy(&aBuf[iOff], &pCell->iOvfl, sizeof(u32)); + iOff += sizeof(u32); + } + hctMemcpy(&aBuf[iOff], pCell->aPayload, nLocal); + return iOff+nLocal; +} + +static void hctDbCellGet( + HctDatabase *pDb, + const u8 *aBuf, + int flags, + HctDbCell *pCell +){ + int iOff = 0; + memset(pCell, 0, sizeof(HctDbCell)); + + if( flags & HCTDB_HAS_TID ){ + hctMemcpy(&pCell->iTid, &aBuf[iOff], sizeof(u64)); + iOff += sizeof(u64); + } + if( flags & HCTDB_HAS_RANGETID ){ + hctMemcpy(&pCell->iRangeTid, &aBuf[iOff], sizeof(u64)); + iOff += sizeof(u64); + } + if( flags & HCTDB_HAS_RANGEOLD ){ + hctMemcpy(&pCell->iRangeOld, &aBuf[iOff], sizeof(u32)); + iOff += sizeof(u32); + } + if( flags & HCTDB_HAS_OVFL ){ + hctMemcpy(&pCell->iOvfl, &aBuf[iOff], sizeof(u32)); + iOff += sizeof(u32); + } + + pCell->aPayload = &aBuf[iOff]; +} + +static void hctDbCellGetByIdx( + HctDatabase *pDb, + const u8 *aPg, + int iIdx, + HctDbCell *pCell +){ + HctDbIndexEntry *p = hctDbEntryEntry(aPg, iIdx); + hctDbCellGet(pDb, &aPg[p->iOff], p->flags, pCell); +} + +static u8 hctDbCellToFlags(HctDbCell *pCell){ + u8 flags = 0; + if( pCell->iTid ) flags |= HCTDB_HAS_TID; + if( pCell->iOvfl ) flags |= HCTDB_HAS_OVFL; + if( pCell->iRangeTid ) flags |= HCTDB_HAS_RANGETID; + if( pCell->iRangeOld ) flags |= HCTDB_HAS_RANGEOLD; + return flags; +} + +typedef struct HctRangePtr HctRangePtr; +struct HctRangePtr { + u64 iRangeTid; + u64 iFollowTid; + u32 iOld; +}; + +/* +** This function is 
called when a reader encounters an old-range pointer +** with associated TID value iRangeTid. It returns true if the pointer +** should be followed, or false otherwise. +** +** If the data items on the linked page should be merged in to the cursor +** results, output parameter (*pbMerge) is set to true before returning. +** This happens if the transaction with TID iRangeTid is not visible to +** the reader. Or, if the only reason to follow the pointer is in order +** to follow other pointers on the indicated page, (*pbMerge) is set to +** true. This happens when iRangeTid is included in the transaction, but +** there exists one or more transactions with TID values smaller than iRangeTid +** that are not. +*/ +static int hctDbFollowRangeOld( + HctDatabase *pDb, + HctRangePtr *pPtr, + int *pbMerge +){ + int bRet = 0; + int bMerge = 0; + u64 iRangeTidValue = (pPtr->iRangeTid & HCT_TID_MASK); + + /* HctDatabase.iTid is set when writing, validating or rolling back a + ** transaction. When writing or validating, old-ranges created by this + ** transaction should not be merge in, even if they are followed. But, when + ** doing rollback, they must be merged in (to find the old data). */ + + i64 iDoNotMergeTid = (pDb->eMode==HCT_MODE_VALIDATE) ? 0 : pDb->iTid; + assert( pDb->eMode!=HCT_MODE_ROLLBACK ); + + if( iRangeTidValue>pDb->iLocalMinTid ){ + bRet = 1; + if( iDoNotMergeTid!=iRangeTidValue ){ + bMerge = (0==hctDbTidIsVisible(pDb, pPtr->iRangeTid, 0)); + } + }else if( (pPtr->iFollowTid & HCT_TID_MASK)>pDb->iLocalMinTid ){ + bRet = 1; + assert( bMerge==0 ); + } + + *pbMerge = bMerge; + assert( bRet==0 || iRangeTidValue>0 ); + return bRet; +} + +static int hctDbCsrExtendRange(HctDbCsr *pCsr){ + if( pCsr->nRange==pCsr->nRangeAlloc ){ + int nNew = pCsr->nRangeAlloc ? 
pCsr->nRangeAlloc*2 : 16; + HctDbRangeCsr *aNew = 0; + + aNew = (HctDbRangeCsr*)sqlite3_realloc( + pCsr->aRange, nNew*sizeof(HctDbRangeCsr) + ); + if( aNew==0 ) return SQLITE_NOMEM_BKPT; + pCsr->nRangeAlloc = nNew; + pCsr->aRange = aNew; + } + + memset(&pCsr->aRange[pCsr->nRange], 0, sizeof(HctDbRangeCsr)); + pCsr->nRange++; + return SQLITE_OK; +} + +static int hctDbCompareKey2( + KeyInfo *pKeyInfo, + UnpackedRecord *pKey1, + i64 iKey1, + HctDbKey *p2 +){ + int ret = 0; + if( pKeyInfo ){ + int ii = 0; + int n1, n2; + + if( pKey1==0 ) return 1; + if( p2->pKey==0 ) return -1; + + n1 = pKey1->nField; + n2 = p2->pKey->nField; + + for(ii=0; ret==0 && iiaColl[ii]; + ret = sqlite3MemCompare(&pKey1->aMem[ii], &p2->pKey->aMem[ii], pColl); + if( pKeyInfo->aSortFlags[ii] & KEYINFO_ORDER_DESC ) ret = -ret; + } + if( ret==0 ){ + /* default_rc==1 if the key has been passed to hctDbDecrementKey() */ + assert( pKey1->default_rc==0 || pKey1->default_rc==1 ); + assert( p2->pKey->default_rc==0 || p2->pKey->default_rc==1 ); + ret = p2->pKey->default_rc - pKey1->default_rc; + } + if( ret==0 ){ + if( n1n2 ){ + ret = +1; + } + } + }else{ + if( iKey1iKey ){ + ret = -1; + }else if( iKey1>p2->iKey ){ + ret = +1; + } + } + return ret; +} + +/* +** Compare the key values in p1 and p2, returning a value less than, equal +** to, or greater than zero if p1 is respectively less than, equal to or +** greater than p2. i.e. 
+** +** res = (*p1) - (*p2) +*/ +static int hctDbCompareKey(KeyInfo *pKeyInfo, HctDbKey *p1, HctDbKey *p2){ + return hctDbCompareKey2(pKeyInfo, p1->pKey, p1->iKey, p2); +} + +static int hctDbCopyKey(HctDbKey *p1, HctDbKey *p2){ + if( p2->pKey ){ + int ii; + int bNew = 0; + if( p1->pKey==0 || p1->pKey->nFieldpKey->nField ){ + int rc = SQLITE_OK; + hctDbFreeUnpacked(p1->pKey); + p1->pKey = hctDbAllocateUnpacked(&rc, p2->pKey->pKeyInfo); + if( rc!=SQLITE_OK ) return rc; + bNew = 1; + p1->pKey->default_rc = 0; + } + for(ii=0; iipKey->nField; ii++){ + Mem *pFrom = &p2->pKey->aMem[ii]; + Mem *pTo = &p1->pKey->aMem[ii]; + if( bNew ) sqlite3VdbeMemInit(pTo, pFrom->db, 0); + sqlite3VdbeMemShallowCopy(pTo, pFrom, MEM_Static); + } + p1->pKey->nField = p2->pKey->nField; + p1->pKey->default_rc = p2->pKey->default_rc; + }else{ + p1->iKey = p2->iKey; + } + return SQLITE_OK; +} + +static void hctDbDecrementKey(HctDbKey *pKey){ + if( pKey->pKey ){ + /* TODO: Is this correct? Or should it be +1? Or...? */ + pKey->pKey->default_rc = +1; + }else if( pKey->iKey!=SMALLEST_INT64 ){ + pKey->iKey--; + } +} + +static void hctDbCsrDescendRange( + int *pRc, + HctDbCsr *pCsr, + u64 iRangeTid, + u32 iRangeOld, + int bMerge +){ + int rc = *pRc; + + if( rc==SQLITE_OK ){ + rc = hctDbCsrExtendRange(pCsr); + } + + if( rc==SQLITE_OK ){ + HctDbRangeCsr *pNew = &pCsr->aRange[pCsr->nRange-1]; + assert( bMerge==HCT_RANGE_FOLLOW || bMerge==HCT_RANGE_MERGE ); + + pNew->eRange = bMerge; + pNew->iRangeTid = iRangeTid; + rc = hctDbGetPhysical(pCsr->pDb, iRangeOld, &pNew->pg); + + if( rc==SQLITE_OK ){ + int iPar = pCsr->nRange-2; + int iPCell = 0; + const u8 *aParent = hctDbCsrPageAndCellIdx(pCsr, iPar, &iPCell); + const HctDbPageHdr *pPar = (HctDbPageHdr*)aParent; + int bSeen = 0; + + /* Figure out the upper limit key for the scan of this page */ + if( hctPagetype(aParent)==HCT_PAGETYPE_HISTORY ){ + if( iPCell==0 && pPar->nEntry>1 ){ + const HctDbHistoryFan *pFan = (const HctDbHistoryFan*)aParent; + 
hctDbGetKeyFromPage(&rc, pCsr->pDb, pCsr->pKeyInfo, + 0, pFan->aPgOld1[0], pFan->iSplit0, &pNew->highkey + ); + bSeen = 1; + } + }else{ + if( iPCell==(pPar->nEntry-1) ){ + if( pPar->iPeerPg ){ + hctDbGetKeyFromPage(&rc, pCsr->pDb, pCsr->pKeyInfo, + 1, pPar->iPeerPg, 0, &pNew->highkey + ); + bSeen = 1; + } + }else{ + hctDbGetKey(&rc, + pCsr->pDb, pCsr->pKeyInfo, 0, aParent, iPCell+1, &pNew->highkey + ); + bSeen = 1; + } + } + + if( bSeen==0 ){ + if( iPar>=0 ){ + hctDbCopyKey(&pNew->highkey, &pNew[-1].highkey); + }else{ + pNew->highkey.iKey = LARGEST_INT64; + assert( pNew->highkey.pKey==0 ); + } + }else if( iPar>=0 ){ + /* The 'highkey' should be the minimum of pNew->highkey and the + ** parent highkey. highkey = MIN(highkey, parent.highkey); */ + HctDbKey *pPKey = &pNew[-1].highkey; + if( hctDbCompareKey(pCsr->pKeyInfo, &pNew->highkey, pPKey)>0 ){ + hctDbCopyKey(&pNew->highkey, pPKey); + } + } + + /* Figure the lower limit key for the scan of this page */ + pNew->lowkey.iKey = SMALLEST_INT64; + if( hctPagetype(aParent)==HCT_PAGETYPE_HISTORY ){ + if( iPCell>0 ){ + const HctDbHistoryFan *pFan = (const HctDbHistoryFan*)aParent; + hctDbGetKeyFromPage(&rc, pCsr->pDb, pCsr->pKeyInfo, + 0, pFan->aPgOld1[0], pFan->iSplit0, &pNew->lowkey + ); + hctDbDecrementKey(&pNew->lowkey); + }else{ + hctDbCopyKey(&pNew->lowkey, &pNew[-1].lowkey); + } + }else{ + HctDbCell pcell; + hctDbGetKey(&rc, + pCsr->pDb, pCsr->pKeyInfo, 0, aParent, iPCell, &pNew->lowkey + ); + hctDbCellGetByIdx(pCsr->pDb, aParent, iPCell, &pcell); + if( hctDbTidIsVisible(pCsr->pDb, pcell.iTid, 0)==0 ){ + hctDbDecrementKey(&pNew->lowkey); + } + } + if( iPar>=0 ){ + /* The 'lowkey' should be the maximum of pNew->lowkey and the + ** parent lowkey. 
lowkey = MAX(lowkey, parent.lowkey); */ + HctDbKey *pPKey = &pNew[-1].lowkey; + if( hctDbCompareKey(pCsr->pKeyInfo, &pNew->lowkey, pPKey)<0 ){ + hctDbCopyKey(&pNew->lowkey, pPKey); + } + } + + if( rc==SQLITE_OK && hctPagetype(pNew->pg.aOld)==HCT_PAGETYPE_HISTORY){ + pNew->eRange = HCT_RANGE_FAN; + } + } + } + + *pRc = rc; +} + +static void hctDbGetRange( + const u8 *aPg, + int iCell, + HctRangePtr *pPtr +){ + if( iCell<0 ){ + memset(pPtr, 0, sizeof(*pPtr)); + }else if( hctPagetype(aPg)==HCT_PAGETYPE_HISTORY ){ + HctDbHistoryFan *pFan = (HctDbHistoryFan*)aPg; + if( iCell==0 ){ + pPtr->iRangeTid = pFan->iRangeTid0; + pPtr->iFollowTid = pFan->iFollowTid0; + pPtr->iOld = pFan->pgOld0; + }else{ + pPtr->iFollowTid = pPtr->iRangeTid = pFan->iRangeTid1; + pPtr->iOld = pFan->aPgOld1[iCell-1]; + } + }else{ + HctDbCell cell; + hctDbCellGetByIdx(0, aPg, iCell, &cell); + pPtr->iFollowTid = pPtr->iRangeTid = cell.iRangeTid; + pPtr->iOld = cell.iRangeOld; + } + + assert( (pPtr->iFollowTid & HCT_TID_MASK)>=(pPtr->iRangeTid & HCT_TID_MASK) ); +} + +static void hctDbCsrGetRange( + HctDbCsr *pCsr, + HctRangePtr *pPtr +){ + const u8 *aPg = 0; + int iCell = 0; + aPg = hctDbCsrPageAndCell(pCsr, &iCell); + assert( ((HctDbPageHdr*)aPg)->nEntry>iCell ); + assert( ((HctDbPageHdr*)aPg)->nHeight==0 ); + hctDbGetRange(aPg, iCell, pPtr); +} + +/* +** Return true if the entry that the cursor currently points to is visible +** to the current transaction, or false otherwise. 
+*/ +static int hctDbCurrentIsVisible(HctDbCsr *pCsr){ + int iCell = 0; + HctDbIndexEntry *p; + const u8 *aPg = hctDbCsrPageAndCell(pCsr, &iCell); + u64 iTid = 0; + + if( pCsr->pKeyInfo ){ + p = &((HctDbIndexLeaf*)aPg)->aEntry[iCell]; + }else{ + p = (HctDbIndexEntry*)&((HctDbIntkeyLeaf*)aPg)->aEntry[iCell]; + } + if( (p->flags & HCTDB_HAS_TID)==0 ) return 1; + hctMemcpy(&iTid, &aPg[p->iOff], sizeof(u64)); + if( pCsr->pDb->iTid==iTid && pCsr->pDb->eMode==HCT_MODE_VALIDATE ) return 1; + + return hctDbTidIsVisible(pCsr->pDb, iTid, pCsr->bNosnap); +} + +/* +** Search leaf page aPg[] for a specified key. +** +** If the key is present in the page, set output variable (*piPos) to +** the index of the key in the page, and (*pbExact) to true. +** +** Or, if the key is not present in the page, set output variable (*piPos) +** to the index of the SMALLEST KEY THAT IS LARGER THAN IKEY/PKEY, and +** set (*pbExact) to false. +*/ +static int hctDbLeafSearch( + HctDatabase *pDb, + const u8 *aPg, + i64 iKey, + UnpackedRecord *pKey, + int *piPos, + int *pbExact +){ + if( hctPagetype(aPg)==HCT_PAGETYPE_INDEX ){ + if( pKey==0 ){ + *piPos = hctPagenentry(aPg); + *pbExact = 0; + }else{ + int rc = hctDbIndexSearch(pDb, aPg, 0, pKey, piPos, pbExact); + if( rc ) return rc; + } + }else{ + *piPos = hctDbIntkeyLeafSearch(aPg, iKey, pbExact); + } + return SQLITE_OK; +} + +static int hctDbCsrRollbackDescend( + HctDbCsr *pCsr, /* Cursor to seek */ + UnpackedRecord *pRec, /* Key for index/without rowid tables */ + i64 iKey, /* Key for intkey tables */ + int *pbExact +){ + HctDatabase *pDb = pCsr->pDb; + int bExact = 0; + int rc = SQLITE_OK; + + assert( pDb->eMode==HCT_MODE_ROLLBACK ); + while( 1 ){ + HctRangePtr ptr; + HctDbRangeCsr *p = 0; + + hctDbCsrGetRange(pCsr, &ptr); + + if( (ptr.iFollowTid & HCT_TID_MASK)pDb->iTid ) break; + + rc = hctDbCsrExtendRange(pCsr); + if( rc==SQLITE_OK ){ + p = &pCsr->aRange[pCsr->nRange-1]; + rc = hctDbGetPhysical(pDb, ptr.iOld, &p->pg); + } + if( rc==SQLITE_OK 
){ + p->iRangeTid = ptr.iRangeTid & HCT_TID_MASK; + if( hctPagetype(p->pg.aOld)==HCT_PAGETYPE_HISTORY ){ + p->eRange = HCT_RANGE_FAN; + p->iCell = hctDbFanSearch(&rc, pCsr->pDb, p->pg.aOld, pRec, iKey); + bExact = 0; + }else{ + p->eRange = HCT_RANGE_MERGE; + rc = hctDbLeafSearch( + pCsr->pDb, p->pg.aOld, iKey, pRec, &p->iCell, &bExact + ); + if( rc!=SQLITE_OK || bExact ) break; + p->iCell--; + if( p->iCell<0 ) break; + } + } + } + + *pbExact = bExact; + return rc; +} + +static int hctDbCsrSeekAndDescend( + HctDbCsr *pCsr, /* Cursor to seek */ + UnpackedRecord *pRec, /* Key for index/without rowid tables */ + i64 iKey, /* Key for intkey tables */ + int bStopOnExact, /* Stop on exact match, even if not visible */ + int *pbExact +){ + int rc = SQLITE_OK; + int bExact = 0; + + /* This function is never called when writing to the database. Or while + ** doing rollback. But it is called during transaction preparation (iTid==0), + ** and validation (eMode==HCT_MODE_VALIDATE). */ + assert( pCsr->pDb->eMode==HCT_MODE_VALIDATE || pCsr->pDb->iTid==0 ); + + rc = hctDbCsrSeek(pCsr, 0, 0, 0, pRec, iKey, &bExact); + if( bExact && bStopOnExact ){ + *pbExact = 1; + return rc; + } + + while( rc==SQLITE_OK && (0==bExact || 0==hctDbCurrentIsVisible(pCsr)) ){ + HctRangePtr ptr; + int bMerge = 0; + + /* Check if there is a range pointer that we should follow */ + hctDbCsrGetRange(pCsr, &ptr); + if( hctDbFollowRangeOld(pCsr->pDb, &ptr, &bMerge) ){ + hctDbCsrDescendRange(&rc, pCsr, ptr.iRangeTid, ptr.iOld, bMerge); + if( rc==SQLITE_OK ){ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; + + assert( hctDbCompareKey2(pCsr->pKeyInfo, pRec, iKey, &p->lowkey)>=0 ); + if( hctDbCompareKey2(pCsr->pKeyInfo, pRec, iKey, &p->lowkey)<=0 ){ + p->iCell = -1; + break; + } + + if( p->eRange==HCT_RANGE_FAN ){ + p->iCell = hctDbFanSearch(&rc, pCsr->pDb, p->pg.aOld, pRec, iKey); + bExact = 0; + }else{ + rc = hctDbLeafSearch( + pCsr->pDb, p->pg.aOld, iKey, pRec, &p->iCell, &bExact + ); + if( rc!=SQLITE_OK 
) break; + if( bExact==0 ){ + p->iCell--; + }else if( bStopOnExact ){ + *pbExact = 1; + return SQLITE_OK; + } + if( p->iCell<0 ) break; + if( p->eRange==HCT_RANGE_FOLLOW ) bExact = 0; + } + } + }else{ + break; + } + } + + while( rc==SQLITE_OK && pCsr->nRange>0 ){ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; + if( p->eRange==HCT_RANGE_MERGE && p->iCell>=0 ) break; + hctDbCsrAscendRange(pCsr); + } + + *pbExact = bExact; + return rc; +} + +/* +** Find the CID of the last transaction to write to a specified key. +** +** This must be called from within a transaction. +*/ +SQLITE_PRIVATE int sqlite3HctDbCsrFindLastWrite( + HctDbCsr *pCsr, /* Cursor to seek */ + UnpackedRecord *pRec, /* Key for index/without rowid tables */ + i64 iKey, /* Key for intkey tables */ + u64 *piCid /* Last CID to write to this key */ +){ + int rc = SQLITE_OK; + u64 iCid = 0; + int bExact = 0; + + rc = hctDbCsrSeekAndDescend(pCsr, pRec, iKey, 1, &bExact); + if( rc==SQLITE_OK && bExact ){ + u64 iTid = 0; + if( pCsr->nRange>1 ){ + /* In this case the key has been deleted. Find the TID of the + ** transaction that deleted it. */ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-2]; + HctRangePtr ptr; + hctDbGetRange(p->pg.aOld, p->iCell, &ptr); + iTid = ptr.iRangeTid; + }else{ + HctDbCell cell; + hctDbCellGetByIdx(pCsr->pDb, pCsr->pg.aOld, pCsr->iCell, &cell); + if( pCsr->nRange ){ + assert( pCsr->nRange==1 ); + iTid = cell.iRangeTid; + }else{ + iTid = cell.iTid; + } + } + + if( iTid ){ + u64 dummy = 0; + iTid = (iTid & HCT_TID_MASK); + iCid = hctDbTMapLookup(pCsr->pDb, iTid, &dummy); + }else{ + iCid = 1; + } + } + + *piCid = iCid; + return rc; +} + +/* +** An integer is written into *pRes which is the result of +** comparing the key with the entry to which the cursor is +** pointing. 
The meaning of the integer written into +** *pRes is as follows: +** +** *pRes<0 The cursor is left pointing at an entry that +** is smaller than iKey/pRec or if the table is empty +** and the cursor is therefore left point to nothing. +** +** *pRes==0 The cursor is left pointing at an entry that +** exactly matches iKey/pRec. +** +** *pRes>0 The cursor is left pointing at an entry that +** is larger than iKey/pRec. +*/ +SQLITE_PRIVATE int sqlite3HctDbCsrSeek( + HctDbCsr *pCsr, /* Cursor to seek */ + UnpackedRecord *pRec, /* Key for index tables */ + i64 iKey, /* Key for intkey tables */ + int *pRes /* Result of seek (see above) */ +){ + int rc = SQLITE_OK; + int bExact; + + /* Should not be called while committing, validating or during rollback. */ + assert( pCsr->pDb->eMode==HCT_MODE_NORMAL ); + assert( pCsr->pDb->iTid==0 ); + + rc = hctDbCsrScanFinish(pCsr); + hctDbCsrReset(pCsr); + + if( rc==SQLITE_OK ){ + rc = hctDbCsrSeekAndDescend(pCsr, pRec, iKey, 0, &bExact); + } + if( rc==SQLITE_OK ){ + rc = hctDbCsrScanStart(pCsr, pRec, iKey); + } + + /* The main cursor now points to the largest entry less than or equal + ** to the supplied key (pRec or iKey). If the supplied key is smaller + ** than all entries in the table, then pCsr->iCell is set to -1. */ + if( rc==SQLITE_OK ){ + if( pCsr->iCell<0 ){ + /* The supplied key is smaller than all keys in the table. If the cursor + ** is BTREE_DIR_REVERSE or NONE, then leave it as it is at EOF. + ** Otherwise, if the cursor is BTREE_DIR_FORWARD, attempt to move + ** it to the first valid entry. */ + if( pCsr->eDir==BTREE_DIR_FORWARD ){ + rc = hctDbCsrFirstValid(pCsr); + *pRes = sqlite3HctDbCsrEof(pCsr) ? -1 : +1; + }else{ + *pRes = -1; + } + }else{ + + if( rc==SQLITE_OK && 0==hctDbCurrentIsVisible(pCsr) ){ + switch( pCsr->eDir ){ + case BTREE_DIR_FORWARD: + *pRes = 1; + rc = sqlite3HctDbCsrNext(pCsr); + *pRes = sqlite3HctDbCsrEof(pCsr) ? 
-1 : +1; + break; + case BTREE_DIR_REVERSE: + rc = sqlite3HctDbCsrPrev(pCsr); + /* Either the cursor is is now at EOF or it points to a key + ** smaller than iKey/pRec. Either way, set (*pRes) to -ve. */ + *pRes = -1; + break; + default: assert( pCsr->eDir==BTREE_DIR_NONE ); + hctDbCsrReset(pCsr); + *pRes = -1; + break; + } + }else{ + *pRes = (bExact ? 0 : -1); + } + } + } + + return rc; +} + +SQLITE_PRIVATE void sqlite3HctDbSetSavePhysical( + HctDatabase *pDb, + int (*xSave)(void*, i64 iPhys), + void *pSave +){ + pDb->xSavePhysical = xSave; + pDb->pSavePhysical = pSave; +} + +SQLITE_PRIVATE int sqlite3HctDbCsrRollbackSeek( + HctDbCsr *pCsr, /* Cursor to seek */ + UnpackedRecord *pRec, /* Key for index tables */ + i64 iKey, /* Key for intkey tables */ + int *pOp /* Required rollback op */ +){ + HctDatabase *pDb = pCsr->pDb; + int rc = SQLITE_OK; + int bExact = 0; + int op = 0; + + hctDbCsrReset(pCsr); + + /* At this point pDb->bRollback is set and pDb->iTid is set to the TID + ** of the transaction being rolled back. There are four possibilities: + ** + ** 1) The key was written by transaction pDb->iTid and there was no + ** previous entry. + ** + ** 2) The key was written by transaction pDb->iTid and there is a + ** previous entry to restore. + ** + ** 3) The key was deleted by transaction pDb->iTid. + ** + ** 4) None of the above. No rollback required. 
+ */ + + rc = hctDbCsrSeek(pCsr, 0, 0, 0, pRec, iKey, &bExact); + if( rc==SQLITE_OK && bExact==0 ){ + rc = hctDbCsrRollbackDescend(pCsr, pRec, iKey, &bExact); + } + + if( rc==SQLITE_OK && bExact ){ + HctDbCell cell; + int iCell = 0; + const u8 *aPg = hctDbCsrPageAndCell(pCsr, &iCell); + + memset(&cell, 0, sizeof(cell)); + hctDbCellGetByIdx(pDb, aPg, iCell, &cell); + if( cell.iTid==pDb->iTid ){ + op = -1; + rc = hctDbCsrRollbackDescend(pCsr, pRec, iKey, &bExact); + } + + if( rc==SQLITE_OK + && bExact + && pCsr->nRange && pDb->iTid==pCsr->aRange[pCsr->nRange-1].iRangeTid + ){ + op = +1; + } + } + + *pOp = op; + return rc; +} + +SQLITE_PRIVATE int sqlite3HctDbIsIndex(HctDatabase *pDb, u32 iRoot, int *pbIndex){ + HctFilePage pg; + int rc = sqlite3HctFilePageGet(pDb->pFile, iRoot, &pg); + if( rc==SQLITE_OK ){ + *pbIndex = (hctPagetype(pg.aOld)==HCT_PAGETYPE_INDEX); + sqlite3HctFilePageRelease(&pg); + } + return rc; +} + +SQLITE_PRIVATE char *sqlite3HctDbLogFile(HctDatabase *pDb){ + return sqlite3HctFileLogFile(pDb->pFile); +} + +static void hctDbCsrInit( + HctDatabase *pDb, + u32 iRoot, + KeyInfo *pKeyInfo, + HctDbCsr *pCsr +){ + memset(pCsr, 0, sizeof(HctDbCsr)); + pCsr->pDb = pDb; + pCsr->iRoot = iRoot; + if( pKeyInfo ){ + pCsr->pKeyInfo = sqlite3KeyInfoRef(pKeyInfo); + } +} + + + +/* +** Return the size of the local part of a nData byte record stored on +** an intkey leaf page. +*/ +#if 0 +static int hctDbLocalSize(HctDatabase *pDb, int nData){ + int nOther = sizeof(HctDbIntkeyLeaf) + sizeof(HctDbIntkeyEntry) + 12; + if( nData<=(pDb->pgsz-nOther) ){ + return nData; + } + assert( !"todo" ); + return 0; +} +#endif + +#if 0 +static i64 hctDbIntkeyGetKey(u8 *aPg, int ii){ + HctDbIntkeyLeaf *p = (HctDbIntkeyLeaf*)aPg; + return p->aEntry[ii].iKey; +} +#endif + + + +/* +** Return the maximum number of entries that fit on an intkey internal +** node if the database page size is as specified by the only parameter. 
+*/ +static int hctDbMaxCellsPerIntkeyNode(int pgsz){ + return (pgsz - sizeof(HctDbIntkeyNode)) / sizeof(HctDbIntkeyNodeEntry); +} +static int hctDbMinCellsPerIntkeyNode(int pgsz){ + return (pgsz - sizeof(HctDbIntkeyNode)) / (3*sizeof(HctDbIntkeyNodeEntry)); +} + +static void hctDbIrrevocablyEvictPage(HctDatabase *pDb, HctDbWriter *p); + +static int hctDbOverflowArrayFree(HctDatabase *pDb, HctDbOverflowArray *p){ + int ii = 0; + int rc = SQLITE_OK; + + for(ii=0; rc==SQLITE_OK && iinEntry; ii++){ + u32 pgno = p->aOvfl[ii].pgno; + int nRem = p->aOvfl[ii].nOvfl; + while( 1 ){ + HctFilePage pg; + sqlite3HctFileClearPhysInUse(pDb->pFile, pgno, 0); + nRem--; + if( nRem==0 ) break; + rc = hctDbGetPhysical(pDb, pgno, &pg); + assert( rc==SQLITE_OK ); + pgno = ((HctDbPageHdr*)pg.aOld)->iPeerPg; + sqlite3HctFilePageRelease(&pg); + } + } + + return rc; +} + +#ifdef SQLITE_DEBUG +/* +** Do some assert() statements to check that: +** +** * the pages in discardpg[] are sorted according to key. +*/ +static void assert_writer_is_ok(HctDatabase *pDb, HctDbWriter *p){ + int ii; + HctBuffer buf = {0,0,0}; + UnpackedRecord *pRec = 0; + + for(ii=1; iidiscardpg.nPg; ii++){ + u8 *a1 = p->discardpg.aPg[ii-1].aOld; + u8 *a2 = p->discardpg.aPg[ii].aOld; + + if( hctPagetype(a1)==HCT_PAGETYPE_INTKEY ){ + i64 i1 = hctDbIntkeyFPKey(a1); + i64 i2 = hctDbIntkeyFPKey(a2); + assert( i2>i1 ); + }else{ + int nData = 0; + const u8 *aData = 0; + int rc = hctDbLoadRecord(pDb, &buf, a1, 0, &nData, &aData); + if( rc==SQLITE_OK && pRec==0 ){ + pRec = sqlite3VdbeAllocUnpackedRecord(p->writecsr.pKeyInfo); + if( pRec==0 ){ + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK ){ + int bGe = 555; + sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nData, aData, pRec); + rc = hctDbCompareFPKey(pDb, pRec, a2, &bGe); + assert( rc!=SQLITE_OK || bGe==0 ); + } + } + } + + sqlite3HctBufferFree(&buf); + hctDbFreeUnpacked(pRec); +} +#else /* if !SQLITE_DEBUG */ +# define assert_writer_is_ok(pDb, p) +#endif + +/* +** Cleanup the 
writer object passed as the first argument. +*/ +static void hctDbWriterCleanup(HctDatabase *pDb, HctDbWriter *p, int bRevert){ + + if( p->bDoCleanup ){ + int ii; + + sqlite3HctFileDebugPrint(pDb->pFile, + "writer cleanup height=%d bRevert=%d\n", p->iHeight, bRevert + ); + + assert_writer_is_ok(pDb, p); + + sqlite3HctBufferFree(&p->fp.buf); + memset(&p->fp, 0, sizeof(p->fp)); + + /* sqlite3HctFilePageUnwrite(&p->fanpg); */ + sqlite3HctFilePageRelease(&p->fanpg); + + /* If not reverting, mark the overflow chains in p->delOvfl as free */ + if( bRevert==0 ){ + hctDbOverflowArrayFree(pDb, &p->delOvfl); + }else{ + hctDbOverflowArrayFree(pDb, &p->insOvfl); + } + sqlite3_free(p->delOvfl.aOvfl); + sqlite3_free(p->insOvfl.aOvfl); + memset(&p->delOvfl, 0, sizeof(p->delOvfl)); + memset(&p->insOvfl, 0, sizeof(p->insOvfl)); + + for(ii=0; iiwritepg.nPg; ii++){ + HctFilePage *pPg = &p->writepg.aPg[ii]; + if( bRevert ){ + if( pPg->aNew ){ + sqlite3HctFilePageUnwrite(pPg); + }else if( ii>0 ){ + sqlite3HctFileClearInUse(pPg, 1); + } + } + sqlite3HctFilePageRelease(pPg); + } + hctDbPageArrayReset(&p->writepg); + + for(ii=0; iidiscardpg.nPg; ii++){ + if( bRevert && pDb->pConfig->nTryBeforeUnevict>1 ){ + sqlite3HctFilePageUnevict(&p->discardpg.aPg[ii]); + } + sqlite3HctFilePageRelease(&p->discardpg.aPg[ii]); + } + + hctDbPageArrayReset(&p->discardpg); + p->fp.iKey = 0; + p->fp.aKey = 0; + + if( p->iEvictLockedPgno ){ + assert( p->writecsr.iRoot ); + p->nEvictLocked++; + if( p->nEvictLocked>=pDb->pConfig->nTryBeforeUnevict ){ + p->nEvictLocked = -1; + hctDbIrrevocablyEvictPage(pDb, p); + p->nEvictLocked = 0; + } + }else{ + p->nEvictLocked = 0; + } + p->iEvictLockedPgno = 0; + p->bAppend = 0; + + /* Free/zero various buffers and caches */ + hctDbCsrCleanup(&p->writecsr); + hctDbCsrCleanup(&pDb->rbackcsr); + p->bDoCleanup = 0; + } +} + +static int hctDbInsert( + HctDatabase *pDb, + HctDbWriter *p, + u32 iRoot, + UnpackedRecord *pRec, /* The key value for index tables */ + i64 iKey, /* For 
intkey tables, the key value */ + u32 iChildPg, /* For internal node ops, the child pgno */ + int bDel, /* True for a delete operation */ + int nData, const u8 *aData /* Record/key to insert */ +); + +typedef struct HctDbWriterOrigin HctDbWriterOrigin; +struct HctDbWriterOrigin { + u8 bDiscard; /* 1 for aDiscard[], 0 for aWritePg[] */ + i16 iPg; /* Index of page in array*/ +}; + +static int hctdbWriterSortFPKeys( + HctDatabase *pDb, + int eType, + HctDbWriter *p, + HctDbWriterOrigin *aOrigin /* Populate this array */ +){ + int iDiscard = 0; + int iWP = 1; + int iOut = 0; + int rc = SQLITE_OK; + + assert( eType==HCT_PAGETYPE_INDEX || eType==HCT_PAGETYPE_INTKEY ); + + while( iDiscarddiscardpg.nPg || iWPwritepg.nPg ){ + if( iDiscard>=p->discardpg.nPg ){ + aOrigin[iOut].bDiscard = 0; + aOrigin[iOut].iPg = iWP++; + iOut++; + } + else if( iWP>=p->writepg.nPg ){ + aOrigin[iOut].bDiscard = 1; + aOrigin[iOut].iPg = iDiscard++; + iOut++; + }else{ + int bDiscard = 0; + const u8 *aD = p->discardpg.aPg[iDiscard].aOld; + const u8 *aW = p->writepg.aPg[iWP].aOld; + + if( eType==HCT_PAGETYPE_INTKEY ){ + i64 i1 = hctDbIntkeyFPKey(aD); + i64 i2 = hctDbIntkeyFPKey(aW); + bDiscard = (i1<=i2); + }else{ + int nFP = 0; + const u8 *aFP = 0; + UnpackedRecord *pRec = p->writecsr.pRec; + rc = hctDbLoadRecord(pDb, &p->writecsr.rec, aW, 0, &nFP, &aFP); + if( rc!=SQLITE_OK ) break; + sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nFP, aFP, pRec); + rc = hctDbCompareFPKey(pDb, pRec, aD, &bDiscard); + if( rc!=SQLITE_OK ) break; + } + + aOrigin[iOut].bDiscard = bDiscard; + if( bDiscard ){ + aOrigin[iOut].iPg = iDiscard++; + }else{ + aOrigin[iOut].iPg = iWP++; + } + iOut++; + } + } + + return rc; +} + +#if 0 +/* +** +*/ +static int hctDbTruncateRecord( + HctBuffer *pBuf, /* Buffer to use for storage space */ + KeyInfo *pKeyInfo, /* Description of index */ + int *pnFP, /* IN/OUT: Size of record */ + const u8 **aFP /* IN/OUT: Pointer to record */ +){ +} +#endif + +/* +** This is a wrapper around: +** 
+** sqlite3HctFilePageEvict(pPg, 0); +** +** If the call fails with SQLITE_LOCKED because page pPg has been evicted, +** HctDbWriter.iEvictLockedPgno is set to the logical page number of pPg. +*/ +static int hctDbFilePageEvict(HctDbWriter *p, HctFilePage *pPg){ + int rc = sqlite3HctFilePageEvict(pPg, 0); + if( rc==SQLITE_LOCKED && sqlite3HctFilePageIsEvicted(pPg->pFile, pPg->iPg) ){ + p->iEvictLockedPgno = pPg->iPg; + } + return rc; +} + +static int hctDbFilePageCommit(HctDbWriter *p, HctFilePage *pPg){ + int rc = sqlite3HctFilePageCommit(pPg); + if( rc==SQLITE_LOCKED && sqlite3HctFilePageIsEvicted(pPg->pFile, pPg->iPg) ){ + p->iEvictLockedPgno = pPg->iPg; + } + return rc; +} + +static int hctDbMigrateReinsertKeys(HctDatabase *pDb, HctDbWriter *p); + +static int hctDbInsertFlushWrite(HctDatabase *pDb, HctDbWriter *p){ + int rc = SQLITE_OK; + int ii; + int eType = hctPagetype(p->writepg.aPg[0].aNew); + HctFilePage root; + int bUnevict = 0; + + memset(&root, 0, sizeof(root)); + + rc = hctDbMigrateReinsertKeys(pDb, p); + +#ifdef SQLITE_DEBUG + for(ii=1; rc==SQLITE_OK && iiwritepg.nPg; ii++){ + u32 iPeer = ((HctDbPageHdr*)p->writepg.aPg[ii-1].aNew)->iPeerPg; + assert( p->writepg.aPg[ii].iPg==iPeer ); + } +#endif + + /* Test if this is a split of a root page of the tree. */ + if( rc==SQLITE_OK + && p->writepg.nPg>1 + && p->writepg.aPg[0].iPg==p->writecsr.iRoot + ){ + HctFilePage *pPg0 = &p->writepg.aPg[0]; + hctMemcpy(&root, pPg0, sizeof(HctFilePage)); + memset(pPg0, 0, sizeof(HctFilePage)); + rc = sqlite3HctFilePageNew(pDb->pFile, pPg0); + if( rc==SQLITE_OK ){ + hctMemcpy(pPg0->aNew, root.aNew, pDb->pgsz); + hctDbRootPageInit(eType==HCT_PAGETYPE_INDEX, + hctPageheight(root.aNew)+1, pPg0->iPg, root.aNew, pDb->pgsz + ); + } + } + + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageRelease(&p->fanpg); + } + + /* Loop through the set of pages to write out. They must be + ** written in reverse order - so that page aWritePg[0] is written + ** last. 
*/ + assert( p->writepg.nPg>0 ); + for(ii=p->writepg.nPg-1; rc==SQLITE_OK && ii>=0; ii--){ + rc = hctDbFilePageCommit(p, &p->writepg.aPg[ii]); + } + + /* If there is one, write the new root page to disk */ + if( rc==SQLITE_OK && root.iPg ){ + rc = hctDbFilePageCommit(p, &root); + sqlite3HctFilePageRelease(&root); + } + + if( rc!=SQLITE_OK ){ + bUnevict = 1; + } + + /* If there is more than one page in the writepg array, or more than + ** zero in the discardpg array, then the parent list must be updated. + ** This block does that. */ + if( (p->writepg.nPg>1 || p->discardpg.nPg>0) && rc==SQLITE_OK ){ + const u32 iRoot = p->writecsr.iRoot; + const int nOrig = p->discardpg.nPg + p->writepg.nPg - 1; + HctDbWriterOrigin aStatic[6]; + HctDbWriterOrigin *aDyn = 0; + HctDbWriterOrigin *aOrig = aStatic; + HctBuffer buf; + HctDbWriter wr; + int iOrig = 0; + + memset(&buf, 0, sizeof(buf)); + memset(&wr, 0, sizeof(wr)); + hctDbPageArrayReset(&wr.writepg); + hctDbPageArrayReset(&wr.discardpg); + + if( nOrig>ArraySize(aStatic) ){ + int nByte = sizeof(HctDbWriterOrigin) * nOrig; + aOrig = aDyn = (HctDbWriterOrigin*)sqlite3HctMalloc(&rc, nByte); + } + + if( rc==SQLITE_OK ){ + wr.iHeight = p->iHeight + 1; + rc = hctDbCsrAllocateUnpacked(&p->writecsr); + } + + if( rc==SQLITE_OK ){ + rc = hctdbWriterSortFPKeys(pDb, eType, p, aOrig); + } + + if( rc==SQLITE_OK ){ + do { + assert( rc==SQLITE_OK || rc==SQLITE_LOCKED ); + rc = SQLITE_OK; + + while( iOrigbDiscard; + + pPg = &(bDel ? 
p->discardpg.aPg : p->writepg.aPg)[pOrig->iPg]; + if( eType==HCT_PAGETYPE_INTKEY ){ + iKey = hctDbIntkeyFPKey(pPg->aOld); + }else{ + rc = hctDbLoadRecord(pDb, &buf, pPg->aOld, 0, &nFP, &aFP); + if( rc!=SQLITE_OK ) break; + pRec = p->writecsr.pRec; + sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nFP, aFP, pRec); + sqlite3HctDbRecordTrim(pRec); + } + + rc = hctDbInsert( + pDb, &wr, iRoot, pRec, iKey, pPg->iPg, bDel, nFP, aFP + ); + } + + if( rc==SQLITE_OK ){ + rc = hctDbInsertFlushWrite(pDb, &wr); + } + if( rc==SQLITE_LOCKED ){ + assert( iOrig>=wr.nWriteKey ); + iOrig -= wr.nWriteKey; + pDb->nCasFail++; + pDb->stats.nInternalRetry++; + } + hctDbWriterCleanup(pDb, &wr, (rc!=SQLITE_OK)); + wr.nWriteKey = 0; + + }while( rc==SQLITE_LOCKED ); + } + + sqlite3HctBufferFree(&buf); + sqlite3_free(aDyn); + } + + if( rc==SQLITE_OK ){ + for(ii=0; iidiscardpg.nPg; ii++){ + sqlite3HctFileClearInUse(&p->discardpg.aPg[ii], 0); + } + } + + /* Clean up the Writer object */ + hctDbWriterCleanup(pDb, p, bUnevict); + return rc; +} + +SQLITE_PRIVATE void sqlite3HctDbRollbackMode(HctDatabase *pDb, int eRollback){ + assert( eRollback==0 || pDb->eMode==HCT_MODE_NORMAL ); + pDb->pa.nWriteKey = 0; + pDb->eMode = eRollback ? HCT_MODE_ROLLBACK : HCT_MODE_NORMAL; + if( eRollback>1 ){ + memset(&pDb->pa, 0, sizeof(pDb->pa)); + hctDbPageArrayReset(&pDb->pa.writepg); + hctDbPageArrayReset(&pDb->pa.discardpg); + + /* During recovery rollback the connection should read the latest + ** version of the db - no exceptions. Set these two to the largest + ** possible values to ensure that this happens. */ + pDb->iSnapshotId = LARGEST_TID-1; + pDb->iLocalMinTid = LARGEST_TID-1; + } +} + +SQLITE_PRIVATE i64 sqlite3HctDbNCasFail(HctDatabase *pDb){ + return pDb->nCasFail; +} + +#if 0 +static HctDbIntkeyEntry *hctDbIntkeyEntry(u8 *aPg, int iCell){ + return iCell<0 ? 
0 : (&((HctDbIntkeyLeaf*)aPg)->aEntry[iCell]); +} +#endif + +SQLITE_PRIVATE int sqlite3HctDbInsertFlush(HctDatabase *pDb, int *pnRetry){ + int rc = SQLITE_OK; + if( pDb->pa.writepg.nPg ){ + rc = hctDbInsertFlushWrite(pDb, &pDb->pa); + if( rc==SQLITE_LOCKED ){ + *pnRetry = pDb->pa.nWriteKey; + rc = SQLITE_OK; + pDb->nCasFail++; + }else{ + *pnRetry = 0; + } +#if 0 + { + sqlite3HctFileDebugPrint(pDb->pFile, + "%p: %s sqlite3HctDbInsertFlush() -> %d (nRetry=%d)\n", + pDb, (pDb->eMode==HCT_MODE_ROLLBACK ? "RB" : " "), rc, *pnRetry + ); + fflush(stdout); + } +#endif + pDb->pa.nWriteKey = 0; + } + return rc; +} + +/* +** If pRec is not NULL, it contains an unpacked index key. Compare this key +** with the write-fp-key in pDb->pa.aWriteFpKey. Return true if pRec is greater +** than or equal to the write-fp-key. +** +** Or, if pRec is NULL, iKey is the key and it is compared to +** pDb->iWriteFpKey. +*/ +static int hctDbTestWriteFpKey( + HctDbWriter *p, + RecordCompare xCompare, + UnpackedRecord *pRec, + i64 iKey +){ + if( pRec ){ + int r; + if( p->fp.aKey==0 ){ + r = 1; + }else{ + r = xCompare(p->fp.iKey, p->fp.aKey, pRec); + } + return (r <= 0); + } + return iKey>=p->fp.iKey; +} + +static int hctDbSetWriteFpKey(HctDatabase *pDb, HctDbWriter *p){ + int rc = SQLITE_OK; + HctDbPageHdr *pHdr = (HctDbPageHdr*)p->writepg.aPg[p->writepg.nPg-1].aNew; + + p->fp.aKey = 0; + p->fp.iKey = 0; + + if( pHdr->iPeerPg==0 ){ + if( hctPagetype(pHdr)==HCT_PAGETYPE_INTKEY ){ + p->fp.iKey = LARGEST_INT64; + } + }else{ + HctFilePage pg; + rc = sqlite3HctFilePageGet(pDb->pFile, pHdr->iPeerPg, &pg); + if( rc==SQLITE_OK ){ + if( hctPagetype(pHdr)==HCT_PAGETYPE_INTKEY ){ + p->fp.iKey = hctDbIntkeyFPKey(pg.aOld); + }else{ + rc = hctDbLoadRecordFP(pDb, pg.aOld, 0, &p->fp); + } + sqlite3HctFilePageRelease(&pg); + } + } + + return rc; +} + +/* +** Buffer aTarget[] contains a page that contains variable length keys +** (i.e. an intkey leaf or an index leaf or node). 
This function returns +** the offset of the aEntry[] array in aTarget. Before doing so, it sets +** output variable (*pszEntry) to the sizeof(aEntry[0]). +*/ +static int hctDbEntryArrayDim(const u8 *aTarget, int *pszEntry){ + int eType = hctPagetype(aTarget); + int nHeight = hctPageheight(aTarget); + int nRet; + + assert( eType==HCT_PAGETYPE_INTKEY || eType==HCT_PAGETYPE_INDEX ); + assert( eType==HCT_PAGETYPE_INDEX || nHeight==0 ); + if( eType==HCT_PAGETYPE_INTKEY ){ + *pszEntry = sizeof(HctDbIntkeyEntry); + nRet = sizeof(HctDbIntkeyLeaf); + }else if( nHeight==0 ){ + *pszEntry = sizeof(HctDbIndexEntry); + nRet = sizeof(HctDbIndexLeaf); + }else{ + *pszEntry = sizeof(HctDbIndexNodeEntry); + nRet = sizeof(HctDbIndexNode); + } + + return nRet; +} + +static int hctIsVarRecords(const u8 *aTarget){ + int eType = hctPagetype(aTarget); + int nHeight = hctPageheight(aTarget); + return (nHeight==0 || eType==HCT_PAGETYPE_INDEX); +} + +#ifdef SQLITE_DEBUG + +static void print_out_page(const char *zCaption, const u8 *aData, int nData){ + HctDbPageHdr *pPg = (HctDbPageHdr*)aData; + + if( hctPagetype(pPg)==HCT_PAGETYPE_INTKEY && pPg->nHeight==0 ){ + HctDbIntkeyLeaf *pLeaf = (HctDbIntkeyLeaf*)pPg; + char *zPrint = 0; + const char *zSep = ""; + int ii; + + for(ii=0; iipg.nEntry; ii++){ + HctDbIntkeyEntry *pEntry = &pLeaf->aEntry[ii]; + zPrint = sqlite3_mprintf("%z%s(k=%lld f=%.2x %d..%d)", zPrint, zSep, + pEntry->iKey, pEntry->flags, + pEntry->iOff, pEntry->iOff+ hctDbIntkeyEntrySize(pEntry, nData) + ); + zSep = ","; + } + + printf("%s: nFreeGap=%d nFreeBytes=%d (intkey leaf)\n", zCaption, + pLeaf->hdr.nFreeGap, + pLeaf->hdr.nFreeBytes + ); + printf("%s: %s\n", zCaption, zPrint); + sqlite3_free(zPrint); + } + + if( hctPagetype(pPg)==HCT_PAGETYPE_INDEX && pPg->nHeight==0 ){ + HctDbIndexLeaf *pLeaf = (HctDbIndexLeaf*)pPg; + char *zPrint = 0; + const char *zSep = ""; + int ii; + + for(ii=0; iipg.nEntry; ii++){ + HctDbIndexEntry *pEntry = &pLeaf->aEntry[ii]; + zPrint = 
sqlite3_mprintf("%z%s(%d..%d)", zPrint, zSep, + pEntry->iOff, pEntry->iOff + hctDbIndexEntrySize(pEntry, nData) + ); + zSep = ","; + } + + printf("%s: nFreeGap=%d nFreeBytes=%d (index leaf)\n", zCaption, + pLeaf->hdr.nFreeGap, + pLeaf->hdr.nFreeBytes + ); + printf("%s: %s\n", zCaption, zPrint); + fflush(stdout); + sqlite3_free(zPrint); + } + + +} + +#define assert_or_print(E) \ + if( !(E) ){ \ + print_out_page("page", aData, nData); \ + assert( E ); \ + } + +typedef struct VarCellReader VarCellReader; +struct VarCellReader { + const u8 *aData; + int nData; + int szEntry; + int iEntry0; +}; + +static void hctVCRInit(VarCellReader *p, const u8 *aData, int nData){ + p->aData = aData; + p->nData = nData; + p->iEntry0 = hctDbEntryArrayDim(aData, &p->szEntry); +} + +static int hctVCRFindCell(VarCellReader *p, int iCell, int *pnByte){ + HctDbIndexNodeEntry *pEntry; + + pEntry = (HctDbIndexNodeEntry*)&p->aData[p->iEntry0 + iCell*p->szEntry]; + *pnByte = hctDbLocalsize(p->aData, p->nData, pEntry->nSize) + + hctDbOffset(0, pEntry->flags); + + return pEntry->iOff; +} + +static void assert_page_is_ok(const u8 *aData, int nData){ + + if( aData && hctIsVarRecords(aData) ){ + HctDbIndexNode *p = (HctDbIndexNode*)aData; + VarCellReader vcr; + int iEnd = nData; + int iStart = 0; + int nRecTotal = 0; + int ii = 0; + int nFreeExpect; + + hctVCRInit(&vcr, aData, nData); + for(ii=0; iipg.nEntry; ii++){ + int sz = 0; + int iOff = hctVCRFindCell(&vcr, ii, &sz); + if( iOff ){ + assert_or_print( (iOff+sz)<=nData ); + iEnd = MIN(iEnd, iOff); + nRecTotal += sz; + }else{ + assert( sz==0 && ii==0 ); + } + } + + iStart = vcr.iEntry0 + vcr.szEntry * p->pg.nEntry; + nFreeExpect = nData - (iStart + nRecTotal); + + assert_or_print( p->hdr.nFreeGap==(iEnd - iStart) ); + assert_or_print( p->hdr.nFreeBytes==nFreeExpect); + } + +} +#else +# define assert_page_is_ok(x,y) +#endif + +#ifdef SQLITE_DEBUG +static void assert_all_pages_ok(HctDatabase *pDb, HctDbWriter *p){ + int ii; + return; + for(ii=0; 
iiwritepg.nPg; ii++){ + u8 *aPg = p->writepg.aPg[ii].aNew; + assert( aPg[0]!=0x00 ); + assert( hctIsVarRecords(aPg) ); + assert_page_is_ok(aPg, pDb->pgsz); + assert( ii==p->writepg.nPg-1 + || ((HctDbPageHdr*)aPg)->iPeerPg==p->writepg.aPg[ii+1].iPg + ); + } +} +static void assert_all_pages_nonempty(HctDatabase *pDb, HctDbWriter *p){ + return; + if( p->writepg.nPg>1 ){ + int ii; + for(ii=0; iiwritepg.nPg; ii++){ + HctDbPageHdr *pPg = (HctDbPageHdr*)p->writepg.aPg[ii].aNew; + assert( pPg->nEntry>0 ); + } + } +} +#else +# define assert_all_pages_ok(x,y) +# define assert_all_pages_nonempty(x,y) +#endif + + +/* +** HOW INSERT/DELETE OPERATIONS WORK: +** +** 1. If the page array is not empty, flush it to disk if required. It +** should be flushed to disk if either: +** +** a) the key being written (specified by iKey/pRec) is greater or +** equal to the FP key to the right of the page array (stored +** in HctDbWriter.iWriteFpKey/aWriteFpKey). +** +** b) there are more than HCTDB_MAX_DIRTY pages in the array. +** +** 2. If the page array is empty, either because it was flushed to disk +** in (1) or because it was empty when this function was called, seek +** the write-cursor (HctDbWriter.writecsr) to the key being written. +** The page the cursor seeks to becomes the first page of the page +** array. +** +** 3. Locate within the page array the page into which the new key +** or delete-key should be inserted. There are three possible outcomes: +** +** i) the new key may just be written to the page. +** +** ii) the new key fits on the page, but leaves it underfull. In this +** context, "underfull" means that the total amount of free space +** on the page is less than or equal to (pgsz*2/3). +** +** iii) the new key does not fit on the page. +** +** In cases (ii) or (iii), first ensure that that the page has two peers in +** the page array (unless there are fewer than three pages in the list, in +** which case the entire list should be loaded). 
Then redistribute the keys +** between the minimum number of pages, discarding or adding nodes as +** required. +*/ + +/* +** Insert nPg new pages at index iPg into the write-array of the HctDbWriter +** passed as the second argument and link them into the list. +*/ +static int hctDbExtendWriteArray( + HctDatabase *pDb, + HctDbWriter *p, + int iPg, + int nPg +){ + int rc = SQLITE_OK; + int ii; + + assert( iPg>0 ); + assert( (p->writepg.nPg+nPg)>0 ); + assert( p->writepg.nPg>0 ); + + /* Add any new pages required */ + for(ii=iPg; rc==SQLITE_OK && iiwritepg.nPgwritepg.aPg) ); + assert( ii>0 ); + if( iiwritepg.nPg ){ + int nByte = sizeof(HctFilePage) * (p->writepg.nPg-ii); + memmove(&p->writepg.aPg[ii+1], &p->writepg.aPg[ii], nByte); + } + p->writepg.nPg++; + memset(&p->writepg.aPg[ii], 0, sizeof(HctFilePage)); + rc = sqlite3HctFilePageNew(pDb->pFile, &p->writepg.aPg[ii]); + if( rc==SQLITE_OK ){ + HctDbPageHdr *pNew = (HctDbPageHdr*)p->writepg.aPg[ii].aNew; + HctDbPageHdr *pPrev = (HctDbPageHdr*)p->writepg.aPg[ii-1].aNew; + memset(pNew, 0, sizeof(HctDbPageHdr)); + pNew->hdrFlags = hctPagetype(pPrev); + pNew->nHeight = pPrev->nHeight; + pNew->iPeerPg = pPrev->iPeerPg; + pPrev->iPeerPg = p->writepg.aPg[ii].iPg; + } + } + + /* Remove pages that are not required */ + for(ii=nPg; ii<0; ii++){ + HctDbPageHdr *pPrev = (HctDbPageHdr*)(p->writepg.aPg[iPg-1].aNew); + HctDbPageHdr *pRem = (HctDbPageHdr*)(p->writepg.aPg[iPg].aNew); + pPrev->iPeerPg = pRem->iPeerPg; + assert( p->writepg.nPg>1 ); + p->writepg.nPg--; + + assert( iPg!=0 ); + assert( p->writepg.aPg[iPg].aOld==0 ); + sqlite3HctFilePageUnwrite(&p->writepg.aPg[iPg]); + + if( iPg!=p->writepg.nPg ){ + int nByte = sizeof(HctFilePage) * (p->writepg.nPg-iPg); + assert( nByte>0 ); + memmove(&p->writepg.aPg[iPg], &p->writepg.aPg[iPg+1], nByte); + } + } + + return rc; +} + +static int hctDbCsrLoadAndDecode( + HctDbCsr *pCsr, + int iCell, + UnpackedRecord **ppRec +){ + const u8 *aPg = pCsr->pg.aNew ? 
pCsr->pg.aNew : pCsr->pg.aOld; + int nData = 0; + const u8 *aData = 0; + int rc; + + rc = hctDbLoadRecord(pCsr->pDb, &pCsr->rec, aPg, iCell, &nData, &aData); + if( rc==SQLITE_OK ){ + rc = hctDbCsrAllocateUnpacked(pCsr); + } + if( rc==SQLITE_OK ){ + *ppRec = pCsr->pRec; + sqlite3VdbeRecordUnpack(pCsr->pKeyInfo, nData, aData, pCsr->pRec); + assert( pCsr->pRec->nField>0 ); + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctDbCsrLoadAndDecode(HctDbCsr *pCsr, UnpackedRecord **ppRec){ + return hctDbCsrLoadAndDecode(pCsr, pCsr->iCell, ppRec); +} + +/* +** +*/ +static int hctDbFindLhsPeer( + HctDatabase *pDb, + HctDbWriter *p, + HctFilePage *pPg, + HctFilePage *pOut +){ + HctDbCsr csr; + u8 *aLeft = pPg->aNew ? pPg->aNew : pPg->aOld; + int rc = SQLITE_OK; + + hctDbCsrInit(pDb, p->writecsr.iRoot, 0, &csr); + csr.pKeyInfo = p->writecsr.pKeyInfo; + if( hctPagetype(aLeft)==HCT_PAGETYPE_INTKEY ){ + i64 iKey = hctDbIntkeyFPKey(aLeft); + assert( iKey!=SMALLEST_INT64 ); + rc = hctDbCsrSeek(&csr, 0, p->iHeight, 0, 0, iKey-1, 0); + }else{ + UnpackedRecord *pRec = 0; + HctBuffer buf; + int nData = 0; + const u8 *aData = 0; + memset(&buf, 0, sizeof(buf)); + rc = hctDbLoadRecord(pDb, &buf, aLeft, 0, &nData, &aData); + if( rc==SQLITE_OK ){ + rc = hctDbCsrAllocateUnpacked(&p->writecsr); + } + if( rc==SQLITE_OK ){ + pRec = p->writecsr.pRec; + sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nData, aData, pRec); + sqlite3HctDbRecordTrim(pRec); + pRec->default_rc = 1; + rc = hctDbCsrSeek(&csr, 0, p->iHeight, 0, pRec, 0, 0); + pRec->default_rc = 0; + + assert( csr.pg.iPg!=pPg->iPg ); + } + sqlite3HctBufferFree(&buf); + } + + if( rc==SQLITE_OK + && ((HctDbPageHdr*)csr.pg.aOld)->iPeerPg==pPg->iPg + ){ + *pOut = csr.pg; + }else{ + memset(pOut, 0, sizeof(HctFilePage)); + rc = SQLITE_LOCKED_ERR(pPg->iPg, "peer"); + } + + return rc; +} + +static void hctDbIrrevocablyEvictPage(HctDatabase *pDb, HctDbWriter *p){ + int rc = SQLITE_OK; + u32 iLocked = p->iEvictLockedPgno; + int bDone = 0; + + KeyInfo 
*pKeyInfo = sqlite3KeyInfoRef(p->writecsr.pKeyInfo); + u32 iRoot = p->writecsr.iRoot; + + sqlite3HctFileDebugPrint(pDb->pFile,"BEGIN forced eviction of %d\n", iLocked); + + do { + HctFilePage pg1; + HctFilePage pg0; + memset(&pg1, 0, sizeof(pg1)); + if( p->writecsr.iRoot==0 ){ + hctDbCsrInit(pDb, iRoot, pKeyInfo, &p->writecsr); + } + rc = sqlite3HctFilePageGet(pDb->pFile, iLocked, &pg1); + while( rc==SQLITE_OK ){ + memset(&pg0, 0, sizeof(pg0)); + rc = hctDbFindLhsPeer(pDb, p, &pg1, &pg0); + if( rc ) break; + if( 0==sqlite3HctFilePageIsEvicted(pg0.pFile, pg0.iPg) ) break; + sqlite3HctFilePageRelease(&pg1); + pg1 = pg0; + memset(&pg0, 0, sizeof(pg0)); + } + + if( rc==SQLITE_OK ){ + bDone = (pg1.iPg==iLocked); + sqlite3HctFileDebugPrint( + pDb->pFile, "forcing write of %d->%d\n", pg0.iPg, pg1.iPg + ); + + rc = sqlite3HctFilePageEvict(&pg1, 1); + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageWrite(&pg0); + } + if( rc==SQLITE_OK ){ + hctMemcpy(pg0.aNew, pg0.aOld, pDb->pgsz); + } + if( rc==SQLITE_OK ){ + p->writepg.aPg[0] = pg0; + p->writepg.nPg = 1; + rc = hctDbExtendWriteArray(pDb, p, 1, 1); + } + if( rc==SQLITE_OK ){ + hctMemcpy(p->writepg.aPg[1].aNew, pg1.aOld, pDb->pgsz); + p->discardpg.aPg[0] = pg1; + p->discardpg.nPg = 1; + } + + p->bDoCleanup = 1; + if( rc==SQLITE_OK ){ + rc = hctDbInsertFlushWrite(pDb, p); + }else{ + hctDbWriterCleanup(pDb, p, 1); + } + + }else{ + sqlite3HctFilePageRelease(&pg0); + sqlite3HctFilePageRelease(&pg1); + } + }while( rc==SQLITE_OK && bDone==0 ); + + sqlite3KeyInfoUnref(pKeyInfo); + sqlite3HctFileDebugPrint(pDb->pFile,"END forced eviction of %d\n", iLocked); +} + +/* +** +*/ +static int hctDbLoadPeers(HctDatabase *pDb, HctDbWriter *p, int *piPg){ + int rc = SQLITE_OK; + int iPg = *piPg; + + if( p->writepg.nPg<3 ){ + HctFilePage *pLeft = &p->writepg.aPg[0]; + + if( p->writepg.nPg==1 && 0==hctIsLeftmost(pLeft->aNew) ){ + HctFilePage *pCopy = 0; + assert( iPg==0 ); + + /* First, evict the page currently in p->writepg.aPg[0]. 
If we + ** successfully evict the page here, then of course no other thread + ** can - which guarantees that the seek operation below really does + ** find the left-hand peer (assuming the db is not corrupt). */ + rc = hctDbFilePageEvict(p, pLeft); + + /* Assuming the LOGICAL_EVICTED flag was successfully set, seek + ** cursor csr to the leaf page immediately to the left of pLeft. */ + if( rc==SQLITE_OK ){ + if( p->discardpg.nPg>0 ){ + int nMove = p->discardpg.nPg * sizeof(HctFilePage); + memmove(&p->discardpg.aPg[1], &p->discardpg.aPg[0], nMove); + } + pCopy = &p->discardpg.aPg[0]; + p->discardpg.nPg++; + *pCopy = *pLeft; + rc = hctDbFindLhsPeer(pDb, p, pCopy, pLeft); + } + if( rc==SQLITE_OK ){ + assert( ((HctDbPageHdr*)pLeft->aOld)->iPeerPg==pCopy->iPg ); + rc = sqlite3HctFilePageWrite(pLeft); + } + + if( rc==SQLITE_OK ){ + hctMemcpy(pLeft->aNew, pLeft->aOld, pDb->pgsz); + rc = hctDbExtendWriteArray(pDb, p, 1, 1); + } + if( rc==SQLITE_OK ){ + hctMemcpy(p->writepg.aPg[1].aNew, pCopy->aNew, pDb->pgsz); + sqlite3HctFilePageUnwrite(pCopy); + *piPg = 1; + } + } + + if( rc==SQLITE_OK ){ + HctDbPageHdr *pHdr = (HctDbPageHdr*)p->writepg.aPg[p->writepg.nPg-1].aNew; + if( pHdr->iPeerPg ){ + HctFilePage *pCopy = &p->discardpg.aPg[p->discardpg.nPg]; + + rc = sqlite3HctFilePageGet(pDb->pFile, pHdr->iPeerPg, pCopy); + if( rc==SQLITE_OK ){ + /* Evict the page immediately */ + rc = hctDbFilePageEvict(p, pCopy); + if( rc!=SQLITE_OK ){ + sqlite3HctFilePageRelease(pCopy); + }else{ + p->discardpg.nPg++; + } + } + + if( rc==SQLITE_OK ){ + rc = hctDbExtendWriteArray(pDb, p, p->writepg.nPg, 1); + } + if( rc==SQLITE_OK ){ + HctFilePage *pPg = &p->writepg.aPg[p->writepg.nPg-1]; + hctMemcpy(pPg->aNew, pCopy->aOld, pDb->pgsz); + rc = hctDbSetWriteFpKey(pDb, p); + } + } + } + } + + return rc; +} + +static int hctDbOverflowArrayAppend(HctDbOverflowArray *p, u32 ovfl, int nOvfl){ + assert( p->nAlloc>=p->nEntry ); + assert( ovfl>0 && nOvfl>0 ); + + if( p->nAlloc==p->nEntry ){ + int nNew = 
p->nAlloc ? p->nAlloc*2 : 16; + int nByte = nNew*sizeof(HctDbOverflow); + HctDbOverflow *aNew = (HctDbOverflow*)sqlite3_realloc(p->aOvfl, nByte); + + if( aNew==0 ){ + return SQLITE_NOMEM_BKPT; + } + p->aOvfl = aNew; + p->nAlloc = nNew; + } + + p->aOvfl[p->nEntry].pgno = ovfl; + p->aOvfl[p->nEntry].nOvfl = nOvfl; + p->nEntry++; + + return SQLITE_OK; +} + + +/* +** Buffer aTarget[] must contain a page with variable sized records - an +** index leaf or node, or an intkey leaf. This function returns the offset +** of the record for entry iEntry, and populates output variable *pFlags +** with the entry flags. +*/ +static int hctDbFindEntry(u8 *aTarget, int iEntry, u8 *pFlags, int *pnSize){ + int iRet; + if( hctPagetype(aTarget)==HCT_PAGETYPE_INTKEY ){ + iRet = ((HctDbIntkeyLeaf*)aTarget)->aEntry[iEntry].iOff; + *pFlags = ((HctDbIntkeyLeaf*)aTarget)->aEntry[iEntry].flags; + *pnSize = ((HctDbIntkeyLeaf*)aTarget)->aEntry[iEntry].nSize; + }else if( hctPageheight(aTarget)==0 ){ + iRet = ((HctDbIndexLeaf*)aTarget)->aEntry[iEntry].iOff; + *pFlags = ((HctDbIndexLeaf*)aTarget)->aEntry[iEntry].flags; + *pnSize = ((HctDbIndexLeaf*)aTarget)->aEntry[iEntry].nSize; + }else{ + iRet = ((HctDbIndexNode*)aTarget)->aEntry[iEntry].iOff; + *pFlags = ((HctDbIndexNode*)aTarget)->aEntry[iEntry].flags; + *pnSize = ((HctDbIndexNode*)aTarget)->aEntry[iEntry].nSize; + } + return iRet; +} + +static int hctDbRemoveOverflow( + HctDatabase *pDb, + HctDbWriter *p, + u8 *aPage, + int iCell +){ + int rc = SQLITE_OK; + + int nSize = 0; + u8 flags = 0; + int iOff = hctDbFindEntry(aPage, iCell, &flags, &nSize); + if( flags & HCTDB_HAS_OVFL ){ + u32 ovfl = 0; + int nOvfl = 0; + const int nBytePerOvfl = pDb->pgsz - sizeof(HctDbPageHdr); + int nLocal = hctDbLocalsize(aPage, pDb->pgsz, nSize); + + if( flags & HCTDB_HAS_TID ) iOff += 8; + if( flags & HCTDB_HAS_RANGETID ) iOff += 8; + if( flags & HCTDB_HAS_RANGEOLD ) iOff += 4; + + ovfl = hctGetU32(&aPage[iOff]); + nOvfl = ((nSize - nLocal) + nBytePerOvfl - 1) / 
nBytePerOvfl; + + rc = hctDbOverflowArrayAppend(&p->delOvfl, ovfl, nOvfl); + } + + return rc; +} + +static void hctDbRemoveTids( + HctDbIndexNodeEntry *p, + u8 *aPg, + u64 iSafeTid +){ + if( (p->flags & HCTDB_HAS_TID)==HCTDB_HAS_TID ){ + u64 iTid; + memcpy(&iTid, &aPg[p->iOff], sizeof(u64)); + if( (iTid & HCT_TID_MASK)<=iSafeTid ){ + p->flags &= ~HCTDB_HAS_TID; + p->iOff += sizeof(u64); + } + } + if( (p->flags & (HCTDB_HAS_TID|HCTDB_HAS_RANGETID))==HCTDB_HAS_RANGETID ){ + u64 iTid; + assert( p->flags & HCTDB_HAS_RANGEOLD ); + memcpy(&iTid, &aPg[p->iOff], sizeof(u64)); + if( (iTid & HCT_TID_MASK)<=iSafeTid ){ + p->flags &= ~(HCTDB_HAS_RANGETID|HCTDB_HAS_RANGEOLD); + p->iOff += (sizeof(u64) + sizeof(u32)); + } + } +} + +/* +** Populate the aSz[] array with the sizes and locations of each cell +** +** (bClobber && nNewCell==0) -> full-delete +** (bClobber) -> clobber +** (bClobber==0) -> insert of new key +*/ +static void hctDbBalanceGetCellSz( + HctDatabase *pDb, + HctDbWriter *pWriter, + int iInsert, + int bClobber, + int nNewCell, /* Bytes stored on page for new cell */ + u8 *aPg, + HctDbCellSz *aSz, + int *pnSz /* OUT: number of entries in aSz[] */ +){ + HctDbPageHdr *pPg = (HctDbPageHdr*)aPg; + u64 iSafeTid = sqlite3HctFileSafeTID(pDb->pFile); + int szEntry; + int i0 = hctDbEntryArrayDim(aPg, &szEntry); + int iCell = 0; /* Current cell of aPgCopy[ii] */ + int iSz = 0; /* Current populated size of aSz[] */ + int iIns = iInsert; + + for(iSz=0; iCellnEntry || iCell==iIns; iSz++){ + HctDbCellSz *pSz = &aSz[iSz]; + + assert( pPg->nEntrypgsz ); + if( iCell==iIns ){ + assert( nNewCell>0 || bClobber ); + if( nNewCell ){ + pSz->nByte = szEntry + nNewCell; + pSz->aEntry = 0; + pSz->aCell = 0; + }else{ + iSz--; + } + if( bClobber ){ + iCell++; + } + iIns = -1; + }else{ + HctDbIndexNodeEntry *pE = (HctDbIndexNodeEntry*)&aPg[i0+iCell*szEntry]; + hctDbRemoveTids(pE, aPg, iSafeTid); + + pSz->nByte = szEntry + hctDbPageRecordSize(pPg, pDb->pgsz, iCell); + pSz->aEntry = (u8*)pE; 
+ pSz->aCell = &aPg[pE->iOff]; + assert( pSz->nByte>0 ); + iCell++; + } + } + if( pnSz ) *pnSz = iSz; +} + +typedef struct HctDbInsertOp HctDbInsertOp; +struct HctDbInsertOp { + u8 entryFlags; /* Flags for page entry added by this call */ + u8 *aEntry; /* Buffer containing formatted entry */ + int nEntry; /* Size of aEntry[] */ + int nEntrySize; /* Value for page header nSize field */ + + int iPg; /* Index in HctDbWriter.writepg.aPg */ + int iInsert; /* Index in page to write to */ + + i64 iIntkey; /* Key to insert (if intkey page) */ + + int eBalance; /* True if balance routine must be called */ + int bFullDel; /* True to skip insert */ + + u32 iOldPg; + const u8 *aOldPg; +}; + +/* +** Values for HctDbInsertOp.eBalance +*/ +#define BALANCE_NONE 0 +#define BALANCE_OPTIONAL 1 +#define BALANCE_REQUIRED 2 + + +static int hctDbBalanceAppend( + HctDatabase *pDb, + HctDbWriter *p, + HctDbInsertOp *pOp +){ + int rc = hctDbExtendWriteArray(pDb, p, p->writepg.nPg, 1); + if( rc==SQLITE_OK ){ + HctDbLeaf *pLeaf = (HctDbLeaf*)p->writepg.aPg[p->writepg.nPg-1].aNew; + pLeaf->hdr.nFreeBytes = pDb->pgsz - sizeof(HctDbLeaf); + pLeaf->hdr.nFreeGap = pLeaf->hdr.nFreeBytes; + assert( p->iHeight==0 ); + assert_all_pages_ok(pDb, p); + pOp->iPg = p->writepg.nPg-1; + pOp->iInsert = 0; + } + return rc; +} + +static HctBalance *hctDbBalanceSpace(int *pRc, HctDatabase *pDb){ + if( pDb->pBalance==0 ){ + HctBalance *p = 0; + int nPg = ArraySize(p->aPg); + int nSzAlloc = (nPg * 2 * MAX_CELLS_PER_PAGE(pDb->pgsz)) + 1; + + pDb->pBalance = p = (HctBalance*)sqlite3HctMalloc(pRc, + sizeof(HctBalance) + + nPg * pDb->pgsz + + sizeof(HctDbCellSz) * nSzAlloc + ); + if( p ){ + u8 *aCsr = (u8*)&p[1]; + int ii; + for(ii=0; iiaPg[ii] = aCsr; + aCsr += pDb->pgsz; + } + p->aSz = (HctDbCellSz*)aCsr; + p->nSzAlloc = nSzAlloc; + } + } + return pDb->pBalance; +} + +/* +** Rebalance routine for pages with variably-sized records - intkey leaves, +** index leaves and index nodes. 
+*/ +static int hctDbBalance( + HctDatabase *pDb, + HctDbWriter *p, + HctDbInsertOp *pOp, + int bClobber +){ + int rc = SQLITE_OK; /* Return code */ + int iPg = pOp->iPg; + int iIns = pOp->iInsert; + + int iLeftPg; /* Index of leftmost page used in balance */ + int nIn = 1; /* Number of input peers for balance */ + int ii; /* Iterator used for various things */ + int nOut = 0; /* Number of output peers */ + int szEntry = 0; + int iEntry0 = 0; + HctDbCellSz *aSz = 0; + int nSz = 0; + u8 **aPgCopy = 0; + + int nRem; + + int aPgRem[5]; + int aPgFirst[6]; + + /* Grab the temporary space used by balance operations. */ + HctBalance *pBal = 0; + pBal = hctDbBalanceSpace(&rc, pDb); + if( pBal==0 ) return rc; + + /* Populate the aSz[] and aPgCopy[] arrays as if this were a single-page + ** rebalance only. */ + aSz = &pBal->aSz[MAX_CELLS_PER_PAGE(pDb->pgsz) * 2]; + aPgCopy = pBal->aPg; + hctMemcpy(aPgCopy[0], p->writepg.aPg[iPg].aNew, pDb->pgsz); + hctDbBalanceGetCellSz(pDb, p, iIns, bClobber,pOp->nEntry,aPgCopy[0],aSz,&nSz); + + if( pOp->eBalance==BALANCE_OPTIONAL ){ + int nTotal = 0; + for(ii=0; iipgsz - sizeof(HctDbIntkeyLeaf)) ){ + /* This is a single page balance */ + nIn = 1; + nOut = 1; + iLeftPg = iPg; + } + } + + if( nOut==0 ){ + HctDbPageHdr *pHdr = (HctDbPageHdr*)p->writepg.aPg[iPg].aNew; + if( p->iHeight==0 + && bClobber==0 && pOp->nEntry>0 + && pHdr->iPeerPg==0 && pHdr->nEntry==iIns + ){ + p->bAppend = 1; + rc = hctDbBalanceAppend(pDb, p, pOp); + return rc; + } + + /* If the HctDbWriter.writepg.aPg[] array still contains a single page, + ** load some peer pages into it. */ + assert( p->discardpg.nPg>=0 ); + if( IS_HCT_MIGRATE(pDb)==0 ){ + rc = hctDbLoadPeers(pDb, p, &iPg); + if( rc!=SQLITE_OK ){ + return rc; + } + } + assert_all_pages_ok(pDb, p); + + /* Determine the subset of HctDbWriter.writepg.aPg[] pages that will be + ** rebalanced. Variable nIn is set to the number of input pages, and + ** iLeftPg to the index of the leftmost of them. 
*/ + iLeftPg = iPg; + if( iPg==0 ){ + nIn = MIN(p->writepg.nPg, 3); + }else{ + if( iPg==p->writepg.nPg-1 ){ + nIn = MIN(p->writepg.nPg, 3); + iLeftPg -= (nIn-1); + }else{ + nIn = 3; + iLeftPg--; + } + SWAP(u8*, aPgCopy[0], aPgCopy[iPg-iLeftPg]); + } + + /* aPgCopy[iPg-iLeftPg] already contains a copy of page iPg at this + ** point. This loop takes copies of the other pages involved in the + ** balance operation. */ + for(ii=0; iiwritepg.aPg[iLeftPg+ii].aNew, pDb->pgsz); + } + + for(ii=(iPg-iLeftPg)-1; ii>=0; ii--){ + int nCell = hctPagenentry(aPgCopy[ii]); + aSz -= nCell; + nSz += nCell; + hctDbBalanceGetCellSz(pDb, p, -1, 0, 0, aPgCopy[ii], aSz, 0); + } + for(ii=(iPg-iLeftPg)+1; iiwritecsr.pKeyInfo==0 ){ + pDb->stats.nBalanceIntkey++; + }else{ + pDb->stats.nBalanceIndex++; + } + if( nIn==1 ){ + pDb->stats.nBalanceSingle++; + } + + /* Figure out how many output pages will be required. This loop calculates + ** a mapping heavily biased to the left. */ + aPgFirst[0] = 0; + if( nOut==0 ){ + assert( sizeof(HctDbIntkeyLeaf)==sizeof(HctDbIndexLeaf) ); + nRem = pDb->pgsz - sizeof(HctDbIntkeyLeaf); + nOut = 1; + for(ii=0; iinRem ){ + aPgRem[nOut-1] = nRem; + aPgFirst[nOut] = ii; + nOut++; + nRem = pDb->pgsz - sizeof(HctDbIntkeyLeaf); + assert( nOut<=ArraySize(aPgRem) ); + } + nRem -= aSz[ii].nByte; + } + aPgRem[nOut-1] = nRem; + } + aPgFirst[nOut] = nSz; + + /* Adjust the packing calculated by the previous loop. */ + for(ii=nOut-1; ii>0; ii--){ + /* Try to shift cells from output page (ii-1) to output page (ii). Shift + ** cells for as long as (a) there is more free space on page (ii) than on + ** page (ii-1), and (b) there is enough free space on page (ii) to fit + ** the last cell from page (ii-1). 
*/ + while( aPgRem[ii]>aPgRem[ii-1] ){ /* condition (a) */ + HctDbCellSz *pLast = &aSz[aPgFirst[ii]-1]; + if( pLast->nByte>aPgRem[ii] ) break; /* condition (b) */ + aPgRem[ii] -= pLast->nByte; + aPgRem[ii-1] += pLast->nByte; + aPgFirst[ii] = (pLast - aSz); + } + } + + /* Allocate any required new pages and link them into the list. */ + rc = hctDbExtendWriteArray(pDb, p, iLeftPg+1, nOut-nIn); + + /* Populate the output pages */ + iEntry0 = hctDbEntryArrayDim(aPgCopy[0], &szEntry); + for(ii=0; iiwritepg.aPg[iIdx].aNew; + HctDbIndexLeaf *pLeaf = (HctDbIndexLeaf*)aTarget; + int iOff = pDb->pgsz; /* Start of data area in aTarget[] */ + int iLast = (ii==(nOut-1) ? nSz : aPgFirst[ii+1]); + int nNewEntry = 0; /* Number of entries on this output page */ + int i2; + + for(i2=0; i2<(iLast - aPgFirst[ii]); i2++){ + HctDbCellSz *pSz = &aSz[aPgFirst[ii] + i2]; + if( pSz->aEntry ){ + u8 *aETo = &aTarget[iEntry0 + nNewEntry*szEntry]; + int nCopy = pSz->nByte - szEntry; + hctMemcpy(aETo, pSz->aEntry, szEntry); + iOff -= nCopy; + ((HctDbIndexEntry*)aETo)->iOff = iOff; + hctMemcpy(&aTarget[iOff], pSz->aCell, nCopy); + nNewEntry++; + }else{ + pOp->iPg = iIdx; + pOp->iInsert = i2; + } + } + + pLeaf->pg.nEntry = nNewEntry; + pLeaf->hdr.nFreeBytes = iOff - (iEntry0 + nNewEntry*szEntry); + pLeaf->hdr.nFreeGap = iOff - (iEntry0 + nNewEntry*szEntry); + } + + return rc; +} + + +static int hctDbBalanceIntkeyNode( + HctDatabase *pDb, + HctDbWriter *p, + int iPg, + int iInsert, /* Index in iPg for new key, if any */ + i64 iKey, /* Integer key value */ + u32 iChildPg /* The child pgno */ +){ + int nMax = hctDbMaxCellsPerIntkeyNode(pDb->pgsz); + int rc = SQLITE_OK; + int nIn; /* Number of input pages */ + int nOut; /* Number of output pages */ + int iLeftPg; /* Index of left-most page in balance */ + int ii; /* Iterator variable */ + int nTotal = 0; /* Total number of keys for balance */ + u8 *aPgCopy[3]; + u8 *pFree = 0; + + assert( p->writepg.aPg[p->writepg.nPg-1].aNew ); + if( 
IS_HCT_MIGRATE(pDb)==0 ){ + rc = hctDbLoadPeers(pDb, p, &iPg); + if( rc!=SQLITE_OK ){ + return rc; + } + } + + iLeftPg = iPg; + if( iPg==0 ){ + nIn = MIN(p->writepg.nPg, 3); + }else if( iPg==p->writepg.nPg-1 ){ + nIn = MIN(p->writepg.nPg, 3); + iLeftPg -= (nIn-1); + }else{ + nIn = MIN(p->writepg.nPg, 3); + iLeftPg--; + assert( iLeftPg+nIn<=p->writepg.nPg ); + } + + /* Take a copy of each input page. Make the buffer used to store each + ** copy larger than required by the size of one entry. Then, there is + ** a new entry to add in stack variables (iKey/iChildPg), add it to the + ** copy of its page. This is to make the loop that populates the output + ** pages below easier to write. A real candidate for optimization, this. */ + pFree = (u8*)sqlite3Malloc(nIn*(pDb->pgsz+sizeof(HctDbIntkeyNodeEntry))); + if( pFree==0 ) return SQLITE_NOMEM; + for(ii=0; iipgsz + sizeof(HctDbIntkeyNodeEntry)) * ii]; + hctMemcpy(aPgCopy[ii], p->writepg.aPg[iLeftPg+ii].aNew, pDb->pgsz); + } + if( iInsert>=0 ){ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)aPgCopy[iPg-iLeftPg]; + if( iInsertpg.nEntry ){ + int nByte = sizeof(HctDbIntkeyNodeEntry) * (pNode->pg.nEntry-iInsert); + memmove(&pNode->aEntry[iInsert+1], &pNode->aEntry[iInsert], nByte); + } + pNode->pg.nEntry++; + pNode->aEntry[iInsert].iKey = iKey; + pNode->aEntry[iInsert].iChildPg = iChildPg; + } + + /* Figure out how many entries there are, in total */ + for(ii=0; iipg.nEntry; + } + + /* Figure out how many output pages are required */ + nOut = (nTotal + (nMax-1)) / nMax; + rc = hctDbExtendWriteArray(pDb, p, iLeftPg+1, nOut-nIn); + assert( rc==SQLITE_OK ); /* todo */ + + /* Populate the output pages */ + if( rc==SQLITE_OK ){ + int nRem = nTotal; + int iIn = 0; + int iInEntry = 0; + + for(ii=0; iiwritepg.aPg[ii+iLeftPg].aNew; + for(pEntry=pNode->aEntry; pEntry<&pNode->aEntry[nCell]; pEntry++){ + HctDbIntkeyNode *pIn = (HctDbIntkeyNode*)aPgCopy[iIn]; + *pEntry = pIn->aEntry[iInEntry++]; + if( iInEntry>=pIn->pg.nEntry ){ + iInEntry = 
0; + iIn++; + } + } + pNode->pg.nEntry = nCell; + nRem -= nCell; + } + } + + sqlite3_free(pFree); + return rc; +} + +/* +** This function handles the second part of an insert or delete operation +** on an internal intkey node key. The implementation is separate from the +** usual insert/delete routine because internal intkey nodes use fixed size +** records. The other three types of pages found in lists - intkey leaves, +** index leaves and index nodes - all use variable sized entries. +*/ +static int hctDbInsertIntkeyNode( + HctDatabase *pDb, + HctDbWriter *p, + int iPg, + int iInsert, + i64 iKey, /* Integer key value */ + u32 iChildPg, /* The child pgno */ + int bClobber, /* True to clobber entry iInsert */ + int bDel /* True for a delete operation */ +){ + int nMax = hctDbMaxCellsPerIntkeyNode(pDb->pgsz); + int nMin = hctDbMinCellsPerIntkeyNode(pDb->pgsz); + HctDbIntkeyNode *pNode; + int rc = SQLITE_OK; + + /* If bDel is set, then bClobber must also be set. */ + assert( bDel==0 || bClobber ); + + pNode = (HctDbIntkeyNode*)p->writepg.aPg[iPg].aNew; + if( (pNode->pg.nEntry>=nMax && bClobber==0 && bDel==0 ) ){ + /* Need to do a balance operation to make room for the new entry */ + rc = hctDbBalanceIntkeyNode(pDb, p, iPg, iInsert, iKey, iChildPg); + }else if( bDel ){ + assert( iInsertpg.nEntry ); + if( iInsert==0 ){ + rc = hctDbLoadPeers(pDb, p, &iPg); + pNode = (HctDbIntkeyNode*)p->writepg.aPg[iPg].aNew; + } + if( rc==SQLITE_OK ){ + if( iInsert<(pNode->pg.nEntry-1) ){ + int nByte = sizeof(HctDbIntkeyNodeEntry) * (pNode->pg.nEntry-1-iInsert); + memmove(&pNode->aEntry[iInsert], &pNode->aEntry[iInsert+1], nByte); + } + pNode->pg.nEntry--; + if( iInsert==0 || pNode->pg.nEntrypg.nEntry ){ + int nByte = sizeof(HctDbIntkeyNodeEntry) * (pNode->pg.nEntry-iInsert); + memmove(&pNode->aEntry[iInsert+1], &pNode->aEntry[iInsert], nByte); + } + pNode->pg.nEntry++; + } + pNode->aEntry[iInsert].iKey = iKey; + pNode->aEntry[iInsert].iChildPg = iChildPg; + 
pNode->aEntry[iInsert].unused = 0; + } + + return rc; +} + + +/* +** The buffer passed as the first +*/ +static int hctDbFreegap(void *aPg){ + assert( + (hctPagetype(aPg)==HCT_PAGETYPE_INTKEY && hctPageheight(aPg)==0) + || (hctPagetype(aPg)==HCT_PAGETYPE_INDEX) + ); + return ((HctDbIndexNode*)aPg)->hdr.nFreeGap; +} + +static int hctDbFreebytes(void *aPg){ + assert( + (hctPagetype(aPg)==HCT_PAGETYPE_INTKEY && hctPageheight(aPg)==0) + || (hctPagetype(aPg)==HCT_PAGETYPE_INDEX) + ); + return ((HctDbIndexNode*)aPg)->hdr.nFreeBytes; +} + +static int hctDbInsertOverflow( + HctDatabase *pDb, + HctDbWriter *pWriter, + u8 *aTarget, + int nData, + const u8 *aData, + int *pnWrite, + u32 *ppgOvfl +){ + int rc = SQLITE_OK; + int nLocal = hctDbLocalsize(aTarget, pDb->pgsz, nData); + + if( nLocal==nData ){ + *pnWrite = nData; + *ppgOvfl = 0; + }else{ + const int sz = (pDb->pgsz - sizeof(HctDbPageHdr)); + int nRem; + int nCopy; + u32 iPg = 0; + int nOvfl = 0; + + nRem = nData; + nCopy = (nRem-nLocal) % sz; + if( nCopy==0 ) nCopy = sz; + while( rc==SQLITE_OK && nRem>nLocal ){ + HctFilePage pg; + nOvfl++; + rc = sqlite3HctFilePageNewPhysical(pDb->pFile, &pg); + if( rc==SQLITE_OK ){ + HctDbPageHdr *pPg = (HctDbPageHdr*)pg.aNew; + memset(pPg, 0, sizeof(HctDbPageHdr)); + pPg->iPeerPg = iPg; + pPg->nEntry = nCopy; + hctMemcpy(&pPg[1], &aData[nRem-nCopy], nCopy); + iPg = pg.iNewPg; + sqlite3HctFilePageRelease(&pg); + } + nRem -= nCopy; + nCopy = sz; + } + + *ppgOvfl = iPg; + *pnWrite = nLocal; + + if( rc==SQLITE_OK ){ + rc = hctDbOverflowArrayAppend(&pWriter->insOvfl, iPg, nOvfl); + } + } + + return rc; +} + +static void hctDbRemoveCell( + HctDatabase *pDb, + HctDbWriter *pWriter, + u8 *aTarget, + int iRem +){ + HctDbIndexNode *p = (HctDbIndexNode*)aTarget; + const int eType = hctPagetype(aTarget); + const int nHeight = hctPageheight(aTarget); + const int pgsz = pDb->pgsz; + + int szEntry = 0; /* Size of each entry in aEntry[] array */ + int iArrayOff = 0; /* Offset of aEntry array in 
aTarget */ + int iData = 0; /* Offset of cell in aTarget[] */ + int nData = 0; /* Local size of cell to remove */ + + /* Populate stack variables szEntry, iArrayOff, iData and nData. */ + assert( eType==HCT_PAGETYPE_INTKEY || eType==HCT_PAGETYPE_INDEX ); + assert( eType==HCT_PAGETYPE_INDEX || nHeight==0 ); + if( eType==HCT_PAGETYPE_INTKEY ){ + HctDbIntkeyEntry *pEntry = &((HctDbIntkeyLeaf*)aTarget)->aEntry[iRem]; + iData = pEntry->iOff; + nData = hctDbIntkeyEntrySize(pEntry, pgsz); + szEntry = sizeof(*pEntry); + iArrayOff = sizeof(HctDbIntkeyLeaf); + }else if( nHeight==0 ){ + HctDbIndexEntry *pEntry = &((HctDbIndexLeaf*)aTarget)->aEntry[iRem]; + iData = pEntry->iOff; + nData = hctDbIndexEntrySize(pEntry, pgsz); + szEntry = sizeof(*pEntry); + iArrayOff = sizeof(HctDbIndexLeaf); + }else{ + HctDbIndexNodeEntry *pEntry = &((HctDbIndexNode*)aTarget)->aEntry[iRem]; + iData = pEntry->iOff; + nData = hctDbIndexNodeEntrySize(pEntry, pgsz); + szEntry = sizeof(*pEntry); + iArrayOff = sizeof(HctDbIndexNode); + } + + /* Remove the aEntry[] array entry */ + if( iRempg.nEntry-1 ){ + u8 *aTo = &aTarget[iArrayOff + iRem*szEntry]; + memmove(aTo, &aTo[szEntry], (p->pg.nEntry-iRem-1) * szEntry); + } + p->pg.nEntry--; + p->hdr.nFreeBytes += szEntry; + p->hdr.nFreeGap += szEntry; + + /* Remove the cell from the data area */ + if( iData==(iArrayOff + szEntry*p->pg.nEntry + p->hdr.nFreeGap) ){ + int ii; + int iFirst = pDb->pgsz; + p->hdr.nFreeGap += nData; + for(ii=0; iipg.nEntry; ii++){ + int iOff = ((HctDbIndexEntry*)&aTarget[iArrayOff + szEntry*ii])->iOff; + if( iOff && iOffhdr.nFreeGap = iFirst - (iArrayOff + szEntry*p->pg.nEntry); + } + p->hdr.nFreeBytes += nData; + +} + + +/* +** This is called as part of a bulk insert of contiguous keys. At present +** this only occurs as part of a migrate, but in the future it could be +** auto-detected. 
+*/ +static int hctDbBalanceMigrate( + HctDatabase *pDb, + HctDbWriter *p, + HctDbInsertOp *pOp +){ + HctDbLeaf *pLeaf = (HctDbLeaf*)p->writepg.aPg[0].aNew; + int ii = 0; + + assert( p->writepg.nPg==1 ); + assert( p->bAppend==0 ); + assert( p->iHeight==0 ); + assert( pOp->iInsert<=pLeaf->pg.nEntry ); + assert( pOp->eBalance==BALANCE_REQUIRED || pOp->eBalance==BALANCE_OPTIONAL ); + + /* Set nMigrateKey to the number of keys to copy from p->writepg.aPg[0].aOld + ** before flushing the current array of pages to disk. */ + p->nMigrateKey = pLeaf->pg.nEntry - pOp->iInsert; + + /* Remove the last nMigrateKey cells from the page. */ + for(ii=0; iinMigrateKey; ii++){ + hctDbRemoveCell(pDb, 0, (u8*)pLeaf, pLeaf->pg.nEntry-1); + } + p->bAppend = 1; + + /* Use a regular balance to make space for the new key */ + pOp->eBalance = BALANCE_REQUIRED; + return hctDbBalance(pDb, p, pOp, 0); +} + +/* +** Buffer aTarget must contain the image of a page that uses variable +** length records - an intkey leaf, or an index leaf or node. This +** function does part of the job of inserting a new record into the +** page. +** +** Buffer aEntry[], size nEntry bytes, contains the sequence of bytes that +** will be stored in the data area of the page (i.e. any serialized +** tids, the old page number if any, any overflow page number and the +** portion of the database record that will be stored on the main +** page. Parameter iIns specifies the index within the page at which +** the new entry will be inserted. 
+*/ +static void hctDbInsertEntry( + HctDatabase *pDb, + u8 *aTarget, + int iIns, + const u8 *aEntry, + int nEntry +){ + HctDbIndexNode *p = (HctDbIndexNode*)aTarget; + int szEntry = 0; /* Size of each entry in aEntry[] array */ + int iEntry0 = 0; /* Offset of aEntry array in aTarget */ + int iOff = 0; /* Offset of new cell data in aTarget */ + u8 *aFrom = 0; + + iEntry0 = hctDbEntryArrayDim(aTarget, &szEntry); + + /* This might fail if the db is corrupt */ + assert( p->hdr.nFreeGap>=(nEntry + szEntry) ); + + /* Insert the new zeroed entry into the aEntry[] array */ + aFrom = &aTarget[iEntry0 + szEntry*iIns]; + if( iInspg.nEntry ){ + memmove(&aFrom[szEntry], aFrom, (p->pg.nEntry-iIns) * szEntry); + } + memset(aFrom, 0, szEntry); + p->hdr.nFreeBytes -= szEntry; + p->hdr.nFreeGap -= szEntry; + p->pg.nEntry++; + + /* Insert the cell into the data area */ + iOff = iEntry0 + p->pg.nEntry*szEntry + p->hdr.nFreeGap - nEntry; + hctMemcpy(&aTarget[iOff], aEntry, nEntry); + p->hdr.nFreeBytes -= nEntry; + p->hdr.nFreeGap -= nEntry; + + /* Set the aEntry[].iOff field */ + ((HctDbIndexEntry*)aFrom)->iOff = iOff; +} + + +static int hctDbMigrateReinsertKeys(HctDatabase *pDb, HctDbWriter *p){ + int rc = SQLITE_OK; + if( p->nMigrateKey>0 ){ + assert( p->iHeight==0 ); + + /* Append a page to the write-array */ + rc = hctDbExtendWriteArray(pDb, p, p->writepg.nPg, 1); + + + if( rc==SQLITE_OK ){ + int ii = 0; + HctDbInsertOp op; + HctDbLeaf *pOld = (HctDbLeaf*)p->writepg.aPg[0].aOld; + HctDbLeaf *pNew = (HctDbLeaf*)p->writepg.aPg[p->writepg.nPg-1].aNew; + + /* TODO: Might this not be a part of ExtendWriteArray() ? */ + pNew->hdr.nFreeBytes = pDb->pgsz - sizeof(HctDbLeaf); + pNew->hdr.nFreeGap = pNew->hdr.nFreeBytes; + + /* Loop through the last nMigrateKey on the old page, copying them + ** to the new page. 
*/ + for(ii=0; iinMigrateKey; ii++){ + int iOld = (pOld->pg.nEntry - p->nMigrateKey) + ii; + HctDbIndexEntry *pOldE = 0; + HctDbIndexEntry *pNewE = 0; + int nEntry = 0; + + pOldE = hctDbEntryEntry(pOld, iOld); + nEntry = hctDbPageRecordSize(pOld, pDb->pgsz, iOld); + hctDbInsertEntry(pDb, (u8*)pNew, ii, &((u8*)pOld)[pOldE->iOff], nEntry); + + pNewE = hctDbEntryEntry(pNew, ii); + pNewE->nSize = pOldE->nSize; + pNewE->flags = pOldE->flags; + if( hctPagetype(pOld)==HCT_PAGETYPE_INTKEY ){ + ((HctDbIntkeyEntry*)pNewE)->iKey = ((HctDbIntkeyEntry*)pOldE)->iKey; + } + } + + memset(&op, 0, sizeof(op)); + op.iPg = p->writepg.nPg-1; + op.iInsert = -1; + op.eBalance = BALANCE_OPTIONAL; + rc = hctDbBalance(pDb, p, &op, 0); + } + } + + return rc; +} + +/* +** Parameter aTarget points to a buffer containing an intkey or index +** internal node. Return the child-page number for entry iInsert on +** that page. +*/ +u32 hctDbGetChildPage(u8 *aTarget, int iInsert){ + const int eType = hctPagetype(aTarget); + u32 iChildPg; + if( eType==HCT_PAGETYPE_INTKEY ){ + iChildPg = ((HctDbIntkeyNode*)aTarget)->aEntry[iInsert].iChildPg; + }else{ + assert( eType==HCT_PAGETYPE_INDEX ); + iChildPg = ((HctDbIndexNode*)aTarget)->aEntry[iInsert].iChildPg; + } + return iChildPg; +} + +static void hctDbClobberEntry( + HctDatabase *pDb, + u8 *aTarget, + HctDbInsertOp *pOp +){ + HctDbIndexEntry *pEntry; /* Entry being clobbered */ + int nOld = hctDbPageRecordSize(aTarget, pDb->pgsz, pOp->iInsert); + + pEntry = hctDbEntryEntry(aTarget, pOp->iInsert); + pEntry->nSize = pOp->nEntrySize; + pEntry->flags = pOp->entryFlags; + + memcpy(&aTarget[pEntry->iOff], pOp->aEntry, pOp->nEntry); + ((HctDbIndexNode*)aTarget)->hdr.nFreeBytes += (nOld - pOp->nEntry); + + pDb->stats.nUpdateInPlace++; +} + +static int hctDbFindOldPage( + HctDatabase *pDb, + HctDbWriter *p, + UnpackedRecord *pKey, + i64 iKey, + u32 *piOld, + const u8 **paOld +){ + HctFilePage *pPg = 0; + int rc = SQLITE_OK; + int iTest; + + 
for(iTest=p->discardpg.nPg-1; iTest>=0; iTest--){ + pPg = &p->discardpg.aPg[iTest]; + if( pKey ){ + int bGe = 0; + rc = hctDbCompareFPKey(pDb, pKey, pPg->aOld, &bGe); + if( bGe || rc!=SQLITE_OK ) break; + }else{ + i64 iFP = hctDbIntkeyFPKey(pPg->aOld); + if( iKey>=iFP ) break; + } + pPg = 0; + } + + if( pPg==0 ){ + pPg = &p->writepg.aPg[0]; + } + assert( pPg->iOldPg!=0 ); + *piOld = pPg->iOldPg; + *paOld = pPg->aOld; + + return rc; +} + +static u64 hctDbGetRangeTidByIdx(HctDatabase *pDb, u8 *aTarget, int iIdx){ + HctDbCell cell; + hctDbCellGetByIdx(pDb, aTarget, iIdx, &cell); + return cell.iRangeTid; +} + +static u32 hctDbMakeFollowPtr( + int *pRc, + HctDatabase *pDb, + u64 iFollowTid, + u32 iPg +){ + int rc = *pRc; + HctFilePage pg; + u32 iRet = 0; + + memset(&pg, 0, sizeof(pg)); + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageNewPhysical(pDb->pFile, &pg); + iRet = pg.iNewPg; + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctFileClearPhysInUse(pDb->pFile, iRet, 0); + } + if( rc==SQLITE_OK ){ + HctDbHistoryFan *pFan = (HctDbHistoryFan*)pg.aNew; + memset(pFan, 0, sizeof(*pFan)); + pFan->pg.hdrFlags = HCT_PAGETYPE_HISTORY; + pFan->pg.nEntry = 1; + pFan->iRangeTid0 = pDb->iTid; + pFan->iFollowTid0 = iFollowTid; + pFan->pgOld0 = iPg; + rc = sqlite3HctFilePageRelease(&pg); + }else{ + sqlite3HctFilePageUnwrite(&pg); + sqlite3HctFilePageRelease(&pg); + } + + *pRc = rc; + return iRet; +} + +static int hctDbDelete( + HctDatabase *pDb, + HctDbWriter *p, + UnpackedRecord *pRec, + HctDbInsertOp *pOp +){ + u64 iTidOr = (pDb->eMode==HCT_MODE_ROLLBACK ? 
HCT_TID_ROLLBACK_OVERRIDE : 0); + u64 iSafeTid = sqlite3HctFileSafeTID(pDb->pFile); + u64 iTidValue = pDb->iTid | iTidOr; + u64 iDelRangeTid = 0; + int rc = SQLITE_OK; + u8 *aNull = 0; + int prevFlags = 0; + int nLocalSz = 0; + u8 *aTarget = p->writepg.aPg[pOp->iPg].aNew; + int bLeftmost = (hctIsLeftmost(aTarget) && pOp->iInsert==0); + + HctDbCell prev; /* Previous cell on page */ + + assert( pOp->bFullDel==0 ); + + if( pOp->iInsert==0 && !bLeftmost ){ + /* If deleting the first key on the first page, set the eBalance flag (as + ** deleting a FP key means the parent list must be adjusted) and load peer + ** pages into memory. */ + pOp->eBalance = BALANCE_REQUIRED; + if( pOp->iPg==0 ){ + rc = hctDbLoadPeers(pDb, p, &pOp->iPg); + if( rc!=SQLITE_OK ) return rc; + aTarget = p->writepg.aPg[pOp->iPg].aNew; + } + } + assert_page_is_ok(aTarget, pDb->pgsz); + + /* Deal with the case where the cell we are about to remove (cell iInsert) + ** has a range-tid greater than that of the current transaction (iTid) */ + iDelRangeTid = hctDbGetRangeTidByIdx(pDb, aTarget, pOp->iInsert); + if( (iDelRangeTid & HCT_TID_MASK)>pDb->iTid ){ + iTidValue = iDelRangeTid; + pOp->iOldPg = hctDbMakeFollowPtr(&rc, pDb, iDelRangeTid, pOp->iOldPg); + sqlite3HctFilePageRelease(&p->fanpg); + } + + if( bLeftmost ){ + int nNull = 0; + + memset(&prev, 0, sizeof(prev)); + prev.iTid = LARGEST_TID; + prevFlags |= HCTDB_HAS_TID; + + assert( pOp->iPg==0 ); + if( hctPagetype(aTarget)==HCT_PAGETYPE_INDEX ){ + int nField = p->writecsr.pKeyInfo->nAllField; + int nByte = nField + 9; + aNull = sqlite3HctMalloc(&rc, nByte); + if( rc!=SQLITE_OK ) return rc; + if( nField<=126 ){ + aNull[0] = nField+1; + nNull = nField+1; + } + else if( nField<=16382 ){ + sqlite3PutVarint(aNull, nField+2); + nNull = nField+2; + }else{ + assert( sqlite3VarintLen(nField+3)==3 ); + sqlite3PutVarint(aNull, nField+3); + nNull = nField+3; + } + prev.aPayload = aNull; + } + prev.iTid = LARGEST_TID; + prevFlags |= HCTDB_HAS_TID; + 
pOp->nEntrySize = nNull; + nLocalSz = hctDbLocalsize(aTarget, pDb->pgsz, pOp->nEntrySize); + + }else{ + HctDbIndexEntry *pPrev = 0; + + /* Remove the cell being deleted from the target page. This must be done + ** after hctDbLoadPeers() is called (if it is called). */ + assert_page_is_ok(aTarget, pDb->pgsz); + hctDbRemoveCell(pDb, p, aTarget, pOp->iInsert); + assert_page_is_ok(aTarget, pDb->pgsz); + if( pOp->iInsert==0 ){ + assert( pOp->iPg>0 ); + pOp->iPg--; + aTarget = p->writepg.aPg[pOp->iPg].aNew; + assert( hctPagenentry(aTarget)>0 ); + pOp->iInsert = ((HctDbPageHdr*)aTarget)->nEntry - 1; + }else{ + pOp->iInsert--; + } + + /* Load the cell immediately before the one just removed */ + pPrev = hctDbEntryEntry(aTarget, pOp->iInsert); + pOp->nEntrySize = pPrev->nSize; + prevFlags = pPrev->flags; + + hctDbCellGet(pDb, &aTarget[pPrev->iOff], pPrev->flags, &prev); + nLocalSz = hctDbLocalsize(aTarget, pDb->pgsz, pOp->nEntrySize); + } + + /* Update the range-tid and range-oldpg fields. There are several + ** possibilities: + ** + ** 1) The left-hand-cell already has the desired range-pointer values + ** (both TID and old-page-number). + ** + ** 2) The left-hand-cell does not have a range-pointer. Or else + ** has a range-pointer so old it can be overwritten with impunity. + ** + ** 3) The left-hand-cell has a range-pointer to a fan-page that was + ** created by the current HctDbWriter batch, and that fan-page + ** is not already full. + ** + ** 4) None of the above are true. A new fan-page must be created. 
+ */ + if( prev.iRangeTid==iTidValue && prev.iRangeOld==pOp->iOldPg ){ + /* Possibility (1) */ + pOp->bFullDel = 1; + pOp->iInsert = -1; + } + else if( prev.iRangeTid==0 || (prev.iRangeTid & HCT_TID_MASK)<=iSafeTid ){ + /* Possibility (2) */ + prev.iRangeTid = iTidValue; + prev.iRangeOld = pOp->iOldPg; + }else if( prev.iRangeOld==p->fanpg.iNewPg ){ + /* Possibility (3) */ + HctDbHistoryFan *pFan = (HctDbHistoryFan*)p->fanpg.aNew; + assert( pFan->iRangeTid1==iTidValue ); + if( pFan->aPgOld1[pFan->pg.nEntry-2]!=pOp->iOldPg ){ + const int nMax = ((pDb->pgsz - sizeof(HctDbHistoryFan))/sizeof(u32)); + assert( pFan->pg.nEntryaPgOld1[pFan->pg.nEntry-1] = pOp->iOldPg; + pFan->pg.nEntry++; + if( pFan->pg.nEntry==nMax ){ + rc = sqlite3HctFilePageRelease(&p->fanpg); + } + } + pOp->bFullDel = 1; + pOp->iInsert = -1; + }else{ + /* Possibility (4) */ + rc = sqlite3HctFilePageRelease(&p->fanpg); + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageNewPhysical(pDb->pFile, &p->fanpg); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctFileClearPhysInUse(pDb->pFile, p->fanpg.iNewPg, 0); + } + if( rc==SQLITE_OK ){ + int bDummy = 0; + HctDbHistoryFan *pFan = (HctDbHistoryFan*)p->fanpg.aNew; + memset(pFan, 0, pDb->pgsz); + pFan->pg.hdrFlags = HCT_PAGETYPE_HISTORY; + pFan->pg.nEntry = 2; + pFan->iRangeTid0 = prev.iRangeTid; + pFan->iFollowTid0 = prev.iRangeTid; + pFan->pgOld0 = prev.iRangeOld; + rc = hctDbLeafSearch( + pDb, pOp->aOldPg, pOp->iIntkey, pRec, &pFan->iSplit0, &bDummy + ); + assert( bDummy ); + pFan->iRangeTid1 = iTidValue; + pFan->aPgOld1[0] = pOp->iOldPg; + prev.iRangeOld = p->fanpg.iNewPg; + if( (prev.iRangeTid & HCT_TID_MASK)<(iTidValue & HCT_TID_MASK) ){ + prev.iRangeTid = iTidValue; + } + } + } + + if( rc==SQLITE_OK && pOp->bFullDel==0 ){ + prev.iRangeTid |= iTidOr; + pOp->aEntry = pDb->aTmp; + pOp->nEntry = hctDbCellPut(pOp->aEntry, &prev, nLocalSz); + pOp->entryFlags = prevFlags | HCTDB_HAS_RANGETID | HCTDB_HAS_RANGEOLD; + if( hctPagetype(aTarget)==HCT_PAGETYPE_INTKEY ){ + if( 
bLeftmost ){ + pOp->iIntkey = SMALLEST_INT64; + }else{ + pOp->iIntkey = ((HctDbIntkeyLeaf*)aTarget)->aEntry[pOp->iInsert].iKey; + } + } + } + + assert_page_is_ok(aTarget, pDb->pgsz); + if( aNull ) sqlite3_free(aNull); + return rc; +} + +static int hctDbInsertFindPosition( + HctDatabase *pDb, + HctDbWriter *p, + u32 iRoot, + UnpackedRecord *pRec, + i64 iKey, + HctDbInsertOp *pOp, + int *pbClobber +){ + const RecordCompare xCompare = pRec ? sqlite3VdbeFindCompare(pRec) : 0; + int rc = SQLITE_OK; + + if( p->writepg.nPg==0 ){ + if( p->writecsr.iRoot!=iRoot ){ + hctDbCsrInit(pDb, iRoot, 0, &p->writecsr); + }else{ + hctDbCsrReset(&p->writecsr); + } + if( pRec ){ + p->writecsr.pKeyInfo = sqlite3KeyInfoRef(pRec->pKeyInfo); + } + rc = hctDbCsrSeek( + &p->writecsr, &p->fp, p->iHeight, xCompare, pRec, iKey, pbClobber + ); + if( rc ) return rc; + pOp->iInsert = p->writecsr.iCell; + if( *pbClobber==0 ) pOp->iInsert++; + + p->writepg.aPg[0] = p->writecsr.pg; + memset(&p->writecsr.pg, 0, sizeof(HctFilePage)); + + assert( p->bDoCleanup ); + p->writepg.nPg = 1; + rc = sqlite3HctFilePageWrite(&p->writepg.aPg[0]); + if( rc ) return rc; + hctMemcpy(p->writepg.aPg[0].aNew, p->writepg.aPg[0].aOld, pDb->pgsz); + if( p->fp.iKey==0 ){ + rc = hctDbSetWriteFpKey(pDb, p); + } + if( rc ) return rc; + }else if( pRec ){ + HctBuffer buf = {0,0,0}; + for(pOp->iPg=p->writepg.nPg-1; pOp->iPg>0; pOp->iPg--){ + const u8 *aK; + int nK; + rc = hctDbLoadRecord( + pDb, &buf, p->writepg.aPg[pOp->iPg].aNew, 0, &nK, &aK + ); + if( rc!=SQLITE_OK ){ + sqlite3HctBufferFree(&buf); + return rc; + } + if( xCompare(nK, aK, pRec)<=0 ) break; + } + sqlite3HctBufferFree(&buf); + rc = hctDbIndexSearch(pDb, + p->writepg.aPg[pOp->iPg].aNew, xCompare, pRec, &pOp->iInsert, pbClobber + ); + if( rc!=SQLITE_OK ) return rc; + }else{ + for(pOp->iPg=p->writepg.nPg-1; pOp->iPg>0; pOp->iPg--){ + if( hctDbIntkeyFPKey(p->writepg.aPg[pOp->iPg].aNew)<=iKey ) break; + } + if( p->iHeight==0 ){ + pOp->iInsert = hctDbIntkeyLeafSearch( + 
p->writepg.aPg[pOp->iPg].aNew, iKey, pbClobber + ); + }else{ + pOp->iInsert = hctDbIntkeyNodeSearch( + p->writepg.aPg[pOp->iPg].aNew, iKey, pbClobber + ); + } + } + + return rc; +} + +static int hctDbWriteWriteConflict( + HctDatabase *pDb, + HctDbWriter *p, + HctDbInsertOp *pOp, + UnpackedRecord *pKey, + i64 iKey, + int bClobber +){ + int rc = SQLITE_OK; + const u8 *aTarget = p->writepg.aPg[pOp->iPg].aNew; + + assert( p->iHeight==0 && pDb->eMode==HCT_MODE_NORMAL ); + + if( bClobber ){ + HctDbIndexEntry *pE; + if( pKey ){ + pE = &((HctDbIndexLeaf*)aTarget)->aEntry[pOp->iInsert]; + }else{ + pE = (HctDbIndexEntry*)&((HctDbIntkeyLeaf*)aTarget)->aEntry[pOp->iInsert]; + } + if( pE->flags & HCTDB_HAS_TID ){ + u64 iTid; + hctMemcpy(&iTid, &aTarget[pE->iOff], sizeof(u64)); + if( hctDbTidIsConflict(pDb, iTid) ){ + rc = HCT_SQLITE_BUSY; + } + } + }else if( pOp->iInsert>0 ){ + int iCell = 0; + int bMerge = 0; + HctRangePtr ptr; + + iCell = (pOp->iInsert - 1); + hctDbGetRange(aTarget, iCell, &ptr); + while( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ){ + HctFilePage pg; + const u8 *aOld = 0; + + if( ptr.iOld==pDb->pa.fanpg.iNewPg ){ + aOld = pDb->pa.fanpg.aNew; + memset(&pg, 0, sizeof(pg)); + }else{ + rc = hctDbGetPhysical(pDb, ptr.iOld, &pg); + aOld = pg.aOld; + } + + /* assert( bMerge==0 || iRangeTid!=pDb->iTid ); */ + if( rc==SQLITE_OK ){ + int iCell = 0; + if( hctPagetype(aOld)==HCT_PAGETYPE_HISTORY ){ + iCell = hctDbFanSearch(&rc, pDb, aOld, pKey, iKey); + }else{ + int bExact = 0; + rc = hctDbLeafSearch(pDb, aOld, iKey, pKey, &iCell, &bExact); + if( rc==SQLITE_OK && bExact ){ + if( bMerge ){ + HctDbCell cell; + hctDbCellGetByIdx(pDb, aOld, iCell, &cell); + if( hctDbTidIsVisible(pDb, cell.iTid, 0) ) rc = HCT_SQLITE_BUSY; + } + sqlite3HctFilePageRelease(&pg); + break; + }else{ + iCell--; + } + if( rc ){ + sqlite3HctFilePageRelease(&pg); + break; + } + } + + hctDbGetRange(aOld, iCell, &ptr); + sqlite3HctFilePageRelease(&pg); + }else{ + break; + } + } + } + + return rc; +} + 
+static int hctDbInsert( + HctDatabase *pDb, + HctDbWriter *p, + u32 iRoot, + UnpackedRecord *pRec, /* The key value for index tables */ + i64 iKey, /* For intkey tables, the key value */ + u32 iChildPg, /* For internal node ops, the child pgno */ + int bDel, /* True for a delete operation */ + int nData, const u8 *aData /* Record/key to insert */ +){ + const RecordCompare xCompare = pRec ? sqlite3VdbeFindCompare(pRec) : 0; + int rc = SQLITE_OK; + int bClobber = 0; + u8 *aTarget; /* Page to write new entry to */ + HctDbInsertOp op = {0,0,0,0,0,0,0,0,0,0,0}; + int bUpdateInPlace = 0; + + p->nWriteKey++; + + assert( pDb->eMode==HCT_MODE_NORMAL || pDb->eMode==HCT_MODE_ROLLBACK ); + + /* Check if any existing dirty pages need to be flushed to disk before + ** this key can be inserted. If they do, flush them. */ + assert( p->writepg.nPg==0 || iRoot==p->writecsr.iRoot ); + assert( p->writepg.nPg>0 || p->bAppend==0 ); + if( p->writepg.nPg ){ + assert( p->bDoCleanup ); + if( p->writepg.nPg>HCTDB_MAX_DIRTY + || p->discardpg.nPg>=HCTDB_MAX_DIRTY + || hctDbTestWriteFpKey(p, xCompare, pRec, iKey) + ){ + rc = hctDbInsertFlushWrite(pDb, p); + if( rc ) return rc; + p->nWriteKey = 1; + } + } + + p->bDoCleanup = 1; + rc = hctDbWriterGrow(p); + if( rc ) return rc; + + /* This block sets stack variables: + ** + ** op.iPg: Index of page in HctDbWriter.writepg.aPg[] to write to. + ** op.iInsert: The index of the new, overwritten, or deleted entry + ** within the page. + ** bClobber: True if this write clobbers (or deletes, if bDel) an + ** existing entry. + ** aTarget: The aNew[] buffer of the page that will be written. + ** + ** It also checks if the current key is a write-write conflict. And + ** returns early if so. 
+ */ + if( p->bAppend ){ + assert( bClobber==0 ); + assert( p->writepg.nPg>0 ); + op.iPg = p->writepg.nPg-1; + aTarget = p->writepg.aPg[op.iPg].aNew; + op.iInsert = hctPagenentry(aTarget); + }else{ + /* If the page array is empty, seek the write cursor to find the leaf + ** page on which to insert this new entry or delete key. + ** + ** Otherwise, figure out which page in the HctDbWriter.aWritePg[] array the + ** new entry belongs on. */ + rc = hctDbInsertFindPosition(pDb, p, iRoot, pRec, iKey, &op, &bClobber); + if( rc ) return rc; + aTarget = p->writepg.aPg[op.iPg].aNew; + assert( aTarget ); + + /* If this is a write to a leaf page, and not part of a rollback, + ** check for a write-write conflict here. */ + if( 0==p->iHeight + && pDb->eMode==HCT_MODE_NORMAL + && (rc=hctDbWriteWriteConflict(pDb, p, &op, pRec, iKey, bClobber)) + ){ + return rc; + } + } + + if( bClobber==0 && bDel ){ + return SQLITE_OK; + } + + /* At this point, once the page that will be modified has been loaded + ** and marked as writable, if the operation is on an internal list: + ** + ** 1) For an insert, check if the child page has already been marked + ** as EVICTED by some other client. If so, return early. + ** + ** 2) For a delete, check that there is an entry to delete. And if so, + ** that the value of its child-page field matches iChildPg. If + ** not, return early. Note that the page marked as writable will + ** still be flushed to disk in this case - even though it may be + ** unmodified. + ** + ** This resolves a race condition that may occur if client B starts + ** removing page X from a list before client A has finished inserting + ** the corresponding entry into the parent list. Specifically: + ** + ** + when client A gets here, if the EVICTED flag is not set on page X, + ** then client B will try to delete the corresponding entry from + ** the parent list at some point in the future. 
This will either + ** occur after client A has updated the list, in which case no + ** problem, or it will cause client A's attempt to flush the modified + ** page to disk to fail. Client A will retry, see the EVICTED flag + ** is set, and continue. + ** + ** + or, if EVICTED is set, then there is no point in writing the + ** entry into the parent list. + */ + assert( rc==SQLITE_OK ); + if( p->iHeight>0 ){ + if( bDel==0 && sqlite3HctFilePageIsEvicted(pDb->pFile, iChildPg) ){ + return SQLITE_OK; + } + if( bDel ){ + u32 iChild = hctDbGetChildPage(aTarget, op.iInsert); + if( iChild!=iChildPg ) return SQLITE_OK; + } + } + + /* Writes to an intkey internal node are handled separately. They are + ** different because they used fixed size key/data pairs. All other types + ** of page use variably sized key/data entries. */ + if( pRec==0 && p->iHeight>0 ){ + return hctDbInsertIntkeyNode( + pDb, p, op.iPg, op.iInsert, iKey, iChildPg, bClobber, bDel + ); + } + + if( p->iHeight>0 ){ + op.bFullDel = bDel; + } + + if( rc ){ + assert( !"is this really possible?" ); + return rc; + } + + /* If this is a clobber or delete operation and the entry being removed + ** has an overflow chain, add an entry to HctDbWriter.delOvfl. */ + if( bClobber ){ + hctDbRemoveOverflow(pDb, p, aTarget, op.iInsert); + } + + /* Populate the following variables: + ** + ** entryFlags + ** aEntry + ** nEntry + ** nEntrySize + ** + ** This block populates the above variables. It also inserts overflow pages. 
+ */ + op.iIntkey = iKey; + if( op.bFullDel==0 ){ + + if( p->iHeight==0 && (bClobber || bDel) ){ + rc = hctDbFindOldPage(pDb, p, pRec, iKey, &op.iOldPg, &op.aOldPg); + if( rc!=SQLITE_OK ) goto insert_out; + assert( op.iOldPg!=0 ); + } + + if( bDel && p->iHeight==0 ){ + assert( bClobber ); + rc = hctDbDelete(pDb, p, pRec, &op); + aTarget = p->writepg.aPg[op.iPg].aNew; + assert_page_is_ok(aTarget, pDb->pgsz); + if( op.bFullDel ) bClobber = 0; + }else{ + HctDbCell cell; + int nLocal = 0; + memset(&cell, 0, sizeof(cell)); + + if( p->iHeight==0 ){ + + /* There should never be a rollback operation while migrating a + ** database. */ + assert( IS_HCT_MIGRATE(pDb)==0 || pDb->eMode!=HCT_MODE_ROLLBACK ); + + if( IS_HCT_MIGRATE(pDb)==0 ){ + cell.iTid = pDb->iTid; + if( pDb->eMode==HCT_MODE_ROLLBACK ){ + cell.iTid |= HCT_TID_ROLLBACK_OVERRIDE; + } + } + + if( bClobber ){ + u64 iOldRangeTid = hctDbGetRangeTidByIdx(pDb, aTarget, op.iInsert); + if( (iOldRangeTid & HCT_TID_MASK)>pDb->iTid ){ + cell.iRangeOld = hctDbMakeFollowPtr(&rc,pDb,iOldRangeTid,op.iOldPg); + cell.iRangeTid = iOldRangeTid; + }else{ + cell.iRangeTid = pDb->iTid; + cell.iRangeOld = op.iOldPg; + } + }else if( op.iInsert>0 ){ + HctDbCell prev; + hctDbCellGetByIdx(pDb, aTarget, op.iInsert-1, &prev); + cell.iRangeTid = prev.iRangeTid; + cell.iRangeOld = prev.iRangeOld; + assert( cell.iRangeTid==0 || cell.iRangeOld!=0 ); + } + } + rc = hctDbInsertOverflow( + pDb, p, aTarget, nData, aData, &nLocal, &cell.iOvfl + ); + cell.aPayload = aData; + + op.aEntry = pDb->aTmp; + op.nEntry = hctDbCellPut(op.aEntry, &cell, nLocal); + op.nEntrySize = nData; + op.entryFlags = hctDbCellToFlags(&cell); + } + + assert( rc!=SQLITE_OK || op.bFullDel || op.aEntry==pDb->aTmp ); + if( rc!=SQLITE_OK ) goto insert_out; + } + + assert( op.aEntry==0 || op.aEntry==pDb->aTmp ); + + /* There are now two choices - either the aTarget[] page can be updated + ** directly (if the new entry fits on the page), or the balance-tree() + ** routine runs to 
redistribute cells between aTarget[] and its peers, + ** writing the new entry at the same time. A balance is required if: + ** + ** 1) there is insufficient space in the free-gap for any new + ** cell and array entry, or + ** + ** 2) this is a full-delete of the fpkey of the page (iInsert==0), or + ** + ** 3) this operation would leave the page underfull, and it is not + ** the only page in its list. + */ + if( op.eBalance==BALANCE_NONE ){ + int szEntry = hctDbPageEntrySize(aTarget); + int nFree = hctDbFreebytes(aTarget); + int nReq = 0; + int nSpace = 0; /* Space freed by removing cell */ + + if( bClobber ){ + nSpace = hctDbPageRecordSize(aTarget, pDb->pgsz, op.iInsert); + nFree += szEntry; + nFree += nSpace; + } + + if( op.bFullDel==0 ){ + if( nSpace>=op.nEntry ) bUpdateInPlace = 1; + nFree -= op.nEntry; + nFree -= szEntry; + nReq = op.nEntry + (bClobber ? 0 : szEntry); + } + + /* If (a) this is a clobber operation, and (b) either the first + ** key on the page is being deleted or else the page will be less + ** than 1/3 full following the update, and (c) the page is not + ** the only page in its linked list, rebalance! */ + if( (bClobber || bDel) /* (a) */ + && ((op.iInsert==0 && op.bFullDel) || (nFree>(2*pDb->pgsz/3))) /* (b) */ + && (hctIsLeftmost(aTarget)==0 || hctPagePeer(aTarget)!=0) /* (c) */ + ){ + /* Target page will be underfull following this op. Rebalance! 
*/ + op.eBalance = BALANCE_REQUIRED; + bUpdateInPlace = 0; + }else if( hctDbFreegap(aTarget)bAppend ){ + rc = hctDbBalanceAppend(pDb, p, &op); + }else if( IS_HCT_MIGRATE(pDb) && p->iHeight==0 ){ + rc = hctDbBalanceMigrate(pDb, p, &op); + }else{ + rc = hctDbBalance(pDb, p, &op, bClobber); + } + if( rc==SQLITE_OK ) assert_all_pages_ok(pDb, p); + aTarget = p->writepg.aPg[op.iPg].aNew; + }else if( bUpdateInPlace ){ + assert_page_is_ok(aTarget, pDb->pgsz); + hctDbClobberEntry(pDb, aTarget, &op); + assert_page_is_ok(aTarget, pDb->pgsz); + }else if( bClobber ){ + assert_page_is_ok(aTarget, pDb->pgsz); + hctDbRemoveCell(pDb, p, aTarget, op.iInsert); + assert_page_is_ok(aTarget, pDb->pgsz); + } + + /* Unless this is a full-delete operation, update rest of the aEntry[] + ** entry fields for the new cell. */ + if( rc==SQLITE_OK && op.bFullDel==0 ){ + int eType = hctPagetype(aTarget); + assert_page_is_ok(aTarget, pDb->pgsz); + assert( op.iInsert>=0 ); + + /* print_out_page("1", aTarget, pDb->pgsz); */ + if( bUpdateInPlace==0 ){ + hctDbInsertEntry(pDb, aTarget, op.iInsert, op.aEntry, op.nEntry); + } + + assert( (pRec==0)==(eType==HCT_PAGETYPE_INTKEY) ); + if( eType==HCT_PAGETYPE_INTKEY ){ + HctDbIntkeyEntry *pE = &((HctDbIntkeyLeaf*)aTarget)->aEntry[op.iInsert]; + pE->iKey = op.iIntkey; + pE->nSize = op.nEntrySize; + pE->flags = op.entryFlags; + }else if( p->iHeight==0 ){ + HctDbIndexEntry *pE = &((HctDbIndexLeaf*)aTarget)->aEntry[op.iInsert]; + pE->nSize = op.nEntrySize; + pE->flags = op.entryFlags; + }else{ + HctDbIndexNodeEntry *pE = &((HctDbIndexNode*)aTarget)->aEntry[op.iInsert]; + pE->nSize = op.nEntrySize; + pE->flags = op.entryFlags; + pE->iChildPg = iChildPg; + } + + /* print_out_page("2", aTarget, pDb->pgsz); */ + assert_page_is_ok(aTarget, pDb->pgsz); + } + + insert_out: + if( rc==SQLITE_OK ){ + assert_all_pages_ok(pDb, p); + assert_all_pages_nonempty(pDb, p); + } + return rc; +} + +SQLITE_PRIVATE int sqlite3HctDbInsert( + HctDatabase *pDb, /* Database to insert into 
or delete from */ + u32 iRoot, /* Root page of table to modify */ + UnpackedRecord *pRec, /* The key value for index tables */ + i64 iKey, /* For intkey tables, the key value */ + int bDel, /* True for a delete, false for insert */ + int nData, const u8 *aData, /* Record/key to insert */ + int *pnRetry /* OUT: number of operations to retry */ +){ + int rc = SQLITE_OK; + int nRecField = pRec ? pRec->nField : 0; + + /* If this operation is inserting an index entry, figure out how many of + ** the record fields to consider when determining if a potential write + ** collision is found in the data structure. */ + sqlite3HctDbRecordTrim(pRec); + +#if 0 + { + char *zText = sqlite3HctDbRecordToText(0, aData, nData); + sqlite3HctFileDebugPrint(pDb->pFile, + "%p: %s sqlite3HctDbInsert(bDel=%d, iKey=%lld, aData={%s}) iTid=%lld\n", + pDb, + (pDb->eMode==HCT_MODE_ROLLBACK ? "RB" : " "), + bDel, iKey, zText, (i64)pDb->iTid + ); + fflush(stdout); + } +#endif + + assert( pDb->eMode==HCT_MODE_NORMAL + || pDb->eMode==HCT_MODE_ROLLBACK + ); + if( pDb->eMode==HCT_MODE_ROLLBACK ){ + int op = 0; + + pDb->pa.bDoCleanup = 1; + if( pDb->rbackcsr.iRoot!=iRoot ){ + hctDbCsrInit(pDb, iRoot, 0, &pDb->rbackcsr); + if( pRec ){ + pDb->rbackcsr.pKeyInfo = sqlite3KeyInfoRef(pRec->pKeyInfo); + } + }else{ + hctDbCsrReset(&pDb->rbackcsr); + } + + rc = sqlite3HctDbCsrRollbackSeek(&pDb->rbackcsr, pRec, iKey, &op); + if( rc==SQLITE_OK ){ + if( op<0 ){ + bDel = 1; + aData = 0; + nData = 0; + }else if( op>0 ){ + rc = sqlite3HctDbCsrData(&pDb->rbackcsr, &nData, &aData); + bDel = 0; + }else{ + /* TODO: It would be nice to assert( op!=0 ) here, but this fails + ** if the original op being rolled back was a no-op delete. If + ** we could note these as they occur, we could bring a form + ** of this assert() back. 
*/ + /* assert( op!=0 ); */ + goto insert_done; + } + } + } + + if( rc==SQLITE_OK ){ + rc = hctDbInsert(pDb, &pDb->pa, iRoot, pRec, iKey, 0, bDel, nData, aData); + if( rc!=SQLITE_OK ){ + hctDbWriterCleanup(pDb, &pDb->pa, 1); + } + } + if( rc==SQLITE_LOCKED || (rc&0xFF)==SQLITE_BUSY ){ + if( rc==SQLITE_LOCKED ){ + rc = SQLITE_OK; + pDb->nCasFail++; + } + *pnRetry = pDb->pa.nWriteKey; + pDb->pa.nWriteKey = 0; + }else{ + *pnRetry = 0; + } + + insert_done: + if( pRec ) pRec->nField = nRecField; + return rc; +} + +/* +** Start the write-phase of a transaction. +*/ +SQLITE_PRIVATE int sqlite3HctDbStartWrite(HctDatabase *p, u64 *piTid){ + int rc = SQLITE_OK; + HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(p->pFile); + + assert( p->iTid==0 ); + assert( p->eMode==HCT_MODE_NORMAL ); + memset(&p->pa, 0, sizeof(p->pa)); + hctDbPageArrayReset(&p->pa.writepg); + hctDbPageArrayReset(&p->pa.discardpg); + + p->nWriteCount = sqlite3HctFileWriteCount(p->pFile); + p->iTid = sqlite3HctFileAllocateTransid(p->pFile); + rc = sqlite3HctTMapNewTID(pTMapClient, p->iTid, &p->pTmap); + *piTid = p->iTid; + return rc; +} + +SQLITE_PRIVATE i64 sqlite3HctDbTid(HctDatabase *p){ + return p->iTid; +} + +/* +** Set HctDatabase.iJrnlWriteCid. +*/ +SQLITE_PRIVATE void sqlite3HctDbJrnlWriteCid(HctDatabase *pDb, u64 iVal){ + pDb->iJrnlWriteCid = iVal; +} + +static u64 *hctDbFindTMapEntry(HctTMap *pTmap, u64 iTid){ + int iMap, iEntry; + assert( pTmap->iFirstTid<=iTid ); + assert( pTmap->iFirstTid+(pTmap->nMap*HCT_TMAP_PAGESIZE)>iTid ); + iMap = (iTid - pTmap->iFirstTid) / HCT_TMAP_PAGESIZE; + iEntry = (iTid - pTmap->iFirstTid) % HCT_TMAP_PAGESIZE; + + iEntry = HCT_TMAP_ENTRYSLOT(iEntry); + return &pTmap->aaMap[iMap][iEntry]; +} + +/* +** This is called once the current transaction has been completely +** written to disk and validated. The CID is passed as the second argument. +** Or, if the transaction was abandoned and rolled back, iCid is passed +** zero. 
+*/ +SQLITE_PRIVATE int sqlite3HctDbEndWrite(HctDatabase *p, u64 iCid, int bRollback){ + int rc = SQLITE_OK; + u64 *pEntry = hctDbFindTMapEntry(p->pTmap, p->iTid); + + assert( p->eMode==HCT_MODE_NORMAL ); + assert( p->pa.writepg.nPg==0 ); + + HctAtomicStore(pEntry, iCid|(bRollback?HCT_TMAP_ROLLBACK:HCT_TMAP_COMMITTED)); + p->iTid = 0; + return rc; +} + +static void hctDbFreeCsrList(HctDbCsr *pList){ + HctDbCsr *pNext = pList; + while( pNext ){ + HctDbCsr *pDel = pNext; + pNext = pNext->pNextScanner; + hctDbFreeCsr(pDel); + } +} + +SQLITE_PRIVATE int sqlite3HctDbEndRead(HctDatabase *pDb){ + HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(pDb->pFile); + // assert( (pDb->iSnapshotId==0)==(pDb->pTmap==0) ); + hctDbFreeCsrList(pDb->pScannerList); + pDb->pScannerList = 0; + if( pDb->pTmap ){ + sqlite3HctTMapEnd(pTMapClient, pDb->iSnapshotId); + pDb->pTmap = 0; + pDb->iSnapshotId = 0; + pDb->bConcurrent = 0; + } + return SQLITE_OK; +} + +/* +** If recovery is still required, this function grabs the file-server +** mutex and returns non-zero. Or, if recovery is not required, returns +** zero without grabbing the mutex. +*/ +SQLITE_PRIVATE int sqlite3HctDbStartRecovery(HctDatabase *pDb, int iStage){ + assert( iStage==0 || iStage==1 ); + assert( pDb->eMode==HCT_MODE_NORMAL ); + if( sqlite3HctFileStartRecovery(pDb->pFile, iStage) ){ + memset(&pDb->pa, 0, sizeof(pDb->pa)); + hctDbPageArrayReset(&pDb->pa.writepg); + hctDbPageArrayReset(&pDb->pa.discardpg); + pDb->eMode = HCT_MODE_ROLLBACK; + + /* During recovery the connection should read the latest version of + ** the db - no exceptions. Set these two to the largest possible + ** values to ensure that this happens. */ + pDb->iSnapshotId = LARGEST_TID-1; + pDb->iLocalMinTid = LARGEST_TID-1; + } + return (pDb->eMode==HCT_MODE_ROLLBACK); +} + +SQLITE_PRIVATE void sqlite3HctDbRecoverTid(HctDatabase *pDb, u64 iTid){ + pDb->iTid = iTid; + pDb->iLocalMinTid = iTid ? 
iTid-1 : 0; +} + +SQLITE_PRIVATE int sqlite3HctDbFinishRecovery(HctDatabase *pDb, int iStage, int rc){ + /* assert( pDb->eMode==HCT_MODE_ROLLBACK ); */ + assert( iStage==0 || iStage==1 ); + assert( pDb->iSnapshotId>0 ); + + pDb->iTid = 0; + pDb->eMode = HCT_MODE_NORMAL; + pDb->iSnapshotId = 0; + pDb->iLocalMinTid = 0; + return sqlite3HctFileFinishRecovery(pDb->pFile, iStage, rc); +} + +/* +** Open a cursor. +*/ +SQLITE_PRIVATE int sqlite3HctDbCsrOpen( + HctDatabase *pDb, + KeyInfo *pKeyInfo, + u32 iRoot, + HctDbCsr **ppCsr +){ + int rc = SQLITE_OK; + HctDbCsr *p; + + assert( pDb->iSnapshotId!=0 ); + + /* Search for an existing cursor that can be reused. */ + HctDbCsr **pp; + for(pp=&pDb->pScannerList; *pp; pp=&(*pp)->pNextScanner){ + if( (*pp)->iRoot==iRoot ){ + *ppCsr = *pp; + *pp = (*pp)->pNextScanner; + return SQLITE_OK; + } + } + + /* If no existing cursor was found, allocate a new one */ + p = (HctDbCsr*)sqlite3MallocZero(sizeof(HctDbCsr)); + if( p==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + p->pDb = pDb; + p->iRoot = iRoot; + p->iCell = -1; + p->pKeyInfo = pKeyInfo; + sqlite3KeyInfoRef(pKeyInfo); + } + *ppCsr = p; + return rc; +} + +/* +** Set the "no-snapshot" flag on the cursor passed as the first argument. +*/ +SQLITE_PRIVATE void sqlite3HctDbCsrNosnap(HctDbCsr *pCsr, int bNosnap){ + if( pCsr ) pCsr->bNosnap = bNosnap; +} + +/* +** Close a cursor opened with sqlite3HctDbCsrOpen(). +*/ +SQLITE_PRIVATE void sqlite3HctDbCsrClose(HctDbCsr *pCsr){ + if( pCsr ){ + HctDatabase *pDb = pCsr->pDb; + hctDbCsrScanFinish(pCsr); + hctDbCsrReset(pCsr); + if( pDb->bConcurrent && pDb->iTid==0 ){ + pCsr->pNextScanner = pDb->pScannerList; + pDb->pScannerList = pCsr; + }else{ + hctDbFreeCsr(pCsr); + } + } +} + +/* +** The cursor passed as the first argument must be open on an intkey +** table and pointed at a valid entry. This function sets output variable +** (*piKey) to the integer key value associated with that entry before +** returning. 
+*/ +SQLITE_PRIVATE void sqlite3HctDbCsrKey(HctDbCsr *pCsr, i64 *piKey){ + int iCell = 0; + const u8 *aPg = 0; + + aPg = hctDbCsrPageAndCell(pCsr, &iCell); + *piKey = hctDbGetIntkey(aPg, iCell); +} + +/* +** Return true if the cursor is at EOF. Otherwise false. +*/ +SQLITE_PRIVATE int sqlite3HctDbCsrEof(HctDbCsr *pCsr){ + return pCsr==0 || pCsr->iCell<0; +} + +/* +** Set the cursor to point to the first entry in its table. If it is +** stepped, this cursor will be stepped with sqlite3HctDbCsrNext(). +*/ +SQLITE_PRIVATE int sqlite3HctDbCsrFirst(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + + rc = hctDbCsrScanFinish(pCsr); + if( rc==SQLITE_OK ){ + hctDbCsrReset(pCsr); + pCsr->eDir = BTREE_DIR_FORWARD; + rc = hctDbCsrScanStart(pCsr, 0, SMALLEST_INT64); + } + pCsr->eDir = BTREE_DIR_FORWARD; + + if( rc==SQLITE_OK ){ + rc = hctDbCsrFirstValid(pCsr); + } + + return rc; +} + +/* +** Set the cursor to point to the last entry in its table. If it is +** stepped, this cursor will be stepped with sqlite3HctDbCsrPrev(). +*/ +SQLITE_PRIVATE int sqlite3HctDbCsrLast(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + HctFile *pFile = pCsr->pDb->pFile; + u32 iPg = pCsr->iRoot; + HctDbPageHdr *pPg = 0; + HctFilePage pg; + + rc = hctDbCsrScanFinish(pCsr); + if( rc==SQLITE_OK ){ + hctDbCsrReset(pCsr); + pCsr->eDir = BTREE_DIR_REVERSE; + rc = hctDbCsrScanStart(pCsr, 0, LARGEST_INT64); + } + + /* Find the last page in the leaf page list. 
*/ + while( 1 ){ + rc = sqlite3HctFilePageGet(pFile, iPg, &pg); + if( rc!=SQLITE_OK ) break; + + pPg = (HctDbPageHdr*)pg.aOld; + if( pPg->iPeerPg ){ + iPg = pPg->iPeerPg; + }else if( pPg->nHeight==0 ){ + break; + }else if( hctPagetype(pPg)==HCT_PAGETYPE_INTKEY ){ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pPg; + iPg = pNode->aEntry[pPg->nEntry-1].iChildPg; + }else{ + HctDbIndexNode *pNode = (HctDbIndexNode*)pPg; + iPg = pNode->aEntry[pPg->nEntry-1].iChildPg; + } + sqlite3HctFilePageRelease(&pg); + } + + /* Set the cursor to point to one position past the last entry on the + ** page located above. Then call sqlite3HctDbCsrPrev() to step back to + ** the first entry visible to the current transaction. */ + if( rc==SQLITE_OK ){ + assert( pPg->nHeight==0 && pPg->iPeerPg==0 ); + hctMemcpy(&pCsr->pg, &pg, sizeof(pg)); + if( pPg->nEntry==0 ){ + pCsr->iCell = -1; + }else{ + pCsr->iCell = pPg->nEntry; + rc = sqlite3HctDbCsrPrev(pCsr); + } + } + + return rc; +} + +/* +** Load the key associated with cell iCell1 on page aPg1[] and compare +** it to pKey2. Return an integer less than, equal to or greater than +** zero if the loaded key is less than, equal to or greater than pKey2, +** respectively. i.e. 
+** +** ret = key(aPg1, iCell1) - (*pKey2) +*/ +static int hctDbCompareCellKey( + int *pRc, + HctDatabase *pDb, + const u8 *aPg1, + int iCell1, + HctDbKey *pKey2 +){ + int ret = 0; + if( *pRc==SQLITE_OK ){ + + assert( hctPagetype(aPg1)==HCT_PAGETYPE_INTKEY + || hctPagetype(aPg1)==HCT_PAGETYPE_INDEX + ); + if( hctPagetype(aPg1)==HCT_PAGETYPE_INTKEY ){ + i64 iKey = hctDbGetIntkey(aPg1, iCell1); + if( iKeyiKey ){ + ret = -1; + }else if( iKey>pKey2->iKey ){ + ret = +1; + } + }else if( pKey2->pKey==0 ){ + ret = -1; + }else{ + int nRec = 0; + const u8 *aRec = 0; + HctBuffer buf = {0,0,0}; + int rc = hctDbLoadRecord(pDb, &buf, aPg1, iCell1, &nRec, &aRec); + if( rc!=SQLITE_OK ){ + *pRc = rc; + }else{ + ret = sqlite3VdbeRecordCompare(nRec, aRec, pKey2->pKey); + } + sqlite3HctBufferFree(&buf); + } + } + + return ret; +} + + +static int hctDbCsrNext(HctDbCsr *pCsr){ + HctDatabase *pDb = pCsr->pDb; + HctDbPageHdr *pPg = 0; + int rc = SQLITE_OK; + + /* Check if the current cell, be it on the linked list of leaves, or + ** on a history page, has an old-data pointer that should be followed. + ** + ** Except, don't do this if pCsr->iCell is less than zero. In that + ** case this call is supposed to jump to the first cell on the main + ** page. 
*/ + if( pCsr->iCell>=0 ){ + do { + int bMerge = 0; + HctRangePtr ptr; + + hctDbCsrGetRange(pCsr, &ptr); + if( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ){ + hctDbCsrDescendRange(&rc, pCsr, ptr.iRangeTid, ptr.iOld, bMerge); + if( rc==SQLITE_OK ){ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; + if( p->eRange==HCT_RANGE_FAN ){ + p->iCell = -1; + }else{ + int bExact = 0; + hctDbLeafSearch(pDb, + p->pg.aOld, p->lowkey.iKey, p->lowkey.pKey, &p->iCell, &bExact + ); + if( bExact==0 ) p->iCell--; + } + } + } + + while( pCsr->nRange ){ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; + + p->iCell++; + if( p->iCellpg.aOld) && ( + p->eRange==HCT_RANGE_FAN + || hctDbCompareCellKey(&rc, pDb, p->pg.aOld, p->iCell, &p->highkey)<0 + )){ + if( p->eRange==HCT_RANGE_MERGE ){ + return SQLITE_OK; + } + break; + } + sqlite3HctFilePageRelease(&p->pg); + hctDbCsrAscendRange(pCsr); + } + + }while( pCsr->nRange ); + + } + + pPg = (HctDbPageHdr*)pCsr->pg.aOld; + assert( pCsr->iCell>=-1 && pCsr->iCellnEntry ); + assert( pPg->nHeight==0 ); + + pCsr->iCell++; + if( pCsr->iCell==pPg->nEntry ){ + u32 iPeerPg = pPg->iPeerPg; + if( iPeerPg==0 ){ + /* Main cursor is now at EOF */ + pCsr->iCell = -1; + sqlite3HctFilePageRelease(&pCsr->pg); + }else{ + /* Jump to peer page */ + rc = sqlite3HctFilePageRelease(&pCsr->pg); + if( rc==SQLITE_OK ){ + rc = sqlite3HctFilePageGet(pDb->pFile, iPeerPg, &pCsr->pg); + pCsr->iCell = 0; + } + } + } + + return rc; +} + +static int hctDbCsrGoLeft(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + int nHeight = ((HctDbPageHdr*)pCsr->pg.aOld)->nHeight; + + if( pCsr->pKeyInfo ){ + UnpackedRecord *pRec = 0; + rc = hctDbCsrLoadAndDecode(pCsr, 0, &pRec); + if( rc==SQLITE_OK ){ + int bDummy; + HctFilePage pg = pCsr->pg; + memset(&pCsr->pg, 0, sizeof(HctFilePage)); + pRec->default_rc = 1; + hctDbCsrSeek(pCsr, 0, nHeight, 0, pRec, 0, &bDummy); + pRec->default_rc = 0; + sqlite3HctFilePageRelease(&pg); + } + }else if( hctIsLeftmost(pCsr->pg.aOld)==0 ){ + i64 iKey = 
hctDbIntkeyFPKey(pCsr->pg.aOld); + sqlite3HctFilePageRelease(&pCsr->pg); + rc = hctDbCsrSeek(pCsr, 0, nHeight, 0, 0, iKey-1, 0); + } + + return rc; +} + +static int hctDbCsrPrev(HctDbCsr *pCsr){ + HctDatabase *pDb = pCsr->pDb; + int rc = SQLITE_OK; + /* Advance the cursor */ + + if( pCsr->nRange ){ + HctDbRangeCsr *pRange = &pCsr->aRange[pCsr->nRange-1]; + pRange->iCell--; + }else{ + pCsr->iCell--; + if( pCsr->iCell<0 ){ + rc = hctDbCsrGoLeft(pCsr); + } + } + + if( pCsr->iCell>=0 ){ + do { + HctRangePtr ptr; + int bMerge = 0; + + hctDbCsrGetRange(pCsr, &ptr); + if( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ){ + do { + hctDbCsrDescendRange(&rc, pCsr, ptr.iRangeTid, ptr.iOld, bMerge); + memset(&ptr, 0, sizeof(ptr)); + if( rc==SQLITE_OK ){ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; + if( p->eRange==HCT_RANGE_FAN ){ + p->iCell = ((HctDbPageHdr*)p->pg.aOld)->nEntry-1; + }else{ + int bExact; + hctDbLeafSearch(pDb, p->pg.aOld, + p->highkey.iKey, p->highkey.pKey, &p->iCell, &bExact + ); + p->iCell--; + } + + if( p->iCell>=0 ){ + hctDbCsrGetRange(pCsr, &ptr); + } + } + }while( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ); + } + + while( pCsr->nRange>0 ){ + HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; + if( p->iCell>=0 && ( + p->eRange==HCT_RANGE_FAN + || hctDbCompareCellKey(&rc, pDb, p->pg.aOld, p->iCell, &p->lowkey)>0 + )){ + if( p->eRange==HCT_RANGE_MERGE ){ + return SQLITE_OK; + } + p->iCell--; + break; + } + sqlite3HctFilePageRelease(&p->pg); + hctDbCsrAscendRange(pCsr); + } + }while( pCsr->nRange ); + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctDbCsrNext(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + + /* Should not be called while committing, validating or doing rollback. 
*/ + assert( pCsr->pDb->iTid==0 && pCsr->pDb->eMode==HCT_MODE_NORMAL ); + + do { + rc = hctDbCsrNext(pCsr); + }while( rc==SQLITE_OK && pCsr->iCell>=0 && hctDbCurrentIsVisible(pCsr)==0 ); + return rc; +} + +SQLITE_PRIVATE int sqlite3HctDbCsrPrev(HctDbCsr *pCsr){ + int rc = SQLITE_OK; + + assert( pCsr->pDb->eMode==HCT_MODE_NORMAL ); + do { + rc = hctDbCsrPrev(pCsr); + }while( rc==SQLITE_OK && pCsr->iCell>=0 && hctDbCurrentIsVisible(pCsr)==0 ); + return rc; +} + +SQLITE_PRIVATE void sqlite3HctDbCsrClear(HctDbCsr *pCsr){ + hctDbCsrScanFinish(pCsr); + hctDbCsrReset(pCsr); +} + + +SQLITE_PRIVATE int sqlite3HctDbCsrData(HctDbCsr *pCsr, int *pnData, const u8 **paData){ + const u8 *pPg; + int iCell; + + pPg = hctDbCsrPageAndCell(pCsr, &iCell); + assert( hctPageheight(pPg)==0 ); + +#if 0 + if( pCsr->nRange ){ + printf("%p: data from range page %d (from %d) (snapshotid=%lld)\n", + pCsr->pDb, + (int)pCsr->aRange[pCsr->nRange-1].pg.iOldPg, + (int)pCsr->pg.iOldPg, pCsr->pDb->iSnapshotId + ); + }else{ + printf("%p: data from page %d (snapshotid=%lld)\n", + pCsr->pDb, + (int)pCsr->pg.iOldPg, pCsr->pDb->iSnapshotId + ); + } + fflush(stdout); +#endif + + return hctDbLoadRecord(pCsr->pDb, &pCsr->rec, pPg, iCell, pnData, paData); +} + +static int hctDbValidateEntry(HctDatabase *pDb, HctDbCsr *pCsr){ + int rc = SQLITE_OK; + u8 flags; + + if( pCsr->nRange ){ + /* If the current entry is on a history page, it is not valid (as + ** it has already been deleted). Later: unless of course it was this + ** transaction that deleted it! 
*/ + if( pCsr->aRange[pCsr->nRange-1].iRangeTid!=pDb->iTid ){ + rc = HCT_SQLITE_BUSY; + } + }else{ + int iOff = hctDbCellOffset(pCsr->pg.aOld, pCsr->iCell, &flags); + if( flags & HCTDB_HAS_TID ){ + u64 iTid = hctGetU64(&pCsr->pg.aOld[iOff]); + if( hctDbTidIsConflict(pCsr->pDb, iTid) ){ + rc = HCT_SQLITE_BUSY; + } + } + } + return rc; +} + +static int hctDbValidateIntkey(HctDatabase *pDb, HctDbCsr *pCsr){ + int rc = SQLITE_OK; + HctCsrIntkeyOp *pOpList = pCsr->intkey.pOpList; + HctCsrIntkeyOp *pOp; + + pCsr->intkey.pOpList = 0; + assert( pCsr->intkey.pCurrentOp==0 ); + for(pOp=pOpList; pOp && rc==SQLITE_OK; pOp=pOp->pNextOp){ + int bDum = 0; + assert( pOp->iFirst<=pOp->iLast ); + + if( pOp->iLogical ){ + int bEvict = 0; + + /* If the physical page associated with the logical page containing + ** the current key has not changed, and the logical page has not been + ** evicted, then the current key itself may not have been modified. + ** Jump to the next iteration of the loop in this case. */ + u32 iPhys = sqlite3HctFilePageMapping(pDb->pFile, pOp->iLogical, &bEvict); + if( pOp->iPhysical==iPhys && bEvict==0 ) continue; + + /* Alternatively, if the logical page has not been evicted, load it + ** and seek to the desired key. If the key is found, or if it is not + ** found but the key would reside on the current page, then load + ** the page into the cursor. This is faster than the hctDbCsrSeek() + ** call below. 
*/ + if( bEvict==0 && pOp->iLogical!=pCsr->iRoot ){ + rc = hctDbGetPhysical(pDb, iPhys, &pCsr->pg); + if( rc==SQLITE_OK ){ + pCsr->eDir = BTREE_DIR_FORWARD; + pCsr->iCell = hctDbIntkeyLeafSearch(pCsr->pg.aOld, pOp->iFirst,&bDum); + if( pCsr->iCell>=((HctDbIntkeyLeaf*)pCsr->pg.aOld)->pg.nEntry ){ + hctDbCsrReset(pCsr); + } + } + } + } + + if( pCsr->pg.aOld==0 ){ + if( pOp->iFirst==SMALLEST_INT64 ){ + pCsr->eDir = BTREE_DIR_FORWARD; + rc = hctDbCsrFirst(pCsr); + }else{ + if( pOp->iFirst==pOp->iLast ){ + pCsr->eDir = BTREE_DIR_NONE; + }else{ + pCsr->eDir = BTREE_DIR_FORWARD; + } + rc = hctDbCsrSeekAndDescend(pCsr, 0, pOp->iFirst, 0, &bDum); + } + } + + while( rc==SQLITE_OK && !sqlite3HctDbCsrEof(pCsr) ){ + i64 iKey = 0; + sqlite3HctDbCsrKey(pCsr, &iKey); + if( iKey>=pOp->iFirst && iKey<=pOp->iLast ){ + rc = hctDbValidateEntry(pDb, pCsr); + } + if( rc!=SQLITE_OK || iKey>=pOp->iLast ) break; + rc = hctDbCsrNext(pCsr); + } + hctDbCsrReset(pCsr); + } + assert( pCsr->intkey.pOpList==0 && pCsr->intkey.pCurrentOp==0 ); + pCsr->intkey.pOpList = pOpList; + + return rc; +} + +static int hctDbValidateIndex(HctDatabase *pDb, HctDbCsr *pCsr){ + int rc = SQLITE_OK; + HctCsrIndexOp *pOpList = pCsr->index.pOpList; + HctCsrIndexOp *pOp; + + pCsr->index.pOpList = 0; + assert( pCsr->index.pCurrentOp==0 ); + rc = hctDbCsrAllocateUnpacked(pCsr); + for(pOp=pOpList; pOp && rc==SQLITE_OK; pOp=pOp->pNextOp){ + UnpackedRecord *pRec = pCsr->pRec; + int bDummy = 0; + + if( pOp->iLogical + && pOp->iPhysical==sqlite3HctFilePageMapping(pDb->pFile, pOp->iLogical, &bDummy) + ){ + continue; + } + + hctDbCsrReset(pCsr); + pCsr->eDir = (pOp->pFirst==pOp->pLast) ? 
BTREE_DIR_NONE : BTREE_DIR_FORWARD; + if( pOp->pFirst==0 ){ + rc = hctDbCsrFirst(pCsr); + }else{ + int bExact = 0; + sqlite3VdbeRecordUnpack(pCsr->pKeyInfo, pOp->nFirst, pOp->pFirst, pRec); + rc = hctDbCsrSeek(pCsr, 0, 0, 0, pRec, 0, &bExact); + if( rc==SQLITE_OK && bExact==0 ){ + rc = hctDbCsrNext(pCsr); + } + } + if( pOp->pLast && pOp->pLast!=pOp->pFirst ){ + sqlite3VdbeRecordUnpack(pCsr->pKeyInfo, pOp->nLast, pOp->pLast, pRec); + }else{ + pRec = 0; + } + if( rc!=SQLITE_OK ) break; + + if( pOp->pLast==pOp->pFirst ){ + assert( !sqlite3HctDbCsrEof(pCsr) ); + rc = hctDbValidateEntry(pDb, pCsr); + }else{ + while( !sqlite3HctDbCsrEof(pCsr) ){ + int res = -1; + if( pRec ){ + const u8 *aKey = 0; + int nKey = 0; + rc = sqlite3HctDbCsrData(pCsr, &nKey, &aKey); + if( rc!=SQLITE_OK ) break; + res = sqlite3VdbeRecordCompare(nKey, aKey, pRec); + if( res<0 ) break; + } + rc = hctDbValidateEntry(pDb, pCsr); + if( res==0 || rc!=SQLITE_OK ) break; + rc = hctDbCsrNext(pCsr); + if( rc!=SQLITE_OK ) break; + } + } + } + + assert( pCsr->index.pOpList==0 && pCsr->index.pCurrentOp==0 ); + pCsr->index.pOpList = pOpList; + return rc; +} + +SQLITE_PRIVATE void sqlite3HctDbTMapScan(HctDatabase *pDb){ + sqlite3HctTMapScan(sqlite3HctFileTMapClient(pDb->pFile)); +} + +int +__attribute__ ((noinline)) +sqlite3HctDbValidate( + sqlite3 *db, + HctDatabase *pDb, + u64 *piCid, + int *pbTmapscan +){ + HctDbCsr *pCsr = 0; + u64 *pEntry = hctDbFindTMapEntry(pDb->pTmap, pDb->iTid); + u64 iCid = *piCid; + u64 nFinalWrite = 0; + int rc = SQLITE_OK; + int nPageScan = pDb->pConfig->nPageScan; + + /* Set nWrite to the number of pages written by this transaction. This + ** is used for scheduling tmap scans only, so it doesn't matter if it + ** is slightly inaccurate in some cases. 
*/ + int nWrite = sqlite3HctFileWriteCount(pDb->pFile) - pDb->nWriteCount; + assert( nWrite>=0 ); + if( nWrite==0 ) nWrite = 1; + + assert( *pEntry==0 ); + if( iCid==0 ){ + HctAtomicStore(pEntry, HCT_TMAP_VALIDATING); + iCid = sqlite3HctFileAllocateCID(pDb->pFile, 1); + } + HctAtomicStore(pEntry, HCT_TMAP_VALIDATING | iCid); + + nFinalWrite = sqlite3HctFileIncrWriteCount(pDb->pFile, nWrite); + if( (nFinalWrite / nPageScan)!=((nFinalWrite-nWrite) / nPageScan) ){ + *pbTmapscan = 1; + } + + assert( pDb->eMode==HCT_MODE_NORMAL ); + + /* Invoke the SQLITE_TESTCTRL_HCT_MTCOMMIT hook, if applicable */ + if( db->xMtCommit ) db->xMtCommit(db->pMtCommitCtx, 2); + + /* If iCid is one more than pDb->iSnapshotId, then this transaction is + ** being applied against the snapshot that it was run against. In this + ** case we can skip validation entirely. */ + if( iCid!=pDb->iSnapshotId+1 ){ + if( pDb->bConcurrent ){ + pDb->eMode = HCT_MODE_VALIDATE; + if( hctDbValidateMeta(pDb) ){ + rc = HCT_SQLITE_BUSY; + }else{ + for(pCsr=pDb->pScannerList; pCsr; pCsr=pCsr->pNextScanner){ + if( pCsr->pKeyInfo==0 ){ + rc = hctDbValidateIntkey(pDb, pCsr); + }else{ + rc = hctDbValidateIndex(pDb, pCsr); + } + if( rc ) break; + } + } + pDb->eMode = HCT_MODE_NORMAL; + }else{ + rc = HCT_SQLITE_BUSY; + } + } + + *piCid = iCid; + return rc; +} + +/************************************************************************* +************************************************************************** +** Start of integrity-check implementation. +** +** The code here assumes that the database is quiescent. If it is invoked +** concurrently with database writers, false-positive errors may be reported. +*/ + +/* +** Walk the tree structure with logical root page iRoot, visiting every +** page and overflow page currently linked in. +** +** For each page in the tree, the supplied callback is invoked. The first +** argument passed to the callback is a copy of the fourth argument to +** this function. 
The second and third arguments are the logical and +** physical page number, respectively. If there is no logical page number, +** as for overflow pages, the second parameter is passed zero. +** +** It (presumably) makes little sense to call this function without +** somehow guaranteeing that the tree is not being currently written to. +*/ +SQLITE_PRIVATE int sqlite3HctDbWalkTree( + HctFile *pFile, /* File tree resides in */ + u32 iRoot, /* Root page of tree */ + int (*x)(void*, u32, u32), /* Callback function */ + void *pCtx /* First argument to pass to x() */ +){ + int rc = SQLITE_OK; + u32 pgno = iRoot; + + u32 iPhys = 0; + int dummy = 0; + + /* Special case - the root page is not mapped to any physical page. */ + iPhys = sqlite3HctFilePageMapping(pFile, iRoot, &dummy); + if( iPhys==0 ){ + return x(pCtx, iRoot, 0); + } + + /* This outer loop runs once for each list in the tree structure - once + ** for the list of leaves, once for the list of parent, and so on. + ** Starting from the root page and descending towards the leaves. */ + do { + HctFilePage pg; + int nHeight = 0; + int eType = 0; + u32 pgnoChild = 0; + + /* Load up page pgno - the leftmost of its list. Then, unless this + ** is the list of leaves, set pgnoChild to the leftmost child of + ** the page. Or, if this is a list of leaves, leave pgnoChild set + ** to zero. 
*/ + rc = sqlite3HctFilePageGet(pFile, pgno, &pg); + if( rc!=SQLITE_OK ){ + break; + }else{ + nHeight = hctPageheight(pg.aOld); + eType = hctPagetype(pg.aOld); + if( eType!=HCT_PAGETYPE_INTKEY && eType!=HCT_PAGETYPE_INDEX ){ + rc = SQLITE_CORRUPT_BKPT; + break; + } + else if( nHeight>0 ){ + if( eType==HCT_PAGETYPE_INTKEY ){ + pgnoChild = ((HctDbIntkeyNode*)pg.aOld)->aEntry[0].iChildPg; + }else{ + pgnoChild = ((HctDbIndexNode*)pg.aOld)->aEntry[0].iChildPg; + } + } + } + + while( pg.aOld ){ + u32 iPeerPg = ((HctDbPageHdr*)pg.aOld)->iPeerPg; + u32 iLogic = pg.iPg; + u32 iPhys = pg.iOldPg; + + rc = x(pCtx, iLogic, iPhys); + if( rc!=SQLITE_OK ) break; + + if( nHeight==0 || eType==HCT_PAGETYPE_INDEX ){ + int iCell = 0; + int nEntry = ((HctDbPageHdr*)pg.aOld)->nEntry; + for(iCell=0; iCelliPeerPg; + sqlite3HctFilePageRelease(&ov); + } + } + } + } + + sqlite3HctFilePageRelease(&pg); + if( iPeerPg ){ + rc = sqlite3HctFilePageGet(pFile, iPeerPg, &pg); + if( rc!=SQLITE_OK ) break; + } + } + + pgno = pgnoChild; + }while( rc==SQLITE_OK && pgno!=0 ); + + return rc; +} + +typedef struct IntCheckCtx IntCheckCtx; +struct IntCheckCtx { + u32 nLogic; /* Number of logical pages in db */ + u32 nPhys; /* Number of physical pages in db */ + u8 *aLogic; + u8 *aPhys; + int nErr; + int nMaxErr; + char *zErr; + i64 nEntry; /* Number of entries in table */ +}; + +static void hctDbICError( + IntCheckCtx *p, + char *zFmt, + ... +){ + va_list ap; + char *zErr; + va_start(ap, zFmt); + zErr = sqlite3_vmprintf(zFmt, ap); + p->zErr = sqlite3_mprintf("%z%s%z", p->zErr, (p->zErr ? 
"\n" : ""), zErr); + p->nErr++; + va_end(ap); +} + +static int hctDbIntegrityCheckCb( + void *pCtx, + u32 iLogic, + u32 iPhys +){ + IntCheckCtx *p = (IntCheckCtx*)pCtx; + if( iLogic ){ + if( p->aLogic[iLogic-1] ){ + hctDbICError(p, "multiple refs to logical page %d", (int)iLogic); + } + p->aLogic[iLogic-1] = 1; + } + if( iPhys ){ + if( p->aPhys[iPhys-1] ){ + hctDbICError(p, "multiple refs to physical page %d", (int)iPhys); + } + p->aPhys[iPhys-1] = 1; + } + + return (p->nErr>=p->nMaxErr) ? -1 : 0; +} + + +SQLITE_PRIVATE char *sqlite3HctDbIntegrityCheck( + HctDatabase *pDb, + u32 *aRoot, + Mem *aCnt, + int nRoot, + int *pnErr +){ + HctFile *pFile = pDb->pFile; + IntCheckCtx c; + u32 *aFileRoot = 0; + int nFileRoot = 0; + + int rc = sqlite3HctFileRootArray(pFile, &aFileRoot, &nFileRoot); + memset(&c, 0, sizeof(c)); + if( rc==SQLITE_OK ){ + c.nErr = *pnErr; + c.nMaxErr = 100; + sqlite3HctFileICArrays(pFile, &c.aLogic, &c.nLogic, &c.aPhys, &c.nPhys); + } + if( !c.aLogic ){ + c.nErr++; + }else{ + int ii; + + for(ii=0; c.nErr==0 && iistats.nBalanceIntkey; + break; + case 1: + *pzStat = "balance_index"; + iVal = pDb->stats.nBalanceIndex; + break; + case 2: + *pzStat = "balance_single"; + iVal = pDb->stats.nBalanceSingle; + break; + case 3: + *pzStat = "tmap_lookup"; + iVal = pDb->stats.nTMapLookup; + break; + case 4: + *pzStat = "update_in_place"; + iVal = pDb->stats.nUpdateInPlace; + break; + case 5: + *pzStat = "internal_retry"; + iVal = pDb->stats.nInternalRetry; + break; + default: + break; + } + + return iVal; +} + +/************************************************************************* +************************************************************************** +** Below are the virtual table implementations. These are debugging +** aids only. 
+*/ + +typedef struct hctdb_vtab hctdb_vtab; +struct hctdb_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; +}; + +/* templatevtab_cursor is a subclass of sqlite3_vtab_cursor which will +** serve as the underlying representation of a cursor that scans +** over rows of the result +*/ +typedef struct hctdb_cursor hctdb_cursor; +struct hctdb_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + HctDatabase *pDb; /* Database to report on */ + u64 iMaxPgno; /* Maximum page number for this scan */ + + u64 pgno; /* The page-number/rowid value */ + const char *zPgtype; + u32 iPeerPg; + u32 nEntry; + u32 nHeight; + u32 nFree; + char *zFpKey; +}; + +/* +** The hctdbConnect() method is invoked to create a new +** template virtual table. +** +** Think of this routine as the constructor for hctdb_vtab objects. +** +** All this routine needs to do is: +** +** (1) Allocate the hctdb_vtab object and initialize all fields. +** +** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the +** result set of queries against the virtual table will look like. +*/ +static int hctdbConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + hctdb_vtab *pNew; + int rc; + + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(" + "pgno INTEGER, pgtype TEXT, nheight INTEGER, " + "peer INTEGER, nentry INTEGER, nfree INTEGER, fpkey TEXT" + ")" + ); + + if( rc==SQLITE_OK ){ + pNew = sqlite3MallocZero( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) return SQLITE_NOMEM; + pNew->db = db; + } + return rc; +} + +/* +** This method is the destructor for hctdb_vtab objects. +*/ +static int hctdbDisconnect(sqlite3_vtab *pVtab){ + hctdb_vtab *p = (hctdb_vtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new hctdb_cursor object. 
+*/ +static int hctdbOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + hctdb_cursor *pCur; + pCur = sqlite3MallocZero(sizeof(*pCur)); + if( pCur==0 ) return SQLITE_NOMEM; + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Destructor for a hctdb_cursor. +*/ +static int hctdbClose(sqlite3_vtab_cursor *cur){ + hctdb_cursor *pCur = (hctdb_cursor*)cur; + sqlite3_free(pCur->zFpKey); + sqlite3_free(pCur); + return SQLITE_OK; +} + +static char *hex_encode(const u8 *aIn, int nIn){ + char *zRet = sqlite3MallocZero(nIn*2+1); + if( zRet ){ + static const char aDigit[] = "0123456789ABCDEF"; + int i; + for(i=0; i> 4) ]; + zRet[i*2+1] = aDigit[ (aIn[i] & 0xF) ]; + } + } + return zRet; +} + + +SQLITE_PRIVATE char *sqlite3HctDbRecordToText(sqlite3 *db, const u8 *aRec, int nRec){ + char *zRet = 0; + const char *zSep = ""; + const u8 *pEndHdr; /* Points to one byte past record header */ + const u8 *pHdr; /* Current point in record header */ + const u8 *pBody; /* Current point in record data */ + u64 nHdr; /* Bytes in record header */ + + if( nRec==0 ){ + return sqlite3_mprintf(""); + } + + pHdr = aRec + sqlite3GetVarint(aRec, &nHdr); + pBody = pEndHdr = &aRec[nHdr]; + while( pHdrbase.pVtab)->db; + int eType; + + assert( 0==sqlite3_stricmp("intkey", azType[HCT_PAGETYPE_INTKEY]) ); + assert( 0==sqlite3_stricmp("index", azType[HCT_PAGETYPE_INDEX]) ); + assert( 0==sqlite3_stricmp("overflow", azType[HCT_PAGETYPE_OVERFLOW]) ); + + sqlite3_free(pCur->zFpKey); + pCur->zFpKey = 0; + + eType = hctPagetype(pHdr); + if( eTypezPgtype = azType[hctPagetype(pHdr)]; + }else{ + pCur->zPgtype = "!INVALID!"; + } + pCur->iPeerPg = pHdr->iPeerPg; + pCur->nEntry = pHdr->nEntry; + pCur->nHeight = pHdr->nHeight; + + if( eType==HCT_PAGETYPE_INTKEY ){ + if( pHdr->nHeight==0 ){ + HctDbIntkeyLeaf *pLeaf = (HctDbIntkeyLeaf*)aPg; + char *zFpKey = sqlite3_mprintf("%lld", pLeaf->aEntry[0].iKey); + if( zFpKey==0 ) rc = SQLITE_NOMEM_BKPT; + pCur->zFpKey = zFpKey; + pCur->nFree = 
(int)pLeaf->hdr.nFreeBytes; + }else{ + HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)aPg; + char *zFpKey = sqlite3_mprintf("%lld", pNode->aEntry[0].iKey); + if( zFpKey==0 ) rc = SQLITE_NOMEM_BKPT; + pCur->zFpKey = zFpKey; + pCur->nFree = ( + hctDbMaxCellsPerIntkeyNode(pCur->pDb->pgsz) - pNode->pg.nEntry + ) * sizeof(HctDbIntkeyNodeEntry); + } + + }else if( eType==HCT_PAGETYPE_INDEX ){ + HctBuffer buf = {0,0,0}; + const u8 *aRec = 0; + int nRec = 0; + + rc = hctDbLoadRecord(pCur->pDb, &buf, aPg, 0, &nRec, &aRec); + if( rc==SQLITE_OK ){ + char *zFpKey = sqlite3HctDbRecordToText(db, aRec, nRec); + if( zFpKey==0 ) rc = SQLITE_NOMEM_BKPT; + pCur->zFpKey = zFpKey; + } + + pCur->nFree = (int)(((HctDbIndexNode*)pHdr)->hdr.nFreeBytes); + sqlite3HctBufferFree(&buf); + } + return rc; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. +*/ +static int hctdbEof(sqlite3_vtab_cursor *cur){ + hctdb_cursor *pCur = (hctdb_cursor*)cur; + return pCur->pgno>pCur->iMaxPgno; +} + +/* +** Advance a hctdb_cursor to its next row of output. +*/ +static int hctdbNext(sqlite3_vtab_cursor *cur){ + hctdb_cursor *pCur = (hctdb_cursor*)cur; + int rc = SQLITE_OK; + HctFilePage pg; + + memset(&pg, 0, sizeof(pg)); + do { + sqlite3HctFilePageRelease(&pg); + pCur->pgno++; + if( hctdbEof(cur) ) return SQLITE_OK; + rc = sqlite3HctFilePageGetPhysical(pCur->pDb->pFile, pCur->pgno, &pg); + }while( rc==SQLITE_OK && pg.aOld==0 ); + + if( pg.aOld ){ + rc = hctdbLoadPage(pCur, pg.aOld); + } + return rc; +} + +/* +** Return values of columns for the row at which the hctdb_cursor +** is currently pointing. 
+*/ +static int hctdbColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + hctdb_cursor *pCur = (hctdb_cursor*)cur; + assert( i>=0 && i<=5 ); + switch( i ){ + case 0: /* pgno */ + sqlite3_result_int64(ctx, (i64)pCur->pgno); + break; + case 1: /* pgtype */ + sqlite3_result_text(ctx, pCur->zPgtype, -1, SQLITE_TRANSIENT); + break; + case 2: /* nHeight */ + sqlite3_result_int64(ctx, (i64)pCur->nHeight); + break; + case 3: /* peer */ + sqlite3_result_int64(ctx, (i64)pCur->iPeerPg); + break; + case 4: /* nEntry */ + sqlite3_result_int64(ctx, (i64)pCur->nEntry); + break; + case 5: /* nfree */ + sqlite3_result_int64(ctx, (i64)pCur->nFree); + break; + case 6: /* fpkey */ + sqlite3_result_text(ctx, pCur->zFpKey, -1, SQLITE_TRANSIENT); + break; + } + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. In this implementation, the +** rowid is the same as the output value. +*/ +static int hctdbRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + hctdb_cursor *pCur = (hctdb_cursor*)cur; + *pRowid = pCur->pgno; + return SQLITE_OK; +} + +/* +** This method is called to "rewind" the hctdb_cursor object back +** to the first row of output. This method is always called at least +** once prior to any call to hctdbColumn() or hctdbRowid() or +** hctdbEof(). 
+*/ +static int hctdbFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + hctdb_cursor *pCur = (hctdb_cursor*)pVtabCursor; + hctdb_vtab *pTab = (hctdb_vtab*)(pCur->base.pVtab); + + pCur->pDb = sqlite3HctDbFind(pTab->db, 0); + if( argc==1 ){ + u32 iVal = (u32)sqlite3_value_int64(argv[0]); + pCur->iMaxPgno = iVal; + pCur->pgno = iVal-1; + }else{ + pCur->pgno = 0; + pCur->iMaxPgno = sqlite3HctFileMaxpage(pCur->pDb->pFile); + } + return hctdbNext(pVtabCursor); +} + +/* +** SQLite will invoke this method one or more times while planning a query +** that uses the virtual table. This routine needs to create +** a query plan for each invocation and compute an estimated cost for that +** plan. +*/ +static int hctdbBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + int i; + pIdxInfo->estimatedCost = (double)10000; + pIdxInfo->estimatedRows = 10000; + + for(i=0; inConstraint; i++){ + struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[i]; + if( p->iColumn!=0 ) continue; + if( p->op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; + if( !p->usable ) continue; + pIdxInfo->aConstraintUsage[i].argvIndex = 1; + pIdxInfo->idxNum = 1; + pIdxInfo->estimatedCost = (double)10; + pIdxInfo->estimatedRows = 10; + break; + } + + return SQLITE_OK; +} + +typedef struct hctentry_vtab hctentry_vtab; +struct hctentry_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; +}; + +/* templatevtab_cursor is a subclass of sqlite3_vtab_cursor which will +** serve as the underlying representation of a cursor that scans +** over rows of the result +*/ +typedef struct hctentry_cursor hctentry_cursor; +struct hctentry_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + HctDatabase *pDb; /* Database to report on */ + int iEntry; + HctFilePage pg; + u32 iPg; /* Current physical page number */ + u32 iLastPg; /* Last physical page to report on */ +}; + +/* +** The hctentryConnect() method 
is invoked to create a new +** template virtual table. +** +** Think of this routine as the constructor for hctentry_vtab objects. +** +** All this routine needs to do is: +** +** (1) Allocate the hctentry_vtab object and initialize all fields. +** +** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the +** result set of queries against the virtual table will look like. +*/ +static int hctentryConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + hctentry_vtab *pNew; + int rc; + + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(" + "pgno INTEGER, entry INTEGER, " + "ikey INTEGER, size INTEGER, offset INTEGER, " + "child INTEGER, " + "tid INTEGER, rangetid INTEGER, " + /* "oldpg INTEGER, " */ + "rangeoldpg INTEGER, ovfl INTEGER, record TEXT" + ")" + ); + + if( rc==SQLITE_OK ){ + pNew = sqlite3MallocZero( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) return SQLITE_NOMEM; + pNew->db = db; + } + return rc; +} + +/* +** This method is the destructor for hctentry_vtab objects. +*/ +static int hctentryDisconnect(sqlite3_vtab *pVtab){ + hctentry_vtab *p = (hctentry_vtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} + +/* +** Constructor for a new hctentry_cursor object. +*/ +static int hctentryOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + hctentry_cursor *pCur; + pCur = sqlite3MallocZero(sizeof(*pCur)); + if( pCur==0 ) return SQLITE_NOMEM; + *ppCursor = &pCur->base; + return SQLITE_OK; +} + +/* +** Destructor for a hctentry_cursor. +*/ +static int hctentryClose(sqlite3_vtab_cursor *cur){ + hctentry_cursor *pCur = (hctentry_cursor*)cur; + sqlite3HctFilePageRelease(&pCur->pg); + sqlite3_free(pCur); + return SQLITE_OK; +} + +/* +** Return TRUE if the cursor has been moved off of the last +** row of output. 
+*/ +static int hctentryEof(sqlite3_vtab_cursor *cur){ + hctentry_cursor *pCur = (hctentry_cursor*)cur; + return pCur->pg.aOld==0; +} + +/* +** Advance a hctentry_cursor to its next row of output. +*/ +static int hctentryNext(sqlite3_vtab_cursor *cur){ + int rc = SQLITE_OK; + hctentry_cursor *pCur = (hctentry_cursor*)cur; + + while( rc==SQLITE_OK ){ + HctDbPageHdr *pPg = (HctDbPageHdr*)pCur->pg.aOld; + int eType = hctPagetype(pPg); + if( eType==HCT_PAGETYPE_INTKEY + || eType==HCT_PAGETYPE_INDEX + || eType==HCT_PAGETYPE_HISTORY + ){ + pCur->iEntry++; + if( pCur->iEntrynEntry ) break; + } + pCur->iEntry = -1; + pCur->iPg++; + sqlite3HctFilePageRelease(&pCur->pg); + if( pCur->iPg>pCur->iLastPg ) break; + rc = sqlite3HctFilePageGetPhysical(pCur->pDb->pFile, pCur->iPg, &pCur->pg); + } + + return rc; +} + +/* +** Return values of columns for the row at which the hctentry_cursor +** is currently pointing. +*/ +static int hctentryColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + hctentry_cursor *pCur = (hctentry_cursor*)cur; + int eType = hctPagetype(pCur->pg.aOld); + int nHeight = hctPageheight(pCur->pg.aOld); + + HctDbIntkeyEntry *pIntkey = 0; + HctDbIntkeyNodeEntry *pIntkeyNode = 0; + HctDbIndexEntry *pIndex = 0; + HctDbIndexNodeEntry *pIndexNode = 0; + HctDbHistoryFan *pFan = 0; + + switch( eType ){ + case HCT_PAGETYPE_INTKEY: + if( nHeight==0 ){ + pIntkey = &((HctDbIntkeyLeaf*)pCur->pg.aOld)->aEntry[pCur->iEntry]; + }else{ + pIntkeyNode = &((HctDbIntkeyNode*)pCur->pg.aOld)->aEntry[pCur->iEntry]; + } + break; + + case HCT_PAGETYPE_INDEX: + if( nHeight==0 ){ + pIndex = &((HctDbIndexLeaf*)pCur->pg.aOld)->aEntry[pCur->iEntry]; + }else{ + pIndexNode = &((HctDbIndexNode*)pCur->pg.aOld)->aEntry[pCur->iEntry]; + } + break; + + case HCT_PAGETYPE_HISTORY: + pFan = (HctDbHistoryFan*)pCur->pg.aOld; + break; + + } + + switch( i ){ + case 0: /* pgno */ + 
sqlite3_result_int64(ctx, (i64)pCur->iPg); + break; + case 1: /* iEntry */ + sqlite3_result_int64(ctx, (i64)pCur->iEntry); + break; + case 2: /* ikey */ + if( pIntkey ) sqlite3_result_int64(ctx, pIntkey->iKey); + if( pIntkeyNode ) sqlite3_result_int64(ctx, pIntkeyNode->iKey); + break; + case 3: /* size */ + if( pIntkey ) sqlite3_result_int64(ctx, pIntkey->nSize); + if( pIndex ) sqlite3_result_int64(ctx, pIndex->nSize); + if( pIndexNode ) sqlite3_result_int64(ctx, pIndexNode->nSize); + break; + case 4: /* offset */ + if( pIntkey ) sqlite3_result_int64(ctx, pIntkey->iOff); + if( pIndex ) sqlite3_result_int64(ctx, pIndex->iOff); + if( pIndexNode ) sqlite3_result_int64(ctx, pIndexNode->iOff); + break; + case 5: /* child */ + if( pIndexNode ) sqlite3_result_int64(ctx, pIndexNode->iChildPg); + if( pIntkeyNode ) sqlite3_result_int64(ctx, pIntkeyNode->iChildPg); + break; + + case 6: /* tid */ + case 7: /* rangetid */ + case 8: /* rangeoldpg */ + case 9: /* ovfl */ + if( pIntkey || pIndex || pIndexNode ){ + u8 *aPg = pCur->pg.aOld; + HctDbCell cell; + HctDbIndexEntry *p = hctDbEntryEntry(aPg, pCur->iEntry); + hctDbCellGet(pCur->pDb, &aPg[p->iOff], p->flags, &cell); + + if( i==6 && cell.iTid ){ + i64 iVal = (cell.iTid & HCT_TID_MASK); + if( cell.iTid & HCT_TID_ROLLBACK_OVERRIDE ) iVal = iVal*-1; + sqlite3_result_int64(ctx, iVal); + } + if( i==7 && cell.iRangeTid ){ + i64 iVal = (cell.iRangeTid & HCT_TID_MASK); + if( cell.iRangeTid & HCT_TID_ROLLBACK_OVERRIDE ) iVal = iVal*-1; + sqlite3_result_int64(ctx, iVal); + } + if( i==8 && cell.iRangeOld ){ + sqlite3_result_int64(ctx, (i64)cell.iRangeOld); + } + if( i==9 && cell.iOvfl ){ + sqlite3_result_int64(ctx, (i64)cell.iOvfl); + } + }else if( pFan ){ + if( i==7 ){ /* rangetid */ + u64 iVal = ((pCur->iEntry==0) ? 
pFan->iRangeTid0 : pFan->iRangeTid1); + if( iVal & HCT_TID_ROLLBACK_OVERRIDE ){ + sqlite3_result_int64(ctx, ((i64)(iVal & HCT_TID_MASK)) * -1); + }else{ + sqlite3_result_int64(ctx, (i64)iVal); + } + }else if( i==8 ){ /* rangeoldpg */ + u32 iRangeOldPg = + ((pCur->iEntry==0) ? pFan->pgOld0 : pFan->aPgOld1[pCur->iEntry-1]); + sqlite3_result_int64(ctx, (i64)iRangeOldPg); + } + } + break; + case 10: /* record */ + if( pIntkey || pIndex || pIndexNode ){ + sqlite3 *db = sqlite3_context_db_handle(ctx); + u8 *aPg = pCur->pg.aOld; + char *zRec; + int sz; + const u8 *aRec = 0; + HctBuffer buf = {0,0,0}; + + hctDbLoadRecord(pCur->pDb, &buf, aPg, pCur->iEntry, &sz, &aRec); + + zRec = sqlite3HctDbRecordToText(db, aRec, sz); + if( zRec ){ + sqlite3_result_text(ctx, zRec, -1, SQLITE_TRANSIENT); + sqlite3_free(zRec); + } + sqlite3HctBufferFree(&buf); + }else if( pFan ){ + char *zRec = sqlite3_mprintf("iSplit0=%d", pFan->iSplit0); + if( zRec ){ + sqlite3_result_text(ctx, zRec, -1, SQLITE_TRANSIENT); + sqlite3_free(zRec); + } + } + break; + } + + return SQLITE_OK; +} + +/* +** Return the rowid for the current row. In this implementation, the +** rowid is the same as the output value. +*/ +static int hctentryRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + hctentry_cursor *pCur = (hctentry_cursor*)cur; + *pRowid = (((i64)pCur->iPg) << 32) + pCur->iEntry; + return SQLITE_OK; +} + +/* +** This method is called to "rewind" the hctentry_cursor object back +** to the first row of output. This method is always called at least +** once prior to any call to hctentryColumn() or hctentryRowid() or +** hctentryEof(). 
+*/ +static int hctentryFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + int rc; + hctentry_cursor *pCur = (hctentry_cursor*)pVtabCursor; + hctentry_vtab *pTab = (hctentry_vtab*)(pCur->base.pVtab); + u32 iLastPg; + + pCur->pDb = sqlite3HctDbFind(pTab->db, 0); + pCur->iEntry = -1; + iLastPg = sqlite3HctFileMaxpage(pCur->pDb->pFile); + + if( idxNum==1 ){ + u32 iPg = (u32)sqlite3_value_int64(argv[0]); + assert( argc==1 ); + if( iPg<1 || iPg>iLastPg ) return SQLITE_OK; + pCur->iPg = pCur->iLastPg = iPg; + }else{ + pCur->iPg = 1; + pCur->iLastPg = iLastPg; + } + + rc = sqlite3HctFilePageGetPhysical(pCur->pDb->pFile, pCur->iPg, &pCur->pg); + if( rc!=SQLITE_OK ){ + return rc; + } + return hctentryNext(pVtabCursor); +} + +/* +** SQLite will invoke this method one or more times while planning a query +** that uses the virtual table. This routine needs to create +** a query plan for each invocation and compute an estimated cost for that +** plan. +*/ +static int hctentryBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + int i; + int iPgnoEq = -1; + + pIdxInfo->estimatedCost = (double)1000000; + pIdxInfo->estimatedRows = 1000000; + + /* Search for a pgno=? 
constraint */ + for(i=0; inConstraint; i++){ + struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[i]; + if( p->usable && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ ){ + iPgnoEq = i; + } + } + + if( iPgnoEq>=0 ){ + pIdxInfo->aConstraintUsage[iPgnoEq].argvIndex = 1; + pIdxInfo->idxNum = 1; + pIdxInfo->estimatedCost = (double)1000; + pIdxInfo->estimatedRows = 1000; + } + + return SQLITE_OK; +} + +typedef struct hctvalid_vtab hctvalid_vtab; +typedef struct hctvalid_cursor hctvalid_cursor; +struct hctvalid_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; +}; +struct hctvalid_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + HctDatabase *pDb; /* Database to report on */ + int iEntry; /* Current entry (i.e. rowid) */ + + u32 rootpgno; /* Value of rootpgno column */ + char *zFirst; + char *zLast; + char *zPglist; +}; +static int hctvalidConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + hctvalid_vtab *pNew = 0; + int rc = SQLITE_OK; + + *ppVtab = 0; + rc = sqlite3_declare_vtab(db, + "CREATE TABLE x(rootpgno, first, last, pglist)" + ); + + if( rc==SQLITE_OK ){ + pNew = sqlite3MallocZero( sizeof(*pNew) ); + *ppVtab = (sqlite3_vtab*)pNew; + if( pNew==0 ) return SQLITE_NOMEM; + pNew->db = db; + } + return rc; +} +static int hctvalidBestIndex( + sqlite3_vtab *tab, + sqlite3_index_info *pIdxInfo +){ + pIdxInfo->estimatedCost = (double)10000; + pIdxInfo->estimatedRows = 10000; + return SQLITE_OK; +} +static int hctvalidDisconnect(sqlite3_vtab *pVtab){ + hctvalid_vtab *p = (hctvalid_vtab*)pVtab; + sqlite3_free(p); + return SQLITE_OK; +} +static int hctvalidOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ + hctvalid_cursor *pCur; + pCur = sqlite3MallocZero(sizeof(*pCur)); + if( pCur==0 ) return SQLITE_NOMEM; + *ppCursor = &pCur->base; + return SQLITE_OK; +} +static int hctvalidClose(sqlite3_vtab_cursor *cur){ + hctvalid_cursor *pCur = 
(hctvalid_cursor*)cur; + sqlite3_free(pCur); + return SQLITE_OK; +} +static int hctvalidNext(sqlite3_vtab_cursor *cur){ + hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; + hctvalid_vtab *pTab = (hctvalid_vtab*)(pCsr->base.pVtab); + int ii; + HctDbCsr *pDbCsr = 0; + HctCsrIntkeyOp *pIntkeyOp = 0; + HctCsrIndexOp *pIndexOp = 0; + + sqlite3_free(pCsr->zFirst); + sqlite3_free(pCsr->zLast); + sqlite3_free(pCsr->zPglist); + pCsr->zFirst = 0; + pCsr->zLast = 0; + pCsr->zPglist = 0; + pCsr->rootpgno = 0; + pCsr->iEntry++; + pDbCsr = pCsr->pDb->pScannerList; + pIntkeyOp = pDbCsr->intkey.pOpList; + pIndexOp = pDbCsr->index.pOpList; + ii = 0; + if( pIntkeyOp==0 && pIndexOp==0 ) ii--; + for(/*noop*/; pDbCsr && iiiEntry; ii++){ + if( pIntkeyOp ) pIntkeyOp = pIntkeyOp->pNextOp; + if( pIndexOp ) pIndexOp = pIndexOp->pNextOp; + if( pIntkeyOp==0 && pIndexOp==0 ){ + pDbCsr = pDbCsr->pNextScanner; + if( pDbCsr ){ + pIntkeyOp = pDbCsr->intkey.pOpList; + pIndexOp = pDbCsr->index.pOpList; + if( pIntkeyOp==0 && pIndexOp==0 ) ii--; + } + } + } + + if( pDbCsr ){ + pCsr->rootpgno = pDbCsr->iRoot; + if( pIntkeyOp ){ + if( pIntkeyOp->iFirst!=SMALLEST_INT64 ){ + pCsr->zFirst = sqlite3_mprintf("%lld", pIntkeyOp->iFirst); + } + if( pIntkeyOp->iFirst!=LARGEST_INT64 ){ + pCsr->zLast = sqlite3_mprintf("%lld", pIntkeyOp->iLast); + } + if( pIntkeyOp->iLogical ){ + pCsr->zPglist = sqlite3_mprintf( + "%lld/%lld", pIntkeyOp->iLogical, pIntkeyOp->iPhysical + ); + } + }else{ + if( pIndexOp->pFirst ){ + pCsr->zFirst = sqlite3HctDbRecordToText( + pTab->db, pIndexOp->pFirst, pIndexOp->nFirst + ); + } + if( pIndexOp->pLast ){ + pCsr->zLast = sqlite3HctDbRecordToText( + pTab->db, pIndexOp->pLast, pIndexOp->nLast + ); + } + if( pIndexOp->iLogical ){ + pCsr->zPglist = sqlite3_mprintf( + "%lld/%lld", pIndexOp->iLogical, pIndexOp->iPhysical + ); + } + } + } + + return SQLITE_OK; +} +static int hctvalidFilter( + sqlite3_vtab_cursor *cur, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + 
hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; + hctvalid_vtab *pTab = (hctvalid_vtab*)(pCsr->base.pVtab); + + pCsr->pDb = sqlite3HctDbFind(pTab->db, 0); + pCsr->iEntry = -1; + return hctvalidNext(cur); +} +static int hctvalidEof(sqlite3_vtab_cursor *cur){ + hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; + return (pCsr->rootpgno==0); +} +static int hctvalidColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; + switch( i ){ + case 0: + sqlite3_result_int64(ctx, (i64)pCsr->rootpgno); + break; + case 1: + sqlite3_result_text(ctx, pCsr->zFirst, -1, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_text(ctx, pCsr->zLast, -1, SQLITE_TRANSIENT); + break; + case 3: + sqlite3_result_text(ctx, pCsr->zPglist, -1, SQLITE_TRANSIENT); + break; + } + return SQLITE_OK; +} +static int hctvalidRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; + *pRowid = pCsr->iEntry; + return SQLITE_OK; +} + + + +SQLITE_PRIVATE int sqlite3HctVtabInit(sqlite3 *db){ + static sqlite3_module hctdbModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ hctdbConnect, + /* xBestIndex */ hctdbBestIndex, + /* xDisconnect */ hctdbDisconnect, + /* xDestroy */ 0, + /* xOpen */ hctdbOpen, + /* xClose */ hctdbClose, + /* xFilter */ hctdbFilter, + /* xNext */ hctdbNext, + /* xEof */ hctdbEof, + /* xColumn */ hctdbColumn, + /* xRowid */ hctdbRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 + }; + + static sqlite3_module hctentryModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ hctentryConnect, + /* xBestIndex */ hctentryBestIndex, + /* xDisconnect */ hctentryDisconnect, + /* xDestroy */ 0, + /* 
xOpen */ hctentryOpen, + /* xClose */ hctentryClose, + /* xFilter */ hctentryFilter, + /* xNext */ hctentryNext, + /* xEof */ hctentryEof, + /* xColumn */ hctentryColumn, + /* xRowid */ hctentryRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 + }; + + static sqlite3_module hctvalidModule = { + /* iVersion */ 0, + /* xCreate */ 0, + /* xConnect */ hctvalidConnect, + /* xBestIndex */ hctvalidBestIndex, + /* xDisconnect */ hctvalidDisconnect, + /* xDestroy */ 0, + /* xOpen */ hctvalidOpen, + /* xClose */ hctvalidClose, + /* xFilter */ hctvalidFilter, + /* xNext */ hctvalidNext, + /* xEof */ hctvalidEof, + /* xColumn */ hctvalidColumn, + /* xRowid */ hctvalidRowid, + /* xUpdate */ 0, + /* xBegin */ 0, + /* xSync */ 0, + /* xCommit */ 0, + /* xRollback */ 0, + /* xFindMethod */ 0, + /* xRename */ 0, + /* xSavepoint */ 0, + /* xRelease */ 0, + /* xRollbackTo */ 0, + /* xShadowName */ 0 + }; + + int rc; + + rc = sqlite3_create_module(db, "hctdb", &hctdbModule, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_module(db, "hctentry", &hctentryModule, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_create_module(db, "hctvalid", &hctvalidModule, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctFileVtabInit(db); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctPManVtabInit(db); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctStatsInit(db); + } + if( rc==SQLITE_OK ){ + rc = sqlite3HctJrnlInit(db); + } + return rc; +} + +/************** End of hct_database.c ****************************************/ +/************** Begin file hct_tmap.c ****************************************/ +/* +** 2021 February 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. 
+** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +/* +** NOTES ON LOCKING +** +** Each time a new HctTMap object is allocated, the locking related +** variables are set: +** +** HctTMap.iMinTid +** HctTMap.iMinCid +** +** New HctTMap objects are always allocated by writers during the +** WRITING phase of a transaction. The iMinCid variable is set to +** the CID value associated with the snapshot on which the writer +** based its transaction. The iMinTid value is set to the largest +** TID value for which it and all smaller TID values map to fully +** committed transactions with CID values smaller than or equal +** to iMinCid. This means that: +** +** * The new object may be used by any client accessing a snapshot +** with a snapshot-id >= iMinCid. +** +** * So long as this object exists, it is not safe to reuse any +** page ids (logical or physical) freed by transactions with +** TID values > iMinTid. +** +** The HctTMap object may then be used to access any snapshot with +** a CID value greater than or equal to iMinCid. While the HctTMap +** is still in use, it is not safe to reuse any logical or physical +** page id freed by a transaction with a TID value greater than +** iMinTid. +** +** A new HctTMap object is created by a writer after it is allocated +** its TID iff: +** +** TODO: This all needs updating!!! +** +** * The expression (iNewTid % HctTMapServer.nTidStep)==0 is true, or +** * The existing transaction map is too small to contain an entry +** for iNewTid. +** +** The first time a client obtains a new HctTMap object, it remembers +** the CID of the first snapshot it accesses using it. The HctTMap +** is released at the end of the first transaction for which the CID is +** greater than or equal to (iFirstCid + HctTMapServer.nTidStep). This +** happens even if a new HctTMap has been obtained since then. 
TODO: There +** is probably a role for some randomness here. +** +** The above creates a problem - a single dormant connection can prevent +** all reuse of freed logical and physical pages. This is addressed by +** using smart reference objects of type HctTMapRef that support the +** reference being revoked by the server at any time. See comments above +** struct HctTMapRef for details. +*/ + +/* #include "hctInt.h" */ + +typedef struct HctTMapFull HctTMapFull; +typedef struct HctTMapRef HctTMapRef; + +/* +** The following object type represents a reference to an HctTMapFull +** object. The reference is taken and released under the cover of the +** associated HctTMapServer.mutex mutex. +** +** pRefNext/pRefPrev: +** These are used to link this object into the linked list at +** HctTMapFull.pRefList. They may only be accessed under the cover +** of the associated HctTMapServer.mutex mutex. +** +** pMap: +** Pointer to the HctTMapFull object, if any, that this reference +** currently points to. +** +** refMask: +** This may be set to one of four values. It is always modified using +** CAS instructions. +** +** Zero: +** HctTMapRef.pMap is not valid (always NULL). +** +** HCT_TMAPREF_SERVER: +** When the reference is first taken, under cover of the server mutex, +** refMask is set to this value. +** +** HCT_TMAPREF_SERVER|HCT_TMAPREF_CLIENT: +** When a client actually wishes to use the tmap indicated by this +** reference, it uses a CAS instruction to set refMask to this value. +** It may then use the tmap object. This does not require the mutex. +** +** If the client finds that refMask is not HCT_TMAPREF_SERVER, but +** has been set to 0, then the reference has been revoked. In this +** case it is not safe for the client to touch pMap. It must +** reinitialize the HctTmapRef object (under cover of the server +** mutex). 
+** +** When the read transaction is over, and the client does not need +** need the tmap object, it uses a CAS instruction to set refMask +** back to HCT_TMAPREF_SERVER. If, when doing so, it finds that the +** HCT_TMAPREF_SERVER bit has already been cleared, then it must +** release the reference immediately (under cover of the server +** mutex). +** +** HCT_TMAPREF_CLIENT: +*/ +struct HctTMapRef { + u32 refMask; + HctTMapFull *pMap; + HctTMapRef *pRefNext; + HctTMapRef *pRefPrev; +}; + +/* +** Bits from HctTMapRef.refMask. +*/ +#define HCT_TMAPREF_CLIENT 0x01 +#define HCT_TMAPREF_SERVER 0x02 +#define HCT_TMAPREF_BOTH 0x03 + +/* +** Event counters used by the hctstats virtual table. +*/ +typedef struct HctTMapStats HctTMapStats; +struct HctTMapStats { + i64 nMutex; + i64 nMutexBlock; +}; + + +/* +** iLockValue: +** This field contains two things - a flag and a safe-tid value. The flag +** is set whenever a read transaction is active, and clear otherwise. +** The safe-tid value is set to a TID value for which itself an all smaller +** TID values are included in the connection's transactions - current and +** future. +** +** Pages freed by the transaction with the safe-tid value may be reused +** without disturbing this client. +** +** pNextClient: +** Linked list of all clients associated with pServer. +** +** pBuild: +** This is used by the sqlite3HctTMapRecoveryXXX() API when constructing +** a new tmap object as part of sqlite_hct_journal recovery. 
+*/ +struct HctTMapClient { + HctTMapServer *pServer; + HctConfig *pConfig; + u64 iLockValue; + HctTMapClient *pNextClient; + HctTMapFull *pMap; + HctTMapStats stats; + + HctTMapFull *pBuild; + u64 iBuildMin; /* Min TID value explicitly set in pBuild */ +}; + +#define HCT_LOCKVALUE_ACTIVE (((u64)0x01) << 56) + +/* +** Values for HctTMapClient.eState +*/ +#define HCT_CLIENT_NONE 0 +#define HCT_CLIENT_OPEN 1 +#define HCT_CLIENT_UP 2 + +/* +** iMinMinTid: +** This value is set only when the mutex is held, using HctAtomicStore(). +** It may be read, using HctAtomicLoad(), at any time. +*/ +struct HctTMapServer { + sqlite3_mutex *pMutex; /* Mutex to protect this object */ + int nClient; /* Number of connected clients */ + u64 iMinMinTid; /* Smallest iMinTid value in pList */ + HctTMapFull *pList; /* List of tmaps. Newest first */ + HctTMapClient *pClientList; /* List of clients */ +}; + +/* +** nRef: +** Number of clients that hold a pointer to this object. +*/ +struct HctTMapFull { + HctTMap m; + int nRef; /* Number of pointers to this object */ + HctTMapFull *pNext; /* Next entry in HctTMapServer.pList */ +}; + +/* +** ENTER_TMAP_MUTEX(pClient) implementation. +** +** Grab the server mutex. And update client-stats as required at the same +** time. 
+*/ +static void hctTMapMutexEnter(HctTMapClient *pClient){ + sqlite3_mutex *pMutex = pClient->pServer->pMutex; + pClient->stats.nMutex++; + if( sqlite3_mutex_try(pMutex)!=SQLITE_OK ){ + pClient->stats.nMutexBlock++; + sqlite3_mutex_enter(pMutex); + } +} + +#if 0 +#define ENTER_TMAP_MUTEX(pClient) sqlite3_mutex_enter(pClient->pServer->pMutex) +#endif +#define ENTER_TMAP_MUTEX(pClient) hctTMapMutexEnter(pClient) +#define LEAVE_TMAP_MUTEX(pClient) sqlite3_mutex_leave(pClient->pServer->pMutex) + +/* +** Atomic version of: +** +** if( *pPtr!=iOld ){ +** return 0; +** } +** *pPtr = iNew; +** return 1; +*/ +#if 0 +static int hctTMapBoolCAS32(u32 *pPtr, u32 iOld, u32 iNew){ + return HctCASBool(pPtr, iOld, iNew); +} +#endif +static int hctTMapBoolCAS64(u64 *pPtr, u64 iOld, u64 iNew){ + return HctCASBool(pPtr, iOld, iNew); +} + +/* +** Return a pointer to the slot in pMap associated with TID iTid. +*/ +static u64 *hctTMapFind(HctTMapFull *pMap, u64 iTid){ + int iOff = iTid - pMap->m.iFirstTid; + int iMap = iOff / HCT_TMAP_PAGESIZE; + iOff = HCT_TMAP_ENTRYSLOT( (iOff % HCT_TMAP_PAGESIZE) ); + return &pMap->m.aaMap[iMap][iOff % HCT_TMAP_PAGESIZE]; +} + +/* +** Allocate the initial HctTMapFull object for the server passed as the +** only argument. This is called as part of sqlite3HctTMapServerNew(). 
+*/ +static int hctTMapInit(HctTMapServer *p, u64 iFirstTid, u64 iLastTid){ + int rc = SQLITE_OK; + int nMap = 0; + int nByte = 0; + u64 iFirst = (iFirstTid / HCT_TMAP_PAGESIZE) * HCT_TMAP_PAGESIZE; + HctTMapFull *pNew = 0; + + assert( p->pList==0 ); + assert( (iFirstTid & HCT_TMAP_CID_MASK)==iFirstTid ); + + nMap = (iLastTid / HCT_TMAP_PAGESIZE) - (iFirst / HCT_TMAP_PAGESIZE) + 3; + nByte = sizeof(HctTMapFull) + sizeof(u64*)*nMap; + pNew = (HctTMapFull*)sqlite3HctMalloc(&rc, nByte); + if( pNew ){ + int i; + pNew->m.iFirstTid = iFirst; + pNew->m.nMap = nMap; + pNew->m.aaMap = (u64**)&pNew[1]; + for(i=0; im.nMap; i++){ + u64 *a = (u64*)sqlite3HctMalloc(&rc, sizeof(u64)*HCT_TMAP_PAGESIZE); + pNew->m.aaMap[i] = a; + } + + if( rc!=SQLITE_OK ){ + assert( 0 ); /* OOM case */ + for(i=0; im.nMap; i++){ + sqlite3_free(pNew->m.aaMap[i]); + } + sqlite3_free(pNew); + }else{ + u64 t; + for(t=iFirst; tpList = pNew; + pNew->nRef = 1; /* Server reference */ + } + } + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctTMapServerNew(u64 iFirstTid, u64 iLastTid, HctTMapServer **pp){ + int rc = SQLITE_OK; + HctTMapServer *pNew; + + pNew = sqlite3MallocZero(sizeof(HctTMapServer)); + if( pNew==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + pNew->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); + if( pNew->pMutex==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + pNew->iMinMinTid = iFirstTid-1; + rc = hctTMapInit(pNew, iFirstTid, iLastTid); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3HctTMapServerFree(pNew); + pNew = 0; + } + + *pp = pNew; + return rc; +} + +SQLITE_PRIVATE int sqlite3HctTMapServerSet(HctTMapServer *pServer, u64 iTid, u64 iCid){ + u64 *pEntry = hctTMapFind(pServer->pList, iTid); + *pEntry = iCid; + return SQLITE_OK; +} + +/* +** Argument pMap is an HctTMapFull object that is currently linked +** into the list at HctTMapServer.pList. This function removes pMap +** from that list and frees all associated allocations. 
+*/ +static void hctTMapFreeMap(HctTMapServer *p, HctTMapFull *pMap){ + int iFirst = 0; /* First in pMap->m.aaMap[] to free */ + int iSave = 0; /* First in pMap->m.aaMap[] to preserve */ + int ii; + + assert( pMap && pMap->nRef==0 ); + if( pMap==p->pList ){ + if( pMap->pNext==0 ) iSave = pMap->m.nMap; + p->pList = pMap->pNext; + }else{ + HctTMapFull *pPrev; + HctTMapFull *pNext = pMap->pNext; + + for(pPrev=p->pList; pPrev->pNext!=pMap; pPrev=pPrev->pNext); + for(iSave=0; iSavem.nMap; iSave++){ + if( pMap->m.aaMap[iSave]==pPrev->m.aaMap[0] ) break; + } + + if( pNext ){ + u64 *aDoNotDel = pNext->m.aaMap[pNext->m.nMap-1]; + for(iFirst=pMap->m.nMap; iFirst>0; iFirst--){ + if( pMap->m.aaMap[iFirst-1]==aDoNotDel ) break; + } + } + + pPrev->pNext = pMap->pNext; + } + + for(ii=iFirst; iim.aaMap[ii]); + } + sqlite3_free(pMap); + +} + +/* +** Free a tmap-server object. +*/ +SQLITE_PRIVATE void sqlite3HctTMapServerFree(HctTMapServer *p){ + if( p ){ + assert( p->pClientList==0 ); + sqlite3_mutex_free(p->pMutex); + + assert( p->pList==0 || p->pList->nRef==1 ); + if( p->pList ) p->pList->nRef--; + while( p->pList ){ + HctTMapFull *pMap = p->pList; + while( pMap->pNext ) pMap = pMap->pNext; + hctTMapFreeMap(p, pMap); + } + + sqlite3_free(p); + } +} + +SQLITE_PRIVATE int sqlite3HctTMapClientNew( + HctTMapServer *p, + HctConfig *pConfig, + HctTMapClient **ppClient +){ + int rc = SQLITE_OK; + HctTMapClient *pNew; + + pNew = (HctTMapClient*)sqlite3HctMalloc(&rc, sizeof(HctTMapClient)); + if( pNew ){ + pNew->pServer = p; + pNew->pConfig = pConfig; + ENTER_TMAP_MUTEX(pNew); + /* Under cover of the server mutex, link this new client into the + ** list of clients associated with the server. The minimum TID value + ** for the client is set to the current global minimum. 
*/ + pNew->iLockValue = p->iMinMinTid; + pNew->pNextClient = p->pClientList; + pNew->pMap = p->pList; + pNew->pMap->nRef++; + p->pClientList = pNew; + LEAVE_TMAP_MUTEX(pNew); + } + *ppClient = pNew; + return rc; +} + +SQLITE_PRIVATE void sqlite3HctTMapClientFree(HctTMapClient *pClient){ + if( pClient ){ + HctTMapClient **pp; + ENTER_TMAP_MUTEX(pClient); + + pClient->pMap->nRef--; + if( pClient->pMap->nRef==0 ){ + hctTMapFreeMap(pClient->pServer, pClient->pMap); + } + + /* Remove this client from the HctTMapServer.pClientList list */ + for(pp=&pClient->pServer->pClientList;*pp!=pClient;pp=&(*pp)->pNextClient); + *pp = pClient->pNextClient; + + LEAVE_TMAP_MUTEX(pClient); + sqlite3_free(pClient); + } +} + + +SQLITE_PRIVATE int sqlite3HctTMapBegin(HctTMapClient *pClient, u64 iSnapshot, HctTMap **ppMap){ + HctTMapFull *pMap = pClient->pMap; + u64 iEof = pMap->m.iFirstTid + pMap->m.nMap*HCT_TMAP_PAGESIZE; + + while( 1 ){ + u64 iOrigLockValue = HctAtomicLoad(&pClient->iLockValue); + u64 iLockValue; + + /* Find the new "safe-tid" value */ + u64 iSafe = (iOrigLockValue & HCT_TMAP_CID_MASK); + u64 iMinMinTid = HctAtomicLoad(&pClient->pServer->iMinMinTid); + if( iSafeiSnapshot ) break; + iSafe++; + } + + /* Set the lock-value. If this fails, it means some writer process + ** has increased the safe-tid value for us. 
*/ + assert( (iOrigLockValue & HCT_LOCKVALUE_ACTIVE)==0 ); + iLockValue = iSafe | HCT_LOCKVALUE_ACTIVE; + if( hctTMapBoolCAS64(&pClient->iLockValue, iOrigLockValue, iLockValue) ){ + break; + } + } + + *ppMap = (HctTMap*)pMap; + return SQLITE_OK; +} + +SQLITE_PRIVATE u64 sqlite3HctTMapCommitedTID(HctTMapClient *pClient){ + return (pClient->iLockValue & HCT_TMAP_CID_MASK); +} + +static void hctTMapUpdateSafe(HctTMapClient *pClient){ + assert( sqlite3_mutex_held(pClient->pServer->pMutex) ); + if( pClient->pMap!=pClient->pServer->pList ){ + pClient->pMap->nRef--; + if( pClient->pMap->nRef==0 ){ + hctTMapFreeMap(pClient->pServer, pClient->pMap); + } + pClient->pMap = pClient->pServer->pList; + pClient->pMap->nRef++; + } +} + +/* +** This is called by a reader if it needs to look-up a TID for which its +** current HctTMap object is not large enough. This function sets output +** parameter (*ppMap) to point to the latest HctTMap object, which, +** unless the db is corrupt, is guaranteed to be large enough. +** +** SQLITE_OK is returned if successful. +*/ +SQLITE_PRIVATE int sqlite3HctTMapUpdate(HctTMapClient *pClient, HctTMap **ppMap){ + ENTER_TMAP_MUTEX(pClient); + hctTMapUpdateSafe(pClient); + LEAVE_TMAP_MUTEX(pClient); + *ppMap = (HctTMap*)pClient->pMap; + return SQLITE_OK; +} + +/* +** Called to signal the end of a read or write a transaction. Parameter +** iCID is passed the CID of the snapshot on which the transaction was +** based. +*/ +SQLITE_PRIVATE int sqlite3HctTMapEnd(HctTMapClient *pClient, u64 iCID){ + while( 1 ){ + u64 iOrigLockValue = pClient->iLockValue; + u64 iLockValue; + + assert( (iOrigLockValue & HCT_LOCKVALUE_ACTIVE)!=0 ); + iLockValue = (iOrigLockValue & ~HCT_LOCKVALUE_ACTIVE); + if( hctTMapBoolCAS64(&pClient->iLockValue, iOrigLockValue, iLockValue) ){ + break; + } + } + return SQLITE_OK; +} + +/* +** Allocate a new HctTMapFull object and link it into the list +** belonging to server pServer. 
The new map object is based on +** the server's current newest - pServer->pList. Relative to this +** object, the new map: +** +** * appends one mapping page to the end of the map, and +** +** * may remove one or more mapping pages from the start of the +** map, based on the current value of HctTMapServer.iMinMinTid. +** +** The server mutex must be held to call this function. +*/ +static int hctTMapNewObject(HctTMapServer *pServer){ + u64 iFirst = (pServer->iMinMinTid / HCT_TMAP_PAGESIZE) * HCT_TMAP_PAGESIZE; + HctTMapFull *pOld = pServer->pList; + HctTMapFull *pNew = 0; + int nMap = 0; + int nDiscard = 0; + int nByte = 0; + int rc = SQLITE_OK; + + assert( sqlite3_mutex_held(pServer->pMutex) ); + assert( (iFirst % HCT_TMAP_PAGESIZE)==0 ); + assert( (pOld->m.iFirstTid % HCT_TMAP_PAGESIZE)==0 ); + assert( (pServer->iMinMinTid & HCT_TMAP_CID_MASK)==pServer->iMinMinTid ); + assert( (iFirst & HCT_TMAP_CID_MASK)==iFirst ); + + nDiscard = (iFirst - pOld->m.iFirstTid) / HCT_TMAP_PAGESIZE; + nMap = pOld->m.nMap + 1 - nDiscard; + nByte = sizeof(HctTMapFull) + nMap*sizeof(u64*); + pNew = (HctTMapFull*)sqlite3HctMalloc(&rc, nByte); + + if( pNew ){ + int ii; + pNew->m.iFirstTid = iFirst; + pNew->m.nMap = nMap; + pNew->m.aaMap = (u64**)&pNew[1]; + pNew->nRef = 1; + for(ii=0; ii<(nMap-1); ii++){ + pNew->m.aaMap[ii] = pOld->m.aaMap[ii+nDiscard]; + } + pNew->m.aaMap[ii] = (u64*)sqlite3HctMalloc( + &rc, sizeof(u64)*HCT_TMAP_PAGESIZE + ); + + pServer->pList->nRef--; + if( pServer->pList->nRef==0 ){ + hctTMapFreeMap(pServer, pServer->pList); + } + pNew->pNext = pServer->pList; + pServer->pList = pNew; + } + + return rc; +} + +/* +** Return the largest TID for which it is safe to reuse freed pages. +*/ +SQLITE_PRIVATE u64 sqlite3HctTMapSafeTID(HctTMapClient *p){ + /* TODO: -1? */ + return HctAtomicLoad(&p->pServer->iMinMinTid); +} + +/* +** This is called by write transactions immediately after obtaining +** the transaction's TID value (at the start of the commit process). 
+*/ +SQLITE_PRIVATE int sqlite3HctTMapNewTID( + HctTMapClient *p, /* Transaction map client */ + u64 iTid, /* TID for write transaction */ + HctTMap **ppMap /* OUT: (possibly) new transaction map */ +){ + int rc = SQLITE_OK; + HctTMapFull *pMap = p->pMap; + u64 iEof = pMap->m.iFirstTid + ((u64)pMap->m.nMap*HCT_TMAP_PAGESIZE); + + /* If it is time to do so, allocate a new transaction-map */ + if( iTid>=iEof || iTid==(iEof - HCT_TMAP_PAGESIZE/2) ){ + ENTER_TMAP_MUTEX(p); + hctTMapUpdateSafe(p); + pMap = p->pMap; + iEof = pMap->m.iFirstTid + ((u64)pMap->m.nMap*HCT_TMAP_PAGESIZE); + if( iTid>=iEof || iTid==(iEof - HCT_TMAP_PAGESIZE/2) ){ + hctTMapNewObject(p->pServer); + hctTMapUpdateSafe(p); + } + LEAVE_TMAP_MUTEX(p); + } + + *ppMap = (HctTMap*)p->pMap; + return rc; +} + +SQLITE_PRIVATE void sqlite3HctTMapScan(HctTMapClient *p){ + HctTMapClient *pClient = 0; + u64 iSafe = p->iLockValue & HCT_TMAP_CID_MASK; + + ENTER_TMAP_MUTEX(p); + for(pClient=p->pServer->pClientList; pClient; pClient=pClient->pNextClient){ + u64 iVal = HctAtomicLoad(&pClient->iLockValue); + u64 iTid = (iVal & HCT_TMAP_CID_MASK); + + if( (iVal & HCT_LOCKVALUE_ACTIVE)==0 && iTidiLockValue, iVal, iSafe); + iVal = HctAtomicLoad(&pClient->iLockValue); + iTid = (iVal & HCT_TMAP_CID_MASK); + } + + iSafe = MIN(iSafe, iTid); + } + HctAtomicStore(&p->pServer->iMinMinTid, iSafe); + LEAVE_TMAP_MUTEX(p); +} + +SQLITE_PRIVATE i64 sqlite3HctTMapStats(sqlite3 *db, int iStat, const char **pzStat){ + HctTMapClient *pClient = 0; + i64 iVal = -1; + + pClient = sqlite3HctFileTMapClient(sqlite3HctDbFile(sqlite3HctDbFind(db, 0))); + switch( iStat ){ + case 0: + *pzStat = "mutex_attempt"; + iVal = pClient->stats.nMutex; + break; + case 1: + *pzStat = "mutex_block"; + iVal = pClient->stats.nMutexBlock; + break; + default: + break; + } + + return iVal; +} + +SQLITE_PRIVATE int sqlite3HctTMapRecoverySet(HctTMapClient *p, u64 iTid, u64 iCid){ + int rc = SQLITE_OK; + HctTMapFull *pNew = p->pBuild; + if( pNew==0 ){ + u64 iFirst 
= 1; + u64 iEof = p->pServer->pList->m.iFirstTid; + u64 iLast = iEof + (HCT_TMAP_PAGESIZE*2); + int nMap = 0; + if( iTid>=HCT_TMAP_PAGESIZE ){ + iFirst = 1 + ((iTid / HCT_TMAP_PAGESIZE) - 1) * HCT_TMAP_PAGESIZE; + } + nMap = ((iLast - iFirst) + HCT_TMAP_PAGESIZE-1) / HCT_TMAP_PAGESIZE; + assert( nMap>0 ); + + p->pBuild = pNew = (HctTMapFull*)sqlite3HctMalloc(&rc, + sizeof(HctTMapFull) + nMap*sizeof(u64*) + ); + p->iBuildMin = iTid; + if( pNew ){ + int ii; + pNew->m.iFirstTid = iFirst; + pNew->m.nMap = nMap; + pNew->m.aaMap = (u64**)&pNew[1]; + pNew->nRef = 1; + for(ii=0; iim.aaMap[ii] = aMap; + } + if( rc==SQLITE_OK ){ + u64 ee; + for(ee=iFirst; eem.aaMap[iMap][iOff] = ((u64)1 | HCT_TMAP_COMMITTED); + } + } + } + } + p->iBuildMin = MIN(p->iBuildMin, iTid); + + while( rc==SQLITE_OK && pNew->m.iFirstTid>iTid ){ + int ii; + HctTMapFull *pAlloc = 0; + int nMap = pNew->m.nMap + 1; + + pAlloc = (HctTMapFull*)sqlite3HctMalloc(&rc, + sizeof(HctTMapFull) + nMap*sizeof(u64*) + ); + pAlloc->nRef = 1; + pAlloc->m.nMap = nMap; + pAlloc->m.aaMap = (u64**)&pAlloc[1]; + pAlloc->m.iFirstTid = pNew->m.iFirstTid - HCT_TMAP_PAGESIZE; + memcpy(&pAlloc->m.aaMap[1], pNew->m.aaMap, pNew->m.nMap*sizeof(u64*)); + pAlloc->m.aaMap[0] = (u64*)sqlite3HctMalloc(&rc, + sizeof(u64) * HCT_TMAP_PAGESIZE + ); + for(ii=0; iim.aaMap[0][ii] = ((u64)1 | HCT_TMAP_COMMITTED); + } + + assert( pNew->nRef==1 ); + sqlite3_free(pNew); + p->pBuild = pNew = pAlloc; + } + + if( rc==SQLITE_OK ){ + int iMap = (iTid - pNew->m.iFirstTid) / HCT_TMAP_PAGESIZE; + int iOff = (iTid - pNew->m.iFirstTid) % HCT_TMAP_PAGESIZE; + pNew->m.aaMap[iMap][iOff] = (iCid | HCT_TMAP_COMMITTED); + } + + return rc; +} + +SQLITE_PRIVATE void sqlite3HctTMapRecoveryFinish(HctTMapClient *p, int rc){ + HctTMapFull *pNew = p->pBuild; + if( pNew ){ + p->pBuild = 0; + if( rc==SQLITE_OK ){ + pNew->pNext = p->pServer->pList; + p->pServer->pList = pNew; + p->pServer->iMinMinTid = p->iBuildMin; + if( pNew->pNext ){ + pNew->pNext->nRef--; + if( 
pNew->pNext->nRef==0 ){ + hctTMapFreeMap(p->pServer, pNew->pNext); + } + } + }else{ + int ii; + for(ii=0; iim.nMap; ii++){ + sqlite3_free(pNew->m.aaMap[ii]); + } + sqlite3_free(pNew); + } + p->iBuildMin = 0; + } +} + + +/************** End of hct_tmap.c ********************************************/ +/************** Begin file hct_record.c **************************************/ +/* +** 2022 May 19 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + +/* #include "hctInt.h" */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ + +/* #include */ +/* #include */ + +/* +** Write the serialized data blob for the value stored in pMem into +** buf. It is assumed that the caller has allocated sufficient space. +** Return the number of bytes written. +** +** nBuf is the amount of space left in buf[]. The caller is responsible +** for allocating enough space to buf[] to hold the entire field, exclusive +** of the pMem->u.nZero bytes for a MEM_Zero value. +** +** Return the number of bytes actually written into buf[]. The number +** of bytes in the zero-filled tail is included in the return value only +** if those bytes were zeroed in buf[]. 
+*/ +static u32 hctRecordSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ + u32 len; + + /* Integer and Real */ + if( serial_type<=7 && serial_type>0 ){ + u64 v; + u32 i; + if( serial_type==7 ){ + assert( sizeof(v)==sizeof(pMem->u.r) ); + memcpy(&v, &pMem->u.r, sizeof(v)); + swapMixedEndianFloat(v); + }else{ + v = pMem->u.i; + } + len = i = sqlite3SmallTypeSizes[serial_type]; + assert( i>0 ); + do{ + buf[--i] = (u8)(v&0xFF); + v >>= 8; + }while( i ); + return len; + } + + /* String or blob */ + if( serial_type>=12 ){ + assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0) + == (int)sqlite3VdbeSerialTypeLen(serial_type) ); + len = pMem->n; + if( len>0 ) memcpy(buf, pMem->z, len); + return len; + } + + /* NULL or constants 0 or 1 */ + return 0; +} + +/* +** Return the serial-type for the value stored in pMem. +** +** This routine might convert a large MEM_IntReal value into MEM_Real. +*/ +static u32 hctRecordSerialType(Mem *pMem, u32 *pLen){ + int flags = pMem->flags; + u32 n; + + assert( pLen!=0 ); + if( flags&MEM_Null ){ + *pLen = 0; + return 0; + } + if( flags&(MEM_Int|MEM_IntReal) ){ + /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. 
*/ +# define MAX_6BYTE ((((i64)0x00008000)<<32)-1) + i64 i = pMem->u.i; + u64 u; + testcase( flags & MEM_Int ); + testcase( flags & MEM_IntReal ); + if( i<0 ){ + u = ~i; + }else{ + u = i; + } + if( u<=127 ){ + if( (i&1)==i ){ + *pLen = 0; + return 8+(u32)u; + }else{ + *pLen = 1; + return 1; + } + } + if( u<=32767 ){ *pLen = 2; return 2; } + if( u<=8388607 ){ *pLen = 3; return 3; } + if( u<=2147483647 ){ *pLen = 4; return 4; } + if( u<=MAX_6BYTE ){ *pLen = 6; return 5; } + *pLen = 8; + if( flags&MEM_IntReal ){ + /* If the value is IntReal and is going to take up 8 bytes to store + ** as an integer, then we might as well make it an 8-byte floating + ** point value */ + pMem->u.r = (double)pMem->u.i; + pMem->flags &= ~MEM_IntReal; + pMem->flags |= MEM_Real; + return 7; + } + return 6; + } + if( flags&MEM_Real ){ + *pLen = 8; + return 7; + } + assert( pMem->db->mallocFailed || flags&(MEM_Str|MEM_Blob) ); + assert( pMem->n>=0 ); + n = (u32)pMem->n; + if( flags & MEM_Zero ){ + n += pMem->u.nZero; + } + *pLen = n; + return ((n*2) + 12 + ((flags&MEM_Str)!=0)); +} + + +/* +** +*/ +SQLITE_PRIVATE int sqlite3HctSerializeRecord( + UnpackedRecord *pRec, /* Record to serialize */ + u8 **ppRec, /* OUT: buffer containing serialization */ + int *pnRec /* OUT: size of (*ppRec) in bytes */ +){ + int ii; + int nData = 0; + int nHdr = 0; + u8 *pOut = 0; + int iOffHdr = 0; + int iOffData = 0; + + for(ii=0; iinField; ii++){ + u32 n; + u32 stype = hctRecordSerialType(&pRec->aMem[ii], &n); + nData += n; + nHdr += sqlite3VarintLen(stype); + pRec->aMem[ii].uTemp = stype; + } + + if( nHdr<=126 ){ + /* The common case */ + nHdr += 1; + }else{ + /* Rare case of a really large header */ + int nVarint = sqlite3VarintLen(nHdr); + nHdr += nVarint; + if( nVarintnField; ii++){ + u32 stype = pRec->aMem[ii].uTemp; + iOffHdr += putVarint32(&pOut[iOffHdr], stype); + iOffData += hctRecordSerialPut(&pOut[iOffData], &pRec->aMem[ii], stype); + } + assert( iOffData==(nHdr+nData) ); + + *ppRec = pOut; + *pnRec 
= iOffData; + + return SQLITE_OK; +} + + +/************** End of hct_record.c ******************************************/ +/************** Begin file hct_stats.c ***************************************/ +/* +** 2022 September 28 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +*/ + + +/* #include "hctInt.h" */ + +typedef struct hctstats_vtab hctstats_vtab; +typedef struct hctstats_cursor hctstats_cursor; +struct hctstats_vtab { + sqlite3_vtab base; /* Base class - must be first */ + sqlite3 *db; +}; +struct hctstats_cursor { + sqlite3_vtab_cursor base; /* Base class - must be first */ + int iSubsys; + int iStat; + + i64 iRowid; + const char *zStat; /* Value for "stat" column. NULL for EOF. */ + i64 iVal; /* Value for "val" column. */ +}; + +typedef struct HctStatsSubsys HctStatsSubsys; +struct HctStatsSubsys { + const char *zSubsys; + i64 (*xStat)(sqlite3*, int iStat, const char **pzStat); +}; + +static HctStatsSubsys aHctStatGlobal[] = { + { "file", sqlite3HctFileStats }, + { "db", sqlite3HctDbStats }, + { "tmap", sqlite3HctTMapStats }, + { "pman", sqlite3HctPManStats }, + { "hct", sqlite3HctMainStats } +}; + +#define HCTSTATS_SCHEMA "CREATE TABLE x(subsys, stat, val)" + +/* +** xConnect() callback for hctstats table. 
+*/
+/* Declare the fixed hctstats schema to SQLite and, if that succeeds,
+** allocate a zeroed hctstats_vtab that remembers the owning database
+** handle. Arguments pAux, argc, argv and pzErr are not used. Returns
+** SQLITE_NOMEM if the allocation fails, otherwise the result of
+** sqlite3_declare_vtab(). */
+static int hctstatsConnect(
+  sqlite3 *db,
+  void *pAux,
+  int argc, const char *const*argv,
+  sqlite3_vtab **ppVtab,
+  char **pzErr
+){
+  hctstats_vtab *pNew = 0;
+  int rc = SQLITE_OK;
+
+  *ppVtab = 0;
+  rc = sqlite3_declare_vtab(db, HCTSTATS_SCHEMA);
+
+  if( rc==SQLITE_OK ){
+    pNew = sqlite3MallocZero( sizeof(*pNew) );
+    *ppVtab = (sqlite3_vtab*)pNew;
+    if( pNew==0 ) return SQLITE_NOMEM;
+    pNew->db = db;
+  }
+  return rc;
+}
+
+/*
+** xBestIndex() callback for hctstats table. No constraint in pIdxInfo is
+** inspected: every plan is reported with the same fixed cost and row
+** estimate.
+*/
+static int hctstatsBestIndex(
+  sqlite3_vtab *tab,
+  sqlite3_index_info *pIdxInfo
+){
+  pIdxInfo->estimatedCost = (double)10000;
+  pIdxInfo->estimatedRows = 10000;
+  return SQLITE_OK;
+}
+
+/*
+** xDisconnect() callback for hctstats table. Free the vtab handle.
+*/
+static int hctstatsDisconnect(sqlite3_vtab *pVtab){
+  hctstats_vtab *p = (hctstats_vtab*)pVtab;
+  sqlite3_free(p);
+  return SQLITE_OK;
+}
+
+/*
+** xOpen() callback for hctstats table. Allocate a new, zeroed cursor and
+** return it via (*ppCursor). Returns SQLITE_NOMEM if the allocation fails.
+** The vtab handle itself (argument p) is not used.
+*/
+static int hctstatsOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){
+  hctstats_cursor *pCur;
+  pCur = sqlite3MallocZero(sizeof(*pCur));
+  if( pCur==0 ) return SQLITE_NOMEM;
+  *ppCursor = &pCur->base;
+  return SQLITE_OK;
+}
+
+/*
+** xClose() callback for hctstats table. Free the vtab handle.
+*/ +static int hctstatsClose(sqlite3_vtab_cursor *cur){ + hctstats_cursor *pCur = (hctstats_cursor*)cur; + sqlite3_free(pCur); + return SQLITE_OK; +} + +static int hctstatsNext(sqlite3_vtab_cursor *cur){ + hctstats_cursor *pCsr = (hctstats_cursor*)cur; + hctstats_vtab *pTab = (hctstats_vtab*)(pCsr->base.pVtab); + + pCsr->zStat = 0; + pCsr->iStat++; + + while( pCsr->zStat==0 && pCsr->iSubsysiSubsys]; + pCsr->iVal = p->xStat(pTab->db, pCsr->iStat, &pCsr->zStat); + if( pCsr->zStat==0 ){ + pCsr->iStat = 0; + pCsr->iSubsys++; + } + } + + return SQLITE_OK; +} + +static int hctstatsFilter( + sqlite3_vtab_cursor *cur, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + hctstats_cursor *pCsr = (hctstats_cursor*)cur; + + if( sqlite3HctDbFind(((hctstats_vtab*)cur->pVtab)->db, 0)==0 ){ + /* Main database is not an hctree db */ + return SQLITE_OK; + } + + pCsr->iStat = -1; + pCsr->iSubsys = 0; + pCsr->iRowid = 0; + return hctstatsNext(cur); +} + +static int hctstatsEof(sqlite3_vtab_cursor *cur){ + hctstats_cursor *pCsr = (hctstats_cursor*)cur; + return (pCsr->zStat==0); +} + +static int hctstatsColumn( + sqlite3_vtab_cursor *cur, /* The cursor */ + sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ + int i /* Which column to return */ +){ + hctstats_cursor *pCsr = (hctstats_cursor*)cur; + + assert( i==0 || i==1 || i==2 ); + switch( i ){ + case 0: { + HctStatsSubsys *p = &aHctStatGlobal[pCsr->iSubsys]; + sqlite3_result_text(ctx, p->zSubsys, -1, SQLITE_STATIC); + break; + } + + case 1: + sqlite3_result_text(ctx, pCsr->zStat, -1, SQLITE_STATIC); + break; + + default: + assert( i==2 ); + sqlite3_result_int64(ctx, pCsr->iVal); + break; + } + return SQLITE_OK; +} + +static int hctstatsRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ + hctstats_cursor *pCsr = (hctstats_cursor*)cur; + *pRowid = pCsr->iRowid; + return SQLITE_OK; +} + + +/* +** Register the hct_stats virtual table module with the supplied +** SQLite database handle. 
+*/
+SQLITE_PRIVATE int sqlite3HctStatsInit(sqlite3 *db){
+  /* Method table for the read-only "hctstats" module. xCreate is NULL, which
+  ** the SQLite virtual-table documentation describes as an eponymous-only
+  ** virtual table. All write, transaction, rename and savepoint methods are
+  ** likewise left NULL. */
+  static sqlite3_module hctstatsModule = {
+    /* iVersion    */ 0,
+    /* xCreate     */ 0,
+    /* xConnect    */ hctstatsConnect,
+    /* xBestIndex  */ hctstatsBestIndex,
+    /* xDisconnect */ hctstatsDisconnect,
+    /* xDestroy    */ 0,
+    /* xOpen       */ hctstatsOpen,
+    /* xClose      */ hctstatsClose,
+    /* xFilter     */ hctstatsFilter,
+    /* xNext       */ hctstatsNext,
+    /* xEof        */ hctstatsEof,
+    /* xColumn     */ hctstatsColumn,
+    /* xRowid      */ hctstatsRowid,
+    /* xUpdate     */ 0,
+    /* xBegin      */ 0,
+    /* xSync       */ 0,
+    /* xCommit     */ 0,
+    /* xRollback   */ 0,
+    /* xFindMethod */ 0,
+    /* xRename     */ 0,
+    /* xSavepoint  */ 0,
+    /* xRelease    */ 0,
+    /* xRollbackTo */ 0,
+    /* xShadowName */ 0
+  };
+
+  /* The module is registered under the name "hctstats" with a NULL
+  ** client-data pointer. */
+  return sqlite3_create_module(db, "hctstats", &hctstatsModule, 0);
+}
+
+
+
+/************** End of hct_stats.c *******************************************/
+/************** Begin file hct_journal.c *************************************/
+/*
+** 2020 October 13
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+*************************************************************************
+*/
+
+/* #include "hctInt.h" */
+/* #include "vdbeInt.h" */
+
+/* SQL that creates the journal table. The column order here is the field
+** order used when journal records are composed and parsed elsewhere in this
+** file (cid, schema, data, schemacid, hash, tid, validcid). The text already
+** ends with its own ";". */
+#define HCT_JOURNAL_SCHEMA \
+"CREATE TABLE sqlite_hct_journal(" \
+  "cid INTEGER PRIMARY KEY," \
+  "schema TEXT," \
+  "data BLOB," \
+  "schemacid INTEGER," \
+  "hash BLOB," \
+  "tid INTEGER," \
+  "validcid INTEGER" \
+");"
+
+
+/* SQL that creates the baseline table (cid, schemacid, hash). As above, the
+** text already ends with its own ";". */
+#define HCT_BASELINE_SCHEMA \
+"CREATE TABLE sqlite_hct_baseline(" \
+  "cid INTEGER," \
+  "schemacid INTEGER," \
+  "hash BLOB" \
+");"
+
+/*
+** In follower mode, it is not possible to call sqlite3_hct_journal_write()
+** for the transaction with CID (N + HCT_MAX_LEADING_WRITE) until all
+** transactions with CID values of N or less have been committed.
+*/
+#define HCT_MAX_LEADING_WRITE (8*1024)
+
+typedef struct HctJrnlServer HctJrnlServer;
+typedef struct HctJrnlPendingHook HctJrnlPendingHook;
+
+/*
+** One object of this type is shared by all connections to the same
+** database. Managed by the HctFileServer object (see functions
+** sqlite3HctFileGetJrnlPtr() and SetJrnlPtr()).
+**
+** iSchemaCid:
+**   This contains the current schema version of the database. Even though
+**   this value may be concurrently accessed, there is no need for an
+**   advanced or versioned data structure. Because:
+**
+**   1) In LEADER mode, this value is only accessed when writing an entry
+**      to the journal table, from within sqlite3HctJrnlLog(). It is only
+**      written to if the transaction has modified the database schema.
+**
+**      The call to sqlite3HctJrnlLog() comes after the transaction has been
+**      successfully validated. And a transaction that modifies the schema
+**      only passes validation if there have been no writes at all to the
+**      database since its snapshot was opened - i.e. if the CID for the
+**      transaction is one greater than the CID of its snapshot. This
+**      guarantees that there are no transactions with CID values less than
+**      that of the schema transaction concurrently accessing iSchemaCid.
+**
+**      Also, since schema transactions modify the schema cookie, and all other
+**      transactions check the schema cookie during validation, it is
+**      guaranteed that no transaction started before the schema transaction
+**      is committed may successfully validate with a CID value greater than
+**      that of the schema transaction.
+**
+**      Therefore, if a schema transaction has passed validation, it is
+**      guaranteed exclusive access to the iSchemaCid variable.
+**
+**   2) In FOLLOWER mode, the value is:
+**
+**        * read from within sqlite3_hct_journal_write(), just after opening
+**          a snapshot, and
+**
+**        * written from within the same call, following successful validation
+**          of a schema transaction.
+**
+**      A schema transaction is only started once all transactions with CID
+**      values less than that of the schema transaction have finished
+**      committing. This alone ensures that there is at most a single
+**      writer to the iSchemaCid variable at any one time.
+**
+** eMode:
+**   The current database mode - either SQLITE_HCT_JOURNAL_MODE_FOLLOWER or
+**   SQLITE_HCT_JOURNAL_MODE_LEADER.
+**
+** iSnapshot:
+**   This is meaningful in FOLLOWER mode only.
+**
+**   This is set to a CID value for which it and all prior transactions are
+**   committed. It may be written by any client using an atomic CAS operation,
+**   but may only be increased, never decreased. No transaction with a CID
+**   greater than (iSnapshot + HCT_MAX_LEADING_WRITE) may be started -
+**   iSnapshot must be increased first.
+**
+** nSchemaVersionIncr:
+**   NOTE(review): this field was not described by the original comment and
+**   its use is not visible in this part of the file - confirm its purpose.
+**
+** nCommit:
+**   Size of aCommit[] array.
+**
+** aCommit:
+**   This array is only populated if the object is in FOLLOWER mode.
+**
+**   Say the size of the array is N (actually HctJrnlServer.nCommit). Then,
+**   when transaction X is committed, slot aCommit[X % N] is set to X. Or,
+**   if transaction X is committed but no snapshot is valid until Y (for Y>X),
+**   then instead slot aCommit[X % N] is set to Y.
+*/
+struct HctJrnlServer {
+  u64 iSchemaCid;
+  int eMode;
+  u64 iSnapshot;
+  int nSchemaVersionIncr;
+  int nCommit;
+  u64 *aCommit; /* Array of size nCommit */
+};
+
+/* Arguments saved for a deferred call to the custom validation hook
+** (db->xValidate). sqlite3HctJrnlInvokeHook() only makes the call while
+** iCid is greater than zero, and resets iCid to zero afterwards. */
+struct HctJrnlPendingHook {
+  u64 iCid;             /* CID to pass to the hook; 0 when nothing pending */
+  u64 iSCid;            /* Schema CID to pass to the hook */
+  HctBuffer data;       /* Serialized "data" blob for the transaction */
+  HctBuffer schema;     /* Schema SQL text; empty if no schema change */
+};
+
+/*
+** There is one instance of this structure for each database handle (HBtree*)
+** open on a replication-enabled hctree database.
+**
+** eInWrite:
+** Set to true while the database connection is in a call to
+** sqlite3_hct_journal_write().
+*/ +struct HctJournal { + u64 iJrnlRoot; /* Root page of journal table */ + u64 iBaseRoot; /* Root page of base table */ + int eInWrite; + u64 iWriteTid; + u64 iWriteCid; + u64 iRollbackSnapshot; + HctDatabase *pDb; + HctTree *pTree; + HctJrnlServer *pServer; + HctJrnlPendingHook pending; +}; + +#define HCT_JOURNAL_NONE 0 +#define HCT_JOURNAL_INWRITE 1 +#define HCT_JOURNAL_INROLLBACK 2 + +static void hctJournalSetDbError( + sqlite3 *db, /* Database on which to set error */ + int rc, /* Error code */ + const char *zFormat, ... /* Printf() error string and arguments */ +){ + char *zErr = 0; + sqlite3_mutex_enter( sqlite3_db_mutex(db) ); + if( zFormat ){ + va_list ap; + va_start(ap, zFormat); + zErr = sqlite3_vmprintf(zFormat, ap); + va_end(ap); + } + if( zErr ){ + sqlite3ErrorWithMsg(db, rc, "%s", zErr); + sqlite3_free(zErr); + }else{ + sqlite3ErrorWithMsg(db, rc, 0, 0); + } + sqlite3_mutex_leave( sqlite3_db_mutex(db) ); +} + +/* +** Initialize the main database for replication. +*/ +SQLITE_API int sqlite3_hct_journal_init(sqlite3 *db){ + const char *zTest1 = "PRAGMA hct_ndbfile"; + const char *zTest2 = "SELECT 1 WHERE (SELECT count(*) FROM sqlite_schema)=0"; + sqlite3_stmt *pTest = 0; + int rc = SQLITE_OK; + + /* Test that there is not already an open transaction on this database. */ + if( sqlite3_get_autocommit(db)==0 ){ + hctJournalSetDbError(db, SQLITE_ERROR, "open transaction on database"); + return SQLITE_ERROR; + } + + /* Test that the main db really is an hct database. Leave rc set to + ** something other than SQLITE_OK and an error message in the database + ** handle if it is not. 
*/ + if( rc==SQLITE_OK ){ + rc = sqlite3_prepare_v2(db, zTest1, -1, &pTest, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_step(pTest); + sqlite3_finalize(pTest); + if( rc==SQLITE_DONE ){ + hctJournalSetDbError(db, SQLITE_ERROR, "not an hct database"); + }else if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + } + } + + /* Open a transaction on the db */ + if( rc==SQLITE_OK ){ + rc = sqlite3_exec(db, "BEGIN", 0, 0, 0); + } + + /* Test that the main db really is empty */ + if( rc==SQLITE_OK ){ + rc = sqlite3_prepare_v2(db, zTest2, -1, &pTest, 0); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_step(pTest); + sqlite3_finalize(pTest); + if( rc==SQLITE_DONE ){ + hctJournalSetDbError(db, SQLITE_ERROR, "not an empty database"); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ROW ){ + rc = SQLITE_OK; + } + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_exec(db, + "PRAGMA writable_schema = 1;" + HCT_JOURNAL_SCHEMA ";" + HCT_BASELINE_SCHEMA ";" + "INSERT INTO sqlite_hct_baseline VALUES(6, 0, zeroblob(16));" + "PRAGMA writable_schema = 0;" + ,0 ,0 ,0 + ); + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); + } + if( rc!=SQLITE_OK ){ + char *zErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + sqlite3_exec(db, "ROLLBACK", 0, 0, 0); + hctJournalSetDbError(db, rc, "%s", zErr); + sqlite3_free(zErr); + }else{ + rc = sqlite3HctDetectJournals(db); + } + + return rc; +} + +/* +** Register a custom validation callback with the database handle. +*/ +SQLITE_API int sqlite3_hct_journal_hook( + sqlite3 *db, + void *pArg, + int(*xValidate)( + void *pCopyOfArg, + sqlite3_int64 iCid, + const char *zSchema, + const void *pData, int nData, + sqlite3_int64 iSchemaCid + ) +){ + db->xValidate = xValidate; + db->pValidateArg = pArg; + return SQLITE_OK; +} + +SQLITE_API void sqlite3_hct_migrate_mode(sqlite3 *db, int bActivate){ + db->bHctMigrate = bActivate; +} + +/* +** Value iVal is to be stored as an integer in an SQLite record. 
This +** function returns the number of bytes that it will use for storage. +*/ +static int hctJrnlIntSize(u64 iVal){ +#define MAX_6BYTE ((((i64)0x00008000)<<32)-1) + if( iVal<=127 ) return 1; + if( iVal<=32767 ) return 2; + if( iVal<=8388607 ) return 3; + if( iVal<=2147483647 ) return 4; + if( iVal<=MAX_6BYTE ) return 6; + return 8; +} + +/* +** Store an (nByte*8) bit big-endian integer, value iVal, in buffer a[]. +*/ +static void hctJrnlIntPut(u8 *a, u64 iVal, int nByte){ + int i; + for(i=1; i<=nByte; i++){ + a[nByte-i] = (iVal & 0xFF); + iVal = (iVal >> 8); + } +} + +/* +** Return the byte value that should be stored in the SQLite record +** header for an nSize byte integer field. +*/ +static u8 hctJrnlIntHdr(int nSize){ + if( nSize==8 ) return 6; + if( nSize==6 ) return 5; + return nSize; +} + +/* +** Compose an SQLite record suitable for the sqlite_hct_journal table. +*/ +static u8 *hctJrnlComposeRecord( + u64 iCid, + const char *zSchema, + const u8 *pData, int nData, + u64 iSchemaCid, + u64 iTid, + u64 iValidCid, + int *pnRec +){ + u8 *pRec = 0; + int nRec = 0; + int nHdr = 0; + int nBody = 0; + int nSchema = 0; /* Length of zSchema, in bytes */ + int nTidByte = 0; + int nSchemaCidByte = 0; + int nValidCidByte = 0; + u8 aHash[SQLITE_HCT_JOURNAL_HASHSIZE]; + + nSchema = sqlite3Strlen30(zSchema); + nTidByte = hctJrnlIntSize(iTid); + nSchemaCidByte = hctJrnlIntSize(iSchemaCid); + nValidCidByte = hctJrnlIntSize(iValidCid); + + sqlite3_hct_journal_hashentry( + aHash, iCid, zSchema, pData, nData, iSchemaCid + ); + + /* First figure out how large the eventual record will be */ + nHdr = 1 /* size of header varint */ + + 1 /* "cid" - always NULL */ + + sqlite3VarintLen((nSchema * 2) + 13) /* "schema" - TEXT */ + + sqlite3VarintLen((nData * 2) + 12) /* "data" - BLOB */ + + 1 /* "schemacid" - INTEGER */ + + 1 /* "hash" - BLOB */ + + 1 /* "tid" - INTEGER */ + + 1; /* "validcid" - INTEGER */ + + nBody = 0 /* "cid" - always NULL */ + + nSchema /* "schema" - TEXT */ + + 
nData /* "data" - BLOB */ + + nSchemaCidByte /* "schemacid" - INTEGER */ + + SQLITE_HCT_JOURNAL_HASHSIZE /* "hash" - BLOB */ + + nTidByte /* "tid" - INTEGER */ + + nValidCidByte; /* "validcid" - INTEGER */ + + nRec = nBody+nHdr; + pRec = (u8*)sqlite3_malloc(nRec); + if( pRec ){ + u8 *pHdr = pRec; + u8 *pBody = &pRec[nHdr]; + + *pHdr++ = (u8)nHdr; /* size-of-header varint */ + *pHdr++ = 0x00; /* "cid" - NULL */ + + /* "schema" field - TEXT */ + pHdr += sqlite3PutVarint(pHdr, (nSchema*2) + 13); + if( nSchema>0 ){ + memcpy(pBody, zSchema, nSchema); + pBody += nSchema; + } + + /* "data" field - BLOB */ + pHdr += sqlite3PutVarint(pHdr, (nData*2) + 12); + if( nData>0 ){ + memcpy(pBody, pData, nData); + pBody += nData; + } + + /* "schemacid" field - INTEGER */ + *pHdr++ = hctJrnlIntHdr(nSchemaCidByte); + hctJrnlIntPut(pBody, iSchemaCid, nSchemaCidByte); + pBody += nSchemaCidByte; + + /* "hash" field - SQLITE_HCT_JOURNAL_HASHSIZE byte BLOB */ + *pHdr++ = (u8)((SQLITE_HCT_JOURNAL_HASHSIZE * 2) + 12); + memcpy(pBody, aHash, SQLITE_HCT_JOURNAL_HASHSIZE); + pBody += SQLITE_HCT_JOURNAL_HASHSIZE; + + /* "tid" field - INTEGER */ + *pHdr++ = hctJrnlIntHdr(nTidByte); + hctJrnlIntPut(pBody, iTid, nTidByte); + pBody += nTidByte; + + /* "validcid" field - INTEGER */ + *pHdr++ = hctJrnlIntHdr(nValidCidByte); + hctJrnlIntPut(pBody, iValidCid, nValidCidByte); + pBody += nValidCidByte; + + assert( pHdr==&pRec[nHdr] ); + assert( pBody==&pRec[nRec] ); + }else{ + nRec = 0; + } + + *pnRec = nRec; + return pRec; +} + +typedef struct JrnlCtx JrnlCtx; +struct JrnlCtx { + Schema *pSchema; + HctTree *pTree; + HctBuffer *pBuf; + HctBuffer *pSchemaSql; +}; + +typedef struct JrnlTree JrnlTree; +struct JrnlTree { + const char *zName; +}; + +static int hctJrnlFindTree(Schema *pSchema, u32 iRoot, JrnlTree *pJTree){ + HashElem *k; + if( iRoot==1 ) return 0; + for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ + Table *pTab = (Table*)sqliteHashData(k); + if( pTab->tnum==iRoot ){ + 
pJTree->zName = pTab->zName; + return 1; + } + } + return 0; +} + +static void hctJrnlRecordPrefix( + HctBuffer *pBuf, + int nData, /* Size of buffer aData[] in bytes */ + const u8 *aData, /* Buffer containing SQLite record */ + int nField /* Number of prefix fields requested */ +){ + int iHdr = 0; + int iBody = 0; + int ii = 0; + int szHdr = 0; /* Size of output header */ + int szBody = 0; /* Size of output record body */ + u8 *aHdrOut = 0; + u8 *aBodyOut = 0; + + iHdr = getVarint32(aData, iBody); + + /* Figure out the aggregate sizes of the header and body fields for the + ** required number of prefix fields. */ + for(ii=0; ii126 ){ + int nVarint = sqlite3VarintLen(szHdr); + szHdr += nVarint; + if( sqlite3VarintLen(szHdr)!=nVarint ) szHdr++; + } + + /* Size of record field */ + pBuf->nBuf += sqlite3PutVarint(&pBuf->aBuf[pBuf->nBuf], szHdr+szBody); + + aHdrOut = &pBuf->aBuf[pBuf->nBuf]; + aBodyOut = &aHdrOut[szHdr]; + + /* Write the size-of-header field for the output record */ + aHdrOut += sqlite3PutVarint(aHdrOut, szHdr); + + /* Write the other fields to both the header and body of the output record */ + for(ii=0; ii0 ){ + memcpy(aBodyOut, &aData[iBody], nBody); + iBody += nBody; + aBodyOut += nBody; + } + } + + pBuf->nBuf = (aBodyOut - pBuf->aBuf); +} + +static int hctBufferExtend(HctBuffer *pBuf, int nExtend){ + i64 nDesire = pBuf->nBuf + nExtend; + if( pBuf->nAllocaBuf[pBuf->nBuf], zApp, nApp+1); + pBuf->nBuf += nApp; + sqlite3_free(zApp); + return SQLITE_OK; +} + + +static int hctJrnlLogTree(void *pCtx, u32 iRoot, KeyInfo *pKeyInfo){ + int rc = SQLITE_OK; + JrnlCtx *pJrnl = (JrnlCtx*)pCtx; + HctBuffer *pBuf = pJrnl->pBuf; + + if( iRoot==HCT_TREE_SCHEMAOP_ROOT ){ + HctTreeCsr *pCsr = 0; + rc = sqlite3HctTreeCsrOpen(pJrnl->pTree, iRoot, &pCsr); + if( rc==SQLITE_OK ){ + for(rc=sqlite3HctTreeCsrFirst(pCsr); + rc==SQLITE_OK && sqlite3HctTreeCsrEof(pCsr)==0; + rc=sqlite3HctTreeCsrNext(pCsr) + ){ + int nData = 0; + const u8 *aData = 0; + sqlite3HctTreeCsrData(pCsr, 
&nData, &aData); + rc = hctBufferAppend(pJrnl->pSchemaSql, "%s%.*s", + (pJrnl->pSchemaSql->nBuf>0 ? ";" : ""), nData, (const char*)aData + ); + } + sqlite3HctTreeCsrClose(pCsr); + } + }else{ + JrnlTree jrnltree; + memset(&jrnltree, 0, sizeof(jrnltree)); + if( hctJrnlFindTree(pJrnl->pSchema, iRoot, &jrnltree) ){ + int nName = sqlite3Strlen30(jrnltree.zName); + + rc = hctBufferExtend(pBuf, 1+nName+1); + if( rc==SQLITE_OK ){ + HctTreeCsr *pCsr = 0; + + pBuf->aBuf[pBuf->nBuf++] = 'T'; + memcpy(&pBuf->aBuf[pBuf->nBuf], jrnltree.zName, nName+1); + pBuf->nBuf += nName+1; + rc = sqlite3HctTreeCsrOpen(pJrnl->pTree, iRoot, &pCsr); + + if( rc==SQLITE_OK ){ + for(rc=sqlite3HctTreeCsrFirst(pCsr); + rc==SQLITE_OK && sqlite3HctTreeCsrEof(pCsr)==0; + rc=sqlite3HctTreeCsrNext(pCsr) + ){ + i64 iKey = 0; + int nData = 0; + const u8 *aData = 0; + int bDel = 0; + + sqlite3HctTreeCsrKey(pCsr, &iKey); + sqlite3HctTreeCsrData(pCsr, &nData, &aData); + bDel = sqlite3HctTreeCsrIsDelete(pCsr); + + rc = hctBufferExtend(pBuf, 1+9+9+nData); + if( rc!=SQLITE_OK ) break; + + if( pKeyInfo==0 ){ + pBuf->aBuf[pBuf->nBuf++] = bDel ? 'd' : 'i'; + pBuf->nBuf += sqlite3PutVarint(&pBuf->aBuf[pBuf->nBuf], iKey); + }else{ + pBuf->aBuf[pBuf->nBuf++] = bDel ? 
'D' : 'I'; + if( bDel ){ + hctJrnlRecordPrefix(pBuf, nData, aData, pKeyInfo->nUniqField); + } + } + if( bDel==0 ){ + pBuf->nBuf += sqlite3PutVarint(&pBuf->aBuf[pBuf->nBuf], nData); + memcpy(&pBuf->aBuf[pBuf->nBuf], aData, nData); + pBuf->nBuf += nData; + } + } + } + + sqlite3HctTreeCsrClose(pCsr); + } + } + } + + return rc; +} + +static int hctJrnlWriteRecord( + HctJournal *pJrnl, + u64 iCid, + const char *zSchema, + const void *pData, int nData, + u64 iSchemaCid, + u64 iTid +){ + int rc = SQLITE_OK; + u8 *pRec = 0; + int nRec = 0; + + pRec = hctJrnlComposeRecord( + iCid, zSchema, pData, nData, iSchemaCid, iTid, 0, &nRec + ); + if( pRec==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + int nRetry = 0; + do { + nRetry = 0; + rc = sqlite3HctDbInsert( + pJrnl->pDb, (u32)pJrnl->iJrnlRoot, 0, iCid, 0, nRec, pRec, &nRetry + ); + if( rc!=SQLITE_OK ) break; + assert( nRetry==0 || nRetry==1 ); + if( nRetry==0 ){ + rc = sqlite3HctDbInsertFlush(pJrnl->pDb, &nRetry); + if( rc!=SQLITE_OK ) break; + } + }while( nRetry ); + } + sqlite3_free(pRec); + + return rc; +} + +SQLITE_PRIVATE int sqlite3HctJrnlWriteEmpty( + HctJournal *pJrnl, + u64 iCid, + u64 iTid, + sqlite3 *db /* If non-NULL, invoke custom validation */ +){ + int rc = SQLITE_OK; + if( pJrnl->eInWrite==HCT_JOURNAL_NONE ){ + rc = hctJrnlWriteRecord(pJrnl, iCid, "", 0, 0, 0, iTid); + + /* If argument db is not NULL and there is a custom validation hook + ** configured, invoke it now. This is just to propagate the empty + ** transaction to any follower databases, not to actually validate + ** an empty transaction - the return code is ignored. 
*/ + if( rc==SQLITE_OK && db && db->xValidate ){ + // (void)db->xValidate(db->pValidateArg, iCid, "", 0, 0, 0); + pJrnl->pending.iCid = iCid; + pJrnl->pending.iSCid = 0; + pJrnl->pending.data.nBuf = 0; + pJrnl->pending.schema.nBuf = 0; + } + } + return rc; +} + +SQLITE_PRIVATE void sqlite3HctJrnlInvokeHook(HctJournal *pJrnl, sqlite3 *db){ + if( pJrnl ){ + HctJrnlPendingHook *pPending = &pJrnl->pending; + if( pPending->iCid>0 ){ + if( db->xValidate ){ + const char *zSchema = ""; + if( pJrnl->pending.schema.nBuf>0 ){ + zSchema = (const char*)pJrnl->pending.schema.aBuf; + } + (void)db->xValidate( + db->pValidateArg, pPending->iCid, + zSchema, pPending->data.aBuf, pPending->data.nBuf, + pPending->iSCid + ); + } + + pPending->iCid = 0; + } + } +} + +SQLITE_PRIVATE int sqlite3HctJrnlLog( + HctJournal *pJrnl, + sqlite3 *db, + Schema *pSchema, + u64 iCid, + u64 iTid, + int *pbValidateCalled +){ + int rc = SQLITE_OK; + JrnlCtx jrnlctx; + const char *zSchema = ""; + u64 iSchemaCid = HctAtomicLoad(&pJrnl->pServer->iSchemaCid); + + assert( *pbValidateCalled==0 ); + if( pJrnl->eInWrite!=HCT_JOURNAL_NONE ) return SQLITE_OK; + + memset(&jrnlctx, 0, sizeof(jrnlctx)); + jrnlctx.pSchema = pSchema; + jrnlctx.pTree = pJrnl->pTree; + jrnlctx.pBuf = &pJrnl->pending.data; + jrnlctx.pSchemaSql = &pJrnl->pending.schema; + + jrnlctx.pBuf->nBuf = 0; + jrnlctx.pSchemaSql->nBuf = 0; + + rc = sqlite3HctTreeForeach(pJrnl->pTree, 1, (void*)&jrnlctx, hctJrnlLogTree); + if( jrnlctx.pSchemaSql->nBuf ){ + zSchema =(const char*)jrnlctx.pSchemaSql->aBuf; + } + + if( rc==SQLITE_OK ){ + rc = hctJrnlWriteRecord(pJrnl, iCid, zSchema, + jrnlctx.pBuf->aBuf, jrnlctx.pBuf->nBuf, iSchemaCid, iTid + ); + } + + /* If one is registered, invoke the validation hook */ + if( rc==SQLITE_OK && db->xValidate ){ +#if 0 + int res = db->xValidate(db->pValidateArg, iCid, zSchema, + jrnlctx.buf.aBuf, jrnlctx.buf.nBuf, iSchemaCid + ); + if( res!=0 ){ + rc = SQLITE_BUSY_SNAPSHOT; + } + *pbValidateCalled = 1; +#endif + 
pJrnl->pending.iCid = iCid; + pJrnl->pending.iSCid = iSchemaCid; + } + + if( zSchema[0] && rc==SQLITE_OK ){ + HctAtomicStore(&pJrnl->pServer->iSchemaCid, iCid); + } + + return rc; +} + +static void hctJrnlDelServer(void *p){ + if( p ){ + HctJrnlServer *pServer = (HctJrnlServer*)p; + sqlite3_free(pServer->aCommit); + sqlite3_free(pServer); + } +} + +typedef struct HctJournalRecord HctJournalRecord; +struct HctJournalRecord { + i64 iCid; + const char *zSchema; int nSchema; + const void *pData; int nData; + i64 iSchemaCid; + const void *pHash; + i64 iTid; + i64 iValidCid; +}; + +/* +** Structure containing values read from the sqlite_hct_baseline table. +*/ +typedef struct HctBaselineRecord HctBaselineRecord; +struct HctBaselineRecord { + i64 iCid; + u8 aHash[SQLITE_HCT_JOURNAL_HASHSIZE]; + i64 iSchemaCid; +}; + + +typedef struct HctRecordReader HctRecordReader; +struct HctRecordReader { + const u8 *aRec; + int nRec; + int nHdr; + const u8 *pHdr; + const u8 *pBody; +}; + +static void hctJrnlReadInit( + HctRecordReader *p, + int nRec, + const u8 *aRec +){ + memset(p, 0, sizeof(*p)); + p->aRec = aRec; + p->nRec = nRec; + p->pHdr = p->aRec + getVarint32(aRec, p->nHdr); + p->pBody = &p->aRec[p->nHdr]; +} + +static const u8 *hctJrnlReadBlobText( + int *pRc, + HctRecordReader *p, + int bText, + int *pnData +){ + const u8 *pRet = 0; + if( *pRc==SQLITE_OK ){ + u64 iType = 0; + p->pHdr += sqlite3GetVarint(p->pHdr, &iType); + if( iType<12 || (iType % 2)!=bText ){ + *pRc = SQLITE_CORRUPT_BKPT; + }else{ + *pnData = (iType - 12) / 2; + pRet = p->pBody; + p->pBody += (*pnData); + } + } + return pRet; +} + +static const char *hctJrnlReadText( + int *pRc, + HctRecordReader *p, + int *pnText +){ + return (const char*)hctJrnlReadBlobText(pRc, p, 1, pnText); +} +static const u8 *hctJrnlReadBlob( + int *pRc, + HctRecordReader *p, + int *pnText +){ + return hctJrnlReadBlobText(pRc, p, 0, pnText); +} + +static i64 hctJrnlReadInteger(int *pRc, HctRecordReader *p){ + i64 iRet = 0; + if( 
*pRc==SQLITE_OK ){
+    /* Read the serial type of the next field from the record header, then
+    ** decode the big-endian value from the record body. pBody is advanced
+    ** by exactly the number of body bytes the serial type occupies: 1, 2,
+    ** 3, 4, 6 or 8 bytes for types 1-6, and none for the constants 0 and 1
+    ** (types 8 and 9). Values are assembled as unsigned quantities - no
+    ** sign-extension is performed. Any other serial type sets (*pRc) to
+    ** SQLITE_CORRUPT. */
+    u64 iType = 0;
+    p->pHdr += sqlite3GetVarint(p->pHdr, &iType);
+    switch( iType ){
+      case 1:
+        iRet = p->pBody[0];
+        p->pBody++;
+        break;
+      case 2:
+        iRet = ((u64)p->pBody[0] << 8)
+             + ((u64)p->pBody[1] << 0);
+        p->pBody += 2;
+        break;
+      case 3:
+        iRet = ((u64)p->pBody[0] << 16)
+             + ((u64)p->pBody[1] << 8)
+             + ((u64)p->pBody[2] << 0);
+        p->pBody += 3;
+        break;
+      case 4:
+        iRet = ((u64)p->pBody[0] << 24)
+             + ((u64)p->pBody[1] << 16)
+             + ((u64)p->pBody[2] << 8)
+             + ((u64)p->pBody[3] << 0);
+        p->pBody += 4;
+        break;
+      case 5:
+        iRet = ((u64)p->pBody[0] << 40)
+             + ((u64)p->pBody[1] << 32)
+             + ((u64)p->pBody[2] << 24)
+             + ((u64)p->pBody[3] << 16)
+             + ((u64)p->pBody[4] << 8)
+             + ((u64)p->pBody[5] << 0);
+        p->pBody += 6;
+        break;
+      case 6:
+        iRet = ((u64)p->pBody[0] << 56)
+             + ((u64)p->pBody[1] << 48)
+             + ((u64)p->pBody[2] << 40)
+             + ((u64)p->pBody[3] << 32)
+             + ((u64)p->pBody[4] << 24)
+             + ((u64)p->pBody[5] << 16)
+             + ((u64)p->pBody[6] << 8)
+             + ((u64)p->pBody[7] << 0);
+        /* Serial type 6 occupies eight body bytes. Advancing by fewer
+        ** would leave pBody inside this field and cause every subsequent
+        ** field of the record to be misread. */
+        p->pBody += 8;
+        break;
+      case 8:
+        iRet = 0;
+        break;
+      case 9:
+        iRet = 1;
+        break;
+      default:
+        *pRc = SQLITE_CORRUPT_BKPT;
+        break;
+    }
+  }
+
+  return iRet;
+}
+
+/*
+** Read the next field of the record, which must be a BLOB of exactly
+** SQLITE_HCT_JOURNAL_HASHSIZE bytes, and copy it into buffer aHash[]. If
+** the field is not a blob of that size, set (*pRc) to SQLITE_CORRUPT and
+** leave aHash[] unmodified. This is a no-op if (*pRc) is already set to
+** an error code when this function is called.
+*/
+static void hctJrnlReadHash(
+  int *pRc, /* IN/OUT: Error code */
+  HctRecordReader *p, /* Record reader */
+  u8 *aHash /* Pointer to buffer to populate */
+){
+  int nHash = 0;
+  const u8 *a = 0;
+  a = hctJrnlReadBlob(pRc, p, &nHash);
+  if( *pRc==SQLITE_OK && nHash!=SQLITE_HCT_JOURNAL_HASHSIZE ){
+    *pRc = SQLITE_CORRUPT_BKPT;
+  }
+  if( *pRc==SQLITE_OK ){
+    memcpy(aHash, a, SQLITE_HCT_JOURNAL_HASHSIZE);
+  }
+}
+
+/*
+** Decode the journal table entry that cursor pCsr currently points to into
+** structure (*pRec). The "cid" value is taken from the cursor key; the
+** remaining fields are parsed from the record data.
+*/
+static int hctJrnlReadJournalRecord(HctDbCsr *pCsr, HctJournalRecord *pRec){
+  int rc = SQLITE_OK;
+  int nData = 0;
+  const u8 *aData = 0;
+
+  memset(pRec, 0, sizeof(*pRec));
+
+  sqlite3HctDbCsrKey(pCsr, (i64*)&pRec->iCid);
+  rc = sqlite3HctDbCsrData(pCsr, &nData, &aData);
+  if( rc==SQLITE_OK ){
+    int nHash = 0;
+    HctRecordReader rdr;
+    hctJrnlReadInit(&rdr, nData, aData);
+
+    /* "cid" field - always NULL */ +
if( *rdr.pHdr++!=0 ) return SQLITE_CORRUPT_BKPT; + + /* "schema" field - always TEXT. */ + pRec->zSchema = hctJrnlReadText(&rc, &rdr, &pRec->nSchema); + + /* "data" field - always BLOB */ + pRec->pData = hctJrnlReadBlob(&rc, &rdr, &pRec->nData); + + /* "schemacid" field - always INTEGER */ + pRec->iSchemaCid = hctJrnlReadInteger(&rc, &rdr); + + /* "hash" field - SQLITE_HCT_JOURNAL_HASHSIZE byte BLOB */ + pRec->pHash = (const void*)hctJrnlReadBlob(&rc, &rdr, &nHash); + if( nHash!=SQLITE_HCT_JOURNAL_HASHSIZE ) rc = SQLITE_CORRUPT_BKPT; + + /* "tid" field - an INTEGER */ + pRec->iTid = hctJrnlReadInteger(&rc, &rdr); + + /* "valid_cid" field - an INTEGER */ + pRec->iValidCid = hctJrnlReadInteger(&rc, &rdr); + } + return rc; +} + +/* +** Read the contents of the sqlite_hct_baseline table into structure +** (*pRec). Return SQLITE_OK if successful, or an SQLite error code +** otherwise. +*/ +static int hctJrnlReadBaseline( + HctJournal *pJrnl, /* Database to read from */ + HctBaselineRecord *pRec /* Populate this structure before returning */ +){ + HctDbCsr *pCsr = 0; + int rc = SQLITE_OK; + + memset(pRec, 0, sizeof(HctBaselineRecord)); + + /* Open a cursor on the baseline table */ + rc = sqlite3HctDbCsrOpen(pJrnl->pDb, 0, (u32)pJrnl->iBaseRoot, &pCsr); + + /* Move the cursor to the first record in the table. 
*/ + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbCsrFirst(pCsr); + } + if( rc==SQLITE_OK && sqlite3HctDbCsrEof(pCsr) ){ + rc = SQLITE_CORRUPT_BKPT; + } + + if( rc==SQLITE_OK ){ + int nData = 0; + const u8 *aData = 0; + + rc = sqlite3HctDbCsrData(pCsr, &nData, &aData); + if( rc==SQLITE_OK ){ + HctRecordReader rdr; + hctJrnlReadInit(&rdr, nData, aData); + + /* "cid" field - an INTEGER */ + pRec->iCid = hctJrnlReadInteger(&rc, &rdr); + + /* "schemacid" field - an INTEGER */ + pRec->iSchemaCid = hctJrnlReadInteger(&rc, &rdr); + + /* "hash" field - SQLITE_HCT_JOURNAL_HASHSIZE byte BLOB */ + hctJrnlReadHash(&rc, &rdr, pRec->aHash); + } + } + sqlite3HctDbCsrClose(pCsr); + + return rc; +} + +static int hctJrnlGetJrnlShape( + sqlite3 *db, + i64 *piLast, /* Out: Last entry in journal */ + i64 *piLastCont /* Out: Last contiguous entry in journal */ +){ + const char *z1 = "SELECT max(cid) FROM sqlite_hct_journal"; + const char *z2 = "SELECT cid FROM sqlite_hct_journal ORDER BY 1 DESC"; + + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = 0; + i64 iLast = 0; + i64 iLastCont = 0; + + rc = sqlite3_prepare_v2(db, z1, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + iLast = sqlite3_column_int64(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_prepare_v2(db, z2, -1, &pStmt, 0); + } + if( rc==SQLITE_OK ){ + i64 iPrev = iLast; + iLastCont = iLast; + while( sqlite3_step(pStmt)==SQLITE_ROW ){ + i64 iThis = sqlite3_column_int64(pStmt, 0); + if( iThis!=iPrev-1 ){ + iLastCont = iThis; + } + if( (iLast-iThis)>HCT_MAX_LEADING_WRITE*2 ) break; + iPrev = iThis; + } + rc = sqlite3_finalize(pStmt); + } + + *piLast = iLast; + *piLastCont = iLastCont; + return rc; +} + +static sqlite3_stmt *hctPreparePrintf( + int *pRc, + sqlite3 *db, + const char *zFmt, ... 
+){ + sqlite3_stmt *pRet = 0; + va_list ap; + char *zSql = 0; + + va_start(ap, zFmt); + zSql = sqlite3_vmprintf(zFmt, ap); + va_end(ap); + + if( *pRc==SQLITE_OK ){ + if( zSql==0 ){ + *pRc = SQLITE_NOMEM; + }else{ + *pRc = sqlite3_prepare_v2(db, zSql, -1, &pRet, 0); + } + } + sqlite3_free(zSql); + return pRet; +} + +/* +** Iterator for reading a blob from the "data" column of a journal entry. +*/ +typedef struct HctDataReader HctDataReader; +struct HctDataReader { + const u8 *aData; + int nData; + int iData; + + int bEof; + char eType; + + /* Valid for all values of eType */ + const char *zTab; + + /* For eType==HCT_TYPE_INSERT_ROWID, HCT_TYPE_DELETE_ROWID */ + i64 iRowid; + + /* For eType==HCT_TYPE_INSERT_ROWID */ + int nRecord; + const u8 *aRecord; +}; + +#define HCT_TYPE_TABLE 'T' +#define HCT_TYPE_INSERT_ROWID 'i' +#define HCT_TYPE_DELETE_ROWID 'd' + +static int hctDataReaderNext(HctDataReader *p){ + if( p->iData>=p->nData ){ + p->bEof = 1; + }else{ + p->eType = (char)(p->aData[p->iData++]); + switch( p->eType ){ + case 'T': { + p->zTab = (const char*)&p->aData[p->iData]; + p->iData += sqlite3Strlen30(p->zTab) + 1; + break; + } + + case 'd': { + p->iData += sqlite3GetVarint(&p->aData[p->iData], (u64*)&p->iRowid); + break; + } + + case 'i': { + p->iData += sqlite3GetVarint(&p->aData[p->iData], (u64*)&p->iRowid); + p->iData += getVarint32(&p->aData[p->iData], p->nRecord); + p->aRecord = &p->aData[p->iData]; + p->iData += p->nRecord; + break; + } + + default: { + return SQLITE_CORRUPT_BKPT; + } + } + } + + return SQLITE_OK; +} + +/* +** Initialize an HctDataReader object to iterate through the nData byte +** 'data' blob in buffer pData. Leave the iterator pointing at the first +** entry in the blob. 
+*/
+static int hctDataReaderInit(const void *pData, int nData, HctDataReader *pRdr){
+  memset(pRdr, 0, sizeof(*pRdr));
+  pRdr->aData = (const u8*)pData;
+  pRdr->nData = nData;
+  return hctDataReaderNext(pRdr);
+}
+
+/*
+** For each journal entry with a CID greater than the "last contiguous"
+** CID reported by hctJrnlGetJrnlShape(), iterate through the entry's
+** "data" blob and look up every rowid it mentions with a SELECT on the
+** table the rowid belongs to. While an entry is being processed, the
+** (xSave, pSave) callback is registered with the database layer using
+** sqlite3HctDbSetSavePhysical(). It is unregistered again before the
+** next entry is read.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise. An
+** error hit while processing an entry is returned to the caller - it is
+** not overwritten by the result of finalizing the outer statement.
+*/
+SQLITE_PRIVATE int sqlite3HctJrnlSavePhysical(
+  sqlite3 *db,
+  HctJournal *pJrnl,
+  int (*xSave)(void*, i64 iPhys),
+  void *pSave
+){
+  const char *zSql = "SELECT data FROM sqlite_hct_journal WHERE cid>?";
+  int rc = SQLITE_OK;
+  i64 iLast = 0;
+  i64 iLastCont = 0;
+  sqlite3_stmt *pStmt = 0;
+
+  rc = hctJrnlGetJrnlShape(db, &iLast, &iLastCont);
+  if( rc==SQLITE_OK ){
+    rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
+  }
+  if( rc==SQLITE_OK ){
+    int rc2;
+    sqlite3_bind_int64(pStmt, 1, iLastCont);
+    while( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){
+      const void *pData = sqlite3_column_blob(pStmt, 0);
+      int nData = sqlite3_column_bytes(pStmt, 0);
+      sqlite3_stmt *pQuery = 0;
+      HctDataReader rdr;
+
+      sqlite3HctDbSetSavePhysical(pJrnl->pDb, xSave, pSave);
+      for(rc=hctDataReaderInit(pData, nData, &rdr);
+          rc==SQLITE_OK && rdr.bEof==0;
+          rc=hctDataReaderNext(&rdr)
+      ){
+        switch( rdr.eType ){
+          case HCT_TYPE_TABLE: {
+            rc = sqlite3_finalize(pQuery);
+            pQuery = hctPreparePrintf(
+                &rc, db, "SELECT * FROM %Q WHERE _rowid_=?", rdr.zTab
+            );
+            break;
+          }
+
+          case HCT_TYPE_INSERT_ROWID:
+          case HCT_TYPE_DELETE_ROWID: {
+            if( pQuery==0 ){
+              /* A row entry that is not preceded by a table entry. There
+              ** is no statement to run, so the blob must be malformed. */
+              rc = SQLITE_CORRUPT_BKPT;
+            }else{
+              sqlite3_bind_int64(pQuery, 1, rdr.iRowid);
+              sqlite3_step(pQuery);
+              rc = sqlite3_reset(pQuery);
+            }
+            break;
+          }
+
+          default: assert( 0 );
+        }
+        if( rc ) break;
+      }
+      sqlite3HctDbSetSavePhysical(pJrnl->pDb, 0, 0);
+      sqlite3_finalize(pQuery);
+    }
+
+    /* The most recent sqlite3_step(pStmt) may well have succeeded even if
+    ** processing the row it returned failed. So only adopt the result of
+    ** sqlite3_finalize() if no error has been seen so far. */
+    rc2 = sqlite3_finalize(pStmt);
+    if( rc==SQLITE_OK ) rc = rc2;
+  }
+
+  return rc;
+}
+
+/*
+** Do special recovery (startup) processing for replication-enabled databases.
+** This function is called during stage 1 recovery - after any log files have
+** been processed (and the database schema + contents restored), but before the
+** free-page-lists are recovered.
+*/
+SQLITE_PRIVATE int sqlite3HctJrnlRecovery(HctJournal *pJrnl, HctDatabase *pDb){
+  HctBaselineRecord base;         /* sqlite_hct_baseline data */
+  HctJrnlServer *pServer = 0;
+  HctFile *pFile = sqlite3HctDbFile(pDb);
+  int rc = SQLITE_OK;
+  HctDbCsr *pCsr = 0;
+
+  i64 iMaxCid = 0;
+  i64 iSchemaCid = 0;
+
+  /* Read the contents of the sqlite_hct_baseline table. */
+  rc = hctJrnlReadBaseline(pJrnl, &base);
+
+  /* Allocate the new HctJrnlServer structure */
+  pServer = (HctJrnlServer*)sqlite3HctMalloc(&rc, sizeof(HctJrnlServer));
+
+  /* Read the last record of the sqlite_hct_journal table. Specifically,
+  ** the value of fields "cid", "schema" and "schemacid". Store the cid in
+  ** stack variable iMaxCid. Set iSchemaCid to the cid itself if the
+  ** "schema" field is not empty, or to the "schemacid" field otherwise.
+  ** Or, if the sqlite_hct_journal table is empty, populate iMaxCid and
+  ** iSchemaCid with values from the baseline table. */
+  if( rc==SQLITE_OK ){
+    rc = sqlite3HctDbCsrOpen(pDb, 0, (u32)pJrnl->iJrnlRoot, &pCsr);
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3HctDbCsrLast(pCsr);
+  }
+  if( rc==SQLITE_OK ){
+    if( sqlite3HctDbCsrEof(pCsr)==0 ){
+      HctJournalRecord rec;
+      rc = hctJrnlReadJournalRecord(pCsr, &rec);
+      if( rc==SQLITE_OK ){
+        iMaxCid = rec.iCid;
+        iSchemaCid = (rec.zSchema[0] ? rec.iCid : rec.iSchemaCid);
+      }
+    }else{
+      iMaxCid = base.iCid;
+      iSchemaCid = base.iSchemaCid;
+    }
+  }
+
+  /* Scan the sqlite_hct_journal table from beginning to end. When
+  ** the first missing entry is found, calculate the size of the
+  ** HctJrnlServer.aCommit[] array and allocate it. Then continue
+  ** scanning the sqlite_hct_journal table, populating aCommit[] along
+  ** the way. */
+  if( rc==SQLITE_OK ){
+    HctTMapClient *pTClient = sqlite3HctFileTMapClient(pFile);
+    i64 iPrev = base.iCid;
+    int nTrans = 0;
+    u64 *aCommit = 0;
+
+    /* Scan until the first missing entry. Set nTrans to the difference
+    ** between the largest CID in the journal (iMaxCid) and the last CID
+    ** before the first missing one, or to HCT_MAX_LEADING_WRITE, whichever
+    ** is greater.
+    ** Set iPrev to the largest CID value for which it and all previous
+    ** CIDs have been written into the journal table. */
+    for(rc = sqlite3HctDbCsrFirst(pCsr);
+        rc==SQLITE_OK && 0==sqlite3HctDbCsrEof(pCsr);
+        rc = sqlite3HctDbCsrNext(pCsr)
+    ){
+      i64 iCid = 0;
+      sqlite3HctDbCsrKey(pCsr, &iCid);
+      if( iPrev!=0 && iCid!=iPrev+1 ){
+        nTrans = iMaxCid - iPrev;
+        break;  /* cursor is left on the first entry that follows the hole */
+      }
+      iPrev = iCid;
+    }
+    nTrans = MAX(HCT_MAX_LEADING_WRITE, nTrans);
+
+    pServer->nCommit = nTrans*2;
+    aCommit = (u64*)sqlite3HctMalloc(&rc, pServer->nCommit*sizeof(u64));
+    pServer->aCommit = aCommit;
+    pServer->iSnapshot = iPrev;
+
+    /* Scan through whatever is left of the sqlite_hct_journal table,
+    ** populating the aCommit[] array and the transaction-map (hct_tmap.c)
+    ** along the way. */
+    while( rc==SQLITE_OK && 0==sqlite3HctDbCsrEof(pCsr) ){
+      HctJournalRecord rec;
+      rc = hctJrnlReadJournalRecord(pCsr, &rec);
+      if( rc==SQLITE_OK ){
+        /* Slot (cid % nCommit) holds the "valid_cid" value if that field is
+        ** non-zero, or the cid itself otherwise. */
+        i64 iVal = rec.iValidCid ? rec.iValidCid : rec.iCid;
+        pServer->aCommit[rec.iCid % pServer->nCommit] = iVal;
+        rc = sqlite3HctTMapRecoverySet(pTClient, rec.iTid, rec.iCid);
+      }
+      if( rc==SQLITE_OK ){
+        rc = sqlite3HctDbCsrNext(pCsr);
+      }
+    }
+    sqlite3HctTMapRecoveryFinish(pTClient, rc);
+  }
+
+  if( rc==SQLITE_OK ){
+    /* Success. Attach the new server object to both the journal handle
+    ** and the HctFile, registering hctJrnlDelServer along with it. */
+    HctAtomicStore(&pServer->iSchemaCid, iSchemaCid);
+    pJrnl->pServer = pServer;
+    sqlite3HctFileSetJrnlPtr(pFile, (void*)pServer, hctJrnlDelServer);
+    if( iMaxCid>0 ) sqlite3HctFileSetCID(pFile, iMaxCid);
+  }else{
+    hctJrnlDelServer((void*)pServer);
+  }
+
+  sqlite3HctDbCsrClose(pCsr);
+  return rc;
+}
+/* Return the Table.tnum value of the table named zName in schema pSchema,
+** or 0 if the schema contains no such table. */
+static u64 hctFindRootByName(Schema *pSchema, const char *zName){
+  u64 iRet = 0;
+  Table *pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, zName);
+  if( pTab ){
+    iRet = pTab->tnum;
+  }
+  return iRet;
+}
+/* If schema pSchema contains both the sqlite_hct_journal and
+** sqlite_hct_baseline tables, allocate a new HctJournal object and set
+** (*pp) to point to it. If it contains neither, leave (*pp) untouched and
+** return SQLITE_OK. If it contains exactly one of the two, return
+** SQLITE_CORRUPT. (*pp) must be NULL when this function is called. */
+SQLITE_PRIVATE int sqlite3HctJournalNewIf(
+  Schema *pSchema,
+  HctTree *pTree,
+  HctDatabase *pDb,
+  HctJournal **pp
+){
+  int rc = SQLITE_OK;
+  u64 iJrnlRoot = hctFindRootByName(pSchema, "sqlite_hct_journal");
+  u64 iBaseRoot = hctFindRootByName(pSchema, "sqlite_hct_baseline");
+
+  assert( *pp==0 );
+
+  if( (iJrnlRoot==0)!=(iBaseRoot==0) ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+  if( iJrnlRoot ){
+    HctJournal *pNew = sqlite3HctMalloc(&rc, sizeof(HctJournal));
+    if( pNew ){
+      HctFile *pFile = sqlite3HctDbFile(pDb);
+      pNew->iJrnlRoot = iJrnlRoot;
+      pNew->iBaseRoot = iBaseRoot;
+      pNew->pDb = pDb;
+      pNew->pTree = pTree;
+      /* Pick up whatever server pointer is currently stored on the file.
+      ** NOTE(review): presumably NULL until sqlite3HctJrnlRecovery() has
+      ** run for this file - confirm. */
+      pNew->pServer = (HctJrnlServer*)sqlite3HctFileGetJrnlPtr(pFile);
+      *pp = pNew;
+    }
+  }
+
+  return rc;
+}
+/* Free the HctJournal object passed as the only argument. */
+SQLITE_PRIVATE void sqlite3HctJournalClose(HctJournal *pJrnl){
+  sqlite3_free(pJrnl);
+}
+
+/*
+** See description in hctJrnlInt.h.
+**
+** If pJrnl is NULL, return 0 without writing to (*pbNosnap). Otherwise,
+** set (*pbNosnap) to true if iTable is the root of the journal or baseline
+** table, and return true if no journal write is in progress and either
+** (*pbNosnap) is true, there is no server object, or the server is in
+** FOLLOWER mode.
+*/
+SQLITE_PRIVATE int sqlite3HctJournalIsReadonly(
+  HctJournal *pJrnl,
+  u64 iTable,
+  int *pbNosnap
+){
+  if( pJrnl ){
+    HctJrnlServer *p = pJrnl->pServer;
+    int bNosnap = (pJrnl->iJrnlRoot==iTable || pJrnl->iBaseRoot==iTable);
+    *pbNosnap = bNosnap;
+    return (pJrnl->eInWrite==HCT_JOURNAL_NONE && (
+        bNosnap || !p || p->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER
+    ));
+  }
+  return 0;
+}
+
+/*
+** Called during log file recovery to remove the entry with "tid" (not CID!)
+** value iTid from the sqlite_hct_journal table.
+**
+** The journal is scanned backwards from its last entry. If a matching
+** entry is found, it is overwritten using hctJrnlWriteRecord() with an
+** empty schema and no data.
+*/
+SQLITE_PRIVATE int sqlite3HctJrnlRollbackEntry(HctJournal *pJrnl, i64 iTid){
+  i64 iDel = 0;
+  HctDbCsr *pCsr = 0;
+  int rc = SQLITE_OK;
+
+  rc = sqlite3HctDbCsrOpen(pJrnl->pDb, 0, (u32)pJrnl->iJrnlRoot, &pCsr);
+  if( rc==SQLITE_OK ){
+    HctJournalRecord rec;
+    sqlite3HctDbCsrNosnap(pCsr, 1);
+    for(rc=sqlite3HctDbCsrLast(pCsr);
+        iDel==0 && rc==SQLITE_OK && sqlite3HctDbCsrEof(pCsr)==0;
+        rc=sqlite3HctDbCsrPrev(pCsr)
+    ){
+      /* The return code is ignored here. hctJrnlReadJournalRecord() zeroes
+      ** rec before it starts parsing.
+      ** NOTE(review): presumably deliberate, so that entries that cannot
+      ** be parsed are skipped rather than failing recovery - confirm. */
+      hctJrnlReadJournalRecord(pCsr, &rec);
+      if( rec.iTid==iTid ) iDel = rec.iCid;
+    }
+
+    if( iDel!=0 && rc==SQLITE_OK ){
+      rc = hctJrnlWriteRecord(pJrnl, iDel, "", 0, 0, 0, iTid);
+    }
+
+    sqlite3HctDbCsrClose(pCsr);
+  }
+
+  return rc;
+}
+
+/*
+** Find the HctJournal object associated with the "main" database of the
+** connection passed as the only argument.
If successful, set (*ppJrnl) +** to point to said object and return SQLITE_OK. Or, if the database is +** not a replication-enabled db, set (*ppJrnl) to NULL and return SQLITE_OK. +** Or, if an error occurs, return an SQLite error code. The final value +** of (*ppJrnl) is undefined in this case. +*/ +static int hctJrnlFind(sqlite3 *db, HctJournal **ppJrnl){ + int rc = SQLITE_OK; + HctJournal *pJrnl = sqlite3HctJrnlFind(db); + + if( pJrnl==0 ){ + /* If the journal was not found, it might be because the database is + ** not yet initialized. Run a query to ensure it is, then try to retrieve + ** the journal object again. */ + rc = sqlite3_exec(db, "SELECT 1 FROM sqlite_schema LIMIT 1", 0, 0, 0); + if( rc==SQLITE_OK ){ + pJrnl = sqlite3HctJrnlFind(db); + } + } + + if( rc==SQLITE_OK && pJrnl==0 ){ + hctJournalSetDbError(db, SQLITE_ERROR, "not a journaled hct database"); + rc = SQLITE_ERROR; + } + + *ppJrnl = pJrnl; + return rc; +} + + +/* +** Return the current journal mode - SQLITE_HCT_JOURNAL_MODE_FOLLOWER or +** SQLITE_HCT_JOURNAL_MODE_LEADER - for the main database of the connection +** passed as the only argument. Or, if the main database is not a +** replication-enabled hct database, return -1; +*/ +SQLITE_API int sqlite3_hct_journal_mode(sqlite3 *db){ + int eRet = -1; + HctJournal *pJrnl = sqlite3HctJrnlFind(db); + if( pJrnl ){ + eRet = pJrnl->pServer->eMode; + } + return eRet; +} + +/* +** Return true if the journal is complete - contains no holes. Or false +** otherwise. This function is not threadsafe. Results are undefined +** if there are concurrent transactions running on the database. 
+*/ +static int hctJrnlIsComplete(HctJournal *pJrnl){ + HctJrnlServer *pServer = pJrnl->pServer; + u64 iSnapshot = pServer->iSnapshot; + int ii; + + assert( pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER ); + + /* Set iSnapshot to the CID of the last contiguous commit */ + while( 1 ){ + int iNext = (iSnapshot+1) % pServer->nCommit; + u64 iVal = HctAtomicLoad(&pServer->aCommit[iNext]); + if( iVal<=iSnapshot ) break; + iSnapshot++; + } + + /* See if there are any transactions yet committed with CID values greater + ** than iSnapshot. If there are, then the journal is not complete. */ + for(ii=0; iinCommit; ii++){ + u64 iVal = HctAtomicLoad(&pServer->aCommit[ii]); + if( iVal>iSnapshot ){ + return 0; + } + } + + return 1; +} + +/* +** Set the LEADER/FOLLOWER setting of the main database of the connection +** passed as the first argument. +*/ +SQLITE_API int sqlite3_hct_journal_setmode(sqlite3 *db, int eMode){ + int rc = SQLITE_OK; + HctJournal *pJrnl = sqlite3HctJrnlFind(db); + + if( pJrnl==0 ){ + rc = sqlite3_exec(db, "SELECT 1 FROM sqlite_schema LIMIT 1", 0, 0, 0); + if( rc==SQLITE_OK ){ + pJrnl = sqlite3HctJrnlFind(db); + } + } + + if( rc==SQLITE_OK ){ + if( eMode!=SQLITE_HCT_JOURNAL_MODE_LEADER + && eMode!=SQLITE_HCT_JOURNAL_MODE_FOLLOWER + ){ + return SQLITE_MISUSE_BKPT; + }else if( pJrnl==0 ){ + hctJournalSetDbError(db, SQLITE_ERROR, "not a journaled hct database"); + rc = SQLITE_ERROR; + }else{ + HctFile *pFile = sqlite3HctDbFile(pJrnl->pDb); + HctJrnlServer *pServer = pJrnl->pServer; + if( eMode!=pServer->eMode ){ + if( eMode==SQLITE_HCT_JOURNAL_MODE_LEADER ){ + /* Switch from FOLLOWER to LEADER mode. This is only allowed if + ** there are no holes in the journal. 
*/ + if( hctJrnlIsComplete(pJrnl)==0 ){ + hctJournalSetDbError(db, SQLITE_ERROR, "incomplete journal"); + rc = SQLITE_ERROR; + }else{ + u64 iCid = sqlite3HctJournalSnapshot(pJrnl); + pServer->eMode = SQLITE_HCT_JOURNAL_MODE_LEADER; + if( iCid>0 ){ + sqlite3HctFileSetCID(sqlite3HctDbFile(pJrnl->pDb), iCid); + } + } + pServer->nSchemaVersionIncr++; + }else{ + /* Switch from LEADER to FOLLOWER mode. This is always possible. */ + void *pSchema = sqlite3HctBtreeSchema(db->aDb[0].pBt, 0, 0); + u64 iSnapshotId = sqlite3HctFileGetSnapshotid(pFile); + memset(pServer->aCommit, 0, pServer->nCommit*sizeof(u64)); + pServer->iSnapshot = iSnapshotId; + pServer->eMode = SQLITE_HCT_JOURNAL_MODE_FOLLOWER; + sqlite3HctJournalFixSchema(pJrnl, db, pSchema); + } + } + } + } + + return rc; +} + +static void hctJrnlFixTable(Table *pTab){ + Index *pIdx; + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( pIdx->idxType==SQLITE_IDXTYPE_UNIQUE + || pIdx->idxType==SQLITE_IDXTYPE_PRIMARYKEY + ){ + pIdx->idxType = SQLITE_IDXTYPE_APPDEF; + } + pIdx->uniqNotNull = 0; + pIdx->onError = OE_None; + } + + +} + +/* +** This function is used to "fix" a schema so that it can be used in +** a FOLLOWER mode database. Specifically: +** +** * All UNIQUE indexes are marked as not-unique. +** * All triggers are removed from the schema. +** * All FK definitions are removed from the schema. 
+*/ +SQLITE_PRIVATE void sqlite3HctJournalFixSchema(HctJournal *pJrnl, sqlite3 *db, void *pS){ + HctJrnlServer *pServer = pJrnl->pServer; + if( pServer==0 || pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER ){ + Schema *pSchema = (Schema*)pS; + HashElem *k; + + for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ + Table *pTab = (Table*)sqliteHashData(k); + hctJrnlFixTable(pTab); + while( pTab->pTrigger ){ + Trigger *pTrig = pTab->pTrigger; + pTab->pTrigger = pTrig->pNext; + sqlite3DeleteTrigger(db, pTrig); + } + if( IsOrdinaryTable(pTab) ){ + sqlite3FkDelete(db, pTab); + } + } + sqlite3HashClear(&pSchema->trigHash); + } +} + +SQLITE_PRIVATE void sqlite3HctJournalSchemaVersion(HctJournal *pJrnl, u32 *pSchemaVersion){ + if( pJrnl && pJrnl->pServer ){ + *pSchemaVersion += HctAtomicLoad(&pJrnl->pServer->nSchemaVersionIncr); + } +} + +#ifdef SQLITE_DEBUG +/* +** assert() that the schema associated with table pTab has been "fixed", +** according to the definition used by sqlite3HctJournalFixSchema(). +*/ +static void assert_schema_is_fixed(Table *pTab){ + Index *pIdx; + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + assert( pIdx->idxType==SQLITE_IDXTYPE_APPDEF ); + assert( pIdx->uniqNotNull==0 ); + assert( pIdx->onError==OE_None ); + } + assert( pTab->pTrigger==0 ); + assert( pTab->u.tab.pFKey==0 ); +} +#else +# define assert_schema_is_fixed(x) +#endif + +static int hctJrnlGetInsertStmt( + sqlite3 *db, + const char *zTab, + int *piPk, + sqlite3_stmt **ppStmt +){ + sqlite3_str *pStr; + Schema *pSchema = db->aDb[0].pSchema; + Table *pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, zTab); + char *zSql = 0; + int rc = SQLITE_OK; + int ii; + + assert( pTab ); + assert_schema_is_fixed(pTab); + + *ppStmt = 0; + pStr = sqlite3_str_new(0); + sqlite3_str_appendf(pStr, "REPLACE INTO main.%Q(", zTab); + if( pTab->iPKey<0 ){ + sqlite3_str_appendf(pStr, "_rowid_, "); + } + for(ii=0; iinCol; ii++){ + const char *zSep = (ii==pTab->nCol-1) ? 
") VALUES (" : ","; + sqlite3_str_appendf(pStr, "%Q%s ", pTab->aCol[ii].zCnName, zSep); + } + if( pTab->iPKey<0 ){ + sqlite3_str_appendf(pStr, "?%d, ", pTab->nCol+1); + *piPk = pTab->nCol+1; + }else{ + *piPk = pTab->iPKey+1; + } + for(ii=0; iinCol; ii++){ + const char *zSep = (ii==pTab->nCol-1) ? ")" : ", "; + sqlite3_str_appendf(pStr, "?%d%s", ii+1, zSep); + } + + zSql = sqlite3_str_finish(pStr); + if( zSql==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0); + sqlite3_free(zSql); + } + + return rc; +} + +static int hctJrnlGetDeleteStmt( + sqlite3 *db, + const char *zTab, + sqlite3_stmt **ppStmt +){ + Schema *pSchema = db->aDb[0].pSchema; + Table *pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, zTab); + int rc = SQLITE_OK; + char *zSql = 0; + const char *zRowid = "_rowid_"; + + assert( pTab ); + assert_schema_is_fixed(pTab); + + if( pTab->iPKey>=0 ){ + zRowid = pTab->aCol[pTab->iPKey].zCnName; + } + + *ppStmt = 0; + zSql = sqlite3_mprintf( + "DELETE FROM main.%Q WHERE main.%Q.%Q = ?", + pTab->zName, pTab->zName, zRowid + ); + if( zSql==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0); + sqlite3_free(zSql); + } + + return rc; +} + +/* +** Parameter aData[] points to a record encoded in SQLite format. Bind +** each value in the record to the statement passed as the second argument. 
+*/ +static int hctJrnlBindRecord(int *pRc, sqlite3_stmt *pStmt, const u8 *aData){ + int rc = *pRc; + int ret = 0; + if( rc==SQLITE_OK ){ + const u8 *pHdr = aData; + const u8 *pData = 0; + int nHdr; + int iBind; + + pHdr += getVarint32(pHdr, nHdr); + pData = &aData[nHdr]; + for(iBind=1; pHdr<&aData[nHdr]; iBind++){ + u32 t; + pHdr += getVarint32(pHdr, t); + switch( t ){ + case 10: + case 11: + case 0: /* NULL */ + sqlite3_bind_null(pStmt, iBind); + break; + + case 1: { /* 1 byte integer */ + i64 iVal = pData[0]; + pData += 1; + sqlite3_bind_int64(pStmt, iBind, iVal); + break; + } + case 2: { /* 2 byte integer */ + i64 iVal = ((i64)pData[0]<<8) + (i64)pData[1]; + pData += 2; + sqlite3_bind_int64(pStmt, iBind, iVal); + break; + } + case 3: { /* 3 byte integer */ + i64 iVal = ((i64)pData[0]<<16) + ((i64)pData[1]<<8) + (i64)pData[2]; + pData += 3; + sqlite3_bind_int64(pStmt, iBind, iVal); + break; + } + case 4: { /* 4 byte integer */ + i64 iVal = ((i64)pData[0]<<24) + + ((i64)pData[1]<<16) + + ((i64)pData[2]<<8) + + (i64)pData[3]; + pData += 4; + sqlite3_bind_int64(pStmt, iBind, iVal); + break; + } + case 5: { /* 6 byte integer */ + i64 iVal = ((i64)pData[0]<<40) + + ((i64)pData[1]<<32) + + ((i64)pData[2]<<24) + + ((i64)pData[3]<<16) + + ((i64)pData[4]<<8) + + (i64)pData[5]; + pData += 6; + sqlite3_bind_int64(pStmt, iBind, iVal); + break; + } + + case 6: case 7: { /* 8 byte integer, 8 byte real value */ + u64 iVal = ((u64)pData[0]<<56) + + ((u64)pData[1]<<48) + + ((u64)pData[2]<<40) + + ((u64)pData[3]<<32) + + ((u64)pData[4]<<24) + + ((u64)pData[5]<<16) + + ((u64)pData[6]<<8) + + (u64)pData[7]; + pData += 8; + if( t==6 ){ + i64 iVal2; + memcpy(&iVal2, &iVal, sizeof(iVal)); + sqlite3_bind_int64(pStmt, iBind, iVal2); + }else{ + double rVal2; + memcpy(&rVal2, &iVal, sizeof(iVal)); + sqlite3_bind_double(pStmt, iBind, rVal2); + } + break; + } + + case 8: /* integer value 0 */ + sqlite3_bind_int(pStmt, iBind, 0); + break; + + case 9: /* integer value 1 */ + 
sqlite3_bind_int(pStmt, iBind, 1); + break; + + default: { + int nByte = (t - 12) / 2; + if( t & 0x01 ){ + sqlite3_bind_text( + pStmt, iBind, (const char*)pData, nByte, SQLITE_TRANSIENT + ); + }else{ + sqlite3_bind_blob( + pStmt, iBind, (const void*)pData, nByte, SQLITE_TRANSIENT + ); + } + pData += nByte; + break; + }; + } + } + + ret = pData - aData; + } + return ret; +} + +SQLITE_PRIVATE u64 sqlite3HctJrnlWriteTid(HctJournal *pJrnl, u64 *piCid){ + u64 iRet = 0; + assert( *piCid==0 ); + if( pJrnl && pJrnl->eInWrite!=HCT_JOURNAL_NONE ){ + iRet = pJrnl->iWriteTid; + *piCid = pJrnl->iWriteCid; + } + return iRet; +} + +SQLITE_PRIVATE u64 sqlite3HctJournalSnapshot(HctJournal *pJrnl){ + u64 iRet = 0; + if( pJrnl ){ + if( pJrnl->eInWrite==HCT_JOURNAL_INROLLBACK ){ + return pJrnl->iRollbackSnapshot; + } + HctJrnlServer *pServer = pJrnl->pServer; + if( pServer && pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER ){ + u64 iTest = 0; + u64 iValid = 0; + u64 iSnap = HctAtomicLoad(&pServer->iSnapshot); + iRet = iSnap; + for(iTest=iRet+1; 1; iTest++){ + u64 iVal = HctAtomicLoad(&pServer->aCommit[iTest % pServer->nCommit]); + if( iVal=iValid ) iRet = iTest; + }else{ + iValid = MAX(iVal, iValid); + } + } + + /* Update HctJrnlServer.iSnapshot if required */ + if( iRet>=iSnap+16 ){ + (void)HctCASBool(&pServer->iSnapshot, iSnap, iRet); + } + + /* If we are in an sqlite3_hct_journal_write() call, it is fine (and + ** necessary) to read snapshots that are invalid to the application. + ** So ignore any entries in the aCommit[] array that indicate such. */ + if( pJrnl->eInWrite==HCT_JOURNAL_INWRITE ){ + assert( (iTest-1)>=iRet ); + iRet = (iTest-1); + } + } + } + return iRet; +} + +/* +** Set output variable (*piCid) to the CID of the newest available +** database snapshot. Return SQLITE_OK if successful, or an SQLite +** error code if something goes wrong. 
+*/ +SQLITE_API int sqlite3_hct_journal_snapshot(sqlite3 *db, sqlite3_int64 *piCid){ + int rc = SQLITE_OK; + HctJournal *pJrnl = 0; + + rc = hctJrnlFind(db, &pJrnl); + if( rc==SQLITE_OK ){ + *piCid = (i64)sqlite3HctJournalSnapshot(pJrnl); + }else{ + *piCid = 0; + } + return rc; +} + +static sqlite3_stmt *hctJrnlPrepare(int *pRc, sqlite3 *db, const char *zSql){ + sqlite3_stmt *pStmt = 0; + if( *pRc==SQLITE_OK ){ + *pRc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + } + return pStmt; +} + +static void hctJrnlFinalize(int *pRc, sqlite3_stmt *pStmt){ + int rc = sqlite3_finalize(pStmt); + if( *pRc==SQLITE_OK ){ + *pRc = rc; + } +} + +SQLITE_API int sqlite3_hct_journal_truncate(sqlite3 *db, i64 iMinCid){ + int rc = SQLITE_OK; + HctJournal *pJrnl = 0; + sqlite3_stmt *pSelJrnl = 0; + sqlite3_stmt *pSelBaseline = 0; + sqlite3_stmt *pDelete = 0; + sqlite3_stmt *pUpdate = 0; + + if( 0==sqlite3_get_autocommit(db) ){ + hctJournalSetDbError(db, SQLITE_ERROR, + "cannot truncate journal from within a transaction" + ); + return SQLITE_ERROR; + } + + rc = hctJrnlFind(db, &pJrnl); + if( rc==SQLITE_OK + && pJrnl->pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER + ){ + u64 iCid = sqlite3HctJournalSnapshot(pJrnl); + if( iCideInWrite = HCT_JOURNAL_INWRITE; + rc = sqlite3_exec(db, "BEGIN CONCURRENT", 0, 0, 0); + } + + pSelBaseline = hctJrnlPrepare(&rc, db, + "SELECT cid, schemacid, hash FROM sqlite_hct_baseline" + ); + pSelJrnl = hctJrnlPrepare(&rc, db, + "SELECT cid, schemacid, hash FROM sqlite_hct_journal WHERE cidpDb, &pJrnl->iWriteTid); + pJrnl->iWriteCid = 1; + } + if( rc==SQLITE_OK ){ + rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); + } + if( rc!=SQLITE_OK ){ + sqlite3_exec(db, "ROLLBACK", 0, 0, 0); + } + } + + hctJrnlFinalize(&rc, pSelJrnl); + hctJrnlFinalize(&rc, pSelBaseline); + hctJrnlFinalize(&rc, pDelete); + hctJrnlFinalize(&rc, pUpdate); + pJrnl->eInWrite = HCT_JOURNAL_NONE; + pJrnl->iWriteTid = 0; + pJrnl->iWriteCid = 0; + return rc; +} + +static int hctBufferAppendInsert( + 
HctBuffer *pBuf, + i64 iRowid, + Table *pTab, + sqlite3_stmt *pQuery +){ + int ii; + int rc = SQLITE_OK; + + rc = hctBufferAppend(pBuf, "REPLACE INTO %Q(_rowid_", pTab->zName); + for(ii=0; rc==SQLITE_OK && iinCol; ii++){ + if( ii!=pTab->iPKey ){ + rc = hctBufferAppend(pBuf, ", %Q", pTab->aCol[ii].zCnName); + } + } + + if( rc==SQLITE_OK ){ + rc = hctBufferAppend(pBuf, ") VALUES(%lld", iRowid); + } + + for(ii=0; rc==SQLITE_OK && iinCol; ii++){ + if( ii!=pTab->iPKey ){ + rc = hctBufferAppend(&buf, "%squote(x.%Q)", zSep, pTab->aCol[ii].zCnName); + zSep = ", "; + } + } + if( rc==SQLITE_OK ){ + rc = hctBufferAppend(&buf, "FROM %Q AS x WHERE _rowid_=?", pTab->zName); + } + + if( rc==SQLITE_OK ){ + rc = sqlite3_prepare_v2(db, (const char*)buf.aBuf, -1, &pRet, 0); + } + sqlite3_free(buf.aBuf); + + *pRc = rc; + return pRet; +} + +/* +** Rollback transactions that follow the first hole in the journal. +*/ +SQLITE_API int sqlite3_hct_journal_rollback(sqlite3 *db, sqlite3_int64 iCid){ + int rc = SQLITE_OK; + HctJournal *pJrnl = 0; + i64 iLast = 0; + i64 iLastCont = 0; + sqlite3_stmt *pStmt = 0; + Schema *pSchema = 0; + + rc = hctJrnlFind(db, &pJrnl); + if( rc!=SQLITE_OK ) return rc; + pSchema = db->aDb[0].pSchema; + + /* + ** 1. Find the location of the first hole in the journal. + ** + ** 2. Loop through journal entries, from the newest back to the + ** first hole in the journal. + ** + ** 3. Work through each of the transactions identified in step (1). + ** For each, write a log file, make the required modifications to + ** the db and journal file, then delete the log file. + */ + + /* Cannot call this with an open transaction. */ + if( 0==sqlite3_get_autocommit(db) ){ + hctJournalSetDbError(db, SQLITE_ERROR, + "cannot rollback journal from within a transaction" + ); + return SQLITE_ERROR; + } + + /* Cannot call this in LEADER mode. 
*/ + if( pJrnl->pServer->eMode==SQLITE_HCT_JOURNAL_MODE_LEADER ){ + hctJournalSetDbError(db, SQLITE_ERROR, + "cannot rollback journal in leader database" + ); + return SQLITE_ERROR; + } + + /* Find the location of the first hole in the journal. If there are no + ** holes in the journal, this call is a no-op. */ + rc = hctJrnlGetJrnlShape(db, &iLast, &iLastCont); + assert( iLastCont<=iLast ); + if( rc!=SQLITE_OK || iLastCont>=iLast ) return rc; + + /* Loop through all of the journal entries that will be rolled back. + ** For each, extract the primary keys from the "data" blob. Query the + ** current database snapshot for each of these keys, generating an SQL + ** script with a "REPLACE INTO" for each row present in the db and a + ** "DELETE" for each not. */ + rc = sqlite3_prepare_v2(db, + "SELECT data FROM sqlite_hct_journal WHERE cid>?", -1, &pStmt, 0 + ); + if( rc==SQLITE_OK ){ + HctBuffer sql = {0, 0, 0}; + sqlite3_bind_int64(pStmt, 1, iLastCont); + + rc = hctBufferAppend(&sql, "BEGIN CONCURRENT;\n"); + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + const void *pData = sqlite3_column_blob(pStmt, 0); + int nData = sqlite3_column_bytes(pStmt, 0); + sqlite3_stmt *pQuery = 0; + Table *pTab = 0; + HctDataReader rdr; + + for(rc=hctDataReaderInit(pData, nData, &rdr); + rc==SQLITE_OK && rdr.bEof==0; + rc=hctDataReaderNext(&rdr) + ){ + switch( rdr.eType ){ + case HCT_TYPE_TABLE: { + pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, rdr.zTab); + if( pTab==0 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + rc = sqlite3_finalize(pQuery); + pQuery = hctGetQuoteQuery(&rc, db, pTab); + } + break; + } + + case HCT_TYPE_INSERT_ROWID: + case HCT_TYPE_DELETE_ROWID: { + sqlite3_bind_int64(pQuery, 1, rdr.iRowid); + if( SQLITE_ROW==sqlite3_step(pQuery) ){ + rc = hctBufferAppendInsert(&sql, rdr.iRowid, pTab, pQuery); + }else{ + rc = hctBufferAppend(&sql, + "DELETE FROM %Q WHERE _rowid_=%lld;\n", rdr.zTab, rdr.iRowid + ); + } + rc = sqlite3_reset(pQuery); + break; + } + + 
default: assert( 0 ); + } + if( rc ) break; + } + sqlite3_finalize(pQuery); + } + if( rc==SQLITE_OK ){ + rc = hctBufferAppend(&sql, + "DELETE FROM sqlite_hct_journal WHERE cid>%lld;\n", iLastCont + ); + } + + if( rc==SQLITE_OK ){ + assert( pJrnl->eInWrite==HCT_JOURNAL_NONE ); + pJrnl->eInWrite = HCT_JOURNAL_INROLLBACK; + rc = sqlite3_exec(db, (const char*)sql.aBuf, 0, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbStartWrite(pJrnl->pDb, &pJrnl->iWriteTid); + } + if( rc==SQLITE_OK ){ + pJrnl->iWriteCid = iLastCont; + pJrnl->iRollbackSnapshot = iLast; + rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); + } + if( rc!=SQLITE_OK ){ + sqlite3_exec(db, "ROLLBACK", 0, 0, 0); + } + pJrnl->eInWrite = HCT_JOURNAL_NONE; + } + + sqlite3_free(sql.aBuf); + sqlite3_finalize(pStmt); + } + + return rc; +} + +static u64 hctJournalFindLastWrite( + int *pRc, /* IN/OUT: Error code */ + HctJournal *pJrnl, /* Journal object */ + u64 iRoot, /* Root page of table */ + i64 iRowid /* Key (for rowid tables) */ +){ + int rc = *pRc; + u64 iRet = 0; + if( rc==SQLITE_OK ){ + HctDbCsr *pCsr = 0; + rc = sqlite3HctDbCsrOpen(pJrnl->pDb, 0, iRoot, &pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3HctDbCsrFindLastWrite(pCsr, 0, iRowid, &iRet); + sqlite3HctDbCsrClose(pCsr); + } + *pRc = rc; + } + return iRet; +} + +/* +** Write a transaction into the database. 
+*/ +SQLITE_API int sqlite3_hct_journal_write( + sqlite3 *db, /* Write to "main" db of this handle */ + sqlite3_int64 iCid, + const char *zSchema, + const void *pData, int nData, + sqlite3_int64 iSchemaCid +){ + int rc = SQLITE_OK; + char *zErr = 0; /* Error message, if any */ + HctJournal *pJrnl = 0; + u64 iValidCid = 0; + u64 iSnapshotId = 0; + Btree *pBt = db->aDb[0].pBt; + Schema *pSchema = db->aDb[0].pSchema; + u64 iRoot = 0; /* Root page of zTab */ + HctJrnlServer *pServer = 0; + + HctDataReader rdr; /* For iterating through pData/nData */ + + rc = hctJrnlFind(db, &pJrnl); + if( rc!=SQLITE_OK ) return rc; + pJrnl->eInWrite = HCT_JOURNAL_INWRITE; + pServer = pJrnl->pServer; + + /* Check that the journal is in follower mode */ + if( pServer->eMode!=SQLITE_HCT_JOURNAL_MODE_FOLLOWER ){ + hctJournalSetDbError(db, SQLITE_ERROR, "database is not in FOLLOWER mode"); + return SQLITE_ERROR; + } + + /* Check that there is no transaction open on the connection */ + if( rc==SQLITE_OK && sqlite3_get_autocommit(db)==0 ){ + hctJournalSetDbError(db, SQLITE_ERROR, "open transaction on database"); + return SQLITE_ERROR; + } + + /* Open a concurrent transaction on the db handle. Then ensure that the + ** snapshot on the main database has also been opened. */ + rc = sqlite3_exec(db, "BEGIN CONCURRENT", 0, 0, 0); + if( rc==SQLITE_OK ){ + int dummy = 0; + rc = sqlite3BtreeBeginTrans(pBt, 1, &dummy); + } + + /* Check that the snapshot that was just opened has a schema new enough + ** for this transaction to be applied. 
*/ + if( rc==SQLITE_OK ){ + iSnapshotId = sqlite3HctBtreeSnapshotId(pBt); + if( iSchemaCid>iSnapshotId ){ + rc = SQLITE_BUSY; + zErr = sqlite3_mprintf( + "change may not be applied yet (requires newer schema)" + ); + }else if( (iSnapshotId+HCT_MAX_LEADING_WRITE)aDb[0].pBt, iRoot)==0 ){ + iLastCid = hctJournalFindLastWrite(&rc, pJrnl, iRoot, rdr.iRowid); + } + if( iLastCid>iSnapshotId && iLastCidaDb[0].pBt, iRoot)==0 ){ + u64 iLastCid = 0; + iLastCid = hctJournalFindLastWrite(&rc, pJrnl, iRoot, rdr.iRowid); + if( iLastCid>iSnapshotId && iLastCidpDb, &pJrnl->iWriteTid); + sqlite3HctDbJrnlWriteCid(pJrnl->pDb, iCid); + pJrnl->iWriteCid = iCid; + } + + /* Write the sqlite_hct_journal record directly into the HctTree + ** structure. We don't write via the SQL interface here, because + ** writing to the db once sqlite3HctDbStartWrite() has been called + ** causes assert() failures. And we don't write directly to the db + ** either, because the write needs to be rolled back if there is + ** a conflict. */ + if( rc==SQLITE_OK ){ + u8 *pRec = 0; + int nRec = 0; + + /* TODO: "validcid" value */ + pRec = hctJrnlComposeRecord(iCid, zSchema, + pData, nData, iSchemaCid, pJrnl->iWriteTid, iValidCid, &nRec + ); + if( pRec==0 ){ + rc = SQLITE_NOMEM_BKPT; + }else{ + HctTreeCsr *pCsr = 0; + u64 root = hctFindRootByName(db->aDb[0].pSchema, "sqlite_hct_journal"); + + rc = sqlite3HctTreeCsrOpen(pJrnl->pTree, root, &pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3HctTreeInsert(pCsr, 0, iCid, nRec, pRec, 0); + sqlite3HctTreeCsrClose(pCsr); + } + } + sqlite3_free(pRec); + + if( rc==SQLITE_OK ){ + rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); + } + if( rc==SQLITE_OK ){ + i64 iVal = iValidCid ? iValidCid : iCid; + i64 *pPtr = (i64*)&pServer->aCommit[iCid % pServer->nCommit]; + + /* If this transaction updated the schema, update the Server.iSchemaCid + ** field as well. This field is not used in FOLLOWER mode, but may be + ** if this process switches to LEADER later on. 
*/ + if( zSchema[0] ){ + HctAtomicStore(&pServer->iSchemaCid, iCid); + } + + assert( iVal>=iCid ); + while( 1 ){ + i64 iExist = *pPtr; + if( iExist>=iVal ) break; + if( HctCASBool(pPtr, iExist, iVal) ) break; + } + assert( *pPtr>=iVal ); + + if( HctAtomicLoad(&pServer->iSnapshot)==0 ){ + (void)HctCASBool(&pServer->iSnapshot, (u64)0, (u64)iCid); + } + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_exec(db, "ROLLBACK", 0, 0, 0); + if( zErr ){ + hctJournalSetDbError(db, rc, "%s", zErr); + sqlite3_free(zErr); + }else{ + hctJournalSetDbError(db, rc, 0); + } + } + pJrnl->eInWrite = HCT_JOURNAL_NONE; + sqlite3HctDbJrnlWriteCid(pJrnl->pDb, 0); + return rc; +} + +static int hctBufferAppendIf(HctBuffer *pBuf, const char *zSep){ + int rc = SQLITE_OK; + if( pBuf->nBuf>0 ){ + rc = hctBufferAppend(pBuf, "%s", zSep); + } + return rc; +} + +static void hctJournalEntryFunc( + sqlite3_context *pCtx, + int nArg, + sqlite3_value **apArg +){ + sqlite3 *db = sqlite3_context_db_handle(pCtx); + const u8 *aEntry = 0; + int nEntry = 0; + int ii = 0; + HctBuffer buf; + const char *zTab = "!"; + const char *zSep = " "; + + assert( nArg==1 ); + memset(&buf, 0, sizeof(buf)); + + nEntry = sqlite3_value_bytes(apArg[0]); + aEntry = (const u8*)sqlite3_value_blob(apArg[0]); + + while( ii */ + +/* + * If compiled on a machine that doesn't have a 32-bit integer, + * you just set "uint32" to the appropriate datatype for an + * unsigned 32-bit integer. For example: + * + * cc -Duint32='unsigned long' md5.c + * + */ +#ifndef uint32 +# define uint32 unsigned int +#endif + +struct MD5Context { + int isInit; + uint32 buf[4]; + uint32 bits[2]; + unsigned char in[64]; +}; +typedef struct MD5Context MD5Context; + +/* + * Note: this code is harmless on little-endian machines. 
+ */ +static void byteReverse (unsigned char *buf, unsigned longs){ + uint32 t; + do { + t = (uint32)((unsigned)buf[3]<<8 | buf[2]) << 16 | + ((unsigned)buf[1]<<8 | buf[0]); + *(uint32 *)buf = t; + buf += 4; + } while (--longs); +} +/* The four core functions - F1 is optimized somewhat */ + +/* #define F1(x, y, z) (x & y | ~x & z) */ +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +/* This is the central step in the MD5 algorithm. */ +#define MD5STEP(f, w, x, y, z, data, s) \ + ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) + +/* + * The core of the MD5 algorithm, this alters an existing MD5 hash to + * reflect the addition of 16 longwords of new data. MD5Update blocks + * the data and converts bytes into longwords for this routine. + */ +static void MD5Transform(uint32 buf[4], const uint32 in[16]){ + register uint32 a, b, c, d; + + a = buf[0]; + b = buf[1]; + c = buf[2]; + d = buf[3]; + + MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20); + 
MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21); + 
MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21); + + buf[0] += a; + buf[1] += b; + buf[2] += c; + buf[3] += d; +} + +/* + * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious + * initialization constants. + */ +static void MD5Init(MD5Context *ctx){ + ctx->isInit = 1; + ctx->buf[0] = 0x67452301; + ctx->buf[1] = 0xefcdab89; + ctx->buf[2] = 0x98badcfe; + ctx->buf[3] = 0x10325476; + ctx->bits[0] = 0; + ctx->bits[1] = 0; +} + +/* + * Update context to reflect the concatenation of another buffer full + * of bytes. + */ +static +void MD5Update(MD5Context *ctx, const unsigned char *buf, unsigned int len){ + uint32 t; + + /* Update bitcount */ + + t = ctx->bits[0]; + if ((ctx->bits[0] = t + ((uint32)len << 3)) < t) + ctx->bits[1]++; /* Carry from low to high */ + ctx->bits[1] += len >> 29; + + t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ + + /* Handle any leading odd-sized chunks */ + + if ( t ) { + unsigned char *p = (unsigned char *)ctx->in + t; + + t = 64-t; + if (len < t) { + if( len ) memcpy(p, buf, len); + return; + } + memcpy(p, buf, t); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->in); + buf += t; + len -= t; + } + + /* Process data in 64-byte chunks */ + + while (len >= 64) { + memcpy(ctx->in, buf, 64); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->in); + buf += 64; + len -= 64; + } + + /* Handle any remaining bytes of data. */ + + memcpy(ctx->in, buf, len); +} + +/* + * Final wrapup - pad to 64-byte boundary with the bit pattern + * 1 0* (64-bit count of bits processed, MSB-first) + */ +static void MD5Final(unsigned char digest[16], MD5Context *ctx){ + unsigned count; + unsigned char *p; + + /* Compute number of bytes mod 64 */ + count = (ctx->bits[0] >> 3) & 0x3F; + + /* Set the first char of padding to 0x80. 
This is safe since there is + always at least one byte free */ + p = ctx->in + count; + *p++ = 0x80; + + /* Bytes of padding needed to make 64 bytes */ + count = 64 - 1 - count; + + /* Pad out to 56 mod 64 */ + if (count < 8) { + /* Two lots of padding: Pad the first block to 64 bytes */ + memset(p, 0, count); + byteReverse(ctx->in, 16); + MD5Transform(ctx->buf, (uint32 *)ctx->in); + + /* Now fill the next block with 56 bytes */ + memset(ctx->in, 0, 56); + } else { + /* Pad block to 56 bytes */ + memset(p, 0, count-8); + } + byteReverse(ctx->in, 14); + + /* Append length in bits and transform */ + memcpy(ctx->in + 14*4, ctx->bits, 8); + + MD5Transform(ctx->buf, (uint32 *)ctx->in); + byteReverse((unsigned char *)ctx->buf, 4); + memcpy(digest, ctx->buf, 16); +} + +/*************************************************************************/ +/*************************************************************************/ +/*************************************************************************/ + + +/* +** Both arguments are assumed to point to SQLITE_HCT_JOURNAL_HASHSIZE +** byte buffers. This function updates the hash stored in buffer pHash +** based on the contents of buffer pData. +*/ +SQLITE_API void sqlite3_hct_journal_hash(void *pHash, const void *pData){ + MD5Context ctx; + MD5Init(&ctx); + MD5Update(&ctx, pHash, SQLITE_HCT_JOURNAL_HASHSIZE); + MD5Update(&ctx, pData, SQLITE_HCT_JOURNAL_HASHSIZE); + MD5Final(pHash, &ctx); +} + +static void md5U64(MD5Context *pCtx, sqlite3_uint64 iVal){ + u8 aVal[8]; + aVal[0] = (iVal >> 56) & 0xFF; + aVal[1] = (iVal >> 48) & 0xFF; + aVal[2] = (iVal >> 40) & 0xFF; + aVal[3] = (iVal >> 32) & 0xFF; + aVal[4] = (iVal >> 24) & 0xFF; + aVal[5] = (iVal >> 16) & 0xFF; + aVal[6] = (iVal >> 8) & 0xFF; + aVal[7] = (iVal >> 0) & 0xFF; + MD5Update(pCtx, aVal, sizeof(aVal)); +} + +/* +** It is assumed that buffer pHash points to a buffer +** SQLITE_HCT_JOURNAL_HASHSIZE bytes in size. 
This function populates this +** buffer with a hash based on the remaining arguments. +*/ +SQLITE_API void sqlite3_hct_journal_hashentry( + void *pHash, /* OUT: Hash of other arguments */ + sqlite3_int64 iCid, + const char *zSchema, + const void *pData, int nData, + sqlite3_int64 iSchemaCid +){ + MD5Context ctx; + MD5Init(&ctx); + + md5U64(&ctx, (sqlite3_uint64)iCid); + MD5Update(&ctx, (const u8*)zSchema, sqlite3Strlen30(zSchema)); + MD5Update(&ctx, pData, nData); + md5U64(&ctx, (sqlite3_uint64)iSchemaCid); + + MD5Final(pHash, &ctx); +} + + +/************** End of hct_journalhash.c *************************************/ +/* Return the source-id for this library */ +SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } +#endif /* SQLITE_AMALGAMATION */ /************************** End of sqlite3.c ******************************/ diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 9827d4007..12fe5fda2 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -146,9 +146,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.47.0" -#define SQLITE_VERSION_NUMBER 3047000 -#define SQLITE_SOURCE_ID "2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f" +#define SQLITE_VERSION "3.48.0" +#define SQLITE_VERSION_NUMBER 3048000 +#define SQLITE_SOURCE_ID "2024-11-15 19:25:39 ed829bf2b069a48c644ae5706399dad7486e5abb87dc1225764038ac258ea4dc" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -652,6 +652,13 @@ SQLITE_API int sqlite3_exec( ** filesystem supports doing multiple write operations atomically when those ** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and ** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. +** +** The SQLITE_IOCAP_SUBPAGE_READ property means that it is ok to read +** from the database file in amounts that are not a multiple of the +** page size and that do not begin at a page boundary. 
Without this +** property, SQLite is careful to only do full-page reads and write +** on aligned pages, with the one exception that it will do a sub-page +** read of the first page to access the database header. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -668,6 +675,7 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 #define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 +#define SQLITE_IOCAP_SUBPAGE_READ 0x00008000 /* ** CAPI3REF: File Locking Levels @@ -814,6 +822,7 @@ struct sqlite3_file { **
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  • [SQLITE_IOCAP_IMMUTABLE] **
  • [SQLITE_IOCAP_BATCH_ATOMIC] +**
  • [SQLITE_IOCAP_SUBPAGE_READ] ** ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of @@ -1091,6 +1100,11 @@ struct sqlite3_io_methods { ** pointed to by the pArg argument. This capability is used during testing ** and only needs to be supported when SQLITE_TEST is defined. ** +**
  • [[SQLITE_FCNTL_NULL_IO]] +** The [SQLITE_FCNTL_NULL_IO] opcode sets the low-level file descriptor +** or file handle for the [sqlite3_file] object such that it will no longer +** read or write to the database file. +** **
  • [[SQLITE_FCNTL_WAL_BLOCK]] ** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might ** be advantageous to block on the next WAL lock if the lock is not immediately @@ -1244,6 +1258,7 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_EXTERNAL_READER 40 #define SQLITE_FCNTL_CKSM_FILE 41 #define SQLITE_FCNTL_RESET_CACHE 42 +#define SQLITE_FCNTL_NULL_IO 43 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -2622,10 +2637,14 @@ SQLITE_API void sqlite3_set_last_insert_rowid(sqlite3*,sqlite3_int64); ** deleted by the most recently completed INSERT, UPDATE or DELETE ** statement on the database connection specified by the only parameter. ** The two functions are identical except for the type of the return value -** and that if the number of rows modified by the most recent INSERT, UPDATE +** and that if the number of rows modified by the most recent INSERT, UPDATE, ** or DELETE is greater than the maximum value supported by type "int", then ** the return value of sqlite3_changes() is undefined. ^Executing any other ** type of SQL statement does not modify the value returned by these functions. +** For the purposes of this interface, a CREATE TABLE AS SELECT statement +** does not count as an INSERT, UPDATE or DELETE statement and hence the rows +** added to the new table by the CREATE TABLE AS SELECT statement are not +** counted. ** ** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are ** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], @@ -4222,13 +4241,17 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); ** and sqlite3_prepare16_v3() use UTF-16. ** ** ^If the nByte argument is negative, then zSql is read up to the -** first zero terminator. ^If nByte is positive, then it is the -** number of bytes read from zSql. ^If nByte is zero, then no prepared +** first zero terminator. 
^If nByte is positive, then it is the maximum +** number of bytes read from zSql. When nByte is positive, zSql is read +** up to the first zero terminator or until the nByte bytes have been read, +** whichever comes first. ^If nByte is zero, then no prepared ** statement is generated. ** If the caller knows that the supplied string is nul-terminated, then ** there is a small performance advantage to passing an nByte parameter that ** is the number of bytes in the input string including ** the nul-terminator. +** Note that nByte measure the length of the input in bytes, not +** characters, even for the UTF-16 interfaces. ** ** ^If pzTail is not NULL then *pzTail is made to point to the first byte ** past the end of the first SQL statement in zSql. These routines only @@ -5599,7 +5622,7 @@ SQLITE_API int sqlite3_create_window_function( ** This flag instructs SQLite to omit some corner-case optimizations that ** might disrupt the operation of the [sqlite3_value_subtype()] function, ** causing it to return zero rather than the correct subtype(). -** SQL functions that invokes [sqlite3_value_subtype()] should have this +** All SQL functions that invoke [sqlite3_value_subtype()] should have this ** property. If the SQLITE_SUBTYPE property is omitted, then the return ** value from [sqlite3_value_subtype()] might sometimes be zero even though ** a non-zero subtype was specified by the function argument expression. 
@@ -8364,8 +8387,9 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 #define SQLITE_TESTCTRL_LOGEST 33 -#define SQLITE_TESTCTRL_USELONGDOUBLE 34 -#define SQLITE_TESTCTRL_LAST 34 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_USELONGDOUBLE 34 /* NOT USED */ +#define SQLITE_TESTCTRL_HCT_MTCOMMIT 35 +#define SQLITE_TESTCTRL_LAST 35 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking @@ -9340,6 +9364,16 @@ typedef struct sqlite3_backup sqlite3_backup; ** APIs are not strictly speaking threadsafe. If they are invoked at the ** same time as another thread is invoking sqlite3_backup_step() it is ** possible that they return invalid values. +** +** Alternatives To Using The Backup API +** +** Other techniques for safely creating a consistent backup of an SQLite +** database include: +** +**
      +**
    • The [VACUUM INTO] command. +**
    • The [sqlite3_rsync] utility program. +**
    */ SQLITE_API sqlite3_backup *sqlite3_backup_init( sqlite3 *pDest, /* Destination database handle */ @@ -10539,6 +10573,14 @@ typedef struct sqlite3_snapshot { ** If there is not already a read-transaction open on schema S when ** this function is called, one is opened automatically. ** +** If a read-transaction is opened by this function, then it is guaranteed +** that the returned snapshot object may not be invalidated by a database +** writer or checkpointer until after the read-transaction is closed. This +** is not guaranteed if a read-transaction is already open when this +** function is called. In that case, any subsequent write or checkpoint +** operation on the database may invalidate the returned snapshot handle, +** even while the read-transaction remains open. +** ** The following must be true for this function to succeed. If any of ** the following statements are false when sqlite3_snapshot_get() is ** called, SQLITE_ERROR is returned. The final value of *P is undefined @@ -10961,6 +11003,9 @@ SQLITE_API int sqlite3_commit_status( # undef double #endif +SQLITE_API void sqlite3_hct_cas_failure(int nCASFailCnt, int nCASFailReset); +SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt); + #if defined(__wasi__) # undef SQLITE_WASI # define SQLITE_WASI 1 @@ -10975,7 +11020,7 @@ SQLITE_API int sqlite3_commit_status( #ifdef __cplusplus } /* End of the 'extern "C"' block */ #endif -#endif /* SQLITE3_H */ +/* #endif for SQLITE3_H will be added by mksqlite3.tcl */ /******** Begin file sqlite3rtree.h *********/ /* @@ -13354,7 +13399,6 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting -** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** @@ -13698,3 +13742,4 @@ struct fts5_api { #endif /* _FTS5_H */ /******** End of fts5.h *********/ +#endif /* SQLITE3_H */ diff --git a/test/clustertest/BedrockClusterTester.h b/test/clustertest/BedrockClusterTester.h index fb8dbe541..aa3754d79 100644 --- a/test/clustertest/BedrockClusterTester.h +++ b/test/clustertest/BedrockClusterTester.h @@ -195,7 +195,7 @@ ClusterTester::~ClusterTester() // Shut down everything but the leader first. list threads; - for (int i = 0; i< _size; i++) { + for (int i = 0; i < _size; i++) { // Wait for all the commands to be done before stopping. // If we don't do this, then if the leader loses quorum, because the other nodes have all shut down, // it won't run remaining commands, and it will get stuck forever. @@ -216,7 +216,9 @@ ClusterTester::~ClusterTester() } // Then do leader last. This is to avoid getting in a state where nodes try to stand up as leader shuts down. + cout << "Stopping leader." << endl; stopNode(0); + cout << "Stopped leader." 
<< endl; auto end = STimeNow(); diff --git a/test/lib/BedrockTester.cpp b/test/lib/BedrockTester.cpp index c112f69a1..d8f1153b8 100644 --- a/test/lib/BedrockTester.cpp +++ b/test/lib/BedrockTester.cpp @@ -17,7 +17,7 @@ PortMap BedrockTester::ports; mutex BedrockTester::_testersMutex; set BedrockTester::_testers; -const bool BedrockTester::ENABLE_HCTREE{false}; +const bool BedrockTester::ENABLE_HCTREE{true}; string BedrockTester::getTempFileName(string prefix) { string templateStr = "/tmp/" + prefix + "bedrocktest_XXXXXX.db"; @@ -576,7 +576,7 @@ bool BedrockTester::readDB(const string& query, SQResult& result, bool online) SData command("Query"); command["Query"] = fixedQuery; command["Format"] = "JSON"; - auto commandResult = executeWaitMultipleData({command}); + auto commandResult = executeWaitMultipleData({command}, 1); auto row0 = SParseJSONObject(commandResult[0].content)["rows"]; auto headerString = SParseJSONObject(commandResult[0].content)["headers"]; diff --git a/test/lib/BedrockTester.h b/test/lib/BedrockTester.h index 9fb31f111..0d3680fc3 100644 --- a/test/lib/BedrockTester.h +++ b/test/lib/BedrockTester.h @@ -43,7 +43,7 @@ class BedrockTester { string startServer(bool wait = true); // Stop a server by sending it a signal. - void stopServer(int signal = SIGTERM); + virtual void stopServer(int signal = SIGTERM); // Shuts down all bedrock servers associated with any existing testers. 
static void stopAll(); From 9555940c18a82c438271279205fbc0f03b6c5887 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 6 Jan 2025 12:15:18 -0800 Subject: [PATCH 117/127] Add check for LEADING or FOLLOWING --- test/lib/BedrockTester.cpp | 18 ++++++++++++++++++ test/lib/BedrockTester.h | 3 +++ 2 files changed, 21 insertions(+) diff --git a/test/lib/BedrockTester.cpp b/test/lib/BedrockTester.cpp index d8f1153b8..424c2e0e0 100644 --- a/test/lib/BedrockTester.cpp +++ b/test/lib/BedrockTester.cpp @@ -622,6 +622,24 @@ bool BedrockTester::waitForStatusTerm(const string& term, const string& testValu return false; } +bool BedrockTester::waitForLeadingFollowing(uint64_t timeoutUS) { + uint64_t start = STimeNow(); + while (STimeNow() < start + timeoutUS) { + try { + string result = SParseJSONObject(BedrockTester::executeWaitVerifyContent(SData("Status"), "200", true))["state"]; + + // if the value matches, return, otherwise wait + if (result == "LEADING" || result == "FOLLOWING") { + return true; + } + } catch (...) { + // Doesn't do anything, we'll fall through to the sleep and try again. + } + usleep(100'000); + } + return false; +} + bool BedrockTester::waitForState(const string& state, uint64_t timeoutUS) { return waitForStatusTerm("state", state, timeoutUS); diff --git a/test/lib/BedrockTester.h b/test/lib/BedrockTester.h index 0d3680fc3..6aa005954 100644 --- a/test/lib/BedrockTester.h +++ b/test/lib/BedrockTester.h @@ -89,6 +89,9 @@ class BedrockTester { // true if a match was found, or times out otherwose. bool waitForStatusTerm(const string& term, const string& testValue, uint64_t timeoutUS = 60'000'000); + // Waits for the status to be either LEADING or FOLLOWING + bool waitForLeadingFollowing(uint64_t timeoutUS = 60'000'000); + // This is just a convenience wrapper around `waitForStatusTerm` looking for the state of the node. 
bool waitForState(const string& state, uint64_t timeoutUS = 60'000'000); From f635d16f31f7a3456a77e8988f717ce5721e6e90 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 6 Jan 2025 12:27:30 -0800 Subject: [PATCH 118/127] Remove test code --- test/clustertest/BedrockClusterTester.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/clustertest/BedrockClusterTester.h b/test/clustertest/BedrockClusterTester.h index aa3754d79..17c68bfb3 100644 --- a/test/clustertest/BedrockClusterTester.h +++ b/test/clustertest/BedrockClusterTester.h @@ -216,9 +216,7 @@ ClusterTester::~ClusterTester() } // Then do leader last. This is to avoid getting in a state where nodes try to stand up as leader shuts down. - cout << "Stopping leader." << endl; stopNode(0); - cout << "Stopped leader." << endl; auto end = STimeNow(); From 273f4d2795a5637941bd92192ac309b6d37a9311 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Mon, 6 Jan 2025 12:29:16 -0800 Subject: [PATCH 119/127] Undo HC-Tree --- libstuff/sqlite3.c | 28263 ++++------------------------------- libstuff/sqlite3.h | 67 +- test/lib/BedrockTester.cpp | 2 +- 3 files changed, 3032 insertions(+), 25300 deletions(-) diff --git a/libstuff/sqlite3.c b/libstuff/sqlite3.c index 64b8eca84..33034cd19 100644 --- a/libstuff/sqlite3.c +++ b/libstuff/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.48.0. By combining all the individual C code files into this +** version 3.47.0. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -18,11 +18,8 @@ ** separate file. This file contains only code for the core SQLite library. 
** ** The content in this amalgamation comes from Fossil check-in -** ed829bf2b069a48c644ae5706399dad7486e with changes in files: -** -** +** b40cd7395c44b1f2d019d8e809e03de0e083. */ -#ifndef SQLITE_AMALGAMATION #define SQLITE_CORE 1 #define SQLITE_AMALGAMATION 1 #ifndef SQLITE_PRIVATE @@ -82,17 +79,6 @@ # define SQLITE_TCLAPI #endif -#define SQLITE_ENABLE_HCT 1 -#ifdef SQLITE_ENABLE_HCT -# define SQLITE_OMIT_SHARED_CACHE 1 -# define SQLITE_ENABLE_PREUPDATE_HOOK 1 -#endif - -#ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS -# define SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS 1 -#endif -#define SQLITE_SHARED_MAPPING 1 - /* ** Include the header file used to customize the compiler options for MSVC. ** This should be done first so that it can successfully prevent spurious @@ -477,9 +463,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.48.0" -#define SQLITE_VERSION_NUMBER 3048000 -#define SQLITE_SOURCE_ID "2024-11-15 19:25:39 ed829bf2b069a48c644ae5706399dad7486e5abb87dc1225764038ac258ea4dc" +#define SQLITE_VERSION "3.47.0" +#define SQLITE_VERSION_NUMBER 3047000 +#define SQLITE_SOURCE_ID "2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -983,13 +969,6 @@ SQLITE_API int sqlite3_exec( ** filesystem supports doing multiple write operations atomically when those ** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and ** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. -** -** The SQLITE_IOCAP_SUBPAGE_READ property means that it is ok to read -** from the database file in amounts that are not a multiple of the -** page size and that do not begin at a page boundary. Without this -** property, SQLite is careful to only do full-page reads and write -** on aligned pages, with the one exception that it will do a sub-page -** read of the first page to access the database header. 
*/ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -1006,7 +985,6 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 #define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 -#define SQLITE_IOCAP_SUBPAGE_READ 0x00008000 /* ** CAPI3REF: File Locking Levels @@ -1153,7 +1131,6 @@ struct sqlite3_file { **
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  • [SQLITE_IOCAP_IMMUTABLE] **
  • [SQLITE_IOCAP_BATCH_ATOMIC] -**
  • [SQLITE_IOCAP_SUBPAGE_READ] ** ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of @@ -1431,11 +1408,6 @@ struct sqlite3_io_methods { ** pointed to by the pArg argument. This capability is used during testing ** and only needs to be supported when SQLITE_TEST is defined. ** -**
  • [[SQLITE_FCNTL_NULL_IO]] -** The [SQLITE_FCNTL_NULL_IO] opcode sets the low-level file descriptor -** or file handle for the [sqlite3_file] object such that it will no longer -** read or write to the database file. -** **
  • [[SQLITE_FCNTL_WAL_BLOCK]] ** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might ** be advantageous to block on the next WAL lock if the lock is not immediately @@ -1589,7 +1561,6 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_EXTERNAL_READER 40 #define SQLITE_FCNTL_CKSM_FILE 41 #define SQLITE_FCNTL_RESET_CACHE 42 -#define SQLITE_FCNTL_NULL_IO 43 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -2968,14 +2939,10 @@ SQLITE_API void sqlite3_set_last_insert_rowid(sqlite3*,sqlite3_int64); ** deleted by the most recently completed INSERT, UPDATE or DELETE ** statement on the database connection specified by the only parameter. ** The two functions are identical except for the type of the return value -** and that if the number of rows modified by the most recent INSERT, UPDATE, +** and that if the number of rows modified by the most recent INSERT, UPDATE ** or DELETE is greater than the maximum value supported by type "int", then ** the return value of sqlite3_changes() is undefined. ^Executing any other ** type of SQL statement does not modify the value returned by these functions. -** For the purposes of this interface, a CREATE TABLE AS SELECT statement -** does not count as an INSERT, UPDATE or DELETE statement and hence the rows -** added to the new table by the CREATE TABLE AS SELECT statement are not -** counted. ** ** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are ** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], @@ -4572,17 +4539,13 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); ** and sqlite3_prepare16_v3() use UTF-16. ** ** ^If the nByte argument is negative, then zSql is read up to the -** first zero terminator. ^If nByte is positive, then it is the maximum -** number of bytes read from zSql. 
When nByte is positive, zSql is read -** up to the first zero terminator or until the nByte bytes have been read, -** whichever comes first. ^If nByte is zero, then no prepared +** first zero terminator. ^If nByte is positive, then it is the +** number of bytes read from zSql. ^If nByte is zero, then no prepared ** statement is generated. ** If the caller knows that the supplied string is nul-terminated, then ** there is a small performance advantage to passing an nByte parameter that ** is the number of bytes in the input string including ** the nul-terminator. -** Note that nByte measure the length of the input in bytes, not -** characters, even for the UTF-16 interfaces. ** ** ^If pzTail is not NULL then *pzTail is made to point to the first byte ** past the end of the first SQL statement in zSql. These routines only @@ -5953,7 +5916,7 @@ SQLITE_API int sqlite3_create_window_function( ** This flag instructs SQLite to omit some corner-case optimizations that ** might disrupt the operation of the [sqlite3_value_subtype()] function, ** causing it to return zero rather than the correct subtype(). -** All SQL functions that invoke [sqlite3_value_subtype()] should have this +** SQL functions that invokes [sqlite3_value_subtype()] should have this ** property. If the SQLITE_SUBTYPE property is omitted, then the return ** value from [sqlite3_value_subtype()] might sometimes be zero even though ** a non-zero subtype was specified by the function argument expression. 
@@ -8718,9 +8681,8 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 #define SQLITE_TESTCTRL_LOGEST 33 -#define SQLITE_TESTCTRL_USELONGDOUBLE 34 /* NOT USED */ -#define SQLITE_TESTCTRL_HCT_MTCOMMIT 35 -#define SQLITE_TESTCTRL_LAST 35 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_USELONGDOUBLE 34 +#define SQLITE_TESTCTRL_LAST 34 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking @@ -9695,16 +9657,6 @@ typedef struct sqlite3_backup sqlite3_backup; ** APIs are not strictly speaking threadsafe. If they are invoked at the ** same time as another thread is invoking sqlite3_backup_step() it is ** possible that they return invalid values. -** -** Alternatives To Using The Backup API -** -** Other techniques for safely creating a consistent backup of an SQLite -** database include: -** -**
      -**
    • The [VACUUM INTO] command. -**
    • The [sqlite3_rsync] utility program. -**
    */ SQLITE_API sqlite3_backup *sqlite3_backup_init( sqlite3 *pDest, /* Destination database handle */ @@ -10904,14 +10856,6 @@ typedef struct sqlite3_snapshot { ** If there is not already a read-transaction open on schema S when ** this function is called, one is opened automatically. ** -** If a read-transaction is opened by this function, then it is guaranteed -** that the returned snapshot object may not be invalidated by a database -** writer or checkpointer until after the read-transaction is closed. This -** is not guaranteed if a read-transaction is already open when this -** function is called. In that case, any subsequent write or checkpoint -** operation on the database may invalidate the returned snapshot handle, -** even while the read-transaction remains open. -** ** The following must be true for this function to succeed. If any of ** the following statements are false when sqlite3_snapshot_get() is ** called, SQLITE_ERROR is returned. The final value of *P is undefined @@ -11334,9 +11278,6 @@ SQLITE_API int sqlite3_commit_status( # undef double #endif -SQLITE_API void sqlite3_hct_cas_failure(int nCASFailCnt, int nCASFailReset); -SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt); - #if defined(__wasi__) # undef SQLITE_WASI # define SQLITE_WASI 1 @@ -11351,7 +11292,7 @@ SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt); #if 0 } /* End of the 'extern "C"' block */ #endif -/* #endif for SQLITE3_H will be added by mksqlite3.tcl */ +#endif /* SQLITE3_H */ /******** Begin file sqlite3rtree.h *********/ /* @@ -13730,6 +13671,7 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting +** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** @@ -14073,7 +14015,6 @@ struct fts5_api { #endif /* _FTS5_H */ /******** End of fts5.h *********/ -#endif /* SQLITE3_H */ /************** End of sqlite3.h *********************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ @@ -14119,7 +14060,6 @@ struct fts5_api { #ifndef SQLITE_MAX_LENGTH # define SQLITE_MAX_LENGTH 1000000000 #endif -#define SQLITE_MIN_LENGTH 30 /* Minimum value for the length limit */ /* ** This is the maximum number of @@ -14185,9 +14125,13 @@ struct fts5_api { /* ** The maximum number of arguments to an SQL function. +** +** This value has a hard upper limit of 32767 due to storage +** constraints (it needs to fit inside a i16). We keep it +** lower than that to prevent abuse. */ #ifndef SQLITE_MAX_FUNCTION_ARG -# define SQLITE_MAX_FUNCTION_ARG 127 +# define SQLITE_MAX_FUNCTION_ARG 1000 #endif /* @@ -14474,7 +14418,7 @@ struct fts5_api { ** which case memory allocation statistics are disabled by default. 
*/ #if !defined(SQLITE_DEFAULT_MEMSTATUS) -# define SQLITE_DEFAULT_MEMSTATUS 0 +# define SQLITE_DEFAULT_MEMSTATUS 1 #endif /* @@ -14887,122 +14831,122 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); #define TK_GE 59 #define TK_ESCAPE 60 #define TK_COLUMNKW 61 -#define TK_CONCURRENT 62 -#define TK_DO 63 -#define TK_FOR 64 -#define TK_IGNORE 65 -#define TK_INITIALLY 66 -#define TK_INSTEAD 67 -#define TK_NO 68 -#define TK_KEY 69 -#define TK_OF 70 -#define TK_OFFSET 71 -#define TK_PRAGMA 72 -#define TK_RAISE 73 -#define TK_RECURSIVE 74 -#define TK_REPLACE 75 -#define TK_RESTRICT 76 -#define TK_ROW 77 -#define TK_ROWS 78 -#define TK_TRIGGER 79 -#define TK_VACUUM 80 -#define TK_VIEW 81 -#define TK_VIRTUAL 82 -#define TK_WITH 83 -#define TK_NULLS 84 -#define TK_FIRST 85 -#define TK_LAST 86 -#define TK_CURRENT 87 -#define TK_FOLLOWING 88 -#define TK_PARTITION 89 -#define TK_PRECEDING 90 -#define TK_RANGE 91 -#define TK_UNBOUNDED 92 -#define TK_EXCLUDE 93 -#define TK_GROUPS 94 -#define TK_OTHERS 95 -#define TK_TIES 96 -#define TK_GENERATED 97 -#define TK_ALWAYS 98 -#define TK_MATERIALIZED 99 -#define TK_REINDEX 100 -#define TK_RENAME 101 -#define TK_CTIME_KW 102 -#define TK_ANY 103 -#define TK_BITAND 104 -#define TK_BITOR 105 -#define TK_LSHIFT 106 -#define TK_RSHIFT 107 -#define TK_PLUS 108 -#define TK_MINUS 109 -#define TK_STAR 110 -#define TK_SLASH 111 -#define TK_REM 112 -#define TK_CONCAT 113 -#define TK_PTR 114 -#define TK_COLLATE 115 -#define TK_BITNOT 116 -#define TK_ON 117 -#define TK_INDEXED 118 -#define TK_STRING 119 -#define TK_JOIN_KW 120 -#define TK_CONSTRAINT 121 -#define TK_DEFAULT 122 -#define TK_NULL 123 -#define TK_PRIMARY 124 -#define TK_UNIQUE 125 -#define TK_CHECK 126 -#define TK_REFERENCES 127 -#define TK_AUTOINCR 128 -#define TK_INSERT 129 -#define TK_DELETE 130 -#define TK_UPDATE 131 -#define TK_SET 132 -#define TK_DEFERRABLE 133 -#define TK_FOREIGN 134 -#define TK_DROP 135 -#define TK_UNION 136 -#define TK_ALL 137 -#define TK_EXCEPT 138 -#define 
TK_INTERSECT 139 -#define TK_SELECT 140 -#define TK_VALUES 141 -#define TK_DISTINCT 142 -#define TK_DOT 143 -#define TK_FROM 144 -#define TK_JOIN 145 -#define TK_USING 146 -#define TK_ORDER 147 -#define TK_GROUP 148 -#define TK_HAVING 149 -#define TK_LIMIT 150 -#define TK_WHERE 151 -#define TK_RETURNING 152 -#define TK_INTO 153 -#define TK_NOTHING 154 -#define TK_FLOAT 155 -#define TK_BLOB 156 -#define TK_INTEGER 157 -#define TK_VARIABLE 158 -#define TK_CASE 159 -#define TK_WHEN 160 -#define TK_THEN 161 -#define TK_ELSE 162 -#define TK_INDEX 163 -#define TK_ALTER 164 -#define TK_ADD 165 -#define TK_WINDOW 166 -#define TK_OVER 167 -#define TK_FILTER 168 -#define TK_COLUMN 169 -#define TK_AGG_FUNCTION 170 -#define TK_AGG_COLUMN 171 -#define TK_TRUEFALSE 172 -#define TK_FUNCTION 173 -#define TK_UPLUS 174 -#define TK_UMINUS 175 -#define TK_TRUTH 176 -#define TK_REGISTER 177 +#define TK_DO 62 +#define TK_FOR 63 +#define TK_IGNORE 64 +#define TK_INITIALLY 65 +#define TK_INSTEAD 66 +#define TK_NO 67 +#define TK_KEY 68 +#define TK_OF 69 +#define TK_OFFSET 70 +#define TK_PRAGMA 71 +#define TK_RAISE 72 +#define TK_RECURSIVE 73 +#define TK_REPLACE 74 +#define TK_RESTRICT 75 +#define TK_ROW 76 +#define TK_ROWS 77 +#define TK_TRIGGER 78 +#define TK_VACUUM 79 +#define TK_VIEW 80 +#define TK_VIRTUAL 81 +#define TK_WITH 82 +#define TK_NULLS 83 +#define TK_FIRST 84 +#define TK_LAST 85 +#define TK_CURRENT 86 +#define TK_FOLLOWING 87 +#define TK_PARTITION 88 +#define TK_PRECEDING 89 +#define TK_RANGE 90 +#define TK_UNBOUNDED 91 +#define TK_EXCLUDE 92 +#define TK_GROUPS 93 +#define TK_OTHERS 94 +#define TK_TIES 95 +#define TK_GENERATED 96 +#define TK_ALWAYS 97 +#define TK_MATERIALIZED 98 +#define TK_REINDEX 99 +#define TK_RENAME 100 +#define TK_CTIME_KW 101 +#define TK_ANY 102 +#define TK_BITAND 103 +#define TK_BITOR 104 +#define TK_LSHIFT 105 +#define TK_RSHIFT 106 +#define TK_PLUS 107 +#define TK_MINUS 108 +#define TK_STAR 109 +#define TK_SLASH 110 +#define TK_REM 111 +#define 
TK_CONCAT 112 +#define TK_PTR 113 +#define TK_COLLATE 114 +#define TK_BITNOT 115 +#define TK_ON 116 +#define TK_INDEXED 117 +#define TK_STRING 118 +#define TK_JOIN_KW 119 +#define TK_CONSTRAINT 120 +#define TK_DEFAULT 121 +#define TK_NULL 122 +#define TK_PRIMARY 123 +#define TK_UNIQUE 124 +#define TK_CHECK 125 +#define TK_REFERENCES 126 +#define TK_AUTOINCR 127 +#define TK_INSERT 128 +#define TK_DELETE 129 +#define TK_UPDATE 130 +#define TK_SET 131 +#define TK_DEFERRABLE 132 +#define TK_FOREIGN 133 +#define TK_DROP 134 +#define TK_UNION 135 +#define TK_ALL 136 +#define TK_EXCEPT 137 +#define TK_INTERSECT 138 +#define TK_SELECT 139 +#define TK_VALUES 140 +#define TK_DISTINCT 141 +#define TK_DOT 142 +#define TK_FROM 143 +#define TK_JOIN 144 +#define TK_USING 145 +#define TK_ORDER 146 +#define TK_GROUP 147 +#define TK_HAVING 148 +#define TK_LIMIT 149 +#define TK_WHERE 150 +#define TK_RETURNING 151 +#define TK_INTO 152 +#define TK_NOTHING 153 +#define TK_FLOAT 154 +#define TK_BLOB 155 +#define TK_INTEGER 156 +#define TK_VARIABLE 157 +#define TK_CASE 158 +#define TK_WHEN 159 +#define TK_THEN 160 +#define TK_ELSE 161 +#define TK_INDEX 162 +#define TK_ALTER 163 +#define TK_ADD 164 +#define TK_WINDOW 165 +#define TK_OVER 166 +#define TK_FILTER 167 +#define TK_COLUMN 168 +#define TK_AGG_FUNCTION 169 +#define TK_AGG_COLUMN 170 +#define TK_TRUEFALSE 171 +#define TK_FUNCTION 172 +#define TK_UPLUS 173 +#define TK_UMINUS 174 +#define TK_TRUTH 175 +#define TK_REGISTER 176 +#define TK_CONCURRENT 177 #define TK_VECTOR 178 #define TK_SELECT_COLUMN 179 #define TK_IF_NULL_ROW 180 @@ -15020,7 +14964,6 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); #include #include #include -#include /* ** Use a macro to replace memcpy() if compiled with SQLITE_INLINE_MEMCPY. 
@@ -15043,6 +14986,7 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); # define float sqlite_int64 # define fabs(X) ((X)<0?-(X):(X)) # define sqlite3IsOverflow(X) 0 +# define LONGDOUBLE_TYPE sqlite_int64 # ifndef SQLITE_BIG_DBL # define SQLITE_BIG_DBL (((sqlite3_int64)1)<<50) # endif @@ -15217,6 +15161,9 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); # define INT8_TYPE signed char # endif #endif +#ifndef LONGDOUBLE_TYPE +# define LONGDOUBLE_TYPE long double +#endif typedef sqlite_int64 i64; /* 8-byte signed integer */ typedef sqlite_uint64 u64; /* 8-byte unsigned integer */ typedef UINT32_TYPE u32; /* 4-byte unsigned integer */ @@ -16388,6 +16335,8 @@ SQLITE_PRIVATE void sqlite3PagerRefdump(Pager*); SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager*); #endif +SQLITE_PRIVATE void sqlite3PagerSetCommitTime(Pager *pPager, u64 *aCommitTime); + #endif /* SQLITE_PAGER_H */ /************** End of pager.h ***********************************************/ @@ -16429,9 +16378,6 @@ SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager*); #define BTREE_AUTOVACUUM_FULL 1 /* Do full auto-vacuum */ #define BTREE_AUTOVACUUM_INCR 2 /* Incremental vacuum */ -typedef struct BtCursorMethods BtCursorMethods; -typedef struct BtreeMethods BtreeMethods; - /* ** Forward declarations of structure */ @@ -16639,9 +16585,6 @@ SQLITE_PRIVATE int sqlite3BtreeCursor( ); SQLITE_PRIVATE BtCursor *sqlite3BtreeFakeValidCursor(void); SQLITE_PRIVATE int sqlite3BtreeCursorSize(void); -#ifdef SQLITE_DEBUG -SQLITE_PRIVATE int sqlite3BtreeClosesWithCursor(Btree*,BtCursor*); -#endif SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor*); SQLITE_PRIVATE void sqlite3BtreeCursorHintFlags(BtCursor*, unsigned); #ifdef SQLITE_ENABLE_CURSOR_HINTS @@ -16820,21 +16763,6 @@ SQLITE_PRIVATE int sqlite3SchemaMutexHeld(sqlite3*,int,Schema*); # define sqlite3BtreeHoldsAllMutexes(X) 1 # define sqlite3SchemaMutexHeld(X,Y,Z) 1 #endif -#define BTREE_DIR_NONE 0 -#define BTREE_DIR_FORWARD 1 -#define BTREE_DIR_REVERSE 2 - -#ifdef 
SQLITE_ENABLE_HCT -SQLITE_PRIVATE void sqlite3BtreeCursorDir(BtCursor*, int eDir); -SQLITE_PRIVATE int sqlite3HctVtabInit(sqlite3*); -SQLITE_PRIVATE int sqlite3BtreeSchemaLoaded(Btree *pBt); -#else -# define sqlite3BtreeCursorDir(a,b) -# define sqlite3BtreeSchemaLoaded(x) SQLITE_OK -#endif - -SQLITE_PRIVATE int sqlite3BtreePragma(Btree *pBtree, char **aFnctl); -SQLITE_PRIVATE int sqlite3BtreeIdxDelete(BtCursor*, UnpackedRecord*); #endif /* SQLITE_BTREE_H */ @@ -17130,23 +17058,23 @@ typedef struct VdbeOpList VdbeOpList; #define OP_SetCookie 100 #define OP_ReopenIdx 101 /* synopsis: root=P2 iDb=P3 */ #define OP_OpenRead 102 /* synopsis: root=P2 iDb=P3 */ -#define OP_OpenWrite 103 /* synopsis: root=P2 iDb=P3 */ -#define OP_BitAnd 104 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */ -#define OP_BitOr 105 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */ -#define OP_ShiftLeft 106 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<>r[P1] */ -#define OP_Add 108 /* same as TK_PLUS, synopsis: r[P3]=r[P1]+r[P2] */ -#define OP_Subtract 109 /* same as TK_MINUS, synopsis: r[P3]=r[P2]-r[P1] */ -#define OP_Multiply 110 /* same as TK_STAR, synopsis: r[P3]=r[P1]*r[P2] */ -#define OP_Divide 111 /* same as TK_SLASH, synopsis: r[P3]=r[P2]/r[P1] */ -#define OP_Remainder 112 /* same as TK_REM, synopsis: r[P3]=r[P2]%r[P1] */ -#define OP_Concat 113 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */ +#define OP_BitAnd 103 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */ +#define OP_BitOr 104 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */ +#define OP_ShiftLeft 105 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<>r[P1] */ +#define OP_Add 107 /* same as TK_PLUS, synopsis: r[P3]=r[P1]+r[P2] */ +#define OP_Subtract 108 /* same as TK_MINUS, synopsis: r[P3]=r[P2]-r[P1] */ +#define OP_Multiply 109 /* same as TK_STAR, synopsis: r[P3]=r[P1]*r[P2] */ +#define OP_Divide 110 /* same as TK_SLASH, synopsis: r[P3]=r[P2]/r[P1] */ +#define OP_Remainder 111 /* same as TK_REM, synopsis: 
r[P3]=r[P2]%r[P1] */ +#define OP_Concat 112 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */ +#define OP_OpenWrite 113 /* synopsis: root=P2 iDb=P3 */ #define OP_OpenDup 114 -#define OP_OpenAutoindex 115 /* synopsis: nColumn=P2 */ -#define OP_BitNot 116 /* same as TK_BITNOT, synopsis: r[P2]= ~r[P1] */ +#define OP_BitNot 115 /* same as TK_BITNOT, synopsis: r[P2]= ~r[P1] */ +#define OP_OpenAutoindex 116 /* synopsis: nColumn=P2 */ #define OP_OpenEphemeral 117 /* synopsis: nColumn=P2 */ -#define OP_SorterOpen 118 -#define OP_String8 119 /* same as TK_STRING, synopsis: r[P2]='P4' */ +#define OP_String8 118 /* same as TK_STRING, synopsis: r[P2]='P4' */ +#define OP_SorterOpen 119 #define OP_SequenceTest 120 /* synopsis: if( cursor[P1].ctr++ ) pc = P2 */ #define OP_OpenPseudo 121 /* synopsis: P3 columns in r[P2] */ #define OP_Close 122 @@ -17181,8 +17109,8 @@ typedef struct VdbeOpList VdbeOpList; #define OP_DropTable 151 #define OP_DropIndex 152 #define OP_DropTrigger 153 -#define OP_IntegrityCk 154 -#define OP_Real 155 /* same as TK_FLOAT, synopsis: r[P2]=P4 */ +#define OP_Real 154 /* same as TK_FLOAT, synopsis: r[P2]=P4 */ +#define OP_IntegrityCk 155 #define OP_RowSetAdd 156 /* synopsis: rowset(P1)=r[P2] */ #define OP_Param 157 #define OP_FkCounter 158 /* synopsis: fkctr[P1]+=P2 */ @@ -17243,14 +17171,14 @@ typedef struct VdbeOpList VdbeOpList; /* 72 */ 0x10, 0x10, 0x00, 0x10, 0x00, 0x10, 0x10, 0x00,\ /* 80 */ 0x00, 0x10, 0x10, 0x00, 0x00, 0x00, 0x02, 0x02,\ /* 88 */ 0x02, 0x00, 0x00, 0x12, 0x1e, 0x20, 0x40, 0x00,\ -/* 96 */ 0x00, 0x00, 0x10, 0x10, 0x00, 0x40, 0x40, 0x00,\ +/* 96 */ 0x00, 0x00, 0x10, 0x10, 0x00, 0x40, 0x40, 0x26,\ /* 104 */ 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26,\ -/* 112 */ 0x26, 0x26, 0x40, 0x40, 0x12, 0x40, 0x00, 0x10,\ +/* 112 */ 0x26, 0x00, 0x40, 0x12, 0x40, 0x40, 0x10, 0x00,\ /* 120 */ 0x00, 0x00, 0x40, 0x00, 0x40, 0x40, 0x10, 0x10,\ /* 128 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x50,\ /* 136 */ 0x00, 0x40, 0x04, 0x04, 0x00, 0x40, 
0x50, 0x40,\ /* 144 */ 0x10, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00,\ -/* 152 */ 0x00, 0x00, 0x00, 0x10, 0x06, 0x10, 0x00, 0x04,\ +/* 152 */ 0x00, 0x00, 0x10, 0x00, 0x06, 0x10, 0x00, 0x04,\ /* 160 */ 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ /* 168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x10, 0x50,\ /* 176 */ 0x40, 0x00, 0x10, 0x10, 0x02, 0x12, 0x12, 0x00,\ @@ -17511,171 +17439,71 @@ SQLITE_PRIVATE void sqlite3VdbePrintOp(FILE*, int, VdbeOp*); SQLITE_PRIVATE int sqlite3CursorRangeHintExprCheck(Walker *pWalker, Expr *pExpr); #endif + +#define COMMIT_TIME_START 0 +#define COMMIT_TIME_BEFORE_HALT 1 +#define COMMIT_TIME_BEFORE_VDBECOMMIT 2 + +#define COMMIT_TIME_BEFORE_PHASEONE 3 +#define COMMIT_TIME_START_FIXUNLOCKED 4 +#define COMMIT_TIME_START_RELOCATE1 5 +#define COMMIT_TIME_START_RELOCATE2 6 + +#define COMMIT_TIME_OTHERWRITERS 7 +#define COMMIT_TIME_RELOCATE1COUNT 8 +#define COMMIT_TIME_RELOCATE2COUNT 9 + +#define COMMIT_TIME_RELOCATE2_READUS 10 +#define COMMIT_TIME_RELOCATE2_READCOUNT 11 +#define COMMIT_TIME_RELOCATE2_EXACTUS 12 +#define COMMIT_TIME_RELOCATE2_ALLOCATEUS 13 +#define COMMIT_TIME_RELOCATE2_RELOCATEUS 14 + +#define COMMIT_TIME_AFTER_FIXUNLOCKED 15 + +#define COMMIT_TIME_BEFORE_WALFRAMES 16 +#define COMMIT_TIME_AFTER_CHANGECOUNTER 17 +#define COMMIT_TIME_AFTER_RESTARTLOG 18 +#define COMMIT_TIME_AFTER_WRITEHDR 19 + +#define COMMIT_TIME_OSWRITE 20 + +#define COMMIT_TIME_AFTER_WRITEFRAMES 21 + +#define COMMIT_TIME_NFRAME 22 +#define COMMIT_TIME_HASHMAPUS 23 + +#define COMMIT_TIME_BEFORE_WALINDEX 24 + +#define COMMIT_TIME_WALINDEX_HASHGETUS 25 +#define COMMIT_TIME_WALINDEX_MEMSETUS 26 +#define COMMIT_TIME_WALINDEX_CLEANUPUS 27 +#define COMMIT_TIME_WALINDEX_ENTRYUS 28 + +#define COMMIT_TIME_AFTER_WALINDEX 29 +#define COMMIT_TIME_AFTER_WALINDEXHDR 30 +#define COMMIT_TIME_WALFRAMESFLAGS 31 +#define COMMIT_TIME_AFTER_WALFRAMES 32 +#define COMMIT_TIME_BEFORE_PHASETWO 33 +#define COMMIT_TIME_AFTER_PHASETWO 34 + +#define COMMIT_TIME_AFTER_VDBECOMMIT 35 
+#define COMMIT_TIME_AFTER_HALT 36 +#define COMMIT_TIME_FINISH 37 + +#define COMMIT_TIME_N 38 + +/* #define COMMIT_TIME_TIMEOUT (2*1000*1000) */ +#define COMMIT_TIME_TIMEOUT (10*1000) /* 10ms threshold */ + +SQLITE_PRIVATE void sqlite3CommitTimeLog(u64*); +SQLITE_PRIVATE u64 sqlite3STimeNow(); +SQLITE_PRIVATE void sqlite3CommitTimeSet(u64*, int); + #endif /* SQLITE_VDBE_H */ /************** End of vdbe.h ************************************************/ /************** Continuing where we left off in sqliteInt.h ******************/ -/************** Include btreeModules.h in the middle of sqliteInt.h **********/ -/************** Begin file btreeModules.h ************************************/ -SQLITE_PRIVATE int sqlite3HctBtreeCursor(Btree*, Pgno, int, struct KeyInfo*, BtCursor*); -SQLITE_PRIVATE sqlite3_uint64 sqlite3HctBtreeSeekCount(Btree*); -SQLITE_PRIVATE Pgno sqlite3HctBtreeLastPage(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeClose(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeSetCacheSize(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeSetSpillSize(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeSetMmapLimit(Btree*, sqlite3_int64); -SQLITE_PRIVATE int sqlite3HctBtreeSetPagerFlags(Btree*, unsigned); -SQLITE_PRIVATE int sqlite3HctBtreeSetPageSize(Btree*, int, int, int); -SQLITE_PRIVATE int sqlite3HctBtreeGetPageSize(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeGetReserveNoMutex(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeGetRequestedReserve(Btree*); -SQLITE_PRIVATE Pgno sqlite3HctBtreeMaxPageCount(Btree*, Pgno); -SQLITE_PRIVATE int sqlite3HctBtreeSecureDelete(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeSetAutoVacuum(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeGetAutoVacuum(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeNewDb(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeBeginTrans(Btree*, int, int*); -SQLITE_PRIVATE int sqlite3HctBtreeIncrVacuum(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseOne(Btree*, const char*); -SQLITE_PRIVATE int 
sqlite3HctBtreeCommitPhaseTwo(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeCommit(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeTripAllCursors(Btree*, int, int); -SQLITE_PRIVATE int sqlite3HctBtreeRollback(Btree*, int, int); -SQLITE_PRIVATE int sqlite3HctBtreeBeginStmt(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeSavepoint(Btree*, int, int); -SQLITE_PRIVATE int sqlite3HctBtreeCreateTable(Btree*, Pgno*, int); -SQLITE_PRIVATE int sqlite3HctBtreeClearTable(Btree*, int, i64*); -SQLITE_PRIVATE int sqlite3HctBtreeDropTable(Btree*, int, int*); -SQLITE_PRIVATE void sqlite3HctBtreeGetMeta(Btree*, int, u32*); -SQLITE_PRIVATE int sqlite3HctBtreeUpdateMeta(Btree*, int, u32); -SQLITE_PRIVATE int sqlite3HctBtreePragma(Btree*, char**); -SQLITE_PRIVATE Pager *sqlite3HctBtreePager(Btree*); -SQLITE_PRIVATE const char *sqlite3HctBtreeGetFilename(Btree*); -SQLITE_PRIVATE const char *sqlite3HctBtreeGetJournalname(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeTxnState(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeIsInBackup(Btree*); -SQLITE_PRIVATE void *sqlite3HctBtreeSchema(Btree*, int, void(*)(void *)); -SQLITE_PRIVATE int sqlite3HctBtreeSchemaLocked(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeIsReadonly(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeSetVersion(Btree*, int); -SQLITE_PRIVATE int sqlite3HctBtreeIntegrityCheck(sqlite3*, Btree*, Pgno*, Mem*, int, int, int*, char**); -SQLITE_PRIVATE int sqlite3HctBtreeCheckpoint(Btree*, int, int *, int *); -SQLITE_PRIVATE int sqlite3HctBtreeExclusiveLock(Btree*); -SQLITE_PRIVATE int sqlite3HctBtreeNext(BtCursor*, int); -SQLITE_PRIVATE int sqlite3HctBtreeCursorHasMoved(BtCursor*); -SQLITE_PRIVATE void sqlite3HctBtreeClearCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3HctBtreeCursorRestore(BtCursor*, int*); -SQLITE_PRIVATE void sqlite3HctBtreeCursorHintFlags(BtCursor*, unsigned); -SQLITE_PRIVATE int sqlite3HctBtreeCloseCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3HctBtreeCursorIsValid(BtCursor*); -SQLITE_PRIVATE int 
sqlite3HctBtreeCursorIsValidNN(BtCursor*); -SQLITE_PRIVATE i64 sqlite3HctBtreeIntegerKey(BtCursor*); -SQLITE_PRIVATE void sqlite3HctBtreeCursorPin(BtCursor*); -SQLITE_PRIVATE void sqlite3HctBtreeCursorUnpin(BtCursor*); -SQLITE_PRIVATE u32 sqlite3HctBtreePayloadSize(BtCursor*); -SQLITE_PRIVATE sqlite3_int64 sqlite3HctBtreeMaxRecordSize(BtCursor*); -SQLITE_PRIVATE int sqlite3HctBtreePayload(BtCursor*, u32, u32, void*); -SQLITE_PRIVATE int sqlite3HctBtreePayloadChecked(BtCursor*, u32, u32, void *); -SQLITE_PRIVATE const void *sqlite3HctBtreePayloadFetch(BtCursor*, u32*); -SQLITE_PRIVATE int sqlite3HctBtreeFirst(BtCursor*, int*); -SQLITE_PRIVATE int sqlite3HctBtreeLast(BtCursor*, int*); -SQLITE_PRIVATE int sqlite3HctBtreeTableMoveto(BtCursor*, i64, int, int*); -SQLITE_PRIVATE int sqlite3HctBtreeIndexMoveto(BtCursor*, UnpackedRecord*, int*); -SQLITE_PRIVATE void sqlite3HctBtreeCursorDir(BtCursor*, int); -SQLITE_PRIVATE int sqlite3HctBtreeEof(BtCursor*); -SQLITE_PRIVATE i64 sqlite3HctBtreeRowCountEst(BtCursor*); -SQLITE_PRIVATE int sqlite3HctBtreePrevious(BtCursor*, int); -SQLITE_PRIVATE int sqlite3HctBtreeInsert(BtCursor*, const BtreePayload*, int, int); -SQLITE_PRIVATE int sqlite3HctBtreeDelete(BtCursor*, u8); -SQLITE_PRIVATE int sqlite3HctBtreeIdxDelete(BtCursor*, UnpackedRecord*); -SQLITE_PRIVATE int sqlite3HctBtreePutData(BtCursor*, u32, u32, void*); -SQLITE_PRIVATE void sqlite3HctBtreeIncrblobCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3HctBtreeCursorHasHint(BtCursor*, unsigned int); -SQLITE_PRIVATE int sqlite3HctBtreeTransferRow(BtCursor*, BtCursor*, i64); -SQLITE_PRIVATE int sqlite3HctBtreeClearTableOfCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3HctBtreeCount(sqlite3*, BtCursor*, i64*); -SQLITE_PRIVATE int sqlite3StockBtreeCursor(Btree*, Pgno, int, struct KeyInfo*, BtCursor*); -SQLITE_PRIVATE sqlite3_uint64 sqlite3StockBtreeSeekCount(Btree*); -SQLITE_PRIVATE Pgno sqlite3StockBtreeLastPage(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeClose(Btree*); 
-SQLITE_PRIVATE int sqlite3StockBtreeSetCacheSize(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeSetSpillSize(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeSetMmapLimit(Btree*, sqlite3_int64); -SQLITE_PRIVATE int sqlite3StockBtreeSetPagerFlags(Btree*, unsigned); -SQLITE_PRIVATE int sqlite3StockBtreeSetPageSize(Btree*, int, int, int); -SQLITE_PRIVATE int sqlite3StockBtreeGetPageSize(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeGetReserveNoMutex(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeGetRequestedReserve(Btree*); -SQLITE_PRIVATE Pgno sqlite3StockBtreeMaxPageCount(Btree*, Pgno); -SQLITE_PRIVATE int sqlite3StockBtreeSecureDelete(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeSetAutoVacuum(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeGetAutoVacuum(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeNewDb(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeBeginTrans(Btree*, int, int*); -SQLITE_PRIVATE int sqlite3StockBtreeIncrVacuum(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeCommitPhaseOne(Btree*, const char*); -SQLITE_PRIVATE int sqlite3StockBtreeCommitPhaseTwo(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeCommit(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeTripAllCursors(Btree*, int, int); -SQLITE_PRIVATE int sqlite3StockBtreeRollback(Btree*, int, int); -SQLITE_PRIVATE int sqlite3StockBtreeBeginStmt(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeSavepoint(Btree*, int, int); -SQLITE_PRIVATE int sqlite3StockBtreeCreateTable(Btree*, Pgno*, int); -SQLITE_PRIVATE int sqlite3StockBtreeClearTable(Btree*, int, i64*); -SQLITE_PRIVATE int sqlite3StockBtreeDropTable(Btree*, int, int*); -SQLITE_PRIVATE void sqlite3StockBtreeGetMeta(Btree*, int, u32*); -SQLITE_PRIVATE int sqlite3StockBtreeUpdateMeta(Btree*, int, u32); -SQLITE_PRIVATE int sqlite3StockBtreePragma(Btree*, char**); -SQLITE_PRIVATE Pager *sqlite3StockBtreePager(Btree*); -SQLITE_PRIVATE const char *sqlite3StockBtreeGetFilename(Btree*); -SQLITE_PRIVATE const char 
*sqlite3StockBtreeGetJournalname(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeTxnState(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeIsInBackup(Btree*); -SQLITE_PRIVATE void *sqlite3StockBtreeSchema(Btree*, int, void(*)(void *)); -SQLITE_PRIVATE int sqlite3StockBtreeSchemaLocked(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeIsReadonly(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeSetVersion(Btree*, int); -SQLITE_PRIVATE int sqlite3StockBtreeIntegrityCheck(sqlite3*, Btree*, Pgno*, Mem*, int, int, int*, char**); -SQLITE_PRIVATE int sqlite3StockBtreeCheckpoint(Btree*, int, int *, int *); -SQLITE_PRIVATE int sqlite3StockBtreeExclusiveLock(Btree*); -SQLITE_PRIVATE int sqlite3StockBtreeNext(BtCursor*, int); -SQLITE_PRIVATE int sqlite3StockBtreeCursorHasMoved(BtCursor*); -SQLITE_PRIVATE void sqlite3StockBtreeClearCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreeCursorRestore(BtCursor*, int*); -SQLITE_PRIVATE void sqlite3StockBtreeCursorHintFlags(BtCursor*, unsigned); -SQLITE_PRIVATE int sqlite3StockBtreeCloseCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreeCursorIsValid(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreeCursorIsValidNN(BtCursor*); -SQLITE_PRIVATE i64 sqlite3StockBtreeIntegerKey(BtCursor*); -SQLITE_PRIVATE void sqlite3StockBtreeCursorPin(BtCursor*); -SQLITE_PRIVATE void sqlite3StockBtreeCursorUnpin(BtCursor*); -SQLITE_PRIVATE u32 sqlite3StockBtreePayloadSize(BtCursor*); -SQLITE_PRIVATE sqlite3_int64 sqlite3StockBtreeMaxRecordSize(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreePayload(BtCursor*, u32, u32, void*); -SQLITE_PRIVATE int sqlite3StockBtreePayloadChecked(BtCursor*, u32, u32, void *); -SQLITE_PRIVATE const void *sqlite3StockBtreePayloadFetch(BtCursor*, u32*); -SQLITE_PRIVATE int sqlite3StockBtreeFirst(BtCursor*, int*); -SQLITE_PRIVATE int sqlite3StockBtreeLast(BtCursor*, int*); -SQLITE_PRIVATE int sqlite3StockBtreeTableMoveto(BtCursor*, i64, int, int*); -SQLITE_PRIVATE int sqlite3StockBtreeIndexMoveto(BtCursor*, UnpackedRecord*, 
int*); -SQLITE_PRIVATE void sqlite3StockBtreeCursorDir(BtCursor*, int); -SQLITE_PRIVATE int sqlite3StockBtreeEof(BtCursor*); -SQLITE_PRIVATE i64 sqlite3StockBtreeRowCountEst(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreePrevious(BtCursor*, int); -SQLITE_PRIVATE int sqlite3StockBtreeInsert(BtCursor*, const BtreePayload*, int, int); -SQLITE_PRIVATE int sqlite3StockBtreeDelete(BtCursor*, u8); -SQLITE_PRIVATE int sqlite3StockBtreeIdxDelete(BtCursor*, UnpackedRecord*); -SQLITE_PRIVATE int sqlite3StockBtreePutData(BtCursor*, u32, u32, void*); -SQLITE_PRIVATE void sqlite3StockBtreeIncrblobCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreeCursorHasHint(BtCursor*, unsigned int); -SQLITE_PRIVATE int sqlite3StockBtreeTransferRow(BtCursor*, BtCursor*, i64); -SQLITE_PRIVATE int sqlite3StockBtreeClearTableOfCursor(BtCursor*); -SQLITE_PRIVATE int sqlite3StockBtreeCount(sqlite3*, BtCursor*, i64*); -SQLITE_PRIVATE BtCursor *sqlite3StockBtreeFakeValidCursor(void); - - -/************** End of btreeModules.h ****************************************/ -/************** Continuing where we left off in sqliteInt.h ******************/ /************** Include pcache.h in the middle of sqliteInt.h ****************/ /************** Begin file pcache.h ******************************************/ /* @@ -18031,6 +17859,9 @@ struct Schema { u8 enc; /* Text encoding used by this database */ u16 schemaFlags; /* Flags associated with this schema */ int cache_size; /* Number of pages to use in the cache */ +#ifdef SQLITE_ENABLE_STAT4 + void *pStat4Space; /* Memory for stat4 Index.aSample[] arrays */ +#endif }; /* @@ -18155,11 +17986,47 @@ struct FuncDefHash { }; #define SQLITE_FUNC_HASH(C,L) (((C)+(L))%SQLITE_FUNC_HASH_SZ) +#if defined(SQLITE_USER_AUTHENTICATION) +# warning "The SQLITE_USER_AUTHENTICATION extension is deprecated. \ + See ext/userauth/user-auth.txt for details." 
+#endif +#ifdef SQLITE_USER_AUTHENTICATION +/* +** Information held in the "sqlite3" database connection object and used +** to manage user authentication. +*/ +typedef struct sqlite3_userauth sqlite3_userauth; +struct sqlite3_userauth { + u8 authLevel; /* Current authentication level */ + int nAuthPW; /* Size of the zAuthPW in bytes */ + char *zAuthPW; /* Password used to authenticate */ + char *zAuthUser; /* User name used to authenticate */ +}; + +/* Allowed values for sqlite3_userauth.authLevel */ +#define UAUTH_Unknown 0 /* Authentication not yet checked */ +#define UAUTH_Fail 1 /* User authentication failed */ +#define UAUTH_User 2 /* Authenticated as a normal user */ +#define UAUTH_Admin 3 /* Authenticated as an administrator */ + +/* Functions used only by user authorization logic */ +SQLITE_PRIVATE int sqlite3UserAuthTable(const char*); +SQLITE_PRIVATE int sqlite3UserAuthCheckLogin(sqlite3*,const char*,u8*); +SQLITE_PRIVATE void sqlite3UserAuthInit(sqlite3*); +SQLITE_PRIVATE void sqlite3CryptFunc(sqlite3_context*,int,sqlite3_value**); + +#endif /* SQLITE_USER_AUTHENTICATION */ + /* ** typedef for the authorization callback function. 
*/ -typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, - const char*); +#ifdef SQLITE_USER_AUTHENTICATION + typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, + const char*, const char*); +#else + typedef int (*sqlite3_xauth)(void*,int,const char*,const char*,const char*, + const char*); +#endif #ifndef SQLITE_OMIT_DEPRECATED /* This is an extra SQLITE_TRACE macro that indicates "legacy" tracing @@ -18322,6 +18189,9 @@ struct sqlite3 { void (*xUnlockNotify)(void **, int); /* Unlock notify callback */ sqlite3 *pNextBlocked; /* Next in list of all blocked connections */ #endif +#ifdef SQLITE_USER_AUTHENTICATION + sqlite3_userauth auth; /* User authentication information */ +#endif #ifndef SQLITE_OMIT_CONCURRENT /* Return values for sqlite3_commit_status() requests: ** SQLITE_COMMIT_CONFLICT_DB, CONFLICT_FRAME and CONFLICT_PGNO. @@ -18329,16 +18199,70 @@ struct sqlite3 { u32 aCommit[5]; #endif - /* Used as part of testing hctree commits */ - void (*xMtCommit)(void*, int); - void *pMtCommitCtx; + u64 *aPrepareTime; + u64 *aSchemaTime; +}; + +#define PREPARE_TIME_START 0 +#define PREPARE_TIME_BEGINPARSE 1 +#define PREPARE_TIME_BEGINPRAGMA 2 - /* The sqlite3_hct_journal_validation_hook() callback */ - void *pValidateArg; - int (*xValidate)(void*, i64, const char*, const void*, int, i64); +#define PREPARE_TIME_BEGINAUTHCHECK 3 +#define PREPARE_TIME_ENDAUTHCHECK 4 +#define PREPARE_TIME_BEGINLOADSCHEMA 5 +#define PREPARE_TIME_ENDLOADSCHEMA 6 - int bHctMigrate; -}; + +#define PREPARE_TIME_BEGINCACHESIZE 7 +#define PREPARE_TIME_BEGINSETCACHESIZE 8 +#define PREPARE_TIME_ENDSETCACHESIZE 9 +#define PREPARE_TIME_ENDCACHESIZE 10 +#define PREPARE_TIME_ENDPRAGMA 11 +#define PREPARE_TIME_ENDPARSE 12 +#define PREPARE_TIME_FINISH 13 + +#define PREPARE_TIME_N 14 + + + +#define SCHEMA_TIME_START 0 +#define SCHEMA_TIME_AFTER_CREATE_1 1 +#define SCHEMA_TIME_AFTER_OPEN_TRANS 2 +#define SCHEMA_TIME_AFTER_GET_META 3 +#define 
SCHEMA_TIME_AFTER_FIX_ENCODING 4 +#define SCHEMA_TIME_AFTER_SETCACHESIZE 5 +#define SCHEMA_TIME_BEGIN_EXEC 6 +#define SCHEMA_TIME_BEFORE_STEP 7 +#define SCHEMA_TIME_BEFORE_PREPARE 8 +#define SCHEMA_TIME_BEFORE_FINALIZE 9 +#define SCHEMA_TIME_BEGIN_ANALYZE_LOAD 10 + +#define SCHEMA_TIME_AFTER_CLEAR_STATS 11 +#define SCHEMA_TIME_AFTER_STAT1 12 +#define SCHEMA_TIME_AFTER_DEFAULTS 13 + +#define SCHEMA_TIME_AFTER_STAT4_SPACE 14 +#define SCHEMA_TIME_AFTER_STAT4_PREPARE 15 + +#define SCHEMA_TIME_STAT4_GROWUS 16 +#define SCHEMA_TIME_STAT4_Q2_BODYUS 17 +#define SCHEMA_TIME_AFTER_STAT4_Q2 18 + +#define SCHEMA_TIME_AFTER_STAT4 19 + +#define SCHEMA_TIME_END_ANALYZE_LOAD 20 +#define SCHEMA_TIME_FINISH 21 + +#define SCHEMA_TIME_N 22 +#define SCHEMA_TIME_TIMEOUT (500 * 1000) + + + +#define sqlite3PrepareTimeSet(x,y) sqlite3CommitTimeSet(x,y) +SQLITE_PRIVATE void sqlite3PrepareTimeLog(const char *zSql, int nSql, u64 *aPrepareTime); +SQLITE_PRIVATE void sqlite3SchemaTimeLog(u64 *aSchemaTime, const char *zFile); + +#define PREPARE_TIME_TIMEOUT (2 * 1000 * 1000) /* 2 second timeout */ /* @@ -18505,7 +18429,7 @@ struct sqlite3 { ** field is used by per-connection app-def functions. */ struct FuncDef { - i8 nArg; /* Number of arguments. -1 means unlimited */ + i16 nArg; /* Number of arguments. 
-1 means unlimited */ u32 funcFlags; /* Some combination of SQLITE_FUNC_* */ void *pUserData; /* User data parameter */ FuncDef *pNext; /* Next function with same name */ @@ -19191,7 +19115,6 @@ struct FKey { struct KeyInfo { u32 nRef; /* Number of references to this KeyInfo object */ u8 enc; /* Text encoding - one of the SQLITE_UTF* values */ - u16 nUniqField; u16 nKeyField; /* Number of key columns in the index */ u16 nAllField; /* Total columns, including key plus others */ sqlite3 *db; /* The database connection */ @@ -19335,7 +19258,7 @@ struct Index { ** expression, or a reference to a VIRTUAL column */ #ifdef SQLITE_ENABLE_STAT4 int nSample; /* Number of elements in aSample[] */ - int mxSample; /* Number of slots allocated to aSample[] */ + int nSampleAlloc; /* Number of slots allocated to aSample[] */ int nSampleCol; /* Size of IndexSample.anEq[] and so on */ tRowcnt *aAvgEq; /* Average nEq values for keys not in aSample */ IndexSample *aSample; /* Samples of the left-most key */ @@ -19646,7 +19569,7 @@ struct Expr { #define EP_IsTrue 0x10000000 /* Always has boolean value of TRUE */ #define EP_IsFalse 0x20000000 /* Always has boolean value of FALSE */ #define EP_FromDDL 0x40000000 /* Originates from sqlite_schema */ -#define EP_SubtArg 0x80000000 /* Is argument to SQLITE_SUBTYPE function */ + /* 0x80000000 // Available */ /* The EP_Propagate mask is a set of properties that automatically propagate ** upwards into parent nodes. @@ -20202,7 +20125,7 @@ struct Select { ** row of result as the key in table pDest->iSDParm. ** Apply the affinity pDest->affSdst before storing ** results. if pDest->iSDParm2 is positive, then it is -** a register holding a Bloom filter for the IN operator +** a regsiter holding a Bloom filter for the IN operator ** that should be populated in addition to the ** pDest->iSDParm table. This SRT is used to ** implement "IN (SELECT ...)". 
@@ -20801,6 +20724,7 @@ struct Sqlite3Config { u8 bUseCis; /* Use covering indices for full-scans */ u8 bSmallMalloc; /* Avoid large memory allocations if true */ u8 bExtraSchemaChecks; /* Verify type,name,tbl_name in schema */ + u8 bUseLongDouble; /* Make use of long double */ #ifdef SQLITE_DEBUG u8 bJsonSelfcheck; /* Double-check JSON parsing */ #endif @@ -21175,6 +21099,15 @@ SQLITE_PRIVATE int sqlite3CorruptPgnoError(int,Pgno); # define SQLITE_ENABLE_FTS3 1 #endif +/* +** The ctype.h header is needed for non-ASCII systems. It is also +** needed by FTS3 when FTS3 is included in the amalgamation. +*/ +#if !defined(SQLITE_ASCII) || \ + (defined(SQLITE_ENABLE_FTS3) && defined(SQLITE_AMALGAMATION)) +# include +#endif + /* ** The following macros mimic the standard library functions toupper(), ** isspace(), isalnum(), isdigit() and isxdigit(), respectively. The @@ -21798,7 +21731,7 @@ SQLITE_PRIVATE int sqlite3GetInt32(const char *, int*); SQLITE_PRIVATE int sqlite3GetUInt32(const char*, u32*); SQLITE_PRIVATE int sqlite3Atoi(const char*); #ifndef SQLITE_OMIT_UTF16 -SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *pData, int nByte, int nChar); +SQLITE_PRIVATE int sqlite3Utf16ByteLen(const void *pData, int nChar); #endif SQLITE_PRIVATE int sqlite3Utf8CharLen(const char *pData, int nByte); SQLITE_PRIVATE u32 sqlite3Utf8Read(const u8**); @@ -22381,11 +22314,6 @@ SQLITE_PRIVATE sqlite3_uint64 sqlite3Hwtime(void); # define IS_STMT_SCANSTATUS(db) 0 #endif -#ifdef SQLITE_ENABLE_HCT -SQLITE_PRIVATE int sqlite3IsHct(Btree*); -SQLITE_PRIVATE int sqlite3HctSchemaOp(Btree*, const char*); -#endif - #endif /* SQLITEINT_H */ /************** End of sqliteInt.h *******************************************/ @@ -23261,6 +23189,9 @@ static const char * const sqlite3azCompileOpt[] = { #ifdef SQLITE_UNTESTABLE "UNTESTABLE", #endif +#ifdef SQLITE_USER_AUTHENTICATION + "USER_AUTHENTICATION", +#endif #ifdef SQLITE_USE_ALLOCA "USE_ALLOCA", #endif @@ -23537,6 +23468,7 @@ SQLITE_PRIVATE 
SQLITE_WSD struct Sqlite3Config sqlite3Config = { SQLITE_ALLOW_COVERING_INDEX_SCAN, /* bUseCis */ 0, /* bSmallMalloc */ 1, /* bExtraSchemaChecks */ + sizeof(LONGDOUBLE_TYPE)>8, /* bUseLongDouble */ #ifdef SQLITE_DEBUG 0, /* bJsonSelfcheck */ #endif @@ -24109,7 +24041,7 @@ struct sqlite3_context { int isError; /* Error code returned by the function. */ u8 enc; /* Encoding to use for results */ u8 skipFlag; /* Skip accumulator loading if true */ - u8 argc; /* Number of arguments */ + u16 argc; /* Number of arguments */ sqlite3_value *argv[1]; /* Argument set */ }; @@ -24230,6 +24162,7 @@ struct Vdbe { int nScan; /* Entries in aScan[] */ ScanStatus *aScan; /* Scan definitions for sqlite3_stmt_scanstatus() */ #endif + u64 *aCommitTime; }; /* @@ -24256,11 +24189,9 @@ struct PreUpdate { int iBlobWrite; /* Value returned by preupdate_blobwrite() */ i64 iKey1; /* First key value passed to hook */ i64 iKey2; /* Second key value passed to hook */ - Mem oldipk; /* Memory cell holding "old" IPK value */ Mem *aNew; /* Array of new.* values */ Table *pTab; /* Schema object being updated */ Index *pPk; /* PK index if pTab is WITHOUT ROWID */ - sqlite3_value **apDflt; /* Array of default values, if required */ }; /* @@ -29706,29 +29637,16 @@ SQLITE_API void sqlite3_mutex_leave(sqlite3_mutex *p){ /* ** The sqlite3_mutex_held() and sqlite3_mutex_notheld() routine are ** intended for use inside assert() statements. -** -** Because these routines raise false-positive alerts in TSAN, disable -** them (make them always return 1) when compiling with TSAN. 
*/ SQLITE_API int sqlite3_mutex_held(sqlite3_mutex *p){ -# if defined(__has_feature) -# if __has_feature(thread_sanitizer) - p = 0; -# endif -# endif assert( p==0 || sqlite3GlobalConfig.mutex.xMutexHeld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexHeld(p); } SQLITE_API int sqlite3_mutex_notheld(sqlite3_mutex *p){ -# if defined(__has_feature) -# if __has_feature(thread_sanitizer) - p = 0; -# endif -# endif assert( p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld ); return p==0 || sqlite3GlobalConfig.mutex.xMutexNotheld(p); } -#endif /* NDEBUG */ +#endif #endif /* !defined(SQLITE_MUTEX_OMIT) */ @@ -33085,7 +33003,7 @@ SQLITE_API char *sqlite3_snprintf(int n, char *zBuf, const char *zFormat, ...){ */ static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ StrAccum acc; /* String accumulator */ - char zMsg[SQLITE_PRINT_BUF_SIZE*3]; /* Complete log message */ + char zMsg[SQLITE_PRINT_BUF_SIZE*10]; /* Complete log message */ sqlite3StrAccumInit(&acc, 0, zMsg, sizeof(zMsg), 0); sqlite3_str_vappendf(&acc, zFormat, ap); @@ -35143,7 +35061,7 @@ static const unsigned char sqlite3Utf8Trans1[] = { c = *(zIn++); \ if( c>=0xc0 ){ \ c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn=0xd8 && c<0xdc && z<=zEnd && z[0]>=0xdc && z[0]<0xe0 ) z += 2; + if( c>=0xd8 && c<0xdc && z[0]>=0xdc && z[0]<0xe0 ) z += 2; n++; } return (int)(z-(unsigned char const *)zIn) @@ -36117,8 +36033,6 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en int eValid = 1; /* True exponent is either not used or is well-formed */ int nDigit = 0; /* Number of digits processed */ int eType = 1; /* 1: pure integer, 2+: fractional -1 or less: bad UTF16 */ - double rr[2]; - u64 s2; assert( enc==SQLITE_UTF8 || enc==SQLITE_UTF16LE || enc==SQLITE_UTF16BE ); *pResult = 0.0; /* Default return value, in case of an error */ @@ -36230,41 +36144,68 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en e++; } - rr[0] = (double)s; - s2 = (u64)rr[0]; 
-#if defined(_MSC_VER) && _MSC_VER<1700 - if( s2==0x8000000000000000LL ){ s2 = 2*(u64)(0.5*rr[0]); } + if( e==0 ){ + *pResult = s; + }else if( sqlite3Config.bUseLongDouble ){ + LONGDOUBLE_TYPE r = (LONGDOUBLE_TYPE)s; + if( e>0 ){ + while( e>=100 ){ e-=100; r *= 1.0e+100L; } + while( e>=10 ){ e-=10; r *= 1.0e+10L; } + while( e>=1 ){ e-=1; r *= 1.0e+01L; } + }else{ + while( e<=-100 ){ e+=100; r *= 1.0e-100L; } + while( e<=-10 ){ e+=10; r *= 1.0e-10L; } + while( e<=-1 ){ e+=1; r *= 1.0e-01L; } + } + assert( r>=0.0 ); + if( r>+1.7976931348623157081452742373e+308L ){ +#ifdef INFINITY + *pResult = +INFINITY; +#else + *pResult = 1.0e308*10.0; #endif - rr[1] = s>=s2 ? (double)(s - s2) : -(double)(s2 - s); - if( e>0 ){ - while( e>=100 ){ - e -= 100; - dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); - } - while( e>=10 ){ - e -= 10; - dekkerMul2(rr, 1.0e+10, 0.0); - } - while( e>=1 ){ - e -= 1; - dekkerMul2(rr, 1.0e+01, 0.0); + }else{ + *pResult = (double)r; } }else{ - while( e<=-100 ){ - e += 100; - dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); - } - while( e<=-10 ){ - e += 10; - dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); - } - while( e<=-1 ){ - e += 1; - dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); + double rr[2]; + u64 s2; + rr[0] = (double)s; + s2 = (u64)rr[0]; +#if defined(_MSC_VER) && _MSC_VER<1700 + if( s2==0x8000000000000000LL ){ s2 = 2*(u64)(0.5*rr[0]); } +#endif + rr[1] = s>=s2 ? 
(double)(s - s2) : -(double)(s2 - s); + if( e>0 ){ + while( e>=100 ){ + e -= 100; + dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); + } + while( e>=10 ){ + e -= 10; + dekkerMul2(rr, 1.0e+10, 0.0); + } + while( e>=1 ){ + e -= 1; + dekkerMul2(rr, 1.0e+01, 0.0); + } + }else{ + while( e<=-100 ){ + e += 100; + dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); + } + while( e<=-10 ){ + e += 10; + dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); + } + while( e<=-1 ){ + e += 1; + dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); + } } + *pResult = rr[0]+rr[1]; + if( sqlite3IsNaN(*pResult) ) *pResult = 1e300*1e300; } - *pResult = rr[0]+rr[1]; - if( sqlite3IsNaN(*pResult) ) *pResult = 1e300*1e300; if( sign<0 ) *pResult = -*pResult; assert( !sqlite3IsNaN(*pResult) ); @@ -36585,10 +36526,9 @@ SQLITE_PRIVATE void sqlite3FpDecode(FpDecode *p, double r, int iRound, int mxRou int i; u64 v; int e, exp = 0; - double rr[2]; - p->isSpecial = 0; p->z = p->zBuf; + assert( mxRound>0 ); /* Convert negative numbers to positive. Deal with Infinity, 0.0, and @@ -36616,45 +36556,62 @@ SQLITE_PRIVATE void sqlite3FpDecode(FpDecode *p, double r, int iRound, int mxRou /* Multiply r by powers of ten until it lands somewhere in between ** 1.0e+19 and 1.0e+17. - ** - ** Use Dekker-style double-double computation to increase the - ** precision. 
- ** - ** The error terms on constants like 1.0e+100 computed using the - ** decimal extension, for example as follows: - ** - ** SELECT decimal_exp(decimal_sub('1.0e+100',decimal(1.0e+100))); */ - rr[0] = r; - rr[1] = 0.0; - if( rr[0]>9.223372036854774784e+18 ){ - while( rr[0]>9.223372036854774784e+118 ){ - exp += 100; - dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); - } - while( rr[0]>9.223372036854774784e+28 ){ - exp += 10; - dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); - } - while( rr[0]>9.223372036854774784e+18 ){ - exp += 1; - dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); + if( sqlite3Config.bUseLongDouble ){ + LONGDOUBLE_TYPE rr = r; + if( rr>=1.0e+19 ){ + while( rr>=1.0e+119L ){ exp+=100; rr *= 1.0e-100L; } + while( rr>=1.0e+29L ){ exp+=10; rr *= 1.0e-10L; } + while( rr>=1.0e+19L ){ exp++; rr *= 1.0e-1L; } + }else{ + while( rr<1.0e-97L ){ exp-=100; rr *= 1.0e+100L; } + while( rr<1.0e+07L ){ exp-=10; rr *= 1.0e+10L; } + while( rr<1.0e+17L ){ exp--; rr *= 1.0e+1L; } } + v = (u64)rr; }else{ - while( rr[0]<9.223372036854774784e-83 ){ - exp -= 100; - dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); - } - while( rr[0]<9.223372036854774784e+07 ){ - exp -= 10; - dekkerMul2(rr, 1.0e+10, 0.0); - } - while( rr[0]<9.22337203685477478e+17 ){ - exp -= 1; - dekkerMul2(rr, 1.0e+01, 0.0); + /* If high-precision floating point is not available using "long double", + ** then use Dekker-style double-double computation to increase the + ** precision. 
+ ** + ** The error terms on constants like 1.0e+100 computed using the + ** decimal extension, for example as follows: + ** + ** SELECT decimal_exp(decimal_sub('1.0e+100',decimal(1.0e+100))); + */ + double rr[2]; + rr[0] = r; + rr[1] = 0.0; + if( rr[0]>9.223372036854774784e+18 ){ + while( rr[0]>9.223372036854774784e+118 ){ + exp += 100; + dekkerMul2(rr, 1.0e-100, -1.99918998026028836196e-117); + } + while( rr[0]>9.223372036854774784e+28 ){ + exp += 10; + dekkerMul2(rr, 1.0e-10, -3.6432197315497741579e-27); + } + while( rr[0]>9.223372036854774784e+18 ){ + exp += 1; + dekkerMul2(rr, 1.0e-01, -5.5511151231257827021e-18); + } + }else{ + while( rr[0]<9.223372036854774784e-83 ){ + exp -= 100; + dekkerMul2(rr, 1.0e+100, -1.5902891109759918046e+83); + } + while( rr[0]<9.223372036854774784e+07 ){ + exp -= 10; + dekkerMul2(rr, 1.0e+10, 0.0); + } + while( rr[0]<9.22337203685477478e+17 ){ + exp -= 1; + dekkerMul2(rr, 1.0e+01, 0.0); + } } + v = rr[1]<0.0 ? (u64)rr[0]-(u64)(-rr[1]) : (u64)rr[0]+(u64)rr[1]; } - v = rr[1]<0.0 ? (u64)rr[0]-(u64)(-rr[1]) : (u64)rr[0]+(u64)rr[1]; + /* Extract significant digits. */ i = sizeof(p->zBuf)-1; @@ -37425,6 +37382,104 @@ SQLITE_PRIVATE int sqlite3VListNameToNum(VList *pIn, const char *zName, int nNam return 0; } +/* +** High-resolution hardware timer used for debugging and testing only. +*/ +#if defined(VDBE_PROFILE) \ + || defined(SQLITE_PERFORMANCE_TRACE) \ + || defined(SQLITE_ENABLE_STMT_SCANSTATUS) +/************** Include hwtime.h in the middle of util.c *********************/ +/************** Begin file hwtime.h ******************************************/ +/* +** 2008 May 27 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. 
+** +****************************************************************************** +** +** This file contains inline asm code for retrieving "high-performance" +** counters for x86 and x86_64 class CPUs. +*/ +#ifndef SQLITE_HWTIME_H +#define SQLITE_HWTIME_H + +/* +** The following routine only works on Pentium-class (or newer) processors. +** It uses the RDTSC opcode to read the cycle count value out of the +** processor and returns that value. This can be used for high-res +** profiling. +*/ +#if !defined(__STRICT_ANSI__) && \ + (defined(__GNUC__) || defined(_MSC_VER)) && \ + (defined(i386) || defined(__i386__) || defined(_M_IX86)) + + #if defined(__GNUC__) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + + #elif defined(_MSC_VER) + + __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ + __asm { + rdtsc + ret ; return value at EDX:EAX + } + } + + #endif + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned int lo, hi; + __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); + return (sqlite_uint64)hi << 32 | lo; + } + +#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) + + __inline__ sqlite_uint64 sqlite3Hwtime(void){ + unsigned long long retval; + unsigned long junk; + __asm__ __volatile__ ("\n\ + 1: mftbu %1\n\ + mftb %L0\n\ + mftbu %0\n\ + cmpw %0,%1\n\ + bne 1b" + : "=r" (retval), "=r" (junk)); + return retval; + } + +#else + + /* + ** asm() is needed for hardware timing support. Without asm(), + ** disable the sqlite3Hwtime() routine. + ** + ** sqlite3Hwtime() is only used for some obscure debugging + ** and analysis configurations, not in any deliverable, so this + ** should not be a great loss. 
+ */ +SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } + +#endif + +#endif /* !defined(SQLITE_HWTIME_H) */ + +/************** End of hwtime.h **********************************************/ +/************** Continuing where we left off in util.c ***********************/ +#endif + /************** End of util.c ************************************************/ /************** Begin file hash.c ********************************************/ /* @@ -37815,23 +37870,23 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 100 */ "SetCookie" OpHelp(""), /* 101 */ "ReopenIdx" OpHelp("root=P2 iDb=P3"), /* 102 */ "OpenRead" OpHelp("root=P2 iDb=P3"), - /* 103 */ "OpenWrite" OpHelp("root=P2 iDb=P3"), - /* 104 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), - /* 105 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), - /* 106 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<>r[P1]"), - /* 108 */ "Add" OpHelp("r[P3]=r[P1]+r[P2]"), - /* 109 */ "Subtract" OpHelp("r[P3]=r[P2]-r[P1]"), - /* 110 */ "Multiply" OpHelp("r[P3]=r[P1]*r[P2]"), - /* 111 */ "Divide" OpHelp("r[P3]=r[P2]/r[P1]"), - /* 112 */ "Remainder" OpHelp("r[P3]=r[P2]%r[P1]"), - /* 113 */ "Concat" OpHelp("r[P3]=r[P2]+r[P1]"), + /* 103 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), + /* 104 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), + /* 105 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<>r[P1]"), + /* 107 */ "Add" OpHelp("r[P3]=r[P1]+r[P2]"), + /* 108 */ "Subtract" OpHelp("r[P3]=r[P2]-r[P1]"), + /* 109 */ "Multiply" OpHelp("r[P3]=r[P1]*r[P2]"), + /* 110 */ "Divide" OpHelp("r[P3]=r[P2]/r[P1]"), + /* 111 */ "Remainder" OpHelp("r[P3]=r[P2]%r[P1]"), + /* 112 */ "Concat" OpHelp("r[P3]=r[P2]+r[P1]"), + /* 113 */ "OpenWrite" OpHelp("root=P2 iDb=P3"), /* 114 */ "OpenDup" OpHelp(""), - /* 115 */ "OpenAutoindex" OpHelp("nColumn=P2"), - /* 116 */ "BitNot" OpHelp("r[P2]= ~r[P1]"), + /* 115 */ "BitNot" OpHelp("r[P2]= ~r[P1]"), + /* 116 */ "OpenAutoindex" OpHelp("nColumn=P2"), /* 117 */ "OpenEphemeral" OpHelp("nColumn=P2"), - /* 118 */ "SorterOpen" 
OpHelp(""), - /* 119 */ "String8" OpHelp("r[P2]='P4'"), + /* 118 */ "String8" OpHelp("r[P2]='P4'"), + /* 119 */ "SorterOpen" OpHelp(""), /* 120 */ "SequenceTest" OpHelp("if( cursor[P1].ctr++ ) pc = P2"), /* 121 */ "OpenPseudo" OpHelp("P3 columns in r[P2]"), /* 122 */ "Close" OpHelp(""), @@ -37866,8 +37921,8 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 151 */ "DropTable" OpHelp(""), /* 152 */ "DropIndex" OpHelp(""), /* 153 */ "DropTrigger" OpHelp(""), - /* 154 */ "IntegrityCk" OpHelp(""), - /* 155 */ "Real" OpHelp("r[P2]=P4"), + /* 154 */ "Real" OpHelp("r[P2]=P4"), + /* 155 */ "IntegrityCk" OpHelp(""), /* 156 */ "RowSetAdd" OpHelp("rowset(P1)=r[P2]"), /* 157 */ "Param" OpHelp(""), /* 158 */ "FkCounter" OpHelp("fkctr[P1]+=P2"), @@ -39116,7 +39171,7 @@ SQLITE_PRIVATE int sqlite3KvvfsInit(void){ # endif #else /* !SQLITE_WASI */ # ifndef HAVE_FCHMOD -# define HAVE_FCHMOD 1 +# define HAVE_FCHMOD # endif #endif /* SQLITE_WASI */ @@ -41386,33 +41441,54 @@ static int robust_flock(int fd, int op){ ** is set to SQLITE_OK unless an I/O error occurs during lock checking. */ static int flockCheckReservedLock(sqlite3_file *id, int *pResOut){ -#ifdef SQLITE_DEBUG + int rc = SQLITE_OK; + int reserved = 0; unixFile *pFile = (unixFile*)id; -#else - UNUSED_PARAMETER(id); -#endif SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; ); assert( pFile ); - assert( pFile->eFileLock<=SHARED_LOCK ); - /* The flock VFS only ever takes exclusive locks (see function flockLock). - ** Therefore, if this connection is holding any lock at all, no other - ** connection may be holding a RESERVED lock. So set *pResOut to 0 - ** in this case. - ** - ** Or, this connection may be holding no lock. In that case, set *pResOut to - ** 0 as well. The caller will then attempt to take an EXCLUSIVE lock on the - ** db in order to roll the hot journal back. If there is another connection - ** holding a lock, that attempt will fail and an SQLITE_BUSY returned to - ** the user. 
With other VFS, we try to avoid this, in order to allow a reader - ** to proceed while a writer is preparing its transaction. But that won't - ** work with the flock VFS - as it always takes EXCLUSIVE locks - so it is - ** not a problem in this case. */ - *pResOut = 0; + /* Check if a thread in this process holds such a lock */ + if( pFile->eFileLock>SHARED_LOCK ){ + reserved = 1; + } - return SQLITE_OK; + /* Otherwise see if some other process holds it. */ + if( !reserved ){ + /* attempt to get the lock */ + int lrc = robust_flock(pFile->h, LOCK_EX | LOCK_NB); + if( !lrc ){ + /* got the lock, unlock it */ + lrc = robust_flock(pFile->h, LOCK_UN); + if ( lrc ) { + int tErrno = errno; + /* unlock failed with an error */ + lrc = SQLITE_IOERR_UNLOCK; + storeLastErrno(pFile, tErrno); + rc = lrc; + } + } else { + int tErrno = errno; + reserved = 1; + /* someone else might have it reserved */ + lrc = sqliteErrorFromPosixError(tErrno, SQLITE_IOERR_LOCK); + if( IS_LOCK_ERROR(lrc) ){ + storeLastErrno(pFile, tErrno); + rc = lrc; + } + } + } + OSTRACE(("TEST WR-LOCK %d %d %d (flock)\n", pFile->h, rc, reserved)); + +#ifdef SQLITE_IGNORE_FLOCK_LOCK_ERRORS + if( (rc & 0xff) == SQLITE_IOERR ){ + rc = SQLITE_OK; + reserved=1; + } +#endif /* SQLITE_IGNORE_FLOCK_LOCK_ERRORS */ + *pResOut = reserved; + return rc; } /* @@ -42909,11 +42985,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } #endif /* __linux__ && SQLITE_ENABLE_BATCH_ATOMIC_WRITE */ - case SQLITE_FCNTL_NULL_IO: { - osClose(pFile->h); - pFile->h = -1; - return SQLITE_OK; - } case SQLITE_FCNTL_LOCKSTATE: { *(int*)pArg = pFile->eFileLock; return SQLITE_OK; @@ -43018,7 +43089,6 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ } #endif /* SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) */ -#ifndef SQLITE_OMIT_WAL case SQLITE_FCNTL_EXTERNAL_READER: { #if !defined(SQLITE_WASI) && !defined(SQLITE_OMIT_WAL) return unixFcntlExternalReader((unixFile*)id, (int*)pArg); @@ -43027,7 +43097,6 
@@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; #endif } -#endif } return SQLITE_NOTFOUND; } @@ -43060,7 +43129,6 @@ static void setDeviceCharacteristics(unixFile *pFd){ if( pFd->ctrlFlags & UNIXFILE_PSOW ){ pFd->deviceCharacteristics |= SQLITE_IOCAP_POWERSAFE_OVERWRITE; } - pFd->deviceCharacteristics |= SQLITE_IOCAP_SUBPAGE_READ; pFd->sectorSize = SQLITE_DEFAULT_SECTOR_SIZE; } @@ -43111,7 +43179,7 @@ static void setDeviceCharacteristics(unixFile *pFile){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* full bitset of atomics from max sector size and smaller */ - (((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2) | + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; @@ -43119,7 +43187,7 @@ static void setDeviceCharacteristics(unixFile *pFile){ pFile->sectorSize = fsInfo.f_bsize; pFile->deviceCharacteristics = /* full bitset of atomics from max sector size and smaller */ - (((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2) | + ((pFile->sectorSize / 512 * SQLITE_IOCAP_ATOMIC512) << 1) - 2 | SQLITE_IOCAP_SEQUENTIAL | /* The ram filesystem has no write behind ** so it is ordered */ 0; @@ -50954,11 +51022,6 @@ static int winFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } #endif - case SQLITE_FCNTL_NULL_IO: { - (void)osCloseHandle(pFile->h); - pFile->h = NULL; - return SQLITE_OK; - } case SQLITE_FCNTL_TEMPFILENAME: { char *zTFile = 0; int rc = winGetTempname(pFile->pVfs, &zTFile); @@ -51020,7 +51083,7 @@ static int winSectorSize(sqlite3_file *id){ */ static int winDeviceCharacteristics(sqlite3_file *id){ winFile *p = (winFile*)id; - return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | SQLITE_IOCAP_SUBPAGE_READ | + return SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN | ((p->ctrlFlags & WINFILE_PSOW)?SQLITE_IOCAP_POWERSAFE_OVERWRITE:0); } @@ -56982,7 +57045,13 @@ static void 
pcache1Unpin( assert( PAGE_IS_PINNED(pPage) ); if( reuseUnlikely || pGroup->nPurgeable>pGroup->nMaxPage ){ + /* If pcache1.separateCache is set, temporarily set the isBulkLocal flag + ** so that pcache1RemoveFromHash() moves the page buffer to the pFree + ** list instead of sqlite3_free()ing it. */ + u16 isBulkLocal = pPage->isBulkLocal; + pPage->isBulkLocal = (u16)pcache1.separateCache; pcache1RemoveFromHash(pPage, 1); + pPage->isBulkLocal = isBulkLocal; }else{ /* Add the page to the PGroup LRU list. */ PgHdr1 **ppFirst = &pGroup->lru.pLruNext; @@ -57884,17 +57953,14 @@ SQLITE_PRIVATE int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); /* sqlite3_wal_info() data */ SQLITE_PRIVATE int sqlite3WalInfo(Wal *pWal, u32 *pnPrior, u32 *pnFrame); +SQLITE_PRIVATE void sqlite3WalSetCommitTime(Wal *pWal, u64 *aCommitTime); + #endif /* ifndef SQLITE_OMIT_WAL */ #endif /* SQLITE_WAL_H */ /************** End of wal.h *************************************************/ /************** Continuing where we left off in pager.c **********************/ -#ifdef SQLITE_ENABLE_HCT -# define IS_HCT(pPager) (pPager->pVfs==0) -#else -# define IS_HCT(pPager) 0 -#endif /******************* NOTES ON THE DESIGN OF THE PAGER ************************ ** @@ -58576,6 +58642,7 @@ struct Pager { Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ char *zWal; /* File name for write-ahead log */ #endif + u64 *aCommitTime; }; /* @@ -58670,28 +58737,20 @@ static const unsigned char aJournalMagic[] = { ** Return true if page pgno can be read directly from the database file ** by the b-tree layer. This is the case if: ** -** (1) the database file is open -** (2) the VFS for the database is able to do unaligned sub-page reads -** (3) there are no dirty pages in the cache, and -** (4) the desired page is not currently in the wal file. +** * the database file is open, +** * there are no dirty pages in the cache, and +** * the desired page is not currently in the wal file. 
*/ SQLITE_PRIVATE int sqlite3PagerDirectReadOk(Pager *pPager, Pgno pgno){ - assert( pPager!=0 ); - assert( pPager->fd!=0 ); - if( pPager->fd->pMethods==0 ) return 0; /* Case (1) */ - if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; /* Failed (3) */ + if( pPager->fd->pMethods==0 ) return 0; + if( sqlite3PCacheIsDirty(pPager->pPCache) ) return 0; #ifndef SQLITE_OMIT_WAL if( pPager->pWal ){ u32 iRead = 0; (void)sqlite3WalFindFrame(pPager->pWal, pgno, &iRead); - return iRead==0; /* Condition (4) */ + return iRead==0; } #endif - assert( pPager->fd->pMethods->xDeviceCharacteristics!=0 ); - if( (pPager->fd->pMethods->xDeviceCharacteristics(pPager->fd) - & SQLITE_IOCAP_SUBPAGE_READ)==0 ){ - return 0; /* Case (2) */ - } return 1; } #endif @@ -60931,6 +60990,7 @@ static int pager_playback(Pager *pPager, int isHot){ static int readDbPage(PgHdr *pPg){ Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */ int rc = SQLITE_OK; /* Return code */ + u64 t1 = 0; #ifndef SQLITE_OMIT_WAL u32 iFrame = 0; /* Frame of WAL containing pgno */ @@ -60942,6 +61002,9 @@ static int readDbPage(PgHdr *pPg){ rc = sqlite3WalFindFrame(pPager->pWal, pPg->pgno, &iFrame); if( rc ) return rc; } + if( pPager->aCommitTime ){ + t1 = sqlite3STimeNow(); + } if( iFrame ){ rc = sqlite3WalReadFrame(pPager->pWal, iFrame,pPager->pageSize,pPg->pData); }else @@ -60953,6 +61016,10 @@ static int readDbPage(PgHdr *pPg){ rc = SQLITE_OK; } } + if( pPager->aCommitTime ){ + pPager->aCommitTime[COMMIT_TIME_RELOCATE2_READUS] += (sqlite3STimeNow() - t1); + pPager->aCommitTime[COMMIT_TIME_RELOCATE2_READCOUNT]++; + } if( pPg->pgno==1 ){ if( rc ){ @@ -61144,6 +61211,7 @@ static int pagerWalFrames( pPager->aStat[PAGER_STAT_WRITE] += nList; if( pList->pgno==1 ) pager_write_changecounter(pList); + sqlite3CommitTimeSet(pPager->aCommitTime, COMMIT_TIME_AFTER_CHANGECOUNTER); rc = sqlite3WalFrames(pPager->pWal, pPager->pageSize, pList, nTruncate, isCommit, pPager->walSyncFlags ); @@ -64384,7 +64452,6 @@ 
SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager, const char *zSuper){ */ SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager, PgHdr *pPage1, u32 *aConflict){ int rc = pPager->errCode; - if( IS_HCT(pPager) ) return SQLITE_OK; assert( assert_pager_state(pPager) ); if( rc==SQLITE_OK ){ assert( pPager->eState==PAGER_WRITER_CACHEMOD @@ -64535,7 +64602,9 @@ SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( } assert( rc==SQLITE_OK ); if( ALWAYS(pList) ){ + sqlite3CommitTimeSet(pPager->aCommitTime, COMMIT_TIME_BEFORE_WALFRAMES); rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1); + sqlite3CommitTimeSet(pPager->aCommitTime, COMMIT_TIME_AFTER_WALFRAMES); } sqlite3PagerUnref(pPageOne); if( rc==SQLITE_OK ){ @@ -64709,6 +64778,10 @@ SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne( return rc; } +SQLITE_PRIVATE void sqlite3PagerSetCommitTime(Pager *pPager, u64 *aCommitTime){ + pPager->aCommitTime = aCommitTime; + sqlite3WalSetCommitTime(pPager->pWal, aCommitTime); +} /* ** When this function is called, the database file has been completely @@ -65133,10 +65206,6 @@ SQLITE_PRIVATE sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){ ** not yet been opened. */ SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){ -#ifdef SQLITE_ENABLE_HCT - static sqlite3_file s = {0}; - if( pPager->pVfs==0 ) return &s; -#endif return pPager->fd; } @@ -65492,7 +65561,6 @@ SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){ ** is unmodified. */ SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){ - if( IS_HCT(pPager) ) return 0; assert( assert_pager_state(pPager) ); if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0; if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0; @@ -65918,7 +65986,7 @@ SQLITE_PRIVATE int sqlite3PagerWalSystemErrno(Pager *pPager){ ** 28: Checksum-2 (second part of checksum for first 24 bytes of header). ** ** Immediately following the wal-header are zero or more frames. 
Each -** frame consists of a 24-byte frame-header followed by bytes +** frame consists of a 24-byte frame-header followed by a bytes ** of page data. The frame-header is six big-endian 32-bit unsigned ** integer values, as follows: ** @@ -66335,6 +66403,19 @@ SQLITE_PRIVATE int sqlite3WalTrace = 0; #define WAL_VERSION1 3007000 /* For "journal_mode=wal" */ #define WAL_VERSION2 3021000 /* For "journal_mode=wal2" */ +#define SQLITE_ENABLE_WAL2NOCKSUM 1 + +#ifdef SQLITE_ENABLE_WAL2NOCKSUM +# undef WAL_VERSION2 +# define WAL_VERSION2 3048000 /* For "journal_mode=wal2" sans checksums */ + +# define isNocksum(pWal) isWalMode2(pWal) +#else +# define isNocksum(pWal) 0 +#endif + + + /* ** Index numbers for various locking bytes. WAL_NREADER is the number @@ -66678,13 +66759,13 @@ struct Wal { #endif #ifdef SQLITE_ENABLE_SNAPSHOT WalIndexHdr *pSnapshot; /* Start transaction here if not NULL */ - int bGetSnapshot; /* Transaction opened for sqlite3_get_snapshot() */ #endif int bClosing; /* Set to true at start of sqlite3WalClose() */ int bWal2; /* bWal2 flag passed to WalOpen() */ #ifdef SQLITE_ENABLE_SETLK_TIMEOUT sqlite3 *db; #endif + u64 *aCommitTime; }; /* @@ -66939,7 +67020,14 @@ static int walIndexPage( ){ SEH_INJECT_FAULT; if( pWal->nWiData<=iPage || (*ppPage = pWal->apWiData[iPage])==0 ){ - return walIndexPageRealloc(pWal, iPage, ppPage); + int rc; + u64 t1; + if( pWal->aCommitTime ) t1 = sqlite3STimeNow(); + rc = walIndexPageRealloc(pWal, iPage, ppPage); + if( pWal->aCommitTime ){ + pWal->aCommitTime[COMMIT_TIME_HASHMAPUS] += sqlite3STimeNow() - t1; + } + return rc; } return SQLITE_OK; } @@ -67112,12 +67200,15 @@ static void walEncodeFrame( if( pWal->iReCksum==0 ){ memcpy(&aFrame[8], pWal->hdr.aSalt, 8); - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( isNocksum(pWal)==0 ){ + nativeCksum = 
(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + } sqlite3Put4byte(&aFrame[16], aCksum[0]); sqlite3Put4byte(&aFrame[20], aCksum[1]); + }else{ memset(&aFrame[8], 0, 16); } @@ -67159,14 +67250,16 @@ static int walDecodeFrame( ** and the frame-data matches the checksum in the last 8 ** bytes of this frame-header. */ - nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); - walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); - walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); - if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) - || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) - ){ - /* Checksum failed. */ - return 0; + if( isNocksum(pWal)==0 ){ + nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN); + walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum); + walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum); + if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) + || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) + ){ + /* Checksum failed. */ + return 0; + } } /* If we reach this point, the frame is valid. Return the page number @@ -67480,6 +67573,29 @@ static void walCleanupHash(Wal *pWal){ #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ } +/* +** Zero the n byte block indicated by pointer p. n Must be a multiple of +** 8, and p must be aligned to an 8-byte boundary. +*/ +static void zero64(void *p, int n){ +#if defined(__x86_64__) + size_t c = n / sizeof(u64); + void *d = p; + + assert( (n & 0x7)==0 ); + assert( EIGHT_BYTE_ALIGNMENT(p) ); + + __asm__ volatile ( + "rep stosq" + : "+D" (d), "+c" (c) + : "a" (0) + : "memory" + ); +#else + memset(p, 0, n); +#endif +} + /* ** Set an entry in the wal-index that will map database page number ** pPage into WAL frame iFrame. 
@@ -67488,6 +67604,7 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ int rc; /* Return code */ WalHashLoc sLoc; /* Wal-index hash table location */ u32 iExternal; + u64 t; if( isWalMode2(pWal) ){ iExternal = walExternalEncode(iWal, iFrame); @@ -67496,7 +67613,11 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ iExternal = iFrame; } + if( pWal->aCommitTime ) t = sqlite3STimeNow(); rc = walHashGet(pWal, walFramePage(iExternal), &sLoc); + if( pWal->aCommitTime ){ + pWal->aCommitTime[COMMIT_TIME_WALINDEX_HASHGETUS] += sqlite3STimeNow()-t; + } /* Assuming the wal-index file was successfully mapped, populate the ** page number array and hash table entry. @@ -67512,10 +67633,14 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ /* If this is the first entry to be added to this hash-table, zero the ** entire hash table and aPgno[] array before proceeding. */ - if( idx==1 ){ - int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); - assert( nByte>=0 ); - memset((void*)sLoc.aPgno, 0, nByte); + if( pWal->aCommitTime ) t = sqlite3STimeNow(); + if( idx==1 && sLoc.aPgno[0]!=0 ){ + /* Special for BEDROCK branch: Zero only the aHash[] part. Not the + ** aPgno[] part of the page. */ + zero64((void*)sLoc.aHash, HASHTABLE_NSLOT * sizeof(sLoc.aHash[0])); + } + if( pWal->aCommitTime ){ + pWal->aCommitTime[COMMIT_TIME_WALINDEX_MEMSETUS]+=sqlite3STimeNow()-t; } /* If the entry in aPgno[] is already set, then the previous writer @@ -67523,21 +67648,42 @@ static int walIndexAppend(Wal *pWal, int iWal, u32 iFrame, u32 iPage){ ** writing one or more dirty pages to the WAL to free up memory). ** Remove the remnants of that writers uncommitted transaction from ** the hash-table before writing any new entries. 
- */ - if( sLoc.aPgno[idx-1] ){ - walCleanupHash(pWal); - assert( !sLoc.aPgno[idx-1] ); + ** + ** Special for BEDROCK branch: On this branch we do not assume that + ** the aPgno[] part of each hash-table has been zeroed. Therefore, we + ** only need to clear out the remnants of an old writer's transaction if + ** the hash table matches the aPgno[] entry (as it would if a write + ** transaction was interrupted). And, because this makes the test more + ** expensive, we only do the check for the first frame written by each + ** transaction. */ + if( sLoc.aPgno[idx-1] && iFrame-1==walidxGetMxFrame(&pWal->hdr, iWal) ){ + if( pWal->aCommitTime ) t = sqlite3STimeNow(); + nCollide = idx; + for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ + if( sLoc.aHash[iKey]==idx ){ + walCleanupHash(pWal); + } + if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; + } + if( pWal->aCommitTime ){ + pWal->aCommitTime[COMMIT_TIME_WALINDEX_CLEANUPUS]+=sqlite3STimeNow()-t; + } } /* Write the aPgno[] array entry and the hash-table slot. */ + if( pWal->aCommitTime ) t = sqlite3STimeNow(); nCollide = idx; for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){ if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT; } sLoc.aPgno[idx-1] = iPage; AtomicStore(&sLoc.aHash[iKey], (ht_slot)idx); + if( pWal->aCommitTime ){ + pWal->aCommitTime[COMMIT_TIME_WALINDEX_ENTRYUS]+=sqlite3STimeNow()-t; + } #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT + assert( this_should_not_be_enabled ); /* Verify that the number of entries in the hash table exactly equals ** the number of entries in the mapping region. 
*/ @@ -67673,6 +67819,7 @@ static int walIndexRecoverOne(Wal *pWal, int iWal, u32 *pnCkpt, int *pbZero){ if( aShare==0 ) break; SEH_SET_ON_ERROR(iPg, aShare); pWal->apWiData[iPg] = aPrivate; + memset(aPrivate, 0, WALINDEX_PGSZ); if( iWal ){ assert( version==WAL_VERSION2 ); @@ -68801,6 +68948,39 @@ static int walCheckpoint( } } + if( bWal2 && rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + /* In wal2 mode, a non-passive checkpoint waits for all readers of + ** the wal file just checkpointed to finish, then zeroes the hash + ** tables associated with that wal file. This is because in some + ** deployments, zeroing the hash tables as they are overwritten within + ** COMMIT commands is a significant performance hit. + ** + ** Currently, both of the "PART" locks are held for the wal file + ** being checkpointed. i.e. if iCkpt==0, then we already hold both + ** WAL_LOCK_PART1 and WAL_LOCK_PART1_FULL2. If we now also take an + ** exclusive lock on WAL_LOCK_PART2_FULL1, then it is guaranteed that + ** there are no remaining readers of the (iCkpt==0) wal file. Similar + ** logic, with different locks, is used for (iCkpt==1). + */ + int lockIdx = WAL_READ_LOCK( + iCkpt==0 ? WAL_LOCK_PART2_FULL1 : WAL_LOCK_PART1_FULL2 + ); + assert( iCkpt==0 || iCkpt==1 ); + rc = walBusyLock(pWal, xBusy, pBusyArg, lockIdx, 1); + if( rc==SQLITE_OK ){ + int iHash; + for(iHash = walFramePage2(iCkpt, mxSafeFrame); iHash>=0; iHash-=2){ + WalHashLoc sLoc; + int nByte; + memset(&sLoc, 0, sizeof(sLoc)); + walHashGet(pWal, iHash, &sLoc); + nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno); + memset((void*)sLoc.aPgno, 0, nByte); + } + walUnlockExclusive(pWal, lockIdx, 1); + } + } + if( rc==SQLITE_BUSY ){ /* Reset the return code so as not to report a checkpoint failure ** just because there are active readers. 
*/ @@ -68921,7 +69101,7 @@ static int walHandleException(Wal *pWal){ /* ** Assert that the Wal.lockMask mask, which indicates the locks held -** by the connection, is consistent with the Wal.readLock, Wal.writeLock +** by the connenction, is consistent with the Wal.readLock, Wal.writeLock ** and Wal.ckptLock variables. To be used as: ** ** assert( walAssertLockmask(pWal) ); @@ -69640,7 +69820,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int *pCnt){ u32 mxFrame; /* Wal frame to lock to */ if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame #ifdef SQLITE_ENABLE_SNAPSHOT - && ((pWal->bGetSnapshot==0 && pWal->pSnapshot==0) || pWal->hdr.mxFrame==0) + && (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0) #endif ){ /* The WAL has been completely backfilled (or it is empty). @@ -70922,6 +71102,7 @@ static int walWriteToLog( sqlite3_int64 iOffset /* Start writing at this offset */ ){ int rc; + u64 t; if( iOffsetiSyncPoint && iOffset+iAmt>=p->iSyncPoint ){ int iFirstAmt = (int)(p->iSyncPoint - iOffset); rc = sqlite3OsWrite(p->pFd, pContent, iFirstAmt, iOffset); @@ -70933,7 +71114,13 @@ static int walWriteToLog( rc = sqlite3OsSync(p->pFd, WAL_SYNC_FLAGS(p->syncFlags)); if( iAmt==0 || rc ) return rc; } + if( p->pWal->aCommitTime ){ + t = sqlite3STimeNow(); + } rc = sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset); + if( p->pWal->aCommitTime ){ + p->pWal->aCommitTime[COMMIT_TIME_OSWRITE] += (sqlite3STimeNow() - t); + } return rc; } @@ -70963,10 +71150,21 @@ static int walWriteOneFrame( pData = pPage->pData; walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pData, aFrame); - rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); - if( rc ) return rc; + + if( isNocksum(p->pWal)==0 ){ + /* Write the header in normal mode */ + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + if( rc ) return rc; + } + /* Write the page data */ rc = walWriteToLog(p, pData, p->szPage, iOffset+sizeof(aFrame)); + + if( isNocksum(p->pWal) ){ + /* Write the header in no-checksum 
mode */ + if( rc ) return rc; + rc = walWriteToLog(p, aFrame, sizeof(aFrame), iOffset); + } return rc; } @@ -71048,6 +71246,9 @@ static int walFrames( WalIndexHdr *pLive; /* Pointer to shared header */ int iApp; int bWal2 = isWalMode2(pWal); + int nFrame = 0; + + int logFlags = 0; assert( pList ); assert( pWal->writeLock ); @@ -71069,6 +71270,8 @@ static int walFrames( return rc; } + sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_RESTARTLOG); + /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. @@ -71084,6 +71287,7 @@ static int walFrames( } #endif + logFlags |= (iFrame==0 ? 0x01 : 0x00); if( iFrame==0 ){ u32 iCkpt = 0; u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */ @@ -71137,6 +71341,7 @@ static int walFrames( if( (int)pWal->szPage!=szPage ){ return SQLITE_CORRUPT_BKPT; /* TH3 test case: cov1/corrupt155.test */ } + sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WRITEHDR); /* Setup information needed to write frames into the WAL */ w.pWal = pWal; @@ -71148,6 +71353,7 @@ static int walFrames( szFrame = szPage + WAL_FRAME_HDRSIZE; /* Write all frames into the log file exactly once */ + logFlags |= (iFirst==0 ? 0x00 : 0x02); for(p=pList; p; p=p->pDirty){ int nDbSize; /* 0 normally. Positive == commit flag */ @@ -71186,8 +71392,10 @@ static int walFrames( p->flags |= PGHDR_WAL_APPEND; } + sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WRITEFRAMES); /* Recalculate checksums within the wal file if required. */ + logFlags |= (pWal->iReCksum==0 ? 0x00 : 0x04); if( isCommit && pWal->iReCksum ){ rc = walRewriteChecksums(pWal, iFrame); if( rc ) return rc; @@ -71207,6 +71415,7 @@ static int walFrames( ** sector boundary is synced; the part of the last frame that extends ** past the sector boundary is written after the sync. */ + logFlags |= (WAL_SYNC_FLAGS(sync_flags)==0 ? 
0x00 : 0x08); if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){ int bSync = 1; if( pWal->padToSectorBoundary ){ @@ -71241,6 +71450,8 @@ static int walFrames( pWal->truncateOnCommit = 0; } + sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_BEFORE_WALINDEX); + /* Append data to the wal-index. It is not necessary to lock the ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index ** guarantees that there are no other writers, and no data that may @@ -71251,6 +71462,7 @@ static int walFrames( if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue; iFrame++; rc = walIndexAppend(pWal, iApp, iFrame, p->pgno); + nFrame++; } assert( pLast!=0 || nExtra==0 ); while( rc==SQLITE_OK && nExtra>0 ){ @@ -71258,6 +71470,9 @@ static int walFrames( nExtra--; rc = walIndexAppend(pWal, iApp, iFrame, pLast->pgno); } + if( pWal->aCommitTime ) pWal->aCommitTime[COMMIT_TIME_NFRAME] = nFrame; + + sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WALINDEX); if( rc==SQLITE_OK ){ /* Update the private copy of the header. */ @@ -71286,6 +71501,11 @@ static int walFrames( } } + sqlite3CommitTimeSet(pWal->aCommitTime, COMMIT_TIME_AFTER_WALINDEXHDR); + if( pWal->aCommitTime ){ + pWal->aCommitTime[COMMIT_TIME_WALFRAMESFLAGS] = logFlags; + } + WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } @@ -71378,7 +71598,7 @@ SQLITE_PRIVATE int sqlite3WalCheckpoint( ** writer lock retried until either the busy-handler returns 0 or the ** lock is successfully obtained. */ - if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){ + if( eMode!=SQLITE_CHECKPOINT_PASSIVE && isWalMode2(pWal)==0 ){ rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1); if( rc==SQLITE_OK ){ pWal->writeLock = 1; @@ -71594,20 +71814,7 @@ SQLITE_PRIVATE void sqlite3WalSnapshotOpen( Wal *pWal, sqlite3_snapshot *pSnapshot ){ - if( pSnapshot && ((WalIndexHdr*)pSnapshot)->iVersion==0 ){ - /* iVersion==0 means that this is a call to sqlite3_snapshot_get(). 
In - ** this case set the bGetSnapshot flag so that if the call to - ** sqlite3_snapshot_get() is about to read transaction on this wal - ** file, it does not take read-lock 0 if the wal file has been completely - ** checkpointed. Taking read-lock 0 would work, but then it would be - ** possible for a subsequent writer to destroy the snapshot even while - ** this connection is holding its read-transaction open. This is contrary - ** to user expectations, so we avoid it by not taking read-lock 0. */ - pWal->bGetSnapshot = 1; - }else{ - pWal->pSnapshot = (WalIndexHdr*)pSnapshot; - pWal->bGetSnapshot = 0; - } + pWal->pSnapshot = (WalIndexHdr*)pSnapshot; } /* @@ -71710,6 +71917,12 @@ SQLITE_PRIVATE int sqlite3WalJournalMode(Wal *pWal){ return (isWalMode2(pWal) ? PAGER_JOURNALMODE_WAL2 : PAGER_JOURNALMODE_WAL); } +SQLITE_PRIVATE void sqlite3WalSetCommitTime(Wal *pWal, u64 *aCommitTime){ + if( pWal ){ + pWal->aCommitTime = aCommitTime; + } +} + #endif /* #ifndef SQLITE_OMIT_WAL */ /************** End of wal.c *************************************************/ @@ -72082,7 +72295,6 @@ struct BtLock { ** they often do so without holding sqlite3.mutex. */ struct Btree { - const BtreeMethods *pMethods; sqlite3 *db; /* The database connection holding this btree */ BtShared *pBt; /* Sharable content of this btree */ u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ @@ -72206,6 +72418,8 @@ struct BtShared { BtreePtrmap *pMap; #endif int nPreformatSize; /* Size of last cell written by TransferRow() */ + + u64 *aCommitTime; }; /* @@ -72272,7 +72486,6 @@ struct CellInfo { ** FAULT skipNext holds the cursor fault error code. 
*/ struct BtCursor { - const BtCursorMethods *pMethods; u8 eState; /* One of the CURSOR_XXX constants (see below) */ u8 curFlags; /* zero or more BTCF_* flags defined below */ u8 curPagerFlags; /* Flags to send to sqlite3PagerGet() */ @@ -72802,93 +73015,6 @@ SQLITE_PRIVATE void sqlite3BtreeLeaveCursor(BtCursor *pCur){ ** Including a description of file format and an overview of operation. */ /* #include "btreeInt.h" */ -/************** Include btreeDefine.h in the middle of btree.c ***************/ -/************** Begin file btreeDefine.h *************************************/ -#define sqlite3BtreeNext sqlite3StockBtreeNext -#define sqlite3BtreeCursorHasMoved sqlite3StockBtreeCursorHasMoved -#define sqlite3BtreeClearCursor sqlite3StockBtreeClearCursor -#define sqlite3BtreeCursorRestore sqlite3StockBtreeCursorRestore -#define sqlite3BtreeCursorHintFlags sqlite3StockBtreeCursorHintFlags -#define sqlite3BtreeCloseCursor sqlite3StockBtreeCloseCursor -#define sqlite3BtreeCursorIsValid sqlite3StockBtreeCursorIsValid -#define sqlite3BtreeCursorIsValidNN sqlite3StockBtreeCursorIsValidNN -#define sqlite3BtreeIntegerKey sqlite3StockBtreeIntegerKey -#define sqlite3BtreeCursorPin sqlite3StockBtreeCursorPin -#define sqlite3BtreeCursorUnpin sqlite3StockBtreeCursorUnpin -#define sqlite3BtreePayloadSize sqlite3StockBtreePayloadSize -#define sqlite3BtreeMaxRecordSize sqlite3StockBtreeMaxRecordSize -#define sqlite3BtreePayload sqlite3StockBtreePayload -#define sqlite3BtreePayloadChecked sqlite3StockBtreePayloadChecked -#define sqlite3BtreePayloadFetch sqlite3StockBtreePayloadFetch -#define sqlite3BtreeFirst sqlite3StockBtreeFirst -#define sqlite3BtreeLast sqlite3StockBtreeLast -#define sqlite3BtreeTableMoveto sqlite3StockBtreeTableMoveto -#define sqlite3BtreeIndexMoveto sqlite3StockBtreeIndexMoveto -#define sqlite3BtreeCursorDir sqlite3StockBtreeCursorDir -#define sqlite3BtreeEof sqlite3StockBtreeEof -#define sqlite3BtreeRowCountEst sqlite3StockBtreeRowCountEst -#define 
sqlite3BtreePrevious sqlite3StockBtreePrevious -#define sqlite3BtreeInsert sqlite3StockBtreeInsert -#define sqlite3BtreeDelete sqlite3StockBtreeDelete -#define sqlite3BtreeIdxDelete sqlite3StockBtreeIdxDelete -#define sqlite3BtreePutData sqlite3StockBtreePutData -#define sqlite3BtreeIncrblobCursor sqlite3StockBtreeIncrblobCursor -#define sqlite3BtreeCursorHasHint sqlite3StockBtreeCursorHasHint -#define sqlite3BtreeTransferRow sqlite3StockBtreeTransferRow -#define sqlite3BtreeClearTableOfCursor sqlite3StockBtreeClearTableOfCursor -#define sqlite3BtreeCount sqlite3StockBtreeCount -#define sqlite3BtreeCursor sqlite3StockBtreeCursor -#define sqlite3BtreeSeekCount sqlite3StockBtreeSeekCount -#define sqlite3BtreeLastPage sqlite3StockBtreeLastPage -#define sqlite3BtreeClose sqlite3StockBtreeClose -#define sqlite3BtreeSetCacheSize sqlite3StockBtreeSetCacheSize -#define sqlite3BtreeSetSpillSize sqlite3StockBtreeSetSpillSize -#define sqlite3BtreeSetMmapLimit sqlite3StockBtreeSetMmapLimit -#define sqlite3BtreeSetPagerFlags sqlite3StockBtreeSetPagerFlags -#define sqlite3BtreeSetPageSize sqlite3StockBtreeSetPageSize -#define sqlite3BtreeGetPageSize sqlite3StockBtreeGetPageSize -#define sqlite3BtreeGetReserveNoMutex sqlite3StockBtreeGetReserveNoMutex -#define sqlite3BtreeGetRequestedReserve sqlite3StockBtreeGetRequestedReserve -#define sqlite3BtreeMaxPageCount sqlite3StockBtreeMaxPageCount -#define sqlite3BtreeSecureDelete sqlite3StockBtreeSecureDelete -#define sqlite3BtreeSetAutoVacuum sqlite3StockBtreeSetAutoVacuum -#define sqlite3BtreeGetAutoVacuum sqlite3StockBtreeGetAutoVacuum -#define sqlite3BtreeNewDb sqlite3StockBtreeNewDb -#define sqlite3BtreeBeginTrans sqlite3StockBtreeBeginTrans -#define sqlite3BtreeIncrVacuum sqlite3StockBtreeIncrVacuum -#define sqlite3BtreeCommitPhaseOne sqlite3StockBtreeCommitPhaseOne -#define sqlite3BtreeCommitPhaseTwo sqlite3StockBtreeCommitPhaseTwo -#define sqlite3BtreeCommit sqlite3StockBtreeCommit -#define sqlite3BtreeTripAllCursors 
sqlite3StockBtreeTripAllCursors -#define sqlite3BtreeRollback sqlite3StockBtreeRollback -#define sqlite3BtreeBeginStmt sqlite3StockBtreeBeginStmt -#define sqlite3BtreeSavepoint sqlite3StockBtreeSavepoint -#define sqlite3BtreeCreateTable sqlite3StockBtreeCreateTable -#define sqlite3BtreeClearTable sqlite3StockBtreeClearTable -#define sqlite3BtreeDropTable sqlite3StockBtreeDropTable -#define sqlite3BtreeGetMeta sqlite3StockBtreeGetMeta -#define sqlite3BtreeUpdateMeta sqlite3StockBtreeUpdateMeta -#define sqlite3BtreePragma sqlite3StockBtreePragma -#define sqlite3BtreePager sqlite3StockBtreePager -#define sqlite3BtreeGetFilename sqlite3StockBtreeGetFilename -#define sqlite3BtreeGetJournalname sqlite3StockBtreeGetJournalname -#define sqlite3BtreeTxnState sqlite3StockBtreeTxnState -#define sqlite3BtreeIsInBackup sqlite3StockBtreeIsInBackup -#define sqlite3BtreeSchema sqlite3StockBtreeSchema -#define sqlite3BtreeSchemaLocked sqlite3StockBtreeSchemaLocked -#define sqlite3BtreeIsReadonly sqlite3StockBtreeIsReadonly -#define sqlite3BtreeSetVersion sqlite3StockBtreeSetVersion -#define sqlite3BtreeIntegrityCheck sqlite3StockBtreeIntegrityCheck -#define sqlite3BtreeCheckpoint sqlite3StockBtreeCheckpoint -#define sqlite3BtreeExclusiveLock sqlite3StockBtreeExclusiveLock -#define sqlite3BtreeFakeValidCursor sqlite3StockBtreeFakeValidCursor -#define sqlite3BtreeCursorSize sqlite3StockBtreeCursorSize -#define sqlite3BtreeCursorZero sqlite3StockBtreeCursorZero -#define sqlite3BtreeOpen sqlite3StockBtreeOpen - - -/************** End of btreeDefine.h *****************************************/ -/************** Continuing where we left off in btree.c **********************/ /* ** The header string that appears at the beginning of every @@ -74045,16 +74171,11 @@ static int btreeRestoreCursorPosition(BtCursor *pCur){ ** back to where it ought to be if this routine returns true. 
*/ SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ -#ifdef SQLITE_ENABLE_HCT - assert( EIGHT_BYTE_ALIGNMENT(pCur) ); - return (CURSOR_VALID!=pCur->eState); -#else assert( EIGHT_BYTE_ALIGNMENT(pCur) || pCur==sqlite3BtreeFakeValidCursor() ); assert( offsetof(BtCursor, eState)==0 ); assert( sizeof(pCur->eState)==1 ); return CURSOR_VALID != *(u8*)pCur; -#endif } /* @@ -76108,7 +76229,9 @@ SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){ BtShared *pBt = p->pBt; assert( sqlite3_mutex_held(p->db->mutex) ); sqlite3BtreeEnter(p); + sqlite3PrepareTimeSet(p->db->aPrepareTime, PREPARE_TIME_BEGINSETCACHESIZE); sqlite3PagerSetCachesize(pBt->pPager, mxPage); + sqlite3PrepareTimeSet(p->db->aPrepareTime, PREPARE_TIME_ENDSETCACHESIZE); sqlite3BtreeLeave(p); return SQLITE_OK; } @@ -77435,27 +77558,60 @@ static int btreeRelocateRange( if( pEntry->eType==PTRMAP_FREEPAGE ){ Pgno dummy; + u64 t1; + if( pBt->aCommitTime ) t1 = sqlite3STimeNow(); rc = allocateBtreePage(pBt, &pFree, &dummy, iPg, BTALLOC_EXACT); + if( pBt->aCommitTime ){ + pBt->aCommitTime[COMMIT_TIME_RELOCATE2_EXACTUS] += (sqlite3STimeNow() - t1); + } if( pFree ){ assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); sqlite3PcacheDrop(pFree->pDbPage); } assert( rc!=SQLITE_OK || dummy==iPg ); }else if( pnCurrent ){ + u64 t1; btreeGetPage(pBt, iPg, &pPg, 0); assert( sqlite3PagerIswriteable(pPg->pDbPage) ); assert( sqlite3PagerPageRefcount(pPg->pDbPage)==1 ); iNew = ++(*pnCurrent); if( iNew==PENDING_BYTE_PAGE(pBt) ) iNew = ++(*pnCurrent); + if( pBt->aCommitTime ) t1 = sqlite3STimeNow(); rc = relocatePage(pBt, pPg, pEntry->eType, pEntry->parent, iNew, 1); + if( pBt->aCommitTime ){ + pBt->aCommitTime[COMMIT_TIME_RELOCATE2_RELOCATEUS] += (sqlite3STimeNow() - t1); + } releasePageNotNull(pPg); - }else{ - rc = allocateBtreePage(pBt, &pFree, &iNew, iFirst-1, BTALLOC_LE); + }else if( pEntry->eType!=0 ){ + u64 t1; + if( pBt->aCommitTime ) t1 = sqlite3STimeNow(); + + /* Allocate a new page from the 
free-list to move page iPg to. + ** Except - if the page allocated is within the range being relocated + ** (i.e. pgno>=iFirst), then discard it and allocate another. */ + do { + rc = allocateBtreePage(pBt, &pFree, &iNew, 0, 0); + if( iNew>=iFirst ){ + assert( sqlite3PagerPageRefcount(pFree->pDbPage)==1 ); + assert( iNew>iPg ); + sqlite3PcacheDrop(pFree->pDbPage); + pMap->aPtr[iNew - pMap->iFirst].eType = 0; + pFree = 0; + } + }while( pFree==0 ); + + if( pBt->aCommitTime ){ + pBt->aCommitTime[COMMIT_TIME_RELOCATE2_ALLOCATEUS] += (sqlite3STimeNow() - t1); + } assert( rc!=SQLITE_OK || iNewaCommitTime ) t1 = sqlite3STimeNow(); rc = relocatePage(pBt, pPg, pEntry->eType, pEntry->parent,iNew,1); + if( pBt->aCommitTime ){ + pBt->aCommitTime[COMMIT_TIME_RELOCATE2_RELOCATEUS] += (sqlite3STimeNow() - t1); + } releasePage(pPg); } } @@ -77485,6 +77641,8 @@ static int btreeFixUnlocked(Btree *p){ Pgno nPage = btreePagecount(pBt); u32 nFree = get4byte(&p1[36]); + sqlite3CommitTimeSet(p->pBt->aCommitTime, COMMIT_TIME_START_FIXUNLOCKED); + assert( pBt->pMap ); rc = sqlite3PagerUpgradeSnapshot(pPager, pPage1->pDbPage); assert( p1==pPage1->aData ); @@ -77531,6 +77689,7 @@ static int btreeFixUnlocked(Btree *p){ nCurrent = MAX(nPage, nHPage); pBt->nPage = nCurrent; + sqlite3CommitTimeSet(p->pBt->aCommitTime, COMMIT_TIME_START_RELOCATE1); rc = btreeRelocateRange(pBt, pMap->iFirst, iLast, &nCurrent); /* There are now no collisions with the snapshot at the head of the @@ -77547,7 +77706,17 @@ static int btreeFixUnlocked(Btree *p){ nFin--; } nFin = MAX(nFin, nHPage); + if( p->pBt->aCommitTime ){ + p->pBt->aCommitTime[COMMIT_TIME_OTHERWRITERS] = (1+nHPage-pMap->iFirst); + p->pBt->aCommitTime[COMMIT_TIME_RELOCATE1COUNT] = (1+iLast-pMap->iFirst); + p->pBt->aCommitTime[COMMIT_TIME_RELOCATE2COUNT] = (nCurrent - nFin); + } + sqlite3CommitTimeSet( + p->pBt->aCommitTime, COMMIT_TIME_START_RELOCATE2 + ); + sqlite3PagerSetCommitTime(pBt->pPager, pBt->aCommitTime); rc = btreeRelocateRange(pBt, 
nFin+1, nCurrent, 0); + sqlite3PagerSetCommitTime(pBt->pPager, 0); } put4byte(&p1[28], nFin); @@ -77612,9 +77781,12 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ #endif if( rc==SQLITE_OK && ISCONCURRENT && p->db->eConcurrent==CONCURRENT_OPEN ){ rc = btreeFixUnlocked(p); + sqlite3CommitTimeSet(p->pBt->aCommitTime, COMMIT_TIME_AFTER_FIXUNLOCKED); } if( rc==SQLITE_OK ){ + sqlite3PagerSetCommitTime(pBt->pPager, p->pBt->aCommitTime); rc = sqlite3PagerCommitPhaseOne(pBt->pPager, zSuperJrnl, 0); + sqlite3PagerSetCommitTime(pBt->pPager, 0); } #ifndef SQLITE_OMIT_CONCURRENT if( rc==SQLITE_OK ){ @@ -77719,7 +77891,9 @@ SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int bCleanup){ BtShared *pBt = p->pBt; assert( pBt->inTransaction==TRANS_WRITE ); assert( pBt->nTransaction>0 ); + sqlite3PagerSetCommitTime(pBt->pPager, p->pBt->aCommitTime); rc = sqlite3PagerCommitPhaseTwo(pBt->pPager); + sqlite3PagerSetCommitTime(pBt->pPager, 0); if( rc!=SQLITE_OK && bCleanup==0 ){ sqlite3BtreeLeave(p); return rc; @@ -78102,25 +78276,6 @@ SQLITE_PRIVATE int sqlite3BtreeCursorSize(void){ return ROUND8(sizeof(BtCursor)); } -#ifdef SQLITE_DEBUG -/* -** Return true if and only if the Btree object will be automatically -** closed with the BtCursor closes. This is used within assert() statements -** only. -*/ -SQLITE_PRIVATE int sqlite3BtreeClosesWithCursor( - Btree *pBtree, /* the btree object */ - BtCursor *pCur /* Corresponding cursor */ -){ - BtShared *pBt = pBtree->pBt; - if( (pBt->openFlags & BTREE_SINGLE)==0 ) return 0; - if( pBt->pCursor!=pCur ) return 0; - if( pCur->pNext!=0 ) return 0; - if( pCur->pBtree!=pBtree ) return 0; - return 1; -} -#endif - /* ** Initialize memory that will be converted into a BtCursor object. 
** @@ -85014,97 +85169,6 @@ SQLITE_API int sqlite3_commit_status( return rc; } -/************** Include btreeUndef.h in the middle of btree.c ****************/ -/************** Begin file btreeUndef.h **************************************/ -#undef sqlite3BtreeNext -#undef sqlite3BtreeCursorHasMoved -#undef sqlite3BtreeClearCursor -#undef sqlite3BtreeCursorRestore -#undef sqlite3BtreeCursorHintFlags -#undef sqlite3BtreeCloseCursor -#undef sqlite3BtreeCursorIsValid -#undef sqlite3BtreeCursorIsValidNN -#undef sqlite3BtreeIntegerKey -#undef sqlite3BtreeCursorPin -#undef sqlite3BtreeCursorUnpin -#undef sqlite3BtreePayloadSize -#undef sqlite3BtreeMaxRecordSize -#undef sqlite3BtreePayload -#undef sqlite3BtreePayloadChecked -#undef sqlite3BtreePayloadFetch -#undef sqlite3BtreeFirst -#undef sqlite3BtreeLast -#undef sqlite3BtreeTableMoveto -#undef sqlite3BtreeIndexMoveto -#undef sqlite3BtreeCursorDir -#undef sqlite3BtreeEof -#undef sqlite3BtreeRowCountEst -#undef sqlite3BtreePrevious -#undef sqlite3BtreeInsert -#undef sqlite3BtreeDelete -#undef sqlite3BtreeIdxDelete -#undef sqlite3BtreePutData -#undef sqlite3BtreeIncrblobCursor -#undef sqlite3BtreeCursorHasHint -#undef sqlite3BtreeTransferRow -#undef sqlite3BtreeClearTableOfCursor -#undef sqlite3BtreeCount -#undef sqlite3BtreeCursor -#undef sqlite3BtreeSeekCount -#undef sqlite3BtreeLastPage -#undef sqlite3BtreeClose -#undef sqlite3BtreeSetCacheSize -#undef sqlite3BtreeSetSpillSize -#undef sqlite3BtreeSetMmapLimit -#undef sqlite3BtreeSetPagerFlags -#undef sqlite3BtreeSetPageSize -#undef sqlite3BtreeGetPageSize -#undef sqlite3BtreeGetReserveNoMutex -#undef sqlite3BtreeGetRequestedReserve -#undef sqlite3BtreeMaxPageCount -#undef sqlite3BtreeSecureDelete -#undef sqlite3BtreeSetAutoVacuum -#undef sqlite3BtreeGetAutoVacuum -#undef sqlite3BtreeNewDb -#undef sqlite3BtreeBeginTrans -#undef sqlite3BtreeIncrVacuum -#undef sqlite3BtreeCommitPhaseOne -#undef sqlite3BtreeCommitPhaseTwo -#undef sqlite3BtreeCommit -#undef 
sqlite3BtreeTripAllCursors -#undef sqlite3BtreeRollback -#undef sqlite3BtreeBeginStmt -#undef sqlite3BtreeSavepoint -#undef sqlite3BtreeCreateTable -#undef sqlite3BtreeClearTable -#undef sqlite3BtreeDropTable -#undef sqlite3BtreeGetMeta -#undef sqlite3BtreeUpdateMeta -#undef sqlite3BtreePragma -#undef sqlite3BtreePager -#undef sqlite3BtreeGetFilename -#undef sqlite3BtreeGetJournalname -#undef sqlite3BtreeTxnState -#undef sqlite3BtreeIsInBackup -#undef sqlite3BtreeSchema -#undef sqlite3BtreeSchemaLocked -#undef sqlite3BtreeIsReadonly -#undef sqlite3BtreeSetVersion -#undef sqlite3BtreeIntegrityCheck -#undef sqlite3BtreeCheckpoint -#undef sqlite3BtreeExclusiveLock -#undef sqlite3BtreeFakeValidCursor -#undef sqlite3BtreeCursorSize -#undef sqlite3BtreeCursorZero -#undef sqlite3BtreeOpen -#ifndef SQLITE_DEBUG -# define sqlite3BtreeSeekCount(X) 0 -#endif - - -/************** End of btreeUndef.h ******************************************/ -/************** Continuing where we left off in btree.c **********************/ - /************** End of btree.c ***********************************************/ /************** Begin file backup.c ******************************************/ /* @@ -85876,681 +85940,6 @@ SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){ #endif /* SQLITE_OMIT_VACUUM */ /************** End of backup.c **********************************************/ -/************** Begin file btwrapper.c ***************************************/ -/* -** 2022 November 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. 
-** -************************************************************************* -** -*/ - -/* #include "sqliteInt.h" */ - -#ifndef SQLITE_AMALGAMATION -struct BtCursor { - const BtCursorMethods *pMethods; -}; - -struct Btree { - const BtreeMethods *pMethods; -}; -#endif - -SQLITE_PRIVATE int sqlite3HctBtreeCursorSize(void); -SQLITE_PRIVATE int sqlite3HctBtreeOpen(sqlite3_vfs*, const char*, sqlite3*, Btree**, int, int); -SQLITE_PRIVATE int sqlite3HctBtreeSchemaLoaded(Btree*); - -SQLITE_PRIVATE int sqlite3StockBtreeCursorSize(void); -SQLITE_PRIVATE int sqlite3StockBtreeOpen(sqlite3_vfs*, const char*, sqlite3*, Btree**,int,int); - -SQLITE_PRIVATE int sqlite3StockBtreePragma(Btree *p, char **a){ - return SQLITE_NOTFOUND; -} -SQLITE_PRIVATE void sqlite3StockBtreeCursorDir(BtCursor *p, int a){ - /* no-op */ -} - - -SQLITE_PRIVATE int sqlite3StockBtreeIdxDelete(BtCursor *p, UnpackedRecord *pRec){ - int rc = SQLITE_OK; - int res = 0; - - rc = sqlite3BtreeIndexMoveto(p, pRec, &res); - if( rc==SQLITE_OK && res==0 ){ - rc = sqlite3BtreeDelete(p, BTREE_AUXDELETE); - } - - return rc; -} - -#ifndef SQLITE_DEBUG -SQLITE_PRIVATE int sqlite3StockBtreeCursorIsValid(BtCursor *pCursor){ - return 1; -} -SQLITE_PRIVATE sqlite3_uint64 sqlite3StockBtreeSeekCount(Btree *p){ - return 0; -} -#endif - -/* BEGIN_HCT_MKBTREEWRAPPER_TCL_CODE */ -/****************************************************************** -** GENERATED CODE - DO NOT EDIT! 
-** -** Code generated by tool/hct_mkbtreewrapper.tcl -*/ -struct BtCursorMethods { - int(*xBtreeNext)(BtCursor*, int); - int(*xBtreeCursorHasMoved)(BtCursor*); - void(*xBtreeClearCursor)(BtCursor*); - int(*xBtreeCursorRestore)(BtCursor*, int*); - void(*xBtreeCursorHintFlags)(BtCursor*, unsigned); - int(*xBtreeCloseCursor)(BtCursor*); - int(*xBtreeCursorIsValid)(BtCursor*); - int(*xBtreeCursorIsValidNN)(BtCursor*); - i64(*xBtreeIntegerKey)(BtCursor*); - void(*xBtreeCursorPin)(BtCursor*); - void(*xBtreeCursorUnpin)(BtCursor*); - u32(*xBtreePayloadSize)(BtCursor*); - sqlite3_int64(*xBtreeMaxRecordSize)(BtCursor*); - int(*xBtreePayload)(BtCursor*, u32, u32, void*); - int(*xBtreePayloadChecked)(BtCursor*, u32, u32, void *); - const void *(*xBtreePayloadFetch)(BtCursor*, u32*); - int(*xBtreeFirst)(BtCursor*, int*); - int(*xBtreeLast)(BtCursor*, int*); - int(*xBtreeTableMoveto)(BtCursor*, i64, int, int*); - int(*xBtreeIndexMoveto)(BtCursor*, UnpackedRecord*, int*); - void(*xBtreeCursorDir)(BtCursor*, int); - int(*xBtreeEof)(BtCursor*); - i64(*xBtreeRowCountEst)(BtCursor*); - int(*xBtreePrevious)(BtCursor*, int); - int(*xBtreeInsert)(BtCursor*, const BtreePayload*, int, int); - int(*xBtreeDelete)(BtCursor*, u8); - int(*xBtreeIdxDelete)(BtCursor*, UnpackedRecord*); - int(*xBtreePutData)(BtCursor*, u32, u32, void*); - void(*xBtreeIncrblobCursor)(BtCursor*); - int(*xBtreeCursorHasHint)(BtCursor*, unsigned int); - int(*xBtreeTransferRow)(BtCursor*, BtCursor*, i64); - int(*xBtreeClearTableOfCursor)(BtCursor*); - int(*xBtreeCount)(sqlite3*, BtCursor*, i64*); -}; -struct BtreeMethods { - BtCursorMethods const *pCsrMethods; - int(*xBtreeCursor)(Btree*, Pgno, int, struct KeyInfo*, BtCursor*); - sqlite3_uint64(*xBtreeSeekCount)(Btree*); - Pgno(*xBtreeLastPage)(Btree*); - int(*xBtreeClose)(Btree*); - int(*xBtreeSetCacheSize)(Btree*, int); - int(*xBtreeSetSpillSize)(Btree*, int); - int(*xBtreeSetMmapLimit)(Btree*, sqlite3_int64); - int(*xBtreeSetPagerFlags)(Btree*, unsigned); - 
int(*xBtreeSetPageSize)(Btree*, int, int, int); - int(*xBtreeGetPageSize)(Btree*); - int(*xBtreeGetReserveNoMutex)(Btree*); - int(*xBtreeGetRequestedReserve)(Btree*); - Pgno(*xBtreeMaxPageCount)(Btree*, Pgno); - int(*xBtreeSecureDelete)(Btree*, int); - int(*xBtreeSetAutoVacuum)(Btree*, int); - int(*xBtreeGetAutoVacuum)(Btree*); - int(*xBtreeNewDb)(Btree*); - int(*xBtreeBeginTrans)(Btree*, int, int*); - int(*xBtreeIncrVacuum)(Btree*); - int(*xBtreeCommitPhaseOne)(Btree*, const char*); - int(*xBtreeCommitPhaseTwo)(Btree*, int); - int(*xBtreeCommit)(Btree*); - int(*xBtreeTripAllCursors)(Btree*, int, int); - int(*xBtreeRollback)(Btree*, int, int); - int(*xBtreeBeginStmt)(Btree*, int); - int(*xBtreeSavepoint)(Btree*, int, int); - int(*xBtreeCreateTable)(Btree*, Pgno*, int); - int(*xBtreeClearTable)(Btree*, int, i64*); - int(*xBtreeDropTable)(Btree*, int, int*); - void(*xBtreeGetMeta)(Btree*, int, u32*); - int(*xBtreeUpdateMeta)(Btree*, int, u32); - int(*xBtreePragma)(Btree*, char**); - Pager *(*xBtreePager)(Btree*); - const char *(*xBtreeGetFilename)(Btree*); - const char *(*xBtreeGetJournalname)(Btree*); - int(*xBtreeTxnState)(Btree*); - int(*xBtreeIsInBackup)(Btree*); - void *(*xBtreeSchema)(Btree*, int, void(*)(void *)); - int(*xBtreeSchemaLocked)(Btree*); - int(*xBtreeIsReadonly)(Btree*); - int(*xBtreeSetVersion)(Btree*, int); - int(*xBtreeIntegrityCheck)(sqlite3*, Btree*, Pgno*, Mem*, int, int, int*, char**); - int(*xBtreeCheckpoint)(Btree*, int, int *, int *); - int(*xBtreeExclusiveLock)(Btree*); -}; -SQLITE_PRIVATE int sqlite3BtreeNext(BtCursor *p, int a){ - return p->pMethods->xBtreeNext(p, a); -} -SQLITE_PRIVATE void sqlite3BtreeClearCursor(BtCursor *p){ - p->pMethods->xBtreeClearCursor(p); -} -SQLITE_PRIVATE int sqlite3BtreeCursorRestore(BtCursor *p, int *a){ - return p->pMethods->xBtreeCursorRestore(p, a); -} -SQLITE_PRIVATE void sqlite3BtreeCursorHintFlags(BtCursor *p, unsigned a){ - p->pMethods->xBtreeCursorHintFlags(p, a); -} -SQLITE_PRIVATE int 
sqlite3BtreeCursorIsValid(BtCursor *p){ - return p->pMethods->xBtreeCursorIsValid(p); -} -SQLITE_PRIVATE int sqlite3BtreeCursorIsValidNN(BtCursor *p){ - return p->pMethods->xBtreeCursorIsValidNN(p); -} -SQLITE_PRIVATE i64 sqlite3BtreeIntegerKey(BtCursor *p){ - return p->pMethods->xBtreeIntegerKey(p); -} -SQLITE_PRIVATE void sqlite3BtreeCursorPin(BtCursor *p){ - p->pMethods->xBtreeCursorPin(p); -} -SQLITE_PRIVATE void sqlite3BtreeCursorUnpin(BtCursor *p){ - p->pMethods->xBtreeCursorUnpin(p); -} -SQLITE_PRIVATE u32 sqlite3BtreePayloadSize(BtCursor *p){ - return p->pMethods->xBtreePayloadSize(p); -} -SQLITE_PRIVATE sqlite3_int64 sqlite3BtreeMaxRecordSize(BtCursor *p){ - return p->pMethods->xBtreeMaxRecordSize(p); -} -SQLITE_PRIVATE int sqlite3BtreePayload(BtCursor *p, u32 a, u32 b, void *c){ - return p->pMethods->xBtreePayload(p, a, b, c); -} -SQLITE_PRIVATE int sqlite3BtreePayloadChecked(BtCursor *p, u32 a, u32 b, void *c){ - return p->pMethods->xBtreePayloadChecked(p, a, b, c); -} -const void * sqlite3BtreePayloadFetch(BtCursor *p, u32 *a){ - return p->pMethods->xBtreePayloadFetch(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *p, int *a){ - return p->pMethods->xBtreeFirst(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *p, int *a){ - return p->pMethods->xBtreeLast(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeTableMoveto(BtCursor *p, i64 a, int b, int *c){ - return p->pMethods->xBtreeTableMoveto(p, a, b, c); -} -SQLITE_PRIVATE int sqlite3BtreeIndexMoveto(BtCursor *p, UnpackedRecord *a, int *b){ - return p->pMethods->xBtreeIndexMoveto(p, a, b); -} -SQLITE_PRIVATE void sqlite3BtreeCursorDir(BtCursor *p, int a){ - p->pMethods->xBtreeCursorDir(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeEof(BtCursor *p){ - return p->pMethods->xBtreeEof(p); -} -SQLITE_PRIVATE i64 sqlite3BtreeRowCountEst(BtCursor *p){ - return p->pMethods->xBtreeRowCountEst(p); -} -SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor *p, int a){ - return p->pMethods->xBtreePrevious(p, a); -} 
-SQLITE_PRIVATE int sqlite3BtreeInsert(BtCursor *p, const BtreePayload *a, int b, int c){ - return p->pMethods->xBtreeInsert(p, a, b, c); -} -SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *p, u8 a){ - return p->pMethods->xBtreeDelete(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeIdxDelete(BtCursor *p, UnpackedRecord *a){ - return p->pMethods->xBtreeIdxDelete(p, a); -} -SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *p, u32 a, u32 b, void *c){ - return p->pMethods->xBtreePutData(p, a, b, c); -} -SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *p){ - p->pMethods->xBtreeIncrblobCursor(p); -} -SQLITE_PRIVATE int sqlite3BtreeCursorHasHint(BtCursor *p, unsigned int a){ - return p->pMethods->xBtreeCursorHasHint(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeTransferRow(BtCursor *p, BtCursor *a, i64 b){ - return p->pMethods->xBtreeTransferRow(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeClearTableOfCursor(BtCursor *p){ - return p->pMethods->xBtreeClearTableOfCursor(p); -} -SQLITE_PRIVATE int sqlite3BtreeCount(sqlite3 *a, BtCursor *p, i64 *b){ - return p->pMethods->xBtreeCount(a, p, b); -} -SQLITE_PRIVATE Pgno sqlite3BtreeLastPage(Btree *p){ - return p->pMethods->xBtreeLastPage(p); -} -SQLITE_PRIVATE int sqlite3BtreeClose(Btree *p){ - return p->pMethods->xBtreeClose(p); -} -SQLITE_PRIVATE int sqlite3BtreeSetCacheSize(Btree *p, int a){ - return p->pMethods->xBtreeSetCacheSize(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSetSpillSize(Btree *p, int a){ - return p->pMethods->xBtreeSetSpillSize(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSetMmapLimit(Btree *p, sqlite3_int64 a){ - return p->pMethods->xBtreeSetMmapLimit(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSetPagerFlags(Btree *p, unsigned a){ - return p->pMethods->xBtreeSetPagerFlags(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSetPageSize(Btree *p, int a, int b, int c){ - return p->pMethods->xBtreeSetPageSize(p, a, b, c); -} -SQLITE_PRIVATE int sqlite3BtreeGetPageSize(Btree *p){ - return p->pMethods->xBtreeGetPageSize(p); -} 
-SQLITE_PRIVATE int sqlite3BtreeGetReserveNoMutex(Btree *p){ - return p->pMethods->xBtreeGetReserveNoMutex(p); -} -SQLITE_PRIVATE int sqlite3BtreeGetRequestedReserve(Btree *p){ - return p->pMethods->xBtreeGetRequestedReserve(p); -} -SQLITE_PRIVATE Pgno sqlite3BtreeMaxPageCount(Btree *p, Pgno a){ - return p->pMethods->xBtreeMaxPageCount(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSecureDelete(Btree *p, int a){ - if( p==0 ) return 0; - return p->pMethods->xBtreeSecureDelete(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSetAutoVacuum(Btree *p, int a){ - return p->pMethods->xBtreeSetAutoVacuum(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeGetAutoVacuum(Btree *p){ - return p->pMethods->xBtreeGetAutoVacuum(p); -} -SQLITE_PRIVATE int sqlite3BtreeNewDb(Btree *p){ - return p->pMethods->xBtreeNewDb(p); -} -SQLITE_PRIVATE int sqlite3BtreeBeginTrans(Btree *p, int a, int *b){ - return p->pMethods->xBtreeBeginTrans(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){ - return p->pMethods->xBtreeIncrVacuum(p); -} -SQLITE_PRIVATE int sqlite3BtreeCommitPhaseOne(Btree *p, const char *a){ - return p->pMethods->xBtreeCommitPhaseOne(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeCommitPhaseTwo(Btree *p, int a){ - return p->pMethods->xBtreeCommitPhaseTwo(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeCommit(Btree *p){ - return p->pMethods->xBtreeCommit(p); -} -SQLITE_PRIVATE int sqlite3BtreeTripAllCursors(Btree *p, int a, int b){ - if( p==0 ) return 0; - return p->pMethods->xBtreeTripAllCursors(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeRollback(Btree *p, int a, int b){ - return p->pMethods->xBtreeRollback(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeBeginStmt(Btree *p, int a){ - return p->pMethods->xBtreeBeginStmt(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeSavepoint(Btree *p, int a, int b){ - if( p==0 ) return 0; - return p->pMethods->xBtreeSavepoint(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeCreateTable(Btree *p, Pgno *a, int b){ - return p->pMethods->xBtreeCreateTable(p, a, b); -} 
-SQLITE_PRIVATE int sqlite3BtreeClearTable(Btree *p, int a, i64 *b){ - return p->pMethods->xBtreeClearTable(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeDropTable(Btree *p, int a, int *b){ - return p->pMethods->xBtreeDropTable(p, a, b); -} -SQLITE_PRIVATE void sqlite3BtreeGetMeta(Btree *p, int a, u32 *b){ - p->pMethods->xBtreeGetMeta(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree *p, int a, u32 b){ - return p->pMethods->xBtreeUpdateMeta(p, a, b); -} -SQLITE_PRIVATE int sqlite3BtreePragma(Btree *p, char* *a){ - return p->pMethods->xBtreePragma(p, a); -} -Pager * sqlite3BtreePager(Btree *p){ - return p->pMethods->xBtreePager(p); -} -const char * sqlite3BtreeGetFilename(Btree *p){ - return p->pMethods->xBtreeGetFilename(p); -} -const char * sqlite3BtreeGetJournalname(Btree *p){ - return p->pMethods->xBtreeGetJournalname(p); -} -SQLITE_PRIVATE int sqlite3BtreeTxnState(Btree *p){ - if( p==0 ) return 0; - return p->pMethods->xBtreeTxnState(p); -} -SQLITE_PRIVATE int sqlite3BtreeIsInBackup(Btree *p){ - return p->pMethods->xBtreeIsInBackup(p); -} -void * sqlite3BtreeSchema(Btree *p, int a, void (*xFree)(void *)){ - return p->pMethods->xBtreeSchema(p, a, xFree); -} -SQLITE_PRIVATE int sqlite3BtreeSchemaLocked(Btree *p){ - return p->pMethods->xBtreeSchemaLocked(p); -} -SQLITE_PRIVATE int sqlite3BtreeIsReadonly(Btree *p){ - return p->pMethods->xBtreeIsReadonly(p); -} -SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *p, int a){ - return p->pMethods->xBtreeSetVersion(p, a); -} -SQLITE_PRIVATE int sqlite3BtreeIntegrityCheck(sqlite3 *a, Btree *p, Pgno *b, Mem *c, int d, int e, int *f, char* *g){ - return p->pMethods->xBtreeIntegrityCheck(a, p, b, c, d, e, f, g); -} -SQLITE_PRIVATE int sqlite3BtreeCheckpoint(Btree *p, int a, int *b, int *c){ - if( p==0 ) return 0; - return p->pMethods->xBtreeCheckpoint(p, a, b, c); -} -SQLITE_PRIVATE int sqlite3BtreeExclusiveLock(Btree *p){ - return p->pMethods->xBtreeExclusiveLock(p); -} -static const BtCursorMethods 
hct_btcursor_methods = { - .xBtreeNext = sqlite3HctBtreeNext, - .xBtreeCursorHasMoved = sqlite3HctBtreeCursorHasMoved, - .xBtreeClearCursor = sqlite3HctBtreeClearCursor, - .xBtreeCursorRestore = sqlite3HctBtreeCursorRestore, - .xBtreeCursorHintFlags = sqlite3HctBtreeCursorHintFlags, - .xBtreeCloseCursor = sqlite3HctBtreeCloseCursor, - .xBtreeCursorIsValid = sqlite3HctBtreeCursorIsValid, - .xBtreeCursorIsValidNN = sqlite3HctBtreeCursorIsValidNN, - .xBtreeIntegerKey = sqlite3HctBtreeIntegerKey, - .xBtreeCursorPin = sqlite3HctBtreeCursorPin, - .xBtreeCursorUnpin = sqlite3HctBtreeCursorUnpin, - .xBtreePayloadSize = sqlite3HctBtreePayloadSize, - .xBtreeMaxRecordSize = sqlite3HctBtreeMaxRecordSize, - .xBtreePayload = sqlite3HctBtreePayload, - .xBtreePayloadChecked = sqlite3HctBtreePayloadChecked, - .xBtreePayloadFetch = sqlite3HctBtreePayloadFetch, - .xBtreeFirst = sqlite3HctBtreeFirst, - .xBtreeLast = sqlite3HctBtreeLast, - .xBtreeTableMoveto = sqlite3HctBtreeTableMoveto, - .xBtreeIndexMoveto = sqlite3HctBtreeIndexMoveto, - .xBtreeCursorDir = sqlite3HctBtreeCursorDir, - .xBtreeEof = sqlite3HctBtreeEof, - .xBtreeRowCountEst = sqlite3HctBtreeRowCountEst, - .xBtreePrevious = sqlite3HctBtreePrevious, - .xBtreeInsert = sqlite3HctBtreeInsert, - .xBtreeDelete = sqlite3HctBtreeDelete, - .xBtreeIdxDelete = sqlite3HctBtreeIdxDelete, - .xBtreePutData = sqlite3HctBtreePutData, - .xBtreeIncrblobCursor = sqlite3HctBtreeIncrblobCursor, - .xBtreeCursorHasHint = sqlite3HctBtreeCursorHasHint, - .xBtreeTransferRow = sqlite3HctBtreeTransferRow, - .xBtreeClearTableOfCursor = sqlite3HctBtreeClearTableOfCursor, - .xBtreeCount = sqlite3HctBtreeCount, -}; -static const BtreeMethods hct_btree_methods = { - .pCsrMethods = &hct_btcursor_methods, - .xBtreeCursor = sqlite3HctBtreeCursor, - .xBtreeSeekCount = sqlite3HctBtreeSeekCount, - .xBtreeLastPage = sqlite3HctBtreeLastPage, - .xBtreeClose = sqlite3HctBtreeClose, - .xBtreeSetCacheSize = sqlite3HctBtreeSetCacheSize, - .xBtreeSetSpillSize = 
sqlite3HctBtreeSetSpillSize, - .xBtreeSetMmapLimit = sqlite3HctBtreeSetMmapLimit, - .xBtreeSetPagerFlags = sqlite3HctBtreeSetPagerFlags, - .xBtreeSetPageSize = sqlite3HctBtreeSetPageSize, - .xBtreeGetPageSize = sqlite3HctBtreeGetPageSize, - .xBtreeGetReserveNoMutex = sqlite3HctBtreeGetReserveNoMutex, - .xBtreeGetRequestedReserve = sqlite3HctBtreeGetRequestedReserve, - .xBtreeMaxPageCount = sqlite3HctBtreeMaxPageCount, - .xBtreeSecureDelete = sqlite3HctBtreeSecureDelete, - .xBtreeSetAutoVacuum = sqlite3HctBtreeSetAutoVacuum, - .xBtreeGetAutoVacuum = sqlite3HctBtreeGetAutoVacuum, - .xBtreeNewDb = sqlite3HctBtreeNewDb, - .xBtreeBeginTrans = sqlite3HctBtreeBeginTrans, - .xBtreeIncrVacuum = sqlite3HctBtreeIncrVacuum, - .xBtreeCommitPhaseOne = sqlite3HctBtreeCommitPhaseOne, - .xBtreeCommitPhaseTwo = sqlite3HctBtreeCommitPhaseTwo, - .xBtreeCommit = sqlite3HctBtreeCommit, - .xBtreeTripAllCursors = sqlite3HctBtreeTripAllCursors, - .xBtreeRollback = sqlite3HctBtreeRollback, - .xBtreeBeginStmt = sqlite3HctBtreeBeginStmt, - .xBtreeSavepoint = sqlite3HctBtreeSavepoint, - .xBtreeCreateTable = sqlite3HctBtreeCreateTable, - .xBtreeClearTable = sqlite3HctBtreeClearTable, - .xBtreeDropTable = sqlite3HctBtreeDropTable, - .xBtreeGetMeta = sqlite3HctBtreeGetMeta, - .xBtreeUpdateMeta = sqlite3HctBtreeUpdateMeta, - .xBtreePragma = sqlite3HctBtreePragma, - .xBtreePager = sqlite3HctBtreePager, - .xBtreeGetFilename = sqlite3HctBtreeGetFilename, - .xBtreeGetJournalname = sqlite3HctBtreeGetJournalname, - .xBtreeTxnState = sqlite3HctBtreeTxnState, - .xBtreeIsInBackup = sqlite3HctBtreeIsInBackup, - .xBtreeSchema = sqlite3HctBtreeSchema, - .xBtreeSchemaLocked = sqlite3HctBtreeSchemaLocked, - .xBtreeIsReadonly = sqlite3HctBtreeIsReadonly, - .xBtreeSetVersion = sqlite3HctBtreeSetVersion, - .xBtreeIntegrityCheck = sqlite3HctBtreeIntegrityCheck, - .xBtreeCheckpoint = sqlite3HctBtreeCheckpoint, - .xBtreeExclusiveLock = sqlite3HctBtreeExclusiveLock, -}; - -static const BtCursorMethods 
stock_btcursor_methods = { - .xBtreeNext = sqlite3StockBtreeNext, - .xBtreeCursorHasMoved = sqlite3StockBtreeCursorHasMoved, - .xBtreeClearCursor = sqlite3StockBtreeClearCursor, - .xBtreeCursorRestore = sqlite3StockBtreeCursorRestore, - .xBtreeCursorHintFlags = sqlite3StockBtreeCursorHintFlags, - .xBtreeCloseCursor = sqlite3StockBtreeCloseCursor, - .xBtreeCursorIsValid = sqlite3StockBtreeCursorIsValid, - .xBtreeCursorIsValidNN = sqlite3StockBtreeCursorIsValidNN, - .xBtreeIntegerKey = sqlite3StockBtreeIntegerKey, - .xBtreeCursorPin = sqlite3StockBtreeCursorPin, - .xBtreeCursorUnpin = sqlite3StockBtreeCursorUnpin, - .xBtreePayloadSize = sqlite3StockBtreePayloadSize, - .xBtreeMaxRecordSize = sqlite3StockBtreeMaxRecordSize, - .xBtreePayload = sqlite3StockBtreePayload, - .xBtreePayloadChecked = sqlite3StockBtreePayloadChecked, - .xBtreePayloadFetch = sqlite3StockBtreePayloadFetch, - .xBtreeFirst = sqlite3StockBtreeFirst, - .xBtreeLast = sqlite3StockBtreeLast, - .xBtreeTableMoveto = sqlite3StockBtreeTableMoveto, - .xBtreeIndexMoveto = sqlite3StockBtreeIndexMoveto, - .xBtreeCursorDir = sqlite3StockBtreeCursorDir, - .xBtreeEof = sqlite3StockBtreeEof, - .xBtreeRowCountEst = sqlite3StockBtreeRowCountEst, - .xBtreePrevious = sqlite3StockBtreePrevious, - .xBtreeInsert = sqlite3StockBtreeInsert, - .xBtreeDelete = sqlite3StockBtreeDelete, - .xBtreeIdxDelete = sqlite3StockBtreeIdxDelete, - .xBtreePutData = sqlite3StockBtreePutData, - .xBtreeIncrblobCursor = sqlite3StockBtreeIncrblobCursor, - .xBtreeCursorHasHint = sqlite3StockBtreeCursorHasHint, - .xBtreeTransferRow = sqlite3StockBtreeTransferRow, - .xBtreeClearTableOfCursor = sqlite3StockBtreeClearTableOfCursor, - .xBtreeCount = sqlite3StockBtreeCount, -}; -static const BtreeMethods stock_btree_methods = { - .pCsrMethods = &stock_btcursor_methods, - .xBtreeCursor = sqlite3StockBtreeCursor, - .xBtreeSeekCount = sqlite3StockBtreeSeekCount, - .xBtreeLastPage = sqlite3StockBtreeLastPage, - .xBtreeClose = sqlite3StockBtreeClose, - 
.xBtreeSetCacheSize = sqlite3StockBtreeSetCacheSize, - .xBtreeSetSpillSize = sqlite3StockBtreeSetSpillSize, - .xBtreeSetMmapLimit = sqlite3StockBtreeSetMmapLimit, - .xBtreeSetPagerFlags = sqlite3StockBtreeSetPagerFlags, - .xBtreeSetPageSize = sqlite3StockBtreeSetPageSize, - .xBtreeGetPageSize = sqlite3StockBtreeGetPageSize, - .xBtreeGetReserveNoMutex = sqlite3StockBtreeGetReserveNoMutex, - .xBtreeGetRequestedReserve = sqlite3StockBtreeGetRequestedReserve, - .xBtreeMaxPageCount = sqlite3StockBtreeMaxPageCount, - .xBtreeSecureDelete = sqlite3StockBtreeSecureDelete, - .xBtreeSetAutoVacuum = sqlite3StockBtreeSetAutoVacuum, - .xBtreeGetAutoVacuum = sqlite3StockBtreeGetAutoVacuum, - .xBtreeNewDb = sqlite3StockBtreeNewDb, - .xBtreeBeginTrans = sqlite3StockBtreeBeginTrans, - .xBtreeIncrVacuum = sqlite3StockBtreeIncrVacuum, - .xBtreeCommitPhaseOne = sqlite3StockBtreeCommitPhaseOne, - .xBtreeCommitPhaseTwo = sqlite3StockBtreeCommitPhaseTwo, - .xBtreeCommit = sqlite3StockBtreeCommit, - .xBtreeTripAllCursors = sqlite3StockBtreeTripAllCursors, - .xBtreeRollback = sqlite3StockBtreeRollback, - .xBtreeBeginStmt = sqlite3StockBtreeBeginStmt, - .xBtreeSavepoint = sqlite3StockBtreeSavepoint, - .xBtreeCreateTable = sqlite3StockBtreeCreateTable, - .xBtreeClearTable = sqlite3StockBtreeClearTable, - .xBtreeDropTable = sqlite3StockBtreeDropTable, - .xBtreeGetMeta = sqlite3StockBtreeGetMeta, - .xBtreeUpdateMeta = sqlite3StockBtreeUpdateMeta, - .xBtreePragma = sqlite3StockBtreePragma, - .xBtreePager = sqlite3StockBtreePager, - .xBtreeGetFilename = sqlite3StockBtreeGetFilename, - .xBtreeGetJournalname = sqlite3StockBtreeGetJournalname, - .xBtreeTxnState = sqlite3StockBtreeTxnState, - .xBtreeIsInBackup = sqlite3StockBtreeIsInBackup, - .xBtreeSchema = sqlite3StockBtreeSchema, - .xBtreeSchemaLocked = sqlite3StockBtreeSchemaLocked, - .xBtreeIsReadonly = sqlite3StockBtreeIsReadonly, - .xBtreeSetVersion = sqlite3StockBtreeSetVersion, - .xBtreeIntegrityCheck = sqlite3StockBtreeIntegrityCheck, - 
.xBtreeCheckpoint = sqlite3StockBtreeCheckpoint, - .xBtreeExclusiveLock = sqlite3StockBtreeExclusiveLock, -}; - -/* -** END OF GENERATED CODE -******************************************************************/ -/* END_HCT_MKBTREEWRAPPER_TCL_CODE */ - -#ifdef SQLITE_DEBUG -SQLITE_PRIVATE sqlite3_uint64 sqlite3BtreeSeekCount(Btree *p){ - return p->pMethods->xBtreeSeekCount(p); -} -#endif - -SQLITE_PRIVATE BtCursor *sqlite3BtreeFakeValidCursor(void){ - static BtCursor csr = {0}; - return &csr; -} - -SQLITE_PRIVATE int sqlite3BtreeCursorSize(void){ - return MAX( - sqlite3HctBtreeCursorSize(), - sqlite3StockBtreeCursorSize() - ); -} - -SQLITE_PRIVATE void sqlite3BtreeCursorZero(BtCursor *p){ - memset(p, 0, sqlite3BtreeCursorSize()); -} - -SQLITE_PRIVATE int sqlite3BtreeCursorHasMoved(BtCursor *pCur){ - if( pCur->pMethods==0 ) return 0; - return pCur->pMethods->xBtreeCursorHasMoved(pCur); -} - -SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ - if( pCur->pMethods==0 ) return 0; - return pCur->pMethods->xBtreeCloseCursor(pCur); -} - -SQLITE_PRIVATE int sqlite3BtreeCursor( - Btree *p, /* The btree */ - Pgno iTable, /* Root page of table to open */ - int wrFlag, /* 1 to write. 
0 read-only */ - struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ - BtCursor *pCur /* Write new cursor here */ -){ - int rc = p->pMethods->xBtreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); - pCur->pMethods = p->pMethods->pCsrMethods; - return rc; -} - -static int btWrapperUseHct( - sqlite3_vfs *pVfs, - const char *zFilename, - int *pbUseHct -){ - int rc = SQLITE_OK; - char *zFull = 0; - char *zPagemap = 0; - int bUseHct = 0; - - if( zFilename && zFilename[0] ){ - int nAlloc = pVfs->mxPathname+2; - int bExists = 0; - - zFull = (char*)sqlite3_malloc(nAlloc); - if( zFull==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - memset(zFull, 0, nAlloc); - rc = pVfs->xFullPathname(pVfs, zFilename, pVfs->mxPathname, zFull); - } - - if( rc==SQLITE_OK ){ - rc = pVfs->xAccess(pVfs, zFull, SQLITE_ACCESS_EXISTS, &bExists); - } - if( rc==SQLITE_OK ){ - zPagemap = sqlite3_mprintf("%s-pagemap", zFull); - if( zPagemap==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else if( bExists ){ - rc = pVfs->xAccess(pVfs, zPagemap, SQLITE_ACCESS_EXISTS, &bUseHct); - }else{ - sqlite3OsDelete(pVfs, zPagemap, 0); - bUseHct = sqlite3_uri_boolean(zFilename, "hctree", 0); - } - } - } - - sqlite3_free(zFull); - sqlite3_free(zPagemap); - *pbUseHct = bUseHct; - return rc; -} - -SQLITE_PRIVATE int sqlite3BtreeOpen( - sqlite3_vfs *pVfs, /* VFS to use for this b-tree */ - const char *zFilename, /* Name of the file containing the BTree database */ - sqlite3 *db, /* Associated database handle */ - Btree **ppBtree, /* Pointer to new Btree object written here */ - int flags, /* Options */ - int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ -){ - Btree *pBtree = 0; - int rc = SQLITE_OK; - int bUseHct = 0; - - rc = btWrapperUseHct(pVfs, zFilename, &bUseHct); - if( rc==SQLITE_OK ){ - if( bUseHct ){ - rc = sqlite3HctBtreeOpen(pVfs, zFilename, db, &pBtree, flags, vfsFlags); - if( rc==SQLITE_OK ) pBtree->pMethods = &hct_btree_methods; - }else{ - rc = sqlite3StockBtreeOpen(pVfs, zFilename, db, &pBtree, flags, vfsFlags); 
- if( rc==SQLITE_OK ) pBtree->pMethods = &stock_btree_methods; - } - } - *ppBtree = pBtree; - return rc; -} - -SQLITE_PRIVATE int sqlite3IsHct(Btree *pBt){ - return (pBt && pBt->pMethods==&hct_btree_methods); -} - -SQLITE_PRIVATE int sqlite3BtreeSchemaLoaded(Btree *pBt){ - int rc = SQLITE_OK; - if( sqlite3IsHct(pBt) ){ - rc = sqlite3HctBtreeSchemaLoaded(pBt); - } - return rc; -} - - - -/************** End of btwrapper.c *******************************************/ /************** Begin file vdbemem.c *****************************************/ /* ** 2004 May 26 @@ -88088,8 +87477,7 @@ static int valueFromFunction( goto value_from_function_out; } for(i=0; ipParse, pList->a[i].pExpr, aff, - &apVal[i]); + rc = sqlite3ValueFromExpr(db, pList->a[i].pExpr, enc, aff, &apVal[i]); if( apVal[i]==0 || rc!=SQLITE_OK ) goto value_from_function_out; } } @@ -88625,6 +88013,8 @@ SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ +/* #include "btreeInt.h" */ + /* Forward references */ static void freeEphemeralFunction(sqlite3 *db, FuncDef *pDef); static void vdbeFreeOpArray(sqlite3 *, Op *, int); @@ -91610,13 +91000,18 @@ static int vdbeCommit(sqlite3 *db, Vdbe *p){ if( 0==sqlite3Strlen30(sqlite3BtreeGetFilename(db->aDb[0].pBt)) || nTrans<=1 ){ + sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_BEFORE_PHASEONE); for(i=0; rc==SQLITE_OK && inDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ + pBt->pBt->aCommitTime = p->aCommitTime; rc = sqlite3BtreeCommitPhaseOne(pBt, 0); + pBt->pBt->aCommitTime = 0; } } + sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_BEFORE_PHASETWO); + /* Do the commit only if all databases successfully complete phase 1. ** If one of the BtreeCommitPhaseOne() calls fails, this indicates an ** IO error while deleting or truncating a journal file. 
It is unlikely, @@ -91625,9 +91020,13 @@ static int vdbeCommit(sqlite3 *db, Vdbe *p){ for(i=0; rc==SQLITE_OK && inDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ + pBt->pBt->aCommitTime = p->aCommitTime; rc = sqlite3BtreeCommitPhaseTwo(pBt, 0); + pBt->pBt->aCommitTime = 0; } } + + sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_AFTER_PHASETWO); if( rc==SQLITE_OK ){ sqlite3VtabCommit(db); } @@ -92024,7 +91423,9 @@ SQLITE_PRIVATE int sqlite3VdbeHalt(Vdbe *p){ ** or hit an 'OR FAIL' constraint and there are no deferred foreign ** key constraints to hold up the transaction. This means a commit ** is required. */ + sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_BEFORE_VDBECOMMIT); rc = vdbeCommit(db, p); + sqlite3CommitTimeSet(p->aCommitTime, COMMIT_TIME_AFTER_VDBECOMMIT); } if( (rc & 0xFF)==SQLITE_BUSY && p->readOnly ){ sqlite3VdbeLeave(p); @@ -93143,7 +92544,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3BlobCompare(const Mem *pB1, const Mem ** We must use separate SQLITE_NOINLINE functions here, since otherwise ** optimizer code movement causes gcov to become very confused. 
*/ -#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) +#if defined(SQLITE_COVERAGE_TEST) || defined(SQLITE_DEBUG) static int SQLITE_NOINLINE doubleLt(double a, double b){ return ar ); + testcase( x==r ); + return (xr); }else{ i64 y; if( r<-9223372036854775808.0 ) return +1; @@ -94049,6 +93457,101 @@ SQLITE_PRIVATE int sqlite3CursorRangeHintExprCheck(Walker *pWalker, Expr *pExpr) } #endif /* SQLITE_ENABLE_CURSOR_HINTS && SQLITE_DEBUG */ +/* #include */ +SQLITE_PRIVATE void sqlite3CommitTimeLog(u64 *aCommit){ + u64 i1 = aCommit[COMMIT_TIME_START]; + assert( COMMIT_TIME_START==0 && COMMIT_TIME_FINISH==COMMIT_TIME_N-1 ); + if( aCommit[COMMIT_TIME_FINISH]>(i1+COMMIT_TIME_TIMEOUT) ){ + char *zStr = 0; + int ii; + for(ii=1; ii(i1+PREPARE_TIME_TIMEOUT) ){ + int nByte = nSql; + char *zStr = 0; + int ii; + for(ii=1; ii(i1+SCHEMA_TIME_TIMEOUT) ){ + char *zStr = 0; + int ii; + for(ii=1; iinField; i++){ @@ -94168,13 +93670,6 @@ SQLITE_PRIVATE void sqlite3VdbePreUpdateHook( } sqlite3DbNNFreeNN(db, preupdate.aNew); } - if( preupdate.apDflt ){ - int i; - for(i=0; inCol; i++){ - sqlite3ValueFree(preupdate.apDflt[i]); - } - sqlite3DbFree(db, preupdate.apDflt); - } } #endif /* SQLITE_ENABLE_PREUPDATE_HOOK */ @@ -95803,17 +95298,6 @@ SQLITE_API const void *sqlite3_column_origin_name16(sqlite3_stmt *pStmt, int N){ ** ** The error code stored in database p->db is overwritten with the return ** value in any case. -** -** (tag-20240917-01) If vdbeUnbind(p,(u32)(i-1)) returns SQLITE_OK, -** that means all of the the following will be true: -** -** p!=0 -** p->pVar!=0 -** i>0 -** i<=p->nVar -** -** An assert() is normally added after vdbeUnbind() to help static analyzers -** realize this. 
*/ static int vdbeUnbind(Vdbe *p, unsigned int i){ Mem *pVar; @@ -95871,7 +95355,6 @@ static int bindText( rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ - assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ if( zData!=0 ){ pVar = &p->aVar[i-1]; rc = sqlite3VdbeMemSetStr(pVar, zData, nData, encoding, xDel); @@ -95921,7 +95404,6 @@ SQLITE_API int sqlite3_bind_double(sqlite3_stmt *pStmt, int i, double rValue){ Vdbe *p = (Vdbe *)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ - assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3VdbeMemSetDouble(&p->aVar[i-1], rValue); sqlite3_mutex_leave(p->db->mutex); } @@ -95935,7 +95417,6 @@ SQLITE_API int sqlite3_bind_int64(sqlite3_stmt *pStmt, int i, sqlite_int64 iValu Vdbe *p = (Vdbe *)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ - assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3VdbeMemSetInt64(&p->aVar[i-1], iValue); sqlite3_mutex_leave(p->db->mutex); } @@ -95946,7 +95427,6 @@ SQLITE_API int sqlite3_bind_null(sqlite3_stmt *pStmt, int i){ Vdbe *p = (Vdbe*)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ - assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3_mutex_leave(p->db->mutex); } return rc; @@ -95962,7 +95442,6 @@ SQLITE_API int sqlite3_bind_pointer( Vdbe *p = (Vdbe*)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ - assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ sqlite3VdbeMemSetPointer(&p->aVar[i-1], pPtr, zPTtype, xDestructor); sqlite3_mutex_leave(p->db->mutex); }else if( xDestructor ){ @@ -96044,7 +95523,6 @@ SQLITE_API int sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, int n){ Vdbe *p = (Vdbe *)pStmt; rc = vdbeUnbind(p, (u32)(i-1)); if( rc==SQLITE_OK ){ - assert( p!=0 && p->aVar!=0 && i>0 && i<=p->nVar ); /* tag-20240917-01 */ #ifndef SQLITE_OMIT_INCRBLOB sqlite3VdbeMemSetZeroBlob(&p->aVar[i-1], n); #else @@ -96379,64 +95857,37 
@@ SQLITE_API int sqlite3_preupdate_old(sqlite3 *db, int iIdx, sqlite3_value **ppVa goto preupdate_old_out; } - if( iIdx==p->pTab->iPKey ){ - *ppValue = pMem = &p->oldipk; - sqlite3VdbeMemSetInt64(pMem, p->iKey1); - }else{ + /* If the old.* record has not yet been loaded into memory, do so now. */ + if( p->pUnpacked==0 ){ + u32 nRec; + u8 *aRec; - /* If the old.* record has not yet been loaded into memory, do so now. */ - if( p->pUnpacked==0 ){ - u32 nRec; - u8 *aRec; - - assert( p->pCsr->eCurType==CURTYPE_BTREE ); - nRec = sqlite3BtreePayloadSize(p->pCsr->uc.pCursor); - aRec = sqlite3DbMallocRaw(db, nRec); - if( !aRec ) goto preupdate_old_out; - rc = sqlite3BtreePayload(p->pCsr->uc.pCursor, 0, nRec, aRec); - if( rc==SQLITE_OK ){ - p->pUnpacked = vdbeUnpackRecord(&p->keyinfo, nRec, aRec); - if( !p->pUnpacked ) rc = SQLITE_NOMEM; - } - if( rc!=SQLITE_OK ){ - sqlite3DbFree(db, aRec); - goto preupdate_old_out; - } - p->aRecord = aRec; + assert( p->pCsr->eCurType==CURTYPE_BTREE ); + nRec = sqlite3BtreePayloadSize(p->pCsr->uc.pCursor); + aRec = sqlite3DbMallocRaw(db, nRec); + if( !aRec ) goto preupdate_old_out; + rc = sqlite3BtreePayload(p->pCsr->uc.pCursor, 0, nRec, aRec); + if( rc==SQLITE_OK ){ + p->pUnpacked = vdbeUnpackRecord(&p->keyinfo, nRec, aRec); + if( !p->pUnpacked ) rc = SQLITE_NOMEM; } + if( rc!=SQLITE_OK ){ + sqlite3DbFree(db, aRec); + goto preupdate_old_out; + } + p->aRecord = aRec; + } - pMem = *ppValue = &p->pUnpacked->aMem[iIdx]; - if( iIdx>=p->pUnpacked->nField ){ - /* This occurs when the table has been extended using ALTER TABLE - ** ADD COLUMN. The value to return is the default value of the column. 
*/ - Column *pCol = &p->pTab->aCol[iIdx]; - if( pCol->iDflt>0 ){ - if( p->apDflt==0 ){ - int nByte = sizeof(sqlite3_value*)*p->pTab->nCol; - p->apDflt = (sqlite3_value**)sqlite3DbMallocZero(db, nByte); - if( p->apDflt==0 ) goto preupdate_old_out; - } - if( p->apDflt[iIdx]==0 ){ - sqlite3_value *pVal = 0; - Expr *pDflt; - assert( p->pTab!=0 && IsOrdinaryTable(p->pTab) ); - pDflt = p->pTab->u.tab.pDfltList->a[pCol->iDflt-1].pExpr; - rc = sqlite3ValueFromExpr(db, pDflt, ENC(db), pCol->affinity, &pVal); - if( rc==SQLITE_OK && pVal==0 ){ - rc = SQLITE_CORRUPT_BKPT; - } - p->apDflt[iIdx] = pVal; - } - *ppValue = p->apDflt[iIdx]; - }else{ - *ppValue = (sqlite3_value *)columnNullValue(); - } - }else if( p->pTab->aCol[iIdx].affinity==SQLITE_AFF_REAL ){ - if( pMem->flags & (MEM_Int|MEM_IntReal) ){ - testcase( pMem->flags & MEM_Int ); - testcase( pMem->flags & MEM_IntReal ); - sqlite3VdbeMemRealify(pMem); - } + pMem = *ppValue = &p->pUnpacked->aMem[iIdx]; + if( iIdx==p->pTab->iPKey ){ + sqlite3VdbeMemSetInt64(pMem, p->iKey1); + }else if( iIdx>=p->pUnpacked->nField ){ + *ppValue = (sqlite3_value *)columnNullValue(); + }else if( p->pTab->aCol[iIdx].affinity==SQLITE_AFF_REAL ){ + if( pMem->flags & (MEM_Int|MEM_IntReal) ){ + testcase( pMem->flags & MEM_Int ); + testcase( pMem->flags & MEM_IntReal ); + sqlite3VdbeMemRealify(pMem); } } @@ -96984,104 +96435,6 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ -/* -** High-resolution hardware timer used for debugging and testing only. -*/ -#if defined(VDBE_PROFILE) \ - || defined(SQLITE_PERFORMANCE_TRACE) \ - || defined(SQLITE_ENABLE_STMT_SCANSTATUS) -/************** Include hwtime.h in the middle of vdbe.c *********************/ -/************** Begin file hwtime.h ******************************************/ -/* -** 2008 May 27 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. 
-** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains inline asm code for retrieving "high-performance" -** counters for x86 and x86_64 class CPUs. -*/ -#ifndef SQLITE_HWTIME_H -#define SQLITE_HWTIME_H - -/* -** The following routine only works on Pentium-class (or newer) processors. -** It uses the RDTSC opcode to read the cycle count value out of the -** processor and returns that value. This can be used for high-res -** profiling. -*/ -#if !defined(__STRICT_ANSI__) && \ - (defined(__GNUC__) || defined(_MSC_VER)) && \ - (defined(i386) || defined(__i386__) || defined(_M_IX86)) - - #if defined(__GNUC__) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned int lo, hi; - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (sqlite_uint64)hi << 32 | lo; - } - - #elif defined(_MSC_VER) - - __declspec(naked) __inline sqlite_uint64 __cdecl sqlite3Hwtime(void){ - __asm { - rdtsc - ret ; return value at EDX:EAX - } - } - - #endif - -#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__x86_64__)) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned int lo, hi; - __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); - return (sqlite_uint64)hi << 32 | lo; - } - -#elif !defined(__STRICT_ANSI__) && (defined(__GNUC__) && defined(__ppc__)) - - __inline__ sqlite_uint64 sqlite3Hwtime(void){ - unsigned long long retval; - unsigned long junk; - __asm__ __volatile__ ("\n\ - 1: mftbu %1\n\ - mftb %L0\n\ - mftbu %0\n\ - cmpw %0,%1\n\ - bne 1b" - : "=r" (retval), "=r" (junk)); - return retval; - } - -#else - - /* - ** asm() is needed for hardware timing support. Without asm(), - ** disable the sqlite3Hwtime() routine. 
- ** - ** sqlite3Hwtime() is only used for some obscure debugging - ** and analysis configurations, not in any deliverable, so this - ** should not be a great loss. - */ -SQLITE_PRIVATE sqlite_uint64 sqlite3Hwtime(void){ return ((sqlite_uint64)0); } - -#endif - -#endif /* !defined(SQLITE_HWTIME_H) */ - -/************** End of hwtime.h **********************************************/ -/************** Continuing where we left off in vdbe.c ***********************/ -#endif - /* ** Invoke this macro on memory cells just prior to changing the ** value of the cell. This macro verifies that shallow copies are @@ -100049,9 +99402,7 @@ case OP_Column: { /* ncycle */ pC->payloadSize = sqlite3BtreePayloadSize(pCrsr); pC->aRow = sqlite3BtreePayloadFetch(pCrsr, &pC->szRow); assert( pC->szRow<=pC->payloadSize ); -#if 0 assert( pC->szRow<=65536 ); /* Maximum page size is 64KiB */ -#endif } pC->cacheStatus = p->cacheCtr; if( (aOffset[0] = pC->aRow[0])<0x80 ){ @@ -101018,6 +100369,13 @@ case OP_AutoCommit: { assert( p->bIsReader ); if( desiredAutoCommit!=db->autoCommit ){ + + u64 aCommit[COMMIT_TIME_N]; + memset(aCommit, 0, sizeof(aCommit)); + if( iRollback==0 ){ + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_START); + } + if( iRollback ){ assert( desiredAutoCommit==1 ); sqlite3RollbackAll(db, SQLITE_ABORT_ROLLBACK); @@ -101042,7 +100400,11 @@ case OP_AutoCommit: { }else{ db->autoCommit = (u8)desiredAutoCommit; } + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_BEFORE_HALT); + p->aCommitTime = aCommit; hrc = sqlite3VdbeHalt(p); + p->aCommitTime = 0; + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_AFTER_HALT); if( (hrc & 0xFF)==SQLITE_BUSY ){ p->pc = (int)(pOp - aOp); db->autoCommit = (u8)(1-desiredAutoCommit); @@ -101058,6 +100420,8 @@ case OP_AutoCommit: { }else{ rc = SQLITE_ERROR; } + sqlite3CommitTimeSet(aCommit, COMMIT_TIME_FINISH); + if( desiredAutoCommit && !iRollback ) sqlite3CommitTimeLog(aCommit); goto vdbe_return; }else{ sqlite3VdbeError(p, @@ -101282,11 +100646,6 @@ case 
OP_SetCookie: { *(u32*)&pDb->pSchema->schema_cookie = *(u32*)&pOp->p3 - pOp->p5; db->mDbFlags |= DBFLAG_SchemaChange; sqlite3FkClearTriggerCache(db, pOp->p1); -#ifdef SQLITE_ENABLE_HCT - if( sqlite3IsHct(pDb->pBt) ){ - rc = sqlite3HctSchemaOp(pDb->pBt, p->zSql); - } -#endif }else if( pOp->p2==BTREE_FILE_FORMAT ){ /* Record changes in the file format */ pDb->pSchema->file_format = pOp->p3; @@ -101633,13 +100992,8 @@ case OP_OpenEphemeral: { /* ncycle */ } } pCx->isOrdered = (pOp->p5!=BTREE_UNORDERED); - assert( p->apCsr[pOp->p1]==pCx ); if( rc ){ - assert( !sqlite3BtreeClosesWithCursor(pCx->ub.pBtx, pCx->uc.pCursor) ); sqlite3BtreeClose(pCx->ub.pBtx); - p->apCsr[pOp->p1] = 0; /* Not required; helps with static analysis */ - }else{ - assert( sqlite3BtreeClosesWithCursor(pCx->ub.pBtx, pCx->uc.pCursor) ); } } } @@ -101937,9 +101291,6 @@ case OP_SeekGT: { /* jump0, in3, group, ncycle */ if( (oc & 0x0001)==(OP_SeekLT & 0x0001) ) oc++; } } - sqlite3BtreeCursorDir(pC->uc.pCursor, - (oc==OP_SeekGE || oc==OP_SeekGT) ? BTREE_DIR_FORWARD : BTREE_DIR_REVERSE - ); rc = sqlite3BtreeTableMoveto(pC->uc.pCursor, (u64)iKey, 0, &res); pC->movetoTarget = iKey; /* Used by OP_Delete */ if( rc!=SQLITE_OK ){ @@ -101993,9 +101344,6 @@ case OP_SeekGT: { /* jump0, in3, group, ncycle */ } #endif r.eqSeen = 0; - sqlite3BtreeCursorDir(pC->uc.pCursor, - (oc==OP_SeekGE || oc==OP_SeekGT) ? BTREE_DIR_FORWARD : BTREE_DIR_REVERSE - ); rc = sqlite3BtreeIndexMoveto(pC->uc.pCursor, &r, &res); if( rc!=SQLITE_OK ){ goto abort_due_to_error; @@ -102417,9 +101765,6 @@ case OP_Found: { /* jump, in3, ncycle */ assert( pC->eCurType==CURTYPE_BTREE ); assert( pC->uc.pCursor!=0 ); assert( pC->isTable==0 ); - sqlite3BtreeCursorDir(pC->uc.pCursor, - pOp->opcode==OP_NoConflict ? 
BTREE_DIR_NONE : BTREE_DIR_FORWARD - ); r.nField = (u16)pOp->p4.i; if( r.nField>0 ){ /* Key values in an array of registers */ @@ -102570,7 +101915,6 @@ case OP_NotExists: /* jump, in3, ncycle */ pCrsr = pC->uc.pCursor; assert( pCrsr!=0 ); res = 0; - sqlite3BtreeCursorDir(pCrsr, 0); rc = sqlite3BtreeTableMoveto(pCrsr, iKey, 0, &res); assert( rc==SQLITE_OK || res==0 ); pC->movetoTarget = iKey; /* Used by OP_Delete */ @@ -103652,6 +102996,7 @@ case OP_SorterInsert: { /* in2 */ case OP_IdxDelete: { VdbeCursor *pC; BtCursor *pCrsr; + int res; UnpackedRecord r; assert( pOp->p3>0 ); @@ -103667,10 +103012,6 @@ case OP_IdxDelete: { r.nField = (u16)pOp->p3; r.default_rc = 0; r.aMem = &aMem[pOp->p2]; -#if 1 - rc = sqlite3BtreeIdxDelete(pCrsr, &r); - if( rc ) goto abort_due_to_error; -#else rc = sqlite3BtreeIndexMoveto(pCrsr, &r, &res); if( rc ) goto abort_due_to_error; if( res==0 ){ @@ -103680,7 +103021,6 @@ case OP_IdxDelete: { rc = sqlite3ReportError(SQLITE_CORRUPT_INDEX, __LINE__, "index corruption"); goto abort_due_to_error; } -#endif assert( pC->deferredMoveto==0 ); pC->cacheStatus = CACHE_STALE; pC->seekResult = 0; @@ -106189,7 +105529,7 @@ case OP_ReleaseReg: { ** As with all opcodes, the meanings of the parameters for OP_Explain ** are subject to change from one release to the next. Applications ** should not attempt to interpret or use any of the information -** contained in the OP_Explain opcode. The information provided by this +** contined in the OP_Explain opcode. The information provided by this ** opcode is intended for testing and debugging use only. 
*/ default: { /* This is really OP_Noop, OP_Explain */ @@ -111285,7 +110625,7 @@ static int lookupName( */ if( cntTab==0 || (cntTab==1 - && pMatch!=0 + && ALWAYS(pMatch!=0) && ALWAYS(pMatch->pSTab!=0) && (pMatch->pSTab->tabFlags & TF_Ephemeral)!=0 && (pTab->tabFlags & TF_Ephemeral)==0) @@ -111918,8 +111258,8 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ /* Resolve function names */ case TK_FUNCTION: { - ExprList *pList; /* The argument list */ - int n; /* Number of arguments */ + ExprList *pList = pExpr->x.pList; /* The argument list */ + int n = pList ? pList->nExpr : 0; /* Number of arguments */ int no_such_func = 0; /* True if no such function exists */ int wrong_num_args = 0; /* True if wrong number of arguments */ int is_agg = 0; /* True if is an aggregate function */ @@ -111932,8 +111272,6 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ #endif assert( !ExprHasProperty(pExpr, EP_xIsSelect|EP_IntValue) ); assert( pExpr->pLeft==0 || pExpr->pLeft->op==TK_ORDER ); - pList = pExpr->x.pList; - n = pList ? pList->nExpr : 0; zId = pExpr->u.zToken; pDef = sqlite3FindFunction(pParse->db, zId, n, enc, 0); if( pDef==0 ){ @@ -111982,24 +111320,6 @@ static int resolveExprStep(Walker *pWalker, Expr *pExpr){ } } #endif - - /* If the function may call sqlite3_value_subtype(), then set the - ** EP_SubtArg flag on all of its argument expressions. This prevents - ** where.c from replacing the expression with a value read from an - ** index on the same expression, which will not have the correct - ** subtype. Also set the flag if the function expression itself is - ** an EP_SubtArg expression. In this case subtypes are required as - ** the function may return a value with a subtype back to its - ** caller using sqlite3_result_value(). 
*/ - if( (pDef->funcFlags & SQLITE_SUBTYPE) - || ExprHasProperty(pExpr, EP_SubtArg) - ){ - int ii; - for(ii=0; iia[ii].pExpr, EP_SubtArg); - } - } - if( pDef->funcFlags & (SQLITE_FUNC_CONSTANT|SQLITE_FUNC_SLOCHNG) ){ /* For the purposes of the EP_ConstFunc flag, date and time ** functions and other functions that change slowly are considered @@ -113599,7 +112919,7 @@ static int codeCompare( p5 = binaryCompareP5(pLeft, pRight, jumpIfNull); addr = sqlite3VdbeAddOp4(pParse->pVdbe, opcode, in2, dest, in1, (void*)p4, P4_COLLSEQ); - sqlite3VdbeChangeP5(pParse->pVdbe, (u8)p5); + sqlite3VdbeChangeP5(pParse->pVdbe, (u16)p5); return addr; } @@ -117674,59 +116994,6 @@ static int exprCodeInlineFunction( return target; } -/* -** Expression Node callback for sqlite3ExprCanReturnSubtype(). -** -** Only a function call is able to return a subtype. So if the node -** is not a function call, return WRC_Prune immediately. -** -** A function call is able to return a subtype if it has the -** SQLITE_RESULT_SUBTYPE property. -** -** Assume that every function is able to pass-through a subtype from -** one of its argument (using sqlite3_result_value()). Most functions -** are not this way, but we don't have a mechanism to distinguish those -** that are from those that are not, so assume they all work this way. -** That means that if one of its arguments is another function and that -** other function is able to return a subtype, then this function is -** able to return a subtype. -*/ -static int exprNodeCanReturnSubtype(Walker *pWalker, Expr *pExpr){ - int n; - FuncDef *pDef; - sqlite3 *db; - if( pExpr->op!=TK_FUNCTION ){ - return WRC_Prune; - } - assert( ExprUseXList(pExpr) ); - db = pWalker->pParse->db; - n = ALWAYS(pExpr->x.pList) ? 
pExpr->x.pList->nExpr : 0; - pDef = sqlite3FindFunction(db, pExpr->u.zToken, n, ENC(db), 0); - if( NEVER(pDef==0) || (pDef->funcFlags & SQLITE_RESULT_SUBTYPE)!=0 ){ - pWalker->eCode = 1; - return WRC_Prune; - } - return WRC_Continue; -} - -/* -** Return TRUE if expression pExpr is able to return a subtype. -** -** A TRUE return does not guarantee that a subtype will be returned. -** It only indicates that a subtype return is possible. False positives -** are acceptable as they only disable an optimization. False negatives, -** on the other hand, can lead to incorrect answers. -*/ -static int sqlite3ExprCanReturnSubtype(Parse *pParse, Expr *pExpr){ - Walker w; - memset(&w, 0, sizeof(w)); - w.pParse = pParse; - w.xExprCallback = exprNodeCanReturnSubtype; - sqlite3WalkExpr(&w, pExpr); - return w.eCode; -} - - /* ** Check to see if pExpr is one of the indexed expressions on pParse->pIdxEpr. ** If it is, then resolve the expression by reading from the index and @@ -117759,17 +117026,6 @@ static SQLITE_NOINLINE int sqlite3IndexedExprLookup( continue; } - - /* Functions that might set a subtype should not be replaced by the - ** value taken from an expression index if they are themselves an - ** argument to another scalar function or aggregate. - ** https://sqlite.org/forum/forumpost/68d284c86b082c3e */ - if( ExprHasProperty(pExpr, EP_SubtArg) - && sqlite3ExprCanReturnSubtype(pParse, pExpr) - ){ - continue; - } - v = pParse->pVdbe; assert( v!=0 ); if( p->bMaybeNullRow ){ @@ -123024,6 +122280,13 @@ static void openStatTable( # define SQLITE_STAT4_SAMPLES 24 #endif +/* +** Assumed number of of samples when loading sqlite_stat4 data. It doesn't +** matter if there are more or fewer samples than this, but is more efficient +** if this estimate turns out to be true. 
+*/ +#define SQLITE_STAT4_EST_SAMPLES SQLITE_STAT4_SAMPLES + /* ** Three SQL functions - stat_init(), stat_push(), and stat_get() - ** share an instance of the following structure to hold their state @@ -124318,6 +123581,9 @@ static void decodeIntArray( #endif if( *z==' ' ) z++; } + if( aOut ){ + for(/* no-op */; iaSample[j]; sqlite3DbFree(db, p->p); } - sqlite3DbFree(db, pIdx->aSample); + if( pIdx->nSampleAlloc!=SQLITE_STAT4_EST_SAMPLES ){ + sqlite3DbFree(db, pIdx->aSample); + } } if( db->pnBytesFreed==0 ){ pIdx->nSample = 0; pIdx->aSample = 0; + pIdx->nSampleAlloc = 0; } #else UNUSED_PARAMETER(db); @@ -124530,8 +123799,110 @@ static Index *findIndexOrPrimaryKey( } /* -** Load the content from either the sqlite_stat4 -** into the relevant Index.aSample[] arrays. +** Grow the pIdx->aSample[] array. Return SQLITE_OK if successful, or +** SQLITE_NOMEM otherwise. +*/ +static int growSampleArray(sqlite3 *db, Index *pIdx, int *piOff){ + int nIdxCol = pIdx->nSampleCol; + int nNew = 0; + IndexSample *aNew = 0; + int nByte = 0; + tRowcnt *pSpace; /* Available allocated memory space */ + u8 *pPtr; /* Available memory as a u8 for easier manipulation */ + int i; + u64 t; + + assert( pIdx->nSample==pIdx->nSampleAlloc ); + nNew = SQLITE_STAT4_EST_SAMPLES; + if( pIdx->nSample ){ + nNew = pIdx->nSample*2; + } + + /* Set nByte to the required amount of space */ + nByte = ROUND8(sizeof(IndexSample) * nNew); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * nNew; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + + if( nNew==SQLITE_STAT4_EST_SAMPLES ){ + aNew = (IndexSample*)&((u8*)pIdx->pSchema->pStat4Space)[*piOff]; + *piOff += nByte; + assert( *piOff<=sqlite3_msize(pIdx->pSchema->pStat4Space) ); + }else{ + aNew = (IndexSample*)sqlite3DbMallocRaw(db, nByte); + if( aNew==0 ) return SQLITE_NOMEM_BKPT; + } + + pPtr = (u8*)aNew; + pPtr += ROUND8(nNew*sizeof(pIdx->aSample[0])); + pSpace = (tRowcnt*)pPtr; + + pIdx->aAvgEq = pSpace; pSpace += nIdxCol; + assert( 
EIGHT_BYTE_ALIGNMENT( pSpace ) ); + + if( pIdx->nSample ){ + /* Copy the contents of the anEq[], anLt[], anDLt[] arrays for all + ** extant samples to the new location. */ + int nByte = nIdxCol * 3 * sizeof(tRowcnt) * pIdx->nSample; + memcpy(pSpace, pIdx->aSample[0].anEq, nByte); + } + for(i=0; inSample ){ + aNew[i].p = pIdx->aSample[i].p; + aNew[i].n = pIdx->aSample[i].n; + } + } + assert( ((u8*)pSpace)-nByte==(u8*)aNew ); + + if( pIdx->nSample!=SQLITE_STAT4_EST_SAMPLES ){ + sqlite3DbFree(db, pIdx->aSample); + } + pIdx->aSample = aNew; + pIdx->nSampleAlloc = nNew; + return SQLITE_OK; +} + +/* +** Allocate the space that will likely be required for the Index.aSample[] +** arrays populated by loading data from the sqlite_stat4 table. Return +** SQLITE_OK if successful, or SQLITE_NOMEM otherwise. +*/ +static int stat4AllocSpace(sqlite3 *db, const char *zDb){ + int iDb = sqlite3FindDbName(db, zDb); + Schema *pSchema = db->aDb[iDb].pSchema; + int nByte = 0; + HashElem *k; + + assert( iDb>=0 ); + assert( pSchema->pStat4Space==0 ); + for(k=sqliteHashFirst(&pSchema->idxHash); k; k=sqliteHashNext(k)){ + Index *pIdx = sqliteHashData(k); + int nIdxCol; + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + nIdxCol = pIdx->nKeyCol; + }else{ + nIdxCol = pIdx->nColumn; + } + nByte += ROUND8(sizeof(IndexSample) * SQLITE_STAT4_EST_SAMPLES); + nByte += sizeof(tRowcnt) * nIdxCol * 3 * SQLITE_STAT4_EST_SAMPLES; + nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + } + + if( nByte>0 ){ + pSchema->pStat4Space = sqlite3_malloc(nByte); + if( pSchema->pStat4Space==0 ){ + return SQLITE_NOMEM_BKPT; + } + } + + return SQLITE_OK; +} + +/* +** Load the content from the sqlite_stat4 into the relevant Index.aSample[] +** arrays. 
** ** Arguments zSql1 and zSql2 must point to SQL statements that return ** data equivalent to the following: @@ -124552,69 +123923,16 @@ static int loadStatTbl( char *zSql; /* Text of the SQL statement */ Index *pPrevIdx = 0; /* Previous index in the loop */ IndexSample *pSample; /* A slot in pIdx->aSample[] */ + int iBlockOff = 0; /* Offset into Schema.pStat4Space */ assert( db->lookaside.bDisable ); - zSql = sqlite3MPrintf(db, zSql1, zDb); - if( !zSql ){ - return SQLITE_NOMEM_BKPT; - } - rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); - sqlite3DbFree(db, zSql); - if( rc ) return rc; - - while( sqlite3_step(pStmt)==SQLITE_ROW ){ - int nIdxCol = 1; /* Number of columns in stat4 records */ - - char *zIndex; /* Index name */ - Index *pIdx; /* Pointer to the index object */ - int nSample; /* Number of samples */ - i64 nByte; /* Bytes of space required */ - i64 i; /* Bytes of space required */ - tRowcnt *pSpace; /* Available allocated memory space */ - u8 *pPtr; /* Available memory as a u8 for easier manipulation */ - zIndex = (char *)sqlite3_column_text(pStmt, 0); - if( zIndex==0 ) continue; - nSample = sqlite3_column_int(pStmt, 1); - pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); - assert( pIdx==0 || pIdx->nSample==0 ); - if( pIdx==0 ) continue; - if( pIdx->aSample!=0 ){ - /* The same index appears in sqlite_stat4 under multiple names */ - continue; - } - assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); - if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ - nIdxCol = pIdx->nKeyCol; - }else{ - nIdxCol = pIdx->nColumn; - } - pIdx->nSampleCol = nIdxCol; - pIdx->mxSample = nSample; - nByte = ROUND8(sizeof(IndexSample) * nSample); - nByte += sizeof(tRowcnt) * nIdxCol * 3 * nSample; - nByte += nIdxCol * sizeof(tRowcnt); /* Space for Index.aAvgEq[] */ + /* Allocate the Schema.pStat4Space block that will be used for the + ** Index.aSample[] arrays populated by this call. 
*/ + rc = stat4AllocSpace(db, zDb); + if( rc!=SQLITE_OK ) return rc; - pIdx->aSample = sqlite3DbMallocZero(db, nByte); - if( pIdx->aSample==0 ){ - sqlite3_finalize(pStmt); - return SQLITE_NOMEM_BKPT; - } - pPtr = (u8*)pIdx->aSample; - pPtr += ROUND8(nSample*sizeof(pIdx->aSample[0])); - pSpace = (tRowcnt*)pPtr; - assert( EIGHT_BYTE_ALIGNMENT( pSpace ) ); - pIdx->aAvgEq = pSpace; pSpace += nIdxCol; - pIdx->pTable->tabFlags |= TF_HasStat4; - for(i=0; iaSample[i].anEq = pSpace; pSpace += nIdxCol; - pIdx->aSample[i].anLt = pSpace; pSpace += nIdxCol; - pIdx->aSample[i].anDLt = pSpace; pSpace += nIdxCol; - } - assert( ((u8*)pSpace)-nByte==(u8*)(pIdx->aSample) ); - } - rc = sqlite3_finalize(pStmt); - if( rc ) return rc; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_SPACE); zSql = sqlite3MPrintf(db, zSql2, zDb); if( !zSql ){ @@ -124624,27 +123942,41 @@ static int loadStatTbl( sqlite3DbFree(db, zSql); if( rc ) return rc; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_PREPARE); + while( sqlite3_step(pStmt)==SQLITE_ROW ){ char *zIndex; /* Index name */ Index *pIdx; /* Pointer to the index object */ int nCol = 1; /* Number of columns in index */ + u64 t = sqlite3STimeNow(); zIndex = (char *)sqlite3_column_text(pStmt, 0); if( zIndex==0 ) continue; pIdx = findIndexOrPrimaryKey(db, zIndex, zDb); if( pIdx==0 ) continue; - if( pIdx->nSample>=pIdx->mxSample ){ - /* Too many slots used because the same index appears in - ** sqlite_stat4 using multiple names */ - continue; + + if( pIdx->nSample==pIdx->nSampleAlloc ){ + u64 t2; + pIdx->pTable->tabFlags |= TF_HasStat4; + assert( !HasRowid(pIdx->pTable) || pIdx->nColumn==pIdx->nKeyCol+1 ); + if( !HasRowid(pIdx->pTable) && IsPrimaryKeyIndex(pIdx) ){ + pIdx->nSampleCol = pIdx->nKeyCol; + }else{ + pIdx->nSampleCol = pIdx->nColumn; + } + t2 = sqlite3STimeNow(); + if( growSampleArray(db, pIdx, &iBlockOff) ) break; + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_GROWUS] += (sqlite3STimeNow() - 
t); + } } - /* This next condition is true if data has already been loaded from - ** the sqlite_stat4 table. */ - nCol = pIdx->nSampleCol; + if( pIdx!=pPrevIdx ){ initAvgEq(pPrevIdx); pPrevIdx = pIdx; } + + nCol = pIdx->nSampleCol; pSample = &pIdx->aSample[pIdx->nSample]; decodeIntArray((char*)sqlite3_column_text(pStmt,1),nCol,pSample->anEq,0,0); decodeIntArray((char*)sqlite3_column_text(pStmt,2),nCol,pSample->anLt,0,0); @@ -124667,8 +123999,13 @@ static int loadStatTbl( memcpy(pSample->p, sqlite3_column_blob(pStmt, 4), pSample->n); } pIdx->nSample++; + + if( db->aSchemaTime ){ + db->aSchemaTime[SCHEMA_TIME_STAT4_Q2_BODYUS] += (sqlite3STimeNow() - t); + } } rc = sqlite3_finalize(pStmt); + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4_Q2); if( rc==SQLITE_OK ) initAvgEq(pPrevIdx); return rc; } @@ -124741,6 +124078,12 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ pIdx->aSample = 0; #endif } +#ifdef SQLITE_ENABLE_STAT4 + sqlite3_free(pSchema->pStat4Space); + pSchema->pStat4Space = 0; +#endif + + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_CLEAR_STATS); /* Load new statistics out of the sqlite_stat1 table */ sInfo.db = db; @@ -124758,6 +124101,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT1); + /* Set appropriate defaults on all indexes not in the sqlite_stat1 table */ assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); for(i=sqliteHashFirst(&pSchema->idxHash); i; i=sqliteHashNext(i)){ @@ -124765,6 +124110,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ if( !pIdx->hasStat1 ) sqlite3DefaultRowEst(pIdx); } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_DEFAULTS); + /* Load the statistics from the sqlite_stat4 table. 
*/ #ifdef SQLITE_ENABLE_STAT4 if( rc==SQLITE_OK ){ @@ -124779,6 +124126,8 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ } #endif + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_AFTER_STAT4); + if( rc==SQLITE_NOMEM ){ sqlite3OomFault(db); } @@ -125019,6 +124368,15 @@ static void attachFunc( sqlite3BtreeLeaveAll(db); assert( zErrDyn==0 || rc!=SQLITE_OK ); } +#ifdef SQLITE_USER_AUTHENTICATION + if( rc==SQLITE_OK && !REOPEN_AS_MEMDB(db) ){ + u8 newAuth = 0; + rc = sqlite3UserAuthCheckLogin(db, zName, &newAuth); + if( newAuthauth.authLevel ){ + rc = SQLITE_AUTH_USER; + } + } +#endif if( rc ){ if( ALWAYS(!REOPEN_AS_MEMDB(db)) ){ int iDb = db->nDb - 1; @@ -125516,7 +124874,11 @@ SQLITE_PRIVATE int sqlite3AuthReadCol( int rc; /* Auth callback return code */ if( db->init.busy ) return SQLITE_OK; - rc = db->xAuth(db->pAuthArg, SQLITE_READ, zTab,zCol,zDb,pParse->zAuthContext); + rc = db->xAuth(db->pAuthArg, SQLITE_READ, zTab,zCol,zDb,pParse->zAuthContext +#ifdef SQLITE_USER_AUTHENTICATION + ,db->auth.zAuthUser +#endif + ); if( rc==SQLITE_DENY ){ char *z = sqlite3_mprintf("%s.%s", zTab, zCol); if( db->nDb>2 || iDb!=0 ) z = sqlite3_mprintf("%s.%z", zDb, z); @@ -125623,7 +124985,11 @@ SQLITE_PRIVATE int sqlite3AuthCheck( testcase( zArg3==0 ); testcase( pParse->zAuthContext==0 ); - rc = db->xAuth(db->pAuthArg,code,zArg1,zArg2,zArg3,pParse->zAuthContext); + rc = db->xAuth(db->pAuthArg, code, zArg1, zArg2, zArg3, pParse->zAuthContext +#ifdef SQLITE_USER_AUTHENTICATION + ,db->auth.zAuthUser +#endif + ); if( rc==SQLITE_DENY ){ sqlite3ErrorMsg(pParse, "not authorized"); pParse->rc = SQLITE_AUTH; @@ -125856,6 +125222,17 @@ SQLITE_PRIVATE void sqlite3FinishCoding(Parse *pParse){ } sqlite3VdbeAddOp0(v, OP_Halt); +#if SQLITE_USER_AUTHENTICATION && !defined(SQLITE_OMIT_SHARED_CACHE) + if( pParse->nTableLock>0 && db->init.busy==0 ){ + sqlite3UserAuthInit(db); + if( db->auth.authLevelrc = SQLITE_AUTH_USER; + return; + } + } +#endif + /* The cookie mask contains one 
bit for each database file open. ** (Bit 0 is for main, bit 1 is for temp, and so forth.) Bits are ** set for each database that is used. Generate code to start a @@ -125984,6 +125361,16 @@ SQLITE_PRIVATE void sqlite3NestedParse(Parse *pParse, const char *zFormat, ...){ pParse->nested--; } +#if SQLITE_USER_AUTHENTICATION +/* +** Return TRUE if zTable is the name of the system table that stores the +** list of users and their access credentials. +*/ +SQLITE_PRIVATE int sqlite3UserAuthTable(const char *zTable){ + return sqlite3_stricmp(zTable, "sqlite_user")==0; +} +#endif + /* ** Locate the in-memory structure that describes a particular database ** table given the name of that table and (optionally) the name of the @@ -126002,6 +125389,13 @@ SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3 *db, const char *zName, const cha /* All mutexes are required for schema access. Make sure we hold them. */ assert( zDatabase!=0 || sqlite3BtreeHoldsAllMutexes(db) ); +#if SQLITE_USER_AUTHENTICATION + /* Only the admin user is allowed to know that the sqlite_user table + ** exists */ + if( db->auth.authLevelnDb; i++){ if( sqlite3StrICmp(zDatabase, db->aDb[i].zDbSName)==0 ) break; @@ -129660,6 +129054,9 @@ SQLITE_PRIVATE void sqlite3CreateIndex( if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 && db->init.busy==0 && pTblName!=0 +#if SQLITE_USER_AUTHENTICATION + && sqlite3UserAuthTable(pTab->zName)==0 +#endif ){ sqlite3ErrorMsg(pParse, "table %s may not be indexed", pTab->zName); goto exit_create_index; @@ -131289,7 +130686,6 @@ SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoOfIndex(Parse *pParse, Index *pIdx){ pKey = sqlite3KeyInfoAlloc(pParse->db, nCol, 0); } if( pKey ){ - if( pIdx->onError ) pKey->nUniqField = pIdx->nKeyCol; assert( sqlite3KeyInfoIsWriteable(pKey) ); for(i=0; iazColl[i]; @@ -131952,6 +131348,10 @@ SQLITE_PRIVATE void sqlite3SchemaClear(void *p){ pSchema->iGeneration++; } pSchema->schemaFlags &= ~(DB_SchemaLoaded|DB_ResetWanted); +#ifdef SQLITE_ENABLE_STAT4 + 
sqlite3_free(pSchema->pStat4Space); + pSchema->pStat4Space = 0; +#endif } /* @@ -132056,7 +131456,6 @@ SQLITE_PRIVATE void sqlite3CodeChangeCount(Vdbe *v, int regCounter, const char * ** is for a top-level SQL statement. */ static int vtabIsReadOnly(Parse *pParse, Table *pTab){ - assert( IsVirtual(pTab) ); if( sqlite3GetVTable(pParse->db, pTab)->pMod->pModule->xUpdate==0 ){ return 1; } @@ -135551,13 +134950,7 @@ static void signFunc( ** Implementation of fpdecode(x,y,z) function. ** ** x is a real number that is to be decoded. y is the precision. -** z is the maximum real precision. Return a string that shows the -** results of the sqlite3FpDecode() function. -** -** Used for testing and debugging only, specifically testing and debugging -** of the sqlite3FpDecode() function. This SQL function does not appear -** in production builds. This function is not an API and is subject to -** modification or removal in future versions of SQLite. +** z is the maximum real precision. */ static void fpdecodeFunc( sqlite3_context *context, @@ -135584,82 +134977,6 @@ static void fpdecodeFunc( } #endif /* SQLITE_DEBUG */ -#ifdef SQLITE_DEBUG -/* -** Implementation of parseuri(uri,flags) function. -** -** Required Arguments: -** "uri" The URI to parse. -** "flags" Bitmask of flags, as if to sqlite3_open_v2(). -** -** Additional arguments beyond the first two make calls to -** sqlite3_uri_key() for integers and sqlite3_uri_parameter for -** anything else. -** -** The result is a string showing the results of calling sqlite3ParseUri(). -** -** Used for testing and debugging only, specifically testing and debugging -** of the sqlite3ParseUri() function. This SQL function does not appear -** in production builds. This function is not an API and is subject to -** modification or removal in future versions of SQLite. 
-*/ -static void parseuriFunc( - sqlite3_context *ctx, - int argc, - sqlite3_value **argv -){ - sqlite3_str *pResult; - const char *zVfs; - const char *zUri; - unsigned int flgs; - int rc; - sqlite3_vfs *pVfs = 0; - char *zFile = 0; - char *zErr = 0; - - if( argc<2 ) return; - pVfs = sqlite3_vfs_find(0); - assert( pVfs ); - zVfs = pVfs->zName; - zUri = (const char*)sqlite3_value_text(argv[0]); - if( zUri==0 ) return; - flgs = (unsigned int)sqlite3_value_int(argv[1]); - rc = sqlite3ParseUri(zVfs, zUri, &flgs, &pVfs, &zFile, &zErr); - pResult = sqlite3_str_new(0); - if( pResult ){ - int i; - sqlite3_str_appendf(pResult, "rc=%d", rc); - sqlite3_str_appendf(pResult, ", flags=0x%x", flgs); - sqlite3_str_appendf(pResult, ", vfs=%Q", pVfs ? pVfs->zName: 0); - sqlite3_str_appendf(pResult, ", err=%Q", zErr); - sqlite3_str_appendf(pResult, ", file=%Q", zFile); - if( zFile ){ - const char *z = zFile; - z += sqlite3Strlen30(z)+1; - while( z[0] ){ - sqlite3_str_appendf(pResult, ", %Q", z); - z += sqlite3Strlen30(z)+1; - } - for(i=2; ibHctMigrate ) bUseSeek = 0; sqlite3CompleteInsertion(pParse, pTab, iDataCur, iIdxCur, regIns, aRegIdx, 0, appendFlag, bUseSeek ); @@ -139643,11 +138960,7 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( ** the following conflict logic if it does not. 
*/ VdbeNoopComment((v, "uniqueness check for ROWID")); sqlite3VdbeVerifyAbortable(v, onError); - if( db->bHctMigrate ){ - sqlite3VdbeAddOp2(v, OP_Goto, 0, addrRowidOk); - }else{ - sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRowidOk, regNewData); - } + sqlite3VdbeAddOp3(v, OP_NotExists, iDataCur, addrRowidOk, regNewData); VdbeCoverage(v); switch( onError ){ @@ -139863,13 +139176,9 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( /* Check to see if the new index entry will be unique */ sqlite3VdbeVerifyAbortable(v, onError); - if( db->bHctMigrate ){ - addrConflictCk = sqlite3VdbeAddOp2(v, OP_Goto, 0, addrUniqueOk); - }else{ - addrConflictCk = - sqlite3VdbeAddOp4Int(v, OP_NoConflict, iThisCur, addrUniqueOk, - regIdx, pIdx->nKeyCol); VdbeCoverage(v); - } + addrConflictCk = + sqlite3VdbeAddOp4Int(v, OP_NoConflict, iThisCur, addrUniqueOk, + regIdx, pIdx->nKeyCol); VdbeCoverage(v); /* Generate code to handle collisions */ regR = pIdx==pPk ? regIdx : sqlite3GetTempRange(pParse, nPkField); @@ -140802,6 +140111,7 @@ SQLITE_API int sqlite3_exec( int nCol = 0; char **azVals = 0; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_BEFORE_PREPARE); pStmt = 0; rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, &zLeftover); assert( rc==SQLITE_OK || pStmt==0 ); @@ -140815,6 +140125,7 @@ SQLITE_API int sqlite3_exec( } callbackIsInit = 0; + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_BEFORE_STEP); while( 1 ){ int i; rc = sqlite3_step(pStmt); @@ -140860,6 +140171,7 @@ SQLITE_API int sqlite3_exec( } } + sqlite3PrepareTimeSet(db->aSchemaTime, SCHEMA_TIME_BEFORE_FINALIZE); if( rc!=SQLITE_ROW ){ rc = sqlite3VdbeFinalize((Vdbe *)pStmt); pStmt = 0; @@ -143643,6 +142955,7 @@ SQLITE_PRIVATE void sqlite3Pragma( Vdbe *v = sqlite3GetVdbe(pParse); /* Prepared statement */ const PragmaName *pPragma; /* The pragma */ + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINPRAGMA); if( v==0 ) return; sqlite3VdbeRunOnlyOnce(v); pParse->nMem = 2; @@ -143668,11 +142981,13 @@ 
SQLITE_PRIVATE void sqlite3Pragma( zRight = sqlite3NameFromToken(db, pValue); } + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINAUTHCHECK); assert( pId2 ); zDb = pId2->n>0 ? pDb->zDbSName : 0; if( sqlite3AuthCheck(pParse, SQLITE_PRAGMA, zLeft, zRight, zDb) ){ goto pragma_out; } + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDAUTHCHECK); /* Send an SQLITE_FCNTL_PRAGMA file-control to the underlying VFS ** connection. If it returns SQLITE_OK, then assume that the VFS @@ -143694,10 +143009,7 @@ SQLITE_PRIVATE void sqlite3Pragma( aFcntl[2] = zRight; aFcntl[3] = 0; db->busyHandler.nBusy = 0; - rc = sqlite3BtreePragma(pDb->pBt, aFcntl); - if( rc==SQLITE_NOTFOUND ){ - rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl); - } + rc = sqlite3_file_control(db, zDb, SQLITE_FCNTL_PRAGMA, (void*)aFcntl); if( rc==SQLITE_OK ){ sqlite3VdbeSetNumCols(v, 1); sqlite3VdbeSetColName(v, 0, COLNAME_NAME, aFcntl[0], SQLITE_TRANSIENT); @@ -143723,10 +143035,12 @@ SQLITE_PRIVATE void sqlite3Pragma( goto pragma_out; } + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINLOADSCHEMA); /* Make sure the database schema is loaded if the pragma requires that */ if( (pPragma->mPragFlg & PragFlg_NeedSchema)!=0 ){ if( sqlite3ReadSchema(pParse) ) goto pragma_out; } + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDLOADSCHEMA); /* Register the result column names for pragmas that return results */ if( (pPragma->mPragFlg & PragFlg_NoColumns)==0 @@ -144086,6 +143400,7 @@ SQLITE_PRIVATE void sqlite3Pragma( */ case PragTyp_CACHE_SIZE: { assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINCACHESIZE); if( !zRight ){ returnSingleInt(v, pDb->pSchema->cache_size); }else{ @@ -144093,6 +143408,7 @@ SQLITE_PRIVATE void sqlite3Pragma( pDb->pSchema->cache_size = size; sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); } + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDCACHESIZE); break; } 
@@ -144365,6 +143681,12 @@ SQLITE_PRIVATE void sqlite3Pragma( ** in auto-commit mode. */ mask &= ~(SQLITE_ForeignKeys); } +#if SQLITE_USER_AUTHENTICATION + if( db->auth.authLevel==UAUTH_User ){ + /* Do not allow non-admin users to modify the schema arbitrarily */ + mask &= ~(SQLITE_WriteSchema); + } +#endif if( sqlite3GetBoolean(zRight, 0) ){ if( (mask & SQLITE_WriteSchema)==0 @@ -144976,12 +144298,11 @@ SQLITE_PRIVATE void sqlite3Pragma( /* Make sure sufficient number of registers have been allocated */ sqlite3TouchRegister(pParse, 8+cnt); - sqlite3VdbeAddOp3(v, OP_Null, 0, 8, 8+cnt); sqlite3ClearTempRegCache(pParse); /* Do the b-tree integrity checks */ sqlite3VdbeAddOp4(v, OP_IntegrityCk, 1, cnt, 8, (char*)aRoot,P4_INTARRAY); - sqlite3VdbeChangeP5(v, (u8)i); + sqlite3VdbeChangeP5(v, (u16)i); addr = sqlite3VdbeAddOp1(v, OP_IsNull, 2); VdbeCoverage(v); sqlite3VdbeAddOp4(v, OP_String8, 0, 3, 0, sqlite3MPrintf(db, "*** in database %s ***\n", db->aDb[i].zDbSName), @@ -145974,6 +145295,7 @@ SQLITE_PRIVATE void sqlite3Pragma( pragma_out: sqlite3DbFree(db, zLeft); sqlite3DbFree(db, zRight); + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDPRAGMA); } #ifndef SQLITE_OMIT_VIRTUALTABLE /***************************************************************************** @@ -146504,6 +145826,11 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl int openedTransaction = 0; int mask = ((db->mDbFlags & DBFLAG_EncodingFixed) | ~DBFLAG_EncodingFixed); + u64 aSchemaTime[SCHEMA_TIME_N]; + memset(aSchemaTime, 0, sizeof(aSchemaTime)); + db->aSchemaTime = aSchemaTime; + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_START); + assert( (db->mDbFlags & DBFLAG_SchemaKnownOk)==0 ); assert( iDb>=0 && iDbnDb ); assert( db->aDb[iDb].pSchema ); @@ -146538,6 +145865,8 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl goto error_out; } + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_CREATE_1); + /* Create a cursor to hold 
the database open */ pDb = &db->aDb[iDb]; @@ -146561,6 +145890,8 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl openedTransaction = 1; } + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_OPEN_TRANS); + /* Get the database meta information. ** ** Meta values are as follows: @@ -146586,6 +145917,8 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl } pDb->pSchema->schema_cookie = meta[BTREE_SCHEMA_VERSION-1]; + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_GET_META); + /* If opening a non-empty database, check the text encoding. For the ** main database, set sqlite3.enc to the encoding of the main database. ** For an attached db, it is an error if the encoding is not the same @@ -146601,7 +145934,14 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl #else encoding = SQLITE_UTF8; #endif - sqlite3SetTextEncoding(db, encoding); + if( db->nVdbeActive>0 && encoding!=ENC(db) + && (db->mDbFlags & DBFLAG_Vacuum)==0 + ){ + rc = SQLITE_LOCKED; + goto initone_error_out; + }else{ + sqlite3SetTextEncoding(db, encoding); + } }else{ /* If opening an attached database, the encoding much match ENC(db) */ if( (meta[BTREE_TEXT_ENCODING-1] & 3)!=ENC(db) ){ @@ -146614,6 +145954,8 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl } pDb->pSchema->enc = ENC(db); + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_FIX_ENCODING); + if( pDb->pSchema->cache_size==0 ){ #ifndef SQLITE_OMIT_DEPRECATED size = sqlite3AbsInt32(meta[BTREE_DEFAULT_CACHE_SIZE-1]); @@ -146625,6 +145967,8 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); } + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_AFTER_SETCACHESIZE); + /* ** file_format==1 Version 3.0.0. ** file_format==2 Version 3.1.3. 
// ALTER TABLE ADD COLUMN @@ -146665,6 +146009,7 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl xAuth = db->xAuth; db->xAuth = 0; #endif + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_BEGIN_EXEC); rc = sqlite3_exec(db, zSql, sqlite3InitCallback, &initData, 0); #ifndef SQLITE_OMIT_AUTHORIZATION db->xAuth = xAuth; @@ -146672,11 +146017,13 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl #endif if( rc==SQLITE_OK ) rc = initData.rc; sqlite3DbFree(db, zSql); + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_BEGIN_ANALYZE_LOAD); #ifndef SQLITE_OMIT_ANALYZE if( rc==SQLITE_OK ){ sqlite3AnalysisLoad(db, iDb); } #endif + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_END_ANALYZE_LOAD); } assert( pDb == &(db->aDb[iDb]) ); if( db->mallocFailed ){ @@ -146710,6 +146057,12 @@ SQLITE_PRIVATE int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg, u32 mFl sqlite3BtreeLeave(pDb->pBt); error_out: + db->aSchemaTime = 0; + sqlite3PrepareTimeSet(aSchemaTime, SCHEMA_TIME_FINISH); + if( rc==SQLITE_OK && iDb==0 ){ + const char *zFile = sqlite3BtreeGetFilename(pDb->pBt); + sqlite3SchemaTimeLog(aSchemaTime, zFile); + } if( rc ){ if( rc==SQLITE_NOMEM || rc==SQLITE_IOERR_NOMEM ){ sqlite3OomFault(db); @@ -146773,14 +146126,6 @@ SQLITE_PRIVATE int sqlite3ReadSchema(Parse *pParse){ }else if( db->noSharedCache ){ db->mDbFlags |= DBFLAG_SchemaKnownOk; } -#ifdef SQLITE_ENABLE_HCT - { - int iDb; - for(iDb=0; rc==SQLITE_OK && iDbnDb; iDb++){ - rc = sqlite3BtreeSchemaLoaded(db->aDb[iDb].pBt); - } - } -#endif } return rc; } @@ -147078,14 +146423,18 @@ static int sqlite3Prepare( } zSqlCopy = sqlite3DbStrNDup(db, zSql, nBytes); if( zSqlCopy ){ + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINPARSE); sqlite3RunParser(&sParse, zSqlCopy); + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDPARSE); sParse.zTail = &zSql[sParse.zTail-zSqlCopy]; sqlite3DbFree(db, zSqlCopy); }else{ sParse.zTail = &zSql[nBytes]; } }else{ + 
sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_BEGINPARSE); sqlite3RunParser(&sParse, zSql); + sqlite3PrepareTimeSet(db->aPrepareTime, PREPARE_TIME_ENDPARSE); } assert( 0==sParse.nQueryLoop ); @@ -147146,6 +146495,12 @@ static int sqlite3LockAndPrepare( ){ int rc; int cnt = 0; + u64 *aPrepareSave = db->aPrepareTime; + + u64 aPrepareTime[PREPARE_TIME_N]; + memset(aPrepareTime, 0, sizeof(aPrepareTime)); + sqlite3PrepareTimeSet(aPrepareTime, PREPARE_TIME_START); + db->aPrepareTime = aPrepareTime; #ifdef SQLITE_ENABLE_API_ARMOR if( ppStmt==0 ) return SQLITE_MISUSE_BKPT; @@ -147171,6 +146526,11 @@ static int sqlite3LockAndPrepare( db->busyHandler.nBusy = 0; sqlite3_mutex_leave(db->mutex); assert( rc==SQLITE_OK || (*ppStmt)==0 ); + + db->aPrepareTime = aPrepareSave; + sqlite3PrepareTimeSet(aPrepareTime, PREPARE_TIME_FINISH); + sqlite3PrepareTimeLog(zSql, nBytes, aPrepareTime); + return rc; } @@ -147303,24 +146663,12 @@ static int sqlite3Prepare16( if( !sqlite3SafetyCheckOk(db)||zSql==0 ){ return SQLITE_MISUSE_BKPT; } - - /* Make sure nBytes is non-negative and correct. It should be the - ** number of bytes until the end of the input buffer or until the first - ** U+0000 character. If the input nBytes is odd, convert it into - ** an even number. If the input nBytes is negative, then the input - ** must be terminated by at least one U+0000 character */ if( nBytes>=0 ){ int sz; const char *z = (const char*)zSql; for(sz=0; szmutex); zSql8 = sqlite3Utf16to8(db, zSql, nBytes, SQLITE_UTF16NATIVE); if( zSql8 ){ @@ -147334,7 +146682,7 @@ static int sqlite3Prepare16( ** the same number of characters into the UTF-16 string. 
*/ int chars_parsed = sqlite3Utf8CharLen(zSql8, (int)(zTail8-zSql8)); - *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, nBytes, chars_parsed); + *pzTail = (u8 *)zSql + sqlite3Utf16ByteLen(zSql, chars_parsed); } sqlite3DbFree(db, zSql8); rc = sqlite3ApiExit(db, rc); @@ -148919,7 +148267,6 @@ SQLITE_PRIVATE KeyInfo *sqlite3KeyInfoAlloc(sqlite3 *db, int N, int X){ p->enc = ENC(db); p->db = db; p->nRef = 1; - p->nUniqField = 0; memset(&p[1], 0, nExtra); }else{ return (KeyInfo*)sqlite3OomFault(db); @@ -154222,7 +153569,7 @@ static void finalizeAggFunctions(Parse *pParse, AggInfo *pAggInfo){ } sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3VdbeChangeP5(v, (u16)nArg); sqlite3VdbeAddOp2(v, OP_Next, pF->iOBTab, iTop+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, iTop); sqlite3ReleaseTempRange(pParse, regAgg, nArg); @@ -154385,7 +153732,7 @@ static void updateAccumulator( } sqlite3VdbeAddOp3(v, OP_AggStep, 0, regAgg, AggInfoFuncReg(pAggInfo,i)); sqlite3VdbeAppendP4(v, pF->pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3VdbeChangeP5(v, (u16)nArg); sqlite3ReleaseTempRange(pParse, regAgg, nArg); } if( addrNext ){ @@ -157779,7 +157126,7 @@ SQLITE_PRIVATE void sqlite3CodeRowTriggerDirect( ** invocation is disallowed if (a) the sub-program is really a trigger, ** not a foreign key action, and (b) the flag to enable recursive triggers ** is clear. 
*/ - sqlite3VdbeChangeP5(v, (u8)bRecursive); + sqlite3VdbeChangeP5(v, (u16)bRecursive); } } @@ -159824,15 +159171,6 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( sqlite3SetString(pzErrMsg, db,"cannot VACUUM - SQL statements in progress"); return SQLITE_ERROR; /* IMP: R-15610-35227 */ } - if( sqlite3IsHct(db->aDb[iDb].pBt) ){ - if( pOut==0 ){ - /* Silent noop */ - return SQLITE_OK; - } - sqlite3SetString(pzErrMsg, db, "cannot VACUUM - hctree database"); - return SQLITE_ERROR; - } - saved_openFlags = db->openFlags; if( pOut ){ if( sqlite3_value_type(pOut)!=SQLITE_TEXT ){ @@ -160948,7 +160286,6 @@ SQLITE_API int sqlite3_declare_vtab(sqlite3 *db, const char *zCreateTable){ Table *pNew = sParse.pNewTable; Index *pIdx; pTab->aCol = pNew->aCol; - assert( IsOrdinaryTable(pNew) ); sqlite3ExprListDelete(db, pNew->u.tab.pDfltList); pTab->nNVCol = pTab->nCol = pNew->nCol; pTab->tabFlags |= pNew->tabFlags & (TF_WithoutRowid|TF_NoVisibleRowid); @@ -162013,17 +161350,9 @@ SQLITE_PRIVATE int sqlite3WhereExplainBloomFilter( const WhereInfo *pWInfo, /* WHERE clause */ const WhereLevel *pLevel /* Bloom filter on this level */ ); -SQLITE_PRIVATE void sqlite3WhereAddExplainText( - Parse *pParse, /* Parse context */ - int addr, - SrcList *pTabList, /* Table list this loop refers to */ - WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ - u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ -); #else # define sqlite3WhereExplainOneScan(u,v,w,x) 0 # define sqlite3WhereExplainBloomFilter(u,v,w) 0 -# define sqlite3WhereAddExplainText(u,v,w,x,y) #endif /* SQLITE_OMIT_EXPLAIN */ #ifdef SQLITE_ENABLE_STMT_SCANSTATUS SQLITE_PRIVATE void sqlite3WhereAddScanStatus( @@ -162225,38 +161554,38 @@ static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop){ } /* -** This function sets the P4 value of an existing OP_Explain opcode to -** text describing the loop in pLevel. If the OP_Explain opcode already has -** a P4 value, it is freed before it is overwritten. 
+** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN +** command, or if stmt_scanstatus_v2() stats are enabled, or if SQLITE_DEBUG +** was defined at compile-time. If it is not a no-op, a single OP_Explain +** opcode is added to the output to describe the table scan strategy in pLevel. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. */ -SQLITE_PRIVATE void sqlite3WhereAddExplainText( +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( Parse *pParse, /* Parse context */ - int addr, /* Address of OP_Explain opcode */ SrcList *pTabList, /* Table list this loop refers to */ WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ ){ + int ret = 0; #if !defined(SQLITE_DEBUG) if( sqlite3ParseToplevel(pParse)->explain==2 || IS_STMT_SCANSTATUS(pParse->db) ) #endif { - VdbeOp *pOp = sqlite3VdbeGetOp(pParse->pVdbe, addr); - SrcItem *pItem = &pTabList->a[pLevel->iFrom]; + Vdbe *v = pParse->pVdbe; /* VM being constructed */ sqlite3 *db = pParse->db; /* Database handle */ int isSearch; /* True for a SEARCH. False for SCAN. 
*/ WhereLoop *pLoop; /* The controlling WhereLoop object */ u32 flags; /* Flags that describe this loop */ -#if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_EXPLAIN) char *zMsg; /* Text to add to EQP output */ -#endif StrAccum str; /* EQP output string */ char zBuf[100]; /* Initial space for EQP output string */ - if( db->mallocFailed ) return; - pLoop = pLevel->pWLoop; flags = pLoop->wsFlags; + if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_OR_SUBCLAUSE) ) return 0; isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) @@ -162280,7 +161609,7 @@ SQLITE_PRIVATE void sqlite3WhereAddExplainText( zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; }else if( flags & WHERE_AUTO_INDEX ){ zFmt = "AUTOMATIC COVERING INDEX"; - }else if( flags & (WHERE_IDX_ONLY|WHERE_EXPRIDX) ){ + }else if( flags & WHERE_IDX_ONLY ){ zFmt = "COVERING INDEX %s"; }else{ zFmt = "INDEX %s"; @@ -162332,50 +161661,11 @@ SQLITE_PRIVATE void sqlite3WhereAddExplainText( sqlite3_str_append(&str, " (~1 row)", 9); } #endif -#if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_EXPLAIN) zMsg = sqlite3StrAccumFinish(&str); sqlite3ExplainBreakpoint("",zMsg); -#endif - - assert( pOp->opcode==OP_Explain ); - assert( pOp->p4type==P4_DYNAMIC || pOp->p4.z==0 ); - sqlite3DbFree(db, pOp->p4.z); - pOp->p4type = P4_DYNAMIC; - pOp->p4.z = sqlite3StrAccumFinish(&str); - } -} - - -/* -** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN -** command, or if stmt_scanstatus_v2() stats are enabled, or if SQLITE_DEBUG -** was defined at compile-time. If it is not a no-op, a single OP_Explain -** opcode is added to the output to describe the table scan strategy in pLevel. -** -** If an OP_Explain opcode is added to the VM, its address is returned. -** Otherwise, if no OP_Explain is coded, zero is returned. 
-*/ -SQLITE_PRIVATE int sqlite3WhereExplainOneScan( - Parse *pParse, /* Parse context */ - SrcList *pTabList, /* Table list this loop refers to */ - WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ - u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ -){ - int ret = 0; -#if !defined(SQLITE_DEBUG) - if( sqlite3ParseToplevel(pParse)->explain==2 || IS_STMT_SCANSTATUS(pParse->db) ) -#endif - { - if( (pLevel->pWLoop->wsFlags & WHERE_MULTI_OR)==0 - && (wctrlFlags & WHERE_OR_SUBCLAUSE)==0 - ){ - Vdbe *v = pParse->pVdbe; - int addr = sqlite3VdbeCurrentAddr(v); - ret = sqlite3VdbeAddOp3( - v, OP_Explain, addr, pParse->addrExplain, pLevel->pWLoop->rRun - ); - sqlite3WhereAddExplainText(pParse, addr, pTabList, pLevel, wctrlFlags); - } + ret = sqlite3VdbeAddOp4(v, OP_Explain, sqlite3VdbeCurrentAddr(v), + pParse->addrExplain, pLoop->rRun, + zMsg, P4_DYNAMIC); } return ret; } @@ -162474,10 +161764,9 @@ SQLITE_PRIVATE void sqlite3WhereAddScanStatus( } }else{ int addr; - VdbeOp *pOp; assert( pSrclist->a[pLvl->iFrom].fg.isSubquery ); addr = pSrclist->a[pLvl->iFrom].u4.pSubq->addrFillSub; - pOp = sqlite3VdbeGetOp(v, addr-1); + VdbeOp *pOp = sqlite3VdbeGetOp(v, addr-1); assert( sqlite3VdbeDb(v)->mallocFailed || pOp->opcode==OP_InitCoroutine ); assert( sqlite3VdbeDb(v)->mallocFailed || pOp->p2>addr ); sqlite3VdbeScanStatusRange(v, addrExplain, addr, pOp->p2-1); @@ -165277,25 +164566,20 @@ static int isLikeOrGlob( z = (u8*)pRight->u.zToken; } if( z ){ - /* Count the number of prefix bytes prior to the first wildcard. - ** or U+fffd character. If the underlying database has a UTF16LE - ** encoding, then only consider ASCII characters. Note that the - ** encoding of z[] is UTF8 - we are dealing with only UTF8 here in - ** this code, but the database engine itself might be processing - ** content using a different encoding. */ + + /* Count the number of prefix characters prior to the first wildcard. 
+ ** If the underlying database has a UTF16LE encoding, then only consider + ** ASCII characters. Note that the encoding of z[] is UTF8 - we are + ** dealing with only UTF8 here in this code, but the database engine + ** itself might be processing content using a different encoding. */ cnt = 0; while( (c=z[cnt])!=0 && c!=wc[0] && c!=wc[1] && c!=wc[2] ){ cnt++; - if( c==wc[3] && z[cnt]>0 && z[cnt]<0x80 ){ + if( c==wc[3] && z[cnt]!=0 ){ cnt++; - }else if( c>=0x80 ){ - const u8 *z2 = z+cnt-1; - if( sqlite3Utf8Read(&z2)==0xfffd || ENC(db)==SQLITE_UTF16LE ){ - cnt--; - break; - }else{ - cnt = (int)(z2-z); - } + }else if( c>=0x80 && ENC(db)==SQLITE_UTF16LE ){ + cnt--; + break; } } @@ -165307,7 +164591,7 @@ static int isLikeOrGlob( ** range search. The third is because the caller assumes that the pattern ** consists of at least one character after all escapes have been ** removed. */ - if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && ALWAYS(255!=(u8)z[cnt-1]) ){ + if( (cnt>1 || (cnt>0 && z[0]!=wc[3])) && 255!=(u8)z[cnt-1] ){ Expr *pPrefix; /* A "complete" match if the pattern ends with "*" or "%" */ @@ -168599,11 +167883,9 @@ static void freeIndexInfo(sqlite3 *db, sqlite3_index_info *pIdxInfo){ ** that this is required. */ static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ + sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; int rc; - sqlite3_vtab *pVtab; - assert( IsVirtual(pTab) ); - pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; whereTraceIndexInfoInputs(p, pTab); pParse->db->nSchemaLock++; rc = pVtab->pModule->xBestIndex(pVtab, p); @@ -169546,7 +168828,7 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ ** and Y has additional constraints that might speed the search that X lacks ** but the cost of running X is not more than the cost of running Y. 
** -** In other words, return true if the cost relationship between X and Y +** In other words, return true if the cost relationwship between X and Y ** is inverted and needs to be adjusted. ** ** Case 1: @@ -173164,7 +172446,6 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin( WhereTerm *pTerm, *pEnd; SrcItem *pItem; WhereLoop *pLoop; - Bitmask m1; pLoop = pWInfo->a[i].pWLoop; pItem = &pWInfo->pTabList->a[pLoop->iTab]; if( (pItem->fg.jointype & (JT_LEFT|JT_RIGHT))!=JT_LEFT ) continue; @@ -173191,10 +172472,7 @@ static SQLITE_NOINLINE Bitmask whereOmitNoopJoin( } } if( pTerm omit unused FROM-clause term %c\n",pLoop->cId)); - m1 = MASKBIT(i)-1; - testcase( ((pWInfo->revMask>>1) & ~m1)!=0 ); - pWInfo->revMask = (m1 & pWInfo->revMask) | ((pWInfo->revMask>>1) & ~m1); + WHERETRACE(0xffffffff, ("-> drop loop %c not used\n", pLoop->cId)); notReady &= ~pLoop->maskSelf; for(pTerm=pWInfo->sWC.a; pTermprereqAll & pLoop->maskSelf)!=0 ){ @@ -173265,6 +172543,58 @@ static SQLITE_NOINLINE void whereCheckIfBloomFilterIsUseful( } } +/* +** Expression Node callback for sqlite3ExprCanReturnSubtype(). +** +** Only a function call is able to return a subtype. So if the node +** is not a function call, return WRC_Prune immediately. +** +** A function call is able to return a subtype if it has the +** SQLITE_RESULT_SUBTYPE property. +** +** Assume that every function is able to pass-through a subtype from +** one of its argument (using sqlite3_result_value()). Most functions +** are not this way, but we don't have a mechanism to distinguish those +** that are from those that are not, so assume they all work this way. +** That means that if one of its arguments is another function and that +** other function is able to return a subtype, then this function is +** able to return a subtype. 
+*/ +static int exprNodeCanReturnSubtype(Walker *pWalker, Expr *pExpr){ + int n; + FuncDef *pDef; + sqlite3 *db; + if( pExpr->op!=TK_FUNCTION ){ + return WRC_Prune; + } + assert( ExprUseXList(pExpr) ); + db = pWalker->pParse->db; + n = pExpr->x.pList ? pExpr->x.pList->nExpr : 0; + pDef = sqlite3FindFunction(db, pExpr->u.zToken, n, ENC(db), 0); + if( pDef==0 || (pDef->funcFlags & SQLITE_RESULT_SUBTYPE)!=0 ){ + pWalker->eCode = 1; + return WRC_Prune; + } + return WRC_Continue; +} + +/* +** Return TRUE if expression pExpr is able to return a subtype. +** +** A TRUE return does not guarantee that a subtype will be returned. +** It only indicates that a subtype return is possible. False positives +** are acceptable as they only disable an optimization. False negatives, +** on the other hand, can lead to incorrect answers. +*/ +static int sqlite3ExprCanReturnSubtype(Parse *pParse, Expr *pExpr){ + Walker w; + memset(&w, 0, sizeof(w)); + w.pParse = pParse; + w.xExprCallback = exprNodeCanReturnSubtype; + sqlite3WalkExpr(&w, pExpr); + return w.eCode; +} + /* ** The index pIdx is used by a query and contains one or more expressions. ** In other words pIdx is an index on an expression. iIdxCur is the cursor @@ -173298,6 +172628,12 @@ static SQLITE_NOINLINE void whereAddIndexedExpr( continue; } if( sqlite3ExprIsConstant(0,pExpr) ) continue; + if( pExpr->op==TK_FUNCTION && sqlite3ExprCanReturnSubtype(pParse,pExpr) ){ + /* Functions that might set a subtype should not be replaced by the + ** value taken from an expression index since the index omits the + ** subtype. 
https://sqlite.org/forum/forumpost/68d284c86b082c3e */ + continue; + } p = sqlite3DbMallocRaw(pParse->db, sizeof(IndexedExpr)); if( p==0 ) break; p->pIENext = pParse->pIdxEpr; @@ -174404,28 +173740,14 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ pOp->p2 = x; pOp->p1 = pLevel->iIdxCur; OpcodeRewriteTrace(db, k, pOp); - }else if( pLoop->wsFlags & (WHERE_IDX_ONLY|WHERE_EXPRIDX) ){ + }else{ + /* Unable to translate the table reference into an index + ** reference. Verify that this is harmless - that the + ** table being referenced really is open. + */ if( pLoop->wsFlags & WHERE_IDX_ONLY ){ - /* An error. pLoop is supposed to be a covering index loop, - ** and yet the VM code refers to a column of the table that - ** is not part of the index. */ sqlite3ErrorMsg(pParse, "internal query planner error"); pParse->rc = SQLITE_INTERNAL; - }else{ - /* The WHERE_EXPRIDX flag is set by the planner when it is likely - ** that pLoop is a covering index loop, but it is not possible - ** to be 100% sure. In this case, any OP_Explain opcode - ** corresponding to this loop describes the index as a "COVERING - ** INDEX". But, pOp proves that pLoop is not actually a covering - ** index loop. So clear the WHERE_EXPRIDX flag and rewrite the - ** text that accompanies the OP_Explain opcode, if any. */ - pLoop->wsFlags &= ~WHERE_EXPRIDX; - sqlite3WhereAddExplainText(pParse, - pLevel->addrBody-1, - pTabList, - pLevel, - pWInfo->wctrlFlags - ); } } }else if( pOp->opcode==OP_Rowid ){ @@ -176133,7 +175455,6 @@ static void windowAggStep( int regArg; int nArg = pWin->bExprArgs ? 
0 : windowArgCount(pWin); int i; - int addrIf = 0; assert( bInverse==0 || pWin->eStart!=TK_UNBOUNDED ); @@ -176150,18 +175471,6 @@ static void windowAggStep( } regArg = reg; - if( pWin->pFilter ){ - int regTmp; - assert( ExprUseXList(pWin->pOwner) ); - assert( pWin->bExprArgs || !nArg ||nArg==pWin->pOwner->x.pList->nExpr ); - assert( pWin->bExprArgs || nArg ||pWin->pOwner->x.pList==0 ); - regTmp = sqlite3GetTempReg(pParse); - sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol+nArg,regTmp); - addrIf = sqlite3VdbeAddOp3(v, OP_IfNot, regTmp, 0, 1); - VdbeCoverage(v); - sqlite3ReleaseTempReg(pParse, regTmp); - } - if( pMWin->regStartRowid==0 && (pFunc->funcFlags & SQLITE_FUNC_MINMAX) && (pWin->eStart!=TK_UNBOUNDED) @@ -176181,13 +175490,25 @@ static void windowAggStep( } sqlite3VdbeJumpHere(v, addrIsNull); }else if( pWin->regApp ){ - assert( pWin->pFilter==0 ); assert( pFunc->zName==nth_valueName || pFunc->zName==first_valueName ); assert( bInverse==0 || bInverse==1 ); sqlite3VdbeAddOp2(v, OP_AddImm, pWin->regApp+1-bInverse, 1); }else if( pFunc->xSFunc!=noopStepFunc ){ + int addrIf = 0; + if( pWin->pFilter ){ + int regTmp; + assert( ExprUseXList(pWin->pOwner) ); + assert( pWin->bExprArgs || !nArg ||nArg==pWin->pOwner->x.pList->nExpr ); + assert( pWin->bExprArgs || nArg ||pWin->pOwner->x.pList==0 ); + regTmp = sqlite3GetTempReg(pParse); + sqlite3VdbeAddOp3(v, OP_Column, csr, pWin->iArgCol+nArg,regTmp); + addrIf = sqlite3VdbeAddOp3(v, OP_IfNot, regTmp, 0, 1); + VdbeCoverage(v); + sqlite3ReleaseTempReg(pParse, regTmp); + } + if( pWin->bExprArgs ){ int iOp = sqlite3VdbeCurrentAddr(v); int iEnd; @@ -176214,13 +175535,12 @@ static void windowAggStep( sqlite3VdbeAddOp3(v, bInverse? 
OP_AggInverse : OP_AggStep, bInverse, regArg, pWin->regAccum); sqlite3VdbeAppendP4(v, pFunc, P4_FUNCDEF); - sqlite3VdbeChangeP5(v, (u8)nArg); + sqlite3VdbeChangeP5(v, (u16)nArg); if( pWin->bExprArgs ){ sqlite3ReleaseTempRange(pParse, regArg, nArg); } + if( addrIf ) sqlite3VdbeJumpHere(v, addrIf); } - - if( addrIf ) sqlite3VdbeJumpHere(v, addrIf); } } @@ -177880,122 +177200,122 @@ static void updateDeleteLimitError( #define TK_GE 59 #define TK_ESCAPE 60 #define TK_COLUMNKW 61 -#define TK_CONCURRENT 62 -#define TK_DO 63 -#define TK_FOR 64 -#define TK_IGNORE 65 -#define TK_INITIALLY 66 -#define TK_INSTEAD 67 -#define TK_NO 68 -#define TK_KEY 69 -#define TK_OF 70 -#define TK_OFFSET 71 -#define TK_PRAGMA 72 -#define TK_RAISE 73 -#define TK_RECURSIVE 74 -#define TK_REPLACE 75 -#define TK_RESTRICT 76 -#define TK_ROW 77 -#define TK_ROWS 78 -#define TK_TRIGGER 79 -#define TK_VACUUM 80 -#define TK_VIEW 81 -#define TK_VIRTUAL 82 -#define TK_WITH 83 -#define TK_NULLS 84 -#define TK_FIRST 85 -#define TK_LAST 86 -#define TK_CURRENT 87 -#define TK_FOLLOWING 88 -#define TK_PARTITION 89 -#define TK_PRECEDING 90 -#define TK_RANGE 91 -#define TK_UNBOUNDED 92 -#define TK_EXCLUDE 93 -#define TK_GROUPS 94 -#define TK_OTHERS 95 -#define TK_TIES 96 -#define TK_GENERATED 97 -#define TK_ALWAYS 98 -#define TK_MATERIALIZED 99 -#define TK_REINDEX 100 -#define TK_RENAME 101 -#define TK_CTIME_KW 102 -#define TK_ANY 103 -#define TK_BITAND 104 -#define TK_BITOR 105 -#define TK_LSHIFT 106 -#define TK_RSHIFT 107 -#define TK_PLUS 108 -#define TK_MINUS 109 -#define TK_STAR 110 -#define TK_SLASH 111 -#define TK_REM 112 -#define TK_CONCAT 113 -#define TK_PTR 114 -#define TK_COLLATE 115 -#define TK_BITNOT 116 -#define TK_ON 117 -#define TK_INDEXED 118 -#define TK_STRING 119 -#define TK_JOIN_KW 120 -#define TK_CONSTRAINT 121 -#define TK_DEFAULT 122 -#define TK_NULL 123 -#define TK_PRIMARY 124 -#define TK_UNIQUE 125 -#define TK_CHECK 126 -#define TK_REFERENCES 127 -#define TK_AUTOINCR 128 -#define 
TK_INSERT 129 -#define TK_DELETE 130 -#define TK_UPDATE 131 -#define TK_SET 132 -#define TK_DEFERRABLE 133 -#define TK_FOREIGN 134 -#define TK_DROP 135 -#define TK_UNION 136 -#define TK_ALL 137 -#define TK_EXCEPT 138 -#define TK_INTERSECT 139 -#define TK_SELECT 140 -#define TK_VALUES 141 -#define TK_DISTINCT 142 -#define TK_DOT 143 -#define TK_FROM 144 -#define TK_JOIN 145 -#define TK_USING 146 -#define TK_ORDER 147 -#define TK_GROUP 148 -#define TK_HAVING 149 -#define TK_LIMIT 150 -#define TK_WHERE 151 -#define TK_RETURNING 152 -#define TK_INTO 153 -#define TK_NOTHING 154 -#define TK_FLOAT 155 -#define TK_BLOB 156 -#define TK_INTEGER 157 -#define TK_VARIABLE 158 -#define TK_CASE 159 -#define TK_WHEN 160 -#define TK_THEN 161 -#define TK_ELSE 162 -#define TK_INDEX 163 -#define TK_ALTER 164 -#define TK_ADD 165 -#define TK_WINDOW 166 -#define TK_OVER 167 -#define TK_FILTER 168 -#define TK_COLUMN 169 -#define TK_AGG_FUNCTION 170 -#define TK_AGG_COLUMN 171 -#define TK_TRUEFALSE 172 -#define TK_FUNCTION 173 -#define TK_UPLUS 174 -#define TK_UMINUS 175 -#define TK_TRUTH 176 -#define TK_REGISTER 177 +#define TK_DO 62 +#define TK_FOR 63 +#define TK_IGNORE 64 +#define TK_INITIALLY 65 +#define TK_INSTEAD 66 +#define TK_NO 67 +#define TK_KEY 68 +#define TK_OF 69 +#define TK_OFFSET 70 +#define TK_PRAGMA 71 +#define TK_RAISE 72 +#define TK_RECURSIVE 73 +#define TK_REPLACE 74 +#define TK_RESTRICT 75 +#define TK_ROW 76 +#define TK_ROWS 77 +#define TK_TRIGGER 78 +#define TK_VACUUM 79 +#define TK_VIEW 80 +#define TK_VIRTUAL 81 +#define TK_WITH 82 +#define TK_NULLS 83 +#define TK_FIRST 84 +#define TK_LAST 85 +#define TK_CURRENT 86 +#define TK_FOLLOWING 87 +#define TK_PARTITION 88 +#define TK_PRECEDING 89 +#define TK_RANGE 90 +#define TK_UNBOUNDED 91 +#define TK_EXCLUDE 92 +#define TK_GROUPS 93 +#define TK_OTHERS 94 +#define TK_TIES 95 +#define TK_GENERATED 96 +#define TK_ALWAYS 97 +#define TK_MATERIALIZED 98 +#define TK_REINDEX 99 +#define TK_RENAME 100 +#define TK_CTIME_KW 101 
+#define TK_ANY 102 +#define TK_BITAND 103 +#define TK_BITOR 104 +#define TK_LSHIFT 105 +#define TK_RSHIFT 106 +#define TK_PLUS 107 +#define TK_MINUS 108 +#define TK_STAR 109 +#define TK_SLASH 110 +#define TK_REM 111 +#define TK_CONCAT 112 +#define TK_PTR 113 +#define TK_COLLATE 114 +#define TK_BITNOT 115 +#define TK_ON 116 +#define TK_INDEXED 117 +#define TK_STRING 118 +#define TK_JOIN_KW 119 +#define TK_CONSTRAINT 120 +#define TK_DEFAULT 121 +#define TK_NULL 122 +#define TK_PRIMARY 123 +#define TK_UNIQUE 124 +#define TK_CHECK 125 +#define TK_REFERENCES 126 +#define TK_AUTOINCR 127 +#define TK_INSERT 128 +#define TK_DELETE 129 +#define TK_UPDATE 130 +#define TK_SET 131 +#define TK_DEFERRABLE 132 +#define TK_FOREIGN 133 +#define TK_DROP 134 +#define TK_UNION 135 +#define TK_ALL 136 +#define TK_EXCEPT 137 +#define TK_INTERSECT 138 +#define TK_SELECT 139 +#define TK_VALUES 140 +#define TK_DISTINCT 141 +#define TK_DOT 142 +#define TK_FROM 143 +#define TK_JOIN 144 +#define TK_USING 145 +#define TK_ORDER 146 +#define TK_GROUP 147 +#define TK_HAVING 148 +#define TK_LIMIT 149 +#define TK_WHERE 150 +#define TK_RETURNING 151 +#define TK_INTO 152 +#define TK_NOTHING 153 +#define TK_FLOAT 154 +#define TK_BLOB 155 +#define TK_INTEGER 156 +#define TK_VARIABLE 157 +#define TK_CASE 158 +#define TK_WHEN 159 +#define TK_THEN 160 +#define TK_ELSE 161 +#define TK_INDEX 162 +#define TK_ALTER 163 +#define TK_ADD 164 +#define TK_WINDOW 165 +#define TK_OVER 166 +#define TK_FILTER 167 +#define TK_COLUMN 168 +#define TK_AGG_FUNCTION 169 +#define TK_AGG_COLUMN 170 +#define TK_TRUEFALSE 171 +#define TK_FUNCTION 172 +#define TK_UPLUS 173 +#define TK_UMINUS 174 +#define TK_TRUTH 175 +#define TK_REGISTER 176 +#define TK_CONCURRENT 177 #define TK_VECTOR 178 #define TK_SELECT_COLUMN 179 #define TK_IF_NULL_ROW 180 @@ -178071,7 +177391,7 @@ static void updateDeleteLimitError( #define YYCODETYPE unsigned short int #define YYNOCODE 323 #define YYACTIONTYPE unsigned short int -#define YYWILDCARD 103 
+#define YYWILDCARD 102 #define sqlite3ParserTOKENTYPE Token typedef union { int yyinit; @@ -178208,454 +177528,450 @@ typedef union { ** yy_default[] Default action for each state. ** *********** Begin parsing tables **********************************************/ -#define YY_ACTTAB_COUNT (2212) +#define YY_ACTTAB_COUNT (2176) static const YYACTIONTYPE yy_action[] = { - /* 0 */ 130, 127, 234, 130, 127, 234, 574, 574, 574, 580, - /* 10 */ 1294, 1259, 1, 1, 586, 2, 1263, 580, 502, 417, - /* 20 */ 585, 321, 1263, 155, 1546, 1297, 294, 321, 166, 155, - /* 30 */ 1345, 987, 51, 51, 1626, 987, 1345, 1337, 1337, 988, - /* 40 */ 82, 82, 1304, 988, 137, 138, 91, 534, 1232, 1232, + /* 0 */ 1332, 580, 1311, 580, 379, 580, 1285, 282, 282, 1626, + /* 10 */ 1332, 1259, 1, 1, 586, 2, 1263, 1304, 1283, 417, + /* 20 */ 577, 321, 566, 155, 81, 81, 51, 51, 51, 51, + /* 30 */ 1345, 987, 130, 127, 234, 1153, 1661, 1294, 1661, 988, + /* 40 */ 130, 127, 234, 436, 137, 138, 91, 534, 1232, 1232, /* 50 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 60 */ 214, 288, 288, 425, 362, 288, 288, 453, 453, 453, - /* 70 */ 441, 288, 288, 405, 577, 368, 566, 540, 577, 580, - /* 80 */ 566, 1208, 288, 288, 577, 285, 566, 973, 136, 136, - /* 90 */ 136, 136, 129, 245, 491, 577, 329, 566, 275, 245, - /* 100 */ 264, 231, 19, 19, 134, 134, 134, 134, 133, 133, - /* 110 */ 132, 132, 132, 131, 128, 455, 1296, 430, 1606, 586, - /* 120 */ 2, 1263, 460, 385, 417, 459, 321, 357, 155, 360, - /* 130 */ 1111, 459, 1586, 384, 1111, 1345, 134, 134, 134, 134, - /* 140 */ 133, 133, 132, 132, 132, 131, 128, 455, 518, 137, - /* 150 */ 138, 91, 524, 1232, 1232, 1067, 1070, 1057, 1057, 135, - /* 160 */ 135, 136, 136, 136, 136, 580, 438, 1208, 497, 182, - /* 170 */ 288, 288, 274, 291, 376, 521, 371, 520, 262, 130, - /* 180 */ 127, 234, 233, 577, 367, 566, 407, 1510, 51, 51, - /* 190 */ 1208, 1209, 1208, 1178, 298, 1178, 1285, 1572, 245, 133, - /* 200 */ 133, 132, 132, 132, 131, 128, 455, 973, 1283, 134, - /* 
210 */ 134, 134, 134, 133, 133, 132, 132, 132, 131, 128, - /* 220 */ 455, 288, 288, 132, 132, 132, 131, 128, 455, 417, - /* 230 */ 459, 1023, 476, 350, 577, 112, 566, 157, 1228, 44, - /* 240 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, - /* 250 */ 128, 455, 483, 267, 137, 138, 91, 455, 1232, 1232, - /* 260 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 270 */ 1054, 1054, 1068, 1071, 1208, 1439, 1208, 1209, 1208, 257, - /* 280 */ 580, 139, 515, 512, 511, 348, 527, 527, 1588, 580, - /* 290 */ 383, 7, 510, 487, 1173, 257, 320, 571, 515, 512, - /* 300 */ 511, 417, 1581, 51, 51, 544, 7, 1173, 510, 1574, - /* 310 */ 1173, 381, 82, 82, 134, 134, 134, 134, 133, 133, - /* 320 */ 132, 132, 132, 131, 128, 455, 137, 138, 91, 1632, - /* 330 */ 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, - /* 340 */ 136, 136, 417, 1173, 1173, 1058, 432, 94, 1228, 561, - /* 350 */ 948, 93, 320, 571, 551, 947, 1173, 1173, 535, 1173, - /* 360 */ 1173, 1153, 1661, 543, 1661, 303, 386, 137, 138, 91, - /* 370 */ 1343, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 380 */ 136, 136, 136, 1208, 1209, 1208, 134, 134, 134, 134, - /* 390 */ 133, 133, 132, 132, 132, 131, 128, 455, 973, 421, - /* 400 */ 288, 288, 580, 1586, 548, 288, 288, 466, 136, 136, - /* 410 */ 136, 136, 542, 577, 417, 566, 1153, 1662, 577, 1662, - /* 420 */ 566, 1023, 130, 127, 234, 81, 81, 134, 134, 134, - /* 430 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 137, - /* 440 */ 138, 91, 1151, 1232, 1232, 1067, 1070, 1057, 1057, 135, - /* 450 */ 135, 136, 136, 136, 136, 580, 134, 134, 134, 134, - /* 460 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 282, - /* 470 */ 282, 1208, 580, 261, 1208, 348, 471, 334, 82, 82, - /* 480 */ 1602, 1281, 577, 496, 566, 530, 485, 493, 391, 579, - /* 490 */ 564, 82, 82, 233, 464, 82, 82, 1151, 379, 134, - /* 500 */ 134, 134, 134, 133, 133, 132, 132, 132, 131, 128, - /* 510 */ 455, 485, 464, 463, 214, 561, 288, 288, 973, 417, - /* 520 */ 288, 288, 396, 364, 560, 
288, 288, 410, 316, 577, - /* 530 */ 1208, 566, 561, 577, 1315, 566, 45, 436, 577, 417, - /* 540 */ 566, 443, 422, 516, 137, 138, 91, 219, 1232, 1232, - /* 550 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 560 */ 1208, 382, 296, 417, 137, 138, 91, 890, 1232, 1232, - /* 570 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, - /* 580 */ 1208, 1209, 1208, 1208, 1209, 1208, 464, 299, 137, 138, - /* 590 */ 91, 485, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, - /* 600 */ 136, 136, 136, 136, 134, 134, 134, 134, 133, 133, - /* 610 */ 132, 132, 132, 131, 128, 455, 283, 427, 96, 1515, - /* 620 */ 1208, 580, 539, 1208, 134, 134, 134, 134, 133, 133, - /* 630 */ 132, 132, 132, 131, 128, 455, 197, 1515, 1517, 1208, - /* 640 */ 1209, 1208, 452, 451, 82, 82, 320, 571, 134, 134, - /* 650 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, - /* 660 */ 1208, 953, 289, 289, 229, 526, 975, 302, 417, 1208, - /* 670 */ 1209, 1208, 883, 198, 1285, 577, 1208, 566, 130, 127, - /* 680 */ 234, 450, 1335, 1335, 582, 46, 582, 331, 417, 1240, - /* 690 */ 227, 1240, 1191, 137, 138, 91, 1455, 1232, 1232, 1067, - /* 700 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 44, - /* 710 */ 232, 1515, 417, 137, 138, 91, 1045, 1232, 1232, 1067, - /* 720 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 1208, - /* 730 */ 1209, 1208, 1208, 1209, 1208, 377, 1599, 137, 138, 91, - /* 740 */ 390, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 750 */ 136, 136, 136, 134, 134, 134, 134, 133, 133, 132, - /* 760 */ 132, 132, 131, 128, 455, 533, 320, 571, 580, 1208, - /* 770 */ 1209, 1208, 580, 134, 134, 134, 134, 133, 133, 132, - /* 780 */ 132, 132, 131, 128, 455, 1208, 1209, 1208, 22, 22, - /* 790 */ 1208, 145, 145, 417, 536, 19, 19, 134, 134, 134, - /* 800 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 222, - /* 810 */ 435, 580, 974, 131, 128, 455, 580, 417, 137, 138, - /* 820 */ 91, 1028, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, - /* 830 */ 136, 136, 136, 136, 147, 147, 
491, 417, 1208, 66, - /* 840 */ 66, 1129, 137, 138, 91, 554, 1232, 1232, 1067, 1070, - /* 850 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 578, 44, - /* 860 */ 940, 940, 137, 138, 91, 1556, 1232, 1232, 1067, 1070, - /* 870 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 134, 134, - /* 880 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, - /* 890 */ 1558, 182, 108, 537, 1663, 403, 580, 886, 465, 1208, - /* 900 */ 1209, 1208, 134, 134, 134, 134, 133, 133, 132, 132, - /* 910 */ 132, 131, 128, 455, 1439, 1454, 320, 571, 6, 19, - /* 920 */ 19, 538, 134, 134, 134, 134, 133, 133, 132, 132, - /* 930 */ 132, 131, 128, 455, 437, 115, 347, 452, 451, 580, - /* 940 */ 1208, 580, 417, 1372, 315, 1572, 1237, 1208, 1209, 1208, - /* 950 */ 111, 1239, 562, 40, 377, 1599, 1453, 1208, 461, 1238, - /* 960 */ 555, 555, 82, 82, 82, 82, 1572, 137, 138, 91, - /* 970 */ 5, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, - /* 980 */ 136, 136, 136, 340, 1240, 433, 1240, 288, 288, 1130, - /* 990 */ 1044, 1439, 209, 48, 580, 377, 1599, 475, 580, 317, - /* 1000 */ 577, 561, 566, 1173, 1131, 528, 886, 1033, 552, 552, - /* 1010 */ 563, 1032, 1349, 7, 50, 1572, 1173, 61, 61, 1173, - /* 1020 */ 1132, 82, 82, 392, 577, 388, 566, 134, 134, 134, - /* 1030 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 1554, - /* 1040 */ 288, 288, 926, 1032, 1032, 1034, 337, 580, 339, 1208, - /* 1050 */ 1209, 1208, 927, 577, 529, 566, 417, 1573, 449, 381, - /* 1060 */ 485, 215, 434, 47, 1208, 427, 1208, 1209, 1208, 427, - /* 1070 */ 67, 67, 1192, 1631, 580, 915, 417, 3, 1573, 502, - /* 1080 */ 381, 137, 138, 91, 119, 1232, 1232, 1067, 1070, 1057, - /* 1090 */ 1057, 135, 135, 136, 136, 136, 136, 82, 82, 580, - /* 1100 */ 491, 137, 138, 91, 1044, 1232, 1232, 1067, 1070, 1057, - /* 1110 */ 1057, 135, 135, 136, 136, 136, 136, 1439, 1332, 214, - /* 1120 */ 1311, 1033, 19, 19, 1545, 1032, 1314, 1573, 1332, 381, - /* 1130 */ 338, 227, 416, 324, 454, 212, 304, 306, 866, 213, - /* 1140 */ 125, 134, 134, 134, 134, 133, 133, 
132, 132, 132, - /* 1150 */ 131, 128, 455, 580, 307, 580, 307, 1032, 1032, 1034, - /* 1160 */ 580, 134, 134, 134, 134, 133, 133, 132, 132, 132, - /* 1170 */ 131, 128, 455, 1208, 1209, 1208, 19, 19, 19, 19, - /* 1180 */ 477, 417, 536, 19, 19, 1439, 1192, 379, 498, 1228, - /* 1190 */ 1579, 442, 554, 206, 7, 1572, 1313, 523, 207, 474, - /* 1200 */ 305, 417, 10, 345, 267, 1109, 137, 126, 91, 502, - /* 1210 */ 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, - /* 1220 */ 136, 136, 906, 580, 948, 550, 446, 138, 91, 947, - /* 1230 */ 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, - /* 1240 */ 136, 136, 1089, 49, 549, 878, 19, 19, 402, 1253, - /* 1250 */ 507, 402, 1152, 1184, 1509, 519, 447, 109, 160, 580, - /* 1260 */ 1580, 556, 557, 907, 7, 1211, 134, 134, 134, 134, - /* 1270 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 261, - /* 1280 */ 1502, 537, 21, 21, 141, 502, 134, 134, 134, 134, - /* 1290 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 1228, - /* 1300 */ 1130, 53, 53, 580, 325, 1184, 417, 1573, 1575, 381, - /* 1310 */ 854, 855, 856, 375, 1578, 1131, 1577, 502, 7, 439, - /* 1320 */ 7, 68, 68, 374, 476, 350, 54, 54, 580, 1254, - /* 1330 */ 1344, 1132, 1254, 91, 6, 1232, 1232, 1067, 1070, 1057, - /* 1340 */ 1057, 135, 135, 136, 136, 136, 136, 456, 422, 580, - /* 1350 */ 557, 69, 69, 568, 580, 878, 580, 123, 572, 580, - /* 1360 */ 4, 580, 1340, 580, 367, 580, 1114, 1114, 499, 158, - /* 1370 */ 414, 413, 70, 70, 575, 1211, 328, 71, 71, 72, - /* 1380 */ 72, 479, 73, 73, 55, 55, 56, 56, 57, 57, - /* 1390 */ 532, 134, 134, 134, 134, 133, 133, 132, 132, 132, - /* 1400 */ 131, 128, 455, 580, 300, 502, 580, 456, 580, 1286, - /* 1410 */ 423, 569, 580, 492, 580, 573, 423, 123, 572, 580, - /* 1420 */ 4, 44, 260, 259, 258, 481, 59, 59, 580, 60, - /* 1430 */ 60, 74, 74, 111, 575, 75, 75, 76, 76, 580, - /* 1440 */ 1044, 467, 20, 20, 468, 467, 121, 121, 9, 580, - /* 1450 */ 355, 77, 77, 469, 122, 415, 456, 581, 456, 580, - /* 1460 */ 221, 1032, 143, 143, 415, 
297, 478, 1227, 580, 297, - /* 1470 */ 1157, 569, 144, 144, 424, 184, 482, 415, 320, 571, - /* 1480 */ 287, 231, 78, 78, 238, 546, 580, 415, 456, 580, - /* 1490 */ 545, 62, 62, 1032, 1032, 1034, 1035, 35, 123, 572, - /* 1500 */ 1044, 4, 580, 97, 218, 580, 121, 121, 242, 79, - /* 1510 */ 79, 580, 63, 63, 122, 575, 456, 581, 456, 108, - /* 1520 */ 319, 1032, 1192, 580, 489, 80, 80, 580, 64, 64, - /* 1530 */ 1248, 415, 456, 1370, 170, 170, 897, 580, 120, 580, - /* 1540 */ 117, 580, 123, 572, 580, 4, 171, 171, 538, 580, - /* 1550 */ 87, 87, 569, 1032, 1032, 1034, 1035, 35, 38, 575, - /* 1560 */ 65, 65, 83, 83, 146, 146, 546, 84, 84, 580, - /* 1570 */ 23, 547, 168, 168, 161, 1384, 16, 1148, 1383, 404, - /* 1580 */ 580, 1044, 1192, 580, 470, 580, 222, 121, 121, 580, - /* 1590 */ 293, 39, 148, 148, 580, 122, 569, 456, 581, 456, - /* 1600 */ 905, 904, 1032, 142, 142, 1561, 169, 169, 162, 162, - /* 1610 */ 546, 428, 152, 152, 346, 545, 111, 151, 151, 1025, - /* 1620 */ 484, 266, 266, 490, 323, 1044, 580, 244, 580, 341, - /* 1630 */ 580, 121, 121, 893, 1032, 1032, 1034, 1035, 35, 122, - /* 1640 */ 1096, 456, 581, 456, 580, 1534, 1032, 456, 580, 149, - /* 1650 */ 149, 150, 150, 86, 86, 912, 913, 123, 572, 486, - /* 1660 */ 4, 266, 351, 1192, 111, 990, 991, 88, 88, 1036, - /* 1670 */ 1533, 85, 85, 205, 575, 580, 494, 290, 1032, 1032, - /* 1680 */ 1034, 1035, 35, 1605, 1196, 458, 580, 508, 292, 263, - /* 1690 */ 946, 352, 125, 400, 400, 399, 277, 397, 52, 52, - /* 1700 */ 863, 1380, 365, 165, 111, 111, 456, 1192, 356, 58, - /* 1710 */ 58, 569, 1092, 239, 263, 327, 123, 572, 978, 4, - /* 1720 */ 266, 359, 943, 326, 125, 1108, 456, 1108, 1107, 876, - /* 1730 */ 1107, 159, 361, 575, 363, 1328, 90, 572, 1312, 4, - /* 1740 */ 1044, 370, 945, 893, 125, 380, 121, 121, 952, 1593, - /* 1750 */ 1096, 1393, 1438, 575, 122, 241, 456, 581, 456, 1366, - /* 1760 */ 1378, 1032, 567, 175, 1443, 503, 43, 1293, 1363, 1284, - /* 1770 */ 569, 1272, 1271, 1273, 1613, 280, 167, 312, 313, 1036, - /* 
1780 */ 314, 401, 224, 240, 333, 1425, 12, 237, 336, 295, - /* 1790 */ 569, 343, 344, 1032, 1032, 1034, 1035, 35, 1620, 1044, - /* 1800 */ 349, 1430, 1429, 301, 408, 121, 121, 513, 488, 1311, - /* 1810 */ 373, 1506, 228, 122, 1505, 456, 581, 456, 418, 1044, - /* 1820 */ 1032, 1375, 1192, 320, 571, 121, 121, 1616, 570, 1376, - /* 1830 */ 1374, 395, 1248, 122, 186, 456, 581, 456, 210, 559, - /* 1840 */ 1032, 270, 1553, 211, 223, 1373, 1551, 462, 1245, 235, - /* 1850 */ 195, 426, 1032, 1032, 1034, 1035, 35, 92, 95, 558, - /* 1860 */ 96, 1511, 220, 140, 1420, 557, 332, 180, 13, 1426, - /* 1870 */ 188, 1413, 1032, 1032, 1034, 1035, 35, 335, 1196, 458, - /* 1880 */ 472, 1192, 292, 473, 190, 191, 192, 400, 400, 399, - /* 1890 */ 277, 397, 193, 506, 863, 247, 109, 1432, 406, 480, - /* 1900 */ 456, 1192, 1431, 14, 1434, 409, 199, 239, 102, 327, - /* 1910 */ 123, 572, 251, 4, 1500, 501, 495, 326, 1522, 203, - /* 1920 */ 354, 522, 281, 253, 504, 358, 254, 575, 1274, 255, - /* 1930 */ 440, 411, 1331, 1322, 104, 1330, 1329, 897, 1321, 229, - /* 1940 */ 444, 531, 445, 310, 311, 268, 269, 1630, 1598, 241, - /* 1950 */ 1629, 1301, 412, 372, 1300, 1299, 1628, 175, 1584, 1398, - /* 1960 */ 43, 378, 1583, 448, 569, 11, 1487, 389, 1397, 318, - /* 1970 */ 110, 116, 541, 42, 583, 1202, 276, 240, 278, 1354, - /* 1980 */ 279, 387, 584, 1269, 1264, 185, 1353, 216, 393, 394, - /* 1990 */ 419, 420, 172, 1044, 1538, 850, 1539, 156, 308, 121, - /* 2000 */ 121, 1537, 1536, 173, 174, 457, 89, 122, 225, 456, - /* 2010 */ 581, 456, 418, 226, 1032, 217, 236, 320, 571, 322, - /* 2020 */ 154, 1106, 1104, 330, 187, 176, 929, 189, 1227, 243, - /* 2030 */ 246, 342, 1120, 194, 177, 178, 429, 431, 196, 98, - /* 2040 */ 99, 462, 100, 101, 1123, 179, 1032, 1032, 1034, 1035, - /* 2050 */ 35, 248, 292, 249, 1119, 163, 24, 400, 400, 399, - /* 2060 */ 277, 397, 250, 353, 863, 1112, 266, 200, 500, 1242, - /* 2070 */ 252, 201, 15, 374, 865, 1192, 505, 239, 256, 327, - /* 2080 */ 202, 509, 103, 25, 895, 366, 26, 
326, 514, 369, - /* 2090 */ 105, 908, 517, 309, 164, 106, 181, 1189, 525, 230, - /* 2100 */ 27, 1073, 107, 1159, 17, 204, 1158, 284, 286, 976, - /* 2110 */ 1175, 125, 1179, 265, 982, 28, 1177, 8, 1182, 241, - /* 2120 */ 1183, 29, 30, 31, 32, 1164, 41, 175, 208, 553, - /* 2130 */ 43, 111, 33, 113, 114, 1087, 1074, 1072, 1076, 34, - /* 2140 */ 1077, 565, 1128, 118, 271, 36, 18, 240, 1037, 877, - /* 2150 */ 124, 939, 37, 272, 273, 398, 576, 183, 153, 1621, - /* 2160 */ 1198, 1197, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, - /* 2170 */ 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, - /* 2180 */ 1260, 1260, 418, 1260, 1260, 1260, 1260, 320, 571, 1260, - /* 2190 */ 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, - /* 2200 */ 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, 1260, - /* 2210 */ 1260, 462, + /* 60 */ 1208, 1546, 580, 38, 1285, 288, 288, 1606, 586, 2, + /* 70 */ 1263, 285, 1208, 973, 582, 321, 582, 155, 577, 502, + /* 80 */ 566, 214, 288, 288, 1345, 82, 82, 391, 136, 136, + /* 90 */ 136, 136, 129, 245, 416, 577, 39, 566, 1337, 1337, + /* 100 */ 264, 231, 283, 134, 134, 134, 134, 133, 133, 132, + /* 110 */ 132, 132, 131, 128, 455, 1151, 307, 1581, 307, 288, + /* 120 */ 288, 7, 561, 417, 1545, 459, 1586, 384, 1586, 548, + /* 130 */ 1208, 535, 577, 1572, 566, 134, 134, 134, 134, 133, + /* 140 */ 133, 132, 132, 132, 131, 128, 455, 245, 137, 138, + /* 150 */ 91, 455, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, + /* 160 */ 136, 136, 136, 136, 130, 127, 234, 233, 1208, 1209, + /* 170 */ 1208, 257, 953, 1297, 515, 512, 511, 182, 441, 459, + /* 180 */ 1208, 1209, 1208, 368, 510, 132, 132, 132, 131, 128, + /* 190 */ 455, 1178, 973, 1178, 134, 134, 134, 134, 133, 133, + /* 200 */ 132, 132, 132, 131, 128, 455, 362, 134, 134, 134, + /* 210 */ 134, 133, 133, 132, 132, 132, 131, 128, 455, 133, + /* 220 */ 133, 132, 132, 132, 131, 128, 455, 417, 452, 451, + /* 230 */ 44, 289, 289, 112, 485, 1023, 261, 1237, 1208, 1209, + /* 240 */ 1208, 
111, 1239, 44, 577, 1574, 566, 381, 580, 329, + /* 250 */ 1238, 502, 137, 138, 91, 518, 1232, 1232, 1067, 1070, + /* 260 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 357, 465, + /* 270 */ 360, 19, 19, 438, 392, 1240, 388, 1240, 139, 274, + /* 280 */ 291, 376, 521, 371, 520, 262, 430, 320, 571, 348, + /* 290 */ 1296, 367, 1173, 1173, 527, 527, 1509, 1023, 417, 7, + /* 300 */ 320, 571, 487, 544, 422, 1173, 1173, 294, 1173, 1173, + /* 310 */ 296, 134, 134, 134, 134, 133, 133, 132, 132, 132, + /* 320 */ 131, 128, 455, 137, 138, 91, 1632, 1232, 1232, 1067, + /* 330 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 417, + /* 340 */ 1510, 1455, 288, 288, 94, 257, 214, 93, 515, 512, + /* 350 */ 511, 348, 471, 334, 396, 577, 385, 566, 510, 410, + /* 360 */ 182, 543, 386, 502, 137, 138, 91, 417, 1232, 1232, + /* 370 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, + /* 380 */ 377, 1599, 134, 134, 134, 134, 133, 133, 132, 132, + /* 390 */ 132, 131, 128, 455, 91, 421, 1232, 1232, 1067, 1070, + /* 400 */ 1057, 1057, 135, 135, 136, 136, 136, 136, 425, 1602, + /* 410 */ 417, 1208, 130, 127, 234, 44, 579, 1208, 130, 127, + /* 420 */ 234, 476, 350, 134, 134, 134, 134, 133, 133, 132, + /* 430 */ 132, 132, 131, 128, 455, 137, 138, 91, 427, 1232, + /* 440 */ 1232, 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, + /* 450 */ 136, 134, 134, 134, 134, 133, 133, 132, 132, 132, + /* 460 */ 131, 128, 455, 580, 452, 451, 1439, 460, 1208, 580, + /* 470 */ 157, 1208, 320, 571, 562, 45, 554, 552, 552, 496, + /* 480 */ 528, 46, 7, 493, 197, 275, 82, 82, 1054, 1054, + /* 490 */ 1068, 1071, 61, 61, 134, 134, 134, 134, 133, 133, + /* 500 */ 132, 132, 132, 131, 128, 455, 468, 1208, 331, 288, + /* 510 */ 288, 1240, 580, 1240, 417, 288, 288, 415, 364, 1208, + /* 520 */ 1209, 1208, 577, 561, 566, 1208, 1209, 1208, 577, 529, + /* 530 */ 566, 382, 563, 580, 417, 51, 51, 432, 516, 137, + /* 540 */ 138, 91, 219, 1232, 1232, 1067, 1070, 1057, 1057, 135, + /* 550 */ 135, 136, 136, 136, 136, 379, 82, 
82, 539, 137, + /* 560 */ 138, 91, 1058, 1232, 1232, 1067, 1070, 1057, 1057, 135, + /* 570 */ 135, 136, 136, 136, 136, 1173, 1208, 1209, 1208, 1208, + /* 580 */ 1209, 1208, 536, 108, 320, 571, 551, 580, 1173, 987, + /* 590 */ 580, 1173, 1575, 540, 446, 407, 1208, 988, 134, 134, + /* 600 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, + /* 610 */ 82, 82, 538, 82, 82, 1208, 1209, 1208, 134, 134, + /* 620 */ 134, 134, 133, 133, 132, 132, 132, 131, 128, 455, + /* 630 */ 288, 288, 550, 1153, 1662, 1588, 1662, 383, 417, 574, + /* 640 */ 574, 574, 890, 577, 542, 566, 578, 561, 940, 940, + /* 650 */ 561, 549, 131, 128, 455, 1208, 560, 238, 417, 443, + /* 660 */ 1184, 483, 883, 137, 138, 91, 303, 1232, 1232, 1067, + /* 670 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 108, + /* 680 */ 537, 464, 111, 137, 138, 91, 533, 1232, 1232, 1067, + /* 690 */ 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, 464, + /* 700 */ 463, 288, 288, 1248, 1208, 1209, 1208, 1439, 538, 22, + /* 710 */ 22, 427, 1184, 1151, 577, 1208, 566, 580, 232, 1343, + /* 720 */ 1572, 554, 134, 134, 134, 134, 133, 133, 132, 132, + /* 730 */ 132, 131, 128, 455, 580, 1281, 580, 229, 526, 96, + /* 740 */ 82, 82, 134, 134, 134, 134, 133, 133, 132, 132, + /* 750 */ 132, 131, 128, 455, 288, 288, 580, 19, 19, 19, + /* 760 */ 19, 6, 417, 1208, 1209, 1208, 1191, 577, 48, 566, + /* 770 */ 288, 288, 435, 464, 437, 320, 571, 316, 433, 145, + /* 780 */ 145, 212, 417, 577, 897, 566, 1045, 137, 138, 91, + /* 790 */ 975, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 800 */ 136, 136, 136, 580, 390, 580, 523, 137, 138, 91, + /* 810 */ 6, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 820 */ 136, 136, 136, 1208, 1209, 1208, 19, 19, 19, 19, + /* 830 */ 427, 469, 1573, 948, 381, 209, 555, 555, 947, 580, + /* 840 */ 475, 306, 415, 442, 530, 1556, 134, 134, 134, 134, + /* 850 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 1558, + /* 860 */ 580, 1579, 82, 82, 580, 7, 134, 134, 134, 134, + /* 870 */ 133, 
133, 132, 132, 132, 131, 128, 455, 1208, 288, + /* 880 */ 288, 19, 19, 19, 19, 491, 417, 19, 19, 492, + /* 890 */ 1028, 198, 577, 1111, 566, 461, 206, 1111, 207, 317, + /* 900 */ 213, 1208, 556, 1631, 580, 915, 417, 136, 136, 136, + /* 910 */ 136, 137, 138, 91, 40, 1232, 1232, 1067, 1070, 1057, + /* 920 */ 1057, 135, 135, 136, 136, 136, 136, 147, 147, 1515, + /* 930 */ 497, 137, 138, 91, 1228, 1232, 1232, 1067, 1070, 1057, + /* 940 */ 1057, 135, 135, 136, 136, 136, 136, 1515, 1517, 267, + /* 950 */ 340, 1130, 141, 1439, 134, 134, 134, 134, 133, 133, + /* 960 */ 132, 132, 132, 131, 128, 455, 1131, 1109, 1572, 536, + /* 970 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, + /* 980 */ 128, 455, 1132, 477, 1208, 298, 1208, 1209, 1208, 1208, + /* 990 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, + /* 1000 */ 128, 455, 5, 926, 580, 485, 345, 1208, 1044, 1208, + /* 1010 */ 1209, 1208, 337, 927, 339, 478, 50, 580, 125, 417, + /* 1020 */ 3, 1515, 1349, 1129, 434, 1033, 415, 66, 66, 1032, + /* 1030 */ 453, 453, 453, 290, 577, 866, 566, 414, 413, 417, + /* 1040 */ 67, 67, 1572, 1228, 137, 138, 91, 115, 1232, 1232, + /* 1050 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, + /* 1060 */ 580, 1032, 1032, 1034, 137, 138, 91, 537, 1232, 1232, + /* 1070 */ 1067, 1070, 1057, 1057, 135, 135, 136, 136, 136, 136, + /* 1080 */ 1573, 299, 381, 82, 82, 476, 350, 1228, 1554, 485, + /* 1090 */ 47, 1192, 1208, 1209, 1208, 474, 338, 1208, 1209, 1208, + /* 1100 */ 10, 564, 267, 134, 134, 134, 134, 133, 133, 132, + /* 1110 */ 132, 132, 131, 128, 455, 1208, 1209, 1208, 1572, 974, + /* 1120 */ 449, 507, 580, 134, 134, 134, 134, 133, 133, 132, + /* 1130 */ 132, 132, 131, 128, 455, 580, 288, 288, 1089, 557, + /* 1140 */ 580, 1114, 1114, 499, 580, 21, 21, 580, 485, 577, + /* 1150 */ 261, 566, 417, 1620, 1573, 1439, 381, 215, 82, 82, + /* 1160 */ 260, 259, 258, 82, 82, 302, 49, 53, 53, 1211, + /* 1170 */ 68, 68, 417, 1335, 1335, 1454, 1502, 137, 138, 91, + /* 1180 */ 119, 1232, 
1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 1190 */ 136, 136, 136, 906, 324, 450, 1228, 137, 138, 91, + /* 1200 */ 454, 1232, 1232, 1067, 1070, 1057, 1057, 135, 135, 136, + /* 1210 */ 136, 136, 136, 1453, 377, 1599, 1663, 403, 422, 854, + /* 1220 */ 855, 856, 1286, 423, 304, 519, 498, 973, 1372, 315, + /* 1230 */ 1573, 580, 381, 907, 367, 502, 134, 134, 134, 134, + /* 1240 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 227, + /* 1250 */ 580, 491, 377, 1599, 54, 54, 134, 134, 134, 134, + /* 1260 */ 133, 133, 132, 132, 132, 131, 128, 455, 580, 1173, + /* 1270 */ 580, 69, 69, 70, 70, 580, 417, 214, 1211, 227, + /* 1280 */ 1344, 557, 1173, 325, 878, 1173, 573, 423, 1439, 439, + /* 1290 */ 405, 71, 71, 72, 72, 580, 417, 160, 73, 73, + /* 1300 */ 158, 137, 126, 91, 502, 1232, 1232, 1067, 1070, 1057, + /* 1310 */ 1057, 135, 135, 136, 136, 136, 136, 580, 55, 55, + /* 1320 */ 886, 233, 138, 91, 580, 1232, 1232, 1067, 1070, 1057, + /* 1330 */ 1057, 135, 135, 136, 136, 136, 136, 402, 1253, 491, + /* 1340 */ 56, 56, 1130, 424, 184, 456, 973, 57, 57, 1340, + /* 1350 */ 502, 347, 580, 466, 580, 123, 572, 1131, 4, 447, + /* 1360 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, + /* 1370 */ 128, 455, 575, 1132, 109, 59, 59, 60, 60, 580, + /* 1380 */ 134, 134, 134, 134, 133, 133, 132, 132, 132, 131, + /* 1390 */ 128, 455, 580, 878, 568, 355, 580, 222, 585, 580, + /* 1400 */ 1263, 580, 74, 74, 120, 321, 117, 155, 569, 402, + /* 1410 */ 1152, 161, 1044, 16, 1345, 75, 75, 1254, 44, 76, + /* 1420 */ 76, 482, 20, 20, 77, 77, 1580, 1578, 886, 1033, + /* 1430 */ 7, 7, 415, 1032, 1577, 489, 580, 1044, 7, 305, + /* 1440 */ 1227, 467, 948, 121, 121, 467, 580, 947, 297, 288, + /* 1450 */ 288, 122, 297, 456, 581, 456, 580, 319, 1032, 143, + /* 1460 */ 143, 456, 577, 166, 566, 1032, 1032, 1034, 415, 144, + /* 1470 */ 144, 123, 572, 242, 4, 320, 571, 245, 328, 78, + /* 1480 */ 78, 287, 231, 293, 481, 479, 456, 580, 575, 1254, + /* 1490 */ 1032, 1032, 1034, 1035, 35, 1192, 123, 572, 
580, 4, + /* 1500 */ 97, 218, 9, 580, 1148, 580, 404, 580, 300, 459, + /* 1510 */ 62, 62, 580, 575, 23, 375, 580, 323, 580, 221, + /* 1520 */ 1192, 79, 79, 580, 569, 374, 63, 63, 80, 80, + /* 1530 */ 64, 64, 580, 490, 1384, 170, 170, 532, 546, 171, + /* 1540 */ 171, 87, 87, 545, 912, 913, 65, 65, 346, 569, + /* 1550 */ 111, 1383, 580, 1044, 428, 83, 83, 580, 222, 121, + /* 1560 */ 121, 470, 1025, 546, 266, 905, 904, 122, 547, 456, + /* 1570 */ 581, 456, 1561, 893, 1032, 146, 146, 456, 1044, 1096, + /* 1580 */ 84, 84, 990, 991, 121, 121, 524, 123, 572, 341, + /* 1590 */ 4, 244, 122, 1534, 456, 581, 456, 580, 484, 1032, + /* 1600 */ 266, 580, 456, 580, 575, 1036, 1032, 1032, 1034, 1035, + /* 1610 */ 35, 205, 123, 572, 580, 4, 1157, 1108, 580, 1108, + /* 1620 */ 168, 168, 1533, 494, 148, 148, 142, 142, 486, 575, + /* 1630 */ 266, 1032, 1032, 1034, 1035, 35, 1192, 169, 169, 292, + /* 1640 */ 569, 162, 162, 352, 400, 400, 399, 277, 397, 580, + /* 1650 */ 351, 863, 111, 1380, 546, 580, 508, 580, 263, 545, + /* 1660 */ 365, 1192, 111, 356, 239, 569, 327, 359, 580, 1044, + /* 1670 */ 361, 363, 152, 152, 326, 121, 121, 1328, 151, 151, + /* 1680 */ 149, 149, 893, 122, 1315, 456, 581, 456, 1096, 1314, + /* 1690 */ 1032, 150, 150, 1313, 1044, 580, 165, 1092, 111, 263, + /* 1700 */ 121, 121, 952, 1312, 978, 241, 266, 580, 122, 370, + /* 1710 */ 456, 581, 456, 175, 1036, 1032, 43, 380, 86, 86, + /* 1720 */ 1370, 1393, 1032, 1032, 1034, 1035, 35, 1605, 1196, 458, + /* 1730 */ 88, 88, 292, 240, 946, 1438, 125, 400, 400, 399, + /* 1740 */ 277, 397, 580, 943, 863, 125, 1366, 1032, 1032, 1034, + /* 1750 */ 1035, 35, 1192, 1593, 1107, 580, 1107, 239, 876, 327, + /* 1760 */ 159, 945, 1378, 125, 580, 85, 85, 326, 418, 567, + /* 1770 */ 503, 1443, 1293, 320, 571, 1284, 1272, 1192, 52, 52, + /* 1780 */ 1271, 1273, 1613, 280, 401, 167, 1363, 58, 58, 12, + /* 1790 */ 312, 313, 314, 1425, 224, 237, 295, 462, 241, 333, + /* 1800 */ 336, 343, 344, 301, 349, 488, 175, 1375, 456, 43, + /* 1810 
*/ 1430, 513, 1429, 228, 1376, 1311, 408, 210, 123, 572, + /* 1820 */ 373, 4, 1506, 1505, 1374, 1373, 240, 1616, 456, 570, + /* 1830 */ 211, 395, 1248, 270, 1553, 575, 1245, 223, 90, 572, + /* 1840 */ 1551, 4, 426, 186, 96, 1511, 220, 92, 235, 1426, + /* 1850 */ 95, 195, 140, 557, 332, 575, 13, 180, 1420, 188, + /* 1860 */ 1413, 418, 335, 472, 190, 191, 320, 571, 473, 192, + /* 1870 */ 193, 569, 506, 247, 109, 1434, 406, 199, 495, 1432, + /* 1880 */ 251, 102, 1431, 480, 409, 14, 501, 1522, 281, 354, + /* 1890 */ 462, 569, 1500, 203, 253, 522, 358, 254, 504, 1274, + /* 1900 */ 1044, 255, 1331, 411, 1330, 1329, 121, 121, 440, 104, + /* 1910 */ 1301, 412, 1322, 1630, 122, 1629, 456, 581, 456, 897, + /* 1920 */ 1044, 1032, 372, 1300, 1299, 1628, 121, 121, 1321, 229, + /* 1930 */ 1598, 444, 531, 310, 122, 445, 456, 581, 456, 311, + /* 1940 */ 559, 1032, 378, 268, 269, 448, 1398, 11, 1487, 389, + /* 1950 */ 116, 318, 110, 1032, 1032, 1034, 1035, 35, 1354, 387, + /* 1960 */ 558, 1353, 216, 1584, 1583, 541, 1397, 393, 42, 394, + /* 1970 */ 583, 1202, 276, 1032, 1032, 1034, 1035, 35, 1196, 458, + /* 1980 */ 278, 279, 292, 1192, 584, 1538, 172, 400, 400, 399, + /* 1990 */ 277, 397, 156, 1539, 863, 1269, 1264, 1537, 1536, 308, + /* 2000 */ 456, 225, 173, 1192, 226, 174, 850, 239, 457, 327, + /* 2010 */ 123, 572, 89, 4, 217, 322, 419, 326, 185, 420, + /* 2020 */ 154, 236, 1106, 1104, 330, 187, 176, 575, 1227, 189, + /* 2030 */ 929, 243, 342, 246, 1120, 194, 177, 178, 429, 98, + /* 2040 */ 99, 196, 100, 101, 179, 431, 1123, 248, 241, 1119, + /* 2050 */ 249, 163, 24, 250, 266, 353, 175, 1242, 1112, 43, + /* 2060 */ 500, 252, 200, 569, 201, 15, 374, 865, 505, 509, + /* 2070 */ 256, 895, 202, 103, 25, 26, 240, 366, 164, 514, + /* 2080 */ 369, 105, 309, 517, 1189, 908, 106, 525, 107, 1073, + /* 2090 */ 1159, 17, 1044, 1158, 27, 181, 230, 284, 121, 121, + /* 2100 */ 286, 204, 265, 976, 28, 125, 122, 982, 456, 581, + /* 2110 */ 456, 418, 29, 1032, 1175, 30, 320, 571, 31, 1179, + /* 
2120 */ 8, 1177, 1182, 32, 1164, 41, 553, 33, 34, 208, + /* 2130 */ 111, 1087, 1074, 1072, 1076, 1128, 271, 113, 565, 114, + /* 2140 */ 462, 118, 1077, 36, 18, 1032, 1032, 1034, 1035, 35, + /* 2150 */ 1037, 877, 1183, 939, 124, 37, 398, 272, 153, 576, + /* 2160 */ 273, 183, 1621, 1198, 1197, 1260, 1260, 1260, 1260, 1260, + /* 2170 */ 1260, 1260, 1260, 1260, 1260, 1192, }; static const YYCODETYPE yy_lookahead[] = { - /* 0 */ 277, 278, 279, 277, 278, 279, 212, 213, 214, 195, - /* 10 */ 218, 187, 188, 189, 190, 191, 192, 195, 195, 19, - /* 20 */ 190, 197, 192, 199, 298, 218, 206, 197, 23, 199, - /* 30 */ 206, 31, 218, 219, 217, 31, 206, 237, 238, 39, - /* 40 */ 218, 219, 225, 39, 44, 45, 46, 206, 48, 49, + /* 0 */ 225, 195, 227, 195, 195, 195, 195, 241, 242, 217, + /* 10 */ 235, 187, 188, 189, 190, 191, 192, 225, 207, 19, + /* 20 */ 254, 197, 256, 199, 218, 219, 218, 219, 218, 219, + /* 30 */ 206, 31, 277, 278, 279, 22, 23, 218, 25, 39, + /* 40 */ 277, 278, 279, 234, 44, 45, 46, 206, 48, 49, /* 50 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 60 */ 195, 241, 242, 240, 16, 241, 242, 212, 213, 214, - /* 70 */ 19, 241, 242, 208, 254, 24, 256, 255, 254, 195, - /* 80 */ 256, 9, 241, 242, 254, 23, 256, 25, 56, 57, - /* 90 */ 58, 59, 60, 269, 195, 254, 195, 256, 26, 269, - /* 100 */ 259, 260, 218, 219, 104, 105, 106, 107, 108, 109, - /* 110 */ 110, 111, 112, 113, 114, 115, 218, 233, 189, 190, - /* 120 */ 191, 192, 299, 221, 19, 301, 197, 79, 199, 81, - /* 130 */ 29, 301, 318, 319, 33, 206, 104, 105, 106, 107, - /* 140 */ 108, 109, 110, 111, 112, 113, 114, 115, 97, 44, - /* 150 */ 45, 46, 147, 48, 49, 50, 51, 52, 53, 54, - /* 160 */ 55, 56, 57, 58, 59, 195, 115, 9, 67, 195, - /* 170 */ 241, 242, 121, 122, 123, 124, 125, 126, 127, 277, - /* 180 */ 278, 279, 120, 254, 133, 256, 206, 286, 218, 219, - /* 190 */ 118, 119, 120, 88, 295, 90, 195, 195, 269, 108, - /* 200 */ 109, 110, 111, 112, 113, 114, 115, 145, 207, 104, - /* 210 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 
114, - /* 220 */ 115, 241, 242, 110, 111, 112, 113, 114, 115, 19, - /* 230 */ 301, 75, 130, 131, 254, 25, 256, 25, 9, 83, - /* 240 */ 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, - /* 250 */ 114, 115, 272, 24, 44, 45, 46, 115, 48, 49, - /* 260 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 270 */ 48, 49, 50, 51, 9, 195, 118, 119, 120, 121, - /* 280 */ 195, 71, 124, 125, 126, 129, 312, 313, 318, 195, - /* 290 */ 320, 317, 134, 195, 78, 121, 140, 141, 124, 125, - /* 300 */ 126, 19, 313, 218, 219, 89, 317, 91, 134, 307, - /* 310 */ 94, 309, 218, 219, 104, 105, 106, 107, 108, 109, - /* 320 */ 110, 111, 112, 113, 114, 115, 44, 45, 46, 232, - /* 330 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - /* 340 */ 58, 59, 19, 78, 78, 123, 266, 24, 119, 255, - /* 350 */ 137, 69, 140, 141, 89, 142, 91, 91, 264, 94, - /* 360 */ 94, 22, 23, 147, 25, 206, 221, 44, 45, 46, - /* 370 */ 206, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 380 */ 57, 58, 59, 118, 119, 120, 104, 105, 106, 107, - /* 390 */ 108, 109, 110, 111, 112, 113, 114, 115, 25, 200, - /* 400 */ 241, 242, 195, 318, 319, 241, 242, 272, 56, 57, - /* 410 */ 58, 59, 147, 254, 19, 256, 22, 23, 254, 25, - /* 420 */ 256, 75, 277, 278, 279, 218, 219, 104, 105, 106, - /* 430 */ 107, 108, 109, 110, 111, 112, 113, 114, 115, 44, - /* 440 */ 45, 46, 103, 48, 49, 50, 51, 52, 53, 54, - /* 450 */ 55, 56, 57, 58, 59, 195, 104, 105, 106, 107, - /* 460 */ 108, 109, 110, 111, 112, 113, 114, 115, 195, 241, - /* 470 */ 242, 9, 195, 48, 9, 129, 130, 131, 218, 219, - /* 480 */ 195, 206, 254, 284, 256, 206, 195, 288, 281, 195, - /* 490 */ 206, 218, 219, 120, 195, 218, 219, 103, 195, 104, - /* 500 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, - /* 510 */ 115, 195, 213, 214, 195, 255, 241, 242, 145, 19, - /* 520 */ 241, 242, 203, 23, 264, 241, 242, 208, 255, 254, - /* 530 */ 9, 256, 255, 254, 228, 256, 74, 234, 254, 19, - /* 540 */ 256, 264, 117, 23, 44, 45, 46, 152, 48, 49, - /* 550 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 560 */ 9, 
195, 271, 19, 44, 45, 46, 23, 48, 49, - /* 570 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - /* 580 */ 118, 119, 120, 118, 119, 120, 287, 271, 44, 45, - /* 590 */ 46, 195, 48, 49, 50, 51, 52, 53, 54, 55, - /* 600 */ 56, 57, 58, 59, 104, 105, 106, 107, 108, 109, - /* 610 */ 110, 111, 112, 113, 114, 115, 215, 195, 153, 195, - /* 620 */ 9, 195, 195, 9, 104, 105, 106, 107, 108, 109, - /* 630 */ 110, 111, 112, 113, 114, 115, 22, 213, 214, 118, - /* 640 */ 119, 120, 108, 109, 218, 219, 140, 141, 104, 105, - /* 650 */ 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - /* 660 */ 9, 110, 241, 242, 167, 168, 145, 271, 19, 118, - /* 670 */ 119, 120, 23, 22, 195, 254, 9, 256, 277, 278, - /* 680 */ 279, 255, 237, 238, 205, 74, 207, 265, 19, 155, - /* 690 */ 25, 157, 23, 44, 45, 46, 276, 48, 49, 50, - /* 700 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 83, - /* 710 */ 195, 287, 19, 44, 45, 46, 23, 48, 49, 50, - /* 720 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 118, - /* 730 */ 119, 120, 118, 119, 120, 315, 316, 44, 45, 46, - /* 740 */ 195, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 750 */ 57, 58, 59, 104, 105, 106, 107, 108, 109, 110, - /* 760 */ 111, 112, 113, 114, 115, 195, 140, 141, 195, 118, - /* 770 */ 119, 120, 195, 104, 105, 106, 107, 108, 109, 110, - /* 780 */ 111, 112, 113, 114, 115, 118, 119, 120, 218, 219, - /* 790 */ 9, 218, 219, 19, 19, 218, 219, 104, 105, 106, - /* 800 */ 107, 108, 109, 110, 111, 112, 113, 114, 115, 144, - /* 810 */ 233, 195, 145, 113, 114, 115, 195, 19, 44, 45, - /* 820 */ 46, 23, 48, 49, 50, 51, 52, 53, 54, 55, - /* 830 */ 56, 57, 58, 59, 218, 219, 195, 19, 9, 218, - /* 840 */ 219, 23, 44, 45, 46, 195, 48, 49, 50, 51, - /* 850 */ 52, 53, 54, 55, 56, 57, 58, 59, 136, 83, - /* 860 */ 138, 139, 44, 45, 46, 195, 48, 49, 50, 51, - /* 870 */ 52, 53, 54, 55, 56, 57, 58, 59, 104, 105, - /* 880 */ 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, - /* 890 */ 195, 195, 117, 118, 304, 305, 195, 9, 122, 118, - /* 900 */ 119, 120, 104, 105, 106, 107, 108, 109, 110, 111, 
- /* 910 */ 112, 113, 114, 115, 195, 276, 140, 141, 215, 218, - /* 920 */ 219, 146, 104, 105, 106, 107, 108, 109, 110, 111, - /* 930 */ 112, 113, 114, 115, 233, 161, 295, 108, 109, 195, - /* 940 */ 9, 195, 19, 262, 263, 195, 117, 118, 119, 120, - /* 950 */ 25, 122, 206, 22, 315, 316, 276, 9, 195, 130, - /* 960 */ 310, 311, 218, 219, 218, 219, 195, 44, 45, 46, - /* 970 */ 22, 48, 49, 50, 51, 52, 53, 54, 55, 56, - /* 980 */ 57, 58, 59, 16, 155, 266, 157, 241, 242, 12, - /* 990 */ 102, 195, 289, 243, 195, 315, 316, 294, 195, 255, - /* 1000 */ 254, 255, 256, 78, 27, 206, 118, 119, 312, 313, - /* 1010 */ 264, 123, 242, 317, 243, 195, 91, 218, 219, 94, - /* 1020 */ 43, 218, 219, 251, 254, 253, 256, 104, 105, 106, - /* 1030 */ 107, 108, 109, 110, 111, 112, 113, 114, 115, 195, - /* 1040 */ 241, 242, 65, 155, 156, 157, 79, 195, 81, 118, - /* 1050 */ 119, 120, 75, 254, 255, 256, 19, 307, 255, 309, - /* 1060 */ 195, 24, 266, 243, 9, 195, 118, 119, 120, 195, - /* 1070 */ 218, 219, 184, 23, 195, 25, 19, 22, 307, 195, - /* 1080 */ 309, 44, 45, 46, 161, 48, 49, 50, 51, 52, - /* 1090 */ 53, 54, 55, 56, 57, 58, 59, 218, 219, 195, - /* 1100 */ 195, 44, 45, 46, 102, 48, 49, 50, 51, 52, - /* 1110 */ 53, 54, 55, 56, 57, 58, 59, 195, 225, 195, - /* 1120 */ 227, 119, 218, 219, 240, 123, 228, 307, 235, 309, - /* 1130 */ 163, 25, 208, 195, 255, 265, 271, 233, 21, 265, - /* 1140 */ 25, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 1150 */ 113, 114, 115, 195, 230, 195, 232, 155, 156, 157, - /* 1160 */ 195, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 1170 */ 113, 114, 115, 118, 119, 120, 218, 219, 218, 219, - /* 1180 */ 131, 19, 19, 218, 219, 195, 184, 195, 266, 9, - /* 1190 */ 313, 233, 195, 233, 317, 195, 228, 110, 233, 82, - /* 1200 */ 295, 19, 22, 154, 24, 11, 44, 45, 46, 195, - /* 1210 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - /* 1220 */ 58, 59, 35, 195, 137, 68, 234, 45, 46, 142, - /* 1230 */ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, - /* 1240 */ 58, 59, 125, 243, 87, 9, 218, 
219, 22, 23, - /* 1250 */ 19, 22, 23, 96, 240, 68, 266, 151, 22, 195, - /* 1260 */ 313, 233, 147, 76, 317, 9, 104, 105, 106, 107, - /* 1270 */ 108, 109, 110, 111, 112, 113, 114, 115, 195, 48, - /* 1280 */ 163, 118, 218, 219, 22, 195, 104, 105, 106, 107, - /* 1290 */ 108, 109, 110, 111, 112, 113, 114, 115, 195, 119, - /* 1300 */ 12, 218, 219, 195, 195, 148, 19, 307, 311, 309, - /* 1310 */ 7, 8, 9, 123, 313, 27, 313, 195, 317, 132, - /* 1320 */ 317, 218, 219, 133, 130, 131, 218, 219, 195, 103, - /* 1330 */ 240, 43, 103, 46, 215, 48, 49, 50, 51, 52, - /* 1340 */ 53, 54, 55, 56, 57, 58, 59, 9, 117, 195, - /* 1350 */ 147, 218, 219, 65, 195, 119, 195, 19, 20, 195, - /* 1360 */ 22, 195, 240, 195, 133, 195, 129, 130, 131, 166, - /* 1370 */ 108, 109, 218, 219, 36, 119, 195, 218, 219, 218, - /* 1380 */ 219, 131, 218, 219, 218, 219, 218, 219, 218, 219, - /* 1390 */ 19, 104, 105, 106, 107, 108, 109, 110, 111, 112, - /* 1400 */ 113, 114, 115, 195, 154, 195, 195, 9, 195, 210, - /* 1410 */ 211, 73, 195, 294, 195, 210, 211, 19, 20, 195, - /* 1420 */ 22, 83, 129, 130, 131, 117, 218, 219, 195, 218, - /* 1430 */ 219, 218, 219, 25, 36, 218, 219, 218, 219, 195, - /* 1440 */ 102, 263, 218, 219, 246, 267, 108, 109, 50, 195, - /* 1450 */ 240, 218, 219, 246, 116, 257, 118, 119, 120, 195, - /* 1460 */ 152, 123, 218, 219, 257, 263, 246, 25, 195, 267, - /* 1470 */ 99, 73, 218, 219, 302, 303, 246, 257, 140, 141, - /* 1480 */ 259, 260, 218, 219, 15, 87, 195, 257, 9, 195, - /* 1490 */ 92, 218, 219, 155, 156, 157, 158, 159, 19, 20, - /* 1500 */ 102, 22, 195, 151, 152, 195, 108, 109, 24, 218, - /* 1510 */ 219, 195, 218, 219, 116, 36, 118, 119, 120, 117, - /* 1520 */ 246, 123, 184, 195, 19, 218, 219, 195, 218, 219, - /* 1530 */ 61, 257, 9, 261, 218, 219, 128, 195, 160, 195, - /* 1540 */ 162, 195, 19, 20, 195, 22, 218, 219, 146, 195, - /* 1550 */ 218, 219, 73, 155, 156, 157, 158, 159, 22, 36, - /* 1560 */ 218, 219, 218, 219, 218, 219, 87, 218, 219, 195, - /* 1570 */ 22, 92, 218, 219, 22, 195, 24, 
23, 195, 25, - /* 1580 */ 195, 102, 184, 195, 195, 195, 144, 108, 109, 195, - /* 1590 */ 101, 55, 218, 219, 195, 116, 73, 118, 119, 120, - /* 1600 */ 122, 123, 123, 218, 219, 195, 218, 219, 218, 219, - /* 1610 */ 87, 63, 218, 219, 23, 92, 25, 218, 219, 23, - /* 1620 */ 23, 25, 25, 118, 135, 102, 195, 143, 195, 195, - /* 1630 */ 195, 108, 109, 9, 155, 156, 157, 158, 159, 116, - /* 1640 */ 9, 118, 119, 120, 195, 195, 123, 9, 195, 218, - /* 1650 */ 219, 218, 219, 218, 219, 7, 8, 19, 20, 23, - /* 1660 */ 22, 25, 23, 184, 25, 85, 86, 218, 219, 9, - /* 1670 */ 195, 218, 219, 258, 36, 195, 195, 22, 155, 156, - /* 1680 */ 157, 158, 159, 0, 1, 2, 195, 23, 5, 25, - /* 1690 */ 23, 195, 25, 10, 11, 12, 13, 14, 218, 219, - /* 1700 */ 17, 195, 23, 23, 25, 25, 9, 184, 195, 218, - /* 1710 */ 219, 73, 23, 30, 25, 32, 19, 20, 23, 22, - /* 1720 */ 25, 195, 23, 40, 25, 155, 9, 157, 155, 23, - /* 1730 */ 157, 25, 195, 36, 195, 195, 19, 20, 195, 22, - /* 1740 */ 102, 195, 23, 119, 25, 195, 108, 109, 110, 322, - /* 1750 */ 119, 195, 195, 36, 116, 72, 118, 119, 120, 195, - /* 1760 */ 195, 123, 238, 80, 195, 291, 83, 195, 258, 195, - /* 1770 */ 73, 195, 195, 195, 195, 290, 244, 258, 258, 119, - /* 1780 */ 258, 193, 216, 100, 270, 274, 245, 300, 270, 247, - /* 1790 */ 73, 296, 248, 155, 156, 157, 158, 159, 143, 102, - /* 1800 */ 247, 274, 274, 248, 274, 108, 109, 222, 296, 227, - /* 1810 */ 221, 221, 231, 116, 221, 118, 119, 120, 135, 102, - /* 1820 */ 123, 262, 184, 140, 141, 108, 109, 198, 283, 262, - /* 1830 */ 262, 247, 61, 116, 300, 118, 119, 120, 251, 142, - /* 1840 */ 123, 143, 202, 251, 245, 262, 202, 164, 38, 300, - /* 1850 */ 22, 202, 155, 156, 157, 158, 159, 297, 297, 142, - /* 1860 */ 153, 286, 152, 150, 252, 147, 251, 44, 273, 275, - /* 1870 */ 236, 252, 155, 156, 157, 158, 159, 251, 1, 2, - /* 1880 */ 18, 184, 5, 202, 239, 239, 239, 10, 11, 12, - /* 1890 */ 13, 14, 239, 18, 17, 201, 151, 275, 248, 248, - /* 1900 */ 9, 184, 275, 273, 236, 248, 236, 30, 160, 32, - /* 1910 */ 19, 
20, 201, 22, 248, 64, 202, 40, 293, 22, - /* 1920 */ 292, 117, 202, 201, 223, 202, 201, 36, 202, 201, - /* 1930 */ 66, 223, 220, 229, 22, 220, 220, 128, 229, 167, - /* 1940 */ 24, 308, 115, 285, 285, 202, 93, 226, 316, 72, - /* 1950 */ 226, 220, 223, 220, 222, 220, 220, 80, 321, 268, - /* 1960 */ 83, 223, 321, 84, 73, 22, 280, 202, 268, 282, - /* 1970 */ 149, 160, 148, 25, 204, 13, 196, 100, 196, 252, - /* 1980 */ 6, 251, 194, 194, 194, 303, 252, 250, 249, 248, - /* 1990 */ 306, 306, 209, 102, 215, 4, 215, 224, 224, 108, - /* 2000 */ 109, 215, 215, 209, 209, 3, 215, 116, 216, 118, - /* 2010 */ 119, 120, 135, 216, 123, 22, 15, 140, 141, 165, - /* 2020 */ 16, 23, 23, 141, 153, 132, 20, 144, 25, 24, - /* 2030 */ 146, 16, 1, 144, 132, 132, 63, 37, 153, 55, - /* 2040 */ 55, 164, 55, 55, 118, 132, 155, 156, 157, 158, - /* 2050 */ 159, 34, 5, 143, 1, 5, 22, 10, 11, 12, - /* 2060 */ 13, 14, 117, 163, 17, 70, 25, 70, 41, 77, - /* 2070 */ 143, 117, 24, 133, 20, 184, 19, 30, 127, 32, - /* 2080 */ 22, 69, 22, 22, 9, 23, 22, 40, 69, 24, - /* 2090 */ 22, 28, 98, 69, 23, 151, 37, 23, 22, 143, - /* 2100 */ 34, 23, 25, 23, 22, 22, 99, 23, 23, 145, - /* 2110 */ 90, 25, 77, 34, 118, 34, 88, 45, 95, 72, - /* 2120 */ 77, 34, 34, 34, 34, 23, 22, 80, 25, 24, - /* 2130 */ 83, 25, 34, 144, 144, 23, 23, 23, 23, 22, - /* 2140 */ 11, 25, 23, 25, 22, 22, 22, 100, 23, 23, - /* 2150 */ 22, 137, 22, 143, 143, 15, 25, 25, 23, 143, - /* 2160 */ 1, 1, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2170 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2180 */ 323, 323, 135, 323, 323, 323, 323, 140, 141, 323, + /* 60 */ 9, 298, 195, 22, 195, 241, 242, 189, 190, 191, + /* 70 */ 192, 23, 9, 25, 205, 197, 207, 199, 254, 195, + /* 80 */ 256, 195, 241, 242, 206, 218, 219, 281, 56, 57, + /* 90 */ 58, 59, 60, 269, 208, 254, 55, 256, 237, 238, + /* 100 */ 259, 260, 215, 103, 104, 105, 106, 107, 108, 109, + /* 110 */ 110, 111, 112, 113, 114, 102, 230, 313, 232, 241, + /* 120 */ 242, 317, 255, 19, 240, 
301, 318, 319, 318, 319, + /* 130 */ 9, 264, 254, 195, 256, 103, 104, 105, 106, 107, + /* 140 */ 108, 109, 110, 111, 112, 113, 114, 269, 44, 45, + /* 150 */ 46, 114, 48, 49, 50, 51, 52, 53, 54, 55, + /* 160 */ 56, 57, 58, 59, 277, 278, 279, 119, 117, 118, + /* 170 */ 119, 120, 109, 218, 123, 124, 125, 195, 19, 301, + /* 180 */ 117, 118, 119, 24, 133, 109, 110, 111, 112, 113, + /* 190 */ 114, 87, 144, 89, 103, 104, 105, 106, 107, 108, + /* 200 */ 109, 110, 111, 112, 113, 114, 16, 103, 104, 105, + /* 210 */ 106, 107, 108, 109, 110, 111, 112, 113, 114, 107, + /* 220 */ 108, 109, 110, 111, 112, 113, 114, 19, 107, 108, + /* 230 */ 82, 241, 242, 25, 195, 74, 48, 116, 117, 118, + /* 240 */ 119, 25, 121, 82, 254, 307, 256, 309, 195, 195, + /* 250 */ 129, 195, 44, 45, 46, 96, 48, 49, 50, 51, + /* 260 */ 52, 53, 54, 55, 56, 57, 58, 59, 78, 121, + /* 270 */ 80, 218, 219, 114, 251, 154, 253, 156, 70, 120, + /* 280 */ 121, 122, 123, 124, 125, 126, 233, 139, 140, 128, + /* 290 */ 218, 132, 77, 77, 312, 313, 240, 74, 19, 317, + /* 300 */ 139, 140, 195, 88, 116, 90, 90, 206, 93, 93, + /* 310 */ 271, 103, 104, 105, 106, 107, 108, 109, 110, 111, + /* 320 */ 112, 113, 114, 44, 45, 46, 232, 48, 49, 50, + /* 330 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 19, + /* 340 */ 286, 276, 241, 242, 24, 120, 195, 68, 123, 124, + /* 350 */ 125, 128, 129, 130, 203, 254, 221, 256, 133, 208, + /* 360 */ 195, 146, 221, 195, 44, 45, 46, 19, 48, 49, + /* 370 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 380 */ 315, 316, 103, 104, 105, 106, 107, 108, 109, 110, + /* 390 */ 111, 112, 113, 114, 46, 200, 48, 49, 50, 51, + /* 400 */ 52, 53, 54, 55, 56, 57, 58, 59, 240, 195, + /* 410 */ 19, 9, 277, 278, 279, 82, 195, 9, 277, 278, + /* 420 */ 279, 129, 130, 103, 104, 105, 106, 107, 108, 109, + /* 430 */ 110, 111, 112, 113, 114, 44, 45, 46, 195, 48, + /* 440 */ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + /* 450 */ 59, 103, 104, 105, 106, 107, 108, 109, 110, 111, + /* 460 */ 112, 113, 114, 195, 107, 108, 195, 299, 
9, 195, + /* 470 */ 25, 9, 139, 140, 206, 73, 195, 312, 313, 284, + /* 480 */ 206, 73, 317, 288, 22, 26, 218, 219, 48, 49, + /* 490 */ 50, 51, 218, 219, 103, 104, 105, 106, 107, 108, + /* 500 */ 109, 110, 111, 112, 113, 114, 246, 9, 265, 241, + /* 510 */ 242, 154, 195, 156, 19, 241, 242, 257, 23, 117, + /* 520 */ 118, 119, 254, 255, 256, 117, 118, 119, 254, 255, + /* 530 */ 256, 195, 264, 195, 19, 218, 219, 266, 23, 44, + /* 540 */ 45, 46, 151, 48, 49, 50, 51, 52, 53, 54, + /* 550 */ 55, 56, 57, 58, 59, 195, 218, 219, 195, 44, + /* 560 */ 45, 46, 122, 48, 49, 50, 51, 52, 53, 54, + /* 570 */ 55, 56, 57, 58, 59, 77, 117, 118, 119, 117, + /* 580 */ 118, 119, 19, 116, 139, 140, 88, 195, 90, 31, + /* 590 */ 195, 93, 311, 255, 234, 206, 9, 39, 103, 104, + /* 600 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, + /* 610 */ 218, 219, 145, 218, 219, 117, 118, 119, 103, 104, + /* 620 */ 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, + /* 630 */ 241, 242, 67, 22, 23, 318, 25, 320, 19, 212, + /* 640 */ 213, 214, 23, 254, 146, 256, 135, 255, 137, 138, + /* 650 */ 255, 86, 112, 113, 114, 9, 264, 15, 19, 264, + /* 660 */ 95, 272, 23, 44, 45, 46, 206, 48, 49, 50, + /* 670 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 116, + /* 680 */ 117, 195, 25, 44, 45, 46, 195, 48, 49, 50, + /* 690 */ 51, 52, 53, 54, 55, 56, 57, 58, 59, 213, + /* 700 */ 214, 241, 242, 61, 117, 118, 119, 195, 145, 218, + /* 710 */ 219, 195, 147, 102, 254, 9, 256, 195, 195, 206, + /* 720 */ 195, 195, 103, 104, 105, 106, 107, 108, 109, 110, + /* 730 */ 111, 112, 113, 114, 195, 206, 195, 166, 167, 152, + /* 740 */ 218, 219, 103, 104, 105, 106, 107, 108, 109, 110, + /* 750 */ 111, 112, 113, 114, 241, 242, 195, 218, 219, 218, + /* 760 */ 219, 215, 19, 117, 118, 119, 23, 254, 243, 256, + /* 770 */ 241, 242, 233, 287, 233, 139, 140, 255, 266, 218, + /* 780 */ 219, 265, 19, 254, 127, 256, 23, 44, 45, 46, + /* 790 */ 144, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 800 */ 57, 58, 59, 195, 195, 195, 109, 44, 45, 46, + /* 810 
*/ 215, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 820 */ 57, 58, 59, 117, 118, 119, 218, 219, 218, 219, + /* 830 */ 195, 246, 307, 136, 309, 289, 310, 311, 141, 195, + /* 840 */ 294, 233, 257, 233, 206, 195, 103, 104, 105, 106, + /* 850 */ 107, 108, 109, 110, 111, 112, 113, 114, 195, 195, + /* 860 */ 195, 313, 218, 219, 195, 317, 103, 104, 105, 106, + /* 870 */ 107, 108, 109, 110, 111, 112, 113, 114, 9, 241, + /* 880 */ 242, 218, 219, 218, 219, 195, 19, 218, 219, 294, + /* 890 */ 23, 22, 254, 29, 256, 195, 233, 33, 233, 255, + /* 900 */ 265, 9, 233, 23, 195, 25, 19, 56, 57, 58, + /* 910 */ 59, 44, 45, 46, 22, 48, 49, 50, 51, 52, + /* 920 */ 53, 54, 55, 56, 57, 58, 59, 218, 219, 195, + /* 930 */ 66, 44, 45, 46, 9, 48, 49, 50, 51, 52, + /* 940 */ 53, 54, 55, 56, 57, 58, 59, 213, 214, 24, + /* 950 */ 16, 12, 22, 195, 103, 104, 105, 106, 107, 108, + /* 960 */ 109, 110, 111, 112, 113, 114, 27, 11, 195, 19, + /* 970 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 980 */ 113, 114, 43, 130, 9, 295, 117, 118, 119, 9, + /* 990 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1000 */ 113, 114, 22, 64, 195, 195, 153, 9, 101, 117, + /* 1010 */ 118, 119, 78, 74, 80, 246, 243, 195, 25, 19, + /* 1020 */ 22, 287, 242, 23, 266, 118, 257, 218, 219, 122, + /* 1030 */ 212, 213, 214, 22, 254, 21, 256, 107, 108, 19, + /* 1040 */ 218, 219, 195, 118, 44, 45, 46, 160, 48, 49, + /* 1050 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 1060 */ 195, 154, 155, 156, 44, 45, 46, 117, 48, 49, + /* 1070 */ 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + /* 1080 */ 307, 271, 309, 218, 219, 129, 130, 9, 195, 195, + /* 1090 */ 243, 184, 117, 118, 119, 81, 162, 117, 118, 119, + /* 1100 */ 22, 206, 24, 103, 104, 105, 106, 107, 108, 109, + /* 1110 */ 110, 111, 112, 113, 114, 117, 118, 119, 195, 144, + /* 1120 */ 255, 19, 195, 103, 104, 105, 106, 107, 108, 109, + /* 1130 */ 110, 111, 112, 113, 114, 195, 241, 242, 124, 146, + /* 1140 */ 195, 128, 129, 130, 195, 218, 219, 195, 195, 254, + /* 1150 
*/ 48, 256, 19, 142, 307, 195, 309, 24, 218, 219, + /* 1160 */ 128, 129, 130, 218, 219, 271, 243, 218, 219, 9, + /* 1170 */ 218, 219, 19, 237, 238, 276, 162, 44, 45, 46, + /* 1180 */ 160, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 1190 */ 57, 58, 59, 35, 195, 255, 118, 44, 45, 46, + /* 1200 */ 255, 48, 49, 50, 51, 52, 53, 54, 55, 56, + /* 1210 */ 57, 58, 59, 276, 315, 316, 304, 305, 116, 7, + /* 1220 */ 8, 9, 210, 211, 271, 67, 266, 25, 262, 263, + /* 1230 */ 307, 195, 309, 75, 132, 195, 103, 104, 105, 106, + /* 1240 */ 107, 108, 109, 110, 111, 112, 113, 114, 195, 25, + /* 1250 */ 195, 195, 315, 316, 218, 219, 103, 104, 105, 106, + /* 1260 */ 107, 108, 109, 110, 111, 112, 113, 114, 195, 77, + /* 1270 */ 195, 218, 219, 218, 219, 195, 19, 195, 118, 25, + /* 1280 */ 240, 146, 90, 195, 9, 93, 210, 211, 195, 131, + /* 1290 */ 208, 218, 219, 218, 219, 195, 19, 22, 218, 219, + /* 1300 */ 165, 44, 45, 46, 195, 48, 49, 50, 51, 52, + /* 1310 */ 53, 54, 55, 56, 57, 58, 59, 195, 218, 219, + /* 1320 */ 9, 119, 45, 46, 195, 48, 49, 50, 51, 52, + /* 1330 */ 53, 54, 55, 56, 57, 58, 59, 22, 23, 195, + /* 1340 */ 218, 219, 12, 302, 303, 9, 144, 218, 219, 240, + /* 1350 */ 195, 295, 195, 272, 195, 19, 20, 27, 22, 266, + /* 1360 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1370 */ 113, 114, 36, 43, 150, 218, 219, 218, 219, 195, + /* 1380 */ 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, + /* 1390 */ 113, 114, 195, 118, 64, 240, 195, 143, 190, 195, + /* 1400 */ 192, 195, 218, 219, 159, 197, 161, 199, 72, 22, + /* 1410 */ 23, 22, 101, 24, 206, 218, 219, 102, 82, 218, + /* 1420 */ 219, 246, 218, 219, 218, 219, 313, 313, 117, 118, + /* 1430 */ 317, 317, 257, 122, 313, 19, 195, 101, 317, 295, + /* 1440 */ 25, 263, 136, 107, 108, 267, 195, 141, 263, 241, + /* 1450 */ 242, 115, 267, 117, 118, 119, 195, 246, 122, 218, + /* 1460 */ 219, 9, 254, 23, 256, 154, 155, 156, 257, 218, + /* 1470 */ 219, 19, 20, 24, 22, 139, 140, 269, 195, 218, + /* 1480 */ 219, 259, 260, 100, 116, 130, 
9, 195, 36, 102, + /* 1490 */ 154, 155, 156, 157, 158, 184, 19, 20, 195, 22, + /* 1500 */ 150, 151, 50, 195, 23, 195, 25, 195, 153, 301, + /* 1510 */ 218, 219, 195, 36, 22, 122, 195, 134, 195, 151, + /* 1520 */ 184, 218, 219, 195, 72, 132, 218, 219, 218, 219, + /* 1530 */ 218, 219, 195, 117, 195, 218, 219, 19, 86, 218, + /* 1540 */ 219, 218, 219, 91, 7, 8, 218, 219, 23, 72, + /* 1550 */ 25, 195, 195, 101, 62, 218, 219, 195, 143, 107, + /* 1560 */ 108, 195, 23, 86, 25, 121, 122, 115, 91, 117, + /* 1570 */ 118, 119, 195, 9, 122, 218, 219, 9, 101, 9, + /* 1580 */ 218, 219, 84, 85, 107, 108, 146, 19, 20, 195, + /* 1590 */ 22, 142, 115, 195, 117, 118, 119, 195, 23, 122, + /* 1600 */ 25, 195, 9, 195, 36, 9, 154, 155, 156, 157, + /* 1610 */ 158, 258, 19, 20, 195, 22, 98, 154, 195, 156, + /* 1620 */ 218, 219, 195, 195, 218, 219, 218, 219, 23, 36, + /* 1630 */ 25, 154, 155, 156, 157, 158, 184, 218, 219, 5, + /* 1640 */ 72, 218, 219, 195, 10, 11, 12, 13, 14, 195, + /* 1650 */ 23, 17, 25, 195, 86, 195, 23, 195, 25, 91, + /* 1660 */ 23, 184, 25, 195, 30, 72, 32, 195, 195, 101, + /* 1670 */ 195, 195, 218, 219, 40, 107, 108, 195, 218, 219, + /* 1680 */ 218, 219, 118, 115, 228, 117, 118, 119, 118, 228, + /* 1690 */ 122, 218, 219, 228, 101, 195, 23, 23, 25, 25, + /* 1700 */ 107, 108, 109, 195, 23, 71, 25, 195, 115, 195, + /* 1710 */ 117, 118, 119, 79, 118, 122, 82, 195, 218, 219, + /* 1720 */ 261, 195, 154, 155, 156, 157, 158, 0, 1, 2, + /* 1730 */ 218, 219, 5, 99, 23, 195, 25, 10, 11, 12, + /* 1740 */ 13, 14, 195, 23, 17, 25, 195, 154, 155, 156, + /* 1750 */ 157, 158, 184, 322, 154, 195, 156, 30, 23, 32, + /* 1760 */ 25, 23, 195, 25, 195, 218, 219, 40, 134, 238, + /* 1770 */ 291, 195, 195, 139, 140, 195, 195, 184, 218, 219, + /* 1780 */ 195, 195, 195, 290, 193, 244, 258, 218, 219, 245, + /* 1790 */ 258, 258, 258, 274, 216, 300, 247, 163, 71, 270, + /* 1800 */ 270, 296, 248, 248, 247, 296, 79, 262, 9, 82, + /* 1810 */ 274, 222, 274, 231, 262, 227, 274, 251, 19, 20, + /* 1820 */ 
221, 22, 221, 221, 262, 262, 99, 198, 9, 283, + /* 1830 */ 251, 247, 61, 142, 202, 36, 38, 245, 19, 20, + /* 1840 */ 202, 22, 202, 300, 152, 286, 151, 297, 300, 275, + /* 1850 */ 297, 22, 149, 146, 251, 36, 273, 44, 252, 236, + /* 1860 */ 252, 134, 251, 18, 239, 239, 139, 140, 202, 239, + /* 1870 */ 239, 72, 18, 201, 150, 236, 248, 236, 202, 275, + /* 1880 */ 201, 159, 275, 248, 248, 273, 63, 293, 202, 292, + /* 1890 */ 163, 72, 248, 22, 201, 116, 202, 201, 223, 202, + /* 1900 */ 101, 201, 220, 223, 220, 220, 107, 108, 65, 22, + /* 1910 */ 220, 223, 229, 226, 115, 226, 117, 118, 119, 127, + /* 1920 */ 101, 122, 220, 222, 220, 220, 107, 108, 229, 166, + /* 1930 */ 316, 24, 308, 285, 115, 114, 117, 118, 119, 285, + /* 1940 */ 141, 122, 223, 202, 92, 83, 268, 22, 280, 202, + /* 1950 */ 159, 282, 148, 154, 155, 156, 157, 158, 252, 251, + /* 1960 */ 141, 252, 250, 321, 321, 147, 268, 249, 25, 248, + /* 1970 */ 204, 13, 196, 154, 155, 156, 157, 158, 1, 2, + /* 1980 */ 196, 6, 5, 184, 194, 215, 209, 10, 11, 12, + /* 1990 */ 13, 14, 224, 215, 17, 194, 194, 215, 215, 224, + /* 2000 */ 9, 216, 209, 184, 216, 209, 4, 30, 3, 32, + /* 2010 */ 19, 20, 215, 22, 22, 164, 306, 40, 303, 306, + /* 2020 */ 16, 15, 23, 23, 140, 152, 131, 36, 25, 143, + /* 2030 */ 20, 24, 16, 145, 1, 143, 131, 131, 62, 55, + /* 2040 */ 55, 152, 55, 55, 131, 37, 117, 34, 71, 1, + /* 2050 */ 142, 5, 22, 116, 25, 162, 79, 76, 69, 82, + /* 2060 */ 41, 142, 69, 72, 116, 24, 132, 20, 19, 68, + /* 2070 */ 126, 9, 22, 22, 22, 22, 99, 23, 23, 68, + /* 2080 */ 24, 22, 68, 97, 23, 28, 150, 22, 25, 23, + /* 2090 */ 23, 22, 101, 98, 34, 37, 142, 23, 107, 108, + /* 2100 */ 23, 22, 34, 144, 34, 25, 115, 117, 117, 118, + /* 2110 */ 119, 134, 34, 122, 89, 34, 139, 140, 34, 76, + /* 2120 */ 45, 87, 94, 34, 23, 22, 24, 34, 22, 25, + /* 2130 */ 25, 23, 23, 23, 23, 23, 22, 143, 25, 143, + /* 2140 */ 163, 25, 11, 22, 22, 154, 155, 156, 157, 158, + /* 2150 */ 23, 23, 76, 136, 22, 22, 15, 142, 23, 25, + /* 2160 */ 142, 25, 
142, 1, 1, 323, 323, 323, 323, 323, + /* 2170 */ 323, 323, 323, 323, 323, 184, 323, 323, 323, 323, + /* 2180 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2190 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2200 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2210 */ 323, 164, 323, 323, 323, 323, 323, 323, 323, 323, + /* 2210 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2220 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2230 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2240 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, @@ -178668,123 +177984,120 @@ static const YYCODETYPE yy_lookahead[] = { /* 2310 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2320 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, /* 2330 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, - /* 2340 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 187, - /* 2350 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, - /* 2360 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, - /* 2370 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, - /* 2380 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, 187, - /* 2390 */ 187, 187, 187, 187, 187, 187, 187, 187, 187, + /* 2340 */ 323, 323, 323, 323, 323, 323, 323, 323, 323, 323, + /* 2350 */ 323, 323, 187, 187, 187, 187, 187, 187, 187, 187, + /* 2360 */ 187, 187, 187, }; #define YY_SHIFT_COUNT (586) #define YY_SHIFT_MIN (0) -#define YY_SHIFT_MAX (2160) +#define YY_SHIFT_MAX (2163) static const unsigned short int yy_shift_ofst[] = { - /* 0 */ 1877, 1683, 2047, 1338, 1338, 626, 156, 1398, 1479, 1523, - /* 10 */ 1891, 1891, 1891, 776, 626, 626, 626, 626, 626, 0, - /* 20 */ 0, 282, 1057, 1891, 1891, 1891, 1891, 1891, 1891, 1891, - /* 30 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 829, 829, - /* 40 */ 265, 265, 158, 462, 611, 781, 781, 212, 212, 212, - /* 50 */ 212, 105, 210, 323, 395, 500, 520, 544, 649, 669, - /* 60 */ 693, 798, 774, 818, 923, 1037, 1057, 1057, 1057, 1057, - 
/* 70 */ 1057, 1057, 1057, 1057, 1057, 1057, 1057, 1057, 1057, 1057, - /* 80 */ 1057, 1057, 1057, 1057, 1162, 1057, 1182, 1287, 1287, 1638, - /* 90 */ 1697, 1717, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, - /* 100 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, - /* 110 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, - /* 120 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, - /* 130 */ 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, 1891, - /* 140 */ 1891, 1891, 32, 352, 352, 352, 352, 352, 352, 352, - /* 150 */ 136, 91, 113, 72, 781, 1117, 1231, 781, 781, 534, - /* 160 */ 534, 781, 700, 102, 497, 497, 497, 506, 142, 142, - /* 170 */ 2212, 2212, 51, 51, 51, 465, 614, 614, 614, 614, - /* 180 */ 977, 977, 216, 72, 339, 394, 781, 781, 781, 781, - /* 190 */ 781, 781, 781, 781, 781, 781, 781, 781, 781, 781, - /* 200 */ 781, 781, 781, 781, 781, 775, 925, 925, 781, 1194, - /* 210 */ 266, 266, 1163, 1163, 1256, 1256, 1203, 2212, 2212, 2212, - /* 220 */ 2212, 2212, 2212, 2212, 888, 1002, 1002, 651, 174, 931, - /* 230 */ 551, 948, 521, 667, 1055, 781, 781, 781, 781, 781, - /* 240 */ 781, 781, 781, 781, 781, 346, 781, 781, 781, 781, - /* 250 */ 781, 781, 781, 781, 781, 781, 781, 781, 1187, 1187, - /* 260 */ 1187, 781, 781, 781, 62, 781, 781, 781, 1180, 1157, - /* 270 */ 781, 1288, 781, 781, 781, 781, 781, 781, 781, 781, - /* 280 */ 1237, 101, 722, 229, 229, 229, 229, 373, 722, 722, - /* 290 */ 1087, 1262, 1303, 1469, 1308, 1352, 665, 1352, 1505, 1106, - /* 300 */ 1308, 1308, 1106, 1308, 665, 1505, 1408, 1050, 425, 4, - /* 310 */ 4, 4, 1402, 1402, 1402, 1402, 1115, 1115, 1378, 1442, - /* 320 */ 213, 1552, 1771, 1771, 1698, 1698, 1810, 1810, 1698, 1707, - /* 330 */ 1710, 1828, 1713, 1718, 1823, 1713, 1718, 1862, 1862, 1862, - /* 340 */ 1862, 1698, 1875, 1745, 1710, 1710, 1745, 1828, 1823, 1745, - /* 350 */ 1823, 1745, 1698, 1875, 1748, 1851, 1698, 1875, 1897, 1698, - /* 360 */ 1875, 1698, 1875, 1897, 1804, 1804, 1804, 
1864, 1912, 1912, - /* 370 */ 1897, 1804, 1809, 1804, 1864, 1804, 1804, 1772, 1916, 1827, - /* 380 */ 1827, 1897, 1698, 1853, 1853, 1879, 1879, 1713, 1718, 1943, - /* 390 */ 1698, 1811, 1713, 1821, 1824, 1745, 1948, 1962, 1962, 1974, - /* 400 */ 1974, 1974, 2212, 2212, 2212, 2212, 2212, 2212, 2212, 2212, - /* 410 */ 2212, 2212, 2212, 2212, 2212, 2212, 2212, 222, 967, 1226, - /* 420 */ 1229, 48, 1293, 1236, 1554, 1489, 1536, 1484, 1049, 1250, - /* 430 */ 1591, 1548, 1596, 1597, 1636, 1639, 1664, 1679, 1624, 1478, - /* 440 */ 1648, 1190, 1680, 5, 1371, 1631, 1689, 1695, 1580, 1667, - /* 450 */ 1699, 1570, 1573, 1706, 1719, 1660, 1655, 1991, 2002, 1993, - /* 460 */ 1854, 2001, 2004, 1998, 1999, 1882, 1871, 1893, 2003, 2003, - /* 470 */ 2005, 1883, 2006, 1884, 2015, 2031, 1889, 1902, 2003, 1903, - /* 480 */ 1973, 2000, 2003, 1885, 1984, 1985, 1987, 1988, 1913, 1926, - /* 490 */ 2017, 1910, 2053, 2050, 2034, 1945, 1900, 1995, 2041, 1997, - /* 500 */ 1992, 2027, 1927, 1954, 2048, 2054, 2057, 1940, 1951, 2058, - /* 510 */ 2012, 2060, 2061, 2062, 2064, 2019, 2075, 2065, 1994, 2063, - /* 520 */ 2068, 2024, 2059, 2071, 2066, 1944, 2076, 2074, 2078, 2077, - /* 530 */ 2080, 2082, 2007, 1956, 2084, 2085, 1996, 2079, 2083, 1964, - /* 540 */ 2086, 2081, 2087, 2088, 2089, 2020, 2035, 2028, 2072, 2043, - /* 550 */ 2023, 2090, 2102, 2104, 2105, 2103, 2106, 2098, 1989, 1990, - /* 560 */ 2112, 2086, 2113, 2114, 2115, 2117, 2116, 2118, 2119, 2122, - /* 570 */ 2129, 2123, 2124, 2125, 2126, 2128, 2130, 2131, 2014, 2010, - /* 580 */ 2011, 2016, 2132, 2135, 2140, 2159, 2160, + /* 0 */ 1977, 1727, 1634, 1336, 1336, 333, 161, 1452, 1477, 1568, + /* 10 */ 1991, 1991, 1991, 148, 333, 333, 333, 333, 333, 0, + /* 20 */ 0, 279, 1153, 1991, 1991, 1991, 1991, 1991, 1991, 1991, + /* 30 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 121, 121, + /* 40 */ 498, 498, 51, 402, 408, 706, 706, 445, 445, 445, + /* 50 */ 445, 104, 208, 320, 391, 495, 515, 619, 639, 743, + /* 60 */ 763, 867, 887, 1000, 
1020, 1133, 1153, 1153, 1153, 1153, + /* 70 */ 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, 1153, + /* 80 */ 1153, 1153, 1153, 1153, 1257, 1153, 1277, 348, 348, 1593, + /* 90 */ 1799, 1819, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, + /* 100 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, + /* 110 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, + /* 120 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, + /* 130 */ 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, 1991, + /* 140 */ 1991, 1991, 32, 851, 851, 851, 851, 851, 851, 851, + /* 150 */ 91, 112, 76, 459, 706, 1014, 1102, 706, 706, 357, + /* 160 */ 357, 706, 540, 292, 571, 571, 571, 636, 37, 37, + /* 170 */ 2176, 2176, 159, 159, 159, 587, 462, 462, 462, 462, + /* 180 */ 939, 939, 215, 459, 13, 611, 706, 706, 706, 706, + /* 190 */ 706, 706, 706, 706, 706, 706, 706, 706, 706, 706, + /* 200 */ 706, 706, 706, 706, 706, 563, 216, 216, 706, 956, + /* 210 */ 1192, 1192, 950, 950, 1160, 1160, 1135, 2176, 2176, 2176, + /* 220 */ 2176, 2176, 2176, 2176, 1311, 907, 907, 869, 225, 892, + /* 230 */ 63, 980, 646, 975, 998, 706, 706, 706, 706, 706, + /* 240 */ 706, 706, 706, 706, 706, 223, 706, 706, 706, 706, + /* 250 */ 706, 706, 706, 706, 706, 706, 706, 706, 1158, 1158, + /* 260 */ 1158, 706, 706, 706, 48, 706, 706, 706, 1078, 565, + /* 270 */ 706, 1330, 706, 706, 706, 706, 706, 706, 706, 706, + /* 280 */ 1013, 864, 511, 925, 925, 925, 925, 1202, 511, 511, + /* 290 */ 697, 930, 1212, 642, 1368, 1350, 1254, 1350, 1416, 1224, + /* 300 */ 1368, 1368, 1224, 1368, 1254, 1416, 657, 880, 188, 558, + /* 310 */ 558, 558, 467, 467, 467, 467, 993, 993, 1245, 1415, + /* 320 */ 1306, 1389, 1771, 1771, 1691, 1691, 1798, 1798, 1691, 1692, + /* 330 */ 1695, 1829, 1703, 1707, 1813, 1703, 1707, 1845, 1845, 1845, + /* 340 */ 1845, 1691, 1854, 1724, 1695, 1695, 1724, 1829, 1813, 1724, + /* 350 */ 1813, 1724, 1691, 1854, 1722, 1823, 1691, 1854, 1871, 1691, + /* 360 */ 1854, 1691, 
1854, 1871, 1779, 1779, 1779, 1843, 1887, 1887, + /* 370 */ 1871, 1779, 1792, 1779, 1843, 1779, 1779, 1763, 1907, 1821, + /* 380 */ 1821, 1871, 1691, 1852, 1852, 1862, 1862, 1703, 1707, 1925, + /* 390 */ 1691, 1791, 1703, 1804, 1818, 1724, 1943, 1958, 1958, 1975, + /* 400 */ 1975, 1975, 2176, 2176, 2176, 2176, 2176, 2176, 2176, 2176, + /* 410 */ 2176, 2176, 2176, 2176, 2176, 2176, 2176, 440, 934, 1315, + /* 420 */ 1387, 190, 1032, 1275, 1481, 1383, 41, 1449, 853, 1355, + /* 430 */ 1525, 1492, 1539, 1575, 1605, 1627, 1633, 1637, 1564, 1444, + /* 440 */ 1537, 1393, 1673, 1440, 1518, 1570, 1674, 1681, 1498, 1711, + /* 450 */ 1720, 1463, 1600, 1735, 1738, 1596, 1011, 2002, 2005, 1992, + /* 460 */ 1851, 2006, 2004, 1999, 2000, 1884, 1873, 1895, 2003, 2003, + /* 470 */ 2007, 1886, 2010, 1888, 2016, 2033, 1892, 1905, 2003, 1906, + /* 480 */ 1976, 2008, 2003, 1889, 1984, 1985, 1987, 1988, 1913, 1929, + /* 490 */ 2013, 1908, 2048, 2046, 2030, 1937, 1893, 1989, 2029, 1993, + /* 500 */ 1981, 2019, 1919, 1948, 2041, 2047, 2049, 1934, 1944, 2050, + /* 510 */ 2001, 2051, 2052, 2054, 2053, 2011, 2062, 2056, 1986, 2057, + /* 520 */ 2059, 2014, 2058, 2055, 2060, 1936, 2065, 2061, 2066, 2063, + /* 530 */ 2067, 2069, 1995, 1954, 2074, 2077, 1990, 2068, 2079, 1959, + /* 540 */ 2080, 2070, 2078, 2081, 2084, 2025, 2043, 2034, 2075, 2076, + /* 550 */ 2028, 2089, 2101, 2103, 2102, 2104, 2105, 2093, 1994, 1996, + /* 560 */ 2108, 2080, 2109, 2110, 2111, 2106, 2113, 2116, 2112, 2114, + /* 570 */ 2131, 2121, 2122, 2127, 2128, 2132, 2133, 2134, 2017, 2015, + /* 580 */ 2018, 2020, 2136, 2135, 2141, 2162, 2163, }; #define YY_REDUCE_COUNT (416) -#define YY_REDUCE_MIN (-277) -#define YY_REDUCE_MAX (1797) +#define YY_REDUCE_MIN (-245) +#define YY_REDUCE_MAX (1802) static const short yy_reduce_ofst[] = { - /* 0 */ -176, -71, -170, 746, 799, -159, -20, -186, -30, 85, - /* 10 */ 94, 260, 277, -180, 159, 164, 275, 279, 284, -98, - /* 20 */ 145, -274, 401, -116, 577, 701, 904, 958, -178, 960, - /* 30 */ 
965, 273, 744, 1028, 803, 207, 426, 879, 299, 424, - /* 40 */ -26, 696, 924, 750, 771, 820, 1000, 228, 421, 228, - /* 50 */ 421, -277, -277, -277, -277, -277, -277, -277, -277, -277, - /* 60 */ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277, - /* 70 */ -277, -277, -277, -277, -277, -277, -277, -277, -277, -277, - /* 80 */ -277, -277, -277, -277, -277, -277, -277, -277, -277, 570, - /* 90 */ 573, 616, 621, 852, 1064, 1083, 1103, 1108, 1133, 1154, - /* 100 */ 1159, 1161, 1164, 1166, 1168, 1170, 1208, 1211, 1213, 1217, - /* 110 */ 1219, 1224, 1233, 1244, 1254, 1264, 1273, 1291, 1294, 1307, - /* 120 */ 1310, 1316, 1328, 1332, 1342, 1344, 1346, 1349, 1354, 1374, - /* 130 */ 1385, 1388, 1390, 1394, 1399, 1431, 1433, 1435, 1449, 1453, - /* 140 */ 1480, 1491, -277, -277, -277, -277, -277, -277, -277, -277, - /* 150 */ -277, -277, -277, 479, -177, 199, 893, 2, 650, -206, - /* 160 */ -145, 319, -277, 703, 420, 639, 680, 770, -277, -277, - /* 170 */ -277, -277, -183, -183, -183, -99, 291, 316, 396, 865, - /* 180 */ -200, 445, -11, 1, 590, 590, -135, 422, 870, 874, - /* 190 */ 884, 1014, 1090, 1122, -101, 80, 641, 719, 796, 905, - /* 200 */ 922, 1210, 303, 992, 990, 681, 877, 947, 997, 1119, - /* 210 */ 1001, 1003, 1178, 1202, 1199, 1205, 772, 1172, 1198, 1207, - /* 220 */ 1220, 1230, 1221, 1274, -208, -193, -102, 98, 97, 285, - /* 230 */ 294, 366, 427, 515, 545, 670, 695, 763, 844, 938, - /* 240 */ 1109, 1181, 1380, 1383, 1389, 135, 1410, 1434, 1450, 1475, - /* 250 */ 1481, 1496, 1506, 1513, 1526, 1537, 1539, 1540, 306, 898, - /* 260 */ 968, 1543, 1546, 1550, 1272, 1556, 1557, 1564, 1415, 1427, - /* 270 */ 1565, 1524, 1569, 294, 1572, 1574, 1576, 1577, 1578, 1579, - /* 280 */ 1474, 1485, 1532, 1510, 1519, 1520, 1522, 1272, 1532, 1532, - /* 290 */ 1541, 1566, 1588, 1487, 1511, 1514, 1542, 1518, 1495, 1544, - /* 300 */ 1527, 1528, 1555, 1530, 1553, 1512, 1585, 1581, 1582, 1589, - /* 310 */ 1590, 1593, 1559, 1567, 1568, 1583, 1587, 1592, 1545, 1584, - /* 320 */ 1599, 
1629, 1534, 1549, 1640, 1644, 1560, 1561, 1649, 1575, - /* 330 */ 1594, 1595, 1612, 1615, 1634, 1619, 1626, 1645, 1646, 1647, - /* 340 */ 1653, 1681, 1694, 1650, 1622, 1627, 1651, 1630, 1668, 1657, - /* 350 */ 1670, 1666, 1714, 1711, 1625, 1628, 1720, 1722, 1701, 1723, - /* 360 */ 1725, 1726, 1728, 1708, 1712, 1715, 1716, 1704, 1721, 1724, - /* 370 */ 1729, 1731, 1732, 1733, 1709, 1735, 1736, 1632, 1633, 1658, - /* 380 */ 1659, 1738, 1743, 1637, 1641, 1691, 1700, 1727, 1730, 1686, - /* 390 */ 1765, 1687, 1734, 1737, 1739, 1741, 1770, 1780, 1782, 1788, - /* 400 */ 1789, 1790, 1684, 1685, 1682, 1783, 1779, 1781, 1786, 1787, - /* 410 */ 1794, 1773, 1774, 1792, 1797, 1791, 1795, + /* 0 */ -176, -122, 1208, 268, 274, -159, 389, -192, 317, -190, + /* 10 */ -133, 392, 395, 101, 460, 513, 529, 638, 895, 135, + /* 20 */ 141, -237, -113, 53, 539, 541, 608, 610, 338, 663, + /* 30 */ 665, 522, 644, 669, 865, -194, 940, 945, 486, 734, + /* 40 */ -18, 165, -114, 525, 773, 847, 923, -234, -10, -234, + /* 50 */ -10, -245, -245, -245, -245, -245, -245, -245, -245, -245, + /* 60 */ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245, + /* 70 */ -245, -245, -245, -245, -245, -245, -245, -245, -245, -245, + /* 80 */ -245, -245, -245, -245, -245, -245, -245, -245, -245, 491, + /* 90 */ 561, 709, 809, 822, 927, 949, 952, 1036, 1053, 1055, + /* 100 */ 1073, 1075, 1080, 1100, 1122, 1129, 1157, 1159, 1184, 1197, + /* 110 */ 1201, 1204, 1206, 1241, 1251, 1261, 1292, 1303, 1308, 1310, + /* 120 */ 1312, 1317, 1321, 1323, 1328, 1337, 1357, 1362, 1402, 1406, + /* 130 */ 1408, 1419, 1423, 1454, 1460, 1462, 1473, 1500, 1512, 1547, + /* 140 */ 1560, 1569, -245, -245, -245, -245, -245, -245, -245, -245, + /* 150 */ -245, -245, -245, -131, 168, 195, -225, -62, 526, 427, + /* 160 */ 818, 151, -245, 546, 65, 899, 937, 780, -245, -245, + /* 170 */ -245, -245, -208, -208, -208, 54, 39, 810, 894, 953, + /* 180 */ -139, 936, -196, -189, 912, 912, 1082, 243, 516, 635, + /* 190 */ -116, 56, 1040, 
1109, 690, 271, 1056, 512, 758, 1144, + /* 200 */ 960, 1155, -191, 360, 1093, 966, 548, 1113, 281, 595, + /* 210 */ 1114, 1121, 1178, 1185, 1012, 1076, 23, 1041, 260, 585, + /* 220 */ 769, 1175, 1222, 1211, -181, -45, 72, 107, 94, 214, + /* 230 */ 221, 336, 363, 523, 609, 650, 664, 700, 893, 999, + /* 240 */ 1088, 1283, 1339, 1356, 1366, 1081, 1377, 1394, 1398, 1427, + /* 250 */ 1428, 1448, 1458, 1468, 1472, 1475, 1476, 1482, 1456, 1461, + /* 260 */ 1465, 1508, 1514, 1522, 1459, 1526, 1540, 1551, 1353, 1431, + /* 270 */ 1567, 1531, 1576, 221, 1577, 1580, 1581, 1585, 1586, 1587, + /* 280 */ 1479, 1493, 1541, 1528, 1532, 1533, 1534, 1459, 1541, 1541, + /* 290 */ 1544, 1578, 1591, 1495, 1519, 1529, 1549, 1530, 1505, 1554, + /* 300 */ 1536, 1538, 1555, 1542, 1557, 1509, 1589, 1582, 1588, 1599, + /* 310 */ 1601, 1602, 1545, 1552, 1562, 1563, 1566, 1579, 1546, 1584, + /* 320 */ 1592, 1629, 1543, 1548, 1632, 1638, 1550, 1553, 1640, 1559, + /* 330 */ 1574, 1583, 1606, 1603, 1623, 1608, 1611, 1625, 1626, 1630, + /* 340 */ 1631, 1666, 1672, 1628, 1604, 1607, 1635, 1612, 1639, 1636, + /* 350 */ 1641, 1644, 1676, 1679, 1594, 1597, 1686, 1693, 1675, 1694, + /* 360 */ 1696, 1697, 1700, 1680, 1682, 1684, 1685, 1683, 1687, 1689, + /* 370 */ 1688, 1690, 1701, 1702, 1699, 1704, 1705, 1614, 1624, 1648, + /* 380 */ 1654, 1719, 1741, 1642, 1643, 1678, 1698, 1706, 1708, 1668, + /* 390 */ 1747, 1669, 1709, 1712, 1718, 1721, 1766, 1776, 1784, 1790, + /* 400 */ 1801, 1802, 1710, 1713, 1715, 1777, 1770, 1778, 1782, 1783, + /* 410 */ 1793, 1768, 1775, 1785, 1788, 1797, 1796, }; static const YYACTIONTYPE yy_default[] = { /* 0 */ 1667, 1667, 1667, 1495, 1258, 1371, 1258, 1258, 1258, 1258, @@ -178927,7 +178240,6 @@ static const YYCODETYPE yyFallback[] = { 0, /* GE => nothing */ 0, /* ESCAPE => nothing */ 9, /* COLUMNKW => ID */ - 9, /* CONCURRENT => ID */ 9, /* DO => ID */ 9, /* FOR => ID */ 9, /* IGNORE => ID */ @@ -179043,6 +178355,7 @@ static const YYCODETYPE yyFallback[] = { 0, /* UMINUS => 
nothing */ 0, /* TRUTH => nothing */ 0, /* REGISTER => nothing */ + 0, /* CONCURRENT => nothing */ 0, /* VECTOR => nothing */ 0, /* SELECT_COLUMN => nothing */ 0, /* IF_NULL_ROW => nothing */ @@ -179197,122 +178510,122 @@ static const char *const yyTokenName[] = { /* 59 */ "GE", /* 60 */ "ESCAPE", /* 61 */ "COLUMNKW", - /* 62 */ "CONCURRENT", - /* 63 */ "DO", - /* 64 */ "FOR", - /* 65 */ "IGNORE", - /* 66 */ "INITIALLY", - /* 67 */ "INSTEAD", - /* 68 */ "NO", - /* 69 */ "KEY", - /* 70 */ "OF", - /* 71 */ "OFFSET", - /* 72 */ "PRAGMA", - /* 73 */ "RAISE", - /* 74 */ "RECURSIVE", - /* 75 */ "REPLACE", - /* 76 */ "RESTRICT", - /* 77 */ "ROW", - /* 78 */ "ROWS", - /* 79 */ "TRIGGER", - /* 80 */ "VACUUM", - /* 81 */ "VIEW", - /* 82 */ "VIRTUAL", - /* 83 */ "WITH", - /* 84 */ "NULLS", - /* 85 */ "FIRST", - /* 86 */ "LAST", - /* 87 */ "CURRENT", - /* 88 */ "FOLLOWING", - /* 89 */ "PARTITION", - /* 90 */ "PRECEDING", - /* 91 */ "RANGE", - /* 92 */ "UNBOUNDED", - /* 93 */ "EXCLUDE", - /* 94 */ "GROUPS", - /* 95 */ "OTHERS", - /* 96 */ "TIES", - /* 97 */ "GENERATED", - /* 98 */ "ALWAYS", - /* 99 */ "MATERIALIZED", - /* 100 */ "REINDEX", - /* 101 */ "RENAME", - /* 102 */ "CTIME_KW", - /* 103 */ "ANY", - /* 104 */ "BITAND", - /* 105 */ "BITOR", - /* 106 */ "LSHIFT", - /* 107 */ "RSHIFT", - /* 108 */ "PLUS", - /* 109 */ "MINUS", - /* 110 */ "STAR", - /* 111 */ "SLASH", - /* 112 */ "REM", - /* 113 */ "CONCAT", - /* 114 */ "PTR", - /* 115 */ "COLLATE", - /* 116 */ "BITNOT", - /* 117 */ "ON", - /* 118 */ "INDEXED", - /* 119 */ "STRING", - /* 120 */ "JOIN_KW", - /* 121 */ "CONSTRAINT", - /* 122 */ "DEFAULT", - /* 123 */ "NULL", - /* 124 */ "PRIMARY", - /* 125 */ "UNIQUE", - /* 126 */ "CHECK", - /* 127 */ "REFERENCES", - /* 128 */ "AUTOINCR", - /* 129 */ "INSERT", - /* 130 */ "DELETE", - /* 131 */ "UPDATE", - /* 132 */ "SET", - /* 133 */ "DEFERRABLE", - /* 134 */ "FOREIGN", - /* 135 */ "DROP", - /* 136 */ "UNION", - /* 137 */ "ALL", - /* 138 */ "EXCEPT", - /* 139 */ "INTERSECT", - 
/* 140 */ "SELECT", - /* 141 */ "VALUES", - /* 142 */ "DISTINCT", - /* 143 */ "DOT", - /* 144 */ "FROM", - /* 145 */ "JOIN", - /* 146 */ "USING", - /* 147 */ "ORDER", - /* 148 */ "GROUP", - /* 149 */ "HAVING", - /* 150 */ "LIMIT", - /* 151 */ "WHERE", - /* 152 */ "RETURNING", - /* 153 */ "INTO", - /* 154 */ "NOTHING", - /* 155 */ "FLOAT", - /* 156 */ "BLOB", - /* 157 */ "INTEGER", - /* 158 */ "VARIABLE", - /* 159 */ "CASE", - /* 160 */ "WHEN", - /* 161 */ "THEN", - /* 162 */ "ELSE", - /* 163 */ "INDEX", - /* 164 */ "ALTER", - /* 165 */ "ADD", - /* 166 */ "WINDOW", - /* 167 */ "OVER", - /* 168 */ "FILTER", - /* 169 */ "COLUMN", - /* 170 */ "AGG_FUNCTION", - /* 171 */ "AGG_COLUMN", - /* 172 */ "TRUEFALSE", - /* 173 */ "FUNCTION", - /* 174 */ "UPLUS", - /* 175 */ "UMINUS", - /* 176 */ "TRUTH", - /* 177 */ "REGISTER", + /* 62 */ "DO", + /* 63 */ "FOR", + /* 64 */ "IGNORE", + /* 65 */ "INITIALLY", + /* 66 */ "INSTEAD", + /* 67 */ "NO", + /* 68 */ "KEY", + /* 69 */ "OF", + /* 70 */ "OFFSET", + /* 71 */ "PRAGMA", + /* 72 */ "RAISE", + /* 73 */ "RECURSIVE", + /* 74 */ "REPLACE", + /* 75 */ "RESTRICT", + /* 76 */ "ROW", + /* 77 */ "ROWS", + /* 78 */ "TRIGGER", + /* 79 */ "VACUUM", + /* 80 */ "VIEW", + /* 81 */ "VIRTUAL", + /* 82 */ "WITH", + /* 83 */ "NULLS", + /* 84 */ "FIRST", + /* 85 */ "LAST", + /* 86 */ "CURRENT", + /* 87 */ "FOLLOWING", + /* 88 */ "PARTITION", + /* 89 */ "PRECEDING", + /* 90 */ "RANGE", + /* 91 */ "UNBOUNDED", + /* 92 */ "EXCLUDE", + /* 93 */ "GROUPS", + /* 94 */ "OTHERS", + /* 95 */ "TIES", + /* 96 */ "GENERATED", + /* 97 */ "ALWAYS", + /* 98 */ "MATERIALIZED", + /* 99 */ "REINDEX", + /* 100 */ "RENAME", + /* 101 */ "CTIME_KW", + /* 102 */ "ANY", + /* 103 */ "BITAND", + /* 104 */ "BITOR", + /* 105 */ "LSHIFT", + /* 106 */ "RSHIFT", + /* 107 */ "PLUS", + /* 108 */ "MINUS", + /* 109 */ "STAR", + /* 110 */ "SLASH", + /* 111 */ "REM", + /* 112 */ "CONCAT", + /* 113 */ "PTR", + /* 114 */ "COLLATE", + /* 115 */ "BITNOT", + /* 116 */ "ON", + /* 117 */ 
"INDEXED", + /* 118 */ "STRING", + /* 119 */ "JOIN_KW", + /* 120 */ "CONSTRAINT", + /* 121 */ "DEFAULT", + /* 122 */ "NULL", + /* 123 */ "PRIMARY", + /* 124 */ "UNIQUE", + /* 125 */ "CHECK", + /* 126 */ "REFERENCES", + /* 127 */ "AUTOINCR", + /* 128 */ "INSERT", + /* 129 */ "DELETE", + /* 130 */ "UPDATE", + /* 131 */ "SET", + /* 132 */ "DEFERRABLE", + /* 133 */ "FOREIGN", + /* 134 */ "DROP", + /* 135 */ "UNION", + /* 136 */ "ALL", + /* 137 */ "EXCEPT", + /* 138 */ "INTERSECT", + /* 139 */ "SELECT", + /* 140 */ "VALUES", + /* 141 */ "DISTINCT", + /* 142 */ "DOT", + /* 143 */ "FROM", + /* 144 */ "JOIN", + /* 145 */ "USING", + /* 146 */ "ORDER", + /* 147 */ "GROUP", + /* 148 */ "HAVING", + /* 149 */ "LIMIT", + /* 150 */ "WHERE", + /* 151 */ "RETURNING", + /* 152 */ "INTO", + /* 153 */ "NOTHING", + /* 154 */ "FLOAT", + /* 155 */ "BLOB", + /* 156 */ "INTEGER", + /* 157 */ "VARIABLE", + /* 158 */ "CASE", + /* 159 */ "WHEN", + /* 160 */ "THEN", + /* 161 */ "ELSE", + /* 162 */ "INDEX", + /* 163 */ "ALTER", + /* 164 */ "ADD", + /* 165 */ "WINDOW", + /* 166 */ "OVER", + /* 167 */ "FILTER", + /* 168 */ "COLUMN", + /* 169 */ "AGG_FUNCTION", + /* 170 */ "AGG_COLUMN", + /* 171 */ "TRUEFALSE", + /* 172 */ "FUNCTION", + /* 173 */ "UPLUS", + /* 174 */ "UMINUS", + /* 175 */ "TRUTH", + /* 176 */ "REGISTER", + /* 177 */ "CONCURRENT", /* 178 */ "VECTOR", /* 179 */ "SELECT_COLUMN", /* 180 */ "IF_NULL_ROW", @@ -181563,11 +180876,7 @@ static YYACTIONTYPE yy_reduce( case 84: /* cmd ::= select */ { SelectDest dest = {SRT_Output, 0, 0, 0, 0, 0, 0}; - if( (pParse->db->mDbFlags & DBFLAG_EncodingFixed)!=0 - || sqlite3ReadSchema(pParse)==SQLITE_OK - ){ - sqlite3Select(pParse, yymsp[0].minor.yy637, &dest); - } + sqlite3Select(pParse, yymsp[0].minor.yy637, &dest); sqlite3SelectDelete(pParse->db, yymsp[0].minor.yy637); } break; @@ -183337,153 +182646,152 @@ const unsigned char ebcdicToAscii[] = { ** is substantially reduced. 
This is important for embedded applications ** on platforms with limited memory. */ -/* Hash score: 233 */ -/* zKWText[] encodes 1018 bytes of keyword text in 669 bytes */ -/* CONCURRENT_DATEMPORARYREINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXP */ -/* LAINSTEADDATABASELECTABLEFTHENDEFERRABLELSEXCLUDELETEXCEPTIES */ -/* AVEPOINTERSECTRANSACTIONOTNULLSISNULLIKEXCLUSIVEXISTS */ -/* CONSTRAINTOFFSETRIGGERAISEUNIQUERYWITHOUTERANGENERATEDETACH */ -/* AVINGLOBEGINNEREFERENCESATTACHBETWEENATURALTERELEASECASCADE */ -/* FAULTCASECOLLATECREATEIMMEDIATEJOINSERTMATCHPLANALYZEPRAGMA */ -/* TERIALIZEDEFERREDISTINCTUPDATEVALUESVIRTUALWAYSWHENOTHINGROUPS */ -/* WHERECURSIVEABORTAFTERENAMEANDROPARTITIONAUTOINCREMENTCAST */ -/* COLUMNCOMMITCONFLICTCROSSCURRENT_TIMESTAMPRECEDINGFAILAST */ -/* FILTEREPLACEFIRSTFOLLOWINGFROMFULLIMITIFORDERESTRICTOTHERSOVER */ +/* Hash score: 231 */ +/* zKWText[] encodes 1007 bytes of keyword text in 667 bytes */ +/* REINDEXEDESCAPEACHECKEYBEFOREIGNOREGEXPLAINSTEADDATABASELECT */ +/* ABLEFTHENDEFERRABLELSEXCLUDELETEMPORARYISNULLSAVEPOINTERSECT */ +/* IESNOTNULLIKEXCEPTRANSACTIONATURALTERAISEXCLUSIVEXISTS */ +/* CONSTRAINTOFFSETRIGGERANGENERATEDETACHAVINGLOBEGINNEREFERENCES */ +/* UNIQUERYWITHOUTERELEASEATTACHBETWEENOTHINGROUPSCASCADEFAULT */ +/* CASECOLLATECREATECURRENT_DATEIMMEDIATEJOINSERTMATCHPLANALYZE */ +/* PRAGMATERIALIZEDEFERREDISTINCTUPDATEVALUESVIRTUALWAYSWHENWHERE */ +/* CURSIVEABORTAFTERENAMEANDROPARTITIONAUTOINCREMENTCASTCOLUMN */ +/* COMMITCONFLICTCROSSCURRENT_TIMESTAMPRECEDINGFAILASTFILTER */ +/* EPLACEFIRSTFOLLOWINGFROMFULLIMITIFORDERESTRICTOTHERSOVER */ /* ETURNINGRIGHTROLLBACKROWSUNBOUNDEDUNIONUSINGVACUUMVIEWINDOWBY */ /* INITIALLYPRIMARY */ -static const char zKWText[668] = { - 'C','O','N','C','U','R','R','E','N','T','_','D','A','T','E','M','P','O', - 'R','A','R','Y','R','E','I','N','D','E','X','E','D','E','S','C','A','P', - 'E','A','C','H','E','C','K','E','Y','B','E','F','O','R','E','I','G','N', - 
'O','R','E','G','E','X','P','L','A','I','N','S','T','E','A','D','D','A', - 'T','A','B','A','S','E','L','E','C','T','A','B','L','E','F','T','H','E', - 'N','D','E','F','E','R','R','A','B','L','E','L','S','E','X','C','L','U', - 'D','E','L','E','T','E','X','C','E','P','T','I','E','S','A','V','E','P', - 'O','I','N','T','E','R','S','E','C','T','R','A','N','S','A','C','T','I', - 'O','N','O','T','N','U','L','L','S','I','S','N','U','L','L','I','K','E', - 'X','C','L','U','S','I','V','E','X','I','S','T','S','C','O','N','S','T', - 'R','A','I','N','T','O','F','F','S','E','T','R','I','G','G','E','R','A', - 'I','S','E','U','N','I','Q','U','E','R','Y','W','I','T','H','O','U','T', - 'E','R','A','N','G','E','N','E','R','A','T','E','D','E','T','A','C','H', - 'A','V','I','N','G','L','O','B','E','G','I','N','N','E','R','E','F','E', - 'R','E','N','C','E','S','A','T','T','A','C','H','B','E','T','W','E','E', - 'N','A','T','U','R','A','L','T','E','R','E','L','E','A','S','E','C','A', - 'S','C','A','D','E','F','A','U','L','T','C','A','S','E','C','O','L','L', - 'A','T','E','C','R','E','A','T','E','I','M','M','E','D','I','A','T','E', - 'J','O','I','N','S','E','R','T','M','A','T','C','H','P','L','A','N','A', - 'L','Y','Z','E','P','R','A','G','M','A','T','E','R','I','A','L','I','Z', - 'E','D','E','F','E','R','R','E','D','I','S','T','I','N','C','T','U','P', - 'D','A','T','E','V','A','L','U','E','S','V','I','R','T','U','A','L','W', - 'A','Y','S','W','H','E','N','O','T','H','I','N','G','R','O','U','P','S', - 'W','H','E','R','E','C','U','R','S','I','V','E','A','B','O','R','T','A', - 'F','T','E','R','E','N','A','M','E','A','N','D','R','O','P','A','R','T', - 'I','T','I','O','N','A','U','T','O','I','N','C','R','E','M','E','N','T', - 'C','A','S','T','C','O','L','U','M','N','C','O','M','M','I','T','C','O', - 'N','F','L','I','C','T','C','R','O','S','S','C','U','R','R','E','N','T', - '_','T','I','M','E','S','T','A','M','P','R','E','C','E','D','I','N','G', - 
'F','A','I','L','A','S','T','F','I','L','T','E','R','E','P','L','A','C', - 'E','F','I','R','S','T','F','O','L','L','O','W','I','N','G','F','R','O', - 'M','F','U','L','L','I','M','I','T','I','F','O','R','D','E','R','E','S', - 'T','R','I','C','T','O','T','H','E','R','S','O','V','E','R','E','T','U', - 'R','N','I','N','G','R','I','G','H','T','R','O','L','L','B','A','C','K', - 'R','O','W','S','U','N','B','O','U','N','D','E','D','U','N','I','O','N', - 'U','S','I','N','G','V','A','C','U','U','M','V','I','E','W','I','N','D', - 'O','W','B','Y','I','N','I','T','I','A','L','L','Y','P','R','I','M','A', - 'R','Y', +static const char zKWText[666] = { + 'R','E','I','N','D','E','X','E','D','E','S','C','A','P','E','A','C','H', + 'E','C','K','E','Y','B','E','F','O','R','E','I','G','N','O','R','E','G', + 'E','X','P','L','A','I','N','S','T','E','A','D','D','A','T','A','B','A', + 'S','E','L','E','C','T','A','B','L','E','F','T','H','E','N','D','E','F', + 'E','R','R','A','B','L','E','L','S','E','X','C','L','U','D','E','L','E', + 'T','E','M','P','O','R','A','R','Y','I','S','N','U','L','L','S','A','V', + 'E','P','O','I','N','T','E','R','S','E','C','T','I','E','S','N','O','T', + 'N','U','L','L','I','K','E','X','C','E','P','T','R','A','N','S','A','C', + 'T','I','O','N','A','T','U','R','A','L','T','E','R','A','I','S','E','X', + 'C','L','U','S','I','V','E','X','I','S','T','S','C','O','N','S','T','R', + 'A','I','N','T','O','F','F','S','E','T','R','I','G','G','E','R','A','N', + 'G','E','N','E','R','A','T','E','D','E','T','A','C','H','A','V','I','N', + 'G','L','O','B','E','G','I','N','N','E','R','E','F','E','R','E','N','C', + 'E','S','U','N','I','Q','U','E','R','Y','W','I','T','H','O','U','T','E', + 'R','E','L','E','A','S','E','A','T','T','A','C','H','B','E','T','W','E', + 'E','N','O','T','H','I','N','G','R','O','U','P','S','C','A','S','C','A', + 'D','E','F','A','U','L','T','C','A','S','E','C','O','L','L','A','T','E', + 'C','R','E','A','T','E','C','U','R','R','E','N','T','_','D','A','T','E', + 
'I','M','M','E','D','I','A','T','E','J','O','I','N','S','E','R','T','M', + 'A','T','C','H','P','L','A','N','A','L','Y','Z','E','P','R','A','G','M', + 'A','T','E','R','I','A','L','I','Z','E','D','E','F','E','R','R','E','D', + 'I','S','T','I','N','C','T','U','P','D','A','T','E','V','A','L','U','E', + 'S','V','I','R','T','U','A','L','W','A','Y','S','W','H','E','N','W','H', + 'E','R','E','C','U','R','S','I','V','E','A','B','O','R','T','A','F','T', + 'E','R','E','N','A','M','E','A','N','D','R','O','P','A','R','T','I','T', + 'I','O','N','A','U','T','O','I','N','C','R','E','M','E','N','T','C','A', + 'S','T','C','O','L','U','M','N','C','O','M','M','I','T','C','O','N','F', + 'L','I','C','T','C','R','O','S','S','C','U','R','R','E','N','T','_','T', + 'I','M','E','S','T','A','M','P','R','E','C','E','D','I','N','G','F','A', + 'I','L','A','S','T','F','I','L','T','E','R','E','P','L','A','C','E','F', + 'I','R','S','T','F','O','L','L','O','W','I','N','G','F','R','O','M','F', + 'U','L','L','I','M','I','T','I','F','O','R','D','E','R','E','S','T','R', + 'I','C','T','O','T','H','E','R','S','O','V','E','R','E','T','U','R','N', + 'I','N','G','R','I','G','H','T','R','O','L','L','B','A','C','K','R','O', + 'W','S','U','N','B','O','U','N','D','E','D','U','N','I','O','N','U','S', + 'I','N','G','V','A','C','U','U','M','V','I','E','W','I','N','D','O','W', + 'B','Y','I','N','I','T','I','A','L','L','Y','P','R','I','M','A','R','Y', }; /* aKWHash[i] is the hash value for the i-th keyword */ static const unsigned char aKWHash[127] = { - 82, 90, 135, 80, 106, 4, 0, 0, 92, 0, 83, 96, 0, - 52, 34, 84, 20, 0, 37, 95, 53, 87, 136, 24, 0, 0, - 141, 0, 46, 130, 0, 27, 108, 0, 14, 0, 0, 124, 79, - 0, 77, 11, 0, 57, 104, 148, 0, 137, 116, 0, 0, 47, - 0, 88, 29, 0, 22, 0, 32, 69, 28, 31, 10, 65, 143, - 111, 123, 0, 97, 89, 70, 146, 66, 121, 98, 0, 48, 0, - 16, 33, 0, 114, 0, 0, 0, 110, 15, 112, 117, 126, 19, - 49, 125, 0, 101, 0, 23, 122, 145, 61, 131, 140, 86, 81, - 41, 5, 127, 0, 0, 109, 50, 132, 129, 0, 
36, 0, 0, - 133, 0, 99, 42, 44, 0, 25, 71, 118, 91, + 84, 92, 134, 82, 105, 29, 0, 0, 94, 0, 85, 72, 0, + 53, 35, 86, 15, 0, 42, 97, 54, 89, 135, 19, 0, 0, + 140, 0, 40, 129, 0, 22, 107, 0, 9, 0, 0, 123, 80, + 0, 78, 6, 0, 65, 103, 147, 0, 136, 115, 0, 0, 48, + 0, 90, 24, 0, 17, 0, 27, 70, 23, 26, 5, 60, 142, + 110, 122, 0, 73, 91, 71, 145, 61, 120, 74, 0, 49, 0, + 11, 41, 0, 113, 0, 0, 0, 109, 10, 111, 116, 125, 14, + 50, 124, 0, 100, 0, 18, 121, 144, 56, 130, 139, 88, 83, + 37, 30, 126, 0, 0, 108, 51, 131, 128, 0, 34, 0, 0, + 132, 0, 98, 38, 39, 0, 20, 45, 117, 93, }; /* aKWNext[] forms the hash collision chain. If aKWHash[i]==0 ** then the i-th keyword has no more hash collisions. Otherwise, ** the next keyword with the same hash is aKWHash[i]-1. */ -static const unsigned char aKWNext[149] = {0, - 0, 0, 0, 0, 120, 0, 0, 0, 0, 9, 0, 38, 0, - 0, 107, 115, 0, 0, 0, 7, 0, 0, 144, 0, 0, 0, - 18, 0, 0, 0, 0, 142, 0, 17, 0, 138, 134, 0, 0, - 0, 0, 67, 0, 0, 51, 139, 3, 76, 1, 0, 0, 0, - 64, 0, 0, 0, 0, 0, 73, 0, 55, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 40, 0, 0, 0, 63, 0, 6, 74, - 0, 0, 45, 0, 0, 0, 0, 0, 128, 0, 105, 0, 56, - 58, 68, 0, 0, 0, 147, 8, 0, 0, 0, 72, 0, 21, - 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 102, - 0, 113, 26, 12, 59, 0, 78, 94, 119, 0, 0, 60, 0, - 0, 100, 39, 0, 54, 0, 75, 0, 93, 43, 35, 62, 30, - 0, 103, 0, 0, 85, +static const unsigned char aKWNext[148] = {0, + 0, 0, 0, 0, 4, 0, 43, 0, 0, 106, 114, 0, 0, + 0, 2, 0, 0, 143, 0, 0, 0, 13, 0, 0, 0, 0, + 141, 0, 0, 119, 52, 0, 0, 137, 12, 0, 0, 62, 0, + 138, 0, 133, 0, 0, 36, 0, 0, 28, 77, 0, 0, 0, + 0, 59, 0, 47, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 69, 0, 0, 0, 0, 0, 146, 3, 0, 58, 0, 1, + 75, 0, 0, 0, 31, 0, 0, 0, 0, 0, 127, 0, 104, + 0, 64, 66, 63, 0, 0, 0, 0, 0, 46, 0, 16, 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 101, 0, + 112, 21, 7, 67, 0, 79, 96, 118, 0, 0, 68, 0, 0, + 99, 44, 0, 55, 0, 76, 0, 95, 32, 33, 57, 25, 0, + 102, 0, 0, 87, }; /* aKWLen[i] is the length (in bytes) of the i-th keyword */ -static const 
unsigned char aKWLen[149] = {0, - 10, 12, 9, 4, 2, 7, 7, 5, 4, 6, 4, 5, 3, - 6, 7, 3, 6, 6, 7, 7, 3, 8, 2, 6, 5, 4, - 4, 3, 10, 4, 7, 6, 6, 4, 9, 9, 11, 6, 2, - 7, 3, 2, 5, 4, 6, 4, 9, 6, 10, 4, 6, 2, - 3, 7, 5, 6, 5, 7, 4, 5, 5, 9, 6, 6, 4, - 5, 5, 10, 6, 7, 7, 5, 7, 7, 3, 7, 4, 7, - 6, 9, 4, 6, 5, 4, 7, 6, 12, 8, 8, 2, 6, - 6, 7, 6, 4, 7, 6, 5, 5, 9, 5, 5, 6, 3, - 4, 9, 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, - 9, 4, 4, 6, 7, 5, 9, 4, 4, 5, 2, 5, 8, - 6, 4, 9, 5, 8, 4, 3, 9, 5, 5, 6, 4, 6, - 2, 2, 9, 3, 7, +static const unsigned char aKWLen[148] = {0, + 7, 7, 5, 4, 6, 4, 5, 3, 6, 7, 3, 6, 6, + 7, 7, 3, 8, 2, 6, 5, 4, 4, 3, 10, 4, 7, + 6, 9, 4, 2, 6, 5, 9, 9, 4, 7, 3, 2, 4, + 4, 6, 11, 6, 2, 7, 5, 5, 9, 6, 10, 4, 6, + 2, 3, 7, 5, 9, 6, 6, 4, 5, 5, 10, 6, 5, + 7, 4, 5, 7, 6, 7, 7, 6, 5, 7, 3, 7, 4, + 7, 6, 12, 9, 4, 6, 5, 4, 7, 6, 12, 8, 8, + 2, 6, 6, 7, 6, 4, 5, 9, 5, 5, 6, 3, 4, + 9, 13, 2, 2, 4, 6, 6, 8, 5, 17, 12, 7, 9, + 4, 4, 6, 7, 5, 9, 4, 4, 5, 2, 5, 8, 6, + 4, 9, 5, 8, 4, 3, 9, 5, 5, 6, 4, 6, 2, + 2, 9, 3, 7, }; /* aKWOffset[i] is the index into zKWText[] of the start of ** the text for the i-th keyword. 
*/ -static const unsigned short int aKWOffset[149] = {0, - 0, 3, 13, 13, 17, 22, 24, 24, 30, 31, 36, 38, 42, - 45, 47, 47, 51, 55, 58, 63, 68, 70, 75, 76, 81, 84, - 87, 89, 91, 100, 103, 108, 113, 118, 121, 127, 135, 140, 144, - 145, 145, 145, 148, 148, 153, 158, 161, 169, 175, 182, 185, 185, - 188, 190, 196, 201, 204, 209, 209, 213, 217, 220, 228, 233, 238, - 241, 244, 248, 258, 264, 270, 275, 279, 286, 287, 291, 298, 302, - 309, 315, 324, 326, 332, 337, 339, 346, 350, 361, 368, 369, 376, - 382, 388, 393, 399, 402, 408, 408, 414, 417, 426, 431, 435, 441, - 443, 446, 455, 457, 459, 468, 472, 478, 484, 492, 497, 497, 497, - 513, 522, 525, 529, 534, 541, 546, 555, 559, 562, 567, 569, 573, - 581, 587, 590, 599, 604, 612, 612, 616, 625, 630, 635, 641, 644, - 647, 650, 652, 657, 661, +static const unsigned short int aKWOffset[148] = {0, + 0, 2, 2, 8, 9, 14, 16, 20, 23, 25, 25, 29, 33, + 36, 41, 46, 48, 53, 54, 59, 62, 65, 67, 69, 78, 81, + 86, 90, 90, 94, 99, 101, 105, 111, 119, 123, 123, 123, 126, + 129, 132, 137, 142, 146, 147, 152, 156, 160, 168, 174, 181, 184, + 184, 187, 189, 195, 198, 206, 211, 216, 219, 222, 226, 236, 239, + 244, 244, 248, 252, 259, 265, 271, 277, 277, 283, 284, 288, 295, + 299, 306, 312, 324, 333, 335, 341, 346, 348, 355, 359, 370, 377, + 378, 385, 391, 397, 402, 408, 412, 415, 424, 429, 433, 439, 441, + 444, 453, 455, 457, 466, 470, 476, 482, 490, 495, 495, 495, 511, + 520, 523, 527, 532, 539, 544, 553, 557, 560, 565, 567, 571, 579, + 585, 588, 597, 602, 610, 610, 614, 623, 628, 633, 639, 642, 645, + 648, 650, 655, 659, }; /* aKWCode[i] is the parser symbol code for the i-th keyword */ -static const unsigned char aKWCode[149] = {0, - TK_CONCURRENT, TK_CTIME_KW, TK_TEMP, TK_TEMP, TK_OR, +static const unsigned char aKWCode[148] = {0, TK_REINDEX, TK_INDEXED, TK_INDEX, TK_DESC, TK_ESCAPE, TK_EACH, TK_CHECK, TK_KEY, TK_BEFORE, TK_FOREIGN, TK_FOR, TK_IGNORE, TK_LIKE_KW, TK_EXPLAIN, TK_INSTEAD, TK_ADD, TK_DATABASE, TK_AS, TK_SELECT, TK_TABLE, 
TK_JOIN_KW, TK_THEN, TK_END, TK_DEFERRABLE, TK_ELSE, - TK_EXCLUDE, TK_DELETE, TK_EXCEPT, TK_TIES, TK_SAVEPOINT, - TK_INTERSECT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_NOTNULL, - TK_NOT, TK_NO, TK_NULLS, TK_NULL, TK_ISNULL, - TK_LIKE_KW, TK_EXCLUSIVE, TK_EXISTS, TK_CONSTRAINT, TK_INTO, - TK_OFFSET, TK_OF, TK_SET, TK_TRIGGER, TK_RAISE, - TK_UNIQUE, TK_QUERY, TK_WITHOUT, TK_WITH, TK_JOIN_KW, + TK_EXCLUDE, TK_DELETE, TK_TEMP, TK_TEMP, TK_OR, + TK_ISNULL, TK_NULLS, TK_SAVEPOINT, TK_INTERSECT, TK_TIES, + TK_NOTNULL, TK_NOT, TK_NO, TK_NULL, TK_LIKE_KW, + TK_EXCEPT, TK_TRANSACTION,TK_ACTION, TK_ON, TK_JOIN_KW, + TK_ALTER, TK_RAISE, TK_EXCLUSIVE, TK_EXISTS, TK_CONSTRAINT, + TK_INTO, TK_OFFSET, TK_OF, TK_SET, TK_TRIGGER, TK_RANGE, TK_GENERATED, TK_DETACH, TK_HAVING, TK_LIKE_KW, - TK_BEGIN, TK_JOIN_KW, TK_REFERENCES, TK_ATTACH, TK_BETWEEN, - TK_JOIN_KW, TK_ALTER, TK_RELEASE, TK_CASCADE, TK_ASC, - TK_DEFAULT, TK_CASE, TK_COLLATE, TK_CREATE, TK_IMMEDIATE, - TK_JOIN, TK_INSERT, TK_MATCH, TK_PLAN, TK_ANALYZE, - TK_PRAGMA, TK_MATERIALIZED, TK_DEFERRED, TK_DISTINCT, TK_IS, - TK_UPDATE, TK_VALUES, TK_VIRTUAL, TK_ALWAYS, TK_WHEN, - TK_NOTHING, TK_GROUPS, TK_GROUP, TK_WHERE, TK_RECURSIVE, - TK_ABORT, TK_AFTER, TK_RENAME, TK_AND, TK_DROP, - TK_PARTITION, TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, - TK_COLUMNKW, TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, - TK_CTIME_KW, TK_CURRENT, TK_PRECEDING, TK_FAIL, TK_LAST, - TK_FILTER, TK_REPLACE, TK_FIRST, TK_FOLLOWING, TK_FROM, - TK_JOIN_KW, TK_LIMIT, TK_IF, TK_ORDER, TK_RESTRICT, - TK_OTHERS, TK_OVER, TK_RETURNING, TK_JOIN_KW, TK_ROLLBACK, - TK_ROWS, TK_ROW, TK_UNBOUNDED, TK_UNION, TK_USING, - TK_VACUUM, TK_VIEW, TK_WINDOW, TK_DO, TK_BY, - TK_INITIALLY, TK_ALL, TK_PRIMARY, + TK_BEGIN, TK_JOIN_KW, TK_REFERENCES, TK_UNIQUE, TK_QUERY, + TK_WITHOUT, TK_WITH, TK_JOIN_KW, TK_RELEASE, TK_ATTACH, + TK_BETWEEN, TK_NOTHING, TK_GROUPS, TK_GROUP, TK_CASCADE, + TK_ASC, TK_DEFAULT, TK_CASE, TK_COLLATE, TK_CREATE, + TK_CTIME_KW, TK_IMMEDIATE, TK_JOIN, TK_INSERT, 
TK_MATCH, + TK_PLAN, TK_ANALYZE, TK_PRAGMA, TK_MATERIALIZED, TK_DEFERRED, + TK_DISTINCT, TK_IS, TK_UPDATE, TK_VALUES, TK_VIRTUAL, + TK_ALWAYS, TK_WHEN, TK_WHERE, TK_RECURSIVE, TK_ABORT, + TK_AFTER, TK_RENAME, TK_AND, TK_DROP, TK_PARTITION, + TK_AUTOINCR, TK_TO, TK_IN, TK_CAST, TK_COLUMNKW, + TK_COMMIT, TK_CONFLICT, TK_JOIN_KW, TK_CTIME_KW, TK_CTIME_KW, + TK_CURRENT, TK_PRECEDING, TK_FAIL, TK_LAST, TK_FILTER, + TK_REPLACE, TK_FIRST, TK_FOLLOWING, TK_FROM, TK_JOIN_KW, + TK_LIMIT, TK_IF, TK_ORDER, TK_RESTRICT, TK_OTHERS, + TK_OVER, TK_RETURNING, TK_JOIN_KW, TK_ROLLBACK, TK_ROWS, + TK_ROW, TK_UNBOUNDED, TK_UNION, TK_USING, TK_VACUUM, + TK_VIEW, TK_WINDOW, TK_DO, TK_BY, TK_INITIALLY, + TK_ALL, TK_PRIMARY, }; /* Hash table decoded: ** 0: INSERT @@ -183577,7 +182885,7 @@ static const unsigned char aKWCode[149] = {0, ** 88: CURRENT AFTER ALTER ** 89: FULL FAIL CONFLICT ** 90: EXPLAIN -** 91: CONSTRAINT CONCURRENT +** 91: CONSTRAINT ** 92: FROM ALWAYS ** 93: ** 94: ABORT @@ -183638,154 +182946,153 @@ static int keywordCode(const char *z, int n, int *pType){ while( j=2 ) keywordCode((char*)z, n, &id); return id; } -#define SQLITE_N_KEYWORD 148 +#define SQLITE_N_KEYWORD 147 SQLITE_API int sqlite3_keyword_name(int i,const char **pzName,int *pnName){ if( i<0 || i>=SQLITE_N_KEYWORD ) return SQLITE_ERROR; i++; @@ -185029,9 +184336,6 @@ static int (*const sqlite3BuiltinExtensions[])(sqlite3*) = { #ifdef SQLITE_EXTRA_AUTOEXT SQLITE_EXTRA_AUTOEXT, #endif -#ifdef SQLITE_ENABLE_HCT - sqlite3HctVtabInit, -#endif }; #ifndef SQLITE_AMALGAMATION @@ -185105,6 +184409,32 @@ SQLITE_API char *sqlite3_temp_directory = 0; */ SQLITE_API char *sqlite3_data_directory = 0; +/* +** Determine whether or not high-precision (long double) floating point +** math works correctly on CPU currently running. +*/ +static SQLITE_NOINLINE int hasHighPrecisionDouble(int rc){ + if( sizeof(LONGDOUBLE_TYPE)<=8 ){ + /* If the size of "long double" is not more than 8, then + ** high-precision math is not possible. 
*/ + return 0; + }else{ + /* Just because sizeof(long double)>8 does not mean that the underlying + ** hardware actually supports high-precision floating point. For example, + ** clearing the 0x100 bit in the floating-point control word on Intel + ** processors will make long double work like double, even though long + ** double takes up more space. The only way to determine if long double + ** actually works is to run an experiment. */ + LONGDOUBLE_TYPE a, b, c; + rc++; + a = 1.0+rc*0.1; + b = 1.0e+18+rc*25.0; + c = a+b; + return b!=c; + } +} + + /* ** Initialize SQLite. ** @@ -185299,6 +184629,13 @@ SQLITE_API int sqlite3_initialize(void){ rc = SQLITE_EXTRA_INIT(0); } #endif + + /* Experimentally determine if high-precision floating point is + ** available. */ +#ifndef SQLITE_OMIT_WSD + sqlite3Config.bUseLongDouble = hasHighPrecisionDouble(rc); +#endif + return rc; } @@ -186369,6 +185706,10 @@ SQLITE_PRIVATE void sqlite3LeaveMutexAndCloseZombie(sqlite3 *db){ sqlite3Error(db, SQLITE_OK); /* Deallocates any cached error strings. 
*/ sqlite3ValueFree(db->pErr); sqlite3CloseExtensions(db); +#if SQLITE_USER_AUTHENTICATION + sqlite3_free(db->auth.zAuthUser); + sqlite3_free(db->auth.zAuthPW); +#endif db->eOpenState = SQLITE_STATE_ERROR; @@ -187803,8 +187144,8 @@ static const int aHardLimit[] = { #if SQLITE_MAX_VDBE_OP<40 # error SQLITE_MAX_VDBE_OP must be at least 40 #endif -#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>127 -# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 127 +#if SQLITE_MAX_FUNCTION_ARG<0 || SQLITE_MAX_FUNCTION_ARG>32767 +# error SQLITE_MAX_FUNCTION_ARG must be between 0 and 32767 #endif #if SQLITE_MAX_ATTACHED<0 || SQLITE_MAX_ATTACHED>125 # error SQLITE_MAX_ATTACHED must be between 0 and 125 @@ -187871,8 +187212,8 @@ SQLITE_API int sqlite3_limit(sqlite3 *db, int limitId, int newLimit){ if( newLimit>=0 ){ /* IMP: R-52476-28732 */ if( newLimit>aHardLimit[limitId] ){ newLimit = aHardLimit[limitId]; /* IMP: R-51463-25634 */ - }else if( newLimitaLimit[limitId] = newLimit; } @@ -188391,7 +187732,6 @@ static int openDatabase( if( ((1<<(flags&7)) & 0x46)==0 ){ rc = SQLITE_MISUSE_BKPT; /* IMP: R-18321-05872 */ }else{ - if( zFilename==0 ) zFilename = ":memory:"; rc = sqlite3ParseUri(zVfs, zFilename, &flags, &db->pVfs, &zOpen, &zErrMsg); } if( rc!=SQLITE_OK ){ @@ -189546,21 +188886,24 @@ SQLITE_API int sqlite3_test_control(int op, ...){ *pI2 = sqlite3LogEst(*pU64); break; } - /* sqlite3_test_control(SQLITE_TESTCTRL_HCT_MTCOMMIT, - ** sqlite3 *db, - ** void(*xMtCommit)(void*, int), - ** void *pCtx - ** ); + +#if !defined(SQLITE_OMIT_WSD) + /* sqlite3_test_control(SQLITE_TESTCTRL_USELONGDOUBLE, int X); ** - ** Install xMtCommit hook on "main" hct database. + ** X<0 Make no changes to the bUseLongDouble. Just report value. 
+ ** X==0 Disable bUseLongDouble + ** X==1 Enable bUseLongDouble + ** X>=2 Set bUseLongDouble to its default value for this platform */ - case SQLITE_TESTCTRL_HCT_MTCOMMIT: { - typedef void (*mt_commit_hook)(void*,int); - sqlite3 *db = va_arg(ap, sqlite3*); - db->xMtCommit = va_arg(ap, mt_commit_hook); - db->pMtCommitCtx = va_arg(ap, void*); + case SQLITE_TESTCTRL_USELONGDOUBLE: { + int b = va_arg(ap, int); + if( b>=2 ) b = hasHighPrecisionDouble(b); + if( b>=0 ) sqlite3Config.bUseLongDouble = b>0; + rc = sqlite3Config.bUseLongDouble!=0; break; - }; + } +#endif + #if defined(SQLITE_DEBUG) && !defined(SQLITE_OMIT_WSD) /* sqlite3_test_control(SQLITE_TESTCTRL_TUNE, id, *piValue) @@ -189869,11 +189212,7 @@ SQLITE_API int sqlite3_snapshot_get( if( iDb==0 || iDb>1 ){ Btree *pBt = db->aDb[iDb].pBt; if( SQLITE_TXN_WRITE!=sqlite3BtreeTxnState(pBt) ){ - Pager *pPager = sqlite3BtreePager(pBt); - i64 dummy = 0; - sqlite3PagerSnapshotOpen(pPager, (sqlite3_snapshot*)&dummy); rc = sqlite3BtreeBeginTrans(pBt, 0, 0); - sqlite3PagerSnapshotOpen(pPager, 0); if( rc==SQLITE_OK ){ rc = sqlite3PagerSnapshotGet(sqlite3BtreePager(pBt), ppSnapshot); } @@ -193692,15 +193031,10 @@ static int fts3PoslistPhraseMerge( if( *p1==POS_COLUMN ){ p1++; p1 += fts3GetVarint32(p1, &iCol1); - /* iCol1==0 indicates corruption. Column 0 does not have a POS_COLUMN - ** entry, so this is actually end-of-doclist. */ - if( iCol1==0 ) return 0; } if( *p2==POS_COLUMN ){ p2++; p2 += fts3GetVarint32(p2, &iCol2); - /* As above, iCol2==0 indicates corruption. 
*/ - if( iCol2==0 ) return 0; } while( 1 ){ @@ -196871,7 +196205,7 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ nTmp += p->pRight->pPhrase->doclist.nList; } nTmp += p->pPhrase->doclist.nList; - aTmp = sqlite3_malloc64(nTmp*2 + FTS3_VARINT_MAX); + aTmp = sqlite3_malloc64(nTmp*2); if( !aTmp ){ *pRc = SQLITE_NOMEM; res = 0; @@ -197522,7 +196856,7 @@ SQLITE_PRIVATE int sqlite3Fts3Corrupt(){ } #endif -#if !defined(SQLITE_CORE) +#if !SQLITE_CORE /* ** Initialize API pointer table, if required. */ @@ -221813,7 +221147,7 @@ SQLITE_API int sqlite3_rtree_query_callback( ); } -#ifndef SQLITE_CORE +#if !SQLITE_CORE #ifdef _WIN32 __declspec(dllexport) #endif @@ -222404,7 +221738,7 @@ SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ return rc; } -#ifndef SQLITE_CORE +#if !SQLITE_CORE #ifdef _WIN32 __declspec(dllexport) #endif @@ -223662,27 +222996,6 @@ struct RbuFrame { u32 iWalFrame; }; -#ifndef UNUSED_PARAMETER -/* -** The following macros are used to suppress compiler warnings and to -** make it clear to human readers when a function parameter is deliberately -** left unused within the body of a function. This usually happens when -** a function is called via a function pointer. For example the -** implementation of an SQL aggregate step callback may not use the -** parameter indicating the number of arguments passed to the aggregate, -** if it knows that this is enforced elsewhere. -** -** When a function parameter is not used at all within the body of a function, -** it is generally named "NotUsed" or "NotUsed2" to make things even clearer. -** However, these macros may also be used to suppress warnings related to -** parameters that may or may not be used depending on compilation options. -** For example those parameters only used in assert() statements. In these -** cases the parameters are named as per the usual conventions. 
-*/ -#define UNUSED_PARAMETER(x) (void)(x) -#define UNUSED_PARAMETER2(x,y) UNUSED_PARAMETER(x),UNUSED_PARAMETER(y) -#endif - /* ** RBU handle. ** @@ -223734,7 +223047,7 @@ struct sqlite3rbu { int rc; /* Value returned by last rbu_step() call */ char *zErrmsg; /* Error message if rc!=SQLITE_OK */ int nStep; /* Rows processed for current object */ - sqlite3_int64 nProgress; /* Rows processed for all objects */ + int nProgress; /* Rows processed for all objects */ RbuObjIter objiter; /* Iterator for skipping through tbl/idx */ const char *zVfsName; /* Name of automatically created rbu vfs */ rbu_file *pTargetFd; /* File handle open on target db */ @@ -223851,7 +223164,7 @@ static unsigned int rbuDeltaGetInt(const char **pz, int *pLen){ v = (v<<6) + c; } z--; - *pLen -= (int)(z - zStart); + *pLen -= z - zStart; *pz = (char*)z; return v; } @@ -224036,7 +223349,6 @@ static void rbuFossilDeltaFunc( char *aOut; assert( argc==2 ); - UNUSED_PARAMETER(argc); nOrig = sqlite3_value_bytes(argv[0]); aOrig = (const char*)sqlite3_value_blob(argv[0]); @@ -225616,13 +224928,13 @@ static char *rbuObjIterGetIndexWhere(sqlite3rbu *p, RbuObjIter *pIter){ else if( c==')' ){ nParen--; if( nParen==0 ){ - int nSpan = (int)(&zSql[i] - pIter->aIdxCol[iIdxCol].zSpan); + int nSpan = &zSql[i] - pIter->aIdxCol[iIdxCol].zSpan; pIter->aIdxCol[iIdxCol++].nSpan = nSpan; i++; break; } }else if( c==',' && nParen==1 ){ - int nSpan = (int)(&zSql[i] - pIter->aIdxCol[iIdxCol].zSpan); + int nSpan = &zSql[i] - pIter->aIdxCol[iIdxCol].zSpan; pIter->aIdxCol[iIdxCol++].nSpan = nSpan; pIter->aIdxCol[iIdxCol].zSpan = &zSql[i+1]; }else if( c=='"' || c=='\'' || c=='`' ){ @@ -226312,8 +225624,6 @@ static void rbuFileSuffix3(const char *zBase, char *z){ for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} if( z[i]=='.' 
&& sz>i+4 ) memmove(&z[i+1], &z[sz-3], 4); } -#else - UNUSED_PARAMETER2(zBase,z); #endif } @@ -226898,7 +226208,7 @@ static void rbuSaveState(sqlite3rbu *p, int eStage){ "(%d, %Q), " "(%d, %Q), " "(%d, %d), " - "(%d, %lld), " + "(%d, %d), " "(%d, %lld), " "(%d, %lld), " "(%d, %lld), " @@ -227256,7 +226566,6 @@ static void rbuIndexCntFunc( sqlite3 *db = (rbuIsVacuum(p) ? p->dbRbu : p->dbMain); assert( nVal==1 ); - UNUSED_PARAMETER(nVal); rc = prepareFreeAndCollectError(db, &pStmt, &zErrmsg, sqlite3_mprintf("SELECT count(*) FROM sqlite_schema " @@ -227532,7 +226841,7 @@ SQLITE_API sqlite3rbu *sqlite3rbu_vacuum( ){ if( zTarget==0 ){ return rbuMisuseError(); } if( zState ){ - size_t n = strlen(zState); + int n = strlen(zState); if( n>=7 && 0==memcmp("-vactmp", &zState[n-7], 7) ){ return rbuMisuseError(); } @@ -227749,7 +227058,6 @@ SQLITE_API int sqlite3rbu_savestate(sqlite3rbu *p){ */ static int xDefaultRename(void *pArg, const char *zOld, const char *zNew){ int rc = SQLITE_OK; - UNUSED_PARAMETER(pArg); #if defined(_WIN32_WCE) { LPWSTR zWideOld; @@ -228654,9 +227962,6 @@ static int rbuVfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ ** No-op. */ static int rbuVfsGetLastError(sqlite3_vfs *pVfs, int a, char *b){ - UNUSED_PARAMETER(pVfs); - UNUSED_PARAMETER(a); - UNUSED_PARAMETER(b); return 0; } @@ -229713,13 +229018,7 @@ SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3 *db){ return SQLITE_OK; } ** ** The data field of sqlite_dbpage table can be updated. The new ** value must be a BLOB which is the correct page size, otherwise the -** update fails. INSERT operations also work, and operate as if they -** where REPLACE. The size of the database can be extended by INSERT-ing -** new pages on the end. -** -** Rows may not be deleted. However, doing an INSERT to page number N -** with NULL page data causes the N-th page and all subsequent pages to be -** deleted and the database to be truncated. +** update fails. Rows may not be deleted or inserted. 
*/ /* #include "sqliteInt.h" ** Requires access to internal data structures ** */ @@ -229742,8 +229041,6 @@ struct DbpageCursor { struct DbpageTable { sqlite3_vtab base; /* Base class. Must be first */ sqlite3 *db; /* The database */ - int iDbTrunc; /* Database to truncate */ - Pgno pgnoTrunc; /* Size to truncate to */ }; /* Columns */ @@ -229752,6 +229049,7 @@ struct DbpageTable { #define DBPAGE_COLUMN_SCHEMA 2 + /* ** Connect to or create a dbpagevfs virtual table. */ @@ -230013,11 +229311,11 @@ static int dbpageUpdate( DbPage *pDbPage = 0; int rc = SQLITE_OK; char *zErr = 0; + const char *zSchema; int iDb; Btree *pBt; Pager *pPager; int szPage; - int isInsert; (void)pRowid; if( pTab->db->flags & SQLITE_Defensive ){ @@ -230028,29 +229326,21 @@ static int dbpageUpdate( zErr = "cannot delete"; goto update_fail; } - if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ - pgno = (Pgno)sqlite3_value_int(argv[2]); - isInsert = 1; - }else{ - pgno = sqlite3_value_int(argv[0]); - if( (Pgno)sqlite3_value_int(argv[1])!=pgno ){ - zErr = "cannot insert"; - goto update_fail; - } - isInsert = 0; + pgno = sqlite3_value_int(argv[0]); + if( sqlite3_value_type(argv[0])==SQLITE_NULL + || (Pgno)sqlite3_value_int(argv[1])!=pgno + ){ + zErr = "cannot insert"; + goto update_fail; } - if( sqlite3_value_type(argv[4])==SQLITE_NULL ){ - iDb = 0; - }else{ - const char *zSchema = (const char*)sqlite3_value_text(argv[4]); - iDb = sqlite3FindDbName(pTab->db, zSchema); - if( iDb<0 ){ - zErr = "no such schema"; - goto update_fail; - } + zSchema = (const char*)sqlite3_value_text(argv[4]); + iDb = ALWAYS(zSchema) ? 
sqlite3FindDbName(pTab->db, zSchema) : -1; + if( NEVER(iDb<0) ){ + zErr = "no such schema"; + goto update_fail; } pBt = pTab->db->aDb[iDb].pBt; - if( pgno<1 || NEVER(pBt==0) ){ + if( NEVER(pgno<1) || NEVER(pBt==0) || NEVER(pgno>sqlite3BtreeLastPage(pBt)) ){ zErr = "bad page number"; goto update_fail; } @@ -230058,25 +229348,18 @@ static int dbpageUpdate( if( sqlite3_value_type(argv[3])!=SQLITE_BLOB || sqlite3_value_bytes(argv[3])!=szPage ){ - if( sqlite3_value_type(argv[3])==SQLITE_NULL && isInsert && pgno>1 ){ - /* "INSERT INTO dbpage($PGNO,NULL)" causes page number $PGNO and - ** all subsequent pages to be deleted. */ - pTab->iDbTrunc = iDb; - pgno--; - pTab->pgnoTrunc = pgno; - }else{ - zErr = "bad page value"; - goto update_fail; - } + zErr = "bad page value"; + goto update_fail; } pPager = sqlite3BtreePager(pBt); rc = sqlite3PagerGet(pPager, pgno, (DbPage**)&pDbPage, 0); if( rc==SQLITE_OK ){ const void *pData = sqlite3_value_blob(argv[3]); - if( (rc = sqlite3PagerWrite(pDbPage))==SQLITE_OK && pData ){ - unsigned char *aPage = sqlite3PagerGetData(pDbPage); - memcpy(aPage, pData, szPage); - pTab->pgnoTrunc = 0; + assert( pData!=0 || pTab->db->mallocFailed ); + if( pData + && (rc = sqlite3PagerWrite(pDbPage))==SQLITE_OK + ){ + memcpy(sqlite3PagerGetData(pDbPage), pData, szPage); } } sqlite3PagerUnref(pDbPage); @@ -230100,31 +229383,9 @@ static int dbpageBegin(sqlite3_vtab *pVtab){ Btree *pBt = db->aDb[i].pBt; if( pBt ) (void)sqlite3BtreeBeginTrans(pBt, 1, 0); } - pTab->pgnoTrunc = 0; return SQLITE_OK; } -/* Invoke sqlite3PagerTruncate() as necessary, just prior to COMMIT -*/ -static int dbpageSync(sqlite3_vtab *pVtab){ - DbpageTable *pTab = (DbpageTable *)pVtab; - if( pTab->pgnoTrunc>0 ){ - Btree *pBt = pTab->db->aDb[pTab->iDbTrunc].pBt; - Pager *pPager = sqlite3BtreePager(pBt); - sqlite3PagerTruncateImage(pPager, pTab->pgnoTrunc); - } - pTab->pgnoTrunc = 0; - return SQLITE_OK; -} - -/* Cancel any pending truncate. 
-*/ -static int dbpageRollbackTo(sqlite3_vtab *pVtab, int notUsed1){ - DbpageTable *pTab = (DbpageTable *)pVtab; - pTab->pgnoTrunc = 0; - (void)notUsed1; - return SQLITE_OK; -} /* ** Invoke this routine to register the "dbpage" virtual table module @@ -230146,14 +229407,14 @@ SQLITE_PRIVATE int sqlite3DbpageRegister(sqlite3 *db){ dbpageRowid, /* xRowid - read data */ dbpageUpdate, /* xUpdate */ dbpageBegin, /* xBegin */ - dbpageSync, /* xSync */ + 0, /* xSync */ 0, /* xCommit */ 0, /* xRollback */ 0, /* xFindMethod */ 0, /* xRename */ 0, /* xSavepoint */ 0, /* xRelease */ - dbpageRollbackTo, /* xRollbackTo */ + 0, /* xRollbackTo */ 0, /* xShadowName */ 0 /* xIntegrity */ }; @@ -230248,10 +229509,6 @@ struct SessionBuffer { ** input data. Input data may be supplied either as a single large buffer ** (e.g. sqlite3changeset_start()) or using a stream function (e.g. ** sqlite3changeset_start_strm()). -** -** bNoDiscard: -** If true, then the only time data is discarded is as a result of explicit -** sessionDiscardData() calls. Not within every sessionInputBuffer() call. */ struct SessionInput { int bNoDiscard; /* If true, do not discard in InputBuffer() */ @@ -231936,19 +231193,16 @@ static void sessionPreupdateOneChange( for(i=0; i<(pTab->nCol-pTab->bRowid); i++){ sqlite3_value *p = 0; if( op!=SQLITE_INSERT ){ - /* This may fail if the column has a non-NULL default and was added - ** using ALTER TABLE ADD COLUMN after this record was created. */ - rc = pSession->hook.xOld(pSession->hook.pCtx, i, &p); + TESTONLY(int trc = ) pSession->hook.xOld(pSession->hook.pCtx, i, &p); + assert( trc==SQLITE_OK ); }else if( pTab->abPK[i] ){ TESTONLY(int trc = ) pSession->hook.xNew(pSession->hook.pCtx, i, &p); assert( trc==SQLITE_OK ); } - if( rc==SQLITE_OK ){ - /* This may fail if SQLite value p contains a utf-16 string that must - ** be converted to utf-8 and an OOM error occurs while doing so. 
*/ - rc = sessionSerializeValue(0, p, &nByte); - } + /* This may fail if SQLite value p contains a utf-16 string that must + ** be converted to utf-8 and an OOM error occurs while doing so. */ + rc = sessionSerializeValue(0, p, &nByte); if( rc!=SQLITE_OK ) goto error_out; } if( pTab->bRowid ){ @@ -235329,21 +234583,15 @@ static int sessionChangesetApply( int nTab = 0; /* Result of sqlite3Strlen30(zTab) */ SessionApplyCtx sApply; /* changeset_apply() context object */ int bPatchset; - u64 savedFlag = db->flags & SQLITE_FkNoAction; assert( xConflict!=0 ); - sqlite3_mutex_enter(sqlite3_db_mutex(db)); - if( flags & SQLITE_CHANGESETAPPLY_FKNOACTION ){ - db->flags |= ((u64)SQLITE_FkNoAction); - db->aDb[0].pSchema->schema_cookie -= 32; - } - pIter->in.bNoDiscard = 1; memset(&sApply, 0, sizeof(sApply)); sApply.bRebase = (ppRebase && pnRebase); sApply.bInvertConstraints = !!(flags & SQLITE_CHANGESETAPPLY_INVERT); sApply.bIgnoreNoop = !!(flags & SQLITE_CHANGESETAPPLY_IGNORENOOP); + sqlite3_mutex_enter(sqlite3_db_mutex(db)); if( (flags & SQLITE_CHANGESETAPPLY_NOSAVEPOINT)==0 ){ rc = sqlite3_exec(db, "SAVEPOINT changeset_apply", 0, 0, 0); } @@ -235505,12 +234753,6 @@ static int sessionChangesetApply( sqlite3_free((char*)sApply.azCol); /* cast works around VC++ bug */ sqlite3_free((char*)sApply.constraints.aBuf); sqlite3_free((char*)sApply.rebase.aBuf); - - if( (flags & SQLITE_CHANGESETAPPLY_FKNOACTION) && savedFlag==0 ){ - assert( db->flags & SQLITE_FkNoAction ); - db->flags &= ~((u64)SQLITE_FkNoAction); - db->aDb[0].pSchema->schema_cookie -= 32; - } sqlite3_mutex_leave(sqlite3_db_mutex(db)); return rc; } @@ -235539,6 +234781,12 @@ SQLITE_API int sqlite3changeset_apply_v2( sqlite3_changeset_iter *pIter; /* Iterator to skip through changeset */ int bInv = !!(flags & SQLITE_CHANGESETAPPLY_INVERT); int rc = sessionChangesetStart(&pIter, 0, 0, nChangeset, pChangeset, bInv, 1); + u64 savedFlag = db->flags & SQLITE_FkNoAction; + + if( flags & SQLITE_CHANGESETAPPLY_FKNOACTION ){ + 
db->flags |= ((u64)SQLITE_FkNoAction); + db->aDb[0].pSchema->schema_cookie -= 32; + } if( rc==SQLITE_OK ){ rc = sessionChangesetApply( @@ -235546,6 +234794,11 @@ SQLITE_API int sqlite3changeset_apply_v2( ); } + if( (flags & SQLITE_CHANGESETAPPLY_FKNOACTION) && savedFlag==0 ){ + assert( db->flags & SQLITE_FkNoAction ); + db->flags &= ~((u64)SQLITE_FkNoAction); + db->aDb[0].pSchema->schema_cookie -= 32; + } return rc; } @@ -235866,9 +235119,6 @@ static int sessionChangesetExtendRecord( sessionAppendBlob(pOut, aRec, nRec, &rc); if( rc==SQLITE_OK && pTab->pDfltStmt==0 ){ rc = sessionPrepareDfltStmt(pGrp->db, pTab, &pTab->pDfltStmt); - if( rc==SQLITE_OK && SQLITE_ROW!=sqlite3_step(pTab->pDfltStmt) ){ - rc = sqlite3_errcode(pGrp->db); - } } for(ii=nCol; rc==SQLITE_OK && iinCol; ii++){ int eType = sqlite3_column_type(pTab->pDfltStmt, ii); @@ -235885,7 +235135,6 @@ static int sessionChangesetExtendRecord( } if( SQLITE_OK==sessionBufferGrow(pOut, 8, &rc) ){ sessionPutI64(&pOut->aBuf[pOut->nBuf], iVal); - pOut->nBuf += 8; } break; } @@ -236025,8 +235274,6 @@ static int sessionOneChangeToHash( u8 *aRec = &pIter->in.aData[pIter->in.iCurrent + 2]; int nRec = (pIter->in.iNext - pIter->in.iCurrent) - 2; - assert( nRec>0 ); - /* Ensure that only changesets, or only patchsets, but not a mixture ** of both, are being combined. It is an error to try to combine a ** changeset and a patchset. 
*/ @@ -236104,7 +235351,6 @@ static int sessionChangesetToHash( int nRec; int rc = SQLITE_OK; - pIter->in.bNoDiscard = 1; while( SQLITE_ROW==(sessionChangesetNext(pIter, &aRec, &nRec, 0)) ){ rc = sessionOneChangeToHash(pGrp, pIter, bRebase); if( rc!=SQLITE_OK ) break; @@ -236737,27 +235983,7 @@ SQLITE_API int sqlite3session_config(int op, void *pArg){ /************** End of sqlite3session.c **************************************/ /************** Begin file fts5.c ********************************************/ -/* -** This, the "fts5.c" source file, is a composite file that is itself -** assembled from the following files: -** -** fts5.h -** fts5Int.h -** fts5parse.h <--- Generated from fts5parse.y by Lemon -** fts5parse.c <--- Generated from fts5parse.y by Lemon -** fts5_aux.c -** fts5_buffer.c -** fts5_config.c -** fts5_expr.c -** fts5_hash.c -** fts5_index.c -** fts5_main.c -** fts5_storage.c -** fts5_tokenize.c -** fts5_unicode2.c -** fts5_varint.c -** fts5_vocab.c -*/ + #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) #if !defined(NDEBUG) && !defined(SQLITE_DEBUG) @@ -236767,12 +235993,6 @@ SQLITE_API int sqlite3session_config(int op, void *pArg){ # undef NDEBUG #endif -#ifdef HAVE_STDINT_H -/* #include */ -#endif -#ifdef HAVE_INTTYPES_H -/* #include */ -#endif /* ** 2014 May 31 ** @@ -237171,6 +236391,7 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting +** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** @@ -237739,7 +236960,6 @@ struct Fts5Config { int *aPrefix; /* Sizes in bytes of nPrefix prefix indexes */ int eContent; /* An FTS5_CONTENT value */ int bContentlessDelete; /* "contentless_delete=" option (dflt==0) */ - int bContentlessUnindexed; /* "contentless_unindexed=" option (dflt=0) */ char *zContent; /* content table */ char *zContentRowid; /* "content_rowid=" option value */ int bColumnsize; /* "columnsize=" option value (dflt==1) */ @@ -237778,10 +236998,9 @@ struct Fts5Config { #define FTS5_CURRENT_VERSION 4 #define FTS5_CURRENT_VERSION_SECUREDELETE 5 -#define FTS5_CONTENT_NORMAL 0 -#define FTS5_CONTENT_NONE 1 -#define FTS5_CONTENT_EXTERNAL 2 -#define FTS5_CONTENT_UNINDEXED 3 +#define FTS5_CONTENT_NORMAL 0 +#define FTS5_CONTENT_NONE 1 +#define FTS5_CONTENT_EXTERNAL 2 #define FTS5_DETAIL_FULL 0 #define FTS5_DETAIL_NONE 1 @@ -238153,14 +237372,17 @@ static Fts5Table *sqlite3Fts5TableFromCsrid(Fts5Global*, i64); static int sqlite3Fts5FlushToDisk(Fts5Table*); -static void sqlite3Fts5ClearLocale(Fts5Config *pConfig); -static void sqlite3Fts5SetLocale(Fts5Config *pConfig, const char *pLoc, int nLoc); - -static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal); -static int sqlite3Fts5DecodeLocaleValue(sqlite3_value *pVal, - const char **ppText, int *pnText, const char **ppLoc, int *pnLoc +static int sqlite3Fts5ExtractText( + Fts5Config *pConfig, + sqlite3_value *pVal, /* Value to extract text from */ + int bContent, /* Loaded from content table */ + int *pbResetTokenizer, /* OUT: True if ClearLocale() required */ + const char **ppText, /* OUT: Pointer to text buffer */ + int *pnText /* OUT: Size of (*ppText) in bytes */ ); +static void sqlite3Fts5ClearLocale(Fts5Config *pConfig); + /* ** End of interface to code in fts5.c. 
**************************************************************************/ @@ -238241,7 +237463,7 @@ static int sqlite3Fts5DropAll(Fts5Config*); static int sqlite3Fts5CreateTable(Fts5Config*, const char*, const char*, int, char **); static int sqlite3Fts5StorageDelete(Fts5Storage *p, i64, sqlite3_value**, int); -static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, int, sqlite3_value**, i64*); +static int sqlite3Fts5StorageContentInsert(Fts5Storage *p, sqlite3_value**, i64*); static int sqlite3Fts5StorageIndexInsert(Fts5Storage *p, sqlite3_value**, i64); static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg); @@ -241448,7 +240670,6 @@ static int fts5ConfigParseSpecial( ){ int rc = SQLITE_OK; int nCmd = (int)strlen(zCmd); - if( sqlite3_strnicmp("prefix", zCmd, nCmd)==0 ){ const int nByte = sizeof(int) * FTS5_MAX_PREFIX_INDEXES; const char *p; @@ -241568,16 +240789,6 @@ static int fts5ConfigParseSpecial( return rc; } - if( sqlite3_strnicmp("contentless_unindexed", zCmd, nCmd)==0 ){ - if( (zArg[0]!='0' && zArg[0]!='1') || zArg[1]!='\0' ){ - *pzErr = sqlite3_mprintf("malformed contentless_delete=... directive"); - rc = SQLITE_ERROR; - }else{ - pConfig->bContentlessUnindexed = (zArg[0]=='1'); - } - return rc; - } - if( sqlite3_strnicmp("content_rowid", zCmd, nCmd)==0 ){ if( pConfig->zContentRowid ){ *pzErr = sqlite3_mprintf("multiple content_rowid=... 
directives"); @@ -241695,8 +240906,7 @@ static int fts5ConfigParseColumn( Fts5Config *p, char *zCol, char *zArg, - char **pzErr, - int *pbUnindexed + char **pzErr ){ int rc = SQLITE_OK; if( 0==sqlite3_stricmp(zCol, FTS5_RANK_NAME) @@ -241707,7 +240917,6 @@ static int fts5ConfigParseColumn( }else if( zArg ){ if( 0==sqlite3_stricmp(zArg, "unindexed") ){ p->abUnindexed[p->nCol] = 1; - *pbUnindexed = 1; }else{ *pzErr = sqlite3_mprintf("unrecognized column option: %s", zArg); rc = SQLITE_ERROR; @@ -241728,26 +240937,11 @@ static int fts5ConfigMakeExprlist(Fts5Config *p){ sqlite3Fts5BufferAppendPrintf(&rc, &buf, "T.%Q", p->zContentRowid); if( p->eContent!=FTS5_CONTENT_NONE ){ - assert( p->eContent==FTS5_CONTENT_EXTERNAL - || p->eContent==FTS5_CONTENT_NORMAL - || p->eContent==FTS5_CONTENT_UNINDEXED - ); for(i=0; inCol; i++){ if( p->eContent==FTS5_CONTENT_EXTERNAL ){ sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.%Q", p->azCol[i]); - }else if( p->eContent==FTS5_CONTENT_NORMAL || p->abUnindexed[i] ){ - sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); - } - } - } - if( p->eContent==FTS5_CONTENT_NORMAL && p->bLocale ){ - for(i=0; inCol; i++){ - if( p->abUnindexed[i]==0 ){ - sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.l%d", i); - }else{ - sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", NULL"); + sqlite3Fts5BufferAppendPrintf(&rc, &buf, ", T.c%d", i); } } } @@ -241781,7 +240975,6 @@ static int sqlite3Fts5ConfigParse( Fts5Config *pRet; /* New object to return */ int i; sqlite3_int64 nByte; - int bUnindexed = 0; /* True if there are one or more UNINDEXED */ *ppOut = pRet = (Fts5Config*)sqlite3_malloc(sizeof(Fts5Config)); if( pRet==0 ) return SQLITE_NOMEM; @@ -241841,7 +241034,7 @@ static int sqlite3Fts5ConfigParse( pzErr ); }else{ - rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr, &bUnindexed); + rc = fts5ConfigParseColumn(pRet, zOne, zTwo, pzErr); zOne = 0; } } @@ -241873,19 +241066,6 @@ static int 
sqlite3Fts5ConfigParse( rc = SQLITE_ERROR; } - /* We only allow contentless_unindexed=1 if the table is actually a - ** contentless one. - */ - if( rc==SQLITE_OK - && pRet->bContentlessUnindexed - && pRet->eContent!=FTS5_CONTENT_NONE - ){ - *pzErr = sqlite3_mprintf( - "contentless_unindexed=1 requires a contentless table" - ); - rc = SQLITE_ERROR; - } - /* If no zContent option was specified, fill in the default values. */ if( rc==SQLITE_OK && pRet->zContent==0 ){ const char *zTail = 0; @@ -241894,9 +241074,6 @@ static int sqlite3Fts5ConfigParse( ); if( pRet->eContent==FTS5_CONTENT_NORMAL ){ zTail = "content"; - }else if( bUnindexed && pRet->bContentlessUnindexed ){ - pRet->eContent = FTS5_CONTENT_UNINDEXED; - zTail = "content"; }else if( pRet->bColumnsize ){ zTail = "docsize"; } @@ -247438,7 +246615,7 @@ static i64 fts5IndexDataVersion(Fts5Index *p){ if( p->pDataVersion==0 ){ p->rc = fts5IndexPrepareStmt(p, &p->pDataVersion, sqlite3_mprintf("PRAGMA %Q.data_version", p->pConfig->zDb) - ); + ); if( p->rc ) return 0; } @@ -251070,11 +250247,6 @@ static int fts5IndexFindDeleteMerge(Fts5Index *p, Fts5Structure *pStruct){ nBest = nPercent; } } - - /* If pLvl is already the input level to an ongoing merge, look no - ** further for a merge candidate. The caller should be allowed to - ** continue merging from pLvl first. */ - if( pLvl->nMerge ) break; } } return iRet; @@ -254999,7 +254171,7 @@ static int fts5structConnectMethod( /* ** We must have a single struct=? constraint that will be passed through -** into the xFilter method. If there is no valid struct=? constraint, +** into the xFilter method. If there is no valid stmt=? constraint, ** then return an SQLITE_CONSTRAINT error. 
*/ static int fts5structBestIndexMethod( @@ -255341,17 +254513,8 @@ struct Fts5Global { Fts5TokenizerModule *pTok; /* First in list of all tokenizer modules */ Fts5TokenizerModule *pDfltTok; /* Default tokenizer module */ Fts5Cursor *pCsr; /* First in list of all open cursors */ - u32 aLocaleHdr[4]; }; -/* -** Size of header on fts5_locale() values. And macro to access a buffer -** containing a copy of the header from an Fts5Config pointer. -*/ -#define FTS5_LOCALE_HDR_SIZE ((int)sizeof( ((Fts5Global*)0)->aLocaleHdr )) -#define FTS5_LOCALE_HDR(pConfig) ((const u8*)(pConfig->pGlobal->aLocaleHdr)) - - /* ** Each auxiliary function registered with the FTS5 module is represented ** by an object of the following type. All such objects are stored as part @@ -255514,6 +254677,12 @@ struct Fts5Cursor { #define BitFlagAllTest(x,y) (((x) & (y))==(y)) #define BitFlagTest(x,y) (((x) & (y))!=0) +/* +** The subtype value and header bytes used by fts5_locale(). +*/ +#define FTS5_LOCALE_SUBTYPE ((unsigned int)'L') +#define FTS5_LOCALE_HEADER "\x00\xE0\xB2\xEB" + /* ** Macros to Set(), Clear() and Test() cursor flags. @@ -255590,16 +254759,10 @@ static void fts5CheckTransactionState(Fts5FullTable *p, int op, int iSavepoint){ #endif /* -** Return true if pTab is a contentless table. If parameter bIncludeUnindexed -** is true, this includes contentless tables that store UNINDEXED columns -** only. +** Return true if pTab is a contentless table. 
*/ -static int fts5IsContentless(Fts5FullTable *pTab, int bIncludeUnindexed){ - int eContent = pTab->p.pConfig->eContent; - return ( - eContent==FTS5_CONTENT_NONE - || (bIncludeUnindexed && eContent==FTS5_CONTENT_UNINDEXED) - ); +static int fts5IsContentless(Fts5FullTable *pTab){ + return pTab->p.pConfig->eContent==FTS5_CONTENT_NONE; } /* @@ -255890,7 +255053,6 @@ static int fts5BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ if( p->usable==0 || iCol<0 ){ /* As there exists an unusable MATCH constraint this is an ** unusable plan. Return SQLITE_CONSTRAINT. */ - idxStr[iIdxStr] = 0; return SQLITE_CONSTRAINT; }else{ if( iCol==nCol+1 ){ @@ -256524,7 +255686,7 @@ static void fts5SetVtabError(Fts5FullTable *p, const char *zFormat, ...){ ** valid until after the final call to sqlite3Fts5Tokenize() that will use ** the locale. */ -static void sqlite3Fts5SetLocale( +static void fts5SetLocale( Fts5Config *pConfig, const char *zLocale, int nLocale @@ -256535,74 +255697,127 @@ static void sqlite3Fts5SetLocale( } /* -** Clear any locale configured by an earlier call to sqlite3Fts5SetLocale(). +** Clear any locale configured by an earlier call to fts5SetLocale() or +** sqlite3Fts5ExtractText(). */ static void sqlite3Fts5ClearLocale(Fts5Config *pConfig){ - sqlite3Fts5SetLocale(pConfig, 0, 0); + fts5SetLocale(pConfig, 0, 0); } /* -** Return true if the value passed as the only argument is an -** fts5_locale() value. +** This function is used to extract utf-8 text from an sqlite3_value. This +** is usually done in order to tokenize it. For example, when: +** +** * a value is written to an fts5 table, +** * a value is deleted from an FTS5_CONTENT_NORMAL table, +** * a value containing a query expression is passed to xFilter() +** +** and so on. +** +** This function handles 2 cases: +** +** 1) Ordinary values. The text can be extracted from these using +** sqlite3_value_text(). +** +** 2) Combination text/locale blobs created by fts5_locale(). 
There +** are several cases for these: +** +** * Blobs tagged with FTS5_LOCALE_SUBTYPE. +** * Blobs read from the content table of a locale=1 external-content +** table, and +** * Blobs read from the content table of a locale=1 regular +** content table. +** +** The first two cases above should have the 4 byte FTS5_LOCALE_HEADER +** header. It is an error if a blob with the subtype or a blob read +** from the content table of an external content table does not have +** the required header. A blob read from the content table of a regular +** locale=1 table does not have the header. This is to save space. +** +** If successful, SQLITE_OK is returned and output parameters (*ppText) +** and (*pnText) are set to point to a buffer containing the extracted utf-8 +** text and its length in bytes, respectively. The buffer is not +** nul-terminated. It has the same lifetime as the sqlite3_value object +** from which it is extracted. +** +** Parameter bContent must be true if the value was read from an indexed +** column (i.e. not UNINDEXED) of the on disk content. +** +** If pbResetTokenizer is not NULL and if case (2) is used, then +** fts5SetLocale() is called to ensure subsequent sqlite3Fts5Tokenize() calls +** use the locale. In this case (*pbResetTokenizer) is set to true before +** returning, to indicate that the caller must call sqlite3Fts5ClearLocale() +** to clear the locale after tokenizing the text. 
*/ -static int sqlite3Fts5IsLocaleValue(Fts5Config *pConfig, sqlite3_value *pVal){ - int ret = 0; +static int sqlite3Fts5ExtractText( + Fts5Config *pConfig, + sqlite3_value *pVal, /* Value to extract text from */ + int bContent, /* True if indexed table content */ + int *pbResetTokenizer, /* OUT: True if xSetLocale(NULL) required */ + const char **ppText, /* OUT: Pointer to text buffer */ + int *pnText /* OUT: Size of (*ppText) in bytes */ +){ + const char *pText = 0; + int nText = 0; + int rc = SQLITE_OK; + int bDecodeBlob = 0; + + assert( pbResetTokenizer==0 || *pbResetTokenizer==0 ); + assert( bContent==0 || pConfig->eContent!=FTS5_CONTENT_NONE ); + assert( bContent==0 || sqlite3_value_subtype(pVal)==0 ); + if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ - /* Call sqlite3_value_bytes() after sqlite3_value_blob() in this case. - ** If the blob was created using zeroblob(), then sqlite3_value_blob() - ** may call malloc(). If this malloc() fails, then the values returned - ** by both value_blob() and value_bytes() will be 0. If value_bytes() were - ** called first, then the NULL pointer returned by value_blob() might - ** be dereferenced. */ - const u8 *pBlob = sqlite3_value_blob(pVal); - int nBlob = sqlite3_value_bytes(pVal); - if( nBlob>FTS5_LOCALE_HDR_SIZE - && 0==memcmp(pBlob, FTS5_LOCALE_HDR(pConfig), FTS5_LOCALE_HDR_SIZE) + if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE + || (bContent && pConfig->bLocale) ){ - ret = 1; + bDecodeBlob = 1; } } - return ret; -} -/* -** Value pVal is guaranteed to be an fts5_locale() value, according to -** sqlite3Fts5IsLocaleValue(). This function extracts the text and locale -** from the value and returns them separately. -** -** If successful, SQLITE_OK is returned and (*ppText) and (*ppLoc) set -** to point to buffers containing the text and locale, as utf-8, -** respectively. In this case output parameters (*pnText) and (*pnLoc) are -** set to the sizes in bytes of these two buffers. 
-** -** Or, if an error occurs, then an SQLite error code is returned. The final -** value of the four output parameters is undefined in this case. -*/ -static int sqlite3Fts5DecodeLocaleValue( - sqlite3_value *pVal, - const char **ppText, - int *pnText, - const char **ppLoc, - int *pnLoc -){ - const char *p = sqlite3_value_blob(pVal); - int n = sqlite3_value_bytes(pVal); - int nLoc = 0; + if( bDecodeBlob ){ + const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + const u8 *pBlob = sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + + /* Unless this blob was read from the %_content table of an + ** FTS5_CONTENT_NORMAL table, it should have the 4 byte fts5_locale() + ** header. Check for this. If it is not found, return an error. */ + if( (!bContent || pConfig->eContent!=FTS5_CONTENT_NORMAL) ){ + if( nBlobFTS5_LOCALE_HDR_SIZE ); + if( rc==SQLITE_OK ){ + int nLocale = 0; - for(nLoc=FTS5_LOCALE_HDR_SIZE; p[nLoc]; nLoc++){ - if( nLoc==(n-1) ){ - return SQLITE_MISMATCH; + for(nLocale=0; nLocaleeContent==FTS5_CONTENT_NONE ){ fts5SetVtabError(pTab, "'rebuild' may not be used with a contentless fts5 table" ); @@ -257060,7 +256277,7 @@ static void fts5StorageInsert( ){ int rc = *pRc; if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, 0, apVal, piRowid); + rc = sqlite3Fts5StorageContentInsert(pTab->pStorage, apVal, piRowid); } if( rc==SQLITE_OK ){ rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal, *piRowid); @@ -257068,67 +256285,6 @@ static void fts5StorageInsert( *pRc = rc; } -/* -** -** This function is called when the user attempts an UPDATE on a contentless -** table. Parameter bRowidModified is true if the UPDATE statement modifies -** the rowid value. Parameter apVal[] contains the new values for each user -** defined column of the fts5 table. pConfig is the configuration object of the -** table being updated (guaranteed to be contentless). 
The contentless_delete=1 -** and contentless_unindexed=1 options may or may not be set. -** -** This function returns SQLITE_OK if the UPDATE can go ahead, or an SQLite -** error code if it cannot. In this case an error message is also loaded into -** pConfig. Output parameter (*pbContent) is set to true if the caller should -** update the %_content table only - not the FTS index or any other shadow -** table. This occurs when an UPDATE modifies only UNINDEXED columns of the -** table. -** -** An UPDATE may proceed if: -** -** * The only columns modified are UNINDEXED columns, or -** -** * The contentless_delete=1 option was specified and all of the indexed -** columns (not a subset) have been modified. -*/ -static int fts5ContentlessUpdate( - Fts5Config *pConfig, - sqlite3_value **apVal, - int bRowidModified, - int *pbContent -){ - int ii; - int bSeenIndex = 0; /* Have seen modified indexed column */ - int bSeenIndexNC = 0; /* Have seen unmodified indexed column */ - int rc = SQLITE_OK; - - for(ii=0; iinCol; ii++){ - if( pConfig->abUnindexed[ii]==0 ){ - if( sqlite3_value_nochange(apVal[ii]) ){ - bSeenIndexNC++; - }else{ - bSeenIndex++; - } - } - } - - if( bSeenIndex==0 && bRowidModified==0 ){ - *pbContent = 1; - }else{ - if( bSeenIndexNC || pConfig->bContentlessDelete==0 ){ - rc = SQLITE_ERROR; - sqlite3Fts5ConfigErrmsg(pConfig, - (pConfig->bContentlessDelete ? - "%s a subset of columns on fts5 contentless-delete table: %s" : - "%s contentless fts5 table: %s") - , "cannot UPDATE", pConfig->zName - ); - } - } - - return rc; -} - /* ** This function is the implementation of the xUpdate callback used by ** FTS3 virtual tables. It is invoked by SQLite each time a row is to be @@ -257215,34 +256371,44 @@ static int fts5UpdateMethod( assert( eType0==SQLITE_INTEGER || eType0==SQLITE_NULL ); assert( nArg!=1 || eType0==SQLITE_INTEGER ); + /* Filter out attempts to run UPDATE or DELETE on contentless tables. + ** This is not suported. 
Except - they are both supported if the CREATE + ** VIRTUAL TABLE statement contained "contentless_delete=1". */ + if( eType0==SQLITE_INTEGER + && pConfig->eContent==FTS5_CONTENT_NONE + && pConfig->bContentlessDelete==0 + ){ + pTab->p.base.zErrMsg = sqlite3_mprintf( + "cannot %s contentless fts5 table: %s", + (nArg>1 ? "UPDATE" : "DELETE from"), pConfig->zName + ); + rc = SQLITE_ERROR; + } + /* DELETE */ - if( nArg==1 ){ - /* It is only possible to DELETE from a contentless table if the - ** contentless_delete=1 flag is set. */ - if( fts5IsContentless(pTab, 1) && pConfig->bContentlessDelete==0 ){ - fts5SetVtabError(pTab, - "cannot DELETE from contentless fts5 table: %s", pConfig->zName - ); - rc = SQLITE_ERROR; - }else{ - i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ - rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); - bUpdateOrDelete = 1; - } + else if( nArg==1 ){ + i64 iDel = sqlite3_value_int64(apVal[0]); /* Rowid to delete */ + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iDel, 0, 0); + bUpdateOrDelete = 1; } /* INSERT or UPDATE */ else{ int eType1 = sqlite3_value_numeric_type(apVal[1]); - /* It is an error to write an fts5_locale() value to a table without - ** the locale=1 option. */ - if( pConfig->bLocale==0 ){ - int ii; - for(ii=0; iinCol; ii++){ - sqlite3_value *pVal = apVal[ii+2]; - if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ - fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); + /* Ensure that no fts5_locale() values are written to locale=0 tables. + ** And that no blobs except fts5_locale() blobs are written to indexed + ** (i.e. not UNINDEXED) columns of locale=1 tables. 
*/ + int ii; + for(ii=0; iinCol; ii++){ + if( sqlite3_value_type(apVal[ii+2])==SQLITE_BLOB ){ + int bSub = (sqlite3_value_subtype(apVal[ii+2])==FTS5_LOCALE_SUBTYPE); + if( (pConfig->bLocale && !bSub && pConfig->abUnindexed[ii]==0) + || (pConfig->bLocale==0 && bSub) + ){ + if( pConfig->bLocale==0 ){ + fts5SetVtabError(pTab, "fts5_locale() requires locale=1"); + } rc = SQLITE_MISMATCH; goto update_out; } @@ -257262,55 +256428,35 @@ static int fts5UpdateMethod( /* UPDATE */ else{ - Fts5Storage *pStorage = pTab->pStorage; i64 iOld = sqlite3_value_int64(apVal[0]); /* Old rowid */ i64 iNew = sqlite3_value_int64(apVal[1]); /* New rowid */ - int bContent = 0; /* Content only update */ - - /* If this is a contentless table (including contentless_unindexed=1 - ** tables), check if the UPDATE may proceed. */ - if( fts5IsContentless(pTab, 1) ){ - rc = fts5ContentlessUpdate(pConfig, &apVal[2], iOld!=iNew, &bContent); - if( rc!=SQLITE_OK ) goto update_out; - } - if( eType1!=SQLITE_INTEGER ){ rc = SQLITE_MISMATCH; }else if( iOld!=iNew ){ - assert( bContent==0 ); if( eConflict==SQLITE_REPLACE ){ - rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageDelete(pStorage, iNew, 0, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iNew, 0, 0); } fts5StorageInsert(&rc, pTab, apVal, pRowid); }else{ - rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); + rc = sqlite3Fts5StorageFindDeleteRow(pTab->pStorage, iOld); if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageContentInsert(pStorage, 0, apVal, pRowid); + rc = sqlite3Fts5StorageContentInsert(pTab->pStorage,apVal,pRowid); } if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 0); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); } if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageIndexInsert(pStorage, apVal, *pRowid); + rc = sqlite3Fts5StorageIndexInsert(pTab->pStorage, apVal,*pRowid); } } - }else if( bContent 
){ - /* This occurs when an UPDATE on a contentless table affects *only* - ** UNINDEXED columns. This is a no-op for contentless_unindexed=0 - ** tables, or a write to the %_content table only for =1 tables. */ - assert( fts5IsContentless(pTab, 1) ); - rc = sqlite3Fts5StorageFindDeleteRow(pStorage, iOld); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts5StorageContentInsert(pStorage, 1, apVal, pRowid); - } }else{ - rc = sqlite3Fts5StorageDelete(pStorage, iOld, 0, 1); + rc = sqlite3Fts5StorageDelete(pTab->pStorage, iOld, 0, 1); fts5StorageInsert(&rc, pTab, apVal, pRowid); } bUpdateOrDelete = 1; - sqlite3Fts5StorageReleaseDeleteRow(pStorage); + sqlite3Fts5StorageReleaseDeleteRow(pTab->pStorage); } } @@ -257424,11 +256570,11 @@ static int fts5ApiTokenize_v2( Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab); int rc = SQLITE_OK; - sqlite3Fts5SetLocale(pTab->pConfig, pLoc, nLoc); + fts5SetLocale(pTab->pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pTab->pConfig, FTS5_TOKENIZE_AUX, pText, nText, pUserData, xToken ); - sqlite3Fts5SetLocale(pTab->pConfig, 0, 0); + fts5SetLocale(pTab->pConfig, 0, 0); return rc; } @@ -257456,49 +256602,6 @@ static int fts5ApiPhraseSize(Fts5Context *pCtx, int iPhrase){ return sqlite3Fts5ExprPhraseSize(pCsr->pExpr, iPhrase); } -/* -** Argument pStmt is an SQL statement of the type used by Fts5Cursor. This -** function extracts the text value of column iCol of the current row. -** Additionally, if there is an associated locale, it invokes -** sqlite3Fts5SetLocale() to configure the tokenizer. In all cases the caller -** should invoke sqlite3Fts5ClearLocale() to clear the locale at some point -** after this function returns. -** -** If successful, (*ppText) is set to point to a buffer containing the text -** value as utf-8 and SQLITE_OK returned. (*pnText) is set to the size of that -** buffer in bytes. It is not guaranteed to be nul-terminated. If an error -** occurs, an SQLite error code is returned. 
The final values of the two -** output parameters are undefined in this case. -*/ -static int fts5TextFromStmt( - Fts5Config *pConfig, - sqlite3_stmt *pStmt, - int iCol, - const char **ppText, - int *pnText -){ - sqlite3_value *pVal = sqlite3_column_value(pStmt, iCol+1); - const char *pLoc = 0; - int nLoc = 0; - int rc = SQLITE_OK; - - if( pConfig->bLocale - && pConfig->eContent==FTS5_CONTENT_EXTERNAL - && sqlite3Fts5IsLocaleValue(pConfig, pVal) - ){ - rc = sqlite3Fts5DecodeLocaleValue(pVal, ppText, pnText, &pLoc, &nLoc); - }else{ - *ppText = (const char*)sqlite3_value_text(pVal); - *pnText = sqlite3_value_bytes(pVal); - if( pConfig->bLocale && pConfig->eContent==FTS5_CONTENT_NORMAL ){ - pLoc = (const char*)sqlite3_column_text(pStmt, iCol+1+pConfig->nCol); - nLoc = sqlite3_column_bytes(pStmt, iCol+1+pConfig->nCol); - } - } - sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); - return rc; -} - static int fts5ApiColumnText( Fts5Context *pCtx, int iCol, @@ -257512,14 +256615,16 @@ static int fts5ApiColumnText( assert( pCsr->ePlan!=FTS5_PLAN_SPECIAL ); if( iCol<0 || iCol>=pTab->pConfig->nCol ){ rc = SQLITE_RANGE; - }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab), 0) ){ + }else if( fts5IsContentless((Fts5FullTable*)(pCsr->base.pVtab)) ){ *pz = 0; *pn = 0; }else{ rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ - rc = fts5TextFromStmt(pTab->pConfig, pCsr->pStmt, iCol, pz, pn); - sqlite3Fts5ClearLocale(pTab->pConfig); + Fts5Config *pConfig = pTab->pConfig; + int bContent = (pConfig->abUnindexed[iCol]==0); + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); + sqlite3Fts5ExtractText(pConfig, pVal, bContent, 0, pz, pn); } } return rc; @@ -257545,7 +256650,7 @@ static int fts5CsrPoslist( if( iPhrase<0 || iPhrase>=sqlite3Fts5ExprPhraseCount(pCsr->pExpr) ){ rc = SQLITE_RANGE; }else if( pConfig->eDetail!=FTS5_DETAIL_FULL - && fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) + && pConfig->eContent==FTS5_CONTENT_NONE ){ *pa = 0; *pn = 0; @@ -257561,15 
+256666,17 @@ static int fts5CsrPoslist( rc = fts5SeekCursor(pCsr, 0); } for(i=0; inCol && rc==SQLITE_OK; i++){ + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1); const char *z = 0; int n = 0; - rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); + int bReset = 0; + rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5ExprPopulatePoslists( pConfig, pCsr->pExpr, aPopulator, i, z, n ); } - sqlite3Fts5ClearLocale(pConfig); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } sqlite3_free(aPopulator); @@ -257741,7 +256848,7 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ if( pConfig->bColumnsize ){ i64 iRowid = fts5CursorRowid(pCsr); rc = sqlite3Fts5StorageDocsize(pTab->pStorage, iRowid, pCsr->aColumnSize); - }else if( !pConfig->zContent || pConfig->eContent==FTS5_CONTENT_UNINDEXED ){ + }else if( pConfig->zContent==0 ){ int i; for(i=0; inCol; i++){ if( pConfig->abUnindexed[i]==0 ){ @@ -257755,14 +256862,17 @@ static int fts5ApiColumnSize(Fts5Context *pCtx, int iCol, int *pnToken){ if( pConfig->abUnindexed[i]==0 ){ const char *z = 0; int n = 0; + int bReset = 0; + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, i+1); + pCsr->aColumnSize[i] = 0; - rc = fts5TextFromStmt(pConfig, pCsr->pStmt, i, &z, &n); + rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &z, &n); if( rc==SQLITE_OK ){ rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_AUX, z, n, (void*)&pCsr->aColumnSize[i], fts5ColumnSizeCb ); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } - sqlite3Fts5ClearLocale(pConfig); } } } @@ -258029,19 +257139,42 @@ static int fts5ApiColumnLocale( rc = SQLITE_RANGE; }else if( pConfig->abUnindexed[iCol]==0 - && 0==fts5IsContentless((Fts5FullTable*)pCsr->base.pVtab, 1) + && pConfig->eContent!=FTS5_CONTENT_NONE && pConfig->bLocale ){ rc = fts5SeekCursor(pCsr, 0); if( rc==SQLITE_OK ){ - const char *zDummy = 0; - int nDummy = 0; - rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, 
&zDummy, &nDummy); - if( rc==SQLITE_OK ){ - *pzLocale = pConfig->t.pLocale; - *pnLocale = pConfig->t.nLocale; + /* Load the value into pVal. pVal is a locale/text pair iff: + ** + ** 1) It is an SQLITE_BLOB, and + ** 2) Either the subtype is FTS5_LOCALE_SUBTYPE, or else the + ** value was loaded from an FTS5_CONTENT_NORMAL table, and + ** 3) It does not begin with an 0x00 byte. + */ + sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); + if( sqlite3_value_type(pVal)==SQLITE_BLOB ){ + const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ + const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + if( nBlobeContent!=FTS5_CONTENT_NONE ); + + if( pConfig->bLocale + && sqlite3_value_type(pVal)==SQLITE_BLOB + && pConfig->abUnindexed[iCol]==0 + ){ + const int SZHDR = sizeof(FTS5_LOCALE_HEADER)-1; + const u8 *pBlob = sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + int ii; + + if( pConfig->eContent==FTS5_CONTENT_EXTERNAL ){ + if( nBlobeContent!=FTS5_CONTENT_NONE ){ + /* A column created by the user containing values. 
*/ + int bNochange = sqlite3_vtab_nochange(pCtx); + + if( fts5IsContentless(pTab) ){ + if( bNochange && pConfig->bContentlessDelete ){ + fts5ResultError(pCtx, "cannot UPDATE a subset of " + "columns on fts5 contentless-delete table: %s", pConfig->zName + ); + } + }else if( bNochange==0 || pConfig->eContent!=FTS5_CONTENT_NORMAL ){ pConfig->pzErrmsg = &pTab->p.base.zErrMsg; rc = fts5SeekCursor(pCsr, 1); if( rc==SQLITE_OK ){ sqlite3_value *pVal = sqlite3_column_value(pCsr->pStmt, iCol+1); - if( pConfig->bLocale - && pConfig->eContent==FTS5_CONTENT_EXTERNAL - && sqlite3Fts5IsLocaleValue(pConfig, pVal) - ){ - const char *z = 0; - int n = 0; - rc = fts5TextFromStmt(pConfig, pCsr->pStmt, iCol, &z, &n); - if( rc==SQLITE_OK ){ - sqlite3_result_text(pCtx, z, n, SQLITE_TRANSIENT); - } - sqlite3Fts5ClearLocale(pConfig); - }else{ - sqlite3_result_value(pCtx, pVal); - } + fts5ExtractValueFromColumn(pCtx, pConfig, iCol, pVal); } - pConfig->pzErrmsg = 0; } } @@ -258847,7 +258026,7 @@ static void fts5SourceIdFunc( ){ assert( nArg==0 ); UNUSED_PARAM2(nArg, apUnused); - sqlite3_result_text(pCtx, "fts5: 2024-11-15 19:25:39 ed829bf2b069a48c644ae5706399dad7486e5abb87dc1225764038ac258ea4dc", -1, SQLITE_TRANSIENT); + sqlite3_result_text(pCtx, "fts5: 2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f", -1, SQLITE_TRANSIENT); } /* @@ -258886,12 +258065,13 @@ static void fts5LocaleFunc( if( zLocale==0 || zLocale[0]=='\0' ){ sqlite3_result_text(pCtx, zText, nText, SQLITE_TRANSIENT); }else{ - Fts5Global *p = (Fts5Global*)sqlite3_user_data(pCtx); u8 *pBlob = 0; u8 *pCsr = 0; int nBlob = 0; + const int nHdr = 4; + assert( sizeof(FTS5_LOCALE_HEADER)==nHdr+1 ); - nBlob = FTS5_LOCALE_HDR_SIZE + nLocale + 1 + nText; + nBlob = nHdr + nLocale + 1 + nText; pBlob = (u8*)sqlite3_malloc(nBlob); if( pBlob==0 ){ sqlite3_result_error_nomem(pCtx); @@ -258899,8 +258079,8 @@ static void fts5LocaleFunc( } pCsr = pBlob; - memcpy(pCsr, (const u8*)p->aLocaleHdr, 
FTS5_LOCALE_HDR_SIZE); - pCsr += FTS5_LOCALE_HDR_SIZE; + memcpy(pCsr, FTS5_LOCALE_HEADER, nHdr); + pCsr += nHdr; memcpy(pCsr, zLocale, nLocale); pCsr += nLocale; (*pCsr++) = 0x00; @@ -258908,6 +258088,7 @@ static void fts5LocaleFunc( assert( &pCsr[nText]==&pBlob[nBlob] ); sqlite3_result_blob(pCtx, pBlob, nBlob, sqlite3_free); + sqlite3_result_subtype(pCtx, FTS5_LOCALE_SUBTYPE); } } @@ -259009,16 +258190,6 @@ static int fts5Init(sqlite3 *db){ pGlobal->api.xFindTokenizer = fts5FindTokenizer; pGlobal->api.xCreateTokenizer_v2 = fts5CreateTokenizer_v2; pGlobal->api.xFindTokenizer_v2 = fts5FindTokenizer_v2; - - /* Initialize pGlobal->aLocaleHdr[] to a 128-bit pseudo-random vector. - ** The constants below were generated randomly. */ - sqlite3_randomness(sizeof(pGlobal->aLocaleHdr), pGlobal->aLocaleHdr); - pGlobal->aLocaleHdr[0] ^= 0xF924976D; - pGlobal->aLocaleHdr[1] ^= 0x16596E13; - pGlobal->aLocaleHdr[2] ^= 0x7C80BEAA; - pGlobal->aLocaleHdr[3] ^= 0x9B03A67F; - assert( sizeof(pGlobal->aLocaleHdr)==16 ); - rc = sqlite3_create_module_v2(db, "fts5", &fts5Mod, p, fts5ModuleDestroy); if( rc==SQLITE_OK ) rc = sqlite3Fts5IndexInit(db); if( rc==SQLITE_OK ) rc = sqlite3Fts5ExprInit(pGlobal, db); @@ -259245,35 +258416,20 @@ static int fts5StorageGetStmt( case FTS5_STMT_INSERT_CONTENT: case FTS5_STMT_REPLACE_CONTENT: { - char *zBind = 0; + int nCol = pC->nCol + 1; + char *zBind; int i; - assert( pC->eContent==FTS5_CONTENT_NORMAL - || pC->eContent==FTS5_CONTENT_UNINDEXED - ); - - /* Add bindings for the "c*" columns - those that store the actual - ** table content. If eContent==NORMAL, then there is one binding - ** for each column. Or, if eContent==UNINDEXED, then there are only - ** bindings for the UNINDEXED columns. */ - for(i=0; rc==SQLITE_OK && i<(pC->nCol+1); i++){ - if( !i || pC->eContent==FTS5_CONTENT_NORMAL || pC->abUnindexed[i-1] ){ - zBind = sqlite3Fts5Mprintf(&rc, "%z%s?%d", zBind, zBind?",":"",i+1); - } - } - - /* Add bindings for any "l*" columns. 
Only non-UNINDEXED columns - ** require these. */ - if( pC->bLocale && pC->eContent==FTS5_CONTENT_NORMAL ){ - for(i=0; rc==SQLITE_OK && inCol; i++){ - if( pC->abUnindexed[i]==0 ){ - zBind = sqlite3Fts5Mprintf(&rc, "%z,?%d", zBind, pC->nCol+i+2); - } + zBind = sqlite3_malloc64(1 + nCol*2); + if( zBind ){ + for(i=0; izDb, pC->zName, zBind); + sqlite3_free(zBind); } - - zSql = sqlite3Fts5Mprintf(&rc, azStmt[eStmt], pC->zDb, pC->zName,zBind); - sqlite3_free(zBind); break; } @@ -259459,11 +258615,9 @@ static int sqlite3Fts5StorageOpen( p->pIndex = pIndex; if( bCreate ){ - if( pConfig->eContent==FTS5_CONTENT_NORMAL - || pConfig->eContent==FTS5_CONTENT_UNINDEXED - ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ int nDefn = 32 + pConfig->nCol*10; - char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 20); + char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10); if( zDefn==0 ){ rc = SQLITE_NOMEM; }else{ @@ -259472,20 +258626,8 @@ static int sqlite3Fts5StorageOpen( sqlite3_snprintf(nDefn, zDefn, "id INTEGER PRIMARY KEY"); iOff = (int)strlen(zDefn); for(i=0; inCol; i++){ - if( pConfig->eContent==FTS5_CONTENT_NORMAL - || pConfig->abUnindexed[i] - ){ - sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); - iOff += (int)strlen(&zDefn[iOff]); - } - } - if( pConfig->bLocale ){ - for(i=0; inCol; i++){ - if( pConfig->abUnindexed[i]==0 ){ - sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i); - iOff += (int)strlen(&zDefn[iOff]); - } - } + sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i); + iOff += (int)strlen(&zDefn[iOff]); } rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr); } @@ -259638,8 +258780,7 @@ static int fts5StorageDeleteFromIndex( sqlite3_value *pVal = 0; const char *pText = 0; int nText = 0; - const char *pLoc = 0; - int nLoc = 0; + int bReset = 0; assert( pSeek==0 || apVal==0 ); assert( pSeek!=0 || apVal!=0 ); @@ -259649,19 +258790,10 @@ static int fts5StorageDeleteFromIndex( pVal = apVal[iCol-1]; } - if( 
pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ - rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); - }else{ - pText = (const char*)sqlite3_value_text(pVal); - nText = sqlite3_value_bytes(pVal); - if( pConfig->bLocale && pSeek ){ - pLoc = (const char*)sqlite3_column_text(pSeek, iCol + pConfig->nCol); - nLoc = sqlite3_column_bytes(pSeek, iCol + pConfig->nCol); - } - } - + rc = sqlite3Fts5ExtractText( + pConfig, pVal, pSeek!=0, &bReset, &pText, &nText + ); if( rc==SQLITE_OK ){ - sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); ctx.szCol = 0; rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageInsertCallback @@ -259670,7 +258802,7 @@ static int fts5StorageDeleteFromIndex( if( rc==SQLITE_OK && p->aTotalSize[iCol-1]<0 ){ rc = FTS5_CORRUPT; } - sqlite3Fts5ClearLocale(pConfig); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } } } @@ -259715,9 +258847,7 @@ static int fts5StorageContentlessDelete(Fts5Storage *p, i64 iDel){ int rc = SQLITE_OK; assert( p->pConfig->bContentlessDelete ); - assert( p->pConfig->eContent==FTS5_CONTENT_NONE - || p->pConfig->eContent==FTS5_CONTENT_UNINDEXED - ); + assert( p->pConfig->eContent==FTS5_CONTENT_NONE ); /* Look up the origin of the document in the %_docsize table. Store ** this in stack variable iOrigin. 
*/ @@ -259841,12 +258971,6 @@ static int sqlite3Fts5StorageDelete( if( rc==SQLITE_OK ){ if( p->pConfig->bContentlessDelete ){ rc = fts5StorageContentlessDelete(p, iDel); - if( rc==SQLITE_OK - && bSaveRow - && p->pConfig->eContent==FTS5_CONTENT_UNINDEXED - ){ - rc = sqlite3Fts5StorageFindDeleteRow(p, iDel); - } }else{ rc = fts5StorageDeleteFromIndex(p, iDel, apVal, bSaveRow); } @@ -259863,9 +258987,7 @@ static int sqlite3Fts5StorageDelete( } /* Delete the %_content record */ - if( pConfig->eContent==FTS5_CONTENT_NORMAL - || pConfig->eContent==FTS5_CONTENT_UNINDEXED - ){ + if( pConfig->eContent==FTS5_CONTENT_NORMAL ){ if( rc==SQLITE_OK ){ rc = fts5StorageGetStmt(p, FTS5_STMT_DELETE_CONTENT, &pDel, 0); } @@ -259897,13 +259019,8 @@ static int sqlite3Fts5StorageDeleteAll(Fts5Storage *p){ ); if( rc==SQLITE_OK && pConfig->bColumnsize ){ rc = fts5ExecPrintf(pConfig->db, 0, - "DELETE FROM %Q.'%q_docsize';", pConfig->zDb, pConfig->zName - ); - } - - if( rc==SQLITE_OK && pConfig->eContent==FTS5_CONTENT_UNINDEXED ){ - rc = fts5ExecPrintf(pConfig->db, 0, - "DELETE FROM %Q.'%q_content';", pConfig->zDb, pConfig->zName + "DELETE FROM %Q.'%q_docsize';", + pConfig->zDb, pConfig->zName ); } @@ -259944,35 +259061,20 @@ static int sqlite3Fts5StorageRebuild(Fts5Storage *p){ for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ + int bReset = 0; /* True if tokenizer locale must be reset */ int nText = 0; /* Size of pText in bytes */ const char *pText = 0; /* Pointer to buffer containing text value */ - int nLoc = 0; /* Size of pLoc in bytes */ - const char *pLoc = 0; /* Pointer to buffer containing text value */ - sqlite3_value *pVal = sqlite3_column_value(pScan, ctx.iCol+1); - if( pConfig->eContent==FTS5_CONTENT_EXTERNAL - && sqlite3Fts5IsLocaleValue(pConfig, pVal) - ){ - rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); - }else{ - pText = (const char*)sqlite3_value_text(pVal); - nText = 
sqlite3_value_bytes(pVal); - if( pConfig->bLocale ){ - int iCol = ctx.iCol + 1 + pConfig->nCol; - pLoc = (const char*)sqlite3_column_text(pScan, iCol); - nLoc = sqlite3_column_bytes(pScan, iCol); - } - } + rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &pText, &nText); if( rc==SQLITE_OK ){ - sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageInsertCallback ); - sqlite3Fts5ClearLocale(pConfig); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); @@ -260039,7 +259141,6 @@ static int fts5StorageNewRowid(Fts5Storage *p, i64 *piRowid){ */ static int sqlite3Fts5StorageContentInsert( Fts5Storage *p, - int bReplace, /* True to use REPLACE instead of INSERT */ sqlite3_value **apVal, i64 *piRowid ){ @@ -260047,9 +259148,7 @@ static int sqlite3Fts5StorageContentInsert( int rc = SQLITE_OK; /* Insert the new row into the %_content table. */ - if( pConfig->eContent!=FTS5_CONTENT_NORMAL - && pConfig->eContent!=FTS5_CONTENT_UNINDEXED - ){ + if( pConfig->eContent!=FTS5_CONTENT_NORMAL ){ if( sqlite3_value_type(apVal[1])==SQLITE_INTEGER ){ *piRowid = sqlite3_value_int64(apVal[1]); }else{ @@ -260058,52 +259157,33 @@ static int sqlite3Fts5StorageContentInsert( }else{ sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */ int i; /* Counter variable */ - - assert( FTS5_STMT_INSERT_CONTENT+1==FTS5_STMT_REPLACE_CONTENT ); - assert( bReplace==0 || bReplace==1 ); - rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT+bReplace, &pInsert, 0); - if( pInsert ) sqlite3_clear_bindings(pInsert); - - /* Bind the rowid value */ - sqlite3_bind_value(pInsert, 1, apVal[1]); - - /* Loop through values for user-defined columns. i=2 is the leftmost - ** user-defined column. As is column 1 of pSavedRow. 
*/ - for(i=2; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ - int bUnindexed = pConfig->abUnindexed[i-2]; - if( pConfig->eContent==FTS5_CONTENT_NORMAL || bUnindexed ){ - sqlite3_value *pVal = apVal[i]; - - if( sqlite3_value_nochange(pVal) && p->pSavedRow ){ - /* This is an UPDATE statement, and user-defined column (i-2) was not - ** modified. Retrieve the value from Fts5Storage.pSavedRow. */ - pVal = sqlite3_column_value(p->pSavedRow, i-1); - if( pConfig->bLocale && bUnindexed==0 ){ - sqlite3_bind_value(pInsert, pConfig->nCol + i, - sqlite3_column_value(p->pSavedRow, pConfig->nCol + i - 1) - ); - } - }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ + rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0); + for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){ + sqlite3_value *pVal = apVal[i]; + if( sqlite3_value_nochange(pVal) && p->pSavedRow ){ + /* This is an UPDATE statement, and column (i-2) was not modified. + ** Retrieve the value from Fts5Storage.pSavedRow instead. */ + pVal = sqlite3_column_value(p->pSavedRow, i-1); + }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){ + assert( pConfig->bLocale ); + assert( i>1 ); + if( pConfig->abUnindexed[i-2] ){ + /* At attempt to insert an fts5_locale() value into an UNINDEXED + ** column. Strip the locale away and just bind the text. 
*/ const char *pText = 0; - const char *pLoc = 0; int nText = 0; - int nLoc = 0; - assert( pConfig->bLocale ); - - rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); - if( rc==SQLITE_OK ){ - sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); - if( bUnindexed==0 ){ - int iLoc = pConfig->nCol + i; - sqlite3_bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT); - } - } - - continue; + rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText); + sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT); + }else{ + const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal); + int nBlob = sqlite3_value_bytes(pVal); + assert( nBlob>4 ); + sqlite3_bind_blob(pInsert, i, pBlob+4, nBlob-4, SQLITE_TRANSIENT); } - - rc = sqlite3_bind_value(pInsert, i, pVal); + continue; } + + rc = sqlite3_bind_value(pInsert, i, pVal); } if( rc==SQLITE_OK ){ sqlite3_step(pInsert); @@ -260138,37 +259218,23 @@ static int sqlite3Fts5StorageIndexInsert( for(ctx.iCol=0; rc==SQLITE_OK && ctx.iColnCol; ctx.iCol++){ ctx.szCol = 0; if( pConfig->abUnindexed[ctx.iCol]==0 ){ + int bReset = 0; /* True if tokenizer locale must be reset */ int nText = 0; /* Size of pText in bytes */ const char *pText = 0; /* Pointer to buffer containing text value */ - int nLoc = 0; /* Size of pText in bytes */ - const char *pLoc = 0; /* Pointer to buffer containing text value */ - sqlite3_value *pVal = apVal[ctx.iCol+2]; + int bDisk = 0; if( p->pSavedRow && sqlite3_value_nochange(pVal) ){ pVal = sqlite3_column_value(p->pSavedRow, ctx.iCol+1); - if( pConfig->eContent==FTS5_CONTENT_NORMAL && pConfig->bLocale ){ - int iCol = ctx.iCol + 1 + pConfig->nCol; - pLoc = (const char*)sqlite3_column_text(p->pSavedRow, iCol); - nLoc = sqlite3_column_bytes(p->pSavedRow, iCol); - } - }else{ - pVal = apVal[ctx.iCol+2]; - } - - if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){ - rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc); - }else{ - pText = (const 
char*)sqlite3_value_text(pVal); - nText = sqlite3_value_bytes(pVal); + bDisk = 1; } - + rc = sqlite3Fts5ExtractText(pConfig, pVal, bDisk, &bReset, &pText,&nText); if( rc==SQLITE_OK ){ - sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); + assert( bReset==0 || pConfig->bLocale ); rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageInsertCallback ); - sqlite3Fts5ClearLocale(pConfig); + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } } sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol); @@ -260333,62 +259399,38 @@ static int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){ rc = sqlite3Fts5TermsetNew(&ctx.pTermset); } for(i=0; rc==SQLITE_OK && inCol; i++){ - if( pConfig->abUnindexed[i]==0 ){ - const char *pText = 0; - int nText = 0; - const char *pLoc = 0; - int nLoc = 0; - sqlite3_value *pVal = sqlite3_column_value(pScan, i+1); - - if( pConfig->eContent==FTS5_CONTENT_EXTERNAL - && sqlite3Fts5IsLocaleValue(pConfig, pVal) - ){ - rc = sqlite3Fts5DecodeLocaleValue( - pVal, &pText, &nText, &pLoc, &nLoc - ); - }else{ - if( pConfig->eContent==FTS5_CONTENT_NORMAL && pConfig->bLocale ){ - int iCol = i + 1 + pConfig->nCol; - pLoc = (const char*)sqlite3_column_text(pScan, iCol); - nLoc = sqlite3_column_bytes(pScan, iCol); - } - pText = (const char*)sqlite3_value_text(pVal); - nText = sqlite3_value_bytes(pVal); - } - - ctx.iCol = i; - ctx.szCol = 0; - - if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ - rc = sqlite3Fts5TermsetNew(&ctx.pTermset); - } + if( pConfig->abUnindexed[i] ) continue; + ctx.iCol = i; + ctx.szCol = 0; + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + rc = sqlite3Fts5TermsetNew(&ctx.pTermset); + } + if( rc==SQLITE_OK ){ + int bReset = 0; /* True if tokenizer locale must be reset */ + int nText = 0; /* Size of pText in bytes */ + const char *pText = 0; /* Pointer to buffer containing text value */ + rc = sqlite3Fts5ExtractText(pConfig, + sqlite3_column_value(pScan, i+1), 1, &bReset, &pText, &nText + ); if( 
rc==SQLITE_OK ){ - sqlite3Fts5SetLocale(pConfig, pLoc, nLoc); rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx, fts5StorageIntegrityCallback ); - sqlite3Fts5ClearLocale(pConfig); - } - - /* If this is not a columnsize=0 database, check that the number - ** of tokens in the value matches the aColSize[] value read from - ** the %_docsize table. */ - if( rc==SQLITE_OK - && pConfig->bColumnsize - && ctx.szCol!=aColSize[i] - ){ - rc = FTS5_CORRUPT; - } - aTotalSize[i] += ctx.szCol; - if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ - sqlite3Fts5TermsetFree(ctx.pTermset); - ctx.pTermset = 0; + if( bReset ) sqlite3Fts5ClearLocale(pConfig); } } + if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){ + rc = FTS5_CORRUPT; + } + aTotalSize[i] += ctx.szCol; + if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){ + sqlite3Fts5TermsetFree(ctx.pTermset); + ctx.pTermset = 0; + } } sqlite3Fts5TermsetFree(ctx.pTermset); ctx.pTermset = 0; @@ -260813,7 +259855,7 @@ static const unsigned char sqlite3Utf8Trans1[] = { c = *(zIn++); \ if( c>=0xc0 ){ \ c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn=zEof ) return SQLITE_OK; READ_UTF8(zIn, zEof, iCode); + if( iCode==0 ) return SQLITE_OK; if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); }while( iCode==0 ); WRITE_UTF8(zOut, iCode); @@ -262000,11 +261042,8 @@ static int fts5TriTokenize( /* Read characters from the input up until the first non-diacritic */ do { iNext = zIn - (const unsigned char*)pText; - if( zIn>=zEof ){ - iCode = 0; - break; - } READ_UTF8(zIn, zEof, iCode); + if( iCode==0 ) break; if( p->bFold ) iCode = sqlite3Fts5UnicodeFold(iCode, p->iFoldParam); }while( iCode==0 ); @@ -264041,7 +263080,7 @@ static int sqlite3Fts5VocabInit(Fts5Global *pGlobal, sqlite3 *db){ } -/* Here ends the fts5.c composite file. 
*/ + #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS5) */ /************** End of fts5.c ************************************************/ @@ -264395,21268 +263434,6 @@ SQLITE_API int sqlite3_stmt_init( #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_STMTVTAB) */ /************** End of stmt.c ************************************************/ -/************** Begin file hct_pman.c ****************************************/ -/* -** 2022 April 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - - -/************** Include hctInt.h in the middle of hct_pman.c *****************/ -/************** Begin file hctInt.h ******************************************/ - -/* #include */ -/************** Include sqlite3hct.h in the middle of hctInt.h ***************/ -/************** Begin file sqlite3hct.h **************************************/ -/* -** 2023 May 16 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - - - -#ifndef SQLITE3HCT_H -#define SQLITE3HCT_H - -/* -** Make sure we can call this stuff from C++. -*/ -#if 0 -extern "C" { -#endif - -#define SQLITE_HCT_JOURNAL_HASHSIZE 16 - -/* -** Initialize the main database for replication. -*/ -SQLITE_API int sqlite3_hct_journal_init(sqlite3 *db); - -/* -** Write a transaction into the database. 
-*/ -SQLITE_API int sqlite3_hct_journal_write( - sqlite3 *db, /* Write to "main" db of this handle */ - sqlite3_int64 iCid, - const char *zSchema, - const void *pData, int nData, - sqlite3_int64 iSchemaCid -); - -SQLITE_API int sqlite3_hct_journal_truncate(sqlite3 *db, sqlite3_int64 iMinCid); - -/* -** Candidate values for second arg to sqlite3_hct_journal_setmode() -*/ -#define SQLITE_HCT_JOURNAL_MODE_FOLLOWER 0 -#define SQLITE_HCT_JOURNAL_MODE_LEADER 1 - -/* -** Query the LEADER/FOLLOWER setting of the db passed as the only argument. -*/ -SQLITE_API int sqlite3_hct_journal_mode(sqlite3 *db); - -/* -** Set the LEADER/FOLLOWER setting of the db passed as the first argument. -** Return SQLITE_OK if successful. Otherwise, return an SQLite error code -** and leave an English language error message (accessible using -** sqlite3_errmsg()) in the database handle. -*/ -SQLITE_API int sqlite3_hct_journal_setmode(sqlite3 *db, int eMode); - -/* -** Rollback transactions that follow the first hole in the journal. -*/ -SQLITE_API int sqlite3_hct_journal_rollback(sqlite3 *db, sqlite3_int64 iCid); - -/* -** Special values that may be passed as second argument to -** sqlite3_hct_journal_rollback(). -*/ -#define SQLITE_HCT_ROLLBACK_MAXIMUM 0 -#define SQLITE_HCT_ROLLBACK_PRESERVE -1 - -/* -** Set output variable (*piCid) to the CID of the newest available -** database snapshot. Return SQLITE_OK if successful, or an SQLite -** error code if something goes wrong. -*/ -SQLITE_API int sqlite3_hct_journal_snapshot(sqlite3 *db, sqlite3_int64 *piCid); - -/* -** Register a custom validation callback with the database handle. -*/ -SQLITE_API int sqlite3_hct_journal_hook( - sqlite3 *db, - void *pArg, - int(*xValidate)( - void *pCopyOfArg, - sqlite3_int64 iCid, - const char *zSchema, - const void *pData, int nData, - sqlite3_int64 iSchemaCid - ) -); - -/* -** Both arguments are assumed to point to SQLITE_HCT_JOURNAL_HASHSIZE -** byte buffers. 
This function updates the hash stored in buffer pHash -** based on the contents of buffer pData. -*/ -SQLITE_API void sqlite3_hct_journal_hash(void *pHash, const void *pData); - -/* -** It is assumed that buffer pHash points to a buffer -** SQLITE_HCT_JOURNAL_HASHSIZE bytes in size. This function populates this -** buffer with a hash based on the remaining arguments. -*/ -SQLITE_API void sqlite3_hct_journal_hashentry( - void *pHash, /* OUT: Hash of other arguments */ - sqlite3_int64 iCid, - const char *zSchema, - const void *pData, int nData, - sqlite3_int64 iSchemaCid -); - -SQLITE_API void sqlite3_hct_migrate_mode(sqlite3 *db, int bActivate); - -#if 0 -} -#endif -#endif /* SQLITE3HCT_H */ - -/************** End of sqlite3hct.h ******************************************/ -/************** Continuing where we left off in hctInt.h *********************/ - -typedef sqlite3_int64 i64; -typedef unsigned char u8; -typedef unsigned int u32; - -/* -** Primitives for atomic load and store. -*/ -#define HctAtomicStore(PTR,VAL) __atomic_store_n((PTR),(VAL), __ATOMIC_SEQ_CST) -#define HctAtomicLoad(PTR) __atomic_load_n((PTR), __ATOMIC_SEQ_CST) - -#define HctCASBool(PTR,OLD,NEW) \ - (int)__sync_bool_compare_and_swap((PTR),(OLD),(NEW)) - - -/* -*/ -typedef struct HctConfig HctConfig; -struct HctConfig { - int nDbFile; /* Number of files (hct_file.c) */ - int nPageSet; /* Used by hct_pman.c */ - int nPageScan; /* Used by hct_pman.c */ - int szLogChunk; /* Used by hctree.c */ - int nTryBeforeUnevict; - int bQuiescentIntegrityCheck; /* PRAGMA hct_quiescent_integrity_check */ - int pgsz; - sqlite3 *db; -}; - -#define HCT_TID_MASK ((((u64)0x00FFFFFF) << 32)|0xFFFFFFFF) -#define HCT_PGNO_MASK ((u64)0xFFFFFFFF) - -#define HCT_MAX_NDBFILE 128 - -#define HCT_DEFAULT_NDBFILE 1 -#define HCT_DEFAULT_NPAGESET 256 -#define HCT_DEFAULT_NTRYBEFOREUNEVICT 100 -#define HCT_DEFAULT_NPAGESCAN 1024 -#define HCT_DEFAULT_SZLOGCHUNK 16384 -#define HCT_DEFAULT_PAGESIZE 4096 - - - -/************** 
Include hctTMapInt.h in the middle of hctInt.h ***************/ -/************** Begin file hctTMapInt.h **************************************/ -/* -** 2021 February 24 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** -** This header file describes the transaction map implementation. It -** serves two tasks: -** -** * Provides the transaction map itself, a mapping from 56-bit TID values -** to a combination of a CID value (also 56 bits) and some flags. -** -** * Provides the read-lock system required by readers to ensure that old -** database pages and other resources are not reused before they -** are guaranteed to be finished with them. -*/ - -/* -*/ - -/* #define HCT_TMAP_PAGESIZE 1024 */ - -#define HCT_TMAP_PGSZBITS 10 -#define HCT_TMAP_PAGESIZE (1 << HCT_TMAP_PGSZBITS) - -#define HCT_TMAP_ENTRYSLOT(iEntry) \ - (((iEntry) >> 3) + (((iEntry) & 0x07) << (HCT_TMAP_PGSZBITS-3))) -// #define HCT_TMAP_ENTRYSLOT(iEntry) (((iEntry) >> 3) + (((iEntry) & 0x07) << 10)) - -/* -** Transaction state - stored in the MSB of the 8-byte transaction map entry. -*/ -#define HCT_TMAP_WRITING (((u64)0x00) << 56) -#define HCT_TMAP_VALIDATING (((u64)0x01) << 56) -#define HCT_TMAP_ROLLBACK (((u64)0x02) << 56) -#define HCT_TMAP_COMMITTED (((u64)0x03) << 56) - -#define HCT_TMAP_STATE_MASK (((u64)0x07) << 56) -#define HCT_TMAP_CID_MASK ~(((u64)0xFF) << 56) - -/* -** There is a single object of this type for each distinct database -** opened within the process. All connections to said database have -** a pointer to the same HctTMapServer object. 
-*/ -typedef struct HctTMapServer HctTMapServer; - -/* -** Each separate database connection holds a handle of this type for -** the lifetime of the connection. Obtained and later released using -** functions: -** -** sqlite3HctTMapServerNew() -** sqlite3HctTMapServerFree() -*/ -typedef struct HctTMapClient HctTMapClient; - -/* -*/ -typedef struct HctTMap HctTMap; - -/* -** A transaction-map object. -** -** iMinTid: -** This, and all smaller TID values have been finalized (fully committed -** or rolled back). The client may not query the map for any TID values -** less than or equal to this one. -** -** iMinCid: -** This an all smaller CID values were committed -*/ -struct HctTMap { - /* Snapshot locking values */ -#if 0 - u64 iMinCid; /* This + all smaller CIDs fully committed */ - u64 iMinTid; /* This + all smaller TIDs fully committed */ -#endif - - /* Transaction map */ - u64 iFirstTid; /* TID corresponding to aaMap[0][0] */ - int nMap; /* Number of mapping pages in aaMap[] */ - u64 **aaMap; /* Array of u64[HCT_TMAP_PAGESIZE] arrays */ -}; - -/* -** Create or delete a tmap server object. -*/ -SQLITE_PRIVATE int sqlite3HctTMapServerNew(u64 iFirstTid, u64 iLastTid, HctTMapServer **pp); -SQLITE_PRIVATE void sqlite3HctTMapServerFree(HctTMapServer *p); - -/* -** Connect/disconnect a tmap client object. -*/ -SQLITE_PRIVATE int sqlite3HctTMapClientNew(HctTMapServer*, HctConfig*, HctTMapClient**); -SQLITE_PRIVATE void sqlite3HctTMapClientFree(HctTMapClient *pClient); - -/* -** Obtain, update or release a reference to a transaction map object. -*/ -SQLITE_PRIVATE int sqlite3HctTMapBegin(HctTMapClient *p, u64 iSnapshot, HctTMap **ppMap); -SQLITE_PRIVATE int sqlite3HctTMapUpdate(HctTMapClient *p, HctTMap **ppMap); -SQLITE_PRIVATE int sqlite3HctTMapEnd(HctTMapClient *p, u64 iCID); - -/* -** Return a TID value for which: -** -** 1. the transactions associated with it and all smaller TID values -** have been finalized (marked as committed or rolled back), and -** -** 2. 
the transactions associated with it and all smaller TID values -** are included in the snapshots accessed by all current and future -** readers. -** -** All physical and logical pages freed by transactions with TIDs equal to -** or smaller than the returned value may now be reused without disturbing -** current or future readers. -*/ -SQLITE_PRIVATE u64 sqlite3HctTMapSafeTID(HctTMapClient*); - -SQLITE_PRIVATE int sqlite3HctTMapNewTID(HctTMapClient *p, u64 iTid, HctTMap **ppMap); - -/* -** Return TID value T for all transactions with tid values less than or -** equal to T were finished (marked as committed or rolled back), last -** time sqlite3HctTMapBegin() was called. -*/ -SQLITE_PRIVATE u64 sqlite3HctTMapCommitedTID(HctTMapClient*); - -SQLITE_PRIVATE i64 sqlite3HctTMapStats(sqlite3 *db, int iStat, const char **pzStat); - -SQLITE_PRIVATE void sqlite3HctTMapScan(HctTMapClient*); - - -/* -** The following API is used when recovering a replication-enabled database. -** In that case, a new HctTMap object must be created during recovery to -** reflect the contents of the sqlite_hct_journal table. -*/ -SQLITE_PRIVATE int sqlite3HctTMapRecoverySet(HctTMapClient*, u64 iTid, u64 iCid); -SQLITE_PRIVATE void sqlite3HctTMapRecoveryFinish(HctTMapClient*, int rc); - -SQLITE_PRIVATE int sqlite3HctTMapServerSet(HctTMapServer *pServer, u64 iTid, u64 iCid); - - - - -/************** End of hctTMapInt.h ******************************************/ -/************** Continuing where we left off in hctInt.h *********************/ -/************** Include hctFileInt.h in the middle of hctInt.h ***************/ -/************** Begin file hctFileInt.h **************************************/ -/* -** 2023 January 6 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. 
-** -************************************************************************* -** -*/ - -typedef struct HctFileServer HctFileServer; -typedef struct HctFile HctFile; - -SQLITE_PRIVATE HctFile *sqlite3HctFileOpen( - int *pRc, - const char *zFile, - HctConfig *pConfig -); -SQLITE_PRIVATE void sqlite3HctFileClose(HctFile *pFile); - -/* -** If the database has not yet been created on disk, create it. Or, if -** the db has already been created, then this function is a no-op. -*/ -SQLITE_PRIVATE int sqlite3HctFileNewDb(HctFile *pFile); - -/* -** Return true if the db has not yet been created on disk. Or false -** if it already has. -*/ -SQLITE_PRIVATE int sqlite3HctFileIsNewDb(HctFile *pFile); - -SQLITE_PRIVATE u32 sqlite3HctFileMaxpage(HctFile *pFile); - -typedef struct HctFilePage HctFilePage; -struct HctFilePage { - u8 *aOld; /* Current buffer, or NULL */ - u8 *aNew; /* New buffer (to be populated) */ - - /* Used internally by hct_file.c. Mostly... */ - u32 iPg; /* logical page number */ - u32 iNewPg; /* New physical page number */ - u32 iOldPg; /* Original physical page number */ - HctFile *pFile; -}; - -/* -** Allocate logical root page numbers. And free the same (required if the -** transaction is rolled back). -*/ -SQLITE_PRIVATE int sqlite3HctFileRootPgno(HctFile *pFile, u32 *piRoot); -SQLITE_PRIVATE int sqlite3HctFileRootFree(HctFile *pFile, u32 iRoot); -SQLITE_PRIVATE int sqlite3HctFileRootNew(HctFile *pFile, u32 iRoot, HctFilePage*); - - -SQLITE_PRIVATE int sqlite3HctFilePageNew(HctFile *pFile, HctFilePage *pPg); - -/* -** Obtain a read-only reference to logical page iPg. -*/ -SQLITE_PRIVATE int sqlite3HctFilePageGet(HctFile *pFile, u32 iPg, HctFilePage *pPg); - -/* -** If the page is not already writable (if pPg->aNew==0), make it writable. -** This involves allocating a new physical page and setting pPg->aNew -** to point to the buffer. -*/ -SQLITE_PRIVATE int sqlite3HctFilePageWrite(HctFilePage *pPg); - -/* -** This is a no-op if the page is not writable. 
-** -** If the page is already writable, reverse this so that will not be -** written out when PageRelease() or PageCommit() is called. This reclaims -** the physical page that was allocated by the earlier PageWrite() call -** and sets pPg->aNew to NULL. -*/ -SQLITE_PRIVATE void sqlite3HctFilePageUnwrite(HctFilePage *pPg); - -/* -** This is a no-op if the page is not writable. -** -** Commit the new version of the page to disk (i.e. set the page-map entry -** so that the logical page number now maps to the new version of the page -** in pPg->aNew). Then make pPg a non-writable reference to the logical -** page (so that pPg->aOld points to the new version of the page and -** pPg->aNew is NULL). -*/ -SQLITE_PRIVATE int sqlite3HctFilePageCommit(HctFilePage *pPg); - -/* -** Evict the page from the data structure - i.e. set the LOGICAL_EVICTED -** flag for it. This operation fails if the LOGICAL_EVICTED flag has -** already been set, or if the page has been written since it was read. -*/ -SQLITE_PRIVATE int sqlite3HctFilePageEvict(HctFilePage *pPg, int bIrrevocable); - -SQLITE_PRIVATE void sqlite3HctFilePageUnevict(HctFilePage *pPg); - -SQLITE_PRIVATE int sqlite3HctFilePageIsEvicted(HctFile *pFile, u32 iPgno); -SQLITE_PRIVATE int sqlite3HctFilePageIsFree(HctFile *pFile, u32 iPgno, int bLogical); - -/* -** Release a page reference obtained via an earlier call to -** sqlite3HctFilePageGet() or sqlite3HctFilePageNew(). After this call -** pPg->aOld is NULL. -** -** If the page is writable, it is committed (see sqlite3HctFilePageCommit) -** before the reference is released. 
-*/ -SQLITE_PRIVATE int sqlite3HctFilePageRelease(HctFilePage *pPg); - - -SQLITE_PRIVATE int sqlite3HctFilePageGetPhysical(HctFile *pFile, u32 iPg, HctFilePage *pPg); -SQLITE_PRIVATE int sqlite3HctFilePageNewPhysical(HctFile *pFile, HctFilePage *pPg); - -SQLITE_PRIVATE u64 sqlite3HctFileAllocateTransid(HctFile *pFile); -SQLITE_PRIVATE u64 sqlite3HctFileAllocateCID(HctFile *pFile, int); -SQLITE_PRIVATE u64 sqlite3HctFileGetSnapshotid(HctFile *pFile); - -SQLITE_PRIVATE void sqlite3HctFileSetCID(HctFile *pFile, u64); - -/* -** Increment the global write-count by nIncr, and return the final value. -*/ -SQLITE_PRIVATE u64 sqlite3HctFileIncrWriteCount(HctFile *pFile, int nIncr); - -SQLITE_PRIVATE HctTMapClient *sqlite3HctFileTMapClient(HctFile*); - -SQLITE_PRIVATE int sqlite3HctFilePgsz(HctFile *pFile); -SQLITE_PRIVATE int sqlite3HctFileVtabInit(sqlite3 *db); - -SQLITE_PRIVATE u64 sqlite3HctFileSafeTID(HctFile*); -SQLITE_PRIVATE u32 sqlite3HctFilePageRangeAlloc(HctFile*, int bLogical, int nPg); - -SQLITE_PRIVATE int sqlite3HctFileClearInUse(HctFilePage *pPg, int bReuseNow); -SQLITE_PRIVATE int sqlite3HctFileClearPhysInUse(HctFile *pFile, u32 pgno, int bReuseNow); - -SQLITE_PRIVATE void sqlite3HctFileDebugPrint(HctFile *pFile, const char *zFmt, ...); - -SQLITE_PRIVATE char *sqlite3HctFileLogFile(HctFile *pFile); -SQLITE_PRIVATE int sqlite3HctFileStartRecovery(HctFile *pFile, int iStage); -SQLITE_PRIVATE int sqlite3HctFileFinishRecovery(HctFile *pFile, int iStage, int rc); -SQLITE_PRIVATE int sqlite3HctFileRecoverFreelists( - HctFile *pFile, /* File to recover freelists for */ - int nRoot, i64 *aRoot, /* Array of root page numbers */ - int nPhys, i64 *aPhys /* Sorted array of phys. 
pages to preserve */ -); - -SQLITE_PRIVATE int sqlite3HctFileFindLogs(HctFile*, void*, int(*)(void*, const char*)); - -SQLITE_PRIVATE u32 sqlite3HctFilePageMapping(HctFile *pFile, u32 iLogical, int *pbEvicted); - -SQLITE_PRIVATE void sqlite3HctFileICArrays(HctFile*, u8**, u32*, u8**, u32*); -SQLITE_PRIVATE int sqlite3HctFileTreeFree(HctFile *, u32, int); -SQLITE_PRIVATE int sqlite3HctFilePageClearIsRoot(HctFile*, u32); -SQLITE_PRIVATE int sqlite3HctFilePageClearInUse(HctFile *pFile, u32 iPg, int bLogic); - -/************** Include hctPManInt.h in the middle of hctFileInt.h ***********/ -/************** Begin file hctPManInt.h **************************************/ -/* -** 2022 March 20 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** -*/ - -/* -** There is a single object of this type for each distinct database opened -** by the process. Allocated and later freed using the following functions. -** -** sqlite3HctPManServerNew() -** sqlite3HctPManServerFree() -** -** Once an HctPManServer object has been created, it is configured with -** the set of free logical and physical pages, which the caller presumably -** discovers by scanning the page-map. -*/ -typedef struct HctPManServer HctPManServer; -typedef struct HctFile HctFile; - -SQLITE_PRIVATE HctPManServer *sqlite3HctPManServerNew( - int *pRc, /* IN/OUT: Error code */ - HctFileServer *pFileServer /* Associated file-server object */ -); -SQLITE_PRIVATE void sqlite3HctPManServerFree(HctPManServer*); - -/* -** This function is called multiple times while scanning the page-map -** during initialization. To load the initial set of free physical and -** logical pages. 
-*/ -SQLITE_PRIVATE void sqlite3HctPManServerInit( - int *pRc, HctPManServer*, u64 iTid, u32 iPg, int bLogical -); - -/* -** Each separate database connection holds a handle of this type for -** the lifetime of the connection. -*/ -typedef struct HctPManClient HctPManClient; - -SQLITE_PRIVATE HctPManClient *sqlite3HctPManClientNew( - int *pRc, - HctConfig*, - HctPManServer*, - HctFile* -); -SQLITE_PRIVATE void sqlite3HctPManClientFree(HctPManClient*); - -/* -** Allocate a new logical or physical page. -*/ -SQLITE_PRIVATE u32 sqlite3HctPManAllocPg( - int *pRc, /* IN/OUT: Error code */ - HctPManClient *p, /* page-manager client handle */ - HctFile *pFile, - int bLogical -); - -/* -** Mark a logical or physical page as no longer in use. Parameter iTid -** is the transaction-id associated with the transaction that freed the -** page. The page may be reused once all clients are accessing a -** snapshot that includes this transaction. In other words, once the -** snapshot id of all readers is greater than or equal to the commit id -** that maps to transaction id iTid. -** -** Sometimes this function is called with iTid==0, to indicate that the -** page in question may be reused immediately. -*/ -SQLITE_PRIVATE void sqlite3HctPManFreePg( - int *pRc, /* IN/OUT: Error code */ - HctPManClient *p, /* page-manager client handle */ - i64 iTid, /* Associated TID value */ - u32 iPg, /* Page number */ - int bLogical /* True for logical, false for physical */ -); - -SQLITE_PRIVATE void sqlite3HctPManClientHandoff(HctPManClient *p); - -SQLITE_PRIVATE void sqlite3HctPManServerReset(HctPManServer *pServer); - -SQLITE_PRIVATE int sqlite3HctPManVtabInit(sqlite3 *db); - -/* -** Mark an entire tree of logical and physical pages as free. The iTid -** parameter works just as it does for sqlite3HctPManFreePg(). -** -** SQLITE_OK is returned if successful, or an error code (e.g. SQLITE_NOMEM) -** otherwise. 
-*/ -SQLITE_PRIVATE int sqlite3HctPManFreeTree(HctPManClient *p, HctFile*, u32 iRoot, u64 iTid); - -SQLITE_PRIVATE int sqlite3HctPManServerInitRoot(int *pRc, HctPManServer*, u64, HctFile*, u32); -SQLITE_PRIVATE i64 sqlite3HctPManStats(sqlite3 *db, int iStat, const char **pzStat); - -/************** End of hctPManInt.h ******************************************/ -/************** Continuing where we left off in hctFileInt.h *****************/ -SQLITE_PRIVATE HctPManClient *sqlite3HctFilePManClient(HctFile*); - -SQLITE_PRIVATE int sqlite3HctFileRootArray(HctFile*, u32**, int*); - -/* Interface used by hct_stats virtual table */ -SQLITE_PRIVATE i64 sqlite3HctFileStats(sqlite3*, int, const char**); - -/* -** Return the total number of physical page allocations made during -** the entire lifetime of this object. -*/ -SQLITE_PRIVATE u64 sqlite3HctFileWriteCount(HctFile *pFile); - -/* -** Return the number of files used to store data within the database (the -** value to return for "PRAGMA hct_ndbfile"). Before returning, set output -** parameter *pbFixed if the database has been created and the number -** of files is therefore fixed, or clear it if the db has yet to be created. -*/ -SQLITE_PRIVATE int sqlite3HctFileNFile(HctFile *pFile, int *pbFixed); - -SQLITE_PRIVATE void sqlite3HctFileSetJrnlPtr(HctFile *pFile, void *pPtr, void(*xDel)(void*)); -SQLITE_PRIVATE void *sqlite3HctFileGetJrnlPtr(HctFile *pFile); - -SQLITE_PRIVATE int sqlite3HctIoerr(int rc); - - -/************** End of hctFileInt.h ******************************************/ -/************** Continuing where we left off in hctInt.h *********************/ - -#ifdef SQLITE_DEBUG -# define SQLITE_LOCKED_ERR(x,y) sqlite3HctLockedErr(x,y) -SQLITE_PRIVATE int sqlite3HctLockedErr(u32 pgno, const char *zReason); -#else -# define SQLITE_LOCKED_ERR(x,y) SQLITE_LOCKED -#endif - -#define HCT_TREE_SCHEMAOP_ROOT 3 - -/* -** Growable buffer type used for various things. 
-*/ -typedef struct HctBuffer HctBuffer; -struct HctBuffer { - u8 *aBuf; - int nBuf; - int nAlloc; -}; -SQLITE_PRIVATE int sqlite3HctBufferGrow(HctBuffer *pBuf, int nSize); -SQLITE_PRIVATE void sqlite3HctBufferFree(HctBuffer *pBuf); - - - -/************************************************************************* -** Interface to code in hct_tree.c -*/ -typedef struct HctTree HctTree; -typedef struct HctTreeCsr HctTreeCsr; - -SQLITE_PRIVATE int sqlite3HctTreeNew(HctTree **ppTree); -SQLITE_PRIVATE void sqlite3HctTreeFree(HctTree *pTree); - -SQLITE_PRIVATE int sqlite3HctTreeInsert(HctTreeCsr*, UnpackedRecord*, i64, int, const u8*,int); -SQLITE_PRIVATE int sqlite3HctTreeAppend(HctTreeCsr*, KeyInfo*, i64, int, const u8*,int); -SQLITE_PRIVATE int sqlite3HctTreeDelete(HctTreeCsr *pCsr); -SQLITE_PRIVATE int sqlite3HctTreeDeleteKey(HctTreeCsr *, UnpackedRecord *, i64, int,const u8*); - -/* -** These functions are used to open and close transactions and nested -** sub-transactions. -** -** The Begin() function is used to open transactions and sub-transactions. -** A successful call to Begin() ensures that there are at least iLevel -** nested transactions open. To open a top-level transaction, pass iLevel=1. -** To open a sub-transaction within the top-level transaction, iLevel=2. -** Passing iLevel=0 is a no-op. -** -** Release() is used to commit transactions and sub-transactions. A -** successful call to Release() ensures that there are at most iLevel -** nested transactions open. To commit a top-level transaction, pass iLevel=0. -** To commit all sub-transactions inside the main transaction, pass iLevel=1. -** -** Function lsm_rollback() is used to roll back transactions and -** sub-transactions. A successful call to lsm_rollback() restores the database -** to the state it was in when the iLevel'th nested sub-transaction (if any) -** was first opened. And then closes transactions to ensure that there are -** at most iLevel nested transactions open. 
Passing iLevel=0 rolls back and -** closes the top-level transaction. iLevel=1 also rolls back the top-level -** transaction, but leaves it open. iLevel=2 rolls back the sub-transaction -** nested directly inside the top-level transaction (and leaves it open). -*/ -SQLITE_PRIVATE int sqlite3HctTreeBegin(HctTree *pTree, int iStmt); -SQLITE_PRIVATE int sqlite3HctTreeRelease(HctTree *pTree, int iStmt); -SQLITE_PRIVATE int sqlite3HctTreeRollbackTo(HctTree *pTree, int iStmt); - -SQLITE_PRIVATE int sqlite3HctTreeClearOne(HctTree *pTree, u32 iRoot, i64 *pnRow); - -SQLITE_PRIVATE int sqlite3HctTreeCsrOpen(HctTree *pTree, u32 iRoot, HctTreeCsr **ppCsr); -SQLITE_PRIVATE int sqlite3HctTreeCsrClose(HctTreeCsr *pCsr); - -SQLITE_PRIVATE int sqlite3HctTreeCsrNext(HctTreeCsr *pCsr); -SQLITE_PRIVATE int sqlite3HctTreeCsrPrev(HctTreeCsr *pCsr); -SQLITE_PRIVATE int sqlite3HctTreeCsrEof(HctTreeCsr *pCsr); - -SQLITE_PRIVATE int sqlite3HctTreeCsrSeek(HctTreeCsr*, UnpackedRecord*, i64 iKey, int *pRes); -SQLITE_PRIVATE int sqlite3HctTreeCsrFirst(HctTreeCsr *pCsr); -SQLITE_PRIVATE int sqlite3HctTreeCsrLast(HctTreeCsr *pCsr); - -SQLITE_PRIVATE int sqlite3HctTreeCsrKey(HctTreeCsr *pCsr, i64 *piKey); -SQLITE_PRIVATE int sqlite3HctTreeCsrData(HctTreeCsr *pCsr, int *pnData, const u8 **paData); -SQLITE_PRIVATE int sqlite3HctTreeCsrIsDelete(HctTreeCsr *pCsr); - -SQLITE_PRIVATE void sqlite3HctTreeCsrPin(HctTreeCsr *pCsr); -SQLITE_PRIVATE void sqlite3HctTreeCsrUnpin(HctTreeCsr *pCsr); - -SQLITE_PRIVATE int sqlite3HctTreeCsrHasMoved(HctTreeCsr *pCsr); -SQLITE_PRIVATE int sqlite3HctTreeCsrRestore(HctTreeCsr *pCsr, int *pIsDifferent); -SQLITE_PRIVATE void sqlite3HctTreeCsrClear(HctTreeCsr *pCsr); - -SQLITE_PRIVATE u32 sqlite3HctTreeCsrRoot(HctTreeCsr *pCsr); - - -/* -** Iterate through non-empty tables/indexes within an HctTree structure. Used -** when flushing contents to disk. 
-** -** If parameter bSchemaOp is false, then no callback is issued for the table -** with root page number HCT_TREE_SCHEMAOP_ROOT. If bSchemaOp is non-zero, -** then HCT_TREE_SCHEMAOP_ROOT is treated like any other table. -*/ - -SQLITE_PRIVATE int sqlite3HctTreeForeach( - HctTree *pTree, - int bSchemOp, - void *pCtx, - int (*x)(void *, u32, KeyInfo*) -); -SQLITE_PRIVATE void sqlite3HctTreeClear(HctTree *pTree); - -SQLITE_PRIVATE void sqlite3HctTreeCsrIncrblob(HctTreeCsr *pCsr); -SQLITE_PRIVATE int sqlite3HctTreeCsrReseek(HctTreeCsr *pCsr, int*); - -SQLITE_PRIVATE int sqlite3HctTreeUpdateMeta(HctTree*, const u8*, int); - -/************************************************************************* -** Interface to code in hct_database.c -*/ -typedef struct HctDatabase HctDatabase; -typedef struct HctDbCsr HctDbCsr; - -typedef struct HctJournal HctJournal; - -SQLITE_PRIVATE HctDatabase *sqlite3HctDbFind(sqlite3*, int); -SQLITE_PRIVATE int sqlite3HctDetectJournals(sqlite3 *db); - -SQLITE_PRIVATE HctDatabase *sqlite3HctDbOpen(int*, const char *zFile, HctConfig*); -SQLITE_PRIVATE void sqlite3HctDbClose(HctDatabase *pDb); - -SQLITE_PRIVATE int sqlite3HctDbRootNew(HctDatabase *p, u32 *piRoot); -SQLITE_PRIVATE int sqlite3HctDbRootFree(HctDatabase *p, u32 iRoot); - -SQLITE_PRIVATE int sqlite3HctDbRootInit(HctDatabase *p, int bIndex, u32 iRoot); -SQLITE_PRIVATE void sqlite3HctDbRootPageInit(int bIndex, u8 *aPage, int szPage); -SQLITE_PRIVATE int sqlite3HctDbGetMeta(HctDatabase *p, u8 *aBuf, int nBuf); - -SQLITE_PRIVATE int sqlite3HctDbInsert( - HctDatabase *pDb, - u32 iRoot, - UnpackedRecord *pRec, i64 iKey, - int bDel, int nData, const u8 *aData, - int *pnRetry -); -SQLITE_PRIVATE int sqlite3HctDbInsertFlush(HctDatabase *pDb, int *pnRetry); -SQLITE_PRIVATE int sqlite3HctDbStartRead(HctDatabase*,HctJournal*); -SQLITE_PRIVATE int sqlite3HctDbStartWrite(HctDatabase*, u64*); -SQLITE_PRIVATE int sqlite3HctDbEndWrite(HctDatabase*, u64, int); -SQLITE_PRIVATE int 
sqlite3HctDbEndRead(HctDatabase*); -SQLITE_PRIVATE int sqlite3HctDbValidate(sqlite3*, HctDatabase*, u64 *piCid, int*); - -SQLITE_PRIVATE i64 sqlite3HctDbTid(HctDatabase *); - -SQLITE_PRIVATE void sqlite3HctDbRollbackMode(HctDatabase*,int); - -SQLITE_PRIVATE int sqlite3HctDbCsrOpen(HctDatabase*, struct KeyInfo*, u32 iRoot, HctDbCsr**); -SQLITE_PRIVATE void sqlite3HctDbCsrClose(HctDbCsr *pCsr); - -SQLITE_PRIVATE void sqlite3HctDbCsrNosnap(HctDbCsr *pCsr, int bNosnap); - -SQLITE_PRIVATE void sqlite3HctDbCsrDir(HctDbCsr*, int eDir); -SQLITE_PRIVATE int sqlite3HctDbCsrSeek(HctDbCsr*, UnpackedRecord*, i64 iKey, int *pRes); - -SQLITE_PRIVATE int sqlite3HctDbCsrEof(HctDbCsr*); -SQLITE_PRIVATE int sqlite3HctDbCsrFirst(HctDbCsr*); -SQLITE_PRIVATE int sqlite3HctDbCsrLast(HctDbCsr*); -SQLITE_PRIVATE int sqlite3HctDbCsrNext(HctDbCsr*); -SQLITE_PRIVATE int sqlite3HctDbCsrPrev(HctDbCsr*); -SQLITE_PRIVATE void sqlite3HctDbCsrClear(HctDbCsr*); - -SQLITE_PRIVATE void sqlite3HctDbCsrKey(HctDbCsr*, i64 *piKey); -SQLITE_PRIVATE int sqlite3HctDbCsrData(HctDbCsr *pCsr, int *pnData, const u8 **paData); -SQLITE_PRIVATE int sqlite3HctDbCsrLoadAndDecode(HctDbCsr *pCsr, UnpackedRecord **ppRec); - -SQLITE_PRIVATE int sqlite3HctDbIsIndex(HctDatabase *pDb, u32 iRoot, int *pbIndex); - -SQLITE_PRIVATE int sqlite3HctDbStartRecovery(HctDatabase *pDb, int iStage); -SQLITE_PRIVATE int sqlite3HctDbFinishRecovery(HctDatabase *db, int iStage, int rc); -SQLITE_PRIVATE void sqlite3HctDbRecoverTid(HctDatabase *db, u64 iTid); - -SQLITE_PRIVATE char *sqlite3HctDbLogFile(HctDatabase*); - -SQLITE_PRIVATE i64 sqlite3HctDbNCasFail(HctDatabase*); - -SQLITE_PRIVATE char *sqlite3HctDbIntegrityCheck(HctDatabase*, u32 *aRoot,Mem*,int nRoot, int*); -SQLITE_PRIVATE i64 sqlite3HctDbStats(sqlite3 *db, int iStat, const char **pzStat); - -SQLITE_PRIVATE int sqlite3HctDbCsrRollbackSeek(HctDbCsr*, UnpackedRecord*, i64, int *pOp); - -SQLITE_PRIVATE void sqlite3HctDbSetSavePhysical( - HctDatabase *pDb, - int (*xSave)(void*, i64 
iPhys), - void *pSave -); - -SQLITE_PRIVATE char *sqlite3HctDbRecordToText(sqlite3 *db, const u8 *aRec, int nRec); - -SQLITE_PRIVATE void sqlite3HctDbTMapScan(HctDatabase *pDb); - -SQLITE_PRIVATE void sqlite3HctDbTransIsConcurrent(HctDatabase *pDb, int bConcurrent); - -SQLITE_PRIVATE HctFile *sqlite3HctDbFile(HctDatabase *pDb); - -SQLITE_PRIVATE int sqlite3HctDbWalkTree( - HctFile *pFile, /* File tree resides in */ - u32 iRoot, /* Root page of tree */ - int (*x)(void*, u32, u32), /* Callback function */ - void *pCtx /* First argument to pass to x() */ -); - -SQLITE_PRIVATE int sqlite3HctDbPagesize(HctDatabase *pDb); - -SQLITE_PRIVATE void sqlite3HctDbRecordTrim(UnpackedRecord *pRec); - -/* -** This function returns the current snapshot-id. It may only be called -** when a read transaction is active. -*/ -SQLITE_PRIVATE i64 sqlite3HctDbSnapshotId(HctDatabase *pDb); - -SQLITE_PRIVATE int sqlite3HctDbCsrFindLastWrite( - HctDbCsr *pCsr, /* Cursor to seek */ - UnpackedRecord *pRec, /* Key for index/without rowid tables */ - i64 iKey, /* Key for intkey tables */ - u64 *piCid /* Last CID to write to this key */ -); - -SQLITE_PRIVATE void sqlite3HctDbJrnlWriteCid(HctDatabase *pDb, u64 iVal); - -/************************************************************************* -** Interface to code in hct_file.c -*/ - -/************************************************************************* -** Interface to code in hct_record.c -*/ -SQLITE_PRIVATE int sqlite3HctSerializeRecord( - UnpackedRecord *pRec, /* Record to serialize */ - u8 **ppRec, /* OUT: buffer containing serialization */ - int *pnRec /* OUT: size of (*ppRec) in bytes */ -); - -/************************************************************************* -** Interface to code in hct_stats.c -*/ -SQLITE_PRIVATE int sqlite3HctStatsInit(sqlite3*); - -/************************************************************************* -** Utility functions: -*/ -SQLITE_PRIVATE void *sqlite3HctMalloc(int *pRc, i64 nByte); - 
-/************************************************************************* -** hctree.c: -**/ - -/************** Include hctJrnlInt.h in the middle of hctInt.h ***************/ -/************** Begin file hctJrnlInt.h **************************************/ -/* -** 2023 January 6 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** -*/ - -typedef struct HctJournal HctJournal; - -/* -** If schema pSchema contains the special tables sqlite_hct_journal and -** sqlite_hct_baseline, allocate a new HctJournal object, set (*pp) -** to point to it and return SQLITE_OK. Or, if neither table can be -** found, set (*pp) to NULL and return SQLITE_OK. -** -** If only one of the required tables is found (SQLITE_CORRUPT), or if an -** OOM error occurs (SQLITE_NOMEM), return an SQLite error code. The final -** value of (*pp) is NULL in this case. -*/ -SQLITE_PRIVATE int sqlite3HctJournalNewIf(Schema*, HctTree*, HctDatabase*, HctJournal **pp); - -SQLITE_PRIVATE void sqlite3HctJournalClose(HctJournal*); - - -SQLITE_PRIVATE int sqlite3HctJrnlLog( - HctJournal *pJrnl, - sqlite3 *db, - Schema *pSchema, - u64 iCid, - u64 iTid, - int *pbCustomValid -); - -/* -** This is called as part of stage 1 recovery (the bit after the upper layer -** has loaded the database schema). The recovery mutex is held, so the client -** has exclusive access to the database on disk. -*/ -SQLITE_PRIVATE int sqlite3HctJrnlRecovery(HctJournal *pJrnl, HctDatabase *pDb); - -SQLITE_PRIVATE int sqlite3HctJrnlSavePhysical(sqlite3 *db, HctJournal *pJrnl, - int (*xSave)(void*, i64 iPhys), void *pSave -); - -/* -** Register the hct_journal_entry() SQL user-function with the database -** handle. 
For decoding the "data" column of the sqlite_hct_journal table. -*/ -SQLITE_PRIVATE int sqlite3HctJrnlInit(sqlite3 *db); - -/* -** Return non-zero if (1) argument pJrnl is not NULL, and either (2a) argument -** iTable is the logical root page of either the journal or baseline table -** represented by pJrnl, or (2b) the connection is in follower mode. -** -** Before returning, set output variable (*pbNosnap) to non-zero if condition -** (2a) was true. To indicate that the table does not use snapshots - all -** committed rows are visible. -*/ -SQLITE_PRIVATE int sqlite3HctJournalIsReadonly(HctJournal *pJrnl, u64 iTable, int *pbNosnap); - -SQLITE_PRIVATE int sqlite3HctJrnlRollbackEntry(HctJournal *pJrnl, i64 iTid); - -SQLITE_PRIVATE int sqlite3HctJrnlWriteEmpty(HctJournal *Jrnl, u64 iCid, u64 iTid, sqlite3 *db); - -SQLITE_PRIVATE u64 sqlite3HctJrnlWriteTid(HctJournal *pJrnl, u64 *piCid); - -SQLITE_PRIVATE u64 sqlite3HctJournalSnapshot(HctJournal *pJrnl); - -SQLITE_PRIVATE void sqlite3HctJournalFixSchema(HctJournal *pJrnl, sqlite3*, void *pSchema); - -SQLITE_PRIVATE void sqlite3HctJournalSchemaVersion(HctJournal *pJrnl, u32 *pSchemaVersion); - -SQLITE_PRIVATE void sqlite3HctJrnlInvokeHook(HctJournal *pJrnl, sqlite3 *db); - -/************** End of hctJrnlInt.h ******************************************/ -/************** Continuing where we left off in hctInt.h *********************/ -SQLITE_PRIVATE HctJournal *sqlite3HctJrnlFind(sqlite3*); - -SQLITE_PRIVATE int sqlite3HctBtreeIsNewTable(Btree *pBt, u64 iRoot); -SQLITE_PRIVATE u64 sqlite3HctBtreeSnapshotId(Btree *pBt); - -SQLITE_PRIVATE i64 sqlite3HctMainStats(sqlite3 *db, int iStat, const char **pzStat); - - - -/************** End of hctInt.h **********************************************/ -/************** Continuing where we left off in hct_pman.c *******************/ - -typedef struct HctPManPageset HctPManPageset; -typedef struct HctPManTree HctPManTree; - -#define PAGESET_INIT_SIZE 1000 - -typedef struct 
HctPManFreePg HctPManFreePg; -typedef struct HctPManFreePgSet HctPManFreePgSet; - -struct HctPManFreePg { - i64 pgno; /* The free page number */ - i64 iTid; /* TID of transaction that freed page */ -}; - -struct HctPManFreePgSet { - HctPManFreePg *aPg; /* Page buffer */ - int nAlloc; /* Allocated size of aPg[] */ - int iFirst; /* Index of first entry in aPg[] */ - int nPg; /* Number of valid pages in aPg[] */ -}; - - - - -/****************************************************************/ - -/* -** A basket of free page ids - a pageset - is represented by an instance -** of the following type. -** -** nAlloc: -** Allocated size of aPg[] array, in entries (not bytes). -** -** nPg: -** Number of valid entries in aPg[]. -** -** aPg: -** Array of free logical or physical page ids. -** -** iMaxTid: -** When a page is freed, it is associated with a TID. Such that the page -** may be reused once it is guaranteed that all current and future readers -** include in their snapshots all transactions with TID values less than -** the associated TID. The maximum of all these values for pages in the -** page set is stored in this variable. -** -** pNext: -** Used to link the HctPManServer.apList[] lists together. -*/ -struct HctPManPageset { - i64 iMaxTid; /* Max associated TID of aPg[] entries */ - int nAlloc; /* Allocated size of aPg[] array */ - int nPg; /* Number of valid entries in aPg[] */ - u32 *aPg; /* Array of page numbers */ - HctPManPageset *pNext; /* Next in list */ -}; - -/* -** A tree of free logical and physical pages. -*/ -struct HctPManTree { - u32 iRoot; /* Logical root of free tree */ - i64 iTid; /* Associated TID value */ -}; - -/* -** Indexes into HctPManServer.apList[], HctPManClient.apAcc[] and -** HctPManClient.apUse[] arrays. -*/ -#define PAGESET_PHYSICAL 0 -#define PAGESET_LOGICAL 1 - -/* -** aList[]: -** aList[0].pHead is a pointer to the first element of a singly-linked -** list of pagesets containing free physical page ids. 
aList[0].pTail -** always points to the last element of this list. The list is sorted -** in order of HctPManPageset.iMaxTid values. -** -** aList[1] is similar, but for logical page ids. -** -** aTree[]: -** Array of tree structures to eventually walk and free -*/ -struct HctPManServer { - sqlite3_mutex *pMutex; /* Mutex to protect this object */ - HctFileServer *pFileServer; /* Associated file-server object */ - struct HctPManServerList { - HctPManPageset *pHead; - HctPManPageset *pTail; - } aList[2]; - - int nTree; - HctPManTree *aTree; -}; - -/* -** Event counters used by the hctstats virtual table. -*/ -typedef struct HctPManStats HctPManStats; -struct HctPManStats { - i64 nMutex; - i64 nMutexBlock; -}; - -/* -** apAcc[]: -** These two pagesets are used to accumulate physical (apAcc[0]) and -** logical (apAcc[1]) page ids as they are freed by the client. Once -** sufficient page ids have been accumulated the pageset will be handed -** to the server object. -** -** apUse[]: -** These two pagesets are guaranteed to contain page ids that can be -** reused immediately. For the client to use as it requires. -*/ -struct HctPManClient { - HctConfig *pConfig; - HctPManServer *pServer; - HctFile *pFile; - - HctPManFreePgSet aPgSet[2]; /* Free physical and logical pages */ - - HctPManStats stats; -}; - -static void hctPManMutexEnter(HctPManClient *pClient){ - sqlite3_mutex *pMutex = pClient->pServer->pMutex; - pClient->stats.nMutex++; - if( sqlite3_mutex_try(pMutex)!=SQLITE_OK ){ - pClient->stats.nMutexBlock++; - sqlite3_mutex_enter(pMutex); - } -} - - -#define ENTER_PMAN_MUTEX(pClient) hctPManMutexEnter(pClient) -#define LEAVE_PMAN_MUTEX(pClient) sqlite3_mutex_leave(pClient->pServer->pMutex) - -/* -** Utility malloc function for hct. Allocate nByte bytes of zeroed memory. 
-*/ -SQLITE_PRIVATE void *sqlite3HctMalloc(int *pRc, i64 nByte){ - void *pRet = 0; - assert( nByte!=0 ); - if( *pRc==SQLITE_OK ){ - pRet = sqlite3MallocZero(nByte); - if( pRet==0 ){ - *pRc = SQLITE_NOMEM_BKPT; - } - } - return pRet; -} - - -/* -** Allocate and return a new HctPManServer object. -*/ -SQLITE_PRIVATE HctPManServer *sqlite3HctPManServerNew( - int *pRc, - HctFileServer *pFileServer -){ - int rc = *pRc; - HctPManServer *pRet = 0; - pRet = sqlite3HctMalloc(&rc, sizeof(*pRet)); - if( pRet ){ - pRet->pFileServer = pFileServer; - pRet->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); - if( pRet->pMutex==0 ){ - rc = SQLITE_NOMEM_BKPT; - } - } - - if( rc!=SQLITE_OK ){ - sqlite3HctPManServerFree(pRet); - pRet = 0; - } - *pRc = rc; - return pRet; -} - - -SQLITE_PRIVATE void sqlite3HctPManServerReset(HctPManServer *pServer){ - int ii = 0; - for(ii=0; ii<2; ii++){ - HctPManPageset *pNext = pServer->aList[ii].pHead; - while( pNext ){ - HctPManPageset *pDel = pNext; - pNext = pNext->pNext; - sqlite3_free(pDel); - } - memset(&pServer->aList[ii], 0, sizeof(struct HctPManServerList)); - } -} - -/* -** Free an HctPManServer object allocated by an earlier call to -** sqlite3HctPManServerNew(). -*/ -SQLITE_PRIVATE void sqlite3HctPManServerFree(HctPManServer *pServer){ - if( pServer ){ - sqlite3HctPManServerReset(pServer); - sqlite3_mutex_free(pServer->pMutex); - sqlite3_free(pServer->aTree); - sqlite3_free(pServer); - } -} - -/* -** Allocate and return a pointer to a new pageset object with enough -** space for up to nAlloc page ids. -*/ -static HctPManPageset *hctPManPagesetNew(int *pRc, int nAlloc){ - const int nByte = sizeof(HctPManPageset) + nAlloc*sizeof(u32); - HctPManPageset *pRet = 0; - - pRet = (HctPManPageset*)sqlite3HctMalloc(pRc, nByte); - if( pRet ){ - pRet->aPg = (u32*)&pRet[1]; - pRet->nAlloc = nAlloc; - } - - return pRet; -} - -/* -** Add page iPg directly to the list of free pages managed by server pServer. 
-** iPg may be either a logical (if bLogical==1) or a physical (if bLogical==0) -** page id. It is available for reuse immediately. -** -** This function is not threadsafe. It is only called during initialization, -** when there is only one thread that may be accessing object pServer. -*/ -SQLITE_PRIVATE void sqlite3HctPManServerInit( - int *pRc, - HctPManServer *pServer, - u64 iTid, - u32 iPg, - int bLogical -){ - struct HctPManServerList *p = &pServer->aList[bLogical]; - assert( bLogical==0 || bLogical==1 ); - - if( p->pHead==0 || p->pHead->nPg==p->pHead->nAlloc ){ - HctPManPageset *pNew = hctPManPagesetNew(pRc, PAGESET_INIT_SIZE); - if( pNew==0 ) return; - pNew->pNext = p->pHead; - pNew->iMaxTid = iTid; - p->pHead = pNew; - if( p->pTail==0 ) p->pTail = pNew; - } - p->pHead->aPg[p->pHead->nPg++] = iPg; -} - -/* -** Allocate a new page-manager client. -*/ -SQLITE_PRIVATE HctPManClient *sqlite3HctPManClientNew( - int *pRc, /* IN/OUT: Error code */ - HctConfig *pConfig, /* Connection configuration object */ - HctPManServer *pServer, /* Page-manager server to connect to */ - HctFile *pFile /* File object */ -){ - HctPManClient *pClient = 0; - pClient = (HctPManClient*)sqlite3HctMalloc(pRc, sizeof(HctPManClient)); - if( pClient ){ - pClient->pConfig = pConfig; - pClient->pServer = pServer; - pClient->pFile = pFile; - } - return pClient; -} - -/* -** Hand off a page-set object to the server passed as the first argument. 
-*/ -static void hctPManServerHandoff( - HctPManServer *p, /* Server object */ - HctPManPageset *pPageSet, /* Pageset to pass to the server */ - int bLogical, /* True for logical, false for physical ids */ - int bUsable /* Page ids are immediately usable */ -){ - if( pPageSet ){ - struct HctPManServerList *pList = &p->aList[bLogical]; - if( bUsable ){ - pPageSet->pNext = pList->pHead; - pList->pHead = pPageSet; - if( pList->pTail==0 ) pList->pTail = pPageSet; - }else{ - pPageSet->pNext = 0; - if( pList->pTail==0 ){ - pList->pTail = pList->pHead = pPageSet; - }else{ - pList->pTail->pNext = pPageSet; - pList->pTail = pPageSet; - } - } - } -} - -/* -** -*/ -static int hctPManHandback( - HctPManClient *pClient, /* Client to hand pages back from */ - int bLogical, /* True for logical pages, false for phys. */ - int nPg /* Number of pages to hand back */ -){ - u64 iSafeTid = sqlite3HctFileSafeTID(pClient->pFile); - const int nPageSet = pClient->pConfig->nPageSet; - HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; - int nRem = nPg; - int rc = SQLITE_OK; - - HctPManPageset *pList = 0; - - assert( bLogical==0 || bLogical==1 ); - assert( nPg<=pSet->nPg ); - - while( nRem>0 ){ - int ii = 0; - HctPManPageset *pNew = 0; - int nCopy = MIN(nRem, nPageSet); - - nRem -= nCopy; - pNew = hctPManPagesetNew(&rc, nCopy); - if( !pNew ) break; - for(ii=0; iiiFirst + ii) % pSet->nAlloc; - pNew->aPg[pNew->nPg++] = (u32)(pSet->aPg[iPg].pgno); - pNew->iMaxTid = pSet->aPg[iPg].iTid; - } - pSet->iFirst = (pSet->iFirst+nCopy) % pSet->nAlloc; - pSet->nPg -= nCopy; - - pNew->pNext = pList; - pList = pNew; - } - assert( pList || nPg==0 || rc!=SQLITE_OK ); - - ENTER_PMAN_MUTEX(pClient); - while( pList ){ - int bSafe = (pList->iMaxTid<=iSafeTid); - HctPManPageset *pNext = pList->pNext; - pList->pNext = 0; - hctPManServerHandoff(pClient->pServer, pList, bLogical, bSafe); - pList = pNext; - } - LEAVE_PMAN_MUTEX(pClient); - - return rc; -} - -/* -** Free a page-manager client. 
-*/ -SQLITE_PRIVATE void sqlite3HctPManClientFree(HctPManClient *pClient){ - if( pClient ){ - /* Return all pages to the server object */ - hctPManHandback(pClient, 0, pClient->aPgSet[0].nPg); - hctPManHandback(pClient, 1, pClient->aPgSet[1].nPg); - - /* Free allocations */ - sqlite3_free(pClient->aPgSet[0].aPg); - sqlite3_free(pClient->aPgSet[1].aPg); - sqlite3_free(pClient); - } -} - - -typedef struct FreeTreeCtx FreeTreeCtx; -struct FreeTreeCtx { - HctFile *pFile; - HctPManClient *pPManClient; -}; - -static int pmanFreeTreeCb(void *pCtx, u32 iLogic, u32 iPhys){ - FreeTreeCtx *p = (FreeTreeCtx*)pCtx; - int rc = SQLITE_OK; - - if( iLogic && !sqlite3HctFilePageIsFree(p->pFile, iLogic, 1) ){ - rc = sqlite3HctFilePageClearInUse(p->pFile, iLogic, 1); - sqlite3HctPManFreePg(&rc, p->pPManClient, 0, iLogic, 1); - } - if( iPhys && !sqlite3HctFilePageIsFree(p->pFile, iPhys, 0) && rc==SQLITE_OK ){ - rc = sqlite3HctFilePageClearInUse(p->pFile, iPhys, 0); - sqlite3HctPManFreePg(&rc, p->pPManClient, 0, iPhys, 0); - } - - return rc; -} - -static int hctPManFreeTreeNow( - HctPManClient *p, - HctFile *pFile, - u32 iRoot -){ - int rc = SQLITE_OK; - FreeTreeCtx ctx; - ctx.pPManClient = p; - ctx.pFile = pFile; - rc = sqlite3HctDbWalkTree(pFile, iRoot, pmanFreeTreeCb, (void*)&ctx); - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageClearIsRoot(pFile, iRoot); - } - return rc; -} - -#if 0 -static void pman_debug( - HctPManClient *pClient, - const char *zOp, - int bLogical, - u32 iPg, - i64 iTid -){ - printf("pman: (%p) %s %s page %d - tid=%lld\n", pClient, - zOp, bLogical ? "LOGICAL" : "PHYSICAL", (int)iPg, iTid - ); - fflush(stdout); -} - -static void pman_debug_new_pageset( - HctPManPageset *pPageSet, - int bLogical, - u64 iSafeTid, - u64 iServerTid -){ - printf( - "pman: new %s pageset - safetid=%lld servertid=%lld\n", - bLogical ? 
"LOGICAL" : "PHYSICAL", iSafeTid, iServerTid - ); - fflush(stdout); -} -#else - -# define pman_debug(a,b,c,d,e) -# define pman_debug_new_pageset(a,b,c,d) - -#endif - -/* -** Ensure that the circular buffer identified by bLogical has at least -** nPg free slots in it. -*/ -static int hctPManMakeSpace( - HctPManClient *pClient, - int bLogical, - int nPg -){ - int rc = SQLITE_OK; - HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; - - if( (pSet->nAlloc-pSet->nPg)nPg + nPg; - int nByte = nNew * sizeof(HctPManFreePg); - HctPManFreePg *aNew = (HctPManFreePg*)sqlite3_realloc(pSet->aPg, nByte); - - if( aNew==0 ){ - rc = SQLITE_NOMEM; - }else{ - pSet->aPg = aNew; - if( (pSet->iFirst + pSet->nPg)>pSet->nAlloc ){ - int nExtra = nNew - pSet->nAlloc; - int nStart = pSet->nPg - (pSet->nAlloc - pSet->iFirst); - - if( nExtra>=nStart ){ - memcpy(&aNew[pSet->nAlloc], aNew, nStart*sizeof(HctPManFreePg)); - }else{ - memcpy(&aNew[pSet->nAlloc], aNew, nExtra*sizeof(HctPManFreePg)); - memmove(aNew, &aNew[nExtra], (nStart-nExtra)*sizeof(HctPManFreePg)); - } - } - pSet->nAlloc = nNew; - } - } - - return rc; -} - -static void hctPManAddFree( - HctPManClient *pClient, - int bLogical, - i64 iPg, - i64 iTid -){ - HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; - int iIdx = 0; - - assert( pSet->nPgnAlloc ); - if( iTid==0 ){ - if( pSet->iFirst==0 ) pSet->iFirst = pSet->nAlloc; - pSet->iFirst--; - iIdx = pSet->iFirst; - }else{ - iIdx = (pSet->iFirst + pSet->nPg) % pSet->nAlloc; - } - - pSet->nPg++; - pSet->aPg[iIdx].pgno = iPg; - pSet->aPg[iIdx].iTid = iTid; -} - - -/* -** Allocate a new logical or physical page. 
-*/ -SQLITE_PRIVATE u32 sqlite3HctPManAllocPg( - int *pRc, /* IN/OUT: Error code */ - HctPManClient *pClient, /* page-manager client handle */ - HctFile *pFile, - int bLogical -){ - HctPManServer *p = pClient->pServer; - u64 iSafeTid = sqlite3HctFileSafeTID(pFile); - HctPManFreePgSet *pSet = &pClient->aPgSet[bLogical]; - u32 iRoot = 0; - HctPManPageset *pPgset = 0; - int rc = SQLITE_OK; - - /* Check if the client has a usable page already. If so, return early. */ - if( pSet->nPg>0 && pSet->aPg[pSet->iFirst].iTid<=iSafeTid ){ - u32 pgno = pSet->aPg[pSet->iFirst].pgno; - - pman_debug(pClient, "alloc", bLogical, pgno, pSet->aPg[pSet->iFirst].iTid); - - pSet->iFirst = (pSet->iFirst+1) % pSet->nAlloc; - pSet->nPg--; - return pgno; - } - - do{ - iRoot = 0; - - /* Attempt to allocate a page from the page-manager server. */ - ENTER_PMAN_MUTEX(pClient); - if( p->nTree>0 && p->aTree[0].iTid<=iSafeTid ){ - /* A tree structure that can be traversed to find free pages. */ - iRoot = p->aTree[0].iRoot; - p->nTree--; - memmove(&p->aTree[0], &p->aTree[1], (p->nTree)*sizeof(HctPManTree)); - }else{ - struct HctPManServerList *pList = &p->aList[bLogical]; - if( pList->pHead && pList->pHead->iMaxTid<=iSafeTid ){ - /* A page-set object full of usable pages */ - pPgset = pList->pHead; - pList->pHead = pList->pHead->pNext; - if( pList->pHead==0 ) pList->pTail = 0; - } - } - LEAVE_PMAN_MUTEX(pClient); - - /* If a free tree structure was found, iterate through it, returning - ** all physical and logical pages to the server. Then retry the above. 
- */ - if( iRoot ){ - rc = hctPManFreeTreeNow(pClient, pFile, iRoot); - } - }while( iRoot ); - - if( rc==SQLITE_OK ){ - int ii; - if( pPgset ){ - pman_debug_new_pageset(pPgset, bLogical, iSafeTid, pPgset->iMaxTid); - rc = hctPManMakeSpace(pClient, bLogical, pPgset->nPg); - if( rc==SQLITE_OK ){ - for(ii=pPgset->nPg-1; ii>=0; ii--){ - hctPManAddFree(pClient, bLogical, pPgset->aPg[ii], 0); - } - } - }else{ - const int nPageSet = pClient->pConfig->nPageSet; - rc = hctPManMakeSpace(pClient, bLogical, nPageSet); - if( rc==SQLITE_OK ){ - u32 iPg = sqlite3HctFilePageRangeAlloc(pFile, bLogical, nPageSet); - pman_debug_new_pageset(0, bLogical, iSafeTid, -1); - for(ii=nPageSet-1; ii>=0; ii--){ - hctPManAddFree(pClient, bLogical, iPg+ii, 0); - } - } - } - } - sqlite3_free(pPgset); - - if( rc==SQLITE_OK ){ - assert( pSet->nPg>0 && pSet->aPg[pSet->iFirst].iTid<=iSafeTid ); - return sqlite3HctPManAllocPg(pRc, pClient, pFile, bLogical); - } - - /* An error has occurred. Return 0. */ - *pRc = rc; - return 0; -} - -/* -** Free a physical or logical page. 
-*/ -SQLITE_PRIVATE void sqlite3HctPManFreePg( - int *pRc, /* IN/OUT: Error code */ - HctPManClient *pClient, /* page-manager client handle */ - i64 iTid, /* Associated TID value */ - u32 iPg, /* Page number */ - int bLogical /* True for logical, false for physical */ -){ - int rc = SQLITE_OK; - pman_debug(pClient, "free", bLogical, iPg, iTid); - assert( iPg>0 ); - rc = hctPManMakeSpace(pClient, bLogical, 1); - if( rc==SQLITE_OK ){ - hctPManAddFree(pClient, bLogical, iPg, iTid); - } -} - -SQLITE_PRIVATE void sqlite3HctPManClientHandoff(HctPManClient *pClient){ - hctPManHandback(pClient, 0, pClient->aPgSet[0].nPg); - hctPManHandback(pClient, 1, pClient->aPgSet[1].nPg); -} - -SQLITE_PRIVATE int sqlite3HctPManFreeTree( - HctPManClient *p, - HctFile *pFile, - u32 iRoot, - u64 iTid -){ - int rc = SQLITE_OK; - if( iTid==0 ){ - rc = hctPManFreeTreeNow(p, pFile, iRoot); - }else{ - HctPManServer *pServer = p->pServer; - int nNew; - HctPManTree *aNew; - - ENTER_PMAN_MUTEX(p); - nNew = pServer->nTree + 1; - aNew = (HctPManTree*)sqlite3_realloc( - pServer->aTree, nNew*sizeof(HctPManTree) - ); - if( aNew==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - aNew[pServer->nTree].iRoot = iRoot; - aNew[pServer->nTree].iTid = iTid; - pServer->nTree++; - pServer->aTree = aNew; - } - LEAVE_PMAN_MUTEX(p); - } - return rc; -} - -typedef struct InitRootCtx InitRootCtx; -struct InitRootCtx { - HctFile *pFile; - HctPManServer *pServer; - u64 iTid; - u64 iRoot; /* Logical root page of this tree */ -}; - -static int pmanInitRootCb(void *pCtx, u32 iLogic, u32 iPhys){ - InitRootCtx *p = (InitRootCtx*)pCtx; - int rc = SQLITE_OK; - - if( iLogic && !sqlite3HctFilePageIsFree(p->pFile, iLogic, 1) ){ - rc = sqlite3HctFilePageClearInUse(p->pFile, iLogic, 1); - if( iLogiciRoot ){ - sqlite3HctPManServerInit(&rc, p->pServer, p->iTid, iLogic, 1); - } - } - if( iPhys && !sqlite3HctFilePageIsFree(p->pFile, iPhys, 0) && rc==SQLITE_OK ){ - rc = sqlite3HctFilePageClearInUse(p->pFile, iPhys, 0); - if( iPhysiRoot ){ - 
sqlite3HctPManServerInit(&rc, p->pServer, p->iTid, iPhys, 0); - } - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctPManServerInitRoot( - int *pRc, - HctPManServer *pServer, - u64 iTid, - HctFile *pFile, - u32 iRoot -){ - int rc = SQLITE_OK; - InitRootCtx ctx; - ctx.pServer = pServer; - ctx.pFile = pFile; - ctx.iTid = iTid; - ctx.iRoot = iRoot; - rc = sqlite3HctDbWalkTree(pFile, iRoot, pmanInitRootCb, (void*)&ctx); - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageClearIsRoot(pFile, iRoot); - } - return rc; -} - -/************************************************************************* -** Beginning of vtab implemetation. -*************************************************************************/ - -#define HCT_PMAN_SCHEMA \ -" CREATE TABLE hctpman(" \ -" type TEXT," \ -" location TEXT," \ -" pgno INTEGER," \ -" tid INTEGER" \ -" );" - -typedef struct pman_vtab pman_vtab; -typedef struct pman_cursor pman_cursor; -typedef struct HctPmanRow HctPmanRow; - -/* -** Virtual table type for "hctpman". -*/ -struct pman_vtab { - sqlite3_vtab base; /* Base class - must be first */ - sqlite3 *db; -}; - -/* -** Virtual cursor type for "hctpman". -*/ -struct pman_cursor { - sqlite3_vtab_cursor base; /* Base class - must be first */ - int nRow; - int iRow; - HctPmanRow *aRow; -}; - -/* -** Values to return for a single row of the hctpman table. -*/ -struct HctPmanRow { - u8 eType; /* HCT_PMAN_TYPE_* value */ - u8 eLoc; /* HCT_PMAN_LOC_* value */ - u32 pgno; /* Page number */ - i64 iTid; /* Associated TID */ -}; - -#define HCT_PMAN_TYPE_PHYSICAL 0 -#define HCT_PMAN_TYPE_LOGICAL 1 - -#define HCT_PMAN_LOC_USE 0 -#define HCT_PMAN_LOC_ACC 1 -#define HCT_PMAN_LOC_SERVER 2 - -/* -** This xConnect() method is invoked to create a new hctpman virtual table. 
-*/ -static int pmanConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - pman_vtab *pNew; - int rc; - - rc = sqlite3_declare_vtab(db, HCT_PMAN_SCHEMA); - pNew = (pman_vtab*)sqlite3HctMalloc(&rc, sizeof(*pNew)); - if( pNew ){ - pNew->db = db; - } - - *ppVtab = (sqlite3_vtab*)pNew; - return rc; -} - -/* -** This method is the destructor for pman_vtab objects. -*/ -static int pmanDisconnect(sqlite3_vtab *pVtab){ - pman_vtab *p = (pman_vtab*)pVtab; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Constructor for a new pman_cursor object. -*/ -static int pmanOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ - pman_cursor *pCur; - pCur = sqlite3MallocZero(sizeof(*pCur)); - if( pCur==0 ) return SQLITE_NOMEM; - *ppCursor = &pCur->base; - return SQLITE_OK; -} - -/* -** Destructor for a pman_cursor. -*/ -static int pmanClose(sqlite3_vtab_cursor *cur){ - pman_cursor *pCur = (pman_cursor*)cur; - sqlite3_free(pCur->aRow); - sqlite3_free(pCur); - return SQLITE_OK; -} - -/* -** Return TRUE if the cursor has been moved off of the last row of output. -*/ -static int pmanEof(sqlite3_vtab_cursor *cur){ - pman_cursor *pCur = (pman_cursor*)cur; - return pCur->iRow>=pCur->nRow; -} - -/* -** Advance a pman_cursor to its next row of output. -*/ -static int pmanNext(sqlite3_vtab_cursor *cur){ - pman_cursor *pCur = (pman_cursor*)cur; - pCur->iRow++; - return SQLITE_OK; -} - -/* -** Return values of columns for the row at which the pgmap_cursor -** is currently pointing. 
-*/ -static int pmanColumn( - sqlite3_vtab_cursor *cur, /* The cursor */ - sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ - int i /* Which column to return */ -){ - const char *aType[] = {"physical", "logical"}; - const char *aLoc[] = {"use", "acc", "server"}; - pman_cursor *pCur = (pman_cursor*)cur; - - HctPmanRow *pRow = &pCur->aRow[pCur->iRow]; - switch( i ){ - case 0: { /* type */ - sqlite3_result_text(ctx, aType[pRow->eType], -1, SQLITE_STATIC); - break; - } - case 1: { /* location */ - sqlite3_result_text(ctx, aLoc[pRow->eLoc], -1, SQLITE_STATIC); - break; - } - case 2: { /* pgno */ - sqlite3_result_int64(ctx, pRow->pgno); - break; - } - case 3: { /* tid */ - sqlite3_result_int64(ctx, pRow->iTid); - break; - } - } - return SQLITE_OK; -} - -/* -** Return the rowid for the current row. In this implementation, the -** rowid is the same as the slotno value. -*/ -static int pmanRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ - pman_cursor *pCur = (pman_cursor*)cur; - *pRowid = pCur->iRow+1; - return SQLITE_OK; -} - -static int hctPagesetSize(HctPManPageset *pPageset){ - return pPageset ? pPageset->nPg : 0; -} - -static void hctPagesetRows( - pman_cursor *pCur, - HctPManPageset *pPageset, - u8 eType, - u8 eLoc -){ - if( pPageset ){ - int ii; - for(ii=0; iinPg; ii++){ - HctPmanRow *pRow = &pCur->aRow[pCur->nRow++]; - pRow->eType = eType; - pRow->eLoc = eLoc; - pRow->pgno = pPageset->aPg[ii]; - pRow->iTid = pPageset->iMaxTid; - } - } -} - -/* -** This method is called to "rewind" the pman_cursor object back -** to the first row of output. This method is always called at least -** once prior to any call to pmanColumn() or pmanRowid() or -** pmanEof(). 
-*/ -static int pmanFilter( - sqlite3_vtab_cursor *pVtabCursor, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv -){ - pman_cursor *pCur = (pman_cursor*)pVtabCursor; - pman_vtab *pTab = (pman_vtab*)(pCur->base.pVtab); - HctPManClient *pClient = 0; - int nRow = 0; - int ii = 0; - HctPManPageset *pSet = 0; - int rc = SQLITE_OK; - - pCur->iRow = 0; - pCur->nRow = 0; - sqlite3_free(pCur->aRow); - pCur->aRow = 0; - - pClient = sqlite3HctFilePManClient( - sqlite3HctDbFile(sqlite3HctDbFind(pTab->db, 0)) - ); - - ENTER_PMAN_MUTEX(pClient); - for(ii=0; ii<2; ii++){ - nRow += pClient->aPgSet[ii].nPg; - for(pSet=pClient->pServer->aList[ii].pHead; pSet; pSet=pSet->pNext){ - nRow += hctPagesetSize(pSet); - } - } - pCur->aRow = sqlite3HctMalloc(&rc, sizeof(HctPmanRow) * nRow); - if( pCur->aRow ){ - for(ii=0; ii<2; ii++){ - int i2; - HctPManFreePgSet *pPgSet = &pClient->aPgSet[ii]; - for(i2=0; i2nPg; i2++){ - HctPmanRow *pRow = &pCur->aRow[pCur->nRow++]; - int idx = (pPgSet->iFirst + i2) % pPgSet->nAlloc; - pRow->eType = ii; - pRow->eLoc = HCT_PMAN_LOC_USE; - pRow->pgno = pPgSet->aPg[idx].pgno; - pRow->iTid = pPgSet->aPg[idx].iTid; - } - for(pSet=pClient->pServer->aList[ii].pHead; pSet; pSet=pSet->pNext){ - hctPagesetRows(pCur, pSet, ii, HCT_PMAN_LOC_SERVER); - } - } - } - LEAVE_PMAN_MUTEX(pClient); - - return rc; -} - -/* -** SQLite will invoke this method one or more times while planning a query -** that uses the virtual table. This routine needs to create -** a query plan for each invocation and compute an estimated cost for that -** plan. 
-*/ -static int pmanBestIndex( - sqlite3_vtab *tab, - sqlite3_index_info *pIdxInfo -){ - pIdxInfo->estimatedCost = (double)10; - pIdxInfo->estimatedRows = 10; - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctPManVtabInit(sqlite3 *db){ - static sqlite3_module pmanModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ pmanConnect, - /* xBestIndex */ pmanBestIndex, - /* xDisconnect */ pmanDisconnect, - /* xDestroy */ 0, - /* xOpen */ pmanOpen, - /* xClose */ pmanClose, - /* xFilter */ pmanFilter, - /* xNext */ pmanNext, - /* xEof */ pmanEof, - /* xColumn */ pmanColumn, - /* xRowid */ pmanRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0 - }; - - return sqlite3_create_module(db, "hctpman", &pmanModule, 0); -} - - -SQLITE_PRIVATE i64 sqlite3HctPManStats(sqlite3 *db, int iStat, const char **pzStat){ - HctPManClient *pClient = 0; - i64 iVal = -1; - - pClient = sqlite3HctFilePManClient(sqlite3HctDbFile(sqlite3HctDbFind(db, 0))); - switch( iStat ){ - case 0: - *pzStat = "mutex_attempt"; - iVal = pClient->stats.nMutex; - break; - case 1: - *pzStat = "mutex_block"; - iVal = pClient->stats.nMutexBlock; - break; - default: - break; - } - - return iVal; -} - - - -/************** End of hct_pman.c ********************************************/ -/************** Begin file hctree.c ******************************************/ -/* -** 2004 April 6 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. 
-** -************************************************************************* -** -*/ - -/* #include "sqliteInt.h" */ -/* #include "hctInt.h" */ - -/* #include */ -/* #include */ -/* #include */ -/* #include */ -#include - -#ifdef SQLITE_ENABLE_HCT - -typedef struct BtSchemaOp BtSchemaOp; - -typedef struct HBtree HBtree; -typedef struct HBtCursor HBtCursor; -typedef struct HctLogFile HctLogFile; -typedef struct HctMainStats HctMainStats; - - -/* -** An object to help with writing a log file. -*/ -struct HctLogFile { - int fd; /* File descriptor open on log file */ - char *zLogFile; /* Full path to log file */ - u8 *aBuf; /* malloc'd buffer for writing log file */ - int nBuf; /* Size of aBuf[] in bytes */ - i64 iFileOff; /* Current write offset in file */ - int iBufferOff; /* Current write offset in buffer */ -}; - -struct HctMainStats { - i64 nRetry; - i64 nRetryKey; - i64 nKeyOp; -}; - -/* -** aSchemaOp[]: -** Array of nSchemaOp BtSchemaOp structures. Each such structure represents -** a new table or index created by the current transaction. -** aSchemaOp[x].iSavepoint contains the open savepoint count when the table -** with root page aSchemaOp[x].pgnoRoot was created. The value -** HBtree.db->nSavepoint. -** -** eTrans: -** Set to SQLITE_TXN_NONE, READ or WRITE to indicate the type of -** transaction that is open. 
This is set by the following functions: -** -** sqlite3HctBtreeBeginTrans() -** sqlite3HctBtreeCommitPhaseTwo() -** sqlite3HctBtreeRollback() -*/ -struct HBtree { - BtreeMethods *pMethods; - - HctConfig config; /* Configuration for this connection */ - HctTree *pHctTree; /* In-memory part of database */ - HctDatabase *pHctDb; /* On-disk part of db, if any */ - void *pSchema; /* Memory from sqlite3HctBtreeSchema() */ - void(*xSchemaFree)(void*); /* Function to free pSchema */ - int eTrans; /* SQLITE_TXN_NONE, READ or WRITE */ - HBtCursor *pCsrList; /* List of all open cursors */ - - int nSchemaOp; - BtSchemaOp *aSchemaOp; - int nRollbackOp; - - int openFlags; - HctLogFile *pLog; /* Object for writing to log file */ - u32 iNextRoot; /* Next root page to allocate if pHctDb==0 */ - u32 aMeta[SQLITE_N_BTREE_META]; /* 16 database meta values */ - int eMetaState; - - int bRecoveryDone; -#if 0 - u64 iJrnlRoot; /* Root of sqlite_hct_journal */ - u64 iBaseRoot; /* Root of sqlite_hct_baseline */ -#endif - HctJournal *pHctJrnl; - - Pager *pFakePager; - HctMainStats stats; -}; - -/* -** Another candidate value for HBtree.eTrans. Must be different from -** SQLITE_TXN_NONE, SQLITE_TXN_READ and SQLITE_TXN_WRITE. -*/ -#define SQLITE_TXN_ERROR 4 - -/* -** Candidate values for HBtree.eMetaState. -*/ -#define HCT_METASTATE_NONE 0 -#define HCT_METASTATE_READ 1 - -/* -** A schema op. 
-*/ -struct BtSchemaOp { - int iSavepoint; - int eSchemaOp; - u32 pgnoRoot; -}; - -/* -** Candidate values for BtSchemaOp.eSchemaOp -*/ -#define HCT_SCHEMAOP_DROP 1 -#define HCT_SCHEMAOP_CREATE_INTKEY 2 -#define HCT_SCHEMAOP_CREATE_INDEX 3 - - -struct HBtCursor { - BtCursorMethods *pMethods; - - HBtree *pBtree; - HctTreeCsr *pHctTreeCsr; - HctDbCsr *pHctDbCsr; - int bUseTree; /* 1 if tree-csr is current entry, else 0 */ - int eDir; /* One of BTREE_DIR_NONE, FORWARD, REVERSE */ - - int isLast; /* Csr has not moved since BtreeLast() */ - - KeyInfo *pKeyInfo; /* For non-intkey tables */ - int errCode; - int wrFlag; /* Value of wrFlag when cursor opened */ - HBtCursor *pCsrNext; /* Next element in Btree.pCsrList list */ -}; - - -#ifdef SQLITE_TEST -SQLITE_PRIVATE BtShared *SQLITE_WSD sqlite3SharedCacheList = 0; -#endif - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** Enable or disable the shared pager and schema features. -** -** This routine has no effect on existing database connections. -** The shared cache setting effects only future calls to -** sqlite3_open(), sqlite3_open16(), or sqlite3_open_v2(). -*/ -SQLITE_API int sqlite3_enable_shared_cache(int enable){ - sqlite3GlobalConfig.sharedCacheEnabled = enable; - return SQLITE_OK; -} -#endif - - -/* -** Return an reset the seek counter for a Btree object. -*/ -SQLITE_PRIVATE sqlite3_uint64 sqlite3HctBtreeSeekCount(Btree *pBt){ - assert( 0 ); - return 0; -} - -/* -** Clear the current cursor position. -*/ -SQLITE_PRIVATE void sqlite3HctBtreeClearCursor(BtCursor *pCur){ - HBtCursor *pCsr = (HBtCursor*)pCur; - sqlite3HctDbCsrClear(pCsr->pHctDbCsr); - sqlite3HctTreeCsrClear(pCsr->pHctTreeCsr); -} - -/* -** Determine whether or not a cursor has moved from the position where -** it was last placed, or has been invalidated for any other reason. -** Cursors can move when the row they are pointing at is deleted out -** from under them, for example. Cursor might also move if a btree -** is rebalanced. 
-** -** Calling this routine with a NULL cursor pointer returns false. -** -** Use the separate sqlite3HctBtreeCursorRestore() routine to restore a cursor -** back to where it ought to be if this routine returns true. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCursorHasMoved(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - return sqlite3HctTreeCsrHasMoved(pCur->pHctTreeCsr); -} - -/* -** Return a pointer to a fake BtCursor object that will always answer -** false to the sqlite3HctBtreeCursorHasMoved() routine above. The fake -** cursor returned must not be used with any other Btree interface. -*/ -#if 0 -SQLITE_PRIVATE BtCursor *sqlite3HctBtreeFakeValidCursor(void){ - static BtCursor csr = {0,0,0}; - return &csr; -} -#endif - -/* -** This routine restores a cursor back to its original position after it -** has been moved by some outside activity (such as a btree rebalance or -** a row having been deleted out from under the cursor). -** -** On success, the *pDifferentRow parameter is false if the cursor is left -** pointing at exactly the same row. *pDifferntRow is the row the cursor -** was pointing to has been deleted, forcing the cursor to point to some -** nearby row. -** -** This routine should only be called for a cursor that just returned -** TRUE from sqlite3HctBtreeCursorHasMoved(). -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCursorRestore(BtCursor *pCursor, int *pDifferentRow){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - return sqlite3HctTreeCsrRestore(pCur->pHctTreeCsr, pDifferentRow); -} - -/* -** Return the size of the database file in pages. If there is any kind of -** error, return ((unsigned int)-1). -*/ -SQLITE_PRIVATE Pgno sqlite3HctBtreeLastPage(Btree *p){ - return 0xFFFFFFFF; -} - -/* -** Provide flag hints to the cursor. 
-*/ -SQLITE_PRIVATE void sqlite3HctBtreeCursorHintFlags(BtCursor *pCur, unsigned x){ - /* no-op */ - assert( x==BTREE_SEEK_EQ || x==BTREE_BULKLOAD || x==0 ); -} - -typedef struct RecoverCsr RecoverCsr; -struct RecoverCsr { - HctDbCsr *pCsr; /* Cursor to read from database on disk */ - HctTreeCsr *pTreeCsr; /* Cursor to write to in-memory tree */ - UnpackedRecord *pRec; /* Used to seek both cursors */ - KeyInfo *pKeyInfo; -}; - -static void hctRecoverCursorClose(HBtree *p, RecoverCsr *pCsr){ - sqlite3HctDbCsrClose(pCsr->pCsr); - sqlite3HctTreeCsrClose(pCsr->pTreeCsr); - sqlite3DbFree(p->config.db, pCsr->pRec); - sqlite3KeyInfoUnref(pCsr->pKeyInfo); - memset(pCsr, 0, sizeof(RecoverCsr)); -} - -static int hctFindKeyInfo(HBtree *p, u32 iRoot, KeyInfo **ppKeyInfo){ - Schema *pSchema = (Schema*)p->pSchema; - int rc = SQLITE_OK; - HashElem *pE = 0; - KeyInfo *pKeyInfo = 0; - - /* Search the database schema for an index with root page iRoot. If - ** one is found, extract a KeyInfo reference. */ - for(pE=sqliteHashFirst(&pSchema->tblHash); pE; pE=sqliteHashNext(pE)){ - Index *pIdx = 0; - Table *pTab = (Table*)sqliteHashData(pE); - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - if( pIdx->tnum==iRoot ){ - Parse sParse; - Parse *pSave = 0; - memset(&sParse, 0, sizeof(sParse)); - sParse.db = p->config.db; - pSave = sParse.db->pParse; - sParse.db->pParse = &sParse; - pKeyInfo = sqlite3KeyInfoOfIndex(&sParse, pIdx); - sParse.db->pParse = pSave; - rc = sParse.rc; - sqlite3DbFree(sParse.db, sParse.zErrMsg); - break; - } - } - if( pTab->tnum==iRoot ) break; - } - - *ppKeyInfo = pKeyInfo; - return rc; -} - -/* -** -*/ -static int hctRecoverCursorOpen( - HBtree *p, - u32 iRoot, - RecoverCsr *pCsr -){ - int rc = SQLITE_OK; - memset(pCsr, 0, sizeof(RecoverCsr)); - - rc = hctFindKeyInfo(p, iRoot, &pCsr->pKeyInfo); - assert( rc==SQLITE_OK || pCsr->pKeyInfo==0 ); - if( pCsr->pKeyInfo ){ - pCsr->pRec = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo); - if( pCsr->pRec==0 ) rc = 
SQLITE_NOMEM_BKPT; - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbCsrOpen(p->pHctDb, pCsr->pKeyInfo, iRoot, &pCsr->pCsr); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctTreeCsrOpen(p->pHctTree, iRoot, &pCsr->pTreeCsr); - } - - return rc; -} - -#if 1 -# define hctRecoverDebug(v,w,x,y,z) -#else -static void hctRecoverDebug( - RecoverCsr *p, - const char *zType, - i64 iKey, - const u8 *aKey, - int nKey -){ - if( p->pRec==0 ){ - printf("recover-%s: %lld\n", zType, iKey); - }else{ - char *zText = sqlite3HctDbRecordToText(0, aKey, nKey); - printf("recover-%s: %s\n", zType, zText); - sqlite3_free(zText); - } - fflush(stdout); -} -#endif - -/* -** This object is used to read a log file from disk. It is manipulated using -** the following API: -** -** hctLogReaderOpen() -** hctLogReaderNext() -** hctLogReaderClose() -** -** Log file format consists of an 8-byte TID value followed by one or more -** records. Each record is: -** -** * 32-bit root page number, -** * 32-bit size of key field (nKey), -** * if( nKey==0 ) 64-bit rowid key, -** * if( nKey!=0 ) nKey byte blob key. 
-*/ -typedef struct HctLogReader HctLogReader; -struct HctLogReader { - u8 *aFile; /* Buffer containing log file contents */ - int nFile; /* Size of aFile[] in bytes */ - int iFile; /* Offset of next record in aFile[] */ - - i64 iTid; /* TID value for log file */ - int bEof; /* True if reader has hit EOF */ - - /* Valid only if bEof==0 */ - i64 iRoot; /* Root page for current entry */ - i64 iKey; /* Integer key for current entry (aKey==0) */ - int nKey; /* Size of aKey[] buffer */ - u8 *aKey; /* Blob key for current entry */ -}; - -static void hctLogReaderNext(HctLogReader *pReader){ - u32 aInt[2]; - - if( (pReader->iFile + sizeof(aInt))>pReader->nFile ){ - pReader->bEof = 1; - }else{ - memcpy(aInt, &pReader->aFile[pReader->iFile], sizeof(aInt)); - pReader->iRoot = (i64)aInt[0]; - if( pReader->iRoot==0 ){ - pReader->bEof = 1; - }else{ - pReader->nKey = (int)aInt[1]; - pReader->iFile += sizeof(aInt); - if( pReader->nKey==0 ){ - pReader->aKey = 0; - if( pReader->iFile+sizeof(i64)>pReader->nFile ){ - pReader->bEof = 1; - }else{ - memcpy(&pReader->iKey, &pReader->aFile[pReader->iFile], sizeof(i64)); - pReader->iFile += sizeof(i64); - } - }else{ - pReader->iKey = 0; - if( pReader->iFile+pReader->nKey>pReader->nFile ){ - pReader->bEof = 1; - }else{ - pReader->aKey = &pReader->aFile[pReader->iFile]; - pReader->iFile += pReader->nKey; - } - } - } - } -} - -static void hctLogReaderClose(HctLogReader *pReader){ - sqlite3_free(pReader->aFile); - memset(pReader, 0, sizeof(*pReader)); -} - -static int hctLogReaderOpen(const char *zFile, HctLogReader *pReader){ - int rc = SQLITE_OK; - int fd = -1; - - memset(pReader, 0, sizeof(*pReader)); - fd = open(zFile, O_RDONLY); - if( fd<0 ){ - rc = sqlite3HctIoerr(SQLITE_IOERR); - }else{ - struct stat sStat; - - memset(&sStat, 0, sizeof(sStat)); - fstat(fd, &sStat); - pReader->nFile = (int)sStat.st_size; - pReader->aFile = (u8*)sqlite3HctMalloc(&rc, pReader->nFile + 8); - if( pReader->aFile ){ - int nRead = read(fd, pReader->aFile, 
pReader->nFile); - if( nRead!=pReader->nFile ){ - rc = sqlite3HctIoerr(SQLITE_IOERR); - }else{ - memcpy(&pReader->iTid, pReader->aFile, sizeof(i64)); - pReader->iFile = sizeof(i64); - if( pReader->iTid==0 ){ - pReader->bEof = 1; - }else{ - hctLogReaderNext(pReader); - } - } - } - - close(fd); - } - - return rc; -} - - -static int btreeFlushData(HBtree *p, int bRollback); - -static int hctRecoverOne(void *pCtx, const char *zFile){ - HBtree *p = (HBtree*)pCtx; - int rc = SQLITE_OK; - u32 iPrevRoot = 0; - RecoverCsr csr; - HctLogReader rdr; - - memset(&csr, 0, sizeof(csr)); - rc = hctLogReaderOpen(zFile, &rdr); - if( rc==SQLITE_OK && rdr.bEof==0 ){ - - assert( rdr.iTid!=0 ); - sqlite3HctDbRollbackMode(p->pHctDb, 2); - sqlite3HctDbRecoverTid(p->pHctDb, rdr.iTid); - for(/* no-op */; rdr.bEof==0; hctLogReaderNext(&rdr)){ - int op = 0; - - if( rdr.iRoot!=iPrevRoot ){ - iPrevRoot = rdr.iRoot; - hctRecoverCursorClose(p, &csr); - rc = hctRecoverCursorOpen(p, rdr.iRoot, &csr); - } - - if( rdr.nKey ){ - sqlite3VdbeRecordUnpack(csr.pKeyInfo, rdr.nKey, rdr.aKey, csr.pRec); - } - rc = sqlite3HctDbCsrRollbackSeek(csr.pCsr, csr.pRec, rdr.iKey, &op); - - if( rc==SQLITE_OK && op!=0 ){ - HctTreeCsr *pTCsr = csr.pTreeCsr; - if( op<0 ){ - /* rollback requires deleting the key */ - hctRecoverDebug(&csr, "delete", rdr.iKey, rdr.aKey, rdr.nKey); - rc = sqlite3HctTreeDeleteKey( - pTCsr, csr.pRec, rdr.iKey, rdr.nKey, rdr.aKey - ); - }else if( op>0 ){ - const u8 *aOld = 0; - int nOld = 0; - rc = sqlite3HctDbCsrData(csr.pCsr, &nOld, &aOld); - if( rc==SQLITE_OK ){ - hctRecoverDebug(&csr, "insert", rdr.iKey, aOld, nOld); - rc = sqlite3HctTreeInsert(pTCsr, csr.pRec, rdr.iKey, nOld, aOld, 0); - } - } - } - } - hctRecoverCursorClose(p, &csr); - - if( rc==SQLITE_OK ){ - rc = btreeFlushData(p, 0); - } - sqlite3HctDbRollbackMode(p->pHctDb, 0); - if( rc==SQLITE_OK && p->pHctJrnl ){ - rc = sqlite3HctJrnlRollbackEntry(p->pHctJrnl, rdr.iTid); - } - sqlite3HctDbRecoverTid(p->pHctDb, 0); - } - - if( 
rc==SQLITE_OK ){ - /* TODO!!! */ - unlink(zFile); - } - hctLogReaderClose(&rdr); - return rc; -} - -static int hctRecoverLogs(HBtree *p){ - HctFile *pFile = sqlite3HctDbFile(p->pHctDb); - return sqlite3HctFileFindLogs(pFile, (void*)p, hctRecoverOne); -} - - -/* -** Free a pLog object and close the associated log file handle. If parameter -** bUnlink is true, also unlink() the log file. -*/ -static void hctLogFileClose(HctLogFile *pLog, int bUnlink){ - if( pLog ){ - close(pLog->fd); - if( bUnlink ) unlink(pLog->zLogFile); - sqlite3_free(pLog->zLogFile); - sqlite3_free(pLog->aBuf); - sqlite3_free(pLog); - } -} - -/* -** Open a log file object. -*/ -static int hctLogFileOpen(char *zLogFile, int nBuf, HctLogFile **ppLog){ - int rc = SQLITE_OK; - HctLogFile *pLog; - - pLog = (HctLogFile*)sqlite3HctMalloc(&rc, sizeof(HctLogFile)); - if( pLog ){ - pLog->zLogFile = zLogFile; - pLog->fd = open(zLogFile, O_CREAT|O_RDWR, 0644); - if( pLog->fd<0 ){ - rc = SQLITE_CANTOPEN_BKPT; - }else{ - pLog->nBuf = nBuf; - pLog->aBuf = sqlite3HctMalloc(&rc, nBuf); - } - } - - if( rc!=SQLITE_OK ){ - hctLogFileClose(pLog, 0); - pLog = 0; - } - - *ppLog = pLog; - return rc; -} - -static int hctLogFileWrite(HctLogFile *pLog, const void *aData, int nData){ - int nRem = nData; - const u8 *aRem = (u8*)aData; - - assert( pLog->iBufferOff<=pLog->nBuf ); - while( 1 ){ - - int nCopy = MIN(pLog->nBuf - pLog->iBufferOff, nRem); - if( nCopy>0 ){ - memcpy(&pLog->aBuf[pLog->iBufferOff], aRem, nCopy); - pLog->iBufferOff += nCopy; - nRem -= nCopy; - if( nRem==0 ) break; - aRem += nCopy; - } - - if( write(pLog->fd, pLog->aBuf, pLog->nBuf)!=pLog->nBuf ){ - return sqlite3HctIoerr(SQLITE_IOERR_WRITE); - } - pLog->iFileOff += pLog->nBuf; - pLog->iBufferOff = 0; - } - - return SQLITE_OK; -} - - -static void hctLogFileRestart(HctLogFile *pLog){ - memset(pLog->aBuf, 0, 8); - lseek(pLog->fd, 0, SEEK_SET); - pLog->iFileOff = 0; - pLog->iBufferOff = 8; -} - - -static int hctLogFileWriteTid(HctLogFile *pLog, u64 iTid){ - 
lseek(pLog->fd, 0, SEEK_SET); - if( write(pLog->fd, &iTid, sizeof(iTid))!=sizeof(iTid) ){ - return sqlite3HctIoerr(SQLITE_IOERR_WRITE); - } - return SQLITE_OK; -} - -static int hctLogFileFinish(HctLogFile *pLog, u64 iTid){ - int rc = SQLITE_OK; - int bDone = 0; - if( pLog->iFileOff==0 ){ - bDone = 1; - memcpy(pLog->aBuf, &iTid, sizeof(iTid)); - } - if( rc==SQLITE_OK ){ - static const u8 aZero[8] = {0,0,0,0, 0,0,0,0}; - rc = hctLogFileWrite(pLog, aZero, sizeof(aZero)); - if( rc==SQLITE_OK ){ - assert( pLog->iBufferOff>0 ); - if( write(pLog->fd, pLog->aBuf, pLog->iBufferOff)!=pLog->iBufferOff ){ - rc = sqlite3HctIoerr(SQLITE_IOERR_WRITE); - } - } - } - if( bDone==0 && rc==SQLITE_OK ){ - rc = hctLogFileWriteTid(pLog, iTid); - } - return rc; -} - -static int btreeLogFileZero(HctLogFile *pLog){ - return hctLogFileWriteTid(pLog, 0); -} - - -/* -** Open a database file. -** -** zFilename is the name of the database file. If zFilename is NULL -** then an ephemeral database is created. The ephemeral database might -** be exclusively in memory, or it might use a disk-based memory cache. -** Either way, the ephemeral database will be automatically deleted -** when sqlite3HctBtreeClose() is called. -** -** If zFilename is ":memory:" then an in-memory database is created -** that is automatically destroyed when it is closed. -** -** The "flags" parameter is a bitmask that might contain bits like -** BTREE_OMIT_JOURNAL and/or BTREE_MEMORY. -** -** If the database is already opened in the same database connection -** and we are in shared cache mode, then the open will fail with an -** SQLITE_CONSTRAINT error. We cannot allow two or more BtShared -** objects in the same database connection since doing so will lead -** to problems with locking. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeOpen( - sqlite3_vfs *pVfs, /* VFS to use for this b-tree */ - const char *zFilename, /* Name of the file containing the BTree database */ - sqlite3 *db, /* Associated database handle */ - Btree **ppBtree, /* Pointer to new Btree object written here */ - int flags, /* Options */ - int vfsFlags /* Flags passed through to sqlite3_vfs.xOpen() */ -){ - int rc = SQLITE_OK; - HBtree *pNew; - - assert( (flags & BTREE_SINGLE)==0 && zFilename && zFilename[0] ); - - pNew = (HBtree*)sqlite3_malloc(sizeof(HBtree)); - if( pNew ){ - memset(pNew, 0, sizeof(HBtree)); - pNew->iNextRoot = 2; - pNew->config.db = db; - pNew->openFlags = flags; - pNew->config.nDbFile = HCT_DEFAULT_NDBFILE; - pNew->config.nPageSet = HCT_DEFAULT_NPAGESET; - pNew->config.nTryBeforeUnevict = HCT_DEFAULT_NTRYBEFOREUNEVICT; - pNew->config.nPageScan = HCT_DEFAULT_NPAGESCAN; - pNew->config.szLogChunk = HCT_DEFAULT_SZLOGCHUNK; - pNew->config.pgsz = HCT_DEFAULT_PAGESIZE; - rc = sqlite3HctTreeNew(&pNew->pHctTree); - pNew->pFakePager = (Pager*)sqlite3HctMalloc(&rc, 4096); - }else{ - rc = SQLITE_NOMEM; - } - - if( rc==SQLITE_OK && zFilename && zFilename[0] ){ - pNew->pHctDb = sqlite3HctDbOpen(&rc, zFilename, &pNew->config); - } - - if( rc!=SQLITE_OK ){ - sqlite3HctBtreeClose((Btree*)pNew); - pNew = 0; - } - *ppBtree = (Btree*)pNew; - return rc; -} - -/* -** Close an open database and invalidate all cursors. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeClose(Btree *pBt){ - HBtree *const p = (HBtree*)pBt; - if( p ){ - while(p->pCsrList){ - sqlite3HctBtreeCloseCursor((BtCursor*)p->pCsrList); - } - hctLogFileClose(p->pLog, 1); - sqlite3HctBtreeRollback((Btree*)p, SQLITE_OK, 0); - sqlite3HctBtreeCommit((Btree*)p); - if( p->xSchemaFree ){ - p->xSchemaFree(p->pSchema); - } - sqlite3_free(p->pSchema); - sqlite3HctJournalClose(p->pHctJrnl); - sqlite3HctTreeFree(p->pHctTree); - sqlite3HctDbClose(p->pHctDb); - sqlite3_free(p->aSchemaOp); - sqlite3_free(p->pFakePager); - sqlite3_free(p); - } - return SQLITE_OK; -} - -/* -** Change the "soft" limit on the number of pages in the cache. -** Unused and unmodified pages will be recycled when the number of -** pages in the cache exceeds this soft limit. But the size of the -** cache is allowed to grow larger than this limit if it contains -** dirty pages or pages still in active use. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeSetCacheSize(Btree *p, int mxPage){ - /* no-op in hct */ - return SQLITE_OK; -} - -/* -** Change the "spill" limit on the number of pages in the cache. -** If the number of pages exceeds this limit during a write transaction, -** the pager might attempt to "spill" pages to the journal early in -** order to free up memory. -** -** The value returned is the current spill size. If zero is passed -** as an argument, no changes are made to the spill size setting, so -** using mxPage of 0 is a way to query the current spill size. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeSetSpillSize(Btree *p, int mxPage){ - return 1024; -} - -#if SQLITE_MAX_MMAP_SIZE>0 -/* -** Change the limit on the amount of the database file that may be -** memory mapped. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeSetMmapLimit(Btree *p, sqlite3_int64 szMmap){ - /* assert( 0 ); */ - return SQLITE_OK; -} -#endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -/* -** Change the way data is synced to disk in order to increase or decrease -** how well the database resists damage due to OS crashes and power -** failures. Level 1 is the same as asynchronous (no syncs() occur and -** there is a high probability of damage) Level 2 is the default. There -** is a very low but non-zero probability of damage. Level 3 reduces the -** probability of damage to near zero but with a write performance reduction. -*/ -#ifndef SQLITE_OMIT_PAGER_PRAGMAS -SQLITE_PRIVATE int sqlite3HctBtreeSetPagerFlags( - Btree *p, /* The btree to set the safety level on */ - unsigned pgFlags /* Various PAGER_* flags */ -){ - /* HCT - does this need fixing? */ - return SQLITE_OK; -} -#endif - -/* -** Change the default pages size and the number of reserved bytes per page. -** Or, if the page size has already been fixed, return SQLITE_READONLY -** without changing anything. -** -** The page size must be a power of 2 between 512 and 65536. If the page -** size supplied does not meet this constraint then the page size is not -** changed. -** -** Page sizes are constrained to be a power of two so that the region -** of the database file used for locking (beginning at PENDING_BYTE, -** the first byte past the 1GB boundary, 0x40000000) needs to occur -** at the beginning of a page. -** -** If parameter nReserve is less than zero, then the number of reserved -** bytes per page is left unchanged. -** -** If the iFix!=0 then the BTS_PAGESIZE_FIXED flag is set so that the page size -** and autovacuum mode can no longer be changed. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeSetPageSize(Btree *pBt, int pgsz, int nReserve, int iFix){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_READONLY; - if( p->pHctDb && pgsz>=512 && pgsz<=32768 && 0==(pgsz & (pgsz-1)) ){ - int orig = sqlite3HctDbPagesize(p->pHctDb); - if( orig==0 ){ - p->config.pgsz = pgsz; - rc = SQLITE_OK; - } - } - return rc; -} - -/* -** Return the currently defined page size -*/ -SQLITE_PRIVATE int sqlite3HctBtreeGetPageSize(Btree *pBt){ - HBtree *const p = (HBtree*)pBt; - int pgsz = 1024; - if( p->pHctDb ){ - pgsz = sqlite3HctDbPagesize(p->pHctDb); - if( pgsz==0 ){ - pgsz = p->config.pgsz; - } - } - p->config.pgsz = pgsz; - return pgsz; -} - -/* -** This function is similar to sqlite3HctBtreeGetReserve(), except that it -** may only be called if it is guaranteed that the b-tree mutex is already -** held. -** -** This is useful in one special case in the backup API code where it is -** known that the shared b-tree mutex is held, but the mutex on the -** database handle that owns *p is not. In this case if sqlite3HctBtreeEnter() -** were to be called, it might collide with some other operation on the -** database handle that owns *p, causing undefined behavior. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeGetReserveNoMutex(Btree *p){ - assert( 0 ); - return 0; -} - -/* -** Return the number of bytes of space at the end of every page that -** are intentually left unused. This is the "reserved" space that is -** sometimes used by extensions. -** -** The value returned is the larger of the current reserve size and -** the latest reserve size requested by SQLITE_FILECTRL_RESERVE_BYTES. -** The amount of reserve can only grow - never shrink. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeGetRequestedReserve(Btree *p){ - return 0; -} - - -/* -** Set the maximum page count for a database if mxPage is positive. -** No changes are made if mxPage is 0 or negative. -** Regardless of the value of mxPage, return the maximum page count. 
-*/ -SQLITE_PRIVATE Pgno sqlite3HctBtreeMaxPageCount(Btree *p, Pgno mxPage){ - return 0xFFFFFFFF; -} - -/* -** Change the values for the BTS_SECURE_DELETE and BTS_OVERWRITE flags: -** -** newFlag==0 Both BTS_SECURE_DELETE and BTS_OVERWRITE are cleared -** newFlag==1 BTS_SECURE_DELETE set and BTS_OVERWRITE is cleared -** newFlag==2 BTS_SECURE_DELETE cleared and BTS_OVERWRITE is set -** newFlag==(-1) No changes -** -** This routine acts as a query if newFlag is less than zero -** -** With BTS_OVERWRITE set, deleted content is overwritten by zeros, but -** freelist leaf pages are not written back to the database. Thus in-page -** deleted content is cleared, but freelist deleted content is not. -** -** With BTS_SECURE_DELETE, operation is like BTS_OVERWRITE with the addition -** that freelist leaf pages are written back into the database, increasing -** the amount of disk I/O. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeSecureDelete(Btree *p, int newFlag){ - return 0; -} - -/* -** Change the 'auto-vacuum' property of the database. If the 'autoVacuum' -** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it -** is disabled. The default value for the auto-vacuum property is -** determined by the SQLITE_DEFAULT_AUTOVACUUM macro. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeSetAutoVacuum(Btree *p, int autoVacuum){ - return SQLITE_OK; -} - -/* -** Return the value of the 'auto-vacuum' property. If auto-vacuum is -** enabled 1 is returned. Otherwise 0. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeGetAutoVacuum(Btree *p){ - /* hct is never in auto-vacuum mode */ - return 0; -} - -/* -** Initialize the first page of the database file (creating a database -** consisting of a single page and no schema objects). Return SQLITE_OK -** if successful, or an SQLite error code otherwise. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeNewDb(Btree *p){ - int rc = SQLITE_OK; - assert( 0 ); - return rc; -} - -static int hctDetectJournals(HBtree *p){ - int rc = SQLITE_OK; - if( p->pHctJrnl==0 ){ - rc = sqlite3HctJournalNewIf( - (Schema*)p->pSchema, p->pHctTree, p->pHctDb, &p->pHctJrnl - ); - } - return rc; -} - -/* -** This is called by sqlite3_hct_journal_init() after the journal and -** baseline tables have been created in the database to initialize the -** journal sub-system. -** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. -*/ -SQLITE_PRIVATE int sqlite3HctDetectJournals(sqlite3 *db){ - HBtree *p = (HBtree*)db->aDb[0].pBt; - int rc = hctDetectJournals(p); - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbStartRead(p->pHctDb, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctJrnlRecovery(p->pHctJrnl, p->pHctDb); - } - sqlite3HctDbEndRead(p->pHctDb); - return rc; -} - -typedef struct HctFreelistCtx HctFreelistCtx; -struct HctFreelistCtx { - /* Physical pages that need to be preserved for log and journal rollback */ - int nAlloc; - int nPg; - i64 *aPg; - - /* Root pages in the current schema */ - int nRootAlloc; - int nRoot; - i64 *aRoot; - - HBtree *p; -}; - -static int hctTopDownMerge( - i64 *aB, - int iBegin1, int iEnd1, - int iBegin2, int iEnd2, - i64 *aA -){ - int i = iBegin1; - int j = iBegin2; - int k; - for(k=iBegin1; i=iEnd2 || aA[i]<=aA[j]) ){ - if( j1 ){ - int iMid = (iEnd + iBegin) / 2; - int i1 = hctTopDownSplitMerge(aA, iBegin, iMid, aB); - int i2 = hctTopDownSplitMerge(aA, iMid, iEnd, aB); - return hctTopDownMerge(aB, iBegin, i1, iMid, i2, aA); - } - return iEnd; -} - -/* -** Sort the array of aPg[] page numbers in ascending order. Discard -** any duplicates. 
-*/ -static void hctFreelistSort(int *pRc, HctFreelistCtx *p){ - if( *pRc==SQLITE_OK && p->nPg>1 ){ - i64 *aWork = (i64*)sqlite3HctMalloc(pRc, p->nPg * sizeof(i64)); - if( aWork ){ - memcpy(aWork, p->aPg, p->nPg * sizeof(i64)); - p->nPg = hctTopDownSplitMerge(p->aPg, 0, p->nPg, aWork); - sqlite3_free(aWork); -#ifdef SQLITE_DEBUG - { - int ii; - for(ii=1; iinPg; ii++){ - assert( p->aPg[ii]>p->aPg[ii-1] ); - } - } -#endif - } - } -} - -static int hctSavePhysical(void *pCtx, i64 iPhys){ - HctFreelistCtx *p = (HctFreelistCtx*)pCtx; - if( p->nPg==p->nAlloc ){ - int nNew = (p->nPg>0) ? p->nPg * 4 : 64; - i64 *aNew = (i64*)sqlite3_realloc(p->aPg, nNew*sizeof(i64));; - if( aNew==0 ) return SQLITE_NOMEM; - p->aPg = aNew; - p->nAlloc = nNew; - } - p->aPg[p->nPg++] = iPhys; - return SQLITE_OK; -} - -static int hctScanOne(void *pCtx, const char *zFile){ - HctFreelistCtx *p = (HctFreelistCtx*)pCtx; - int rc = SQLITE_OK; - HctLogReader rdr; - - sqlite3HctDbSetSavePhysical(p->p->pHctDb, hctSavePhysical, pCtx); - - rc = hctLogReaderOpen(zFile, &rdr); - if( rc==SQLITE_OK && rdr.bEof==0 ){ - u32 iPrevRoot =0; - RecoverCsr csr; - memset(&csr, 0, sizeof(csr)); - sqlite3HctDbRecoverTid(p->p->pHctDb, rdr.iTid); - for(/* no-op */; rc==SQLITE_OK && rdr.bEof==0; hctLogReaderNext(&rdr)){ - - if( rdr.iRoot!=iPrevRoot ){ - hctRecoverCursorClose(p->p, &csr); - rc = hctRecoverCursorOpen(p->p, rdr.iRoot, &csr); - } - - if( rc==SQLITE_OK ){ - int dummy = 0; - if( rdr.nKey ){ - sqlite3VdbeRecordUnpack(csr.pKeyInfo, rdr.nKey, rdr.aKey, csr.pRec); - } - rc = sqlite3HctDbCsrRollbackSeek(csr.pCsr, csr.pRec, rdr.iKey, &dummy); - } - } - - hctRecoverCursorClose(p->p, &csr); - } - - sqlite3HctDbSetSavePhysical(p->p->pHctDb, 0, 0); - hctLogReaderClose(&rdr); - return rc; -} - -static void hctRootpageAdd(int *pRc, HctFreelistCtx *pCtx, i64 iRoot){ - if( *pRc==SQLITE_OK ){ - if( pCtx->nRoot==pCtx->nRootAlloc ){ - int nNew = (pCtx->nRoot>0) ? 
pCtx->nRoot * 4 : 64; - i64 *aNew = (i64*)sqlite3_realloc(pCtx->aRoot, nNew*sizeof(i64));; - if( aNew==0 ){ - *pRc = SQLITE_NOMEM; - return; - } - pCtx->aRoot = aNew; - pCtx->nRootAlloc = nNew; - } - - pCtx->aRoot[pCtx->nRoot++] = iRoot; - } -} - -/* -** Assemble a list of the root pages in the current schema in the -** pCtx->aRoot[] array. -*/ -static void hctRootpageList(int *pRc, HctFreelistCtx *pCtx){ - Schema *pSchema = (Schema*)pCtx->p->pSchema; - HashElem *pE = 0; - for(pE=sqliteHashFirst(&pSchema->tblHash); pE; pE=sqliteHashNext(pE)){ - Table *pTab = (Table*)sqliteHashData(pE); - Index *pIdx = 0; - hctRootpageAdd(pRc, pCtx, pTab->tnum); - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - hctRootpageAdd(pRc, pCtx, pIdx->tnum); - } - } -} - -/* -** This is called as part of recovery, before any log files are rolled back, -** to rebuild the free-page list (or, if you like, to initialize the -** page-manager). This involves the following: -** -** 1) Scanning the sqlite_hct_journal table, if any, from the first hole -** to the last entry to determine the list of physical database pages -** that will be required if sqlite3_hct_journal_rollback() is called. -** -** 2) Scanning each log file that will be rolled back, accumulating a -** list of the physical database pages that will be required to find -** the "old" values required to roll them back. -** -** 3) Scanning the page map, checking for pages with the PHYSICAL_IN_USE -** flag clear. Each such page is added to the free-page list. If the -** page was one of those found in the scans in steps (1) or (2), then -** it is not available for reuse until after tid $TID, and all previous -** tids, have been committed. Otherwise, it is available for reuse -** immediately. -** -** $TID is set to the TID of the next transaction that will be written -** to this database (page-map entry TRANSID_EOF+1). -** -** This is a complicated procedure. 
-*/ -static int hctRecoverFreeList(HBtree *p){ - HctFreelistCtx ctx; - HctFile *pFile = sqlite3HctDbFile(p->pHctDb); - int rc = SQLITE_OK; - - memset(&ctx, 0, sizeof(ctx)); - ctx.p = p; - - /* If this is a replication database, scan all journal entries that may - ** be rolled back using a call to sqlite3_hct_journal_rollback(). Record - ** the set of physical pages that may be required by this call in the - ** ctx.aPg[] array. */ - if( p->pHctJrnl ){ - void *pCtx = (void*)&ctx; - rc = sqlite3HctJrnlSavePhysical( - p->config.db, p->pHctJrnl, hctSavePhysical, pCtx - ); - } - - /* Also scan any log files, adding the list of physical pages that must - ** be preserved to the ctx.aPg[] array. */ - if( rc==SQLITE_OK ){ - sqlite3HctDbRollbackMode(p->pHctDb, 2); - rc = sqlite3HctFileFindLogs(pFile, (void*)&ctx, hctScanOne); - sqlite3HctDbRollbackMode(p->pHctDb, 0); - } - - /* Sort the list of physical page numbers accumulated above. */ - hctFreelistSort(&rc, &ctx); - - /* Assemble a list of root pages. */ - hctRootpageList(&rc, &ctx); - - /* Scan the page-map, taking into account the physical pages that must - ** be preserved, and the set of root pages in the current db schema. */ - if( rc==SQLITE_OK ){ - rc = sqlite3HctFileRecoverFreelists( - pFile, ctx.nRoot, ctx.aRoot, ctx.nPg, ctx.aPg - ); - } - - sqlite3_free(ctx.aPg); - sqlite3_free(ctx.aRoot); - return rc; -} - -static int hctAttemptRecovery(HBtree *p){ - int rc = SQLITE_OK; - if( p->bRecoveryDone==0 ){ - HctFile *pFile = sqlite3HctDbFile(p->pHctDb); - if( p->pHctDb && sqlite3HctFileStartRecovery(pFile, 0) ){ - p->bRecoveryDone = 1; - rc = hctRecoverFreeList(p); - - if( rc==SQLITE_OK ){ - rc = hctRecoverLogs(p); - } - - if( rc==SQLITE_OK && p->pHctJrnl ){ - sqlite3HctDbRollbackMode(p->pHctDb, 0); - rc = sqlite3HctJrnlRecovery(p->pHctJrnl, p->pHctDb); - } - rc = sqlite3HctDbFinishRecovery(p->pHctDb, 0, rc); - } - - p->bRecoveryDone = (rc==SQLITE_OK); - } - - return rc; -} - -/* -** Attempt to start a new transaction. 
A write-transaction -** is started if the second argument is nonzero, otherwise a read- -** transaction. If the second argument is 2 or more and exclusive -** transaction is started, meaning that no other process is allowed -** to access the database. A preexisting transaction may not be -** upgraded to exclusive by calling this routine a second time - the -** exclusivity flag only works for a new transaction. -** -** A write-transaction must be started before attempting any -** changes to the database. None of the following routines -** will work unless a transaction is started first: -** -** sqlite3HctBtreeCreateTable() -** sqlite3HctBtreeCreateIndex() -** sqlite3HctBtreeClearTable() -** sqlite3HctBtreeDropTable() -** sqlite3HctBtreeInsert() -** sqlite3HctBtreeDelete() -** sqlite3HctBtreeUpdateMeta() -*/ -SQLITE_PRIVATE int sqlite3HctBtreeBeginTrans(Btree *pBt, int wrflag, int *pSchemaVersion){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - int req = wrflag ? SQLITE_TXN_WRITE : SQLITE_TXN_READ; - - assert( wrflag==0 || p->pHctDb==0 || pSchemaVersion ); - - if( p->eTrans==SQLITE_TXN_ERROR ) return SQLITE_BUSY_SNAPSHOT; - - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbStartRead(p->pHctDb, p->pHctJrnl); - } - - if( rc==SQLITE_OK && pSchemaVersion ){ - sqlite3HctBtreeGetMeta((Btree*)p, 1, (u32*)pSchemaVersion); - sqlite3HctDbTransIsConcurrent(p->pHctDb, p->config.db->eConcurrent); - } - - if( rc==SQLITE_OK && wrflag ){ - rc = sqlite3HctTreeBegin(p->pHctTree, 1 + p->config.db->nSavepoint); - } - if( rc==SQLITE_OK && p->eTranseTrans = req; - } - return rc; -} - -/* -** This is called just after the schema is loaded for b-tree pBt. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeSchemaLoaded(Btree *pBt){ - int rc = SQLITE_OK; - HBtree *const p = (HBtree*)pBt; - if( p->bRecoveryDone==0 ){ - rc = hctDetectJournals(p); - if( rc==SQLITE_OK ){ - rc = hctAttemptRecovery(p); - } - if( rc==SQLITE_OK ){ - sqlite3HctDbEndRead(p->pHctDb); - } - } - if( rc==SQLITE_OK && p->pHctJrnl ){ - sqlite3HctJournalFixSchema(p->pHctJrnl, p->config.db, p->pSchema); - } - return rc; -} - -/* -** A write-transaction must be opened before calling this function. -** It performs a single unit of work towards an incremental vacuum. -** -** If the incremental vacuum is finished after this function has run, -** SQLITE_DONE is returned. If it is not finished, but no error occurred, -** SQLITE_OK is returned. Otherwise an SQLite error code. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeIncrVacuum(Btree *p){ - return SQLITE_DONE; -} - -/* -** This routine does the first phase of a two-phase commit. This routine -** causes a rollback journal to be created (if it does not already exist) -** and populated with enough information so that if a power loss occurs -** the database can be restored to its original state by playing back -** the journal. Then the contents of the journal are flushed out to -** the disk. After the journal is safely on oxide, the changes to the -** database are written into the database file and flushed to oxide. -** At the end of this call, the rollback journal still exists on the -** disk and we are still holding all locks, so the transaction has not -** committed. See sqlite3HctBtreeCommitPhaseTwo() for the second phase of the -** commit process. -** -** This call is a no-op if no write-transaction is currently active on pBt. -** -** Otherwise, sync the database file for the btree pBt. zSuperJrnl points to -** the name of a super-journal file that should be written into the -** individual journal file, or is NULL, indicating no super-journal file -** (single database transaction). 
-** -** When this is called, the super-journal should already have been -** created, populated with this journal pointer and synced to disk. -** -** Once this is routine has returned, the only thing required to commit -** the write-transaction for this database file is to delete the journal. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseOne(Btree *p, const char *zSuperJrnl){ - /* Everything happens in sqlite3HctBtreeCommitPhaseTwo() */ - return SQLITE_OK; -} - -typedef struct FlushOneCtx FlushOneCtx; -struct FlushOneCtx { - HBtree *p; - int bRollback; -}; - -static int btreeFlushOneToDisk(void *pCtx, u32 iRoot, KeyInfo *pKeyInfo){ - FlushOneCtx *pFC = (FlushOneCtx*)pCtx; - HBtree *p = pFC->p; - int iRollbackDir = pFC->bRollback ? -1 : 1; - - HctDatabase *pDb = p->pHctDb; - HctTreeCsr *pCsr = 0; - int rc; - UnpackedRecord *pRec = 0; - - if( pKeyInfo ){ - pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); - if( pRec==0 ) return SQLITE_NOMEM_BKPT; - } - - rc = sqlite3HctTreeCsrOpen(p->pHctTree, iRoot, &pCsr); - if( rc==SQLITE_OK ){ - for(rc=sqlite3HctTreeCsrFirst(pCsr); rc==SQLITE_OK ; /* no-op */){ - int nRetry = 0; - int ii; - i64 iKey = 0; - int nData = 0; - int bDel = 0; - const u8 *aData = 0; - sqlite3HctTreeCsrKey(pCsr, &iKey); - sqlite3HctTreeCsrData(pCsr, &nData, &aData); - bDel = sqlite3HctTreeCsrIsDelete(pCsr); - if( pRec ) sqlite3VdbeRecordUnpack(pKeyInfo, nData, aData, pRec); - rc = sqlite3HctDbInsert(pDb, iRoot, pRec, iKey, bDel,nData,aData,&nRetry); - p->nRollbackOp += (iRollbackDir * (1 - nRetry)); - if( rc ) break; - p->stats.nKeyOp++; - - if( pFC->bRollback && p->nRollbackOp==0 ){ - assert( nRetry==0 ); - rc = sqlite3HctDbInsertFlush(pDb, &nRetry); - if( rc ) break; - if( nRetry==0 ){ - rc = SQLITE_DONE; - break; - } - p->nRollbackOp = nRetry; - if( sqlite3HctTreeCsrEof(pCsr) ){ - sqlite3HctTreeCsrLast(pCsr); - } - } - - if( nRetry==0 ){ - sqlite3HctTreeCsrNext(pCsr); - if( sqlite3HctTreeCsrEof(pCsr) ){ - rc = sqlite3HctDbInsertFlush(pDb, &nRetry); 
- if( nRetry ){ - sqlite3HctTreeCsrLast(pCsr); - assert( sqlite3HctTreeCsrEof(pCsr)==0 ); - p->nRollbackOp -= (iRollbackDir * nRetry); - }else{ - /* Done - the table has been successfully flushed to disk */ - break; - } - } - }else{ - p->stats.nRetry++; - p->stats.nRetryKey += nRetry; - } - for(ii=1; iidb, pRec); - } - return rc; -} - -static int btreeLogIntkey(HctLogFile *pLog, u32 iRoot, i64 iRowid){ - u8 aBuf[16]; - memcpy(&aBuf[0], &iRoot, sizeof(u32)); - memset(&aBuf[4], 0, sizeof(u32)); - memcpy(&aBuf[8], &iRowid, sizeof(i64)); - return hctLogFileWrite(pLog, aBuf, sizeof(aBuf)); -} - -static int btreeLogIndex( - HctLogFile *pLog, - u32 iRoot, - const u8 *aData, int nData -){ - if( hctLogFileWrite(pLog, &iRoot, sizeof(iRoot)) - || hctLogFileWrite(pLog, &nData, sizeof(nData)) - || hctLogFileWrite(pLog, aData, nData) - ){ - return sqlite3HctIoerr(SQLITE_IOERR_WRITE); - } - return SQLITE_OK; -} - -static int btreeLogOneToDisk(void *pCtx, u32 iRoot, KeyInfo *pKeyInfo){ - HBtree *p = (HBtree*)pCtx; - HctTreeCsr *pCsr = 0; - int rc; - - rc = sqlite3HctTreeCsrOpen(p->pHctTree, iRoot, &pCsr); - if( rc==SQLITE_OK ){ - for(rc=sqlite3HctTreeCsrFirst(pCsr); - rc==SQLITE_OK && sqlite3HctTreeCsrEof(pCsr)==0; - rc=sqlite3HctTreeCsrNext(pCsr) - ){ - if( pKeyInfo ){ - int nData = 0; - const u8 *aData = 0; - sqlite3HctTreeCsrData(pCsr, &nData, &aData); - rc = btreeLogIndex(p->pLog, iRoot, aData, nData); - }else{ - i64 iRowid = 0; - sqlite3HctTreeCsrKey(pCsr, &iRowid); - rc = btreeLogIntkey(p->pLog, iRoot, iRowid); - } - - if( rc!=SQLITE_OK ) break; - } - sqlite3HctTreeCsrClose(pCsr); - } - - return rc; -} - -static int btreeFlushData(HBtree *p, int bRollback){ - int rc = SQLITE_OK; - - if( bRollback ) sqlite3HctDbRollbackMode(p->pHctDb, 1); - if( bRollback && p->nRollbackOp==0 ){ - rc = SQLITE_DONE; - } - - if( rc==SQLITE_OK ){ - FlushOneCtx ctx; - ctx.p = p; - ctx.bRollback = bRollback; - rc = sqlite3HctTreeForeach(p->pHctTree, 0, (void*)&ctx,btreeFlushOneToDisk); - } - if( 
bRollback ) sqlite3HctDbRollbackMode(p->pHctDb, 0); - return rc; -} - -static int btreeWriteLog(HBtree *p){ - int rc = SQLITE_OK; - - if( p->pLog==0 ){ - char *zLog = sqlite3HctDbLogFile(p->pHctDb); - if( zLog==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - rc = hctLogFileOpen(zLog, p->config.szLogChunk, &p->pLog); - } - } - - if( rc==SQLITE_OK ){ - hctLogFileRestart(p->pLog); - rc = sqlite3HctTreeForeach(p->pHctTree, 0, (void*)p, btreeLogOneToDisk); - } - - return rc; -} - -/* -** Flush the contents of Btree.pHctTree to Btree.pHctDb. -*/ -static int btreeFlushToDisk(HBtree *p){ - int i; - int rc = SQLITE_OK; - int rcok = SQLITE_OK; - u64 iTid = 0; - u64 iCid = 0; - int bTmapScan = 0; - int bCustomValid = 0; /* True if xValidate() was invoked */ - - /* Write a log file for this transaction. The TID field is still set - ** to zero at this point. */ - if( p->config.db->bHctMigrate==0 ){ - rc = btreeWriteLog(p); - } - - if( rc==SQLITE_OK ){ - /* Obtain the TID for this transaction. */ - iTid = sqlite3HctJrnlWriteTid(p->pHctJrnl, &iCid); - if( iTid==0 ){ - sqlite3HctDbStartWrite(p->pHctDb, &iTid); - } - - /* Invoke the SQLITE_TESTCTRL_HCT_MTCOMMIT hook, if applicable */ - if( p->config.db->xMtCommit ){ - p->config.db->xMtCommit(p->config.db->pMtCommitCtx, 0); - } - - assert( iTid>0 ); - if( p->pLog ) rc = hctLogFileFinish(p->pLog, iTid); - } - - /* Initialize the root pages of any new tables or indexes created by this - ** transaction. At this point the logical root page numbers have been - ** assigned by the page-manager, but there is no mapped physical page, - ** and the LOGICAL_IN_USE and LOGICAL_IS_ROOT flags are not yet set - ** for the page. This allocates and populates the physical root page, - ** and sets the two flags on the logical page slot. - ** - ** If the current transaction does not commit (i.e. failed validiation), - ** then the new tree is returned to the page-manage to be recycled - ** immediately. 
Or, if a crash occurs, then recovery will see the - ** LOGICAL_IS_ROOT flag on a root page that is not in the sqlite_schema - ** table and free the pages then. */ - for(i=0; rc==SQLITE_OK && inSchemaOp; i++){ - BtSchemaOp *pOp = &p->aSchemaOp[i]; - assert( - pOp->eSchemaOp==HCT_SCHEMAOP_DROP - || pOp->eSchemaOp==HCT_SCHEMAOP_CREATE_INTKEY - || pOp->eSchemaOp==HCT_SCHEMAOP_CREATE_INDEX - ); - if( pOp->eSchemaOp!=HCT_SCHEMAOP_DROP ){ - int bIndex = (pOp->eSchemaOp==HCT_SCHEMAOP_CREATE_INDEX); - rc = sqlite3HctDbRootInit(p->pHctDb, bIndex, pOp->pgnoRoot); - } - } - - /* Write all the new database entries to the database. Any write/write - ** conflicts are detected here - SQLITE_BUSY is returned in that case. */ - p->nRollbackOp = 0; - if( rc==SQLITE_OK ){ - rc = btreeFlushData(p, 0); - } - - /* Assuming the data has been flushed to disk without error or a - ** write/write conflict, allocate a CID and validate the transaction. */ - if( rc==SQLITE_OK ){ - /* Invoke the SQLITE_TESTCTRL_HCT_MTCOMMIT hook, if applicable */ - if( p->config.db->xMtCommit ){ - p->config.db->xMtCommit(p->config.db->pMtCommitCtx, 1); - } - - /* Validate the transaction */ - rc = sqlite3HctDbValidate(p->config.db, p->pHctDb, &iCid, &bTmapScan); - - /* If validation passed and this database is configured for replication, - ** write the journal entry and invoke the custom validation hook */ - if( rc==SQLITE_OK && p->pHctJrnl ){ - rc = sqlite3HctJrnlLog( - p->pHctJrnl, - p->config.db, - (Schema*)p->pSchema, - iCid, iTid, &bCustomValid - ); - } - } - - /* If conflicts have been detected, roll back the transaction */ - assert( rc!=SQLITE_BUSY ); - if( rc==SQLITE_BUSY_SNAPSHOT ){ - rcok = SQLITE_BUSY_SNAPSHOT; - rc = btreeFlushData(p, 1); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - if( iCid>0 && p->pHctJrnl ){ - rc = sqlite3HctJrnlWriteEmpty(p->pHctJrnl, iCid, iTid, - (bCustomValid ? 
0 : p->config.db) - ); - } - } - - for(i=0; rc==SQLITE_OK && inSchemaOp; i++){ - BtSchemaOp *pOp = &p->aSchemaOp[i]; - if( (rcok==SQLITE_OK && pOp->eSchemaOp==HCT_SCHEMAOP_DROP) - || (rcok!=SQLITE_OK && pOp->eSchemaOp!=HCT_SCHEMAOP_DROP) - ){ - HctFile *pFile = sqlite3HctDbFile(p->pHctDb); - rc = sqlite3HctFileTreeFree(pFile, pOp->pgnoRoot, rcok!=SQLITE_OK); - } - } - - /* Zero the log file and set the entry in the transaction-map to - ** finish the transaction. */ - if( rc==SQLITE_OK && p->pLog ){ - rc = btreeLogFileZero(p->pLog); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbEndWrite(p->pHctDb, iCid, rcok!=SQLITE_OK); - } - assert( rc==SQLITE_OK ); - if( bTmapScan ){ - sqlite3HctDbTMapScan(p->pHctDb); - } - - sqlite3HctJrnlInvokeHook(p->pHctJrnl, p->config.db); - return (rc==SQLITE_OK ? rcok : rc); -} - -static void hctEndTransaction(HBtree *p){ - if( p->eTrans>SQLITE_TXN_NONE - && p->pCsrList==0 - && p->config.db->nVdbeRead<=1 - ){ - if( p->pHctDb ){ - sqlite3HctDbEndRead(p->pHctDb); - } - p->eTrans = SQLITE_TXN_NONE; - p->eMetaState = HCT_METASTATE_NONE; - } -} - - -static int hctBtreeMigrateInsert( - HBtCursor *pCur, - UnpackedRecord *pRec, - i64 iKey, - int nData, - const u8 *aData -){ - int rc = SQLITE_OK; - HBtree *p = pCur->pBtree; - int nRetry = 0; - - if( 0==sqlite3HctDbTid(p->pHctDb) ){ - i64 iDummy = 0; - rc = sqlite3HctDbStartWrite(p->pHctDb, &iDummy); - if( rc!=SQLITE_OK ) return rc; - } - - rc = sqlite3HctDbInsert( - p->pHctDb, - sqlite3HctTreeCsrRoot(pCur->pHctTreeCsr), - pRec, iKey, 0, nData, aData, &nRetry - ); - if( nRetry>0 ){ - rc = SQLITE_ABORT; - } - - return rc; -} - -static int hctBtreeMigrateCommit(HBtree *p){ - int rc = SQLITE_OK; - i64 iCid = 0; - int bTmapScan = 0; - int nRetry = 0; - - rc = sqlite3HctDbInsertFlush(p->pHctDb, &nRetry); - if( nRetry>0 ){ - rc = SQLITE_ABORT; - } - - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbValidate(p->config.db, p->pHctDb, &iCid, &bTmapScan); - } - - if( rc==SQLITE_OK ){ - rc = 
sqlite3HctDbEndWrite(p->pHctDb, iCid, 0); - } - - if( bTmapScan ){ - sqlite3HctDbTMapScan(p->pHctDb); - } - - return rc; -} - -#define BT_IS_MIGRATE(pBt) (pBt->config.db->bHctMigrate) -#define CSR_IS_MIGRATE(pCsr) (pCsr->pBtree->config.db->bHctMigrate) - -/* -** Commit the transaction currently in progress. -** -** This routine implements the second phase of a 2-phase commit. The -** sqlite3HctBtreeCommitPhaseOne() routine does the first phase and should -** be invoked prior to calling this routine. The sqlite3HctBtreeCommitPhaseOne() -** routine did all the work of writing information out to disk and flushing the -** contents so that they are written onto the disk platter. All this -** routine has to do is delete or truncate or zero the header in the -** the rollback journal (which causes the transaction to commit) and -** drop locks. -** -** Normally, if an error occurs while the pager layer is attempting to -** finalize the underlying journal file, this function returns an error and -** the upper layer will attempt a rollback. However, if the second argument -** is non-zero then this b-tree transaction is part of a multi-file -** transaction. In this case, the transaction has already been committed -** (by deleting a super-journal file) and the caller will ignore this -** functions return code. So, even if an error occurs in the pager layer, -** reset the b-tree objects internal state to indicate that the write -** transaction has been closed. This is quite safe, as the pager will have -** transitioned to the error state. -** -** This will release the write lock on the database file. If there -** are no active cursors, it also releases the read lock. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeCommitPhaseTwo(Btree *pBt, int bCleanup){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - - if( p->eTrans==SQLITE_TXN_ERROR ) return SQLITE_BUSY_SNAPSHOT; - - if( BT_IS_MIGRATE(p) ){ - rc = hctBtreeMigrateCommit(p); - }else{ - if( p->eTrans==SQLITE_TXN_WRITE ){ - if( p->pCsrList ){ - /* Cannot commit with open cursors in hctree */ - return SQLITE_LOCKED; - } - - sqlite3HctTreeRelease(p->pHctTree, 0); - if( p->pHctDb ){ - rc = btreeFlushToDisk(p); - sqlite3HctTreeClear(p->pHctTree); - p->nSchemaOp = 0; - } - p->eTrans = SQLITE_TXN_READ; - } - } - - if( rc==SQLITE_OK ){ - hctEndTransaction(p); - }else{ - p->eTrans = SQLITE_TXN_ERROR; - } - return rc; -} - -/* -** Do both phases of a commit. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCommit(Btree *pBt){ - int rc; - HBtree *const p = (HBtree*)pBt; - rc = sqlite3HctBtreeCommitPhaseOne((Btree*)p, 0); - if( rc==SQLITE_OK ){ - rc = sqlite3HctBtreeCommitPhaseTwo((Btree*)p, 0); - } - return rc; -} - -/* -** This routine sets the state to CURSOR_FAULT and the error -** code to errCode for every cursor on any BtShared that pBtree -** references. Or if the writeOnly flag is set to 1, then only -** trip write cursors and leave read cursors unchanged. -** -** Every cursor is a candidate to be tripped, including cursors -** that belong to other database connections that happen to be -** sharing the cache with pBtree. -** -** This routine gets called when a rollback occurs. If the writeOnly -** flag is true, then only write-cursors need be tripped - read-only -** cursors save their current positions so that they may continue -** following the rollback. Or, if writeOnly is false, all cursors are -** tripped. In general, writeOnly is false if the transaction being -** rolled back modified the database schema. In this case b-tree root -** pages may be moved or deleted from the database altogether, making -** it unsafe for read cursors to continue. 
-** -** If the writeOnly flag is true and an error is encountered while -** saving the current position of a read-only cursor, all cursors, -** including all read-cursors are tripped. -** -** SQLITE_OK is returned if successful, or if an error occurs while -** saving a cursor position, an SQLite error code. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeTripAllCursors(Btree *pBt, int errCode, int writeOnly){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - if( p ){ - HBtCursor *pCur; - for(pCur=p->pCsrList; pCur; pCur=pCur->pCsrNext){ - if( writeOnly==0 || pCur->wrFlag ){ - sqlite3HctTreeCsrClose(pCur->pHctTreeCsr); - pCur->pHctTreeCsr = 0; - pCur->errCode = errCode; - } - } - } - return rc; -} - -/* -** Rollback the transaction in progress. -** -** If tripCode is not SQLITE_OK then cursors will be invalidated (tripped). -** Only write cursors are tripped if writeOnly is true but all cursors are -** tripped if writeOnly is false. Any attempt to use -** a tripped cursor will result in an error. -** -** This will release the write lock on the database file. If there -** are no active cursors, it also releases the read lock. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeRollback(Btree *pBt, int tripCode, int writeOnly){ - HBtree *const p = (HBtree*)pBt; - - assert( SQLITE_TXN_ERROR==4 && SQLITE_TXN_WRITE==2 ); - assert( SQLITE_TXN_READ==1 && SQLITE_TXN_NONE==0 ); - assert( p->eTrans!=SQLITE_TXN_ERROR || p->pCsrList==0 ); - - if( p->eTrans>=SQLITE_TXN_WRITE ){ - sqlite3HctTreeRollbackTo(p->pHctTree, 0); - if( p->pHctDb ){ - sqlite3HctTreeClear(p->pHctTree); - } - p->eTrans = SQLITE_TXN_READ; - p->nSchemaOp = 0; - } - hctEndTransaction(p); - return SQLITE_OK; -} - -/* -** Start a statement subtransaction. The subtransaction can be rolled -** back independently of the main transaction. You must start a transaction -** before starting a subtransaction. The subtransaction is ended automatically -** if the main transaction commits or rolls back. 
-** -** Statement subtransactions are used around individual SQL statements -** that are contained within a BEGIN...COMMIT block. If a constraint -** error occurs within the statement, the effect of that one statement -** can be rolled back without having to rollback the entire transaction. -** -** A statement sub-transaction is implemented as an anonymous savepoint. The -** value passed as the second parameter is the total number of savepoints, -** including the new anonymous savepoint, open on the B-Tree. i.e. if there -** are no active savepoints and no other statement-transactions open, -** iStatement is 1. This anonymous savepoint can be released or rolled back -** using the sqlite3HctBtreeSavepoint() function. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeBeginStmt(Btree *pBt, int iStatement){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - assert( p->eTrans!=SQLITE_TXN_ERROR ); - rc = sqlite3HctTreeBegin(p->pHctTree, iStatement+1); - return rc; -} - -static int btreeRollbackRoot(HBtree *p, int iSavepoint){ - int i; - int rc = SQLITE_OK; - for(i=p->nSchemaOp-1; rc==SQLITE_OK && i>=0; i--){ - if( p->aSchemaOp[i].iSavepoint<=iSavepoint ) break; - rc = sqlite3HctDbRootFree(p->pHctDb, p->aSchemaOp[i].pgnoRoot); - } - p->nSchemaOp = i+1; - return rc; -} - -/* -** The second argument to this function, op, is always SAVEPOINT_ROLLBACK -** or SAVEPOINT_RELEASE. This function either releases or rolls back the -** savepoint identified by parameter iSavepoint, depending on the value -** of op. -** -** Normally, iSavepoint is greater than or equal to zero. However, if op is -** SAVEPOINT_ROLLBACK, then iSavepoint may also be -1. In this case the -** contents of the entire transaction are rolled back. This is different -** from a normal transaction rollback, as no locks are released and the -** transaction remains open. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeSavepoint(Btree *pBt, int op, int iSavepoint){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - if( p && p->eTrans==SQLITE_TXN_WRITE ){ - int i; - assert( op==SAVEPOINT_ROLLBACK || op==SAVEPOINT_RELEASE ); - if( op==SAVEPOINT_RELEASE ){ - for(i=0; inSchemaOp; i++){ - if( p->aSchemaOp[i].iSavepoint>iSavepoint ){ - p->aSchemaOp[i].iSavepoint = iSavepoint; - } - } - sqlite3HctTreeRelease(p->pHctTree, iSavepoint+1); - }else{ - sqlite3HctTreeRollbackTo(p->pHctTree, iSavepoint+2); - btreeRollbackRoot(p, iSavepoint); - p->eMetaState = HCT_METASTATE_NONE; - } - } - return rc; -} - -SQLITE_PRIVATE int sqlite3HctBtreeIsNewTable(Btree *pBt, u64 iRoot){ - HBtree *const p = (HBtree*)pBt; - int ii; - for(ii=0; iinSchemaOp && p->aSchemaOp[ii].pgnoRoot!=iRoot; ii++); - return iinSchemaOp; -} - -SQLITE_PRIVATE u64 sqlite3HctBtreeSnapshotId(Btree *pBt){ - HBtree *const p = (HBtree*)pBt; - return sqlite3HctDbSnapshotId(p->pHctDb); -} - -/* -** Open a new cursor -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCursor( - Btree *pBt, /* The btree */ - Pgno iTable, /* Root page of table to open */ - int wrFlag, /* 1 to write. 0 read-only */ - struct KeyInfo *pKeyInfo, /* First arg to xCompare() */ - BtCursor *pCursor /* Write new cursor here */ -){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - int bNosnap = 0; - int bReadonly = sqlite3HctJournalIsReadonly(p->pHctJrnl, iTable, &bNosnap); - - assert( p->eTrans!=SQLITE_TXN_NONE ); - assert( p->eTrans!=SQLITE_TXN_ERROR ); - assert( pCur->pHctTreeCsr==0 ); - assert( BT_IS_MIGRATE(p)==0 || wrFlag ); - - /* If this is an attempt to open a read/write cursor on either the - ** sqlite_hct_journal or sqlite_hct_baseline tables, return an error - ** immediately. 
*/ - if( wrFlag && bReadonly ){ - return SQLITE_READONLY; - } - - pCur->pKeyInfo = pKeyInfo; - rc = sqlite3HctTreeCsrOpen(p->pHctTree, iTable, &pCur->pHctTreeCsr); - if( rc==SQLITE_OK && p->pHctDb ){ - int ii; - for(ii=0; iinSchemaOp && p->aSchemaOp[ii].pgnoRoot!=iTable; ii++); - if( ii==p->nSchemaOp ){ - rc = sqlite3HctDbCsrOpen(p->pHctDb, pKeyInfo, iTable, &pCur->pHctDbCsr); - sqlite3HctDbCsrNosnap(pCur->pHctDbCsr, bNosnap); - } - } - if( rc==SQLITE_OK ){ - pCur->pCsrNext = p->pCsrList; - pCur->pBtree = p; - pCur->wrFlag = wrFlag; - p->pCsrList = pCur; - }else{ - sqlite3HctTreeCsrClose(pCur->pHctTreeCsr); - pCur->pHctTreeCsr = 0; - pCur->pKeyInfo = 0; - } - - return rc; -} - -/* -** Return the size of a BtCursor object in bytes. -** -** This interfaces is needed so that users of cursors can preallocate -** sufficient storage to hold a cursor. The BtCursor object is opaque -** to users so they cannot do the sizeof() themselves - they must call -** this routine. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCursorSize(void){ - return ROUND8(sizeof(HBtCursor)); -} - -/* -** Initialize memory that will be converted into a BtCursor object. -** -** The simple approach here would be to memset() the entire object -** to zero. But it turns out that the apPage[] and aiIdx[] arrays -** do not need to be zeroed and they are large, so we can save a lot -** of run-time by skipping the initialization of those elements. -*/ -SQLITE_PRIVATE void sqlite3HctBtreeCursorZero(BtCursor *p){ - /* hct takes the simple approach mentioned above */ - memset(p, 0, sizeof(HBtCursor)); -} - -/* -** Close a cursor. The read lock on the database file is released -** when the last cursor is closed. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeCloseCursor(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - HBtree *const pBtree = pCur->pBtree; - if( pBtree ){ - HBtCursor **pp; - sqlite3HctTreeCsrClose(pCur->pHctTreeCsr); - sqlite3HctDbCsrClose(pCur->pHctDbCsr); - for(pp=&pBtree->pCsrList; *pp!=pCur; pp=&(*pp)->pCsrNext); - *pp = pCur->pCsrNext; - pCur->pHctTreeCsr = 0; - pCur->pBtree = 0; - pCur->pCsrNext = 0; - if( (pBtree->openFlags & BTREE_SINGLE) && pBtree->pCsrList==0 ){ - sqlite3HctBtreeClose((Btree*)pBtree); - } - } - return SQLITE_OK; -} - -/* -** Return true if the given BtCursor is valid. A valid cursor is one -** that is currently pointing to a row in a (non-empty) table. -** This is a verification routine is used only within assert() statements. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCursorIsValid(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - return pCur && ( - !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) - || !sqlite3HctDbCsrEof(pCur->pHctDbCsr) - ); -} -SQLITE_PRIVATE int sqlite3HctBtreeCursorIsValidNN(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - return ( - !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) - || !sqlite3HctDbCsrEof(pCur->pHctDbCsr) - ); -} - -/* -** Return the value of the integer key or "rowid" for a table btree. -** This routine is only valid for a cursor that is pointing into a -** ordinary table btree. If the cursor points to an index btree or -** is invalid, the result of this routine is undefined. -*/ -SQLITE_PRIVATE i64 sqlite3HctBtreeIntegerKey(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - i64 iKey; - if( pCur->bUseTree ){ - sqlite3HctTreeCsrKey(pCur->pHctTreeCsr, &iKey); - }else{ - sqlite3HctDbCsrKey(pCur->pHctDbCsr, &iKey); - } - return iKey; -} - -/* -** Pin or unpin a cursor. 
-*/ -SQLITE_PRIVATE void sqlite3HctBtreeCursorPin(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - sqlite3HctTreeCsrPin(pCur->pHctTreeCsr); -} -SQLITE_PRIVATE void sqlite3HctBtreeCursorUnpin(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - sqlite3HctTreeCsrUnpin(pCur->pHctTreeCsr); -} - -#ifdef SQLITE_ENABLE_OFFSET_SQL_FUNC -/* -** Return the offset into the database file for the start of the -** payload to which the cursor is pointing. -*/ -SQLITE_PRIVATE i64 sqlite3HctBtreeOffset(BtCursor *pCur){ - assert( 0 ); - return 0; -} -#endif /* SQLITE_ENABLE_OFFSET_SQL_FUNC */ - -/* -** Return the number of bytes of payload for the entry that pCur is -** currently pointing to. For table btrees, this will be the amount -** of data. For index btrees, this will be the size of the key. -** -** The caller must guarantee that the cursor is pointing to a non-NULL -** valid entry. In other words, the calling procedure must guarantee -** that the cursor has Cursor.eState==CURSOR_VALID. -*/ -SQLITE_PRIVATE u32 sqlite3HctBtreePayloadSize(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - int nData; - if( pCur->bUseTree ){ - sqlite3HctTreeCsrData(pCur->pHctTreeCsr, &nData, 0); - }else{ - sqlite3HctDbCsrData(pCur->pHctDbCsr, &nData, 0); - } - return nData; -} - -/* -** Return an upper bound on the size of any record for the table -** that the cursor is pointing into. -** -** This is an optimization. Everything will still work if this -** routine always returns 2147483647 (which is the largest record -** that SQLite can handle) or more. But returning a smaller value might -** prevent large memory allocations when trying to interpret a -** corrupt datrabase. -** -** The current implementation merely returns the size of the underlying -** database file. 
-*/ -SQLITE_PRIVATE sqlite3_int64 sqlite3HctBtreeMaxRecordSize(BtCursor *pCur){ - assert( 0 ); - return 0x7FFFFFFF; -} - -/* -** Read part of the payload for the row at which that cursor pCur is currently -** pointing. "amt" bytes will be transferred into pBuf[]. The transfer -** begins at "offset". -** -** pCur can be pointing to either a table or an index b-tree. -** If pointing to a table btree, then the content section is read. If -** pCur is pointing to an index b-tree then the key section is read. -** -** For sqlite3HctBtreePayload(), the caller must ensure that pCur is pointing -** to a valid row in the table. For sqlite3HctBtreePayloadChecked(), the -** cursor might be invalid or might need to be restored before being read. -** -** Return SQLITE_OK on success or an error code if anything goes -** wrong. An error is returned if "offset+amt" is larger than -** the available payload. -*/ -SQLITE_PRIVATE int sqlite3HctBtreePayload(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){ - u32 n = 0; - const u8 *p = 0; - - p = (const u8*)sqlite3HctBtreePayloadFetch(pCur, &n); - assert( offset+amt<=n ); - memcpy(pBuf, &p[offset], amt); - - return SQLITE_OK; -} - - -static int btreeSetUseTree(HBtCursor *pCur){ - int rc = SQLITE_OK; - int bTreeEof = sqlite3HctTreeCsrEof(pCur->pHctTreeCsr); - int bDbEof = sqlite3HctDbCsrEof(pCur->pHctDbCsr); - - assert( pCur->eDir==BTREE_DIR_FORWARD || pCur->eDir==BTREE_DIR_REVERSE ); - assert( pCur->pHctTreeCsr ); - - if( bTreeEof ){ - pCur->bUseTree = 0; - }else if( bDbEof ){ - pCur->bUseTree = 1; - }else if( pCur->pKeyInfo==0 ){ - i64 iKeyTree; - i64 iKeyDb; - - sqlite3HctTreeCsrKey(pCur->pHctTreeCsr, &iKeyTree); - sqlite3HctDbCsrKey(pCur->pHctDbCsr, &iKeyDb); - - if( iKeyTree==iKeyDb ){ - pCur->bUseTree = 2; - }else{ - pCur->bUseTree = (iKeyTree < iKeyDb); - if( pCur->eDir==BTREE_DIR_REVERSE ) pCur->bUseTree = !pCur->bUseTree; - } - }else{ - UnpackedRecord *pKeyDb = 0; - const u8 *aKeyTree = 0; - int nKeyTree = 0; - - rc = 
sqlite3HctDbCsrLoadAndDecode(pCur->pHctDbCsr, &pKeyDb); - if( rc==SQLITE_OK ){ - int res; - int nSave = pKeyDb->nField; - sqlite3HctDbRecordTrim(pKeyDb); - sqlite3HctTreeCsrData(pCur->pHctTreeCsr, &nKeyTree, &aKeyTree); - res = sqlite3VdbeRecordCompare(nKeyTree, aKeyTree, pKeyDb); - pKeyDb->nField = nSave; - if( res==0 ){ - pCur->bUseTree = 2; - }else{ - pCur->bUseTree = (res<0); - if( pCur->eDir==BTREE_DIR_REVERSE ) pCur->bUseTree = !pCur->bUseTree; - } - } - } - - return rc; -} - -static int hctReseekBlobCsr(HBtCursor *pCsr){ - int rc = SQLITE_OK; - assert( pCsr->pKeyInfo==0 ); - if( sqlite3HctTreeCsrHasMoved(pCsr->pHctTreeCsr) ){ - int res = 0; - rc = sqlite3HctTreeCsrReseek(pCsr->pHctTreeCsr, &res); - if( rc==SQLITE_OK && res==0 ){ - pCsr->bUseTree = 1; - } - } - return rc; -} - -/* -** This variant of sqlite3HctBtreePayload() works even if the cursor has not -** in the CURSOR_VALID state. It is only used by the sqlite3_blob_read() -** interface. -*/ -#ifndef SQLITE_OMIT_INCRBLOB -SQLITE_PRIVATE int sqlite3HctBtreePayloadChecked( - BtCursor *pCur, - u32 offset, - u32 amt, - void *pBuf -){ - HBtCursor *pCsr = (HBtCursor*)pCur; - int rc = SQLITE_OK; - rc = hctReseekBlobCsr(pCsr); - if( rc==SQLITE_OK ){ - rc = sqlite3HctBtreePayload(pCur, offset, amt, pBuf); - } - return rc; -} -#endif /* SQLITE_OMIT_INCRBLOB */ - -/* -** For the entry that cursor pCur is point to, return as -** many bytes of the key or data as are available on the local -** b-tree page. Write the number of available bytes into *pAmt. -** -** The pointer returned is ephemeral. The key/data may move -** or be destroyed on the next call to any Btree routine, -** including calls from other threads against the same cache. -** Hence, a mutex on the BtShared should be held prior to calling -** this routine. -** -** These routines is used to get quick access to key and data -** in the common case where no overflow pages are used. 
-*/ -SQLITE_PRIVATE const void *sqlite3HctBtreePayloadFetch(BtCursor *pCursor, u32 *pAmt){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - const u8 *aData; - int nData; - if( pCur->bUseTree ){ - sqlite3HctTreeCsrData(pCur->pHctTreeCsr, &nData, &aData); - }else{ - sqlite3HctDbCsrData(pCur->pHctDbCsr, &nData, &aData); - } - *pAmt = (u32)nData; - return aData; -} - -/* Move the cursor to the first entry in the table. Return SQLITE_OK -** on success. Set *pRes to 0 if the cursor actually points to something -** or set *pRes to 1 if the table is empty. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeFirst(BtCursor *pCursor, int *pRes){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - int rc = SQLITE_OK; - - sqlite3HctTreeCsrFirst(pCur->pHctTreeCsr); - if( pCur->pHctDbCsr ){ - rc = sqlite3HctDbCsrFirst(pCur->pHctDbCsr); - } - if( rc==SQLITE_OK ){ - pCur->eDir = BTREE_DIR_FORWARD; - btreeSetUseTree(pCur); - if( pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){ - rc = sqlite3HctBtreeNext((BtCursor*)pCur, 0); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - *pRes = sqlite3HctBtreeEof((BtCursor*)pCur); - } - - return rc; -} - -/* Move the cursor to the last entry in the table. Return SQLITE_OK -** on success. Set *pRes to 0 if the cursor actually points to something -** or set *pRes to 1 if the table is empty. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeLast(BtCursor *pCursor, int *pRes){ - int rc = SQLITE_OK; - HBtCursor *const pCur = (HBtCursor*)pCursor; - - if( pCur->isLast==0 ){ - sqlite3HctTreeCsrLast(pCur->pHctTreeCsr); - if( pCur->pHctDbCsr ){ - rc = sqlite3HctDbCsrLast(pCur->pHctDbCsr); - } - if( rc==SQLITE_OK ){ - int bTreeEof = sqlite3HctTreeCsrEof(pCur->pHctTreeCsr); - int bDbEof = sqlite3HctDbCsrEof(pCur->pHctDbCsr); - *pRes = (bTreeEof && bDbEof); - pCur->eDir = BTREE_DIR_REVERSE; - btreeSetUseTree(pCur); - if( pCur->bUseTree ){ - if( sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){ - rc = sqlite3HctBtreePrevious((BtCursor*)pCur, 0); - if( rc==SQLITE_DONE ){ - *pRes = sqlite3HctBtreeEof((BtCursor*)pCur); - rc = SQLITE_OK; - } - }else{ - pCur->isLast = 1; - } - } - } - } - - return rc; -} - -/* Move the cursor so that it points to an entry near the key -** specified by pIdxKey or intKey. Return a success code. -** -** For INTKEY tables, the intKey parameter is used. pIdxKey -** must be NULL. For index tables, pIdxKey is used and intKey -** is ignored. -** -** If an exact match is not found, then the cursor is always -** left pointing at a leaf page which would hold the entry if it -** were present. The cursor might point to an entry that comes -** before or after the key. -** -** An integer is written into *pRes which is the result of -** comparing the key with the entry to which the cursor is -** pointing. The meaning of the integer written into -** *pRes is as follows: -** -** *pRes<0 The cursor is left pointing at an entry that -** is smaller than intKey/pIdxKey or if the table is empty -** and the cursor is therefore left point to nothing. -** -** *pRes==0 The cursor is left pointing at an entry that -** exactly matches intKey/pIdxKey. -** -** *pRes>0 The cursor is left pointing at an entry that -** is larger than intKey/pIdxKey. 
-** -** For index tables, the pIdxKey->eqSeen field is set to 1 if there -** exists an entry in the table that exactly matches pIdxKey. -*/ -static int hctBtreeMovetoUnpacked( - HBtCursor *pCur, /* The cursor to be moved */ - UnpackedRecord *pIdxKey, /* Unpacked index key */ - i64 intKey, /* The table key */ - int biasRight, /* If true, bias the search to the high end */ - int *pRes /* Write search results here */ -){ - int rc = SQLITE_OK; - int res1 = 0; - int res2 = -1; - - pCur->isLast = 0; - rc = sqlite3HctTreeCsrSeek(pCur->pHctTreeCsr, pIdxKey, intKey, &res1); - if( rc==SQLITE_OK && pCur->pHctDbCsr ){ - rc = sqlite3HctDbCsrSeek(pCur->pHctDbCsr, pIdxKey, intKey, &res2); - } - - if( pCur->eDir==BTREE_DIR_NONE ){ - if( res1==0 || pCur->pHctDbCsr==0 ){ - *pRes = res1; - pCur->bUseTree = 1; - if( sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){ - *pRes = -1; - } - }else{ - pCur->bUseTree = 0; - *pRes = res2; - } - }else{ - if( pCur->eDir==BTREE_DIR_FORWARD ){ - if( rc==SQLITE_OK && res2<0 && !sqlite3HctDbCsrEof(pCur->pHctDbCsr) ){ - rc = sqlite3HctDbCsrNext(pCur->pHctDbCsr); - } - if( rc==SQLITE_OK && res1<0 && !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) ){ - rc = sqlite3HctTreeCsrNext(pCur->pHctTreeCsr); - } - - if( res1==0 || (res2==0 && pCur->pHctDbCsr) ){ - *pRes = 0; - }else if( sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) - && sqlite3HctDbCsrEof(pCur->pHctDbCsr) - ){ - *pRes = -1; - }else{ - *pRes = +1; - } - }else{ - assert( pCur->eDir==BTREE_DIR_REVERSE ); - assert( res2<=0 ); - if( rc==SQLITE_OK && res1>0 && !sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) ){ - rc = sqlite3HctTreeCsrPrev(pCur->pHctTreeCsr); - } - if( res1==0 || res2==0 ){ - *pRes = 0; - }else{ - *pRes = -1; - } - } - - btreeSetUseTree(pCur); - if( pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) ){ - if( pCur->eDir==BTREE_DIR_FORWARD ){ - rc = sqlite3HctBtreeNext((BtCursor*)pCur, 0); - if( rc==SQLITE_DONE ){ - /* Cursor points at EOF. *pRes must be -ve in this case. 
*/ - rc = SQLITE_OK; - *pRes = -1; - }else if( pIdxKey==0 ){ - *pRes = 1; - }else{ - u32 nKey; - const void *a = sqlite3HctBtreePayloadFetch((BtCursor*)pCur, &nKey); - *pRes = sqlite3VdbeRecordCompareWithSkip(nKey, a, pIdxKey, 0); - } - }else{ - rc = sqlite3HctBtreePrevious((BtCursor*)pCur, 0); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - *pRes = -1; - } - } - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctBtreeTableMoveto( - BtCursor *pCursor, /* The cursor to be moved */ - i64 intKey, /* The table key */ - int biasRight, /* If true, bias the search to the high end */ - int *pRes /* Write search results here */ -){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - assert( CSR_IS_MIGRATE(pCur)==0 ); - if( pCur->isLast && sqlite3HctBtreeIntegerKey(pCursor)eDir = eDir; - if( pCur->pHctDbCsr ){ - sqlite3HctDbCsrDir(pCur->pHctDbCsr, eDir); - } -} - -/* -** Return TRUE if the cursor is not pointing at an entry of the table. -** -** TRUE will be returned after a call to sqlite3HctBtreeNext() moves -** past the last entry in the table or sqlite3HctBtreePrev() moves past -** the first entry. TRUE is also returned if the table is empty. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeEof(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries - ** have been deleted? This API will need to change to return an error code - ** as well as the boolean result value. - */ - return ( - sqlite3HctTreeCsrEof(pCur->pHctTreeCsr) - && sqlite3HctDbCsrEof(pCur->pHctDbCsr) - ); -} - -/* -** Return an estimate for the number of rows in the table that pCur is -** pointing to. Return a negative number if no estimate is currently -** available. -*/ -SQLITE_PRIVATE i64 sqlite3HctBtreeRowCountEst(BtCursor *pCur){ - /* TODO: Fix this so that it returns a meaningful value. */ - return -1; -} - -/* -** Advance the cursor to the next entry in the database. 
-** Return value: -** -** SQLITE_OK success -** SQLITE_DONE cursor is already pointing at the last element -** otherwise some kind of error occurred -** -** The main entry point is sqlite3HctBtreeNext(). That routine is optimized -** for the common case of merely incrementing the cell counter BtCursor.aiIdx -** to the next cell on the current page. The (slower) btreeNext() helper -** routine is called when it is necessary to move to a different page or -** to restore the cursor. -** -** If bit 0x01 of the F argument in sqlite3HctBtreeNext(C,F) is 1, then the -** cursor corresponds to an SQL index and this routine could have been -** skipped if the SQL index had been a unique index. The F argument -** is a hint to the implement. SQLite btree implementation does not use -** this hint, but COMDB2 does. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeNext(BtCursor *pCursor, int flags){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - int rc = SQLITE_OK; - int bDummy; - - assert( pCur->isLast==0 ); - rc = sqlite3HctBtreeCursorRestore((BtCursor*)pCur, &bDummy); - if( rc!=SQLITE_OK ) return rc; - - if( sqlite3HctBtreeEof((BtCursor*)pCur) ){ - rc = SQLITE_DONE; - }else{ - assert( pCur->eDir==BTREE_DIR_FORWARD ); - do{ - if( pCur->bUseTree ){ - rc = sqlite3HctTreeCsrNext(pCur->pHctTreeCsr); - } - if( rc==SQLITE_OK && (pCur->bUseTree==0 || pCur->bUseTree==2) ){ - rc = sqlite3HctDbCsrNext(pCur->pHctDbCsr); - } - if( rc==SQLITE_OK ){ - if( sqlite3HctBtreeEof((BtCursor*)pCur) ){ - rc = SQLITE_DONE; - }else{ - btreeSetUseTree(pCur); - } - } - }while( rc==SQLITE_OK - && pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) - ); - } - return rc; -} - -/* -** Step the cursor to the back to the previous entry in the database. -** Return values: -** -** SQLITE_OK success -** SQLITE_DONE the cursor is already on the first element of the table -** otherwise some kind of error occurred -** -** The main entry point is sqlite3HctBtreePrevious(). 
That routine is optimized -** for the common case of merely decrementing the cell counter BtCursor.aiIdx -** to the previous cell on the current page. The (slower) btreePrevious() -** helper routine is called when it is necessary to move to a different page -** or to restore the cursor. -** -** If bit 0x01 of the F argument to sqlite3HctBtreePrevious(C,F) is 1, then -** the cursor corresponds to an SQL index and this routine could have been -** skipped if the SQL index had been a unique index. The F argument is a -** hint to the implement. The native SQLite btree implementation does not -** use this hint, but COMDB2 does. -*/ -SQLITE_PRIVATE int sqlite3HctBtreePrevious(BtCursor *pCursor, int flags){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - int rc = SQLITE_OK; - int bDummy; - assert( pCur->eDir==BTREE_DIR_REVERSE ); - - pCur->isLast = 0; - rc = sqlite3HctBtreeCursorRestore((BtCursor*)pCur, &bDummy); - if( rc!=SQLITE_OK ) return rc; - - do{ - if( pCur->bUseTree ){ - rc = sqlite3HctTreeCsrPrev(pCur->pHctTreeCsr); - } - if( rc==SQLITE_OK && (pCur->bUseTree==0 || pCur->bUseTree==2) ){ - rc = sqlite3HctDbCsrPrev(pCur->pHctDbCsr); - } - if( rc==SQLITE_OK ){ - if( sqlite3HctBtreeEof((BtCursor*)pCur) ){ - rc = SQLITE_DONE; - }else{ - btreeSetUseTree(pCur); - } - } - }while( rc==SQLITE_OK - && pCur->bUseTree && sqlite3HctTreeCsrIsDelete(pCur->pHctTreeCsr) - ); - return rc; -} - -static void hctBtreeClearIsLast(HBtree *pBt, HBtCursor *pExcept){ - HBtCursor *p; - for(p=pBt->pCsrList; p; p=p->pCsrNext){ - if( p!=pExcept ) p->isLast = 0; - } -} - -/* -** Insert a new record into the BTree. The content of the new record -** is described by the pX object. The pCur cursor is used only to -** define what table the record should be inserted into, and is left -** pointing at a random location. -** -** For a table btree (used for rowid tables), only the pX.nKey value of -** the key is used. The pX.pKey value must be NULL. 
The pX.nKey is the -** rowid or INTEGER PRIMARY KEY of the row. The pX.nData,pData,nZero fields -** hold the content of the row. -** -** For an index btree (used for indexes and WITHOUT ROWID tables), the -** key is an arbitrary byte sequence stored in pX.pKey,nKey. The -** pX.pData,nData,nZero fields must be zero. -** -** If the seekResult parameter is non-zero, then a successful call to -** MovetoUnpacked() to seek cursor pCur to (pKey,nKey) has already -** been performed. In other words, if seekResult!=0 then the cursor -** is currently pointing to a cell that will be adjacent to the cell -** to be inserted. If seekResult<0 then pCur points to a cell that is -** smaller then (pKey,nKey). If seekResult>0 then pCur points to a cell -** that is larger than (pKey,nKey). -** -** If seekResult==0, that means pCur is pointing at some unknown location. -** In that case, this routine must seek the cursor to the correct insertion -** point for (pKey,nKey) before doing the insertion. For index btrees, -** if pX->nMem is non-zero, then pX->aMem contains pointers to the unpacked -** key values and pX->aMem can be used instead of pX->pKey to avoid having -** to decode the key. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeInsert( - BtCursor *pCursor, /* Insert data into the table of this cursor */ - const BtreePayload *pX, /* Content of the row to be inserted */ - int flags, /* True if this is likely an append */ - int seekResult /* Result of prior MovetoUnpacked() call */ -){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - HctTreeCsr *pTreeCsr = pCur->pHctTreeCsr; - int rc = SQLITE_OK; - UnpackedRecord r; - UnpackedRecord *pRec = 0; - const u8 *aData; - int nData; - int nZero; - i64 iKey = 0; - int bMigrate = pCur->pBtree->config.db->bHctMigrate; - - hctBtreeClearIsLast(pCur->pBtree, pCur); - if( pX->pKey ){ - aData = pX->pKey; - nData = pX->nKey; - nZero = 0; - if( pX->nMem ){ - memset(&r, 0, sizeof(r)); - r.pKeyInfo = pCur->pKeyInfo; - r.aMem = pX->aMem; - r.nField = pX->nMem; - pRec = &r; - }else{ - pRec = sqlite3VdbeAllocUnpackedRecord(pCur->pKeyInfo); - if( pRec==0 ) return SQLITE_NOMEM_BKPT; - sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nData, aData, pRec); - } - iKey = 0; - }else{ - aData = pX->pData; - nData = pX->nData; - nZero = pX->nZero; - iKey = pX->nKey; - } - - if( CSR_IS_MIGRATE(pCur) ){ - assert( nZero==0 ); - rc = hctBtreeMigrateInsert(pCur, pRec, iKey, nData, aData); - }else{ - if( pCur->isLast && seekResult<0 ){ - rc = sqlite3HctTreeAppend( - pTreeCsr, pCur->pKeyInfo, iKey, nData, aData, nZero - ); - }else{ - rc = sqlite3HctTreeInsert(pTreeCsr, pRec, iKey, nData, aData, nZero); - pCur->isLast = 0; - } - } - - if( pRec && pRec!=&r ){ - sqlite3DbFree(pCur->pKeyInfo->db, pRec); - } - return rc; -} - -SQLITE_PRIVATE int sqlite3HctSchemaOp(Btree *pBt, const char *zSql){ - int rc = SQLITE_OK; - HBtree *const p = (HBtree*)pBt; - if( p->pHctJrnl ){ - HctTreeCsr *pCsr = 0; - - rc = sqlite3HctTreeCsrOpen(p->pHctTree, HCT_TREE_SCHEMAOP_ROOT, &pCsr); - if( rc==SQLITE_OK ){ - int nSql = sqlite3Strlen30(zSql); - i64 iRowid = 1; - sqlite3HctTreeCsrLast(pCsr); - if( sqlite3HctTreeCsrEof(pCsr)==0 ){ - sqlite3HctTreeCsrKey(pCsr, &iRowid); - 
iRowid++; - } - - rc = sqlite3HctTreeInsert(pCsr, 0, iRowid, nSql, (const u8*)zSql, 0); - sqlite3HctTreeCsrClose(pCsr); - } - } - return rc; -} - -/* -** Delete the entry that the cursor is pointing to. -** -** If the BTREE_SAVEPOSITION bit of the flags parameter is zero, then -** the cursor is left pointing at an arbitrary location after the delete. -** But if that bit is set, then the cursor is left in a state such that -** the next call to BtreeNext() or BtreePrev() moves it to the same row -** as it would have been on if the call to BtreeDelete() had been omitted. -** -** The BTREE_AUXDELETE bit of flags indicates that is one of several deletes -** associated with a single table entry and its indexes. Only one of those -** deletes is considered the "primary" delete. The primary delete occurs -** on a cursor that is not a BTREE_FORDELETE cursor. All but one delete -** operation on non-FORDELETE cursors is tagged with the AUXDELETE flag. -** The BTREE_AUXDELETE bit is a hint that is not used by this implementation, -** but which might be used by alternative storage engines. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeDelete(BtCursor *pCursor, u8 flags){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - int rc = SQLITE_OK; - - hctBtreeClearIsLast(pCur->pBtree, 0); - if( pCur->pHctDbCsr==0 ){ - rc = sqlite3HctTreeDelete(pCur->pHctTreeCsr); - }else if( pCur->pKeyInfo==0 ){ - i64 iKey = sqlite3HctBtreeIntegerKey((BtCursor*)pCur); - rc = sqlite3HctTreeDeleteKey(pCur->pHctTreeCsr, 0, iKey, 0, 0); - }else{ - u32 nKey; - const u8 *aKey = (u8*)sqlite3HctBtreePayloadFetch((BtCursor*)pCur, &nKey); - UnpackedRecord *pRec = sqlite3VdbeAllocUnpackedRecord(pCur->pKeyInfo); - - if( pRec==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - sqlite3VdbeRecordUnpack(pCur->pKeyInfo, nKey, aKey, pRec); - rc = sqlite3HctTreeDeleteKey(pCur->pHctTreeCsr, pRec, 0, nKey, aKey); - sqlite3DbFree(pCur->pBtree->config.db, pRec); - } - } - return rc; -} - -SQLITE_PRIVATE int sqlite3HctBtreeIdxDelete(BtCursor *pCursor, UnpackedRecord *pKey){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - int rc = SQLITE_OK; - - hctBtreeClearIsLast(pCur->pBtree, 0); - if( pCur->pHctDbCsr ){ - u8 *aRec = 0; - int nRec = 0; - rc = sqlite3HctSerializeRecord(pKey, &aRec, &nRec); - if( rc==SQLITE_OK ){ - rc = sqlite3HctTreeDeleteKey(pCur->pHctTreeCsr, pKey, 0, nRec, aRec); - sqlite3_free(aRec); - } - }else{ - int res = 0; - rc = sqlite3HctTreeCsrSeek(pCur->pHctTreeCsr, pKey, 0, &res); - if( res==0 ){ - rc = sqlite3HctTreeDelete(pCur->pHctTreeCsr); - } - } - return rc; -} - -static int hctreeAddNewSchemaOp(HBtree *p, u32 iRoot, int eOp){ - BtSchemaOp *aSchemaOp; - - /* Grow the Btree.aSchemaOp array */ - assert( p->pHctDb ); - aSchemaOp = (BtSchemaOp*)sqlite3_realloc( - p->aSchemaOp, sizeof(BtSchemaOp)*(p->nSchemaOp+1) - ); - if( aSchemaOp==0 ) return SQLITE_NOMEM_BKPT; - - p->aSchemaOp = aSchemaOp; - p->aSchemaOp[p->nSchemaOp].pgnoRoot = iRoot; - p->aSchemaOp[p->nSchemaOp].iSavepoint = p->config.db->nSavepoint; - p->aSchemaOp[p->nSchemaOp].eSchemaOp = eOp; - p->nSchemaOp++; - - return SQLITE_OK; -} - 
-static int hctreeAddNewRoot(HBtree *p, u32 iRoot, int bIndex){ - int eOp = bIndex ? HCT_SCHEMAOP_CREATE_INDEX : HCT_SCHEMAOP_CREATE_INTKEY; - return hctreeAddNewSchemaOp(p, iRoot, eOp); -} - -/* -** Create a new BTree table. Write into *piTable the page -** number for the root page of the new table. -** -** The type of type is determined by the flags parameter. Only the -** following values of flags are currently in use. Other values for -** flags might not work: -** -** BTREE_INTKEY|BTREE_LEAFDATA Used for SQL tables with rowid keys -** BTREE_ZERODATA Used for SQL indices -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCreateTable(Btree *pBt, Pgno *piTable, int flags){ - HBtree *const p = (HBtree*)pBt; - Pgno iNew = 0; - int rc = SQLITE_OK; - if( p->pHctDb ){ - rc = sqlite3HctDbRootNew(p->pHctDb, &iNew); - if( rc==SQLITE_OK ){ - rc = hctreeAddNewRoot(p, iNew, (flags & BTREE_INTKEY)==0); - } - }else{ - iNew = p->iNextRoot++; - } - *piTable = iNew; - return rc; -} - -/* -** Delete all information from a single table in the database. iTable is -** the page number of the root of the table. After this routine returns, -** the root page is empty, but still exists. -** -** This routine will fail with SQLITE_LOCKED if there are any open -** read cursors on the table. Open write cursors are moved to the -** root of the table. -** -** If pnChange is not NULL, then table iTable must be an intkey table. The -** integer value pointed to by pnChange is incremented by the number of -** entries in the table. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeClearTable(Btree *pBt, int iTable, i64 *pnChange){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - KeyInfo *pKeyInfo = 0; - - rc = hctFindKeyInfo(p, iTable, &pKeyInfo); - if( rc==SQLITE_OK ){ - i64 nChange = 0; - BtCursor *pCsr = 0; - HctTreeCsr *pTreeCsr = 0; - UnpackedRecord *pRec = 0; - - if( pKeyInfo ){ - pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); - if( pRec==0 ) rc = SQLITE_NOMEM_BKPT; - } - pCsr = (BtCursor*)sqlite3HctMalloc(&rc, sizeof(HBtCursor)); - if( rc==SQLITE_OK ){ - rc = sqlite3HctBtreeCursor(pBt, iTable, 0, pKeyInfo, pCsr); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctTreeCsrOpen(p->pHctTree, iTable, &pTreeCsr); - } - - if( rc==SQLITE_OK ){ - int res = 0; - rc = sqlite3HctBtreeFirst(pCsr, &res); - if( res==0 ){ - while( rc==SQLITE_OK ){ - nChange++; - if( pKeyInfo ){ - const u8 *aData = 0; - u32 nData = 0; - aData = (const u8*)sqlite3HctBtreePayloadFetch(pCsr, &nData); - sqlite3VdbeRecordUnpack(pKeyInfo, nData, aData, pRec); - rc = sqlite3HctTreeDeleteKey(pTreeCsr, pRec, 0, nData, aData); - }else{ - i64 iKey = sqlite3HctBtreeIntegerKey((BtCursor*)pCsr); - rc = sqlite3HctTreeDeleteKey(pTreeCsr, 0, iKey, 0, 0); - } - rc = sqlite3HctBtreeNext(pCsr, 0); - } - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - } - if( pnChange ) *pnChange = nChange; - - sqlite3KeyInfoUnref(pKeyInfo); - sqlite3HctBtreeCloseCursor(pCsr); - sqlite3HctTreeCsrClose(pTreeCsr); - sqlite3DbFree(p->config.db, pRec); - sqlite3_free(pCsr); - } - return rc; -} - -/* -** Delete all information from the single table that pCur is open on. -** -** This routine only work for pCur on an ephemeral table. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeClearTableOfCursor(BtCursor *pCursor){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - return sqlite3HctTreeClearOne( - pCur->pBtree->pHctTree, sqlite3HctTreeCsrRoot(pCur->pHctTreeCsr), 0 - ); -} - -/* -** Drop the table with root page iTable. Set (*piMoved) to 0 before -** returning. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeDropTable(Btree *pBt, int iTable, int *piMoved){ - HBtree *const p = (HBtree*)pBt; - *piMoved = 0; - return hctreeAddNewSchemaOp(p, iTable, HCT_SCHEMAOP_DROP); -} - - -/* -** This function may only be called if the b-tree connection already -** has a read or write transaction open on the database. -** -** Read the meta-information out of a database file. Meta[0] -** is the number of free pages currently in the database. Meta[1] -** through meta[15] are available for use by higher layers. Meta[0] -** is read-only, the others are read/write. -** -** The schema layer numbers meta values differently. At the schema -** layer (and the SetCookie and ReadCookie opcodes) the number of -** free pages is not visible. So Cookie[0] is the same as Meta[1]. -** -** This routine treats Meta[BTREE_DATA_VERSION] as a special case. Instead -** of reading the value out of the header, it instead loads the "DataVersion" -** from the pager. The BTREE_DATA_VERSION value is not actually stored in the -** database file. It is a number computed by the pager. But its access -** pattern is the same as header meta values, and so it is convenient to -** read it from this routine. -*/ -SQLITE_PRIVATE void sqlite3HctBtreeGetMeta(Btree *pBt, int idx, u32 *pMeta){ - HBtree *const p = (HBtree*)pBt; - - assert( idx>=0 && idxpHctDb ); - if( idx==BTREE_DATA_VERSION ){ - /* TODO: Fix this so that the data_version does not change when the - ** database is written by the current connection. 
*/ - i64 iSnapshot = sqlite3HctDbSnapshotId(p->pHctDb); - *pMeta = (u32)iSnapshot; - }else{ - if( p->eMetaState==HCT_METASTATE_NONE ){ - int rc = SQLITE_OK; - if( p->eTrans==SQLITE_TXN_NONE ){ - rc = sqlite3HctDbGetMeta( - p->pHctDb, (u8*)p->aMeta, SQLITE_N_BTREE_META*4 - ); - }else{ - int res = 0; - HBtCursor csr; - BtCursor *pCsr = (BtCursor*)&csr; - memset(&csr, 0, sizeof(csr)); - - sqlite3HctBtreeCursor(pBt, 2, 0, 0, pCsr); - rc = sqlite3HctBtreeTableMoveto(pCsr, 0, 0, &res); - assert( rc==SQLITE_OK ); - if( rc==SQLITE_OK && res==0 ){ - const void *aMeta = 0; - u32 nMeta = 0; - aMeta = sqlite3HctBtreePayloadFetch(pCsr, &nMeta); - memcpy(p->aMeta, aMeta, MAX(nMeta, SQLITE_N_BTREE_META*4)); - } - sqlite3HctBtreeCloseCursor(pCsr); - } - sqlite3HctJournalSchemaVersion( - p->pHctJrnl, &p->aMeta[BTREE_SCHEMA_VERSION] - ); - } - *pMeta = p->aMeta[idx]; - } -} - -/* -** Write meta-information back into the database. Meta[0] is -** read-only and may not be written. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeUpdateMeta(Btree *pBt, int idx, u32 iMeta){ - HBtree *const p = (HBtree*)pBt; - u32 dummy; - sqlite3HctBtreeGetMeta((Btree*)p, 0, &dummy); - p->aMeta[idx] = iMeta; - return sqlite3HctTreeUpdateMeta( - p->pHctTree, (u8*)p->aMeta, SQLITE_N_BTREE_META*4 - ); -} - -static char *hctDbMPrintf(int *pRc, const char *zFormat, ...){ - char *zRet = 0; - if( *pRc==SQLITE_OK ){ - va_list ap; - va_start(ap, zFormat); - zRet = sqlite3_vmprintf(zFormat, ap); - va_end(ap); - if( !zRet ) *pRc = SQLITE_NOMEM_BKPT; - } - return zRet; -} - -SQLITE_PRIVATE int sqlite3HctBtreePragma(Btree *pBt, char **aFnctl){ - HBtree *const p = (HBtree*)pBt; - int rc = SQLITE_OK; - const char *zLeft = aFnctl[1]; - const char *zRight = aFnctl[2]; - char *zRet = 0; - - if( 0==sqlite3_stricmp("hct_ndbfile", zLeft) ){ - HctFile *pFile = sqlite3HctDbFile(p->pHctDb); - int iCurrent = 0; - int bFixed = 0; - if( zRight ){ - int iVal = sqlite3Atoi(zRight); - if( iVal<1 || iVal>HCT_MAX_NDBFILE ){ - rc = SQLITE_RANGE; 
- }else{ - p->config.nDbFile = iVal; - } - } - if( rc==SQLITE_OK ){ - iCurrent = sqlite3HctFileNFile(pFile, &bFixed); - if( bFixed==0 ) iCurrent = p->config.nDbFile; - zRet = hctDbMPrintf(&rc, "%d", iCurrent); - } - } - - else if( 0==sqlite3_stricmp("hct_try_before_unevict", zLeft) ){ - int iVal = 0; - if( zRight ){ - iVal = sqlite3Atoi(zRight); - } - if( iVal>0 ){ - p->config.nTryBeforeUnevict = iVal; - } - zRet = hctDbMPrintf(&rc, "%d", p->config.nTryBeforeUnevict); - } - else if( 0==sqlite3_stricmp("hct_npageset", zLeft) ){ - int iVal = 0; - if( zRight ){ - iVal = sqlite3Atoi(zRight); - } - if( iVal>0 ){ - p->config.nPageSet = iVal; - } - zRet = hctDbMPrintf(&rc, "%d", p->config.nPageSet); - } - else if( 0==sqlite3_stricmp("hct_ncasfail", zLeft) ){ - zRet = hctDbMPrintf(&rc, "%lld", sqlite3HctDbNCasFail(p->pHctDb)); - } - else if( p->pHctDb && 0==sqlite3_stricmp("hct_npagescan", zLeft) ){ - int iVal = 0; - if( zRight ){ - iVal = sqlite3Atoi(zRight); - } - if( iVal>0 ){ - p->config.nPageScan = iVal; - } - zRet = hctDbMPrintf(&rc, "%d", p->config.nPageScan); - } - else if( 0==sqlite3_stricmp("hct_quiescent_integrity_check", zLeft) ){ - int iVal = 0; - if( zRight ){ - iVal = sqlite3Atoi(zRight); - } - if( iVal>0 ){ - p->config.bQuiescentIntegrityCheck = (iVal==0 ? 0 : 1); - } - zRet = hctDbMPrintf(&rc, "%d", p->config.bQuiescentIntegrityCheck); - }else{ - rc = SQLITE_NOTFOUND; - } - - aFnctl[0] = zRet; - return rc; -} - -/* -** The first argument, pCur, is a cursor opened on some b-tree. Count the -** number of entries in the b-tree and write the result to *pnEntry. -** -** SQLITE_OK is returned if the operation is successfully executed. -** Otherwise, if an error is encountered (i.e. an IO error or database -** corruption) an SQLite error code is returned. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeCount(sqlite3 *db, BtCursor *pCursor, i64 *pnEntry){ - HBtCursor *const pCur = (HBtCursor*)pCursor; - i64 nEntry = 0; - int dummy = 0; - int rc; - for(rc = sqlite3HctBtreeFirst((BtCursor*)pCur, &dummy); - rc==SQLITE_OK && 0==sqlite3HctBtreeEof((BtCursor*)pCur); - rc = sqlite3HctBtreeNext((BtCursor*)pCur, 0) - ){ - nEntry++; - } - *pnEntry = nEntry; - return SQLITE_OK; -} - -/* -** Return the pager associated with a BTree. This routine is used for -** testing and debugging only. -*/ -SQLITE_PRIVATE Pager *sqlite3HctBtreePager(Btree *pBt){ - HBtree *const p = (HBtree*)pBt; - return p->pFakePager; -} - -#ifndef SQLITE_OMIT_INTEGRITY_CHECK -/* -** This routine does a complete check of the given BTree file. aRoot[] is -** an array of pages numbers were each page number is the root page of -** a table. nRoot is the number of entries in aRoot. -** -** A read-only or read-write transaction must be opened before calling -** this function. -** -** Write the number of error seen in *pnErr. Except for some memory -** allocation errors, an error message held in memory obtained from -** malloc is returned if *pnErr is non-zero. If *pnErr==0 then NULL is -** returned. If a memory allocation error occurs, NULL is returned. -** -** If the first entry in aRoot[] is 0, that indicates that the list of -** root pages is incomplete. This is a "partial integrity-check". This -** happens when performing an integrity check on a single table. The -** zero is skipped, of course. But in addition, the freelist checks -** and the checks to make sure every page is referenced are also skipped, -** since obviously it is not possible to know which pages are covered by -** the unverified btrees. Except, if aRoot[1] is 1, then the freelist -** checks are still performed. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeIntegrityCheck( - sqlite3 *db, /* Database connection that is running the check */ - Btree *pBt, /* The btree to be checked */ - Pgno *aRoot, /* An array of root pages numbers for individual trees */ - Mem *aCnt, - int nRoot, /* Number of entries in aRoot[] */ - int mxErr, /* Stop reporting errors after this many */ - int *pnErr, /* Write number of errors seen to this variable */ - char **pzErr -){ - HBtree *const p = (HBtree*)pBt; - char *zRet = 0; /* Return value */ - *pnErr = 0; - int ii; - for(ii=0; iiconfig.bQuiescentIntegrityCheck && nRoot>0 && aRoot[0]!=0 ){ - zRet = sqlite3HctDbIntegrityCheck(p->pHctDb, aRoot, aCnt, nRoot, pnErr); - assert( zRet==0 || (*pnErr)>0 ); - } - *pzErr = zRet; - return 0; -} -#endif /* SQLITE_OMIT_INTEGRITY_CHECK */ - -/* -** Return the full pathname of the underlying database file. Return -** an empty string if the database is in-memory or a TEMP database. -** -** The pager filename is invariant as long as the pager is -** open so it is safe to access without the BtShared mutex. -*/ -SQLITE_PRIVATE const char *sqlite3HctBtreeGetFilename(Btree *p){ - return 0; -} - -/* -** Return the pathname of the journal file for this database. The return -** value of this routine is the same regardless of whether the journal file -** has been created or not. -** -** The pager journal filename is invariant as long as the pager is -** open so it is safe to access without the BtShared mutex. -*/ -SQLITE_PRIVATE const char *sqlite3HctBtreeGetJournalname(Btree *p){ - return 0; -} - -/* -** Return one of SQLITE_TXN_NONE, SQLITE_TXN_READ, or SQLITE_TXN_WRITE -** to describe the current transaction state of Btree p. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeTxnState(Btree *pBt){ - HBtree *const p = (HBtree*)pBt; - return p ? p->eTrans : SQLITE_TXN_NONE; -} - -#ifndef SQLITE_OMIT_WAL -/* -** Run a checkpoint on the Btree passed as the first argument. 
-** -** Return SQLITE_LOCKED if this or any other connection has an open -** transaction on the shared-cache the argument Btree is connected to. -** -** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCheckpoint(Btree *p, int eMode, int *pnLog, int *pnCkpt){ - return SQLITE_OK; -} -#endif - -/* -** Return true if there is currently a backup running on Btree p. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeIsInBackup(Btree *p){ - return 0; -} - -/* -** This function returns a pointer to a blob of memory associated with -** a single shared-btree. The memory is used by client code for its own -** purposes (for example, to store a high-level schema associated with -** the shared-btree). The btree layer manages reference counting issues. -** -** The first time this is called on a shared-btree, nBytes bytes of memory -** are allocated, zeroed, and returned to the caller. For each subsequent -** call the nBytes parameter is ignored and a pointer to the same blob -** of memory returned. -** -** If the nBytes parameter is 0 and the blob of memory has not yet been -** allocated, a null pointer is returned. If the blob has already been -** allocated, it is returned as normal. -** -** Just before the shared-btree is closed, the function passed as the -** xFree argument when the memory allocation was made is invoked on the -** blob of allocated memory. The xFree function should not call sqlite3_free() -** on the memory, the btree layer does that. 
-*/ -SQLITE_PRIVATE void *sqlite3HctBtreeSchema(Btree *pBt, int nBytes, void(*xFree)(void *)){ - HBtree *const p = (HBtree*)pBt; - void *pRet = 0; - if( p->pSchema ){ - pRet = p->pSchema; - }else if( nBytes>0 ){ - pRet = p->pSchema = sqlite3_malloc(nBytes); - if( pRet ){ - memset(pRet, 0, nBytes); - p->xSchemaFree = xFree; - } - } - return pRet; -} - -/* -** Return SQLITE_LOCKED_SHAREDCACHE if another user of the same shared -** btree as the argument handle holds an exclusive lock on the -** sqlite_schema table. Otherwise SQLITE_OK. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeSchemaLocked(Btree *p){ - return SQLITE_OK; -} - -SQLITE_PRIVATE HctDatabase *sqlite3HctDbFind(sqlite3 *db, int iDb){ - Btree *pBt = db->aDb[iDb].pBt; - return sqlite3IsHct(pBt) ? ((HBtree*)pBt)->pHctDb : 0; -} -SQLITE_PRIVATE HctJournal *sqlite3HctJrnlFind(sqlite3 *db){ - Btree *pBt = db->aDb[0].pBt; - return sqlite3IsHct(pBt) ? ((HBtree*)pBt)->pHctJrnl : 0; -} - -#ifndef SQLITE_OMIT_SHARED_CACHE -/* -** Obtain a lock on the table whose root page is iTab. The -** lock is a write lock if isWritelock is true or a read lock -** if it is false. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeLockTable(Btree *p, int iTab, u8 isWriteLock){ - int rc = SQLITE_OK; - assert( 0 ); - return rc; -} -#endif - -#ifndef SQLITE_OMIT_INCRBLOB -/* -** Argument pCur must be a cursor opened for writing on an -** INTKEY table currently pointing at a valid table entry. -** This function modifies the data stored as part of that entry. -** -** Only the data content may only be modified, it is not possible to -** change the length of the data stored. If this function is called with -** parameters that attempt to write past the end of the existing data, -** no modifications are made and SQLITE_CORRUPT is returned. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreePutData(BtCursor *pCur, u32 offset, u32 amt, void *z){ - HBtCursor *pCsr = (HBtCursor*)pCur; - int rc = SQLITE_OK; - - if( pCsr->wrFlag==0 ){ - rc = SQLITE_READONLY; - }else{ - rc = hctReseekBlobCsr(pCsr); - } - if( rc==SQLITE_OK ){ - u32 nData = 0; - const void *aData = sqlite3HctBtreePayloadFetch(pCur, &nData); - if( offset+amt>nData ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - u8 *aBuf = (u8*)sqlite3_malloc(nData+1); - if( aBuf ){ - BtreePayload payload; - memcpy(aBuf, aData, nData); - memcpy(&aBuf[offset], z, amt); - - memset(&payload, 0, sizeof(payload)); - payload.nKey = sqlite3HctBtreeIntegerKey(pCur); - payload.pData = (const void*)aBuf; - payload.nData = nData; - rc = sqlite3HctBtreeInsert(pCur, &payload, 0, 0); - if( rc==SQLITE_OK ){ - int dummy = 0; - rc = sqlite3HctBtreeTableMoveto(pCur, payload.nKey, 0, &dummy); - assert( dummy==0 ); - } - sqlite3_free(aBuf); - }else{ - rc = SQLITE_NOMEM; - } - } - } - - return rc; -} - -/* -** Mark this cursor as an incremental blob cursor. -*/ -SQLITE_PRIVATE void sqlite3HctBtreeIncrblobCursor(BtCursor *pCur){ - HBtCursor *pCsr = (HBtCursor*)pCur; - sqlite3HctTreeCsrIncrblob(pCsr->pHctTreeCsr); -} -#endif - -/* -** Set both the "read version" (single byte at byte offset 18) and -** "write version" (single byte at byte offset 19) fields in the database -** header to iVersion. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeSetVersion(Btree *pBtree, int iVersion){ - assert( 0 ); - return SQLITE_OK; -} - -/* -** Return true if the cursor has a hint specified. This routine is -** only used from within assert() statements -*/ -SQLITE_PRIVATE int sqlite3HctBtreeCursorHasHint(BtCursor *pCsr, unsigned int mask){ - return 0; -} - -/* -** Return true if the given Btree is read-only. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeIsReadonly(Btree *p){ - return 0; -} - -#if !defined(SQLITE_OMIT_SHARED_CACHE) -/* -** Return true if the Btree passed as the only argument is sharable. 
-*/ -SQLITE_PRIVATE int sqlite3HctBtreeSharable(Btree *p){ - assert( 0 ); - return 0; -} - -/* -** Return the number of connections to the BtShared object accessed by -** the Btree handle passed as the only argument. For private caches -** this is always 1. For shared caches it may be 1 or greater. -*/ -SQLITE_PRIVATE int sqlite3HctBtreeConnectionCount(Btree *p){ - assert( 0 ); - return 1; -} -#endif - -SQLITE_PRIVATE int sqlite3HctBtreeExclusiveLock(Btree *p){ - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctBtreeTransferRow(BtCursor *p1, BtCursor *p2, i64 iKey){ - assert( 0 ); - return SQLITE_LOCKED; -} - -SQLITE_PRIVATE int sqlite3HctLockedErr(u32 pgno, const char *zReason){ - return SQLITE_LOCKED; -} - -SQLITE_PRIVATE i64 sqlite3HctMainStats(sqlite3 *db, int iStat, const char **pzStat){ - Btree *pBt = db->aDb[0].pBt; - - i64 iRet = 0; - - if( sqlite3IsHct(pBt) ){ - HBtree *pHct = (HBtree*)pBt; - switch( iStat ){ - case 0: - *pzStat = "nretry"; - iRet = pHct->stats.nRetry; - break; - case 1: - *pzStat = "nretrykey"; - iRet = pHct->stats.nRetryKey; - break; - case 2: - *pzStat = "nkeyop"; - iRet = pHct->stats.nKeyOp; - break; - } - } - - return iRet; -} - - -#endif /* SQLITE_ENABLE_HCT */ - -/************** End of hctree.c **********************************************/ -/************** Begin file hct_tree.c ****************************************/ -/* -** 2020 September 24 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. 
-** -************************************************************************* -*/ - - -/* #include "hctInt.h" */ -/* #include */ -/* #include */ - -#define HCT_TREE_MAX_DEPTH 60 - -typedef struct HctTreeNode HctTreeNode; -typedef struct HctTreeRoot HctTreeRoot; - -struct HctTree { - int nRootHash; - int nRootEntry; - HctTreeRoot **apRootHash; - HctTreeNode *pRollback; /* List of rollback list items */ - HctTreeNode **apStmt; /* Array of open statement transactions */ - int nStmt; /* Allocated size of apStmt[] */ - int iStmt; /* Current entry in apStmt (-1 == none) */ -}; - -/* -** pReseek: -** Set to non-NULL if the cursor was disrupted by a write. The cursor -** should be seeked to the key in node pReseek. -*/ -struct HctTreeCsr { - HctTree *pTree; - HctTreeRoot *pRoot; - u8 bPin; /* True if cursor is pinned */ - u8 eIncrblob; /* Incrblob cursor state */ - i64 iSeekRowid; /* Last rowid value seeked to */ - int iSkip; /* -ve -> skip Prev(), +ve -> skip Next() */ - int iNode; /* Current depth */ - HctTreeNode *apNode[HCT_TREE_MAX_DEPTH]; - HctTreeNode *pReseek; - HctTreeCsr *pCsrNext; /* Next item in HctTreeRoot.pCsrList list */ -}; - -#define TREE_INCRBLOB_NONE 0 -#define TREE_INCRBLOB_READY 1 -#define TREE_INCRBLOB_ABORT 2 - -struct HctTreeNode { - i64 iKey; /* 64-bit key for this node */ - u8 bBlack; /* 1 for black node, 0 for red node */ - u8 nRef; /* Number of pointers to this node */ - u8 bDelete; /* True if this is a delete key */ - int nData; /* Size of aData[] in bytes */ - u8 *aData; /* Pointer to associated data (or NULL) */ - u32 iRoot; /* Root id of table this node belongs to */ - HctTreeNode *pLeft; /* Left child in tree */ - HctTreeNode *pRight; /* Right child in tree */ - - /* Rollback list related variables */ - HctTreeNode *pPrev; /* Previous entry in rollback list */ - HctTreeNode *pClobber; /* If non-NULL, entry this one clobbered */ -}; - -static HctTreeNode hctTreeGlobalEofNode; -#define TREE_RESEEK_EOF (&hctTreeGlobalEofNode) - -/* -** 
pCsrCache: -** List of unused cursor objects for this table/index. -*/ -struct HctTreeRoot { - u32 iRoot; /* Name of this tree structure */ - KeyInfo *pKeyInfo; - HctTreeNode *pNode; /* Root node of tree (or NULL) */ - HctTreeRoot *pHashNext; /* Next entry in hash-chain */ - HctTreeCsr *pCsrList; /* Cursors open on this tree */ - HctTreeCsr *pCsrCache; /* Cache of unused cursor objects */ -}; - -/* -** Allocate and return nByte bytes of zeroed memory. -*/ -static void *hctMallocZero(int nByte){ - void *pNew = sqlite3_malloc(nByte); - if( pNew ){ - memset(pNew, 0, nByte); - } - return pNew; -} - -SQLITE_PRIVATE int sqlite3HctTreeNew(HctTree **ppTree){ - HctTree *pNew; - int rc = SQLITE_OK; - - pNew = (HctTree*)hctMallocZero(sizeof(HctTree)); - if( pNew ){ - pNew->apRootHash = (HctTreeRoot**)hctMallocZero(sizeof(HctTreeRoot*)*16); - pNew->nRootHash = 16; - } - if( pNew==0 || pNew->apRootHash==0 ){ - sqlite3_free(pNew); - rc = SQLITE_NOMEM_BKPT; - } - - *ppTree = pNew; - return rc; -} - -static void treeNodeUnref(HctTreeNode *pNode){ - if( pNode!=TREE_RESEEK_EOF ){ - assert( pNode->nRef>0 ); - pNode->nRef--; - if( pNode->nRef==0 ){ - sqlite3_free(pNode); - } - } -} - -static void hctTreeFreeNode(HctTreeNode *pNode){ - if( pNode ){ - hctTreeFreeNode(pNode->pLeft); - hctTreeFreeNode(pNode->pRight); - assert( pNode->nRef==1 ); - treeNodeUnref(pNode); - } -} - -SQLITE_PRIVATE void sqlite3HctTreeFree(HctTree *pTree){ - if( pTree ){ - int i; - sqlite3HctTreeRelease(pTree, 0); - assert( pTree->pRollback==0 ); - for(i=0; inRootHash; i++){ - while( pTree->apRootHash[i] ){ - HctTreeRoot *p = pTree->apRootHash[i]; - HctTreeCsr *pCsr = p->pCsrCache; - sqlite3KeyInfoUnref(p->pKeyInfo); - pTree->apRootHash[i] = p->pHashNext; - while( pCsr ){ - HctTreeCsr *pNext = pCsr->pCsrNext; - sqlite3_free(pCsr); - pCsr = pNext; - } - hctTreeFreeNode(p->pNode); - sqlite3_free(p); - } - } - sqlite3_free(pTree->apRootHash); - sqlite3_free(pTree->apStmt); - sqlite3_free(pTree); - } -} - -#ifdef 
SQLITE_DEBUG -/* #include */ -static void hct_print_subtree2(HctTreeNode *pNode, char *aPrefix){ - if( pNode ){ - int n = strlen(aPrefix); - fprintf(stdout, "%-8s %s k=%lld\n", - aPrefix, pNode->bBlack ? "BLACK" : "RED ", pNode->iKey - ); - aPrefix[n] = 'L'; - hct_print_subtree2(pNode->pLeft, aPrefix); - aPrefix[n] = 'R'; - hct_print_subtree2(pNode->pRight, aPrefix); - aPrefix[n] = '\0'; - } -} -static void hct_print_subtree(HctTreeNode *pNode){ - if( pNode ){ - char aPrefix[64]; - memset(aPrefix, 0, sizeof(aPrefix)); - hct_print_subtree2(pNode, aPrefix); - fflush(stdout); - } -} - -/* -** To be used as: -** -** assert( hct_tree_check(pTree) ) -** -** An assert() fails if any of the following tree properties are violated: -** -** 1. Root node must be black. -** 2. A red node may not have a red parent. -** 3. Every path from root to NULL passes through the same number -** of black nodes. -*/ -static void hct_tree_check_subtree(HctTreeNode *pNode, int nDepth, int nExpect){ - if( pNode ){ - int nThisDepth = nDepth; - if( pNode->bBlack ){ - nThisDepth++; - }else{ - /* Property 2 - red parents have black children */ - assert( pNode->pLeft==0 || pNode->pLeft->bBlack ); - assert( pNode->pRight==0 || pNode->pRight->bBlack ); - } - - /* Property 3 - Every path from root to NULL has same black-depth */ - assert( (pNode->pLeft && pNode->pRight) || nThisDepth==nExpect ); - - hct_tree_check_subtree(pNode->pLeft, nThisDepth, nExpect); - hct_tree_check_subtree(pNode->pRight, nThisDepth, nExpect); - } - hct_print_subtree(0); /* no-op - just to avoid a warning */ -} -static int hct_tree_check(HctTreeRoot *pRoot){ - if( 0 && pRoot->pNode ){ - int nBlack = 0; - HctTreeNode *pNode = 0; - assert( pRoot->pNode->bBlack ); /* 1. Root is black */ - - /* Calculate the expected number of black nodes between root and NULL. 
*/ - for(pNode=pRoot->pNode; pNode; pNode=pNode->pLeft){ - if( pNode->bBlack ) nBlack++; - } - - hct_tree_check_subtree(pRoot->pNode, 0, nBlack); - } - return 1; -} -#endif - -static HctTreeRoot *hctTreeFindRoot(HctTree *pTree, u32 iRoot){ - HctTreeRoot *pNew = 0; - - /* Search the hash table for an existing root. Return immediately if - ** one is found. */ - HctTreeRoot *pRoot; - for(pRoot = pTree->apRootHash[iRoot % pTree->nRootHash]; - pRoot; - pRoot=pRoot->pHashNext - ){ - if( pRoot->iRoot==iRoot ) return pRoot; - } - - /* If the hash table needs to grow, do that now */ - if( (pTree->nRootEntry+1)*2 > pTree->nRootHash ){ - int ii; - int nOld = pTree->nRootHash; - int nNew = nOld ? nOld*2 : 16; - HctTreeRoot **apNew = (HctTreeRoot**)sqlite3_realloc( - pTree->apRootHash, nNew*sizeof(HctTreeRoot*) - ); - if( apNew==0 ) return 0; - memset(&apNew[nOld], 0, (nNew-nOld)*sizeof(HctTreeRoot*)); - - for(ii=0; iipHashNext; - int iHash = p->iRoot % nNew; - p->pHashNext = apNew[iHash]; - apNew[iHash] = p; - p = pNext; - } - } - - pTree->apRootHash = apNew; - pTree->nRootHash = nNew; - } - - /* Allocate a new root and add it to the hash table */ - pNew = hctMallocZero(sizeof(HctTreeRoot)); - if( pNew ){ - int iHash = iRoot % pTree->nRootHash; - pNew->iRoot = iRoot; - pNew->pHashNext = pTree->apRootHash[iHash]; - pTree->apRootHash[iHash] = pNew; - pTree->nRootEntry++; - } - - return pNew; -} - -static void leftRotate(HctTreeNode **pp){ - HctTreeNode *pG = *pp; - HctTreeNode *pRight = pG->pRight; - - pG->pRight = pRight->pLeft; - pRight->pLeft = pG; - *pp = pRight; -} - -static void rightRotate(HctTreeNode **pp){ - HctTreeNode *pG = *pp; - HctTreeNode *pLeft = pG->pLeft; - - pG->pLeft = pLeft->pRight; - pLeft->pRight = pG; - *pp = pLeft; -} - -static HctTreeNode **hctTreeFindPointer(HctTreeCsr *pCsr, int iNode){ - HctTreeNode **pp; - if( iNode==0 ){ - assert( pCsr->apNode[0]==pCsr->pRoot->pNode ); - pp = &pCsr->pRoot->pNode; - }else{ - HctTreeNode *pParent = 
pCsr->apNode[iNode-1]; - if( pParent->pLeft==pCsr->apNode[iNode] ){ - pp = &pParent->pLeft; - }else{ - assert( pParent->pRight==pCsr->apNode[iNode] ); - pp = &pParent->pRight; - } - } - return pp; -} - -static void hctTreeFixInsert( - HctTree *pTree, - HctTreeCsr *pCsr, - HctTreeNode *pX -){ - HctTreeNode *pP = pCsr->apNode[pCsr->iNode]; - HctTreeNode *pG = pCsr->apNode[pCsr->iNode-1]; - HctTreeNode *pU; - - assert( pCsr->iNode>=1 ); - - if( pG->pLeft==pP ){ - pU = pG->pRight; - }else{ - pU = pG->pLeft; - } - - if( pU && pU->bBlack==0 ){ - /* Uncle of X is red */ - pP->bBlack = 1; - pU->bBlack = 1; - if( pCsr->iNode>1 ){ - pG->bBlack = 0; - if( pCsr->apNode[pCsr->iNode-2]->bBlack==0 ){ - pCsr->iNode -= 2; - hctTreeFixInsert(pTree, pCsr, pG); - } - } - }else{ - /* Uncle of X is black */ - int iCase = ((pG->pRight==pP) ? 2 : 0) + (pP->pRight==pX ? 1 : 0); - HctTreeNode **ppG = hctTreeFindPointer(pCsr, pCsr->iNode-1); - - switch( iCase ){ - case 1: /* left/right */ - leftRotate(&pG->pLeft); - pP = pX; - /* fall-through */ - case 0: /* left/left */ - rightRotate(ppG); - pP->bBlack = 1; - pG->bBlack = 0; - break; - case 2: /* right/left */ - rightRotate(&pG->pRight); - pP = pX; - /* fall-through */ - case 3: /* right/right */ - leftRotate(ppG); - pP->bBlack = 1; - pG->bBlack = 0; - break; - default: - assert( 0 ); - } - } -} - -static int hctSaveCursors( - HctTreeRoot *pRoot, - HctTreeCsr *pExcept, - int bAbortBlob, - i64 iRowid -){ - int rc = SQLITE_OK; - HctTreeCsr *pCsr; - for(pCsr=pRoot->pCsrList; pCsr; pCsr=pCsr->pCsrNext){ - if( pCsr!=pExcept && pCsr->pReseek==0 ){ - if( pCsr->iNode>=0 ){ - if( pCsr->bPin ){ - return SQLITE_CONSTRAINT_PINNED; - } - pCsr->pReseek = pCsr->apNode[pCsr->iNode]; - pCsr->pReseek->nRef++; - }else{ - pCsr->pReseek = TREE_RESEEK_EOF; - } - } - if( bAbortBlob - && pCsr->eIncrblob==TREE_INCRBLOB_READY - && pCsr->iSeekRowid==iRowid - ){ - pCsr->eIncrblob = TREE_INCRBLOB_ABORT; - } - } - return rc; -} - - -static int hctTreeCsrSeekInt( - 
HctTreeCsr *pCsr, - i64 iKey, - int *pRes -){ - int rc = SQLITE_OK; /* Return code */ - int res = -1; /* Value to return via *pRes */ - HctTreeNode *pNode = pCsr->pRoot->pNode; - pCsr->iNode = -1; - while( pNode ){ - i64 iNodeKey = pNode->iKey; - pCsr->apNode[++pCsr->iNode] = pNode; - if( iNodeKey==iKey ){ - res = 0; - break; - } - if( iKeypLeft; - }else{ - res = -1; - pNode = pNode->pRight; - } - assert( pCsr->iNodepRoot->pNode; - pCsr->iNode = -1; - while( pNode ){ - pCsr->apNode[++pCsr->iNode] = pNode; - res = sqlite3VdbeRecordCompare(pNode->nData, pNode->aData, pRec); - if( res==0 ) break; - if( res>0 ){ - /* pRec is smaller than this node's key. Go left. */ - pNode = pNode->pLeft; - }else{ - /* pRec is larger than this node's key. Go left. */ - pNode = pNode->pRight; - } - assert( pCsr->iNodepRoot->pKeyInfo==0 ){ - pCsr->pRoot->pKeyInfo = sqlite3KeyInfoRef(pRec->pKeyInfo); - } - - if( pRes ) *pRes = res; - return rc; -} - -static int hctTreeCsrSeekPacked( - HctTreeCsr *pCsr, - int nKey, - const u8 *aKey, - int *pRes -){ - int rc; - KeyInfo *pKeyInfo = pCsr->pRoot->pKeyInfo; - UnpackedRecord *pRec; - - assert( pKeyInfo ); - pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); - if( pRec ){ - sqlite3VdbeRecordUnpack(pKeyInfo, nKey, aKey, pRec); - rc = hctTreeCsrSeekUnpacked(pCsr, pRec, pRes); - sqlite3DbFree(pKeyInfo->db, pRec); - }else{ - rc = SQLITE_NOMEM; - } - return rc; -} - - -static int hctRestoreCursor(HctTreeCsr *pCsr, int *pRes){ - int rc = SQLITE_OK; - HctTreeNode *pReseek = pCsr->pReseek; - if( pReseek ){ - if( pReseek!=TREE_RESEEK_EOF ){ - if( pCsr->pRoot->pKeyInfo ){ - rc = hctTreeCsrSeekPacked(pCsr, pReseek->nData, pReseek->aData, pRes); - }else{ - rc = hctTreeCsrSeekInt(pCsr, pReseek->iKey, pRes); - } - treeNodeUnref(pReseek); - } - pCsr->pReseek = 0; - }else{ - *pRes = 0; - } - return rc; -} - -static void hctRestoreDiscard(HctTreeCsr *pCsr){ - if( pCsr->pReseek ){ - treeNodeUnref(pCsr->pReseek); - pCsr->pReseek = 0; - pCsr->iNode = -1; - } - 
pCsr->iSkip = 0; -} - -static int treeInsertNode( - HctTree *pTree, - int bRollback, - UnpackedRecord *pKey, - i64 iKey, - HctTreeNode *pNew -){ - HctTreeRoot *pRoot = hctTreeFindRoot(pTree, pNew->iRoot); - UnpackedRecord *pFree = 0; - int res = 0; - HctTreeCsr csr; - memset(&csr, 0, sizeof(csr)); - csr.pRoot = pRoot; - csr.pTree = pTree; - - /* Special case. If this insert is to effect a rollback on an index - ** tree, pKey will still be NULL. In this case construct a pKey value - ** with which to do the seek. */ - if( pRoot->pKeyInfo && pKey==0 ){ - assert( bRollback ); - pFree = sqlite3VdbeAllocUnpackedRecord(pRoot->pKeyInfo); - if( pFree==0 ){ - return SQLITE_NOMEM; - } - sqlite3VdbeRecordUnpack(pRoot->pKeyInfo, pNew->nData, pNew->aData, pFree); - pKey = pFree; - } - - sqlite3HctTreeCsrSeek(&csr, pKey, iKey, &res); - if( csr.iNode<0 ){ - assert( pRoot->pNode==0 ); - pRoot->pNode = pNew; - }else{ - HctTreeNode *pNode = csr.apNode[csr.iNode]; - if( res==0 ){ - pNew->pLeft = pNode->pLeft; - pNew->pRight = pNode->pRight; - pNew->bBlack = pNode->bBlack; - *(hctTreeFindPointer(&csr, csr.iNode)) = pNew; - if( bRollback==0 && pTree->iStmt>=0 ){ - pNew->pClobber = pNode; - assert( pNew->iKey==pNode->iKey ); - }else{ - treeNodeUnref(pNode); - } - }else{ - if( res<0 ){ - assert( pNode->pRight==0 ); - pNode->pRight = pNew; - }else{ - assert( pNode->pLeft==0 ); - pNode->pLeft = pNew; - } - if( pNode->bBlack==0 ){ - hctTreeFixInsert(pTree, &csr, pNew); - } - } - } - pNew->nRef++; - - /* Root node is always black */ - pRoot->pNode->bBlack = 1; - assert( hct_tree_check(pRoot) ); - if( pFree ){ - sqlite3DbFree(pFree->pKeyInfo->db, pFree); - } - return SQLITE_OK; -} - -static HctTreeNode *treeNewNode2( - HctTree *pTree, - HctTreeRoot *pRoot, - i64 iKey, - int bDelete, - int nData, - const u8 *aData, - int nZero -){ - HctTreeNode *pNew; - - pNew = (HctTreeNode*)hctMallocZero(sizeof(HctTreeNode) + nData + nZero); - if( pNew ){ - pNew->iKey = iKey; - pNew->nData = nData + nZero; - 
pNew->iRoot = pRoot->iRoot; - pNew->bDelete = bDelete; - if( (nData+nZero)>0 ){ - pNew->aData = (u8*)&pNew[1]; - memcpy(pNew->aData, aData, nData); - } - - if( pTree->iStmt>0 ){ - pNew->pPrev = pTree->pRollback; - pTree->pRollback = pNew; - pNew->nRef = 1; - } - } - - return pNew; -} - -/* -** Allocate a new tree node. Link it into the rollback list. -*/ -static HctTreeNode *treeNewNode( - HctTreeCsr *pCsr, - i64 iKey, - int bDelete, - int nData, - const u8 *aData, - int nZero -){ - return treeNewNode2( - pCsr->pTree, pCsr->pRoot, iKey, bDelete, nData, aData, nZero - ); -} - -static int treeInsert( - HctTreeCsr *pCsr, - UnpackedRecord *pKey, - i64 iKey, - int bDelete, - int nData, - const u8 *aData, - int nZero -){ - HctTree *pTree = pCsr->pTree; - HctTreeNode *pNew; - int rc = SQLITE_OK; - - assert( bDelete==0 || pKey || (aData==0 && nData==0 && nZero==0) ); - - pNew = treeNewNode(pCsr, iKey, bDelete, nData, aData, nZero); - if( pNew==0 ){ - rc = SQLITE_NOMEM; - }else{ - int nSave = 0; - int bPinSave = pCsr->bPin; - if( pKey ){ - nSave = pKey->nField; - sqlite3HctDbRecordTrim(pKey); - } - pCsr->bPin = 0; - rc = hctSaveCursors(pCsr->pRoot, 0, (pCsr->eIncrblob==0), iKey); - if( rc==SQLITE_OK && bPinSave ){ - int dummy; - rc = hctRestoreCursor(pCsr, &dummy); - } - pCsr->bPin = bPinSave; - if( rc==SQLITE_OK ){ - rc = treeInsertNode(pTree, pTree->iStmt<=0, pKey, iKey, pNew); - } - if( pKey ) pKey->nField = nSave; - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctTreeUpdateMeta( - HctTree *pTree, - const u8 *aMeta, /* Meta data */ - int nMeta /* Size of meta data in bytes */ -){ - HctTreeRoot *pRoot = hctTreeFindRoot(pTree, 2); - HctTreeNode *pNew = treeNewNode2(pTree, pRoot, 0, 0, nMeta, aMeta, 0); - treeInsertNode(pTree, pTree->iStmt<=0, 0, 0, pNew); - return SQLITE_OK; -} - -/* -** This function is like sqlite3HctTreeInsert(), except that: -** -** 1) the new key is always larger than any existing key in the -** tree, and -** -** 2) unless the tree is empty, 
cursor pCsr is guaranteed to point to the -** largest record in it, and -** -** 3) before returning, this function leaves cursor pCsr pointing to the -** new entry. -*/ -SQLITE_PRIVATE int sqlite3HctTreeAppend( - HctTreeCsr *pCsr, - KeyInfo *pKeyInfo, - i64 iKey, - int nData, - const u8 *aData, - int nZero -){ - HctTreeRoot *pRoot = pCsr->pRoot; - int rc = SQLITE_OK; - - assert( pCsr->pTree->iStmt>0 ); - - if( pKeyInfo && pRoot->pKeyInfo==0 ){ - pRoot->pKeyInfo = sqlite3KeyInfoRef(pKeyInfo); - } - - rc = hctSaveCursors(pRoot, pCsr, pCsr->eIncrblob==0, iKey); - if( rc==SQLITE_OK ){ - HctTreeNode *pNew = treeNewNode(pCsr, iKey, 0, nData, aData, nZero); - if( pNew==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - pNew->nRef++; - if( pRoot->pNode==0 ){ - pRoot->pNode = pNew; - pCsr->apNode[0] = pNew; - pCsr->iNode = 0; - }else{ - HctTreeNode *pParent = pCsr->apNode[pCsr->iNode]; - - assert( pCsr->iNode>=0 ); - assert( pParent->pRight==0 ); - pParent->pRight = pNew; - - if( pParent->bBlack==0 ){ - hctTreeFixInsert(pCsr->pTree, pCsr, pNew); - sqlite3HctTreeCsrLast(pCsr); - }else{ - pCsr->apNode[++pCsr->iNode] = pNew; - } - } - - /* Root node is always black */ - pRoot->pNode->bBlack = 1; - assert( hct_tree_check(pRoot) ); - } - } - - return rc; -} - -#if 0 -static void debug_write_op( - HctTreeCsr *pCsr, - const char *zOp, - UnpackedRecord *pKey, - i64 iKey, - int nData, - const u8 *aData -){ - printf("%s(%d) ", zOp, (int)pCsr->pRoot->iRoot); - if( pKey ){ - char *z = sqlite3HctDbRecordToText(0, aData, nData); - printf("[%s]\n", z); - }else{ - printf("%lld\n", iKey); - } - fflush(stdout); -} -#else -# define debug_write_op(r,s,w,x,y,z) -#endif - -SQLITE_PRIVATE int sqlite3HctTreeInsert( - HctTreeCsr *pCsr, - UnpackedRecord *pKey, - i64 iKey, - int nData, - const u8 *aData, - int nZero -){ - assert( pKey==0 || iKey==0 ); - debug_write_op(pCsr, "INSERT", pKey, iKey, nData, aData); - return treeInsert(pCsr, pKey, iKey, 0, nData, aData, nZero); -} - -SQLITE_PRIVATE int 
sqlite3HctTreeDeleteKey( - HctTreeCsr *pCsr, - UnpackedRecord *pKey, - i64 iKey, - int nData, - const u8 *aData -){ - debug_write_op(pCsr, "DELETE", pKey, iKey, nData, aData); - return treeInsert(pCsr, pKey, iKey, 1, nData, aData, 0); -} - -/* -** Cursor pCsr currently points at a double-black node. Fix it. -*/ -static void hctTreeFixDelete(HctTreeCsr *pCsr){ - assert( pCsr->iNode>0 || pCsr->pRoot->pNode->bBlack ); - if( pCsr->iNode>0 ){ - HctTreeNode *pDB; /* The double-black */ - HctTreeNode *pP; /* Parent of pDB */ - HctTreeNode *pS; /* Sibling of pDB */ - - pDB = pCsr->apNode[pCsr->iNode]; - pP = pCsr->apNode[pCsr->iNode-1]; - pS = pP->pLeft==pDB ? pP->pRight : pP->pLeft; - - if( pS->bBlack ){ - HctTreeNode *pR = 0; - if( pS->pLeft && pS->pLeft->bBlack==0 ){ - pR = pS->pLeft; - }else if( pS->pRight && pS->pRight->bBlack==0 ){ - pR = pS->pRight; - } - - if( pR ){ - /* Sibling is black, pR is a red child */ - HctTreeNode **ppP = hctTreeFindPointer(pCsr, pCsr->iNode-1); - int iCase = ((pP->pRight==pS) ? 2 : 0) + (pS->pRight==pR ? 1 : 0); - switch( iCase ){ - case 0: /* Left/Left */ - pR->bBlack = 1; - pS->bBlack = pP->bBlack; - rightRotate(ppP); - pP->bBlack = 1; - break; - case 1: /* Left/Right */ - leftRotate(&pP->pLeft); - rightRotate(ppP); - pR->bBlack = pP->bBlack; - pP->bBlack = 1; - break; - case 2: /* Right/Left */ - rightRotate(&pP->pRight); - leftRotate(ppP); - pR->bBlack = pP->bBlack; - pP->bBlack = 1; - break; - case 3: /* Right/Right */ - pR->bBlack = 1; - pS->bBlack = pP->bBlack; - leftRotate(ppP); - pP->bBlack = 1; - break; - } - }else{ - /* Sibling is black, with no red children. */ - pS->bBlack = 0; - if( pP->bBlack ){ - pCsr->iNode--; - hctTreeFixDelete(pCsr); - }else{ - pP->bBlack = 1; - } - } - }else{ - HctTreeNode **ppP = hctTreeFindPointer(pCsr, pCsr->iNode-1); - - /* Sibling is red. Because it is the red sibling of a double-black, it - ** must have children on both sides. And because it is red, both those - ** children must be black. 
*/ - assert( pS->pLeft->bBlack && pS->pRight->bBlack ); - - if( pS==pP->pLeft ){ - rightRotate(ppP); - }else{ - leftRotate(ppP); - } - pS->bBlack = 1; - pP->bBlack = 0; - pCsr->apNode[pCsr->iNode-1] = pS; - pCsr->apNode[pCsr->iNode] = pP; - pCsr->apNode[pCsr->iNode+1] = pDB; - pCsr->iNode++; - hctTreeFixDelete(pCsr); - } - } -} - -static int treeDelete(HctTreeCsr *pCsr, int bRollback){ - HctTreeNode *pDel = pCsr->apNode[pCsr->iNode]; - HctTreeNode *pU = 0; - HctTreeNode *pReseek = 0; - int rc; - - /* Save the positions of all cursors on this table */ - rc = hctSaveCursors(pCsr->pRoot, pCsr, 0, 0); - if( rc ) return rc; - - assert( pCsr->pReseek==0 ); - assert( pCsr->iNode>=0 ); -#if 0 - fprintf(stdout, "deleting %lld\n", iKey); - hct_print_subtree(pCsr->pRoot->pNode); -#endif - - if( bRollback==0 ){ - HctTreeNode *pEntry = hctMallocZero(sizeof(*pEntry)); - if( pEntry==0 ) return SQLITE_NOMEM; - pEntry->iKey = pDel->iKey; - pEntry->pClobber = pDel; - pEntry->pPrev = pCsr->pTree->pRollback; - pEntry->nRef = 1; - pEntry->iRoot = pCsr->pRoot->iRoot; - pDel->nRef++; - pCsr->pTree->pRollback = pEntry; - pReseek = pDel; - pReseek->nRef++; - } - - /* If node pDel has two children, swap it with its immediate successor - ** in the tree. This node is guaranteed to have pNode->pLeft==0. */ - if( pDel->pLeft && pDel->pRight ){ - int iDel = pCsr->iNode; - HctTreeNode *pSwap; - sqlite3HctTreeCsrNext(pCsr); - pSwap = pCsr->apNode[pCsr->iNode]; - SWAP(HctTreeNode*, pSwap->pLeft, pDel->pLeft); - SWAP(HctTreeNode*, pSwap->pRight, pDel->pRight); - SWAP(int, pSwap->bBlack, pDel->bBlack); - *hctTreeFindPointer(pCsr, iDel) = pSwap; - pCsr->apNode[iDel] = pSwap; - *hctTreeFindPointer(pCsr, pCsr->iNode) = pDel; - pCsr->apNode[pCsr->iNode] = pDel; - - assert( pDel->pLeft==0 ); - assert( hct_tree_check(pCsr->pRoot) ); - } - - assert( pCsr->apNode[pCsr->iNode]==pDel ); - assert( pDel->pLeft==0 || pDel->pRight==0 ); - - pU = pDel->pLeft ? 
pDel->pLeft : pDel->pRight; - *hctTreeFindPointer(pCsr, pCsr->iNode) = pU; - if( pDel->bBlack==0 || (pU && pU->bBlack==0) || pCsr->pRoot->pNode==0 ){ - /* Simple case. If either pDel or its child pU are red, then - ** replacing the pDel with the child and ensuring the child is - ** colored black is enough. No change in black-height for the - ** children of pU. */ - if( pU ) pU->bBlack = 1; - }else{ - pCsr->apNode[pCsr->iNode] = pU; - hctTreeFixDelete(pCsr); - } - - treeNodeUnref(pDel); - assert( pCsr->pReseek==0 ); - pCsr->pReseek = pReseek; - -#if 0 - fprintf(stdout, "finished deleting %lld\n", iKey); - hct_print_subtree(pCsr->pRoot->pNode); -#endif - assert( hct_tree_check(pCsr->pRoot) ); - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeDelete(HctTreeCsr *pCsr){ - int rc; - assert( pCsr->pReseek==0 ); - rc = treeDelete(pCsr, 0); - return rc; -} - -SQLITE_PRIVATE int sqlite3HctTreeBegin(HctTree *pTree, int iStmt){ - if( iStmt>pTree->iStmt ){ - int ii; - if( pTree->nStmt<=iStmt ){ - int nNew = iStmt+16; - HctTreeNode **apNew = (HctTreeNode**)hctMallocZero(nNew*sizeof(*apNew)); - if( apNew==0 ) return SQLITE_NOMEM; - if( pTree->apStmt ){ - memcpy(apNew, pTree->apStmt, pTree->nStmt*sizeof(*apNew)); - sqlite3_free(pTree->apStmt); - } - pTree->apStmt = apNew; - pTree->nStmt = nNew; - } - for(ii=pTree->iStmt+1; ii<=iStmt; ii++){ - pTree->apStmt[ii] = pTree->pRollback; - } - pTree->iStmt = iStmt; - } - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeRelease(HctTree *pTree, int iStmt){ - if( iStmtiStmt ){ - if( iStmt==0 ){ - HctTreeNode *pStop = pTree->apStmt[iStmt+1]; - HctTreeNode *pNode; - HctTreeNode *pPrev; - for(pNode=pTree->pRollback; pNode!=pStop; pNode=pPrev){ - pPrev = pNode->pPrev; - if( pNode->pClobber ) treeNodeUnref(pNode->pClobber); - treeNodeUnref(pNode); - } - pTree->pRollback = pStop; - } - pTree->iStmt = iStmt; - } - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeRollbackTo(HctTree *pTree, int iStmt){ - int rc = SQLITE_OK; - 
if( iStmt<=pTree->iStmt ){ - HctTreeNode *pStop = pTree->apStmt[iStmt]; - HctTreeNode *pNode; - HctTreeNode *pPrev; - for(pNode=pTree->pRollback; pNode!=pStop; pNode=pPrev){ - KeyInfo *pKeyInfo = 0; - UnpackedRecord *pRec = 0; - HctTreeRoot *pRoot = hctTreeFindRoot(pTree, pNode->iRoot); - - pPrev = pNode->pPrev; - - if( (pKeyInfo = pRoot->pKeyInfo) ){ - pRec = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); - if( pRec==0 ){ - rc = SQLITE_NOMEM; - pStop = pNode; - break; - } - sqlite3VdbeRecordUnpack(pKeyInfo, pNode->nData, pNode->aData, pRec); - } - - if( pNode->pClobber ){ - HctTreeNode *pClobber = pNode->pClobber; - assert( pNode->iKey==pNode->pClobber->iKey ); - pClobber->pLeft = pClobber->pRight = 0; - pClobber->bBlack = 0; - if( (rc = hctSaveCursors(pRoot, 0, 0, 0)) - || (rc = treeInsertNode(pTree, 1, pRec, pNode->iKey, pClobber)) ){ - pStop = pNode; - break; - } - treeNodeUnref(pClobber); - }else{ - HctTreeCsr csr; - int res; - memset(&csr, 0, sizeof(csr)); - csr.pRoot = pRoot; - csr.pTree = pTree; - sqlite3HctTreeCsrSeek(&csr, pRec, pNode->iKey, &res); - if( res==0 ) treeDelete(&csr, 1); - } - if( pRec ) sqlite3DbFree(pKeyInfo->db, pRec); - treeNodeUnref(pNode); - } - pTree->pRollback = pStop; - pTree->iStmt = iStmt; - } - return rc; -} - -/* -** Clear the contents of the entire tree. 
-*/ -SQLITE_PRIVATE void sqlite3HctTreeClear(HctTree *pTree){ - HctTreeRoot **pp; - HctTreeRoot **pEnd = &pTree->apRootHash[pTree->nRootHash]; - for(pp=pTree->apRootHash; pppHashNext){ - hctSaveCursors(p, 0, 0, 0); - hctTreeFreeNode(p->pNode); - p->pNode = 0; - sqlite3KeyInfoUnref(p->pKeyInfo); - p->pKeyInfo = 0; - } - } -} - -SQLITE_PRIVATE int sqlite3HctTreeClearOne(HctTree *pTree, u32 iRoot, i64 *pnRow){ - HctTreeCsr csr; - int rc = SQLITE_OK; - int nRow = 0; - - memset(&csr, 0, sizeof(csr)); - csr.pTree = pTree; - csr.pRoot = hctTreeFindRoot(pTree, iRoot); - csr.iNode = -1; - rc = hctSaveCursors(csr.pRoot, 0, 0, 0); - if( rc ) return rc; - while( rc==SQLITE_OK && csr.pRoot->pNode ){ - sqlite3HctTreeCsrFirst(&csr); - rc = sqlite3HctTreeDelete(&csr); - nRow++; - hctRestoreDiscard(&csr); - } - if( pnRow ) *pnRow = nRow; - return rc; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrOpen(HctTree *pTree, u32 iRoot, HctTreeCsr **ppCsr){ - int rc = SQLITE_OK; - HctTreeCsr *pNew = 0; - HctTreeRoot *pRoot = hctTreeFindRoot(pTree, iRoot); - if( pRoot==0 ){ - rc = SQLITE_NOMEM; - }else{ - if( pRoot->pCsrCache ){ - pNew = pRoot->pCsrCache; - pRoot->pCsrCache = pNew->pCsrNext; - pNew->pCsrNext = 0; - assert( pNew->pTree==pTree ); - assert( pNew->pRoot==pRoot ); - assert( pNew->iNode==-1 ); - assert( pNew->eIncrblob==TREE_INCRBLOB_NONE ); - }else{ - pNew = (HctTreeCsr*)hctMallocZero(sizeof(HctTreeCsr)); - if( pNew==0 ){ - rc = SQLITE_NOMEM; - }else{ - pNew->pTree = pTree; - pNew->pRoot = pRoot; - pNew->iNode = -1; - } - } - pNew->pCsrNext = pRoot->pCsrList; - pRoot->pCsrList = pNew; - } - *ppCsr = pNew; - return rc; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrClose(HctTreeCsr *pCsr){ - if( pCsr ){ - HctTreeCsr **pp; - for(pp=&pCsr->pRoot->pCsrList; *pp!=pCsr; pp=&(*pp)->pCsrNext); - *pp = pCsr->pCsrNext; - if( pCsr->pReseek ){ - treeNodeUnref(pCsr->pReseek); - pCsr->pReseek = 0; - } - pCsr->pCsrNext = pCsr->pRoot->pCsrCache; - pCsr->pRoot->pCsrCache = pCsr; - pCsr->iSkip = 0; - 
pCsr->bPin = 0; - pCsr->iNode = -1; - pCsr->eIncrblob = TREE_INCRBLOB_NONE; - } - return SQLITE_OK; -} - -/* -** An integer is written into *pRes which is the result of -** comparing the key with the entry to which the cursor is -** pointing. The meaning of the integer written into -** *pRes is as follows: -** -** *pRes<0 The cursor is left pointing at an entry that -** is smaller than intKey/pIdxKey. Or, the table is empty -** and the cursor is therefore left point to nothing. -** -** *pRes==0 The cursor is left pointing at an entry that -** exactly matches intKey/pIdxKey. -** -** *pRes>0 The cursor is left pointing at an entry that -** is larger than intKey/pIdxKey. -*/ -SQLITE_PRIVATE int sqlite3HctTreeCsrSeek( - HctTreeCsr *pCsr, - UnpackedRecord *pRec, - i64 iKey, - int *pRes -){ - hctRestoreDiscard(pCsr); - pCsr->iSeekRowid = iKey; - if( pRec ){ - return hctTreeCsrSeekUnpacked(pCsr, pRec, pRes); - } - return hctTreeCsrSeekInt(pCsr, iKey, pRes); -} - -/* -** Move the cursor to EOF. 
-*/ -SQLITE_PRIVATE void sqlite3HctTreeCsrClear(HctTreeCsr *pCsr){ - hctRestoreDiscard(pCsr); - pCsr->iNode = -1; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrNext(HctTreeCsr *pCsr){ - int iNode; - int res = 0; - - assert( pCsr->pReseek==0 || pCsr->iSkip==0 ); - if( pCsr->iSkip>0 ){ - pCsr->iSkip = 0; - return SQLITE_OK; - } - if( hctRestoreCursor(pCsr, &res) ) return SQLITE_NOMEM; - if( res>0 ) return SQLITE_OK; - - iNode = pCsr->iNode; - if( iNode>=0 ){ - HctTreeNode *pNode = pCsr->apNode[iNode]; - assert( iNode>=0 ); - if( pNode->pRight ){ - pNode = pNode->pRight; - while( pNode ){ - iNode++; - pCsr->apNode[iNode] = pNode; - pNode = pNode->pLeft; - } - }else{ - while( (--iNode)>=0 ){ - HctTreeNode *pParent = pCsr->apNode[iNode]; - assert( pNode==pParent->pLeft || pNode==pParent->pRight ); - if( pNode==pParent->pLeft ) break; - pNode = pParent; - } - } - pCsr->iNode = iNode; - } - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrPrev(HctTreeCsr *pCsr){ - int iNode; - int res = 0; - - assert( pCsr->pReseek==0 || pCsr->iSkip==0 ); - if( pCsr->iSkip<0 ){ - pCsr->iSkip = 0; - return SQLITE_OK; - } - if( hctRestoreCursor(pCsr, &res) ) return SQLITE_NOMEM; - if( res<0 ) return SQLITE_OK; - - iNode = pCsr->iNode; - if( iNode>=0 ){ - HctTreeNode *pNode = pCsr->apNode[iNode]; - assert( iNode>=0 ); - if( pNode->pLeft ){ - pNode = pNode->pLeft; - while( pNode ){ - iNode++; - pCsr->apNode[iNode] = pNode; - pNode = pNode->pRight; - } - }else{ - while( (--iNode)>=0 ){ - HctTreeNode *pParent = pCsr->apNode[iNode]; - assert( pNode==pParent->pLeft || pNode==pParent->pRight ); - if( pNode==pParent->pRight ) break; - pNode = pParent; - } - } - pCsr->iNode = iNode; - } - return SQLITE_OK; -} - -/* -** Return false if cursor points to a valid entry, or true otherwise. 
-*/ -SQLITE_PRIVATE int sqlite3HctTreeCsrEof(HctTreeCsr *pCsr){ - return (pCsr->iNode<0); -} - -static void hctTreeCursorEnd(HctTreeCsr *pCsr, int bLast){ - int iNode = -1; - HctTreeNode *pNode = pCsr->pRoot->pNode; - - hctRestoreDiscard(pCsr); - while( pNode ){ - iNode++; - assert( iNodeapNode[iNode] = pNode; - pNode = (bLast ? pNode->pRight : pNode->pLeft); - } - pCsr->iNode = iNode; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrFirst(HctTreeCsr *pCsr){ - hctTreeCursorEnd(pCsr, 0); - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrLast(HctTreeCsr *pCsr){ - hctTreeCursorEnd(pCsr, 1); - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrKey(HctTreeCsr *pCsr, i64 *piKey){ - assert( pCsr->iNode>=0 ); - assert( pCsr->pReseek==0 ); - *piKey = pCsr->apNode[pCsr->iNode]->iKey; - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrData(HctTreeCsr *pCsr, int *pnData, const u8 **paData){ - HctTreeNode *pNode = pCsr->apNode[pCsr->iNode]; - assert( pCsr->pReseek==0 ); - assert( pCsr->iNode>=0 ); - *pnData = pNode->nData; - if( paData ) *paData = pNode->aData; - return SQLITE_OK; -} - -/* -** Return non-zero if the cursor is pointing to a delete key. Return zero -** if it is pointing to an insert or to EOF. 
-*/ -SQLITE_PRIVATE int sqlite3HctTreeCsrIsDelete(HctTreeCsr *pCsr){ - assert( pCsr->pReseek==0 ); - return (pCsr->iNode>=0 && pCsr->apNode[pCsr->iNode]->bDelete); -} - -SQLITE_PRIVATE void sqlite3HctTreeCsrPin(HctTreeCsr *pCsr){ - pCsr->bPin = 1; -} -SQLITE_PRIVATE void sqlite3HctTreeCsrUnpin(HctTreeCsr *pCsr){ - pCsr->bPin = 0; -} - -SQLITE_PRIVATE void sqlite3HctTreeCsrIncrblob(HctTreeCsr *pCsr){ - if( pCsr->eIncrblob==TREE_INCRBLOB_NONE ){ - pCsr->eIncrblob = TREE_INCRBLOB_READY; - } -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrHasMoved(HctTreeCsr *pCsr){ - return pCsr && pCsr->pReseek!=0; -} - -SQLITE_PRIVATE int sqlite3HctTreeCsrReseek(HctTreeCsr *pCsr, int *pRes){ - assert( - pCsr->eIncrblob==TREE_INCRBLOB_READY - || pCsr->eIncrblob==TREE_INCRBLOB_ABORT - ); - assert( pCsr->pReseek ); - if( pCsr->eIncrblob==TREE_INCRBLOB_ABORT ) return SQLITE_ABORT; - return sqlite3HctTreeCsrSeek(pCsr, 0, pCsr->iSeekRowid, pRes); -} - - -SQLITE_PRIVATE int sqlite3HctTreeCsrRestore(HctTreeCsr *pCsr, int *pIsDifferent){ - int rc = SQLITE_OK; - if( pCsr->pReseek ){ - assert( pCsr->iSkip==0 ); - rc = hctRestoreCursor(pCsr, &pCsr->iSkip); - } - *pIsDifferent = pCsr->iSkip; - return rc; -} - -SQLITE_PRIVATE u32 sqlite3HctTreeCsrRoot(HctTreeCsr *pCsr){ - return pCsr->pRoot->iRoot; -} - -SQLITE_PRIVATE int sqlite3HctTreeForeach( - HctTree *pTree, - int bSchemaOp, - void *pCtx, - int (*x)(void *, u32, KeyInfo*) -){ - int i; - int rc = SQLITE_OK; - for(i=0; rc==SQLITE_OK && inRootHash; i++){ - HctTreeRoot *p; - for(p=pTree->apRootHash[i]; rc==SQLITE_OK && p; p=p->pHashNext){ - if( p->pNode && (bSchemaOp || p->iRoot!=HCT_TREE_SCHEMAOP_ROOT) ){ - rc = x(pCtx, p->iRoot, p->pKeyInfo); - } - } - } - return rc; -} - - - -/************** End of hct_tree.c ********************************************/ -/************** Begin file hct_file.c ****************************************/ -/* -** 2020 October 13 -** -** The author disclaims copyright to this source code. 
In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - - -/* #include "hctInt.h" */ -/* #include */ -/* #include */ -/* #include */ - -/* #include */ -/* #include */ -#include -/* #include */ -/* #include */ - -/* #include */ -/* #include */ - -#define HCT_DEFAULT_PAGESIZE 4096 - -/* -** The database file is extended and managed in chunks of -** HCT_DEFAULT_PAGEPERCHUNK pages. Since pages are normally 4096 bytes, this -** is 2MiB by default. But, if the file is mmap()ed 2MiB at a time, we -** quickly read the system limit for number of mappings (on Linux, this is -** kernel parameter vm.max_map_count - 65530 by default). So, each mapping -** is made for HCT_MMAP_QUANTA times this amount. Since we need mappings for -** both the database file and page-map, this means we can mmap() a database of: -** -** 32765 * 1024*512*4096 bytes -** -** or around 64TiB. -*/ -#define HCT_DEFAULT_PAGEPERCHUNK 512 -#define HCT_MMAP_QUANTA 1024 - -#define HCT_HEADER_PAGESIZE 4096 - -#define HCT_LOCK_OFFSET (1024*1024) -#define HCT_LOCK_SIZE 1 - - -/* -** Pagemap slots used for special purposes. -*/ -#define HCT_ROOTPAGE_SCHEMA 1 -#define HCT_ROOTPAGE_META 2 - -#define HCT_PAGEMAP_LOGICAL_EOF 3 -#define HCT_PAGEMAP_PHYSICAL_EOF 4 - -#define HCT_PAGEMAP_TRANSID_EOF 16 - -#define HCT_PMF_LOGICAL_EVICTED (((u64)0x00000001)<<56) -#define HCT_PMF_LOGICAL_IRREVICTED (((u64)0x00000002)<<56) -#define HCT_PMF_PHYSICAL_IN_USE (((u64)0x00000004)<<56) -#define HCT_PMF_LOGICAL_IN_USE (((u64)0x00000008)<<56) -#define HCT_PMF_LOGICAL_IS_ROOT (((u64)0x00000010)<<56) - -#define HCT_FIRST_LOGICAL 33 - -/* -** Masks for use with pagemap values. 
-*/ -#define HCT_PAGEMAP_FMASK (((u64)0xFF) << 56) /* Flags MASK */ -#define HCT_PAGEMAP_VMASK (~HCT_PAGEMAP_FMASK) /* Value MASK */ - -#define assert_pgno_ok(iPg) assert( ((u64)iPg)<((u64)1<<48) && iPg>0 ) - -typedef struct HctFileServer HctFileServer; -typedef struct HctMapping HctMapping; -typedef struct HctMappingChunk HctMappingChunk; - -/* -** Global variables for this module. -** -** pServerList: -** Linked list of distinct files opened by this process. Access to this -** variable is protected by SQLITE_MUTEX_STATIC_VFS1. -** -** nCASFailCnt/nCASFailReset: -** These are used to inject CAS instruction failures for testing purposes. -** Set by the sqlite3_hct_cas_failure() API. They are not threadsafe. -** -** nProcFailCnt -** These are used to inject process failures (i.e. abort() calls) for -** testing purposes. Set by the sqlite3_hct_proc_failure() API. Not -** threadsafe. -*/ -static struct HctFileGlobalVars { - HctFileServer *pServerList; - - int nCASFailCnt; - int nCASFailReset; - - int nProcFailCnt; -} g; - -SQLITE_API void sqlite3_hct_cas_failure(int nCASFailCnt, int nCASFailReset){ - g.nCASFailCnt = nCASFailCnt; - g.nCASFailReset = nCASFailReset; -} -SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt){ - g.nProcFailCnt = nProcFailCnt; -} - -/* -** This is called to check if a CAS fault should be injected. It returns -** true if a fault should be injected, or false otherwise. -*/ -static int inject_cas_failure(void){ - if( g.nCASFailCnt>0 ){ - if( (--g.nCASFailCnt)==0 ){ - g.nCASFailCnt = g.nCASFailReset; - return 1; - } - } - if( g.nProcFailCnt>0 ){ - if( (--g.nProcFailCnt)==0 ){ - abort(); - } - } - return 0; -} - -/* -** nRef: -** Number of references to this object held by the system. The -** HctFileServer object may hold one reference, each HctFile may -** also hold one. -** -** iLogPPC: -** Log2 of number of pages-per-chunk. e.g. if there are 512 pages -** on each mapping chunk, this value is set to 9. 
-** -** aPagemap/nPagemap: -** Mapping of the current page-map file. -*/ -struct HctMappingChunk { - void *pData; /* Mapping of chunk in data file */ - u64 *aMap; /* Mapping of chunk in map file */ -}; -struct HctMapping { - int nRef; /* Number of pointers to this array */ - int szPage; /* Size of pages in bytes */ - int nChunk; /* Size of aChunk[] array */ - u32 mapShift; - u32 mapMask; - HctMappingChunk *aChunk; /* Array of database chunk mappings */ -}; - -/* -** eInitState: -** Set to one of the HCT_INIT_XXX constants defined below. See comments -** above those constants for details. -** -** iNextFileId: -** Used to allocate unique ids to each HctFile associated with this -** HctFileServer object. These ids are used for debugging, and also -** to generate log file names. -*/ -struct HctFileServer { - sqlite3_mutex *pMutex; /* Mutex to protect this object */ - HctFile *pFileList; - - u64 iCommitId; /* CID value */ - u64 nWriteCount; /* Write count */ - - int iNextFileId; - char *zPath; /* Path to database (aFdDb[0]) */ - char *zDir; /* Directory component of zPath */ - int fdMap; /* Read/write file descriptor for page-map */ - int nFdDb; /* Number of valid entries in aFdDb[] */ - int aFdDb[HCT_MAX_NDBFILE]; - - int szPage; /* Page size for database */ - int nPagePerChunk; - HctMapping *pMapping; /* Mapping of pagemap and db pages */ - - int bReadOnlyMap; /* True for a read-only mapping of db file */ - - HctTMapServer *pTMapServer; /* Transaction map server */ - HctPManServer *pPManServer; /* Page manager server */ - int eInitState; - - void *pJrnlPtr; - void(*xJrnlDel)(void*); - - i64 st_dev; /* File identification 1 */ - i64 st_ino; /* File identification 2 */ - HctFileServer *pServerNext; /* Next object in g.pServerList list */ -}; - -/* -** System initialization state: -** -** HCT_INIT_NONE: -** No initialization has been done. -** -** HCT_INIT_RECOVER1: -** The sqlite_schema table (root page 1) has been recovered. 
And the -** page-map scanned to initialize the page-manager. -** -** HCT_INIT_RECOVER2: -** Other tables (apart from sqlite_schema) have been recovered. -** Initialization has finished. -*/ -#define HCT_INIT_NONE 0 -#define HCT_INIT_RECOVER1 1 -#define HCT_INIT_RECOVER2 2 - -/* -** Event counters used by the hctstats virtual table. -*/ -typedef struct HctFileStats HctFileStats; -struct HctFileStats { - i64 nCasAttempt; - i64 nCasFail; - i64 nIncrAttempt; - i64 nIncrFail; - i64 nMutex; - i64 nMutexBlock; -}; - -/* -** iCurrentTid: -** Most recent value returned by sqlite3HctFileAllocateTransid(). This -** is the current TID while the upper layer is writing the database, and -** meaningless at other times. Used by this object as the "current TID" -** when freeing a page. -** -** nPageAlloc: -** The total number of physical page allocations requested by the upper -** layer in the lifetime of this object. -*/ -struct HctFile { - HctConfig *pConfig; /* Connection configuration object */ - HctFileServer *pServer; /* Connection to global db object */ - HctFile *pFileNext; /* Next handle opened on same file */ - int iFileId; /* Id used for debugging output */ - int eInitState; - - HctTMapClient *pTMapClient; /* Transaction map client object */ - HctPManClient *pPManClient; /* Transaction map client object */ - - u64 iCurrentTid; - u64 nPageAlloc; - - /* Copies of HctFileServer variables */ - int szPage; - HctMapping *pMapping; - - /* Event counters used by the hctstats virtual table */ - HctFileStats stats; -}; - -static int hctLog2(int n){ - int i; - assert( (n & (n-1))==0 ); - for(i=0; (1<0 ){ - if( (--g.nCASFailCnt)==0 ){ - g.nCASFailCnt = g.nCASFailReset; - return 0; - } - } -#endif - return HctCASBool(pPtr, iOld, iNew); -} - -/* -** Allocate and return a new HctMapping object with enough space for -** nChunk chunks. 
-*/ -static HctMapping *hctMappingNew(int *pRc, HctMapping *pOld, int nChunk){ - HctMapping *pNew = 0; - if( *pRc==SQLITE_OK ){ - int nByte = sizeof(HctMapping) + nChunk*sizeof(HctMappingChunk); - pNew = (HctMapping*)sqlite3MallocZero(nByte); - if( pNew ){ - pNew->aChunk = (HctMappingChunk*)&pNew[1]; - pNew->nRef = 1; - pNew->nChunk = nChunk; - if( pOld ){ - assert( nChunk>pOld->nChunk ); - pNew->mapShift = pOld->mapShift; - pNew->mapMask = pOld->mapMask; - pNew->szPage = pOld->szPage; - memcpy(pNew->aChunk,pOld->aChunk,pOld->nChunk*sizeof(HctMappingChunk)); - } - }else{ - *pRc = SQLITE_NOMEM_BKPT; - } - } - return pNew; -} - -static void hctMappingUnref(HctMapping *p){ - if( p ){ - p->nRef--; - if( p->nRef==0 ){ - sqlite3_free(p); - } - } -} - - -static u64 *hctPagemapPtr(HctMapping *p, u32 iSlot){ - return &(p->aChunk[(iSlot-1) >> p->mapShift].aMap[(iSlot-1) & p->mapMask]); -} - -static void *hctPagePtr(HctMapping *p, u32 iPhys){ - return &((u8*)(p->aChunk[(iPhys-1) >> p->mapShift].pData))[ - ((iPhys-1) & p->mapMask) * p->szPage - ]; -} - -/* -** Buffer aBuf[] is p->szPage bytes in size. This function writes the -** contents of said buffer to physical database page iPhys. -*/ -static int hctPageWriteToDisk(HctFileServer *p, u64 iPhys, u8 *aBuf){ - i64 iChunk = ((iPhys-1) / p->nPagePerChunk); - int iFd = iChunk % p->nFdDb; - i64 iOff = p->szPage * ( - ((iChunk / p->nFdDb) * p->nPagePerChunk) - + (iPhys-1) % p->nPagePerChunk - ); - ssize_t res; - assert_pgno_ok( iPhys ); - res = pwrite(p->aFdDb[iFd], aBuf, p->szPage, iOff); - return (res==p->szPage ? SQLITE_OK : SQLITE_ERROR); -} - -static void hctFilePagemapSetDirect(HctMapping *p, u32 iSlot, u64 iNew){ - u64 *pPtr = hctPagemapPtr(p, iSlot); - *pPtr = iNew; -} - -static void hctFilePagemapSetFlag(HctMapping *p, u32 iSlot, u64 mask){ - u64 *pPtr = hctPagemapPtr(p, iSlot); - *pPtr = *pPtr | mask; -} - -/* -** Use a CAS instruction to the value of page-map slot iSlot. 
Return true -** if the slot is successfully set to value iNew, or false otherwise. -*/ -static int hctFilePagemapSet(HctFile *pFile, u32 iSlot, u64 iOld, u64 iNew){ - u64 *pPtr = hctPagemapPtr(pFile->pMapping, iSlot); - pFile->stats.nCasAttempt++; - if( hctBoolCompareAndSwap64(pPtr, iOld, iNew) ) return 1; - pFile->stats.nCasFail++; - return 0; -} - - -static u64 hctFilePagemapGet(HctMapping *p, u32 iSlot){ - return HctAtomicLoad( hctPagemapPtr(p, iSlot) ); -} - -static u64 hctFilePagemapGetSafe(HctMapping *p, u32 iSlot){ - if( ((iSlot-1)>>p->mapShift)>=p->nChunk ){ - return 0; - } - return hctFilePagemapGet(p, iSlot); -} - -static u64 hctFileAtomicIncr(HctFile *pFile, u64 *pPtr, int nIncr){ - u64 iOld; - while( 1 ){ - iOld = HctAtomicLoad(pPtr); - pFile->stats.nIncrAttempt++; - if( hctBoolCompareAndSwap64(pPtr, iOld, iOld+nIncr) ) return iOld+nIncr; - pFile->stats.nIncrFail++; - } -} - -/* -** Increment the value in slot iSlot by nIncr. Return the new value. -*/ -static u64 hctFilePagemapIncr(HctFile *pFile, u32 iSlot, int nIncr){ - u64 *pPtr = hctPagemapPtr(pFile->pMapping, iSlot); - u64 iOld; - while( 1 ){ - iOld = HctAtomicLoad(pPtr); - pFile->stats.nIncrAttempt++; - if( hctBoolCompareAndSwap64(pPtr, iOld, iOld+nIncr) ) return iOld+nIncr; - pFile->stats.nIncrFail++; - } -} - -/* -** Set the physical page id mapped from logical page iLogical to physical -** page id iNew. Return 1 if successful, or 0 if the operation fails. The -** operation fails if either: -** -** * the LOGICAL_EVICTED flag is already set for the logical page, or -** * the current physical page id to which the logical page is mapped -** is not equal to parameter iOld. 
-*/ -static int hctFilePagemapSetLogical( - HctFile *pFile, /* Use mapping object of this file */ - u32 iLogical, /* Logical page to set the physical id for */ - u64 iOld, /* Old physical page id */ - u64 iNew /* New physical page id */ -){ - HctMapping *p = pFile->pMapping; - while( 1 ){ - u64 i1 = hctFilePagemapGet(p, iLogical); - u64 iOld1 = (iOld & HCT_PAGEMAP_VMASK) | (i1 & HCT_PAGEMAP_FMASK); - u64 iNew1 = (iNew & HCT_PAGEMAP_VMASK) | (i1 & HCT_PAGEMAP_FMASK); - - iNew1 |= HCT_PMF_LOGICAL_IN_USE; - - /* If a CAS instruction failure injection is scheduled, return 0 - ** to the caller. */ - if( inject_cas_failure() ) return 0; - - /* This operation fails if LOGICAL_EVICTED has been set. */ - iOld1 &= ~HCT_PMF_LOGICAL_EVICTED; - iNew1 &= ~HCT_PMF_LOGICAL_EVICTED; - - if( hctFilePagemapSet(pFile, iLogical, iOld1, iNew1) ){ - return 1; - } - if( i1!=iOld1 ) return 0; - } - - assert( !"unreachable" ); - return 0; -} - -/* -** Set the EVICTED or IRREVICTED flag on page iLogical. -*/ -static int hctFileSetEvicted( - HctFile *pFile, - u32 iLogical, - u32 iOldPg, - int bIrrevocable -){ - u64 *pPtr = hctPagemapPtr(pFile->pMapping, iLogical); - while( 1 ){ - u64 iOld = HctAtomicLoad(pPtr); - u64 iNew = iOld | ( - bIrrevocable ? HCT_PMF_LOGICAL_IRREVICTED : HCT_PMF_LOGICAL_EVICTED - ); - - /* Fail if either the current physical page mapped to logical page iLogical - ** is not iOldPg, or if the LOGICAL_EVICTED flag has already been set. */ - if( (iOld & HCT_PAGEMAP_VMASK)!=iOldPg - || ((iOld & HCT_PMF_LOGICAL_EVICTED) && !bIrrevocable) - || ((iOld & HCT_PMF_LOGICAL_EVICTED)==0 && bIrrevocable) - || ((iOld & HCT_PMF_LOGICAL_IN_USE)==0) - ){ - return 0; - } - if( inject_cas_failure() ) return 0; - - pFile->stats.nCasAttempt++; - if( hctBoolCAS64(pPtr, iOld, iNew) ) return 1; - pFile->stats.nCasFail++; - } - - assert( !"unreachable" ); - return 0; -} - -/* -** Clear the LOGICAL_EVICTED flag from page-map entry iLogical. 
This will -** fail if the LOGICAL_IRREVICTED flag is already set. Return 1 if the -** flag is successfully cleared, or 0 otherwise. -*/ -static int hctFileClearEvicted(HctFile *pFile, u32 iLogical){ - u64 *pPtr = hctPagemapPtr(pFile->pMapping, iLogical); - while( 1 ){ - u64 iOld = HctAtomicLoad(pPtr); - u64 iNew = iOld & ~HCT_PMF_LOGICAL_EVICTED; - - if( (iOld & HCT_PMF_LOGICAL_IRREVICTED) ) return 0; - if( inject_cas_failure() ) return 0; - - pFile->stats.nCasAttempt++; - if( hctBoolCAS64(pPtr, iOld, iNew) ) return 1; - pFile->stats.nCasFail++; - } - - assert( !"unreachable" ); - return 0; -} - -static void hctFilePagemapZeroValue(HctFile *pFile, u32 iSlot){ - while( 1 ){ - u64 i1 = hctFilePagemapGet(pFile->pMapping, iSlot); - u64 i2 = (i1 & HCT_PMF_PHYSICAL_IN_USE); - if( hctFilePagemapSet(pFile, iSlot, i1, i2) ) return; - } -} - -/* -** Open a file descriptor for read/write access on the filename formed by -** concatenating arguments zFile and zPost (e.g. "test.db" and "-pagemap"). -** Return the file descriptor if successful. -*/ -static int hctFileOpen(int *pRc, const char *zFile, const char *zPost){ - int fd = -1; - if( *pRc==SQLITE_OK ){ - char *zPath = sqlite3_mprintf("%s%s", zFile, zPost); - if( zPath==0 ){ - *pRc = SQLITE_NOMEM_BKPT; - }else{ - while( fd<0 ){ - fd = open(zPath, O_CREAT|O_RDWR, 0644); - if( fd<0 ){ - *pRc = SQLITE_CANTOPEN_BKPT; - break; - } - if( fd<3 ){ - /* Do not use any file-descriptor with values 0, 1 or 2. Using - ** these means that stray calls to printf() etc. may corrupt the - ** database. */ - close(fd); - fd = open("/dev/null", O_RDONLY, 0644); - if( fd<0 ){ - *pRc = SQLITE_CANTOPEN_BKPT; - break; - } - fd = -1; - } - } - sqlite3_free(zPath); - } - } - return fd; -} - -/* -** Take an exclusive POSIX lock on the file-descriptor passed as the -** second argument. 
-*/ -static void hctFileLock(int *pRc, int fd, const char *zFile){ - if( *pRc==SQLITE_OK ){ - int res; - struct flock l; - memset(&l, 0, sizeof(l)); - l.l_type = F_WRLCK; - l.l_whence = SEEK_SET; - l.l_start = HCT_LOCK_OFFSET; - l.l_len = HCT_LOCK_SIZE; - res = fcntl(fd, F_SETLK, &l); - if( res!=0 ){ - fcntl(fd, F_GETLK, &l); - sqlite3_log(SQLITE_BUSY, "hct file \"%s\" locked by process %lld", - zFile, (i64)l.l_pid - ); - *pRc = SQLITE_BUSY; - } - } -} - -/* -** Argument fd is an open file-handle. Return the size of the file in bytes. -** -** This function is a no-op (returns 0) if *pRc is other than SQLITE_OK -** when it is called. If an error occurs, *pRc is set to an SQLite error -** code before returning. -*/ -static i64 hctFileSize(int *pRc, int fd){ - i64 szRet = 0; - if( *pRc==SQLITE_OK ){ - struct stat sStat; - if( fstat(fd, &sStat) ){ - *pRc = sqlite3HctIoerr(SQLITE_IOERR_FSTAT); - }else{ - szRet = (i64)(sStat.st_size); - } - } - return szRet; -} - -static int hctFileTruncate(int *pRc, int fd, i64 sz){ - if( *pRc==SQLITE_OK ){ - int res = ftruncate(fd, (off_t)sz); - if( res ){ - *pRc = sqlite3HctIoerr(SQLITE_IOERR_TRUNCATE); - } - } - return *pRc; -} - -static void hctFileUnlink(int *pRc, const char *zFile){ - if( *pRc==SQLITE_OK ) unlink(zFile); -} - - -/* -** This function is a no-op if (*pRc) is set to other than SQLITE_OK -** when it is called. -** -** Otherwise, argument fd is assumed to be an open file-descriptor. This -** function attempts to map and return a pointer to a region nByte bytes in -** size at offset iOff of the open file. The mapping is read-only if parameter -** bRO is non-zero, or read/write if it is zero. -** -** If an error occurs, NULL is returned and (*pRc) set to an SQLite error -** code. -*/ -static void *hctFileMmap(int *pRc, int fd, i64 nByte, i64 iOff, int bRO){ - void *pRet = 0; - if( *pRc==SQLITE_OK ){ - const int flags = PROT_READ | (bRO ? 
0 : PROT_WRITE); - pRet = mmap(0, nByte, flags, MAP_SHARED, fd, iOff); - if( pRet==MAP_FAILED ){ - pRet = 0; - *pRc = sqlite3HctIoerr(SQLITE_IOERR_MMAP); - } - } - return pRet; -} - -static void hctFileMunmap(void *pMap, i64 nByte){ - if( pMap ) munmap(pMap, nByte); -} - -static char *hctStrdup(int *pRc, const char *zIn){ - char *zRet = 0; - if( *pRc==SQLITE_OK ){ - zRet = sqlite3_mprintf("%s", zIn); - if( zRet==0 ) *pRc = SQLITE_NOMEM_BKPT; - } - return zRet; -} - - -/* -** Given local path zFile, return the associated canonical path in a buffer -** obtained from sqlite3_malloc(). It is the responsibility of the caller -** to eventually free this buffer using sqlite3_free(). -*/ -static char *fileGetFullPath(int *pRc, const char *zFile){ - char *zRet = 0; - if( *pRc==SQLITE_OK ){ - char *zFree = realpath(zFile, 0); - if( zFree==0 ){ - *pRc = SQLITE_CANTOPEN_BKPT; - }else{ - zRet = hctStrdup(pRc, zFree); - free(zFree); - } - } - return zRet; -} - -static int hctFileFindLogs( - HctFileServer *pServer, - void *pCtx, - int (*xLog)(void*, const char*) -){ - DIR *d; - const char *zName = &pServer->zPath[strlen(pServer->zDir)]; - int nName = strlen(zName); - int rc = SQLITE_OK; - - d = opendir(pServer->zDir); - if (d) { - struct dirent *dir; - while( rc==SQLITE_OK && (dir = readdir(d))!=NULL ){ - const char *zFile = (const char*)dir->d_name; - int nFile = strlen(zFile); - if( nFile>(nName+5) - && memcmp(zFile, zName, nName)==0 - && memcmp(&zFile[nName], "-log-", 5)==0 - ){ - char *zFull = sqlite3_mprintf("%s/%s", pServer->zDir, zFile); - rc = xLog(pCtx, zFull); - sqlite3_free(zFull); - } - } - closedir(d); - } - - return rc; -} - -static int hctFileServerInitUnlinkLog(void *pDummy, const char *zFile){ - int rc = SQLITE_OK; - hctFileUnlink(&rc, zFile); - return rc; -} - -static void hctFileReadHdr( - int *pRc, - void *pHdr, - int *pszPage, - int *pnDbFile -){ - *pszPage = 0; - if( *pRc==SQLITE_OK ){ - /* 12345678901234567890123456789012 */ - int szPage = 0; - int nDbFile 
= 0; - char *zHdr = "Hctree database version 00000001"; - u8 aEmpty[32] = {0}; - - assert( strlen(zHdr)==32 ); - if( memcmp(zHdr, pHdr, 32)==0 ){ - memcpy(&szPage, &((u8*)pHdr)[32], sizeof(int)); - if( szPage<512 || szPage>32768 || (szPage & (szPage-1))!=0 ){ - *pRc = SQLITE_CANTOPEN_BKPT; - return; - } - - memcpy(&nDbFile, &((u8*)pHdr)[36], sizeof(int)); - if( nDbFile<1 || nDbFile>HCT_MAX_NDBFILE ){ - *pRc = SQLITE_CANTOPEN_BKPT; - return; - } - }else if( memcmp(aEmpty, pHdr, 32)==0 ){ - /* no-op */ - }else{ - *pRc = SQLITE_CANTOPEN_BKPT; - } - - *pszPage = szPage; - *pnDbFile = nDbFile; - } -} - -static void *hctFileMmapDbChunk( - int *pRc, - HctFileServer *p, - HctMapping *pMap, - int iChunk -){ - void *pRet = 0; - i64 szChunk = p->nPagePerChunk * p->szPage; - int iFd = iChunk % p->nFdDb; - int iChunkOfFile = (iChunk / p->nFdDb); - - if( (iChunkOfFile % HCT_MMAP_QUANTA)==0 ){ - i64 iOff = szChunk * iChunkOfFile; - pRet = hctFileMmap( - pRc, p->aFdDb[iFd], szChunk*HCT_MMAP_QUANTA, iOff, p->bReadOnlyMap - ); - }else{ - pRet = (void*)(((u8*)pMap->aChunk[iChunk - p->nFdDb].pData) + szChunk); - } - - return pRet; -} - -static void *hctFileMmapPagemapChunk( - int *pRc, - HctFileServer *p, - HctMapping *pMap, - int iChunk -){ - void *pRet = 0; - i64 szChunk = p->nPagePerChunk * sizeof(u64); - - if( (iChunk % HCT_MMAP_QUANTA)==0 ){ - pRet = hctFileMmap( - pRc, p->fdMap, szChunk*HCT_MMAP_QUANTA, (szChunk*iChunk), 0 - ); - }else{ - pRet = (void*)(((u8*)(pMap->aChunk[iChunk-1].aMap)) + szChunk); - } - - return pRet; -} - -static void hctFileOpenDataFiles( - int *pRc, - HctFileServer *p, - int nDbFile -){ - int ii; - int rc = *pRc; - assert( p->nFdDb==1 ); - for(ii=1; iiaFdDb[ii] = hctFileOpen(&rc, p->zPath, z); - sqlite3_free(z); - if( rc==SQLITE_OK ) p->nFdDb = ii+1; - } - - if( rc!=SQLITE_OK ){ - for(ii=1; iinFdDb; ii++){ - if( p->aFdDb[ii]>0 ) close(p->aFdDb[ii]); - p->aFdDb[ii] = -1; - } - p->nFdDb = 1; - } - *pRc = rc; -} - -static i64 round_up(i64 iVal, i64 nQuanta){ 
- return ((iVal + nQuanta - 1) / nQuanta) * nQuanta; -} - -static void hctFileAllocateMapping( - int *pRc, - HctFileServer *p, - int nChunk -){ - i64 szChunkPagemap = p->nPagePerChunk * sizeof(u64); - i64 szChunkData = p->nPagePerChunk * p->szPage; - int rc = *pRc; - HctMapping *pMapping = 0; - int iFd = 0; - int i = 0; - - p->pMapping = pMapping = hctMappingNew(&rc, 0, nChunk); - if( rc==SQLITE_OK ){ - pMapping->mapShift = hctLog2(p->nPagePerChunk); - pMapping->mapMask = (1<mapShift)-1; - pMapping->szPage = p->szPage; - } - - /* Map all chunks of the pagemap file using a single call to mmap() */ - { - int nAll = round_up(nChunk, HCT_MMAP_QUANTA); - u8 *pMap = (u8*)hctFileMmap(&rc, p->fdMap, nAll*szChunkPagemap,0,0); - for(i=0; rc==SQLITE_OK && iaChunk[i].aMap = (u64*)&pMap[i * szChunkPagemap]; - } - } - - /* Map all chunks of the data files. One call to mmap() for each file. */ - for(iFd=0; iFdnFdDb && rc==SQLITE_OK; iFd++){ - int nFileChunk = (nChunk / p->nFdDb) + (iFd < (nChunk % p->nFdDb)); - i64 n = round_up(nFileChunk, HCT_MMAP_QUANTA) * szChunkData; - u8 *pMap = (u8*)hctFileMmap(&rc, p->aFdDb[iFd], n, 0, p->bReadOnlyMap); - for(i=0; inFdDb; - pMapping->aChunk[iChunk].pData = &pMap[i*szChunkData]; - } - } - - *pRc = rc; -} - -typedef struct Uncommitted Uncommitted; -struct Uncommitted { - int nAlloc; - int nTid; - i64 *aTid; -}; - -static int hctFileServerInitUncommitted(void *pCtx, const char *zFile){ - int fd; - Uncommitted *p = (Uncommitted*)pCtx; - - fd = open(zFile, O_RDONLY); - if( fd>=0 ){ - i64 iTid = 0; - read(fd, &iTid, sizeof(iTid)); - close(fd); - if( iTid>0 ){ - if( p->nTid==p->nAlloc ){ - int nNew = p->nTid ? 
p->nTid*4 : 64; - i64 *aNew = sqlite3_realloc(p->aTid, nNew*sizeof(i64)); - if( aNew==0 ){ - return SQLITE_NOMEM; - }else{ - p->aTid = aNew; - p->nAlloc = nNew; - } - } - p->aTid[p->nTid++] = iTid; - } - } - return SQLITE_OK; -} - -static int hctFileServerInit( - HctFileServer *p, - HctConfig *pConfig, - const char *zFile -){ - int rc = SQLITE_OK; - assert( sqlite3_mutex_held(p->pMutex) ); - if( p->zPath==0 ){ - i64 szHdr; /* Size of header file */ - i64 szMap; /* Size of pagemap file */ - int nChunk = 0; /* Number of chunks in database */ - int szPage = 0; - int nDbFile = 0; - - Uncommitted unc; - memset(&unc, 0, sizeof(unc)); - - /* Open the data and page-map files */ - p->fdMap = hctFileOpen(&rc, zFile, "-pagemap"); - p->zPath = fileGetFullPath(&rc, zFile); - - if( rc==SQLITE_OK ){ - int n = strlen(p->zPath); - while( p->zPath[n-1]!='/' && n>1 ) n--; - p->zDir = sqlite3_mprintf("%.*s", n, p->zPath); - if( p->zDir==0 ) rc = SQLITE_NOMEM_BKPT; - } - - /* Initialize the page-manager */ - p->pPManServer = sqlite3HctPManServerNew(&rc, p); - - /* If the header file is zero bytes in size, or is not yet populated, - ** then the database is empty, regardless of the contents of the - ** *-data or *-pagemap file. Truncate the pagemap and data files to - ** zero bytes in size to make sure of this. - ** - ** Alternatively, if the header file is the right size, try to read it. 
- */ - szHdr = hctFileSize(&rc, p->aFdDb[0]); - if( rc==SQLITE_OK ){ - void *pHdr = 0; - if( szHdr==0 ){ - szHdr = HCT_HEADER_PAGESIZE*2; - hctFileTruncate(&rc, p->aFdDb[0], szHdr); - }else if( szHdr<(HCT_HEADER_PAGESIZE*2) ){ - rc = SQLITE_CANTOPEN_BKPT; - } - - pHdr = hctFileMmap(&rc, p->aFdDb[0], HCT_HEADER_PAGESIZE*2, 0, 1); - hctFileReadHdr(&rc, pHdr, &szPage, &nDbFile); - if( rc==SQLITE_OK && szPage==0 ){ - hctFileTruncate(&rc, p->fdMap, 0); - hctFileTruncate(&rc, p->fdMap, HCT_DEFAULT_PAGEPERCHUNK*sizeof(i64)); - szHdr = HCT_HEADER_PAGESIZE*2; - hctFileTruncate(&rc, p->aFdDb[0], szHdr); - if( rc==SQLITE_OK ){ - rc = hctFileFindLogs(p, 0, hctFileServerInitUnlinkLog); - } - }else{ - if( rc==SQLITE_OK ){ - rc = hctFileFindLogs(p, (void*)&unc, hctFileServerInitUncommitted); - } - hctFileOpenDataFiles(&rc, p, nDbFile); - } - hctFileMunmap(pHdr, HCT_HEADER_PAGESIZE*2); - } - p->nPagePerChunk = HCT_DEFAULT_PAGEPERCHUNK; - - assert( szPage==0 || rc==SQLITE_OK ); - if( szPage>0 ){ - i64 szChunkPagemap = p->nPagePerChunk * sizeof(u64); - - p->szPage = szPage; - szMap = hctFileSize(&rc, p->fdMap); - if( rc==SQLITE_OK ){ - if( szMapnFdDb==1 && szHdr!=p->szPage*(szMap/sizeof(u64))) - ){ - rc = SQLITE_CANTOPEN_BKPT; - }else{ - nChunk = szMap / szChunkPagemap; - } - } - - hctFileAllocateMapping(&rc, p, nChunk); - } - - /* Initialize CID value */ - p->iCommitId = 5; - - /* Allocate a transaction map server */ - if( rc==SQLITE_OK && p->pTMapServer==0 ){ - u64 iFirst = 0; /* First tid that will be written in tmap */ - u64 iLast = 0; /* Last such tid */ - int ii; /* To iterate through unc.aTid[] */ - - if( p->pMapping ){ - iFirst = hctFilePagemapGet(p->pMapping, HCT_PAGEMAP_TRANSID_EOF); - iFirst = (iFirst & HCT_TID_MASK) + 1; - }else{ - iFirst = 1; - } - - iLast = iFirst; - for(ii=0; ii=iLast ) iLast = iThis+1; - } - - /* Allocate the tmap-server object. Set all entries between iFirst and - ** iLast to (HCT_TMAP_COMMITTED, cid=1). 
Ensuring that the contents of - ** these transactions are visible to all readers. - ** - ** Then go back and set the entry for all tid values in unc.aTid[] to - ** (HCT_TMAP_ROLLBACK, 0) - not visible to any readers. */ - rc = sqlite3HctTMapServerNew(iFirst, iLast, &p->pTMapServer); - for(ii=0; iipTMapServer, iThis, HCT_TMAP_ROLLBACK); - } - } - sqlite3_free(unc.aTid); - } - return rc; -} - -/* -** This is called as part of initializing a new database on disk. Mutex -** HctFileServer.mutex must be held to call this function. It writes a -** new, empty, root page to physical page iPhys, to be used for either -** HCT_ROOTPAGE_SCHEMA or HCT_ROOTPAGE_META. -** -** SQLITE_OK is returned if successful, or an SQLite error code otherwise. -*/ -static int hctFileInitSystemRoot(HctFileServer *p, u64 iPhys){ - int rc = SQLITE_OK; - u8 *aBuf = sqlite3_malloc(p->szPage); - - assert( sqlite3_mutex_held(p->pMutex) ); - if( aBuf==0 ){ - rc = SQLITE_NOMEM; - }else{ - sqlite3HctDbRootPageInit(0, aBuf, p->szPage); - - if( p->bReadOnlyMap ){ - rc = hctPageWriteToDisk(p, iPhys, aBuf); - }else{ - u8 *a = (u8*)hctPagePtr(p->pMapping, iPhys); - memcpy(a, aBuf, p->szPage); - } - sqlite3_free(aBuf); - } - return rc; -} - -static int hctFileInitHdr(HctFileServer *p){ - int rc = SQLITE_OK; - u8 *aBuf = sqlite3_malloc(HCT_HEADER_PAGESIZE); - assert( sqlite3_mutex_held(p->pMutex) ); - if( aBuf==0 ){ - rc = SQLITE_NOMEM; - }else{ - char *zHdr = "Hctree database version 00000001"; - assert( strlen(zHdr)==32 ); - memset(aBuf, 0, HCT_HEADER_PAGESIZE); - memcpy(aBuf, zHdr, 32); - memcpy(&aBuf[32], &p->szPage, sizeof(int)); - memcpy(&aBuf[36], &p->nFdDb, sizeof(int)); - if( p->bReadOnlyMap ){ - ssize_t res = pwrite(p->aFdDb[0], aBuf, HCT_HEADER_PAGESIZE, 0); - rc = (res==HCT_HEADER_PAGESIZE ? SQLITE_OK : SQLITE_ERROR); - }else{ - memcpy(p->pMapping->aChunk[0].pData, aBuf, HCT_HEADER_PAGESIZE); - } - } - sqlite3_free(aBuf); - return rc; -} - - -/* -** This is called each time a new snapshot is opened. 
If HctFile.szPage is -** still set to 0, then: -** -** a) this is the first snapshot opened by connection pFile, and -** b) the database had not been created when pFile was opened. -** -** In this case the server-mutex is taken, and if the db has still not been -** created (HctFileServer.szPage==0), then it is created on disk under the -** cover of the mutex. -*/ -SQLITE_PRIVATE int sqlite3HctFileNewDb(HctFile *pFile){ - int rc = SQLITE_OK; - if( pFile->szPage==0 ){ - HctFileServer *p = pFile->pServer; - sqlite3_mutex_enter(p->pMutex); - if( p->szPage==0 ){ - HctConfig *pConfig = pFile->pConfig; - HctMapping *pMapping = 0; - int szPage = pConfig->pgsz; - int nDbFile = pConfig->nDbFile; - - p->szPage = szPage; - hctFileTruncate(&rc, p->fdMap, p->nPagePerChunk * sizeof(u64)); - hctFileTruncate(&rc, p->aFdDb[0], p->nPagePerChunk * szPage); - - hctFileAllocateMapping(&rc, p, 1); - pMapping = p->pMapping; - - assert( nDbFile>=1 && nDbFile<=HCT_MAX_NDBFILE ); - hctFileOpenDataFiles(&rc, p, nDbFile); - - /* 1. Make logical page 1 an empty intkey root page (SQLite uses this - ** as the root of sqlite_schema). - ** - ** 2. Set the initial values of the largest logical and physical page - ** ids allocated fields. - */ - - /* Set the initial values of the largest logical and physical page - ** ids allocated fields. These will be used when the set of free pages - ** is recovered in sqlite3HctFileRecoverFreelists(). 
*/ - if( rc==SQLITE_OK ){ - const int nPageSet = pConfig->nPageSet; - hctFilePagemapSetDirect(pMapping, HCT_PAGEMAP_LOGICAL_EOF, nPageSet); - hctFilePagemapSetDirect(pMapping, HCT_PAGEMAP_PHYSICAL_EOF, nPageSet); - } - - if( rc==SQLITE_OK ){ - const u64 f = HCT_PMF_LOGICAL_IN_USE | HCT_PMF_LOGICAL_IS_ROOT; - u64 aRoot[] = { - HCT_ROOTPAGE_SCHEMA, - HCT_ROOTPAGE_META, - }; - int ii = 0; - u64 iPhys1 = 1 + (((HCT_HEADER_PAGESIZE*2)+szPage-1) / szPage); - - for(ii=0; iipMapping); - p->pMapping = 0; - } - } - - if( rc==SQLITE_OK ){ - pFile->szPage = p->szPage; - pFile->pMapping = p->pMapping; - pFile->pMapping->nRef++; - pFile->eInitState = p->eInitState; - } - sqlite3_mutex_leave(p->pMutex); - } - return rc; -} - - -/* -** Return true if the db has not yet been created on disk. Or false -** if it already has. -*/ -SQLITE_PRIVATE int sqlite3HctFileIsNewDb(HctFile *pFile){ - int bRet = 0; - if( pFile->szPage==0 ){ - HctFileServer *p = pFile->pServer; - sqlite3_mutex_enter(p->pMutex); - if( p->szPage==0 ){ - bRet = 1; - } - sqlite3_mutex_leave(p->pMutex); - } - return bRet; -} - -static sqlite3_int64 current_time(){ - struct timeval sNow; - gettimeofday(&sNow, 0); - return (sqlite3_int64)sNow.tv_sec*1000 + sNow.tv_usec/1000; -} - -static void hctFileEnterServerMutex(HctFile *pFile){ - sqlite3_mutex *pMutex = pFile->pServer->pMutex; - pFile->stats.nMutex++; - if( sqlite3_mutex_try(pMutex)!=SQLITE_OK ){ - pFile->stats.nMutexBlock++; - sqlite3_mutex_enter(pMutex); - } -} - -/* -** This is called to ensure that the mapping currently held by client -** pFile contains at least nChunk chunks. 
-*/ -static int hctFileGrowMapping(HctFile *pFile, int nChunk){ - int rc = SQLITE_OK; - if( pFile->pMapping->nChunkpServer; - HctMapping *pOld; - hctFileEnterServerMutex(pFile); - hctMappingUnref(pFile->pMapping); - pFile->pMapping = 0; - pOld = p->pMapping; - nOld = pOld->nChunk; - if( nOldnPagePerChunk*p->szPage; - i64 szChunkMap = p->nPagePerChunk*sizeof(u64); - int i; - - /* Grow the mapping file */ - hctFileTruncate(&rc, p->fdMap, nChunk*szChunkMap); - - for(i=nOld; iaChunk[i]; - - /* Grow the data file */ - int iFd = (i % p->nFdDb); - i64 sz = ((i / p->nFdDb) + 1) * szChunkData; - hctFileTruncate(&rc, p->aFdDb[iFd], sz); - - /* Map the new chunks of both the data and mapping files. */ - pChunk->aMap = hctFileMmapPagemapChunk(&rc, p, pNew, i); - pChunk->pData = hctFileMmapDbChunk(&rc, p, pNew, i); - } - - if( rc==SQLITE_OK ){ - p->pMapping = pNew; - hctMappingUnref(pOld); - }else{ - hctMappingUnref(pNew); - } - } - } - pFile->pMapping = p->pMapping; - pFile->pMapping->nRef++; - sqlite3_mutex_leave(p->pMutex); - } - return rc; -} - -/* -** Grow the mapping so that it is at least large enough to have an entry -** for slot iSlot. Return SQLITE_OK if successful (or if the mapping does -** not need to grow), or an SQLite error code otherwise. 
-*/ -static int hctFileGrowMappingForSlot(HctFile *pFile, u32 iSlot){ - assert( iSlot>0 ); - return hctFileGrowMapping(pFile, 1 + ((iSlot-1) / HCT_DEFAULT_PAGEPERCHUNK)); -} - - -static int hctFileServerFind(HctFile *pFile, const char *zFile){ - int rc = SQLITE_OK; - struct stat sStat; - HctFileServer *pServer = 0; - sqlite3_mutex *pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_VFS1); - - memset(&sStat, 0, sizeof(sStat)); - - /* Take the VFS1 mutex that protects the globals in this file */ - sqlite3_mutex_enter(pMutex); - - /* Search for an existing HctFileServer already open on this database */ - if( 0==stat(zFile, &sStat) ){ - for(pServer=g.pServerList; pServer; pServer=pServer->pServerNext){ - if( pServer->st_ino==(i64)sStat.st_ino - && pServer->st_dev==(i64)sStat.st_dev - ){ - break; - } - } - } - - if( pServer==0 ){ - int fd = hctFileOpen(&rc, zFile, ""); - if( rc==SQLITE_OK ){ - assert( fd>0 ); - hctFileLock(&rc, fd, zFile); - pServer = (HctFileServer*)sqlite3HctMalloc(&rc, sizeof(*pServer)); - if( pServer==0 ){ - close(fd); - }else{ - int ii; - for(ii=0; iiaFdDb[ii] = -1; - } - fstat(fd, &sStat); - pServer->st_dev = (i64)sStat.st_dev; - pServer->st_ino = (i64)sStat.st_ino; - pServer->pServerNext = g.pServerList; - pServer->aFdDb[0] = fd; - pServer->nFdDb = 1; - pServer->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); - /* pServer->bReadOnlyMap = 1; */ - g.pServerList = pServer; - } - } - } - - if( rc==SQLITE_OK ){ - pFile->pServer = pServer; - pFile->pFileNext = pServer->pFileList; - pServer->pFileList = pFile; - } - - /* Release the global mutex */ - sqlite3_mutex_leave(pMutex); - - return rc; -} - - -/* -** Open a connection to the database zFile. 
-*/ -SQLITE_PRIVATE HctFile *sqlite3HctFileOpen(int *pRc, const char *zFile, HctConfig *pConfig){ - int rc = *pRc; - HctFile *pNew; - - pNew = (HctFile*)sqlite3HctMalloc(&rc, sizeof(*pNew)); - if( pNew ){ - pNew->pConfig = pConfig; - rc = hctFileServerFind(pNew, zFile); - if( rc==SQLITE_OK ){ - HctFileServer *pServer = pNew->pServer; - - sqlite3_mutex_enter(pServer->pMutex); - rc = hctFileServerInit(pServer, pConfig, zFile); - assert( rc==SQLITE_OK ); - if( rc==SQLITE_OK && pServer->szPage>0 ){ - pNew->szPage = pServer->szPage; - pNew->pMapping = pServer->pMapping; - pNew->pMapping->nRef++; - } - pNew->eInitState = pServer->eInitState; - pNew->iFileId = pServer->iNextFileId++; - sqlite3_mutex_leave(pServer->pMutex); - - if( rc==SQLITE_OK ){ - sqlite3HctTMapClientNew( - pServer->pTMapServer, pConfig, &pNew->pTMapClient - ); - } - if( rc==SQLITE_OK ){ - pNew->pPManClient = sqlite3HctPManClientNew( - &rc, pConfig, pServer->pPManServer, pNew - ); - } - }else{ - sqlite3_free(pNew); - pNew = 0; - } - - if( rc!=SQLITE_OK ){ - sqlite3HctFileClose(pNew); - pNew = 0; - } - } - assert( (rc==SQLITE_OK)==(pNew!=0) ); - *pRc = rc; - return pNew; -} - -SQLITE_PRIVATE HctTMapClient *sqlite3HctFileTMapClient(HctFile *pFile){ - return pFile->pTMapClient; -} -SQLITE_PRIVATE HctPManClient *sqlite3HctFilePManClient(HctFile *pFile){ - return pFile->pPManClient; -} - -SQLITE_PRIVATE void sqlite3HctFileClose(HctFile *pFile){ - if( pFile ){ - HctFileServer *pDel = 0; - HctFile **pp; - HctFileServer *pServer = pFile->pServer; - - /* Release the page-manager client */ - sqlite3HctPManClientFree(pFile->pPManClient); - pFile->pPManClient = 0; - - /* Release the transaction map client */ - sqlite3HctTMapClientFree(pFile->pTMapClient); - pFile->pTMapClient = 0; - - /* Release the reference to the HctMapping object, if any */ - hctMappingUnref(pFile->pMapping); - pFile->pMapping = 0; - - /* Remove this object from the HctFileServer.pFileList list. 
If this - ** means there are no longer any connections to this server object, - ** remove the HctFileServer object itself from the global list. In - ** this case leave stack variable pDel set to point to the - ** HctFileServer. */ - sqlite3_mutex_enter( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_VFS1) ); - for(pp=&pServer->pFileList; *pp!=pFile; pp=&(*pp)->pFileNext); - *pp = pFile->pFileNext; - if( pServer->pFileList==0 ){ - HctFileServer **ppS; - pDel = pServer; - for(ppS=&g.pServerList; *ppS!=pServer; ppS=&(*ppS)->pServerNext); - *ppS = pServer->pServerNext; - } - sqlite3_mutex_leave( sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_VFS1) ); - - /* It if was removed from the global list, clean up the HctFileServer - ** object. */ - if( pDel ){ - int szChunkData = pDel->nPagePerChunk*pDel->szPage; - int szChunkMap = pDel->nPagePerChunk*sizeof(u64); - int i; - HctMapping *pMapping = pDel->pMapping; - - sqlite3HctTMapServerFree(pDel->pTMapServer); - pDel->pTMapServer = 0; - - sqlite3HctPManServerFree(pDel->pPManServer); - pDel->pPManServer = 0; - - if( pMapping ){ - pDel->pMapping = 0; - for(i=0; inChunk; i++){ - HctMappingChunk *pChunk = &pMapping->aChunk[i]; - if( pChunk->aMap ) hctFileMunmap(pChunk->aMap, szChunkMap); - if( pChunk->pData ) hctFileMunmap(pChunk->pData, szChunkData); - } - hctMappingUnref(pMapping); - } - - /* Close the data files and the mapping file. */ - for(i=0; inFdDb; i++){ - if( pDel->aFdDb[i]>0 ) close(pDel->aFdDb[i]); - } - if( pDel->fdMap ) close(pDel->fdMap); - - if( pDel->xJrnlDel ){ - pDel->xJrnlDel(pDel->pJrnlPtr); - } - sqlite3_free(pDel->zDir); - sqlite3_free(pDel->zPath); - sqlite3_mutex_free(pDel->pMutex); - sqlite3_free(pDel); - } - - /* Finally, free the HctFile object */ - sqlite3_free(pFile); - } -} - -SQLITE_PRIVATE u32 sqlite3HctFileMaxpage(HctFile *pFile){ - u64 iVal = hctFilePagemapGet(pFile->pMapping, HCT_PAGEMAP_PHYSICAL_EOF); - return (iVal & 0xFFFFFFFF); -} - -/* -** Set the flags in mask within page-map slot iSlot. 
-*/ -static int hctFileSetFlag(HctFile *pFile, u32 iSlot, u64 mask){ - int rc = hctFileGrowMappingForSlot(pFile, iSlot); - if( rc==SQLITE_OK ){ - HctMapping *pMapping = pFile->pMapping; - while( 1 ){ - u64 iVal = hctFilePagemapGet(pMapping, iSlot); - if( hctFilePagemapSet(pFile, iSlot, iVal, iVal | mask) ) break; - } - } - return rc; -} - -/* -** Clear the flags in mask within page-map slot iSlot. -*/ -static int hctFileClearFlag(HctFile *pFile, u32 iSlot, u64 mask){ - int rc = hctFileGrowMappingForSlot(pFile, iSlot); - if( rc==SQLITE_OK ){ - HctMapping *pMapping = pFile->pMapping; - while( 1 ){ - u64 iVal = hctFilePagemapGet(pMapping, iSlot); - if( hctFilePagemapSet(pFile, iSlot, iVal, iVal & ~mask) ) break; - } - } - return rc; -} - - -SQLITE_PRIVATE int sqlite3HctFileRootFree(HctFile *pFile, u32 iRoot){ - /* TODO - do something with freed root-page */ - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctFilePageClearIsRoot(HctFile *pFile, u32 iRoot){ - return hctFileClearFlag(pFile, iRoot, HCT_PMF_LOGICAL_IS_ROOT); -} -SQLITE_PRIVATE int sqlite3HctFilePageClearInUse(HctFile *pFile, u32 iPg, int bLogic){ - u64 flag = bLogic ? HCT_PMF_LOGICAL_IN_USE : HCT_PMF_PHYSICAL_IN_USE; - return hctFileClearFlag(pFile, iPg, flag); -} - -SQLITE_PRIVATE int sqlite3HctFileTreeFree(HctFile *pFile, u32 iRoot, int bImmediate){ - u64 iTid = bImmediate ? 0 : pFile->iCurrentTid; - return sqlite3HctPManFreeTree(pFile->pPManClient, pFile, iRoot, iTid); -} - -static int hctFilePagemapGetGrow(HctFile *pFile, u32 iPg, u64 *piVal){ - int rc = hctFileGrowMapping(pFile, 1+(iPg>>pFile->pMapping->mapShift)); - if( rc==SQLITE_OK ){ - *piVal = hctFilePagemapGet(pFile->pMapping, iPg); - } - return rc; -} - -/* -** Obtain the lower 32-bits of the value currently stored in slot iSlot. 
-*/ -static int hctFilePagemapGetGrow32(HctFile *pFile, u32 iSlot, u32 *piVal){ - int rc; - u64 val = 0; - rc = hctFilePagemapGetGrow(pFile, iSlot, &val); - *piVal = (u32)(val & 0xFFFFFFFF); - return rc; -} - - -static int hctFilePagemapPtr(HctFile *pFile, u32 iPg, u8 **paData){ - int rc = hctFileGrowMapping(pFile, 1+(iPg>>pFile->pMapping->mapShift)); - if( rc==SQLITE_OK ){ - *paData = hctPagePtr(pFile->pMapping, iPg); - } - return rc; -} - -SQLITE_PRIVATE int sqlite3HctFilePageGet(HctFile *pFile, u32 iPg, HctFilePage *pPg){ - int rc; - assert( iPg!=0 ); - memset(pPg, 0, sizeof(*pPg)); - pPg->pFile = pFile; - pPg->iPg = iPg; - rc = hctFilePagemapGetGrow32(pFile, iPg, &pPg->iOldPg); - if( rc==SQLITE_OK ){ - u32 iPhys = pPg->iOldPg; - assert( iPhys!=0 ); - rc = hctFilePagemapPtr(pFile, iPhys, &pPg->aOld); - } - return rc; -} - -SQLITE_PRIVATE u32 sqlite3HctFilePageMapping(HctFile *pFile, u32 iLogical, int *pbEvicted){ - u64 val = hctFilePagemapGet(pFile->pMapping, iLogical); - *pbEvicted = (val & HCT_PMF_LOGICAL_EVICTED) ? 1 : 0; - return (u32)(val & 0xFFFFFFFF); -} - -/* -** Obtain a reference to physical page iPg. -*/ -SQLITE_PRIVATE int sqlite3HctFilePageGetPhysical(HctFile *pFile, u32 iPg, HctFilePage *pPg){ - u32 iVal; - int rc; - assert( iPg!=0 ); - memset(pPg, 0, sizeof(*pPg)); - rc = hctFilePagemapGetGrow32(pFile, iPg, &iVal); - if( rc==SQLITE_OK ){ - pPg->iOldPg = iPg; - pPg->aOld = (u8*)hctPagePtr(pFile->pMapping, iPg); - } - return rc; -} - -static u32 hctFileAllocPg(int *pRc, HctFile *pFile, int bLogical){ - int rc = *pRc; - u32 iRet = 0; - - if( bLogical==0 ) pFile->nPageAlloc++; - iRet = sqlite3HctPManAllocPg(&rc, pFile->pPManClient, pFile, bLogical); - if( rc==SQLITE_OK ){ - rc = hctFileGrowMappingForSlot(pFile, iRet); - if( rc!=SQLITE_OK ){ - /* TODO: Something about this resource leak */ - iRet = 0; - } - } - - *pRc = rc; - return iRet; -} - -/* -** This function makes the page object pPg writable if it is not already -** so. 
Specifically, it allocates a new physical page and sets the -** following variables accordingly: -** -** HctFilePage.iNewPg -** HctFilePage.aNew -** -** The PHYSICAL_IN_USE flag is set on the new physical page allocated -** here. -*/ -static void hctFilePageMakeWritable(int *pRc, HctFilePage *pPg){ - if( pPg->aNew==0 ){ - HctFile *pFile = pPg->pFile; - u32 iNewPg = hctFileAllocPg(pRc, pFile, 0); - if( iNewPg ){ - hctFileSetFlag(pPg->pFile, iNewPg, HCT_PMF_PHYSICAL_IN_USE); - pPg->iNewPg = iNewPg; - - if( pFile->pServer->bReadOnlyMap ){ - pPg->aNew = (u8*)sqlite3_malloc(pFile->szPage); - /* todo: handle oom here */ - }else{ - pPg->aNew = (u8*)hctPagePtr(pPg->pFile->pMapping, iNewPg); - } - } - } -} - - -#if 0 -static void debug_printf(const char *zFmt, ...){ - va_list ap; - va_start(ap, zFmt); - vprintf(zFmt, ap); - va_end(ap); -} - -static void debug_slot_value(HctFile *pFile, u32 iSlot){ - u64 iVal = hctFilePagemapGet(pFile->pMapping, iSlot); - printf("[flags=%02x val=%lld]", (u32)(iVal>>56), iVal & HCT_PAGEMAP_VMASK); -} - -#define DEBUG_PAGE_MUTEX_ENTER(pPg) \ - sqlite3_mutex_enter(pPg->pFile->pServer->pMutex) - -#define DEBUG_PAGE_MUTEX_LEAVE(pPg) \ - fflush(stdout); sqlite3_mutex_leave(pPg->pFile->pServer->pMutex) - -#define DEBUG_PRINTF(...) debug_printf(__VA_ARGS__) -#define DEBUG_SLOT_VALUE(pFile, iSlot) debug_slot_value(pFile, iSlot) - -SQLITE_PRIVATE void sqlite3HctFileDebugPrint(HctFile *pFile, const char *zFmt, ...){ - va_list ap; - sqlite3_mutex_enter(pFile->pServer->pMutex); - printf("f=%d: ", pFile->iFileId); - va_start(ap, zFmt); - vprintf(zFmt, ap); - va_end(ap); - sqlite3_mutex_leave(pFile->pServer->pMutex); -} - -#else -# define DEBUG_PAGE_MUTEX_ENTER(x) -# define DEBUG_PAGE_MUTEX_LEAVE(x) -# define DEBUG_PRINTF(...) 
-# define DEBUG_SLOT_VALUE(x,y) -SQLITE_PRIVATE void sqlite3HctFileDebugPrint(HctFile *pFile, const char *zFmt, ...){ } -#endif - -void hctFileFreePg( - int *pRc, - HctFile *pFile, - i64 iTid, /* Associated TID value */ - u32 iPg, /* Page number */ - int bLogical /* True for logical, false for physical */ -){ - if( pFile->eInitState>=HCT_INIT_RECOVER1 ){ - sqlite3HctPManFreePg(pRc, pFile->pPManClient, iTid, iPg, bLogical); - } -} - - -static int hctFilePageFlush(HctFilePage *pPg){ - int rc = SQLITE_OK; - if( pPg->aNew ){ - u32 iOld = pPg->iOldPg; - - DEBUG_PAGE_MUTEX_ENTER(pPg); - DEBUG_PRINTF("f=%d: Flushing page %d orig=", pPg->pFile->iFileId, pPg->iPg); - DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); - DEBUG_PRINTF(" (ioldpg=%d) (inewpg=%d)", pPg->iOldPg, pPg->iNewPg); - - DEBUG_PRINTF("\n"); - DEBUG_PAGE_MUTEX_LEAVE(pPg); - - if( pPg->pFile->pServer->bReadOnlyMap ){ - rc = hctPageWriteToDisk(pPg->pFile->pServer, pPg->iNewPg, pPg->aNew); - } - - if( rc==SQLITE_OK ){ - if( !hctFilePagemapSetLogical(pPg->pFile, pPg->iPg, iOld, pPg->iNewPg) ){ - rc = SQLITE_LOCKED_ERR(pPg->iPg, "flush"); - }else{ - if( iOld ){ - u64 iTid = pPg->pFile->iCurrentTid; - hctFileFreePg(&rc, pPg->pFile, iTid, iOld, 0); - hctFileClearFlag(pPg->pFile, iOld, HCT_PMF_PHYSICAL_IN_USE); - } - pPg->iOldPg = pPg->iNewPg; - if( pPg->pFile->pServer->bReadOnlyMap ){ - sqlite3_free(pPg->aNew); - pPg->aOld = hctPagePtr(pPg->pFile->pMapping, pPg->iOldPg); - }else{ - pPg->aOld = pPg->aNew; - } - pPg->aNew = 0; - pPg->iNewPg = 0; - } - } - - DEBUG_PAGE_MUTEX_ENTER(pPg); - DEBUG_PRINTF("f=%d:", pPg->pFile->iFileId); - - DEBUG_PRINTF(" rc=%d final=", rc); - DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); - DEBUG_PRINTF("%s\n", rc==SQLITE_LOCKED ? 
" SQLITE_LOCKED" : ""); - DEBUG_PAGE_MUTEX_LEAVE(pPg); - } - return rc; -} - -SQLITE_PRIVATE int sqlite3HctFilePageCommit(HctFilePage *pPg){ - assert( pPg->iPg ); - return hctFilePageFlush(pPg); -} - -SQLITE_PRIVATE int sqlite3HctFilePageEvict(HctFilePage *pPg, int bIrrevocable){ - int ret; - - DEBUG_PAGE_MUTEX_ENTER(pPg); - DEBUG_PRINTF("f=%d: Evicting page %d (irrecocable=%d) orig=", - pPg->pFile->iFileId, pPg->iPg, bIrrevocable - ); - DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); - - ret = hctFileSetEvicted(pPg->pFile, pPg->iPg, pPg->iOldPg, bIrrevocable); - ret = (ret ? SQLITE_OK : SQLITE_LOCKED_ERR(pPg->iPg, "evict")); - - DEBUG_PRINTF(" rc=%d final=", ret); - DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); - DEBUG_PRINTF("%s\n", ret==SQLITE_LOCKED ? " SQLITE_LOCKED" : ""); - DEBUG_PAGE_MUTEX_LEAVE(pPg); - return ret; -} - -SQLITE_PRIVATE void sqlite3HctFilePageUnevict(HctFilePage *pPg){ - DEBUG_PAGE_MUTEX_ENTER(pPg); - DEBUG_PRINTF("f=%d: Unevicting page %d orig=", pPg->pFile->iFileId, pPg->iPg); - DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); - - hctFileClearEvicted(pPg->pFile, pPg->iPg); - - DEBUG_PRINTF(" final="); - DEBUG_SLOT_VALUE(pPg->pFile, pPg->iPg); - DEBUG_PRINTF("\n"); - DEBUG_PAGE_MUTEX_LEAVE(pPg); -} - -SQLITE_PRIVATE int sqlite3HctFilePageIsEvicted(HctFile *pFile, u32 iPgno){ - u64 val; - int rc = hctFilePagemapGetGrow(pFile, iPgno, &val); - return (rc || (val & HCT_PMF_LOGICAL_EVICTED)!=0); -} - -SQLITE_PRIVATE int sqlite3HctFilePageIsFree(HctFile *pFile, u32 iPgno, int bLogical){ - u64 iVal = hctFilePagemapGet(pFile->pMapping, iPgno); - u64 mask = (bLogical ? HCT_PMF_LOGICAL_IN_USE : HCT_PMF_PHYSICAL_IN_USE); - return (iVal & mask) ? 
0 : 1; -} - -SQLITE_PRIVATE int sqlite3HctFilePageRelease(HctFilePage *pPg){ - int rc = SQLITE_OK; - if( pPg->iPg ){ - rc = hctFilePageFlush(pPg); - }else if( pPg->aNew && pPg->pFile->pServer->bReadOnlyMap ){ - rc = hctPageWriteToDisk(pPg->pFile->pServer, pPg->iNewPg, pPg->aNew); - sqlite3_free(pPg->aNew); - } - memset(pPg, 0, sizeof(*pPg)); - return rc; -} - - - -/* -** Allocate a new physical page and set (*pPg) to refer to it. The new -** physical page number is available in HctFilePage.iNewPg. -*/ -SQLITE_PRIVATE int sqlite3HctFilePageNewPhysical(HctFile *pFile, HctFilePage *pPg){ - int rc = SQLITE_OK; - memset(pPg, 0, sizeof(*pPg)); - pPg->pFile = pFile; - hctFilePageMakeWritable(&rc, pPg); - return rc; -} - -/* -** Allocate a new logical page. If parameter iPg is zero, then a new -** logical page number is allocated. Otherwise, it must be a logical page -** number obtained by an earlier call to sqlite3HctFileRootPgno(). -*/ -SQLITE_PRIVATE int sqlite3HctFilePageNew(HctFile *pFile, HctFilePage *pPg){ - int rc = SQLITE_OK; /* Return code */ - u32 iLPg = hctFileAllocPg(&rc, pFile, 1); - if( rc==SQLITE_OK ){ - memset(pPg, 0, sizeof(*pPg)); - pPg->pFile = pFile; - pPg->iPg = iLPg; - hctFilePagemapZeroValue(pFile, iLPg); - hctFilePageMakeWritable(&rc, pPg); - } - - return rc; -} - -/* -** Allocate a new logical root page number. -*/ -SQLITE_PRIVATE int sqlite3HctFileRootPgno(HctFile *pFile, u32 *piRoot){ - int rc = SQLITE_OK; - u32 iRoot = hctFileAllocPg(&rc, pFile, 1); - if( rc==SQLITE_OK ){ - hctFilePagemapZeroValue(pFile, iRoot); - *piRoot = iRoot; - } - return rc; -} - -/* -** Parameter iRoot is a root page number previously obtained from -** sqlite3HctFileRootPgno(). This function allocates a physical -** page to go with the logical one. 
-*/ -SQLITE_PRIVATE int sqlite3HctFileRootNew(HctFile *pFile, u32 iRoot, HctFilePage *pPg){ - int rc = SQLITE_OK; /* Return code */ - - memset(pPg, 0, sizeof(*pPg)); - pPg->pFile = pFile; - pPg->iPg = iRoot; - hctFilePageMakeWritable(&rc, pPg); - - /* Set the LOGICAL_IN_USE and LOGICAL_IS_ROOT flags on page iRoot. At - ** the same time, set the mapping to 0. Take care not to clear the - ** PHYSICAL_IN_USE flag while doing so, in case there is a physical - ** page with page number iRoot currently in use somewhere. */ - while( rc==SQLITE_OK ){ - u64 i1 = hctFilePagemapGet(pFile->pMapping, iRoot); - u64 i2 = (i1 & HCT_PMF_PHYSICAL_IN_USE); - i2 |= (HCT_PMF_LOGICAL_IS_ROOT|HCT_PMF_LOGICAL_IN_USE); - if( hctFilePagemapSet(pFile, iRoot, i1, i2) ) break; - } - - return rc; -} - -SQLITE_PRIVATE void sqlite3HctFilePageUnwrite(HctFilePage *pPg){ - int rc = SQLITE_OK; - if( pPg->aNew ){ - hctFileClearFlag(pPg->pFile, pPg->iNewPg, HCT_PMF_PHYSICAL_IN_USE); - hctFileFreePg(&rc, pPg->pFile, 0, pPg->iNewPg, 0); - if( pPg->pFile->pServer->bReadOnlyMap ){ - sqlite3_free(pPg->aNew); - } - pPg->iNewPg = 0; - pPg->aNew = 0; - if( pPg->iOldPg==0 ){ - assert( pPg->aOld==0 ); - hctFileFreePg(&rc, pPg->pFile, 0, pPg->iPg, 1); - pPg->iPg = 0; - } - } -} - -SQLITE_PRIVATE int sqlite3HctFilePageWrite(HctFilePage *pPg){ - int rc = SQLITE_OK; /* Return code */ - hctFilePageMakeWritable(&rc, pPg); - return rc; -} - -SQLITE_PRIVATE u64 sqlite3HctFileAllocateTransid(HctFile *pFile){ - u64 iVal = hctFilePagemapIncr(pFile, HCT_PAGEMAP_TRANSID_EOF, 1); - pFile->iCurrentTid = (iVal & HCT_TID_MASK); - return pFile->iCurrentTid; -} -SQLITE_PRIVATE u64 sqlite3HctFileAllocateCID(HctFile *pFile, int nWrite){ - assert( nWrite>0 ); - return hctFileAtomicIncr(pFile, &pFile->pServer->iCommitId, nWrite); -} - -SQLITE_PRIVATE void sqlite3HctFileSetCID(HctFile *pFile, u64 iVal){ - HctAtomicStore(&pFile->pServer->iCommitId, iVal); -} - -SQLITE_PRIVATE u64 sqlite3HctFileIncrWriteCount(HctFile *pFile, int nIncr){ - 
return hctFileAtomicIncr(pFile, &pFile->pServer->nWriteCount, nIncr); -} - -SQLITE_PRIVATE u64 sqlite3HctFileGetSnapshotid(HctFile *pFile){ - return HctAtomicLoad( &pFile->pServer->iCommitId ); -} - -SQLITE_PRIVATE int sqlite3HctFilePgsz(HctFile *pFile){ - return pFile->szPage; -} - -SQLITE_PRIVATE void sqlite3HctFileSetJrnlPtr( - HctFile *pFile, - void *pPtr, - void(*xDel)(void*) -){ - assert( pFile->pServer->pJrnlPtr==0 ); - assert( pFile->pServer->xJrnlDel==0 ); - pFile->pServer->pJrnlPtr = pPtr; - pFile->pServer->xJrnlDel = xDel; -} - -SQLITE_PRIVATE void *sqlite3HctFileGetJrnlPtr(HctFile *pFile){ - return pFile->pServer->pJrnlPtr; -} - -/* -** Return the current "safe" TID value. -*/ -SQLITE_PRIVATE u64 sqlite3HctFileSafeTID(HctFile *pFile){ - return sqlite3HctTMapSafeTID(pFile->pTMapClient); -} - -/* -** Allocate a block of nPg physical or logical page ids from the -** end of the current range. -*/ -SQLITE_PRIVATE u32 sqlite3HctFilePageRangeAlloc(HctFile *pFile, int bLogical, int nPg){ - u32 iSlot = HCT_PAGEMAP_PHYSICAL_EOF - bLogical; - u64 iNew = 0; - - assert( bLogical==0 || iSlot==HCT_PAGEMAP_LOGICAL_EOF ); - assert( bLogical!=0 || iSlot==HCT_PAGEMAP_PHYSICAL_EOF ); - - /* Increment the selected slot by nPg. The returned value, iNew, is the - ** new value of the slot - the last page in the range allocated. */ - iNew = hctFilePagemapIncr(pFile, iSlot, nPg); - - /* Return the first page number in the range of nPg allocated */ - return (iNew+1 - nPg); -} - -/* -** This function is called by the upper layer to clear the: -** -** * LOGICAL_IN_USE flag on the specified page id, and the -** * PHYSICAL_IN_USE flag on currently mapped physical page id. -** -** If parameter bReuseNow is true, then the page was never properly linked -** into a list, and so the logical and physical page ids can be reused -** immediately. Otherwise, they are handled as if freed by the current -** transaction. 
-*/ -SQLITE_PRIVATE int sqlite3HctFileClearInUse(HctFilePage *pPg, int bReuseNow){ - int rc = SQLITE_OK; - if( pPg->pFile ){ - u64 iTid = pPg->pFile->iCurrentTid; - u32 iPhysPg = pPg->iOldPg; - - assert( pPg->iPg>0 ); - assert( pPg->iOldPg>0 ); - -#ifdef SQLITE_DEBUG - if( bReuseNow==0 ){ - u64 iVal = hctFilePagemapGet(pPg->pFile->pMapping, pPg->iPg); - assert( iVal & HCT_PMF_LOGICAL_EVICTED ); - } -#endif - - hctFileClearFlag(pPg->pFile, pPg->iPg, HCT_PMF_LOGICAL_IN_USE); - hctFileClearFlag(pPg->pFile, iPhysPg, HCT_PMF_PHYSICAL_IN_USE); - hctFileFreePg(&rc, pPg->pFile, iTid, pPg->iPg, 1); - hctFileFreePg(&rc, pPg->pFile, iTid, iPhysPg, 0); - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctFileClearPhysInUse(HctFile *pFile, u32 pgno, int bReuseNow){ - u64 iTid = pFile->iCurrentTid; - int rc = SQLITE_OK; - - hctFileClearFlag(pFile, pgno, HCT_PMF_PHYSICAL_IN_USE); - hctFileFreePg(&rc, pFile, iTid, pgno, 0); - return rc; -} - -SQLITE_PRIVATE char *sqlite3HctFileLogFile(HctFile *pFile){ - char *zRet = 0; - HctFileServer *pServer = pFile->pServer; - sqlite3_mutex_enter(pServer->pMutex); - zRet = sqlite3_mprintf("%s-log-%d", pServer->zPath, pFile->iFileId); - sqlite3_mutex_leave(pServer->pMutex); - return zRet; -} - -SQLITE_PRIVATE int sqlite3HctFileStartRecovery(HctFile *pFile, int iStage){ - int bRet = 0; - if( pFile->eInitState==iStage ){ - HctFileServer *pServer = pFile->pServer; - sqlite3_mutex_enter(pServer->pMutex); - if( pServer->eInitState==iStage ){ - bRet = 1; - }else{ - pFile->eInitState = pServer->eInitState; - sqlite3_mutex_leave(pServer->pMutex); - } - } - return bRet; -} - -SQLITE_PRIVATE int sqlite3HctFileFinishRecovery(HctFile *pFile, int iStage, int rc){ - HctFileServer *pServer = pFile->pServer; - if( rc==SQLITE_OK ){ - pFile->eInitState = iStage+1; - pServer->eInitState = iStage+1; - } - sqlite3HctPManClientHandoff(pFile->pPManClient); - sqlite3_mutex_leave(pFile->pServer->pMutex); - return rc; -} - -SQLITE_PRIVATE int 
sqlite3HctFileRecoverFreelists( - HctFile *pFile, /* File to recover freelists for */ - int nRoot, i64 *aRoot, /* Array of root page numbers */ - int nPhys, i64 *aPhys /* Sorted array of phys. pages to preserve */ -){ - int rc = SQLITE_OK; - HctFileServer *pServer = pFile->pServer; - HctPManServer *pPManServer = pServer->pPManServer; - HctMapping *pMapping = pServer->pMapping; - u64 iSafeTid = hctFilePagemapGet(pMapping, HCT_PAGEMAP_TRANSID_EOF); - u64 nPg1 = hctFilePagemapGet(pMapping, HCT_PAGEMAP_PHYSICAL_EOF); - u64 nPg2 = hctFilePagemapGet(pMapping, HCT_PAGEMAP_LOGICAL_EOF); - u32 iPg; - u32 nPg; - u32 iPhysOff = ((HCT_HEADER_PAGESIZE*2)+pServer->szPage-1)/pServer->szPage; - - int iPhys = 0; - - nPg1 = nPg1 & HCT_PAGEMAP_VMASK; - nPg2 = nPg2 & HCT_PAGEMAP_VMASK; - - /* TODO: Really - page-manager must be empty at this point. Should assert() - ** that instead of making this call. */ - sqlite3HctPManServerReset(pPManServer); - - nPg = MAX((nPg1 & 0xFFFFFFFF), (nPg2 & 0xFFFFFFFF)); - for(iPg=1; iPg<=nPg; iPg++){ - u64 iVal = hctFilePagemapGetSafe(pMapping, iPg); - - if( (iVal & HCT_PMF_LOGICAL_IS_ROOT) && iPg>=3 ){ - int ii; - for(ii=0; iiiPhysOff) - ){ - /* Check if page iPg is one that must be preserved. 
*/ - u64 iTid = iSafeTid; - while( iPhys=HCT_FIRST_LOGICAL - ){ - sqlite3HctPManServerInit(&rc, pPManServer, iSafeTid, iPg, 1); - } - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctFileFindLogs( - HctFile *pFile, - void *pCtx, - int (*xLog)(void*, const char*) -){ - return hctFileFindLogs(pFile->pServer, pCtx, xLog); -} - -SQLITE_PRIVATE int sqlite3HctFileRootArray( - HctFile *pFile, - u32 **paiRoot, - int *pnRoot -){ - int nAlloc = 0; - int nRoot = 0; - u32 *aRoot = 0; - u32 nLogic = 0; - int ii; - int rc; - - rc = hctFilePagemapGetGrow32(pFile, HCT_PAGEMAP_LOGICAL_EOF, &nLogic); - for(ii=1; rc==SQLITE_OK && ii<=nLogic; ii++){ - u64 val; - rc = hctFilePagemapGetGrow(pFile, ii, &val); - if( rc==SQLITE_OK && (val & HCT_PMF_LOGICAL_IS_ROOT) ){ - if( nRoot>=nAlloc ){ - int nNew = (nAlloc ? nAlloc*2 : 16); - u32 *aNew = (u32*)sqlite3_realloc(aRoot, nNew*sizeof(u32)); - if( aNew==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - nAlloc = nNew; - aRoot = aNew; - } - } - - if( rc==SQLITE_OK ){ - aRoot[nRoot++] = ii; - } - } - } - - if( rc!=SQLITE_OK ){ - sqlite3_free(aRoot); - aRoot = 0; - nRoot = 0; - } - *paiRoot = aRoot; - *pnRoot = nRoot; - return rc; -} - -SQLITE_PRIVATE u64 sqlite3HctFileWriteCount(HctFile *pFile){ - return pFile->nPageAlloc; -} - -SQLITE_PRIVATE void sqlite3HctFileICArrays( - HctFile *pFile, - u8 **paLogic, u32 *pnLogic, - u8 **paPhys, u32 *pnPhys -){ - int rc = SQLITE_OK; - u32 nLogic = 0; - u32 nPhys = 0; - u8 *aLogic = 0; - u8 *aPhys = 0; - u32 ii; - - rc = hctFilePagemapGetGrow32(pFile, HCT_PAGEMAP_LOGICAL_EOF, &nLogic); - if( rc==SQLITE_OK ){ - rc = hctFilePagemapGetGrow32(pFile, HCT_PAGEMAP_PHYSICAL_EOF, &nPhys); - } - - if( rc==SQLITE_OK ){ - aLogic = (u8*)sqlite3HctMalloc(&rc, (nLogic + nPhys) * sizeof(u8)); - if( aLogic ){ - aPhys = &aLogic[nLogic]; - } - } - - for(ii=1; ii<=nLogic && rc==SQLITE_OK; ii++){ - u64 val; - rc = hctFilePagemapGetGrow(pFile, ii, &val); - if( rc==SQLITE_OK && (val & HCT_PMF_LOGICAL_IN_USE)==0 ){ - aLogic[ii-1] = 1; 
- } - } - for(ii=1; ii<=nPhys && rc==SQLITE_OK; ii++){ - u64 val; - rc = hctFilePagemapGetGrow(pFile, ii, &val); - if( rc==SQLITE_OK && (val & HCT_PMF_PHYSICAL_IN_USE)==0 ){ - aPhys[ii-1] = 1; - } - } - - if( rc!=SQLITE_OK ){ - sqlite3_free(aLogic); - aLogic = aPhys = 0; - nLogic = nPhys = 0; - } - - *paLogic = aLogic; - *paPhys = aPhys; - *pnLogic = nLogic; - *pnPhys = nPhys; -} - -SQLITE_PRIVATE i64 sqlite3HctFileStats(sqlite3 *db, int iStat, const char **pzStat){ - i64 iVal = -1; - HctFile *pFile = sqlite3HctDbFile(sqlite3HctDbFind(db, 0)); - - switch( iStat ){ - case 0: - *pzStat = "cas_attempt"; - iVal = pFile->stats.nCasAttempt; - break; - case 1: - *pzStat = "cas_fail"; - iVal = pFile->stats.nCasFail; - break; - case 2: - *pzStat = "incr_attempt"; - iVal = pFile->stats.nIncrAttempt; - break; - case 3: - *pzStat = "incr_fail"; - iVal = pFile->stats.nIncrFail; - break; - case 4: - *pzStat = "mutex_attempt"; - iVal = pFile->stats.nMutex; - break; - case 5: - *pzStat = "mutex_block"; - iVal = pFile->stats.nMutexBlock; - break; - default: - break; - } - - return iVal; -} - -SQLITE_PRIVATE int sqlite3HctFileNFile(HctFile *pFile, int *pbFixed){ - int iRet = 0; - HctFileServer *p = pFile->pServer; - sqlite3_mutex_enter(p->pMutex); - iRet = p->nFdDb; - *pbFixed = (p->szPage>0); - sqlite3_mutex_leave(p->pMutex); - return iRet; -} - -/************************************************************************* -** Beginning of vtab implemetation. -*************************************************************************/ - -#define HCT_PGMAP_SCHEMA \ -" CREATE TABLE hct_pgmap(" \ -" slot INTEGER," \ -" value INTEGER," \ -" comment TEXT," \ -" physical_in_use BOOLEAN," \ -" logical_in_use BOOLEAN," \ -" logical_evicted BOOLEAN," \ -" logical_irrevicted BOOLEAN,"\ -" logical_is_root BOOLEAN" \ -" );" - -/* -** Virtual table type for "hctpgmap". 
-*/ -typedef struct pgmap_vtab pgmap_vtab; -struct pgmap_vtab { - sqlite3_vtab base; /* Base class - must be first */ - sqlite3 *db; -}; - -/* -** Virtual cursor type for "hctpgmap". -*/ -typedef struct pgmap_cursor pgmap_cursor; -struct pgmap_cursor { - sqlite3_vtab_cursor base; /* Base class - must be first */ - HctFile *pFile; /* Database to report on */ - u64 iMaxSlotno; /* Maximum page number for this scan */ - u64 slotno; /* The page-number/rowid value */ - u64 iVal; /* Value read from pagemap */ -}; - -/* -** The pgmapConnect() method is invoked to create a new -** template virtual table. -** -** Think of this routine as the constructor for pgmap_vtab objects. -** -** All this routine needs to do is: -** -** (1) Allocate the pgmap_vtab object and initialize all fields. -** -** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the -** result set of queries against the virtual table will look like. -*/ -static int pgmapConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - pgmap_vtab *pNew; - int rc; - - rc = sqlite3_declare_vtab(db, HCT_PGMAP_SCHEMA); - pNew = (pgmap_vtab*)sqlite3HctMalloc(&rc, sizeof(*pNew)); - if( pNew ){ - pNew->db = db; - } - - *ppVtab = (sqlite3_vtab*)pNew; - return rc; -} - -/* -** This method is the destructor for pgmap_vtab objects. -*/ -static int pgmapDisconnect(sqlite3_vtab *pVtab){ - pgmap_vtab *p = (pgmap_vtab*)pVtab; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Constructor for a new pgmap_cursor object. -*/ -static int pgmapOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ - pgmap_cursor *pCur; - pCur = sqlite3MallocZero(sizeof(*pCur)); - if( pCur==0 ) return SQLITE_NOMEM; - *ppCursor = &pCur->base; - return SQLITE_OK; -} - -/* -** Destructor for a pgmap_cursor. 
-*/ -static int pgmapClose(sqlite3_vtab_cursor *cur){ - pgmap_cursor *pCur = (pgmap_cursor*)cur; - sqlite3_free(pCur); - return SQLITE_OK; -} - -/* -** Return TRUE if the cursor has been moved off of the last -** row of output. -*/ -static int pgmapEof(sqlite3_vtab_cursor *cur){ - pgmap_cursor *pCur = (pgmap_cursor*)cur; - return pCur->slotno>pCur->iMaxSlotno; -} - -static int pgmapLoadSlot(pgmap_cursor *pCur){ - return hctFilePagemapGetGrow( - pCur->pFile, pCur->slotno, &pCur->iVal - ); -} - -/* -** Advance a hctdb_cursor to its next row of output. -*/ -static int pgmapNext(sqlite3_vtab_cursor *cur){ - pgmap_cursor *pCur = (pgmap_cursor*)cur; - pCur->slotno++; - return pgmapEof(cur) ? SQLITE_OK : pgmapLoadSlot(pCur); -} - -static void pgmapGetComment(sqlite3_context *ctx, i64 iSlot){ - const char *zText = 0; - - switch( iSlot ){ - case HCT_ROOTPAGE_SCHEMA: - zText = "ROOTPAGE_SCHEMA"; - break; - case HCT_ROOTPAGE_META: - zText = "ROOTPAGE_META"; - break; - case HCT_PAGEMAP_LOGICAL_EOF: - zText = "LOGICAL_EOF"; - break; - case HCT_PAGEMAP_PHYSICAL_EOF: - zText = "PHYSICAL_EOF"; - break; - case HCT_PAGEMAP_TRANSID_EOF: - zText = "TRANSID_EOF"; - break; - } - - if( zText ){ - sqlite3_result_text(ctx, zText, -1, SQLITE_TRANSIENT); - } -} - -/* -** Return values of columns for the row at which the pgmap_cursor -** is currently pointing. 
-*/ -static int pgmapColumn( - sqlite3_vtab_cursor *cur, /* The cursor */ - sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ - int i /* Which column to return */ -){ - pgmap_cursor *pCur = (pgmap_cursor*)cur; - switch( i ){ - case 0: { /* slotno */ - sqlite3_result_int64(ctx, pCur->slotno); - break; - } - case 1: { /* pgno */ - sqlite3_result_int64(ctx, (pCur->iVal & 0xFFFFFFFF)); - break; - } - case 2: { /* pgno */ - pgmapGetComment(ctx, pCur->slotno); - break; - } - case 3: { /* physical_in_use */ - sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_PHYSICAL_IN_USE)?1:0); - break; - } - case 4: { /* logical_in_use */ - sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_IN_USE)?1:0); - break; - } - case 5: { /* logical_evicted */ - sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_EVICTED)?1:0); - break; - } - case 6: { /* logical_irrevicted */ - sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_IRREVICTED)?1:0); - break; - } - case 7: { /* logical_is_root */ - sqlite3_result_int64(ctx, (pCur->iVal & HCT_PMF_LOGICAL_IS_ROOT)?1:0); - break; - } - } - return SQLITE_OK; -} - -/* -** Return the rowid for the current row. In this implementation, the -** rowid is the same as the slotno value. -*/ -static int pgmapRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ - pgmap_cursor *pCur = (pgmap_cursor*)cur; - *pRowid = pCur->slotno; - return SQLITE_OK; -} - -/* -** This method is called to "rewind" the pgmap_cursor object back -** to the first row of output. This method is always called at least -** once prior to any call to pgmapColumn() or pgmapRowid() or -** pgmapEof(). 
-*/ -static int pgmapFilter( - sqlite3_vtab_cursor *pVtabCursor, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv -){ - pgmap_cursor *pCur = (pgmap_cursor*)pVtabCursor; - pgmap_vtab *pTab = (pgmap_vtab*)(pCur->base.pVtab); - int rc; - u64 max1; - u64 max2; - - pCur->pFile = sqlite3HctDbFile(sqlite3HctDbFind(pTab->db, 0)); - pCur->slotno = 1; - max1 = hctFilePagemapGet(pCur->pFile->pMapping, HCT_PAGEMAP_PHYSICAL_EOF); - max2 = hctFilePagemapGet(pCur->pFile->pMapping, HCT_PAGEMAP_LOGICAL_EOF); - max1 &= HCT_PGNO_MASK; - max2 &= HCT_PGNO_MASK; - pCur->iMaxSlotno = max1>max2 ? max1 : max2; - rc = pgmapLoadSlot(pCur); - return rc; -} - -/* -** SQLite will invoke this method one or more times while planning a query -** that uses the virtual table. This routine needs to create -** a query plan for each invocation and compute an estimated cost for that -** plan. -*/ -static int pgmapBestIndex( - sqlite3_vtab *tab, - sqlite3_index_info *pIdxInfo -){ - pIdxInfo->estimatedCost = (double)10; - pIdxInfo->estimatedRows = 10; - return SQLITE_OK; -} - -/* -** This function is the implementation of the xUpdate callback used by -** hctpgmap virtual tables. It is invoked by SQLite each time a row is -** to be inserted, updated or deleted. -** -** A delete specifies a single argument - the rowid of the row to remove. -** -** Update and insert operations pass: -** -** 1. The "old" rowid (for an UPDATE), or NULL (for an INSERT). -** 2. The "new" rowid. -** 3. Values for each of the 6 columns. 
-** -** Specifically: -** -** apVal[2]: slot -** apVal[3]: value -** apVal[4]: comment -** apVal[5]: physical_in_use -** apVal[6]: logical_in_use -** apVal[7]: logical_evicted -** apVal[8]: logical_irrevicted -** apVal[9]: logical_is_root -*/ -static int pgmapUpdate( - sqlite3_vtab *pVtab, - int nVal, - sqlite3_value **apVal, - sqlite3_int64 *piRowid -){ - pgmap_vtab *p = (pgmap_vtab*)pVtab; - HctFile *pFile = sqlite3HctDbFile(sqlite3HctDbFind(p->db, 0)); - u32 iSlot = 0; - u64 val = 0; - u64 *pPtr = 0; - - i64 iValue = 0; - int bPhysicalInUse = 0; - int bLogicalInUse = 0; - int bLogicalEvicted = 0; - int bLogicalIrrevicted = 0; - int bLogicalIsRoot = 0; - - if( nVal==1 || sqlite3_value_type(apVal[0])!=SQLITE_INTEGER ){ - return SQLITE_CONSTRAINT; - } - iSlot = sqlite3_value_int64(apVal[0]); - - iValue = sqlite3_value_int64(apVal[3]); - bPhysicalInUse = sqlite3_value_int(apVal[5]); - bLogicalInUse = sqlite3_value_int(apVal[6]); - bLogicalEvicted = sqlite3_value_int(apVal[7]); - bLogicalIrrevicted = sqlite3_value_int(apVal[8]); - bLogicalIsRoot = sqlite3_value_int(apVal[9]); - - val = iValue & HCT_PAGEMAP_VMASK; - val |= (bPhysicalInUse ? HCT_PMF_PHYSICAL_IN_USE : 0); - val |= (bLogicalInUse ? HCT_PMF_LOGICAL_IN_USE : 0); - val |= (bLogicalEvicted ? HCT_PMF_LOGICAL_EVICTED : 0); - val |= (bLogicalIrrevicted ? HCT_PMF_LOGICAL_IRREVICTED : 0); - val |= (bLogicalIsRoot ? 
HCT_PMF_LOGICAL_IS_ROOT : 0); - - pPtr = hctPagemapPtr(pFile->pMapping, iSlot); - AtomicStore(pPtr, val); - - *piRowid = iSlot; - return SQLITE_OK; -} - -SQLITE_PRIVATE int sqlite3HctFileVtabInit(sqlite3 *db){ - static sqlite3_module pgmapModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ pgmapConnect, - /* xBestIndex */ pgmapBestIndex, - /* xDisconnect */ pgmapDisconnect, - /* xDestroy */ 0, - /* xOpen */ pgmapOpen, - /* xClose */ pgmapClose, - /* xFilter */ pgmapFilter, - /* xNext */ pgmapNext, - /* xEof */ pgmapEof, - /* xColumn */ pgmapColumn, - /* xRowid */ pgmapRowid, - /* xUpdate */ pgmapUpdate, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0 - }; - - return sqlite3_create_module(db, "hctpgmap", &pgmapModule, 0); -} - -SQLITE_PRIVATE int sqlite3HctIoerr(int rc){ - sqlite3_log(rc, "sqlite3HctIoerr() - rc=%d errno=%d\n", rc, (int)errno); - assert( 0 ); - abort(); - return rc; -} - - -/************** End of hct_file.c ********************************************/ -/************** Begin file hct_database.c ************************************/ -/* -** 2020 October 13 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. 
-** -************************************************************************* -*/ - -/* #include "hctInt.h" */ -/* #include "vdbeInt.h" */ -/* #include */ -/* #include */ - -typedef struct HctDatabase HctDatabase; -typedef struct HctDbIndexEntry HctDbIndexEntry; -typedef struct HctDbIndexLeaf HctDbIndexLeaf; -typedef struct HctDbIndexNode HctDbIndexNode; -typedef struct HctDbIndexNodeEntry HctDbIndexNodeEntry; -typedef struct HctDbIndexNodeHdr HctDbIndexNodeHdr; -typedef struct HctDbIntkeyEntry HctDbIntkeyEntry; -typedef struct HctDbIntkeyLeaf HctDbIntkeyLeaf; -typedef struct HctDbIntkeyNodeEntry HctDbIntkeyNodeEntry; -typedef struct HctDbIntkeyNode HctDbIntkeyNode; -typedef struct HctDbKey HctDbKey; -typedef struct HctDbLeaf HctDbLeaf; -typedef struct HctDbLeafHdr HctDbLeafHdr; -typedef struct HctDbWriter HctDbWriter; -typedef struct HctDbPageHdr HctDbPageHdr; -typedef struct HctDbHistoryFan HctDbHistoryFan; -typedef struct HctDbRangeCsr HctDbRangeCsr; - -typedef struct HctCsrIntkeyOp HctCsrIntkeyOp; -typedef struct HctCsrIndexOp HctCsrIndexOp; - -typedef struct HctDbPageArray HctDbPageArray; - -struct HctCsrIntkeyOp { - HctCsrIntkeyOp *pNextOp; - i64 iFirst; - i64 iLast; - - u32 iLogical; - u32 iPhysical; -}; - -struct HctCsrIndexOp { - HctCsrIndexOp *pNextOp; - u8 *pFirst; - int nFirst; - u8 *pLast; - int nLast; - - u32 iLogical; - u32 iPhysical; -}; - -struct CsrIntkey { - HctCsrIntkeyOp *pOpList; - HctCsrIntkeyOp *pCurrentOp; -}; -struct CsrIndex { - HctCsrIndexOp *pOpList; - HctCsrIndexOp *pCurrentOp; -}; - -struct HctDbKey { - i64 iKey; /* Integer key value */ - UnpackedRecord *pKey; /* Index key value */ - HctBuffer buf; /* Buffer for pKey data (if required) */ -}; - -/* -** eRange: -** Set to one of the HCT_RANGE_* constants defined below. 
-*/ -struct HctDbRangeCsr { - HctDbKey lowkey; - HctDbKey highkey; - u64 iRangeTid; /* The range TID that was followed here */ - - int eRange; /* HCT_RANGE_* constant */ - int iCell; - HctFilePage pg; -}; - -#define HCT_RANGE_FOLLOW 0 /* Follow range-pointers only */ -#define HCT_RANGE_MERGE 1 /* Merge in data + follow range-pointers */ -#define HCT_RANGE_FAN 2 /* HctDbRangeCsr.pg is a fan page */ - -#define IS_HCT_MIGRATE(pDb) (pDb->pConfig->db->bHctMigrate) - -/* -** iRoot: -** Logical root page of tree structure that this cursor is open on. -** -** pKeyInfo: -** NULL for cursors open on intkey trees, otherwise points to the -** KeyInfo used to compare keys in the open index tree. For cursors -** opened by the user, this is set when the cursor is opened within -** sqlite3HctDbCsrOpen() and never modified. -** -** pRec: -** UnpackedRecord structure suitable for use with pKeyInfo. This is -** allocated the first time it is required and then retained for -** the lifetime of the HctDbCsr structure. -** -** eDir: -** One of BTREE_DIR_NONE, BTREE_DIR_FORWARD or BTREE_DIR_REVERSE. 
-** -** pIntkeyOps: -*/ -struct HctDbCsr { - HctDatabase *pDb; /* Database that owns this cursor */ - u32 iRoot; /* Root page cursor is opened on */ - KeyInfo *pKeyInfo; - UnpackedRecord *pRec; - int eDir; /* Direction cursor will step after Seek() */ - int bNosnap; /* The "no-snapshot" flag */ - - u8 *aRecord; /* Record in allocated memory */ - int nRecord; /* Size of aRecord[] in bytes */ - HctBuffer rec; /* Buffer used to manage aRecord[] */ - - struct CsrIntkey intkey; - struct CsrIndex index; - HctDbCsr *pNextScanner; - - int iCell; /* Current cell within page */ - HctFilePage pg; /* Current leaf page */ - - int nRange; - int nRangeAlloc; - HctDbRangeCsr *aRange; -}; - -#define HCTDB_MAX_DIRTY (HCTDB_MAX_PAGEARRAY-2) -// #define HCTDB_MAX_DIRTY (HCTDB_STATIC_PAGEARRAY-2) -#define HCTDB_MAX_PAGEARRAY 2048 -#define HCTDB_STATIC_PAGEARRAY (8+2) - -#define HCTDB_APPEND_MODE_THRESHOLD 5 - - -#define LARGEST_TID ((((u64)1)<<56)-1) -#define HCT_TID_ROLLBACK_OVERRIDE (((u64)0x01) << 56) - - -struct HctDbPageArray { - int nPg; - HctFilePage *aPg; - HctFilePage aStatic[HCTDB_STATIC_PAGEARRAY]; - HctFilePage *aDyn; - int nDyn; -}; - -typedef struct HctDbOverflow HctDbOverflow; -typedef struct HctDbOverflowArray HctDbOverflowArray; - -struct HctDbOverflow { - u32 pgno; - int nOvfl; -}; - -struct HctDbOverflowArray { - int nEntry; - int nAlloc; - HctDbOverflow *aOvfl; -}; - -typedef struct HctDbFPKey HctDbFPKey; -struct HctDbFPKey { - i64 iKey; - u8 *aKey; - HctBuffer buf; -}; - -/* -** -** iHeight: -** The height of the list that this writer is writing to. 0 for leaves, -** 1 for the parents of leaves, etc. -** -** aWritePg/nWritePg: -** -** nWriteKey: -** Number of hctDbInsert() calls since last flush - i.e. how many have to -** be retried if we hit a CAS failure and have to redo this write operation. 
-** -** iWriteFpKey/aWriteFpKey: -** These two variables store the fence-post key for the peer page of -** the rightmost page in the aWritePg[] array - aWritePg[nWritePg-1]. -** For intkey tables, iWriteFpKey is the 64-bit integer key value. For -** index tables, aWriteFpKey points to a buffer containing the FP key, -** and iWriteFpKey its size in bytes. The buffer is allocated with -** sqlite3_malloc(). -** -** If there is no peer page and writing to an intkey list, iWriteFpKey -** is set to LARGEST_INT64. If writing to an index list, aWriteFpKey is -** set to NULL and iWriteFpKey to 0. -** -** discardpg: -** Pages to the right of writepg[0] that will be removed from the list -** if the CAS instruction for this write succeeds. -** -** bAppend: -** True if the writer is in append mode. -** -** bDoCleanup: -** True if hctDbInsert() has been called since the most recent -** hctDbWriterCleanup(). -*/ -struct HctDbWriter { - int iHeight; /* Height to write at (0==leaves) */ - HctDbPageArray writepg; - int nWriteKey; /* Number of new keys in writepg array */ - - int bAppend; /* Writer is in "append" mode */ - HctDbFPKey fp; /* Fence-Post key. */ - - HctDbCsr writecsr; /* Used to find target page while writing */ - HctDbPageArray discardpg; - HctFilePage fanpg; - - int bDoCleanup; - int nEvictLocked; - u32 iEvictLockedPgno; - - HctDbOverflowArray delOvfl; /* Overflow chains to free on write */ - HctDbOverflowArray insOvfl; /* Overflow chains to free on don't-write */ - - int nOverflow; - - int nMigrateKey; -}; - -/* -** This is used by the rebalance operation implemented by hctDbBalance(). -** The first step of that operation is to assemble an array of these -** structures - one for each cell that will be distributed between the -** output pages. -** -** nByte: -** Total bytes of space required by cell on new page. This includes -** the header entry and the data stored in the cell area. -** -** aEntry: -** Pointer to buffer containing cell entry. 
Or NULL to indicate that -** the HctDbCellSz structure corresponds to a new cell being written -** (that is not on any input page). -** -** aCell: -** Only valid if (aEntry!=0). Pointer to buffer containing leaf-page -** portion of cell. -*/ -typedef struct HctDbCellSz HctDbCellSz; -struct HctDbCellSz { - int nByte; /* Size of cell in bytes */ - u8 *aEntry; /* Buffer containing cell entry */ - u8 *aCell; /* Buffer containing cell body */ -}; - -typedef struct HctBalance HctBalance; -struct HctBalance { - u8 *aPg[3]; - int nSzAlloc; /* Allocated size of aSz[] array */ - HctDbCellSz *aSz; /* aSz[] array */ -}; - -/* -** Given the database page-size as an argument, the maximum number of cells -** that may fit on any page with variable sized entries (an index leaf or node, -** or intkey leaf page). -*/ -#define MAX_CELLS_PER_PAGE(pgsz) ((pgsz) / 8) - -/* -** This structure, an instance of which is part of each HctDatabase object, -** holds counters collected for the hctstats structure. -*/ -typedef struct HctDatabaseStats HctDatabaseStats; -struct HctDatabaseStats { - i64 nBalanceIntkey; - i64 nBalanceIndex; - i64 nBalanceSingle; - i64 nTMapLookup; - i64 nUpdateInPlace; - i64 nInternalRetry; -}; - -/* -** pScannerList: -** Linked list of cursors used by the current transaction. If this turns -** out to be a write transaction, this list is used to detect read/write -** conflicts. -** -** iJrnlWriteCid: -** This value is set within calls to sqlite3_hct_journal_write(). The CID -** of the journal entry being written to the db. 
-*/ -struct HctDatabase { - HctFile *pFile; - HctConfig *pConfig; - i64 nCasFail; /* Number cas-collisions so far */ - int pgsz; /* Page size in bytes */ - - u8 *aTmp; /* Temp buffer pgsz bytes in size */ - HctBalance *pBalance; /* Space for hctDbBalance() */ - - HctDbCsr *pScannerList; - - u64 iJrnlWriteCid; - - HctTMap *pTmap; /* Transaction map (non-NULL if trans open) */ - u64 iSnapshotId; /* Snapshot id for reading */ - u64 iLocalMinTid; - HctDbWriter pa; - HctDbCsr rbackcsr; /* Used to find old values during rollback */ - u64 iTid; /* Transaction id for writing */ - u64 nWriteCount; /* Write-count at start of commit */ - - int eMode; /* HCT_MODE_XXX constant */ - int bConcurrent; /* Collect validation information */ - - int (*xSavePhysical)(void*, i64); - void *pSavePhysical; - - HctDatabaseStats stats; -}; - -/* -** Values for HctDatabase.eMode. -*/ -#define HCT_MODE_NORMAL 0 -#define HCT_MODE_ROLLBACK 1 -#define HCT_MODE_VALIDATE 3 - - -/* -** 8-byte database page header. Described in fileformat.wiki. -*/ -struct HctDbPageHdr { - u8 hdrFlags; - u8 nHeight; /* 0 for leaves, 1 for parents etc. */ - u16 nEntry; - u32 iPeerPg; -}; - -/* -** Page types. These are the values that may appear in the page-type -** field of a page header. -*/ -#define HCT_PAGETYPE_INTKEY 0x01 -#define HCT_PAGETYPE_INDEX 0x03 -#define HCT_PAGETYPE_OVERFLOW 0x05 -#define HCT_PAGETYPE_HISTORY 0x06 - -#define HCT_PAGETYPE_MASK 0x07 - -/* -** Page types may be ORed with the following: -*/ -#define HCT_PAGETYPE_LEFTMOST 0x80 - -#define hctPagetype(p) (((HctDbPageHdr*)(p))->hdrFlags&HCT_PAGETYPE_MASK) -#define hctIsLeftmost(p) (((HctDbPageHdr*)(p))->hdrFlags&HCT_PAGETYPE_LEFTMOST) -#define hctPageheight(p) (((HctDbPageHdr*)(p))->nHeight) -#define hctPagenentry(p) (((HctDbPageHdr*)(p))->nEntry) -#define hctPagePeer(p) (((HctDbPageHdr*)(p))->iPeerPg) - -/* -** 16-byte leaf page header. Used by both index and intkey leaf pages. -** Described in fileformat.wiki. 
-*/ -struct HctDbLeafHdr { - u16 nFreeGap; /* Size of free-space region, in bytes */ - u16 nFreeBytes; /* Total free bytes on page */ - u32 unused; -}; - -struct HctDbLeaf { - HctDbPageHdr pg; - HctDbLeafHdr hdr; -}; - - -struct HctDbIntkeyEntry { - u32 nSize; /* 0: Total size of data (local+overflow) */ - u16 iOff; /* 4: Offset of record within this page */ - u8 flags; /* 6: Flags (see below) */ - u8 unused; /* 7: */ - i64 iKey; /* 8: Integer key value */ -}; - -struct HctDbIndexEntry { - u32 nSize; /* 0: Total size of data (local+overflow) */ - u16 iOff; /* 4: Offset of record within this page */ - u8 flags; /* 6: Flags (see below) */ - u8 unused; /* 7: */ -}; - -struct HctDbIndexNodeEntry { - u32 nSize; - u16 iOff; - u8 flags; - u8 unused; - u32 iChildPg; -}; - -struct HctDbIntkeyNodeEntry { - i64 iKey; /* Value of FP key on page iChild */ - u32 iChildPg; /* Child page */ - u32 unused; -}; - -struct HctDbIntkeyNode { - HctDbPageHdr pg; - HctDbIntkeyNodeEntry aEntry[0]; -}; - -struct HctDbIntkeyLeaf { - HctDbPageHdr pg; - HctDbLeafHdr hdr; - HctDbIntkeyEntry aEntry[0]; -}; - -struct HctDbIndexLeaf { - HctDbPageHdr pg; - HctDbLeafHdr hdr; - HctDbIndexEntry aEntry[0]; -}; - -struct HctDbIndexNodeHdr { - u16 nFreeGap; /* Size of free-space region, in bytes */ - u16 nFreeBytes; /* Total free bytes on page */ -}; - -struct HctDbIndexNode { - HctDbPageHdr pg; - HctDbIndexNodeHdr hdr; - HctDbIndexNodeEntry aEntry[0]; -}; - -/* -** History fanout page. -** -** iSplit0: -** The index of a key in page aPgOld1[0]. This key is the first that -** should be considered in aPgOld1[0]. Implying that no key equal to -** or greater than this from pgOld0 should be considered. -*/ -struct HctDbHistoryFan { - HctDbPageHdr pg; - - u64 iRangeTid0; - u64 iFollowTid0; - u32 pgOld0; - - int iSplit0; - - u64 iRangeTid1; - u32 aPgOld1[0]; -}; - -/* -** Structure for reading/writing cells from and to pages. 
-*/ -typedef struct HctDbCell HctDbCell; -struct HctDbCell { - u64 iTid; - u64 iRangeTid; - u32 iRangeOld; - u32 iOvfl; - const u8 *aPayload; -}; - -#if 1 -__attribute__ ((noinline)) -static void hctMemcpy(void *a, const void *b, size_t c){ - if( c ) memcpy(a, b, c); -} -#else -# define hctMemcpy memcpy -#endif - - - -/* -** Flags for HctDbIntkeyEntry.flags -*/ -#define HCTDB_HAS_TID 0x01 /* 8 bytes */ -#define HCTDB_HAS_OVFL 0x04 /* 4 bytes */ -#define HCTDB_HAS_RANGETID 0x08 /* 8 bytes */ -#define HCTDB_HAS_RANGEOLD 0x10 /* 4 bytes */ - -#define HCTDB_MAX_EXTRA_CELL_DATA (8+4+8+4) - -SQLITE_PRIVATE int sqlite3HctBufferGrow(HctBuffer *pBuf, int nSize){ - int rc = SQLITE_OK; - if( nSize>pBuf->nAlloc ){ - u8 *aNew = sqlite3_realloc(pBuf->aBuf, nSize); - if( aNew==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - pBuf->aBuf = aNew; - pBuf->nAlloc = nSize; - } - } - return rc; -} - -SQLITE_PRIVATE void sqlite3HctBufferFree(HctBuffer *pBuf){ - sqlite3_free(pBuf->aBuf); - memset(pBuf, 0, sizeof(HctBuffer)); -} - -static int hctBufferSet(HctBuffer *pBuf, const u8 *aData, int nData){ - int rc = sqlite3HctBufferGrow(pBuf, nData); - if( rc==SQLITE_OK ){ - hctMemcpy(pBuf->aBuf, aData, nData); - } - return rc; -} - - -#ifdef SQLITE_DEBUG -static int hctSqliteBusy(int iLine){ - return SQLITE_BUSY_SNAPSHOT; -} -# define HCT_SQLITE_BUSY hctSqliteBusy(__LINE__) -#else -# define HCT_SQLITE_BUSY SQLITE_BUSY_SNAPSHOT -#endif /* SQLITE_DEBUG */ - -static u64 hctDbTMapLookup(HctDatabase *pDb, u64 iTid, u64 *peState){ - u64 iVal = 0; - HctTMap *pTmap = pDb->pTmap; - if( iTid==LARGEST_TID ){ - *peState = HCT_TMAP_ROLLBACK; - }else if( iTidiFirstTid ){ - *peState = HCT_TMAP_COMMITTED; - }else{ - int iMap = (iTid - pTmap->iFirstTid) / HCT_TMAP_PAGESIZE; - - if( iMap>=pTmap->nMap ){ - HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(pDb->pFile); - sqlite3HctTMapUpdate(pTMapClient, &pDb->pTmap); - assert( iTid<(pDb->pTmap->nMap*HCT_TMAP_PAGESIZE)+pDb->pTmap->iFirstTid ); - return 
hctDbTMapLookup(pDb, iTid, peState); - } - - { - int iOff = (iTid - pTmap->iFirstTid) % HCT_TMAP_PAGESIZE; - iOff = HCT_TMAP_ENTRYSLOT(iOff); - iVal = AtomicLoad(&pTmap->aaMap[iMap][iOff]); - pDb->stats.nTMapLookup++; - } - - *peState = (iVal & HCT_TMAP_STATE_MASK); - } - return (iVal & HCT_TMAP_CID_MASK); -} - - -static void print_out_tmap(HctDatabase *pDb, int nLimit){ - int ii; - - for(ii=0; iipTmap->iFirstTid + ii; - u64 iCid = hctDbTMapLookup(pDb, iTid, &eState); - - printf("tid=%d -> (%s, %d)\n", (int)iTid, - eState==HCT_TMAP_WRITING ? "WRITING" : - eState==HCT_TMAP_VALIDATING ? "VALIDATING" : - eState==HCT_TMAP_ROLLBACK ? "ROLLBACK" : - eState==HCT_TMAP_COMMITTED ? "COMMITTED" : "???", - (int)iCid - ); - } - -} - -static void hctDbPageArrayReset(HctDbPageArray *pArray){ - sqlite3_free(pArray->aDyn); - pArray->nPg = 0; - pArray->aPg = pArray->aStatic; - pArray->aDyn = 0; - pArray->nDyn = 0; -} - -static int hctDbPageArrayGrow(HctDbPageArray *pArray){ - assert( pArray->aDyn==0 ); - pArray->aDyn = sqlite3MallocZero(sizeof(HctFilePage) * HCTDB_MAX_PAGEARRAY); - if( pArray->aDyn==0 ){ - return SQLITE_NOMEM_BKPT; - } - pArray->nDyn = HCTDB_MAX_PAGEARRAY; - pArray->aPg = pArray->aDyn; - hctMemcpy(pArray->aPg, pArray->aStatic, - sizeof(HctFilePage)*HCTDB_STATIC_PAGEARRAY - ); - return SQLITE_OK; -} - -/* -** Grow the dynamic arrays used by the writer, if necessary -*/ -static int hctDbWriterGrow(HctDbWriter *pWriter){ - int rc = SQLITE_OK; - if( pWriter->writepg.aDyn==0 ){ - if( pWriter->writepg.nPg>=(HCTDB_STATIC_PAGEARRAY-2) - || pWriter->discardpg.nPg>=(HCTDB_STATIC_PAGEARRAY-2) - ){ - rc = hctDbPageArrayGrow(&pWriter->writepg); - if( rc==SQLITE_OK ){ - rc = hctDbPageArrayGrow(&pWriter->discardpg); - } - } - } - return rc; -} - -SQLITE_PRIVATE HctDatabase *sqlite3HctDbOpen( - int *pRc, - const char *zFile, - HctConfig *pConfig -){ - int rc = *pRc; - HctDatabase *pNew = 0; - - pNew = (HctDatabase*)sqlite3HctMalloc(&rc, sizeof(*pNew)); - if( pNew ){ - pNew->pFile = 
sqlite3HctFileOpen(&rc, zFile, pConfig); - pNew->pConfig = pConfig; - if( pNew->pFile ) pNew->pgsz = sqlite3HctFilePgsz(pNew->pFile); - } - - if( rc!=SQLITE_OK ){ - sqlite3HctDbClose(pNew); - pNew = 0; - } - - *pRc = rc; - return pNew; -} - -SQLITE_PRIVATE int sqlite3HctDbPagesize(HctDatabase *pDb){ - return pDb->pgsz; -} - - -SQLITE_PRIVATE void sqlite3HctDbClose(HctDatabase *p){ - if( p ){ - sqlite3_free(p->aTmp); - sqlite3HctFileClose(p->pFile); - p->pFile = 0; - sqlite3_free(p->pBalance); - sqlite3_free(p); - } -} - -SQLITE_PRIVATE HctFile *sqlite3HctDbFile(HctDatabase *pDb){ - return pDb->pFile; -} - -SQLITE_PRIVATE int sqlite3HctDbRootNew(HctDatabase *p, u32 *piRoot){ - return sqlite3HctFileRootPgno(p->pFile, piRoot); -} - -SQLITE_PRIVATE int sqlite3HctDbRootFree(HctDatabase *p, u32 iRoot){ - return sqlite3HctFileRootFree(p->pFile, iRoot); -} - -SQLITE_PRIVATE void sqlite3HctDbRootPageInit( - int bIndex, /* True for an index, false for intkey */ - u8 *aPage, /* Buffer to initialize */ - int szPage /* Size of aPage[] in bytes */ -){ - HctDbLeaf *pLeaf = (HctDbLeaf*)aPage; - memset(aPage, 0, szPage); - if( bIndex ){ - pLeaf->pg.hdrFlags = HCT_PAGETYPE_INDEX | HCT_PAGETYPE_LEFTMOST; - }else{ - pLeaf->pg.hdrFlags = HCT_PAGETYPE_INTKEY | HCT_PAGETYPE_LEFTMOST; - } - pLeaf->hdr.nFreeBytes = szPage - sizeof(HctDbLeaf); - pLeaf->hdr.nFreeGap = pLeaf->hdr.nFreeBytes; -} - -static void hctDbRootPageInit( - int bIndex, /* True for an index, false for intkey */ - int nHeight, /* Initial height */ - u32 iChildPg, /* Child page number */ - u8 *aPage, /* Buffer to initialize */ - int szPage /* Size of aPage[] in bytes */ -){ - HctDbPageHdr *pPg = (HctDbPageHdr*)aPage; - memset(aPage, 0, szPage); - if( bIndex ){ - pPg->hdrFlags = HCT_PAGETYPE_INDEX | HCT_PAGETYPE_LEFTMOST; - }else{ - pPg->hdrFlags = HCT_PAGETYPE_INTKEY | HCT_PAGETYPE_LEFTMOST; - } - if( nHeight>0 ){ - pPg->nHeight = nHeight; - pPg->nEntry = 1; - if( bIndex ){ - HctDbIndexNode *pNode = (HctDbIndexNode*)pPg; - 
pNode->aEntry[0].iChildPg = iChildPg; - pNode->hdr.nFreeBytes = - szPage - sizeof(HctDbIndexNode) - sizeof(HctDbIndexNodeEntry); - pNode->hdr.nFreeGap = pNode->hdr.nFreeBytes; - }else{ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pPg; - pNode->aEntry[0].iKey = SMALLEST_INT64; - pNode->aEntry[0].iChildPg = iChildPg; - } - }else{ - HctDbLeaf *pLeaf = (HctDbLeaf*)pPg; - pLeaf->hdr.nFreeBytes = szPage - sizeof(HctDbLeaf); - pLeaf->hdr.nFreeGap = pLeaf->hdr.nFreeBytes; - } -} - -/* -** Open a read transaction, if one is not already open. -*/ -SQLITE_PRIVATE int sqlite3HctDbStartRead(HctDatabase *pDb, HctJournal *pJrnl){ - int rc = SQLITE_OK; - - assert( (pDb->iSnapshotId==0)==(pDb->pTmap==0) ); - assert( pDb->iSnapshotId!=0 || pDb->bConcurrent==0 ); - if( pDb->iSnapshotId==0 && SQLITE_OK==(rc=sqlite3HctFileNewDb(pDb->pFile)) ){ - if( pDb->aTmp==0 ){ - pDb->pgsz = sqlite3HctFilePgsz(pDb->pFile); - pDb->aTmp = (u8*)sqlite3HctMalloc(&rc, pDb->pgsz); - } - if( rc==SQLITE_OK ){ - u64 iSnapshot = 0; - HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(pDb->pFile); - - iSnapshot = sqlite3HctJournalSnapshot(pJrnl); - rc = sqlite3HctTMapBegin(pTMapClient, iSnapshot, &pDb->pTmap); - assert( rc==SQLITE_OK ); /* todo */ - - iSnapshot = sqlite3HctJournalSnapshot(pJrnl); - if( iSnapshot==0 ){ - iSnapshot = sqlite3HctFileGetSnapshotid(pDb->pFile); - } - pDb->iSnapshotId = iSnapshot; - pDb->iLocalMinTid = sqlite3HctTMapCommitedTID(pTMapClient); - assert( pDb->iSnapshotId>0 ); - } - } - - return rc; -} - -static u64 hctGetU64(const u8 *a){ - u64 ret; - hctMemcpy(&ret, a, sizeof(u64)); - return ret; -} -static u32 hctGetU32(const u8 *a){ - u32 ret; - hctMemcpy(&ret, a, sizeof(u32)); - return ret; -} - -static void hctPutU32(u8 *a, u32 val){ - hctMemcpy(a, &val, sizeof(u32)); -} - -/* -** Return true if TID iTid maps to a commit-id visible to the current -** client. Or false otherwise. 
-*/ -static int hctDbTidIsVisible(HctDatabase *pDb, u64 iTid, int bNosnap){ - - if( (iTid & HCT_TID_MASK)<=pDb->iLocalMinTid ) return 1; - while( 1 ){ - u64 eState = 0; - u64 iCid = hctDbTMapLookup(pDb, (iTid & HCT_TID_MASK), &eState); - if( iTid & HCT_TID_ROLLBACK_OVERRIDE ){ - eState = HCT_TMAP_COMMITTED; - } - if( eState==HCT_TMAP_WRITING || eState==HCT_TMAP_ROLLBACK ){ - return 0; - } - if( eState==HCT_TMAP_COMMITTED ){ - if( bNosnap==0 && iCid>pDb->iSnapshotId ){ - return 0; - } - return 1; - } - assert( eState==HCT_TMAP_VALIDATING ); - if( iCid>pDb->iSnapshotId || iTid==pDb->iTid ){ - return 0; - } - } - - assert( 0 ); - return 0; -} - -/* -** This is called when writing keys to the database as part of committing -** a transaction. One of the writes will clobber a key associated with -** transaction-id iTid. This function returns true if this represents -** a write/write conflict and the transaction should be rolled back, or -** false if the write should proceed. -*/ -static int hctDbTidIsConflict(HctDatabase *pDb, u64 iTid){ - if( iTid==pDb->iTid || iTid<=pDb->iLocalMinTid || iTid==LARGEST_TID ){ - return 0; - }else{ - u64 eState = 0; - u64 iCid = hctDbTMapLookup(pDb, iTid & HCT_TID_MASK, &eState); - - /* This should only be called while writing or validating. */ - assert( pDb->iTid ); - if( iTid & HCT_TID_ROLLBACK_OVERRIDE ){ - eState = HCT_TMAP_COMMITTED; - } - - if( eState==HCT_TMAP_COMMITTED && iCid<=pDb->iSnapshotId ) return 0; - if( iCid==pDb->iJrnlWriteCid ) return 0; - return 1; - - if( eState==HCT_TMAP_WRITING || eState==HCT_TMAP_VALIDATING ) return 1; - - /* It's tempting to return 0 here - how can a key that has been rolled - ** back be a conflict? The problem is that the previous version of the - ** key - the one before this rolled back version - may be a write/write - ** conflict. Ideally, this code would check that and return accordingly. 
*/ - if( eState==HCT_TMAP_ROLLBACK ) return 1; - - assert( eState==HCT_TMAP_COMMITTED ); - return (iCid > pDb->iSnapshotId); - } -} - - -static int hctDbOffset(int iOff, int flags){ - static const int aVal[] = { - 0+0+0+0+0, 0+0+0+0+8, 0+0+0+0+0, 0+0+0+0+8, - 0+0+4+0+0, 0+0+4+0+8, 0+0+4+0+0, 0+0+4+0+8, - 0+8+0+0+0, 0+8+0+0+8, 0+8+0+0+0, 0+8+0+0+8, - 0+8+4+0+0, 0+8+4+0+8, 0+8+4+0+0, 0+8+4+0+8, - - 4+0+0+0+0, 4+0+0+0+8, 4+0+0+0+0, 4+0+0+0+8, - 4+0+4+0+0, 4+0+4+0+8, 4+0+4+0+0, 4+0+4+0+8, - 4+8+0+0+0, 4+8+0+0+8, 4+8+0+0+0, 4+8+0+0+8, - 4+8+4+0+0, 4+8+4+0+8, 4+8+4+0+0, 4+8+4+0+8, - }; - - assert( HCTDB_HAS_RANGEOLD==0x10 ); /* +4 */ - assert( HCTDB_HAS_RANGETID==0x08 ); /* +8 */ - assert( HCTDB_HAS_OVFL==0x04 ); /* +4 */ - assert( HCTDB_HAS_TID==0x01 ); /* +8 */ - - assert( aVal[ flags & 0x1F ]==( - ((flags & HCTDB_HAS_TID) ? 8 : 0) - + ((flags & HCTDB_HAS_RANGETID) ? 8 : 0) - + ((flags & HCTDB_HAS_RANGEOLD) ? 4 : 0) - + ((flags & HCTDB_HAS_OVFL) ? 4 : 0) - )); - - return iOff + aVal[ flags&0x1F ]; -} - - -/* -** Wrapper around sqlite3HctFilePageGetPhysical() that also invokes the -** xSavePhysical callback, if one is configured. -*/ -static int hctDbGetPhysical(HctDatabase *pDb, u32 iPg, HctFilePage *pPg){ - int rc = sqlite3HctFilePageGetPhysical(pDb->pFile, iPg, pPg); - if( rc==SQLITE_OK && pDb->xSavePhysical ){ - rc = pDb->xSavePhysical(pDb->pSavePhysical, (i64)iPg); - } - return rc; -} - -/* -** Load the meta-data record from the database and store it in buffer aBuf -** (size nBuf bytes). The meta-data record is stored with rowid=0 int the -** intkey table with root-page=2. 
-*/ -SQLITE_PRIVATE int sqlite3HctDbGetMeta(HctDatabase *pDb, u8 *aBuf, int nBuf){ - HctFilePage pg; - int rc; - - assert( pDb->iSnapshotId ); - memset(aBuf, 0, nBuf); - rc = sqlite3HctFilePageGet(pDb->pFile, 2, &pg); - while( rc==SQLITE_OK ){ - HctDbIntkeyLeaf *pLeaf = (HctDbIntkeyLeaf*)pg.aOld; - int iOff; - u8 flags; - - if( pLeaf->pg.nEntry==0 ){ - break; - } - - assert( pLeaf->pg.nEntry==1 ); - assert( pLeaf->aEntry[0].iKey==0 ); - assert( pLeaf->aEntry[0].nSize==nBuf ); - iOff = pLeaf->aEntry[0].iOff; - flags = pLeaf->aEntry[0].flags; - - assert( flags==HCTDB_HAS_TID - || flags==(HCTDB_HAS_RANGEOLD|HCTDB_HAS_RANGETID|HCTDB_HAS_TID) - ); - if( (flags & HCTDB_HAS_RANGEOLD) - && 0==hctDbTidIsVisible(pDb, hctGetU64(&pg.aOld[iOff]), 0) - ){ - u32 iOld = hctGetU32(&pg.aOld[iOff+8+8]); - if( iOld==0 ) break; - sqlite3HctFilePageRelease(&pg); - rc = hctDbGetPhysical(pDb, iOld, &pg); - }else{ - iOff = hctDbOffset(iOff, pLeaf->aEntry[0].flags ); - hctMemcpy(aBuf, &pg.aOld[iOff], nBuf); - sqlite3HctFilePageRelease(&pg); - break; - } - } - - return rc; -} - -SQLITE_PRIVATE void sqlite3HctDbTransIsConcurrent(HctDatabase *pDb, int eConcurrent){ - pDb->bConcurrent = (eConcurrent!=0); -} - -static int hctDbValidateMeta(HctDatabase *pDb){ - int rc = SQLITE_OK; - HctFilePage pg; - - assert( pDb->iSnapshotId>0 ); - rc = sqlite3HctFilePageGet(pDb->pFile, 2, &pg); - if( rc==SQLITE_OK ){ - HctDbIntkeyEntry *p = &((HctDbIntkeyLeaf*)pg.aOld)->aEntry[0]; - if( p->flags & HCTDB_HAS_TID ){ - u64 iTid = hctGetU64(&pg.aOld[p->iOff]); - if( hctDbTidIsConflict(pDb, iTid) ) rc = HCT_SQLITE_BUSY; - } - sqlite3HctFilePageRelease(&pg); - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctDbRootInit(HctDatabase *p, int bIndex, u32 iRoot){ - HctFilePage pg; - int rc = SQLITE_OK; - - rc = sqlite3HctFileRootNew(p->pFile, iRoot, &pg); - if( rc==SQLITE_OK ){ - sqlite3HctDbRootPageInit(bIndex, pg.aNew, p->pgsz); - rc = sqlite3HctFilePageRelease(&pg); - } - return rc; -} - -static i64 
hctDbIntkeyFPKey(const void *aPg){ - if( ((HctDbPageHdr*)aPg)->nHeight==0 ){ - return ((HctDbIntkeyLeaf*)aPg)->aEntry[0].iKey; - } - return ((HctDbIntkeyNode*)aPg)->aEntry[0].iKey; -} - - -static i64 hctDbGetIntkey(const u8 *aTarget, int iCell){ - assert( hctPagetype(aTarget)==HCT_PAGETYPE_INTKEY ); - assert( hctPageheight(aTarget)==0 ); - assert( iCell>=0 && iCell<((HctDbIntkeyLeaf*)aTarget)->pg.nEntry ); - - return ((HctDbIntkeyLeaf*)aTarget)->aEntry[iCell].iKey; -} - -#if 0 -static i64 hctDbGetIntkeyFromPhys( - int *pRc, - HctDatabase *pDb, - u32 iPhys, - int iCell -){ - i64 iRet = 0; - int rc = *pRc; - if( rc==SQLITE_OK ){ - HctFilePage pg; - rc = sqlite3HctFilePageGetPhysical(pDb->pFile, iPhys, &pg); - if( rc==SQLITE_OK ){ - iRet = hctDbGetIntkey(pg.aOld, iCell); - sqlite3HctFilePageRelease(&pg); - } - } - *pRc = rc; - return iRet; -} -#endif - - -/* -** Buffer aPg contains an intkey leaf page. -** -** This function searches the leaf page for key iKey. If found, it returns -** the index of the matching key within the page and sets output variable -** (*pbExact) to 1. If there is no match for key iKey, this function returns -** the index of the smallest key on the page that is larger than iKey, or -** (nEntry) if all keys on the page are smaller than iKey. (*pbExact) is -** set to 0 before returning in this case. 
-*/ -static int hctDbIntkeyLeafSearch( - const u8 *aPg, - i64 iKey, - int *pbExact -){ - const HctDbIntkeyLeaf *pLeaf = (const HctDbIntkeyLeaf*)aPg; - int i1 = 0; - int i2 = pLeaf->pg.nEntry; - - assert( hctPagetype(aPg)==HCT_PAGETYPE_INTKEY ); - assert( pLeaf->pg.nHeight==0 ); - while( i2>i1 ){ - int iTest = (i1+i2)/2; - i64 iPgKey = pLeaf->aEntry[iTest].iKey; - if( iPgKey==iKey ){ - *pbExact = 1; - return iTest; - }else if( iPgKey=0 ); - assert( i2==pLeaf->pg.nEntry || iKeyaEntry[i2].iKey ); - assert( i2==0 || iKey>pLeaf->aEntry[i2-1].iKey ); - - *pbExact = 0; - return i2; -} - -static int hctDbIntkeyLocalsize(int pgsz, int nSize){ - const int nMax = ( - pgsz - - sizeof(HctDbIntkeyLeaf) - - sizeof(HctDbIntkeyEntry) - - (HCTDB_MAX_EXTRA_CELL_DATA - sizeof(u32)) - ); - - int nLocal; - if( nSize (nMax-sizeof(u32)) ){ - nLocal = nMin; - } - } - - return nLocal; -} - -static int hctDbIndexLocalsize(int pgsz, int nSize){ - int nLocal; - int nMax = pgsz/4; - if( nSizenMax ){ - nLocal = nMin; - } - } - return nLocal; -} - -static int hctDbLocalsize(const u8 *aPg, int pgsz, int nSize){ - if( hctPagetype(aPg)==HCT_PAGETYPE_INTKEY ){ - return hctDbIntkeyLocalsize(pgsz, nSize); - } - return hctDbIndexLocalsize(pgsz, nSize); -} - -static int hctDbIntkeyEntrySize(HctDbIntkeyEntry *pEntry, int pgsz){ - int sz = hctDbIntkeyLocalsize(pgsz, pEntry->nSize) - + hctDbOffset(0, pEntry->flags); - return sz; -} - -static int hctDbIndexEntrySize(HctDbIndexEntry *pEntry, int pgsz){ - int sz = hctDbIndexLocalsize(pgsz, pEntry->nSize) - + hctDbOffset(0, pEntry->flags); - return sz; -} - -static int hctDbIndexNodeEntrySize(HctDbIndexNodeEntry *pEntry, int pgsz){ - return hctDbIndexLocalsize(pgsz, pEntry->nSize) - + ((pEntry->flags & HCTDB_HAS_OVFL) ? 4 : 0); -} - -/* -** The pointer passed as the first argument is a pointer to a buffer -** containing a page that uses variable sized records. That is, an -** intkey leaf page, or an index leaf or node page. 
This function -** returns the number of bytes of record-area space consumed by -** entry iEntry on the page. -*/ -static int hctDbPageRecordSize(void *aPg, int pgsz, int iEntry){ - int eType = hctPagetype(aPg); - if( eType==HCT_PAGETYPE_INTKEY ){ - assert( hctPageheight(aPg)==0 ); - return hctDbIntkeyEntrySize(&((HctDbIntkeyLeaf*)aPg)->aEntry[iEntry], pgsz); - }else if( hctPageheight(aPg)==0 ){ - return hctDbIndexEntrySize(&((HctDbIndexLeaf*)aPg)->aEntry[iEntry], pgsz); - } - return hctDbIndexNodeEntrySize(&((HctDbIndexNode*)aPg)->aEntry[iEntry], pgsz); -} -static int hctDbPageEntrySize(void *aPg){ - int eType = hctPagetype(aPg); - if( eType==HCT_PAGETYPE_INTKEY ){ - assert( hctPageheight(aPg)==0 ); - return sizeof(HctDbIntkeyEntry); - }else if( hctPageheight(aPg)==0 ){ - return sizeof(HctDbIndexEntry); - } - return sizeof(HctDbIndexNodeEntry); -} - -/* -** The buffer passed as the first argument contains a page that is -** guaranteed to be either an intkey leaf, or an index leaf or node. -** This function returns a pointer to HctDbIndexEntry structure -** associated with page entry iEntry. -*/ -static HctDbIndexEntry *hctDbEntryEntry(const void *aPg, int iEntry){ - int iOff; - - assert( (hctPagetype(aPg)==HCT_PAGETYPE_INTKEY && hctPageheight(aPg)==0) - || (hctPagetype(aPg)==HCT_PAGETYPE_INDEX) - ); - - if( hctPagetype(aPg)==HCT_PAGETYPE_INTKEY ){ - iOff = sizeof(HctDbIntkeyLeaf) + iEntry*sizeof(HctDbIntkeyEntry); - }else if( hctPageheight(aPg)==0 ){ - iOff = sizeof(HctDbIndexLeaf) + iEntry*sizeof(HctDbIndexEntry); - }else{ - iOff = sizeof(HctDbIndexNode) + iEntry*sizeof(HctDbIndexNodeEntry); - } - - return (HctDbIndexEntry*)&((u8*)aPg)[iOff]; -} - -/* -** Argument aPg[] is a buffer containing either an index tree page, or an -** intkey leaf page. This function locates the record associated with -** cell iCell on the page, and populates output variables *pnData and -** *paData with the size and a pointer to a buffer containing the record, -** respectively. 
-** -** If the record in cell iCell does not overflow the page, (*paData) is -** set to point into the body of the page itself. If the record does -** overflow the page, then buffer pBuf is used to store the record and -** (*paData) is set to point to the buffer's allocation. In this case -** it is the responsibility of the caller to eventually release the buffer. -** -** SQLITE_OK is returned if successful, or an SQLite error code otherwise. -*/ -static int hctDbLoadRecord( - HctDatabase *pDb, - HctBuffer *pBuf, - const u8 *aPg, - int iCell, - int *pnData, - const u8 **paData -){ - int rc = SQLITE_OK; - HctDbIndexEntry *p = hctDbEntryEntry(aPg, iCell); - - *pnData = p->nSize; - if( paData ){ - if( p->flags & HCTDB_HAS_OVFL ){ - rc = sqlite3HctBufferGrow(pBuf, p->nSize); - *paData = pBuf->aBuf; - if( rc==SQLITE_OK ){ - u32 pgOvfl; - int nLocal = hctDbLocalsize(aPg, pDb->pgsz, p->nSize); - - int iOff = hctDbOffset(p->iOff, p->flags); - hctMemcpy(pBuf->aBuf, &aPg[iOff], nLocal); - pgOvfl = hctGetU32(&aPg[iOff-sizeof(u32)]); - iOff = nLocal; - - while( rc==SQLITE_OK && iOffnSize ){ - HctFilePage ovfl; - rc = hctDbGetPhysical(pDb, pgOvfl, &ovfl); - if( rc==SQLITE_OK ){ - int nCopy = MIN(pDb->pgsz-8, p->nSize-iOff); - hctMemcpy(&pBuf->aBuf[iOff],&ovfl.aOld[sizeof(HctDbPageHdr)],nCopy); - iOff += nCopy; - pgOvfl = ((HctDbPageHdr*)ovfl.aOld)->iPeerPg; - sqlite3HctFilePageRelease(&ovfl); - } - } - } - }else{ - int iOff = hctDbOffset(p->iOff, p->flags); - *paData = &aPg[iOff]; - } - } - - return rc; -} - -/* -** Buffer aPg[] contains either an index page or an intkey leaf (i.e. a page -** that contains variable length records). This function loads the record -** associated with cell iCell on the page, and populates output object -** pFP with the results. -** -** SQLITE_OK is returned if successful, or an SQLite error code otherwise. 
-*/ -static int hctDbLoadRecordFP( - HctDatabase *pDb, /* Database handle */ - const u8 *aPg, /* Page to load record from */ - int iCell, /* Cell to load */ - HctDbFPKey *pFP /* Populate this structure with record */ -){ - const u8 *aKey = 0; - int nKey = 0; - int rc = SQLITE_OK; - - rc = hctDbLoadRecord(pDb, &pFP->buf, aPg, iCell, &nKey, &aKey); - if( rc==SQLITE_OK ){ - if( aKey!=pFP->buf.aBuf ){ - rc = sqlite3HctBufferGrow(&pFP->buf, nKey); - if( rc==SQLITE_OK ){ - hctMemcpy(pFP->buf.aBuf, aKey, nKey); - } - } - pFP->iKey = nKey; - pFP->aKey = pFP->buf.aBuf; - } - - return rc; -} - -/* -** Buffer aPg[] contains a history fan page. -** -** This page searches the page, returning the index of the entry that -** points to the page with the largest key that is less than or equal -** to parameter pKey/iKey. -*/ -static int hctDbFanSearch( - int *pRc, - HctDatabase *pDb, - const u8 *aPg, - UnpackedRecord *pKey, - i64 iKey -){ - HctDbHistoryFan *pFan = (HctDbHistoryFan*)aPg; - int rc = *pRc; - int i1 = 0; - int i2 = pFan->pg.nEntry-1; - HctBuffer buf = {0, 0, 0}; - - assert( hctPagetype(aPg)==HCT_PAGETYPE_HISTORY ); - - while( rc==SQLITE_OK && i2>i1 ){ - HctFilePage pg; - int iTest = (i1+i2)/2; - - rc = hctDbGetPhysical(pDb, pFan->aPgOld1[iTest], &pg); - while( rc==SQLITE_OK && hctPagetype(pg.aOld)==HCT_PAGETYPE_HISTORY ){ - HctDbHistoryFan *pFan = (HctDbHistoryFan*)pg.aOld; - rc = hctDbGetPhysical(pDb, pFan->pgOld0, &pg); - } - if( rc==SQLITE_OK ){ - int iCell = (iTest==0 ? 
pFan->iSplit0 : 0); - - assert( pKey || hctPagetype(pg.aOld)==HCT_PAGETYPE_INTKEY ); - assert( pKey==0 || hctPagetype(pg.aOld)==HCT_PAGETYPE_INDEX ); - - if( pKey==0 ){ - i64 iPgKey = hctDbGetIntkey(pg.aOld, iCell); - if( iPgKey==iKey ){ - i1 = i2 = iTest+1; - }else if( iPgKeypKeyInfo->db, pRec); - } -} - -static UnpackedRecord *hctDbAllocateUnpacked(int *pRc, KeyInfo *pKeyInfo){ - UnpackedRecord *pRet = 0; - if( *pRc==SQLITE_OK ){ - pRet = sqlite3VdbeAllocUnpackedRecord(pKeyInfo); - if( pRet==0 ) *pRc = SQLITE_NOMEM_BKPT; - } - return pRet; -} - -SQLITE_PRIVATE void sqlite3HctDbRecordTrim(UnpackedRecord *pRec){ - if( pRec && pRec->pKeyInfo->nUniqField ){ - int ii; - u16 nUniqField = pRec->pKeyInfo->nUniqField; - for(ii=0; iiaMem[ii].flags & MEM_Null ){ - return; - } - } - pRec->nField = nUniqField; - } -} - - -/* -** This function returns the current snapshot-id. It may only be called -** when a read transaction is active. -*/ -SQLITE_PRIVATE i64 sqlite3HctDbSnapshotId(HctDatabase *pDb){ - assert( pDb->iSnapshotId>0 ); - return pDb->iSnapshotId; -} - -/* -** Load the key belonging to cell iCell on page aPg[] into structure (*pKey). -*/ -static void hctDbGetKey( - int *pRc, - HctDatabase *pDb, - KeyInfo *pKeyInfo, - int bDup, - const u8 *aPg, - int iCell, - HctDbKey *pKey -){ - int rc = *pRc; - - if( rc==SQLITE_OK ){ - assert( hctPageheight(aPg)==0 ); - assert( iCell>=0 && iCelliKey = hctDbGetIntkey(aPg, iCell); - }else{ - const u8 *aRec = 0; - int nRec = 0; - rc = hctDbLoadRecord(pDb, &pKey->buf, aPg, iCell, &nRec, &aRec); - if( aRec!=pKey->buf.aBuf && bDup && rc==SQLITE_OK ){ - rc = hctBufferSet(&pKey->buf, aRec, nRec); - aRec = pKey->buf.aBuf; - } - pKey->pKey = hctDbAllocateUnpacked(&rc, pKeyInfo); - if( rc==SQLITE_OK ){ - sqlite3VdbeRecordUnpack(pKeyInfo, nRec, aRec, pKey->pKey); - } - if( rc==SQLITE_OK ){ - sqlite3HctDbRecordTrim(pKey->pKey); - } - } - } - *pRc = rc; -} - -/* -** Retrieve the key from iCell of physical page iPhys. 
iPhys may be an -** intkey or index leaf page. Populate structure (*pKey) with the key -** value before returning. -*/ -static void hctDbGetKeyFromPage( - int *pRc, - HctDatabase *pDb, - KeyInfo *pKeyInfo, - int bLogical, /* True for logical, false for physical */ - u32 iPg, - int iCell, - HctDbKey *pKey -){ - int rc = *pRc; - - if( rc==SQLITE_OK ){ - HctFilePage pg; - if( bLogical ){ - rc = sqlite3HctFilePageGet(pDb->pFile, iPg, &pg); - }else{ - rc = hctDbGetPhysical(pDb, iPg, &pg); - while( rc==SQLITE_OK && hctPagetype(pg.aOld)==HCT_PAGETYPE_HISTORY ){ - HctDbHistoryFan *pFan = (HctDbHistoryFan*)pg.aOld; - rc = hctDbGetPhysical(pDb, pFan->pgOld0, &pg); - } - } - if( rc==SQLITE_OK ){ - hctDbGetKey(&rc, pDb, pKeyInfo, 1, pg.aOld, iCell, pKey); - sqlite3HctFilePageRelease(&pg); - } - } - *pRc = rc; -} - -/* static RecordCompare find_record_compare((UnpackedRecord*, RecordCompare); */ -#define find_record_compare(pRec, xCompare) ( \ - (xCompare) ? (xCompare) : sqlite3VdbeFindCompare(pRec) \ -) - - -static int hctDbIndexSearch( - HctDatabase *pDb, - const u8 *aPg, - RecordCompare xCompare, - UnpackedRecord *pRec, - int *piPos, - int *pbExact -){ - int rc = SQLITE_OK; - HctBuffer buf; - int i1 = 0; - int i2 = ((HctDbPageHdr*)aPg)->nEntry; - - if( pRec ) xCompare = find_record_compare(pRec, xCompare); - memset(&buf, 0, sizeof(buf)); - - while( i2>i1 ){ - int iTest = (i1+i2)/2; - int res; - int nRec = 0; - const u8 *aRec = 0; - - rc = hctDbLoadRecord(pDb, &buf, aPg, iTest, &nRec, &aRec); - if( rc!=SQLITE_OK ) break; - if( nRec==0 ){ - res = -1; - }else{ - res = xCompare(nRec, aRec, pRec); - } - - if( res==0 ){ - *pbExact = 1; - *piPos = iTest; - sqlite3HctBufferFree(&buf); - return SQLITE_OK; - }else if( res<0 ){ - i1 = iTest+1; - }else{ - i2 = iTest; - } - } - - assert( i1==i2 && i2>=0 ); - sqlite3HctBufferFree(&buf); - *pbExact = 0; - *piPos = i2; - return rc; -} - - -/* -** The first argument is a pointer to an intkey internal node page. 
-** -** This function searches the node page for key iKey. If found, it returns -** the index of the matching key within the page and sets output variable -** (*pbExact) to 1. If there is no match for key iKey, this function returns -** the index of the smallest key on the page that is larger than iKey, or -** (nEntry) if all keys on the page are smaller than iKey. (*pbExact) is -** set to 0 before returning in this case. -*/ -static int hctDbIntkeyNodeSearch( - void *aPg, - i64 iKey, - int *pbExact -){ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)aPg; - int i1 = 0; - int i2 = pNode->pg.nEntry; - - assert( hctPagetype(pNode)==HCT_PAGETYPE_INTKEY && pNode->pg.nHeight>0 ); - while( i2>i1 ){ - int iTest = (i1+i2)/2; - i64 iPgKey = pNode->aEntry[iTest].iKey; - if( iPgKey==iKey ){ - *pbExact = 1; - return iTest; - }else if( iPgKey=0 ); - assert( i2==pNode->pg.nEntry || iKeyaEntry[i2].iKey ); - assert( i2==0 || iKey>pNode->aEntry[i2-1].iKey ); - - *pbExact = 0; - return i2; -} - -/* -** Set (*bGe) to true if (pRec >= (FP-key for aPg)). -*/ -static int hctDbCompareFPKey( - HctDatabase *pDb, - UnpackedRecord *pRec, - const u8 *aPg, - int *pbGe -){ - const u8 *aFP = 0; - int nFP = 0; - int res; - int rc; - HctBuffer buf = {0,0,0}; - - rc = hctDbLoadRecord(pDb, &buf, aPg, 0, &nFP, &aFP); - if( rc==SQLITE_OK ){ - res = sqlite3VdbeRecordCompare(nFP, aFP, pRec); - sqlite3HctBufferFree(&buf); - *pbGe = (res<=0); - } - return rc; -} - -static int hctDbCsrGoLeft(HctDbCsr*); - -/* -** Seek the cursor within its tree. This only seeks within the tree, it does -** not follow any old-data pointers. 
-*/ -int hctDbCsrSeek( - HctDbCsr *pCsr, /* Cursor to seek */ - HctDbFPKey *pFP, - int iHeight, /* Height to seek at (0==leaf, 1==parent) */ - RecordCompare xCompare, - UnpackedRecord *pRec, /* Key for index/without rowid tables */ - i64 iKey, /* Key for intkey tables */ - int *pbExact -){ - HctFile *pFile = pCsr->pDb->pFile; - u32 iPg = pCsr->iRoot; - int rc = SQLITE_OK; - - HctFilePage par; - memset(&par, 0, sizeof(par)); - int iPar = 0; - - if( pRec ) xCompare = find_record_compare(pRec, xCompare); - while( rc==SQLITE_OK ){ - if( iPg ) rc = sqlite3HctFilePageGet(pFile, iPg, &pCsr->pg); - if( rc==SQLITE_OK ){ - HctDbPageHdr *pHdr = (HctDbPageHdr*)pCsr->pg.aOld; - int i2 = pHdr->nEntry-1; - int bExact; - if( pHdr->nHeight==0 ){ - if( pRec ){ - rc = hctDbIndexSearch( - pCsr->pDb, pCsr->pg.aOld, xCompare, pRec, &i2, &bExact - ); - }else{ - i2 = hctDbIntkeyLeafSearch(pCsr->pg.aOld, iKey, &bExact); - } - if( bExact==0 ) i2--; - }else{ - if( pRec ){ - HctDbIndexNode *pNode = (HctDbIndexNode*)pCsr->pg.aOld; - rc = hctDbIndexSearch( - pCsr->pDb, pCsr->pg.aOld, xCompare, pRec, &i2, &bExact - ); - i2 -= !bExact; - iPg = pNode->aEntry[i2].iChildPg; - assert( iPg ); - }else{ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pCsr->pg.aOld; - i2 = hctDbIntkeyNodeSearch(pNode, iKey, &bExact); - assert( i2==pHdr->nEntry || iKey<=pNode->aEntry[i2].iKey ); - assert( i2==pHdr->nEntry || bExact==(iKey==pNode->aEntry[i2].iKey) ); - assert( i2nEntry || bExact==0 ); - i2 -= !bExact; - iPg = pNode->aEntry[i2].iChildPg; - assert( iPg ); - } - - /* Avoid following a pointer to an EVICTED page */ - if( pHdr->nHeight!=iHeight ){ - while( sqlite3HctFilePageIsEvicted(pFile, iPg) ){ - i2--; - if( i2<0 ){ - rc = hctDbCsrGoLeft(pCsr); - if( rc!=SQLITE_OK ) break; - i2 = pCsr->iCell; - } - - bExact = 0; - if( pRec ){ - HctDbIndexNode *pNode = (HctDbIndexNode*)pCsr->pg.aOld; - iPg = pNode->aEntry[i2].iChildPg; - }else{ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pCsr->pg.aOld; - iPg = 
pNode->aEntry[i2].iChildPg; - } - } - } - } - - - /* Test if it is necessary to skip to the peer node. */ - if( i2>=0 && i2==pHdr->nEntry-1 && pHdr->iPeerPg!=0 ){ - HctFilePage peer; - rc = sqlite3HctFilePageGet(pFile, pHdr->iPeerPg, &peer); - if( rc==SQLITE_OK ){ - int bGotoPeer; - if( pRec ){ - rc = hctDbCompareFPKey(pCsr->pDb, pRec, peer.aOld, &bGotoPeer); - }else{ - i64 iFP = hctDbIntkeyFPKey(peer.aOld); - bGotoPeer = (iFP<=iKey); - } - if( bGotoPeer ){ - SWAP(HctFilePage, pCsr->pg, peer); - sqlite3HctFilePageRelease(&peer); - assert( pCsr->pg.aOld ); - iPg = 0; - continue; - } - sqlite3HctFilePageRelease(&peer); - } - } - - if( pHdr->nHeight==iHeight ){ - pCsr->iCell = i2; - if( pbExact ) *pbExact = bExact; - - /* If parameter pFP was not NULL and there is a parent page stored - ** in variable par, try to load the FP key from that page. This - ** is used when seeking a cursor for writing. */ - if( pFP && par.aOld ){ - i64 iPeer = ((HctDbPageHdr*)pCsr->pg.aOld)->iPeerPg; - if( pRec ){ - HctDbIndexNode *pPar = (HctDbIndexNode*)par.aOld; - if( (iPar+1)pg.nEntry - && pPar->aEntry[iPar+1].iChildPg==iPeer - ){ - rc = hctDbLoadRecordFP(pCsr->pDb, par.aOld, iPar+1, pFP); - } - }else{ - HctDbIntkeyNode *pPar = (HctDbIntkeyNode*)par.aOld; - if( (iPar+1)pg.nEntry - && pPar->aEntry[iPar+1].iChildPg==iPeer - ){ - pFP->iKey = pPar->aEntry[iPar+1].iKey; - } - } - } - - break; - } - - if( pFP && pHdr->nHeight==iHeight+1 ){ - par = pCsr->pg; - iPar = i2; - memset(&pCsr->pg, 0, sizeof(HctFilePage)); - }else{ - sqlite3HctFilePageRelease(&pCsr->pg); - } - assert( rc!=SQLITE_OK || iPg!=0 ); - } - } - - if( pFP ) sqlite3HctFilePageRelease(&par); - return rc; -} - -SQLITE_PRIVATE void sqlite3HctDbCsrDir(HctDbCsr *pCsr, int eDir){ - pCsr->eDir = eDir; -} - -static int hctDbCellOffset(const u8 *aPage, int iCell, u8 *pFlags){ - HctDbPageHdr *pHdr = (HctDbPageHdr*)aPage; - int iRet; - if( hctPagetype(pHdr)==HCT_PAGETYPE_INTKEY ){ - HctDbIntkeyEntry *pEntry = 
&((HctDbIntkeyLeaf*)pHdr)->aEntry[iCell]; - *pFlags = pEntry->flags; - iRet = pEntry->iOff; - }else if( hctPageheight(pHdr)>0 ){ - HctDbIndexNodeEntry *pEntry = &((HctDbIndexNode*)pHdr)->aEntry[iCell]; - *pFlags = pEntry->flags; - iRet = pEntry->iOff; - }else{ - HctDbIndexEntry *pEntry = &((HctDbIndexLeaf*)pHdr)->aEntry[iCell]; - *pFlags = pEntry->flags; - iRet = pEntry->iOff; - } - return iRet; -} - -/* -** If the cursor is open on an index tree, ensure that the UnpackedRecord -** structure is allocated. Return SQLITE_NOMEM if an OOM is encountered -** while attempting to allocate said structure, or SQLITE_OK otherwise. -*/ -static int hctDbCsrAllocateUnpacked(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - if( pCsr->pKeyInfo && pCsr->pRec==0 ){ - pCsr->pRec = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo); - if( pCsr->pRec==0 ){ - rc = SQLITE_NOMEM_BKPT; - } - } - return rc; -} - -static const u8 *hctDbCsrPageAndCellIdx( - HctDbCsr *pCsr, - int iIdx, - int *piCell -){ - const u8 *aPg = 0; - int iCell = 0; - - if( iIdx<0 ){ - aPg = pCsr->pg.aOld; - iCell = pCsr->iCell; - }else{ - aPg = pCsr->aRange[iIdx].pg.aOld; - iCell = pCsr->aRange[iIdx].iCell; - } - *piCell = iCell; - return aPg; -} - -/* -** Return a pointer to the current page accessed by the cursor. Before -** returning, also set output variable (*piCell) to the index of the -** current cell within the page. 
-*/ -static const u8 *hctDbCsrPageAndCell(HctDbCsr *pCsr, int *piCell){ - const u8 *aPg = 0; - int iCell = 0; - if( pCsr->nRange ){ - aPg = pCsr->aRange[pCsr->nRange-1].pg.aOld; - iCell = pCsr->aRange[pCsr->nRange-1].iCell; - }else{ - aPg = pCsr->pg.aOld; - iCell = pCsr->iCell; - } - - *piCell = iCell; - return aPg; -} - -static void hctDbFreeKeyContents(HctDbKey *pKey){ - hctDbFreeUnpacked(pKey->pKey); - sqlite3HctBufferFree(&pKey->buf); -} - -static void hctDbCsrAscendRange(HctDbCsr *pCsr){ - HctDbRangeCsr *pLast = &pCsr->aRange[--pCsr->nRange]; - assert( pCsr->nRange>=0 ); - hctDbFreeKeyContents(&pLast->highkey); - hctDbFreeKeyContents(&pLast->lowkey); - sqlite3HctFilePageRelease(&pLast->pg); -} - -static void hctDbCsrReset(HctDbCsr *pCsr){ - sqlite3HctFilePageRelease(&pCsr->pg); - pCsr->iCell = -1; - while( pCsr->nRange>0 ){ - hctDbCsrAscendRange(pCsr); - } -} - -static void hctDbFreeCsr(HctDbCsr *pCsr){ - hctDbCsrReset(pCsr); - while( pCsr->intkey.pOpList ){ - HctCsrIntkeyOp *pOp = pCsr->intkey.pOpList; - pCsr->intkey.pOpList = pOp->pNextOp; - sqlite3_free(pOp); - } - while( pCsr->index.pOpList ){ - HctCsrIndexOp *pOp = pCsr->index.pOpList; - pCsr->index.pOpList = pOp->pNextOp; - if( pOp->pLast!=pOp->pFirst ){ - sqlite3_free(pOp->pLast); - } - sqlite3_free(pOp->pFirst); - sqlite3_free(pOp); - } - if( pCsr->pRec ) sqlite3DbFree(pCsr->pKeyInfo->db, pCsr->pRec); - sqlite3KeyInfoUnref(pCsr->pKeyInfo); - sqlite3HctBufferFree(&pCsr->rec); - sqlite3_free(pCsr->aRange); - pCsr->aRange = 0; - pCsr->nRangeAlloc = 0; - sqlite3_free(pCsr); -} - -static void hctDbCsrCleanup(HctDbCsr *pCsr){ - hctDbCsrReset(pCsr); - if( pCsr->pKeyInfo ){ - sqlite3DbFree(pCsr->pKeyInfo->db, pCsr->pRec); - sqlite3KeyInfoUnref(pCsr->pKeyInfo); - pCsr->pKeyInfo = 0; - pCsr->pRec = 0; - } - sqlite3_free(pCsr->aRange); - pCsr->aRange = 0; - pCsr->nRangeAlloc = 0; - sqlite3HctBufferFree(&pCsr->rec); - pCsr->iRoot = 0; -} - -static int hctDbCsrScanStart(HctDbCsr *pCsr, UnpackedRecord *pRec, i64 
iKey){ - int rc = SQLITE_OK; - - if( pCsr->pDb->bConcurrent ){ - if( pCsr->pDb->iTid==0 ){ - if( pCsr->pKeyInfo==0 ){ - HctCsrIntkeyOp *pOp = 0; - pOp = sqlite3MallocZero(sizeof(HctCsrIntkeyOp)); - if( pOp==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - assert( pCsr->intkey.pCurrentOp==0 ); - pOp->iFirst = pOp->iLast = iKey; - pCsr->intkey.pCurrentOp = pOp; - pOp->iLogical = pCsr->pg.iPg; - pOp->iPhysical = pCsr->pg.iOldPg; - } - }else{ - HctCsrIndexOp *pOp = 0; - pOp = sqlite3MallocZero(sizeof(HctCsrIndexOp)); - if( pOp==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - if( pRec ){ - rc = sqlite3HctSerializeRecord(pRec, &pOp->pFirst, &pOp->nFirst); - pOp->pLast = pOp->pFirst; - pOp->nLast = pOp->nFirst; - pOp->iLogical = pCsr->pg.iPg; - pOp->iPhysical = pCsr->pg.iOldPg; - } - assert( pCsr->index.pCurrentOp==0 ); - pCsr->index.pCurrentOp = pOp; - } - } - } - } - - return rc; -} - -static int hctDbCsrScanFinish(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - if( pCsr->pDb->bConcurrent ){ - if( pCsr->pKeyInfo==0 ){ - HctCsrIntkeyOp *pOp = pCsr->intkey.pCurrentOp; - pCsr->intkey.pCurrentOp = 0; - if( pOp ){ - HctCsrIntkeyOp *pPrev = pCsr->intkey.pOpList; - - if( pCsr->eDir!=BTREE_DIR_NONE ){ - i64 iVal = 0; - if( sqlite3HctDbCsrEof(pCsr) ){ - if( pCsr->eDir==BTREE_DIR_FORWARD ){ - iVal = LARGEST_INT64; - }else{ - iVal = SMALLEST_INT64; - } - pOp->iLogical = pOp->iPhysical = 0; - }else{ - sqlite3HctDbCsrKey(pCsr, &iVal); - if( pCsr->pg.iPg!=pOp->iLogical ){ - pOp->iLogical = pOp->iPhysical = 0; - } - } - - if( iVal>=pOp->iFirst ){ - pOp->iLast = iVal; - }else{ - pOp->iLast = pOp->iFirst; - pOp->iFirst = iVal; - } - } - - if( pPrev && pOp->iLast<=pPrev->iLast && pOp->iFirst>=pPrev->iFirst ){ - pPrev->iLogical = pPrev->iPhysical = 0; - sqlite3_free(pOp); - }else{ - pOp->pNextOp = pPrev; - pCsr->intkey.pOpList = pOp; - } - } - }else{ - HctCsrIndexOp *pOp = pCsr->index.pCurrentOp; - pCsr->index.pCurrentOp = 0; - if( pOp ){ - if( pCsr->eDir!=BTREE_DIR_NONE ){ - int nKey = 0; - u8 *aCopy = 0; - 
if( !sqlite3HctDbCsrEof(pCsr) ){ - const u8 *aKey = 0; - rc = sqlite3HctDbCsrData(pCsr, &nKey, &aKey); - if( rc==SQLITE_OK ){ - aCopy = sqlite3_malloc(nKey); - if( aCopy==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - hctMemcpy(aCopy, aKey, nKey); - } - } - if( pCsr->pg.iPg!=pOp->iLogical ){ - pOp->iLogical = pOp->iPhysical = 0; - } - }else{ - pOp->iLogical = pOp->iPhysical = 0; - } - - if( pCsr->eDir==BTREE_DIR_FORWARD ){ - pOp->pLast = aCopy; - pOp->nLast = nKey; - }else{ - pOp->pFirst = aCopy; - pOp->nFirst = nKey; - } - } - - pOp->pNextOp = pCsr->index.pOpList; - pCsr->index.pOpList = pOp; - } - } - } - - return rc; -} - -static int hctDbCsrFirst(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - - /* Starting at the root of the tree structure, follow the left-most - ** pointers to find the left-most node in the list of leaves. */ - u32 iPg = pCsr->iRoot; - HctFile *pFile = pCsr->pDb->pFile; - HctFilePage pg; - while( 1 ){ - HctDbPageHdr *pPg; - rc = sqlite3HctFilePageGet(pFile, iPg, &pg); - if( rc!=SQLITE_OK ) break; - pPg = (HctDbPageHdr*)pg.aOld; - if( pPg->nHeight==0 ){ - break; - }else if( hctPagetype(pPg)==HCT_PAGETYPE_INTKEY ){ - iPg = ((HctDbIntkeyNode*)pPg)->aEntry[0].iChildPg; - }else{ - iPg = ((HctDbIndexNode*)pPg)->aEntry[0].iChildPg; - } - sqlite3HctFilePageRelease(&pg); - } - hctMemcpy(&pCsr->pg, &pg, sizeof(pg)); - if( ((HctDbPageHdr*)pCsr->pg.aOld)->nEntry>0 ){ - pCsr->iCell = 0; - }else{ - pCsr->iCell = -1; - } - return rc; -} - -static int hctDbCsrFirstValid(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - - rc = hctDbCsrFirst(pCsr); - - /* Skip forward to the first visible entry, if any. 
*/ - if( rc==SQLITE_OK ){ - pCsr->iCell = -1; - rc = sqlite3HctDbCsrNext(pCsr); - } - - return rc; -} - -static int hctDbCellPut( - u8 *aBuf, - HctDbCell *pCell, - int nLocal -){ - int iOff = 0; - if( pCell->iTid ){ - hctMemcpy(&aBuf[iOff], &pCell->iTid, sizeof(u64)); - iOff += sizeof(u64); - } - if( pCell->iRangeTid ){ - hctMemcpy(&aBuf[iOff], &pCell->iRangeTid, sizeof(u64)); - iOff += sizeof(u64); - } - if( pCell->iRangeOld ){ - hctMemcpy(&aBuf[iOff], &pCell->iRangeOld, sizeof(u32)); - iOff += sizeof(u32); - } - if( pCell->iOvfl ){ - hctMemcpy(&aBuf[iOff], &pCell->iOvfl, sizeof(u32)); - iOff += sizeof(u32); - } - hctMemcpy(&aBuf[iOff], pCell->aPayload, nLocal); - return iOff+nLocal; -} - -static void hctDbCellGet( - HctDatabase *pDb, - const u8 *aBuf, - int flags, - HctDbCell *pCell -){ - int iOff = 0; - memset(pCell, 0, sizeof(HctDbCell)); - - if( flags & HCTDB_HAS_TID ){ - hctMemcpy(&pCell->iTid, &aBuf[iOff], sizeof(u64)); - iOff += sizeof(u64); - } - if( flags & HCTDB_HAS_RANGETID ){ - hctMemcpy(&pCell->iRangeTid, &aBuf[iOff], sizeof(u64)); - iOff += sizeof(u64); - } - if( flags & HCTDB_HAS_RANGEOLD ){ - hctMemcpy(&pCell->iRangeOld, &aBuf[iOff], sizeof(u32)); - iOff += sizeof(u32); - } - if( flags & HCTDB_HAS_OVFL ){ - hctMemcpy(&pCell->iOvfl, &aBuf[iOff], sizeof(u32)); - iOff += sizeof(u32); - } - - pCell->aPayload = &aBuf[iOff]; -} - -static void hctDbCellGetByIdx( - HctDatabase *pDb, - const u8 *aPg, - int iIdx, - HctDbCell *pCell -){ - HctDbIndexEntry *p = hctDbEntryEntry(aPg, iIdx); - hctDbCellGet(pDb, &aPg[p->iOff], p->flags, pCell); -} - -static u8 hctDbCellToFlags(HctDbCell *pCell){ - u8 flags = 0; - if( pCell->iTid ) flags |= HCTDB_HAS_TID; - if( pCell->iOvfl ) flags |= HCTDB_HAS_OVFL; - if( pCell->iRangeTid ) flags |= HCTDB_HAS_RANGETID; - if( pCell->iRangeOld ) flags |= HCTDB_HAS_RANGEOLD; - return flags; -} - -typedef struct HctRangePtr HctRangePtr; -struct HctRangePtr { - u64 iRangeTid; - u64 iFollowTid; - u32 iOld; -}; - -/* -** This function is 
called when a reader encounters an old-range pointer -** with associated TID value iRangeTid. It returns true if the pointer -** should be followed, or false otherwise. -** -** If the data items on the linked page should be merged in to the cursor -** results, output parameter (*pbMerge) is set to true before returning. -** This happens if the transaction with TID iRangeTid is not visible to -** the reader. Or, if the only reason to follow the pointer is in order -** to follow other pointers on the indicated page, (*pbMerge) is set to -** true. This happens when iRangeTid is included in the transaction, but -** there exists one or more transactions with TID values smaller than iRangeTid -** that are not. -*/ -static int hctDbFollowRangeOld( - HctDatabase *pDb, - HctRangePtr *pPtr, - int *pbMerge -){ - int bRet = 0; - int bMerge = 0; - u64 iRangeTidValue = (pPtr->iRangeTid & HCT_TID_MASK); - - /* HctDatabase.iTid is set when writing, validating or rolling back a - ** transaction. When writing or validating, old-ranges created by this - ** transaction should not be merge in, even if they are followed. But, when - ** doing rollback, they must be merged in (to find the old data). */ - - i64 iDoNotMergeTid = (pDb->eMode==HCT_MODE_VALIDATE) ? 0 : pDb->iTid; - assert( pDb->eMode!=HCT_MODE_ROLLBACK ); - - if( iRangeTidValue>pDb->iLocalMinTid ){ - bRet = 1; - if( iDoNotMergeTid!=iRangeTidValue ){ - bMerge = (0==hctDbTidIsVisible(pDb, pPtr->iRangeTid, 0)); - } - }else if( (pPtr->iFollowTid & HCT_TID_MASK)>pDb->iLocalMinTid ){ - bRet = 1; - assert( bMerge==0 ); - } - - *pbMerge = bMerge; - assert( bRet==0 || iRangeTidValue>0 ); - return bRet; -} - -static int hctDbCsrExtendRange(HctDbCsr *pCsr){ - if( pCsr->nRange==pCsr->nRangeAlloc ){ - int nNew = pCsr->nRangeAlloc ? 
pCsr->nRangeAlloc*2 : 16; - HctDbRangeCsr *aNew = 0; - - aNew = (HctDbRangeCsr*)sqlite3_realloc( - pCsr->aRange, nNew*sizeof(HctDbRangeCsr) - ); - if( aNew==0 ) return SQLITE_NOMEM_BKPT; - pCsr->nRangeAlloc = nNew; - pCsr->aRange = aNew; - } - - memset(&pCsr->aRange[pCsr->nRange], 0, sizeof(HctDbRangeCsr)); - pCsr->nRange++; - return SQLITE_OK; -} - -static int hctDbCompareKey2( - KeyInfo *pKeyInfo, - UnpackedRecord *pKey1, - i64 iKey1, - HctDbKey *p2 -){ - int ret = 0; - if( pKeyInfo ){ - int ii = 0; - int n1, n2; - - if( pKey1==0 ) return 1; - if( p2->pKey==0 ) return -1; - - n1 = pKey1->nField; - n2 = p2->pKey->nField; - - for(ii=0; ret==0 && iiaColl[ii]; - ret = sqlite3MemCompare(&pKey1->aMem[ii], &p2->pKey->aMem[ii], pColl); - if( pKeyInfo->aSortFlags[ii] & KEYINFO_ORDER_DESC ) ret = -ret; - } - if( ret==0 ){ - /* default_rc==1 if the key has been passed to hctDbDecrementKey() */ - assert( pKey1->default_rc==0 || pKey1->default_rc==1 ); - assert( p2->pKey->default_rc==0 || p2->pKey->default_rc==1 ); - ret = p2->pKey->default_rc - pKey1->default_rc; - } - if( ret==0 ){ - if( n1n2 ){ - ret = +1; - } - } - }else{ - if( iKey1iKey ){ - ret = -1; - }else if( iKey1>p2->iKey ){ - ret = +1; - } - } - return ret; -} - -/* -** Compare the key values in p1 and p2, returning a value less than, equal -** to, or greater than zero if p1 is respectively less than, equal to or -** greater than p2. i.e. 
-** -** res = (*p1) - (*p2) -*/ -static int hctDbCompareKey(KeyInfo *pKeyInfo, HctDbKey *p1, HctDbKey *p2){ - return hctDbCompareKey2(pKeyInfo, p1->pKey, p1->iKey, p2); -} - -static int hctDbCopyKey(HctDbKey *p1, HctDbKey *p2){ - if( p2->pKey ){ - int ii; - int bNew = 0; - if( p1->pKey==0 || p1->pKey->nFieldpKey->nField ){ - int rc = SQLITE_OK; - hctDbFreeUnpacked(p1->pKey); - p1->pKey = hctDbAllocateUnpacked(&rc, p2->pKey->pKeyInfo); - if( rc!=SQLITE_OK ) return rc; - bNew = 1; - p1->pKey->default_rc = 0; - } - for(ii=0; iipKey->nField; ii++){ - Mem *pFrom = &p2->pKey->aMem[ii]; - Mem *pTo = &p1->pKey->aMem[ii]; - if( bNew ) sqlite3VdbeMemInit(pTo, pFrom->db, 0); - sqlite3VdbeMemShallowCopy(pTo, pFrom, MEM_Static); - } - p1->pKey->nField = p2->pKey->nField; - p1->pKey->default_rc = p2->pKey->default_rc; - }else{ - p1->iKey = p2->iKey; - } - return SQLITE_OK; -} - -static void hctDbDecrementKey(HctDbKey *pKey){ - if( pKey->pKey ){ - /* TODO: Is this correct? Or should it be +1? Or...? */ - pKey->pKey->default_rc = +1; - }else if( pKey->iKey!=SMALLEST_INT64 ){ - pKey->iKey--; - } -} - -static void hctDbCsrDescendRange( - int *pRc, - HctDbCsr *pCsr, - u64 iRangeTid, - u32 iRangeOld, - int bMerge -){ - int rc = *pRc; - - if( rc==SQLITE_OK ){ - rc = hctDbCsrExtendRange(pCsr); - } - - if( rc==SQLITE_OK ){ - HctDbRangeCsr *pNew = &pCsr->aRange[pCsr->nRange-1]; - assert( bMerge==HCT_RANGE_FOLLOW || bMerge==HCT_RANGE_MERGE ); - - pNew->eRange = bMerge; - pNew->iRangeTid = iRangeTid; - rc = hctDbGetPhysical(pCsr->pDb, iRangeOld, &pNew->pg); - - if( rc==SQLITE_OK ){ - int iPar = pCsr->nRange-2; - int iPCell = 0; - const u8 *aParent = hctDbCsrPageAndCellIdx(pCsr, iPar, &iPCell); - const HctDbPageHdr *pPar = (HctDbPageHdr*)aParent; - int bSeen = 0; - - /* Figure out the upper limit key for the scan of this page */ - if( hctPagetype(aParent)==HCT_PAGETYPE_HISTORY ){ - if( iPCell==0 && pPar->nEntry>1 ){ - const HctDbHistoryFan *pFan = (const HctDbHistoryFan*)aParent; - 
hctDbGetKeyFromPage(&rc, pCsr->pDb, pCsr->pKeyInfo, - 0, pFan->aPgOld1[0], pFan->iSplit0, &pNew->highkey - ); - bSeen = 1; - } - }else{ - if( iPCell==(pPar->nEntry-1) ){ - if( pPar->iPeerPg ){ - hctDbGetKeyFromPage(&rc, pCsr->pDb, pCsr->pKeyInfo, - 1, pPar->iPeerPg, 0, &pNew->highkey - ); - bSeen = 1; - } - }else{ - hctDbGetKey(&rc, - pCsr->pDb, pCsr->pKeyInfo, 0, aParent, iPCell+1, &pNew->highkey - ); - bSeen = 1; - } - } - - if( bSeen==0 ){ - if( iPar>=0 ){ - hctDbCopyKey(&pNew->highkey, &pNew[-1].highkey); - }else{ - pNew->highkey.iKey = LARGEST_INT64; - assert( pNew->highkey.pKey==0 ); - } - }else if( iPar>=0 ){ - /* The 'highkey' should be the minimum of pNew->highkey and the - ** parent highkey. highkey = MIN(highkey, parent.highkey); */ - HctDbKey *pPKey = &pNew[-1].highkey; - if( hctDbCompareKey(pCsr->pKeyInfo, &pNew->highkey, pPKey)>0 ){ - hctDbCopyKey(&pNew->highkey, pPKey); - } - } - - /* Figure the lower limit key for the scan of this page */ - pNew->lowkey.iKey = SMALLEST_INT64; - if( hctPagetype(aParent)==HCT_PAGETYPE_HISTORY ){ - if( iPCell>0 ){ - const HctDbHistoryFan *pFan = (const HctDbHistoryFan*)aParent; - hctDbGetKeyFromPage(&rc, pCsr->pDb, pCsr->pKeyInfo, - 0, pFan->aPgOld1[0], pFan->iSplit0, &pNew->lowkey - ); - hctDbDecrementKey(&pNew->lowkey); - }else{ - hctDbCopyKey(&pNew->lowkey, &pNew[-1].lowkey); - } - }else{ - HctDbCell pcell; - hctDbGetKey(&rc, - pCsr->pDb, pCsr->pKeyInfo, 0, aParent, iPCell, &pNew->lowkey - ); - hctDbCellGetByIdx(pCsr->pDb, aParent, iPCell, &pcell); - if( hctDbTidIsVisible(pCsr->pDb, pcell.iTid, 0)==0 ){ - hctDbDecrementKey(&pNew->lowkey); - } - } - if( iPar>=0 ){ - /* The 'lowkey' should be the maximum of pNew->lowkey and the - ** parent lowkey. 
lowkey = MAX(lowkey, parent.lowkey); */ - HctDbKey *pPKey = &pNew[-1].lowkey; - if( hctDbCompareKey(pCsr->pKeyInfo, &pNew->lowkey, pPKey)<0 ){ - hctDbCopyKey(&pNew->lowkey, pPKey); - } - } - - if( rc==SQLITE_OK && hctPagetype(pNew->pg.aOld)==HCT_PAGETYPE_HISTORY){ - pNew->eRange = HCT_RANGE_FAN; - } - } - } - - *pRc = rc; -} - -static void hctDbGetRange( - const u8 *aPg, - int iCell, - HctRangePtr *pPtr -){ - if( iCell<0 ){ - memset(pPtr, 0, sizeof(*pPtr)); - }else if( hctPagetype(aPg)==HCT_PAGETYPE_HISTORY ){ - HctDbHistoryFan *pFan = (HctDbHistoryFan*)aPg; - if( iCell==0 ){ - pPtr->iRangeTid = pFan->iRangeTid0; - pPtr->iFollowTid = pFan->iFollowTid0; - pPtr->iOld = pFan->pgOld0; - }else{ - pPtr->iFollowTid = pPtr->iRangeTid = pFan->iRangeTid1; - pPtr->iOld = pFan->aPgOld1[iCell-1]; - } - }else{ - HctDbCell cell; - hctDbCellGetByIdx(0, aPg, iCell, &cell); - pPtr->iFollowTid = pPtr->iRangeTid = cell.iRangeTid; - pPtr->iOld = cell.iRangeOld; - } - - assert( (pPtr->iFollowTid & HCT_TID_MASK)>=(pPtr->iRangeTid & HCT_TID_MASK) ); -} - -static void hctDbCsrGetRange( - HctDbCsr *pCsr, - HctRangePtr *pPtr -){ - const u8 *aPg = 0; - int iCell = 0; - aPg = hctDbCsrPageAndCell(pCsr, &iCell); - assert( ((HctDbPageHdr*)aPg)->nEntry>iCell ); - assert( ((HctDbPageHdr*)aPg)->nHeight==0 ); - hctDbGetRange(aPg, iCell, pPtr); -} - -/* -** Return true if the entry that the cursor currently points to is visible -** to the current transaction, or false otherwise. 
-*/ -static int hctDbCurrentIsVisible(HctDbCsr *pCsr){ - int iCell = 0; - HctDbIndexEntry *p; - const u8 *aPg = hctDbCsrPageAndCell(pCsr, &iCell); - u64 iTid = 0; - - if( pCsr->pKeyInfo ){ - p = &((HctDbIndexLeaf*)aPg)->aEntry[iCell]; - }else{ - p = (HctDbIndexEntry*)&((HctDbIntkeyLeaf*)aPg)->aEntry[iCell]; - } - if( (p->flags & HCTDB_HAS_TID)==0 ) return 1; - hctMemcpy(&iTid, &aPg[p->iOff], sizeof(u64)); - if( pCsr->pDb->iTid==iTid && pCsr->pDb->eMode==HCT_MODE_VALIDATE ) return 1; - - return hctDbTidIsVisible(pCsr->pDb, iTid, pCsr->bNosnap); -} - -/* -** Search leaf page aPg[] for a specified key. -** -** If the key is present in the page, set output variable (*piPos) to -** the index of the key in the page, and (*pbExact) to true. -** -** Or, if the key is not present in the page, set output variable (*piPos) -** to the index of the SMALLEST KEY THAT IS LARGER THAN IKEY/PKEY, and -** set (*pbExact) to false. -*/ -static int hctDbLeafSearch( - HctDatabase *pDb, - const u8 *aPg, - i64 iKey, - UnpackedRecord *pKey, - int *piPos, - int *pbExact -){ - if( hctPagetype(aPg)==HCT_PAGETYPE_INDEX ){ - if( pKey==0 ){ - *piPos = hctPagenentry(aPg); - *pbExact = 0; - }else{ - int rc = hctDbIndexSearch(pDb, aPg, 0, pKey, piPos, pbExact); - if( rc ) return rc; - } - }else{ - *piPos = hctDbIntkeyLeafSearch(aPg, iKey, pbExact); - } - return SQLITE_OK; -} - -static int hctDbCsrRollbackDescend( - HctDbCsr *pCsr, /* Cursor to seek */ - UnpackedRecord *pRec, /* Key for index/without rowid tables */ - i64 iKey, /* Key for intkey tables */ - int *pbExact -){ - HctDatabase *pDb = pCsr->pDb; - int bExact = 0; - int rc = SQLITE_OK; - - assert( pDb->eMode==HCT_MODE_ROLLBACK ); - while( 1 ){ - HctRangePtr ptr; - HctDbRangeCsr *p = 0; - - hctDbCsrGetRange(pCsr, &ptr); - - if( (ptr.iFollowTid & HCT_TID_MASK)pDb->iTid ) break; - - rc = hctDbCsrExtendRange(pCsr); - if( rc==SQLITE_OK ){ - p = &pCsr->aRange[pCsr->nRange-1]; - rc = hctDbGetPhysical(pDb, ptr.iOld, &p->pg); - } - if( rc==SQLITE_OK 
){ - p->iRangeTid = ptr.iRangeTid & HCT_TID_MASK; - if( hctPagetype(p->pg.aOld)==HCT_PAGETYPE_HISTORY ){ - p->eRange = HCT_RANGE_FAN; - p->iCell = hctDbFanSearch(&rc, pCsr->pDb, p->pg.aOld, pRec, iKey); - bExact = 0; - }else{ - p->eRange = HCT_RANGE_MERGE; - rc = hctDbLeafSearch( - pCsr->pDb, p->pg.aOld, iKey, pRec, &p->iCell, &bExact - ); - if( rc!=SQLITE_OK || bExact ) break; - p->iCell--; - if( p->iCell<0 ) break; - } - } - } - - *pbExact = bExact; - return rc; -} - -static int hctDbCsrSeekAndDescend( - HctDbCsr *pCsr, /* Cursor to seek */ - UnpackedRecord *pRec, /* Key for index/without rowid tables */ - i64 iKey, /* Key for intkey tables */ - int bStopOnExact, /* Stop on exact match, even if not visible */ - int *pbExact -){ - int rc = SQLITE_OK; - int bExact = 0; - - /* This function is never called when writing to the database. Or while - ** doing rollback. But it is called during transaction preparation (iTid==0), - ** and validation (eMode==HCT_MODE_VALIDATE). */ - assert( pCsr->pDb->eMode==HCT_MODE_VALIDATE || pCsr->pDb->iTid==0 ); - - rc = hctDbCsrSeek(pCsr, 0, 0, 0, pRec, iKey, &bExact); - if( bExact && bStopOnExact ){ - *pbExact = 1; - return rc; - } - - while( rc==SQLITE_OK && (0==bExact || 0==hctDbCurrentIsVisible(pCsr)) ){ - HctRangePtr ptr; - int bMerge = 0; - - /* Check if there is a range pointer that we should follow */ - hctDbCsrGetRange(pCsr, &ptr); - if( hctDbFollowRangeOld(pCsr->pDb, &ptr, &bMerge) ){ - hctDbCsrDescendRange(&rc, pCsr, ptr.iRangeTid, ptr.iOld, bMerge); - if( rc==SQLITE_OK ){ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; - - assert( hctDbCompareKey2(pCsr->pKeyInfo, pRec, iKey, &p->lowkey)>=0 ); - if( hctDbCompareKey2(pCsr->pKeyInfo, pRec, iKey, &p->lowkey)<=0 ){ - p->iCell = -1; - break; - } - - if( p->eRange==HCT_RANGE_FAN ){ - p->iCell = hctDbFanSearch(&rc, pCsr->pDb, p->pg.aOld, pRec, iKey); - bExact = 0; - }else{ - rc = hctDbLeafSearch( - pCsr->pDb, p->pg.aOld, iKey, pRec, &p->iCell, &bExact - ); - if( rc!=SQLITE_OK 
) break; - if( bExact==0 ){ - p->iCell--; - }else if( bStopOnExact ){ - *pbExact = 1; - return SQLITE_OK; - } - if( p->iCell<0 ) break; - if( p->eRange==HCT_RANGE_FOLLOW ) bExact = 0; - } - } - }else{ - break; - } - } - - while( rc==SQLITE_OK && pCsr->nRange>0 ){ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; - if( p->eRange==HCT_RANGE_MERGE && p->iCell>=0 ) break; - hctDbCsrAscendRange(pCsr); - } - - *pbExact = bExact; - return rc; -} - -/* -** Find the CID of the last transaction to write to a specified key. -** -** This must be called from within a transaction. -*/ -SQLITE_PRIVATE int sqlite3HctDbCsrFindLastWrite( - HctDbCsr *pCsr, /* Cursor to seek */ - UnpackedRecord *pRec, /* Key for index/without rowid tables */ - i64 iKey, /* Key for intkey tables */ - u64 *piCid /* Last CID to write to this key */ -){ - int rc = SQLITE_OK; - u64 iCid = 0; - int bExact = 0; - - rc = hctDbCsrSeekAndDescend(pCsr, pRec, iKey, 1, &bExact); - if( rc==SQLITE_OK && bExact ){ - u64 iTid = 0; - if( pCsr->nRange>1 ){ - /* In this case the key has been deleted. Find the TID of the - ** transaction that deleted it. */ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-2]; - HctRangePtr ptr; - hctDbGetRange(p->pg.aOld, p->iCell, &ptr); - iTid = ptr.iRangeTid; - }else{ - HctDbCell cell; - hctDbCellGetByIdx(pCsr->pDb, pCsr->pg.aOld, pCsr->iCell, &cell); - if( pCsr->nRange ){ - assert( pCsr->nRange==1 ); - iTid = cell.iRangeTid; - }else{ - iTid = cell.iTid; - } - } - - if( iTid ){ - u64 dummy = 0; - iTid = (iTid & HCT_TID_MASK); - iCid = hctDbTMapLookup(pCsr->pDb, iTid, &dummy); - }else{ - iCid = 1; - } - } - - *piCid = iCid; - return rc; -} - -/* -** An integer is written into *pRes which is the result of -** comparing the key with the entry to which the cursor is -** pointing. 
The meaning of the integer written into -** *pRes is as follows: -** -** *pRes<0 The cursor is left pointing at an entry that -** is smaller than iKey/pRec or if the table is empty -** and the cursor is therefore left point to nothing. -** -** *pRes==0 The cursor is left pointing at an entry that -** exactly matches iKey/pRec. -** -** *pRes>0 The cursor is left pointing at an entry that -** is larger than iKey/pRec. -*/ -SQLITE_PRIVATE int sqlite3HctDbCsrSeek( - HctDbCsr *pCsr, /* Cursor to seek */ - UnpackedRecord *pRec, /* Key for index tables */ - i64 iKey, /* Key for intkey tables */ - int *pRes /* Result of seek (see above) */ -){ - int rc = SQLITE_OK; - int bExact; - - /* Should not be called while committing, validating or during rollback. */ - assert( pCsr->pDb->eMode==HCT_MODE_NORMAL ); - assert( pCsr->pDb->iTid==0 ); - - rc = hctDbCsrScanFinish(pCsr); - hctDbCsrReset(pCsr); - - if( rc==SQLITE_OK ){ - rc = hctDbCsrSeekAndDescend(pCsr, pRec, iKey, 0, &bExact); - } - if( rc==SQLITE_OK ){ - rc = hctDbCsrScanStart(pCsr, pRec, iKey); - } - - /* The main cursor now points to the largest entry less than or equal - ** to the supplied key (pRec or iKey). If the supplied key is smaller - ** than all entries in the table, then pCsr->iCell is set to -1. */ - if( rc==SQLITE_OK ){ - if( pCsr->iCell<0 ){ - /* The supplied key is smaller than all keys in the table. If the cursor - ** is BTREE_DIR_REVERSE or NONE, then leave it as it is at EOF. - ** Otherwise, if the cursor is BTREE_DIR_FORWARD, attempt to move - ** it to the first valid entry. */ - if( pCsr->eDir==BTREE_DIR_FORWARD ){ - rc = hctDbCsrFirstValid(pCsr); - *pRes = sqlite3HctDbCsrEof(pCsr) ? -1 : +1; - }else{ - *pRes = -1; - } - }else{ - - if( rc==SQLITE_OK && 0==hctDbCurrentIsVisible(pCsr) ){ - switch( pCsr->eDir ){ - case BTREE_DIR_FORWARD: - *pRes = 1; - rc = sqlite3HctDbCsrNext(pCsr); - *pRes = sqlite3HctDbCsrEof(pCsr) ? 
-1 : +1; - break; - case BTREE_DIR_REVERSE: - rc = sqlite3HctDbCsrPrev(pCsr); - /* Either the cursor is is now at EOF or it points to a key - ** smaller than iKey/pRec. Either way, set (*pRes) to -ve. */ - *pRes = -1; - break; - default: assert( pCsr->eDir==BTREE_DIR_NONE ); - hctDbCsrReset(pCsr); - *pRes = -1; - break; - } - }else{ - *pRes = (bExact ? 0 : -1); - } - } - } - - return rc; -} - -SQLITE_PRIVATE void sqlite3HctDbSetSavePhysical( - HctDatabase *pDb, - int (*xSave)(void*, i64 iPhys), - void *pSave -){ - pDb->xSavePhysical = xSave; - pDb->pSavePhysical = pSave; -} - -SQLITE_PRIVATE int sqlite3HctDbCsrRollbackSeek( - HctDbCsr *pCsr, /* Cursor to seek */ - UnpackedRecord *pRec, /* Key for index tables */ - i64 iKey, /* Key for intkey tables */ - int *pOp /* Required rollback op */ -){ - HctDatabase *pDb = pCsr->pDb; - int rc = SQLITE_OK; - int bExact = 0; - int op = 0; - - hctDbCsrReset(pCsr); - - /* At this point pDb->bRollback is set and pDb->iTid is set to the TID - ** of the transaction being rolled back. There are four possibilities: - ** - ** 1) The key was written by transaction pDb->iTid and there was no - ** previous entry. - ** - ** 2) The key was written by transaction pDb->iTid and there is a - ** previous entry to restore. - ** - ** 3) The key was deleted by transaction pDb->iTid. - ** - ** 4) None of the above. No rollback required. 
- */ - - rc = hctDbCsrSeek(pCsr, 0, 0, 0, pRec, iKey, &bExact); - if( rc==SQLITE_OK && bExact==0 ){ - rc = hctDbCsrRollbackDescend(pCsr, pRec, iKey, &bExact); - } - - if( rc==SQLITE_OK && bExact ){ - HctDbCell cell; - int iCell = 0; - const u8 *aPg = hctDbCsrPageAndCell(pCsr, &iCell); - - memset(&cell, 0, sizeof(cell)); - hctDbCellGetByIdx(pDb, aPg, iCell, &cell); - if( cell.iTid==pDb->iTid ){ - op = -1; - rc = hctDbCsrRollbackDescend(pCsr, pRec, iKey, &bExact); - } - - if( rc==SQLITE_OK - && bExact - && pCsr->nRange && pDb->iTid==pCsr->aRange[pCsr->nRange-1].iRangeTid - ){ - op = +1; - } - } - - *pOp = op; - return rc; -} - -SQLITE_PRIVATE int sqlite3HctDbIsIndex(HctDatabase *pDb, u32 iRoot, int *pbIndex){ - HctFilePage pg; - int rc = sqlite3HctFilePageGet(pDb->pFile, iRoot, &pg); - if( rc==SQLITE_OK ){ - *pbIndex = (hctPagetype(pg.aOld)==HCT_PAGETYPE_INDEX); - sqlite3HctFilePageRelease(&pg); - } - return rc; -} - -SQLITE_PRIVATE char *sqlite3HctDbLogFile(HctDatabase *pDb){ - return sqlite3HctFileLogFile(pDb->pFile); -} - -static void hctDbCsrInit( - HctDatabase *pDb, - u32 iRoot, - KeyInfo *pKeyInfo, - HctDbCsr *pCsr -){ - memset(pCsr, 0, sizeof(HctDbCsr)); - pCsr->pDb = pDb; - pCsr->iRoot = iRoot; - if( pKeyInfo ){ - pCsr->pKeyInfo = sqlite3KeyInfoRef(pKeyInfo); - } -} - - - -/* -** Return the size of the local part of a nData byte record stored on -** an intkey leaf page. -*/ -#if 0 -static int hctDbLocalSize(HctDatabase *pDb, int nData){ - int nOther = sizeof(HctDbIntkeyLeaf) + sizeof(HctDbIntkeyEntry) + 12; - if( nData<=(pDb->pgsz-nOther) ){ - return nData; - } - assert( !"todo" ); - return 0; -} -#endif - -#if 0 -static i64 hctDbIntkeyGetKey(u8 *aPg, int ii){ - HctDbIntkeyLeaf *p = (HctDbIntkeyLeaf*)aPg; - return p->aEntry[ii].iKey; -} -#endif - - - -/* -** Return the maximum number of entries that fit on an intkey internal -** node if the database page size is as specified by the only parameter. 
-*/ -static int hctDbMaxCellsPerIntkeyNode(int pgsz){ - return (pgsz - sizeof(HctDbIntkeyNode)) / sizeof(HctDbIntkeyNodeEntry); -} -static int hctDbMinCellsPerIntkeyNode(int pgsz){ - return (pgsz - sizeof(HctDbIntkeyNode)) / (3*sizeof(HctDbIntkeyNodeEntry)); -} - -static void hctDbIrrevocablyEvictPage(HctDatabase *pDb, HctDbWriter *p); - -static int hctDbOverflowArrayFree(HctDatabase *pDb, HctDbOverflowArray *p){ - int ii = 0; - int rc = SQLITE_OK; - - for(ii=0; rc==SQLITE_OK && iinEntry; ii++){ - u32 pgno = p->aOvfl[ii].pgno; - int nRem = p->aOvfl[ii].nOvfl; - while( 1 ){ - HctFilePage pg; - sqlite3HctFileClearPhysInUse(pDb->pFile, pgno, 0); - nRem--; - if( nRem==0 ) break; - rc = hctDbGetPhysical(pDb, pgno, &pg); - assert( rc==SQLITE_OK ); - pgno = ((HctDbPageHdr*)pg.aOld)->iPeerPg; - sqlite3HctFilePageRelease(&pg); - } - } - - return rc; -} - -#ifdef SQLITE_DEBUG -/* -** Do some assert() statements to check that: -** -** * the pages in discardpg[] are sorted according to key. -*/ -static void assert_writer_is_ok(HctDatabase *pDb, HctDbWriter *p){ - int ii; - HctBuffer buf = {0,0,0}; - UnpackedRecord *pRec = 0; - - for(ii=1; iidiscardpg.nPg; ii++){ - u8 *a1 = p->discardpg.aPg[ii-1].aOld; - u8 *a2 = p->discardpg.aPg[ii].aOld; - - if( hctPagetype(a1)==HCT_PAGETYPE_INTKEY ){ - i64 i1 = hctDbIntkeyFPKey(a1); - i64 i2 = hctDbIntkeyFPKey(a2); - assert( i2>i1 ); - }else{ - int nData = 0; - const u8 *aData = 0; - int rc = hctDbLoadRecord(pDb, &buf, a1, 0, &nData, &aData); - if( rc==SQLITE_OK && pRec==0 ){ - pRec = sqlite3VdbeAllocUnpackedRecord(p->writecsr.pKeyInfo); - if( pRec==0 ){ - rc = SQLITE_NOMEM; - } - } - if( rc==SQLITE_OK ){ - int bGe = 555; - sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nData, aData, pRec); - rc = hctDbCompareFPKey(pDb, pRec, a2, &bGe); - assert( rc!=SQLITE_OK || bGe==0 ); - } - } - } - - sqlite3HctBufferFree(&buf); - hctDbFreeUnpacked(pRec); -} -#else /* if !SQLITE_DEBUG */ -# define assert_writer_is_ok(pDb, p) -#endif - -/* -** Cleanup the 
writer object passed as the first argument. -*/ -static void hctDbWriterCleanup(HctDatabase *pDb, HctDbWriter *p, int bRevert){ - - if( p->bDoCleanup ){ - int ii; - - sqlite3HctFileDebugPrint(pDb->pFile, - "writer cleanup height=%d bRevert=%d\n", p->iHeight, bRevert - ); - - assert_writer_is_ok(pDb, p); - - sqlite3HctBufferFree(&p->fp.buf); - memset(&p->fp, 0, sizeof(p->fp)); - - /* sqlite3HctFilePageUnwrite(&p->fanpg); */ - sqlite3HctFilePageRelease(&p->fanpg); - - /* If not reverting, mark the overflow chains in p->delOvfl as free */ - if( bRevert==0 ){ - hctDbOverflowArrayFree(pDb, &p->delOvfl); - }else{ - hctDbOverflowArrayFree(pDb, &p->insOvfl); - } - sqlite3_free(p->delOvfl.aOvfl); - sqlite3_free(p->insOvfl.aOvfl); - memset(&p->delOvfl, 0, sizeof(p->delOvfl)); - memset(&p->insOvfl, 0, sizeof(p->insOvfl)); - - for(ii=0; iiwritepg.nPg; ii++){ - HctFilePage *pPg = &p->writepg.aPg[ii]; - if( bRevert ){ - if( pPg->aNew ){ - sqlite3HctFilePageUnwrite(pPg); - }else if( ii>0 ){ - sqlite3HctFileClearInUse(pPg, 1); - } - } - sqlite3HctFilePageRelease(pPg); - } - hctDbPageArrayReset(&p->writepg); - - for(ii=0; iidiscardpg.nPg; ii++){ - if( bRevert && pDb->pConfig->nTryBeforeUnevict>1 ){ - sqlite3HctFilePageUnevict(&p->discardpg.aPg[ii]); - } - sqlite3HctFilePageRelease(&p->discardpg.aPg[ii]); - } - - hctDbPageArrayReset(&p->discardpg); - p->fp.iKey = 0; - p->fp.aKey = 0; - - if( p->iEvictLockedPgno ){ - assert( p->writecsr.iRoot ); - p->nEvictLocked++; - if( p->nEvictLocked>=pDb->pConfig->nTryBeforeUnevict ){ - p->nEvictLocked = -1; - hctDbIrrevocablyEvictPage(pDb, p); - p->nEvictLocked = 0; - } - }else{ - p->nEvictLocked = 0; - } - p->iEvictLockedPgno = 0; - p->bAppend = 0; - - /* Free/zero various buffers and caches */ - hctDbCsrCleanup(&p->writecsr); - hctDbCsrCleanup(&pDb->rbackcsr); - p->bDoCleanup = 0; - } -} - -static int hctDbInsert( - HctDatabase *pDb, - HctDbWriter *p, - u32 iRoot, - UnpackedRecord *pRec, /* The key value for index tables */ - i64 iKey, /* For 
intkey tables, the key value */ - u32 iChildPg, /* For internal node ops, the child pgno */ - int bDel, /* True for a delete operation */ - int nData, const u8 *aData /* Record/key to insert */ -); - -typedef struct HctDbWriterOrigin HctDbWriterOrigin; -struct HctDbWriterOrigin { - u8 bDiscard; /* 1 for aDiscard[], 0 for aWritePg[] */ - i16 iPg; /* Index of page in array*/ -}; - -static int hctdbWriterSortFPKeys( - HctDatabase *pDb, - int eType, - HctDbWriter *p, - HctDbWriterOrigin *aOrigin /* Populate this array */ -){ - int iDiscard = 0; - int iWP = 1; - int iOut = 0; - int rc = SQLITE_OK; - - assert( eType==HCT_PAGETYPE_INDEX || eType==HCT_PAGETYPE_INTKEY ); - - while( iDiscarddiscardpg.nPg || iWPwritepg.nPg ){ - if( iDiscard>=p->discardpg.nPg ){ - aOrigin[iOut].bDiscard = 0; - aOrigin[iOut].iPg = iWP++; - iOut++; - } - else if( iWP>=p->writepg.nPg ){ - aOrigin[iOut].bDiscard = 1; - aOrigin[iOut].iPg = iDiscard++; - iOut++; - }else{ - int bDiscard = 0; - const u8 *aD = p->discardpg.aPg[iDiscard].aOld; - const u8 *aW = p->writepg.aPg[iWP].aOld; - - if( eType==HCT_PAGETYPE_INTKEY ){ - i64 i1 = hctDbIntkeyFPKey(aD); - i64 i2 = hctDbIntkeyFPKey(aW); - bDiscard = (i1<=i2); - }else{ - int nFP = 0; - const u8 *aFP = 0; - UnpackedRecord *pRec = p->writecsr.pRec; - rc = hctDbLoadRecord(pDb, &p->writecsr.rec, aW, 0, &nFP, &aFP); - if( rc!=SQLITE_OK ) break; - sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nFP, aFP, pRec); - rc = hctDbCompareFPKey(pDb, pRec, aD, &bDiscard); - if( rc!=SQLITE_OK ) break; - } - - aOrigin[iOut].bDiscard = bDiscard; - if( bDiscard ){ - aOrigin[iOut].iPg = iDiscard++; - }else{ - aOrigin[iOut].iPg = iWP++; - } - iOut++; - } - } - - return rc; -} - -#if 0 -/* -** -*/ -static int hctDbTruncateRecord( - HctBuffer *pBuf, /* Buffer to use for storage space */ - KeyInfo *pKeyInfo, /* Description of index */ - int *pnFP, /* IN/OUT: Size of record */ - const u8 **aFP /* IN/OUT: Pointer to record */ -){ -} -#endif - -/* -** This is a wrapper around: -** 
-** sqlite3HctFilePageEvict(pPg, 0); -** -** If the call fails with SQLITE_LOCKED because page pPg has been evicted, -** HctDbWriter.iEvictLockedPgno is set to the logical page number of pPg. -*/ -static int hctDbFilePageEvict(HctDbWriter *p, HctFilePage *pPg){ - int rc = sqlite3HctFilePageEvict(pPg, 0); - if( rc==SQLITE_LOCKED && sqlite3HctFilePageIsEvicted(pPg->pFile, pPg->iPg) ){ - p->iEvictLockedPgno = pPg->iPg; - } - return rc; -} - -static int hctDbFilePageCommit(HctDbWriter *p, HctFilePage *pPg){ - int rc = sqlite3HctFilePageCommit(pPg); - if( rc==SQLITE_LOCKED && sqlite3HctFilePageIsEvicted(pPg->pFile, pPg->iPg) ){ - p->iEvictLockedPgno = pPg->iPg; - } - return rc; -} - -static int hctDbMigrateReinsertKeys(HctDatabase *pDb, HctDbWriter *p); - -static int hctDbInsertFlushWrite(HctDatabase *pDb, HctDbWriter *p){ - int rc = SQLITE_OK; - int ii; - int eType = hctPagetype(p->writepg.aPg[0].aNew); - HctFilePage root; - int bUnevict = 0; - - memset(&root, 0, sizeof(root)); - - rc = hctDbMigrateReinsertKeys(pDb, p); - -#ifdef SQLITE_DEBUG - for(ii=1; rc==SQLITE_OK && iiwritepg.nPg; ii++){ - u32 iPeer = ((HctDbPageHdr*)p->writepg.aPg[ii-1].aNew)->iPeerPg; - assert( p->writepg.aPg[ii].iPg==iPeer ); - } -#endif - - /* Test if this is a split of a root page of the tree. */ - if( rc==SQLITE_OK - && p->writepg.nPg>1 - && p->writepg.aPg[0].iPg==p->writecsr.iRoot - ){ - HctFilePage *pPg0 = &p->writepg.aPg[0]; - hctMemcpy(&root, pPg0, sizeof(HctFilePage)); - memset(pPg0, 0, sizeof(HctFilePage)); - rc = sqlite3HctFilePageNew(pDb->pFile, pPg0); - if( rc==SQLITE_OK ){ - hctMemcpy(pPg0->aNew, root.aNew, pDb->pgsz); - hctDbRootPageInit(eType==HCT_PAGETYPE_INDEX, - hctPageheight(root.aNew)+1, pPg0->iPg, root.aNew, pDb->pgsz - ); - } - } - - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageRelease(&p->fanpg); - } - - /* Loop through the set of pages to write out. They must be - ** written in reverse order - so that page aWritePg[0] is written - ** last. 
*/ - assert( p->writepg.nPg>0 ); - for(ii=p->writepg.nPg-1; rc==SQLITE_OK && ii>=0; ii--){ - rc = hctDbFilePageCommit(p, &p->writepg.aPg[ii]); - } - - /* If there is one, write the new root page to disk */ - if( rc==SQLITE_OK && root.iPg ){ - rc = hctDbFilePageCommit(p, &root); - sqlite3HctFilePageRelease(&root); - } - - if( rc!=SQLITE_OK ){ - bUnevict = 1; - } - - /* If there is more than one page in the writepg array, or more than - ** zero in the discardpg array, then the parent list must be updated. - ** This block does that. */ - if( (p->writepg.nPg>1 || p->discardpg.nPg>0) && rc==SQLITE_OK ){ - const u32 iRoot = p->writecsr.iRoot; - const int nOrig = p->discardpg.nPg + p->writepg.nPg - 1; - HctDbWriterOrigin aStatic[6]; - HctDbWriterOrigin *aDyn = 0; - HctDbWriterOrigin *aOrig = aStatic; - HctBuffer buf; - HctDbWriter wr; - int iOrig = 0; - - memset(&buf, 0, sizeof(buf)); - memset(&wr, 0, sizeof(wr)); - hctDbPageArrayReset(&wr.writepg); - hctDbPageArrayReset(&wr.discardpg); - - if( nOrig>ArraySize(aStatic) ){ - int nByte = sizeof(HctDbWriterOrigin) * nOrig; - aOrig = aDyn = (HctDbWriterOrigin*)sqlite3HctMalloc(&rc, nByte); - } - - if( rc==SQLITE_OK ){ - wr.iHeight = p->iHeight + 1; - rc = hctDbCsrAllocateUnpacked(&p->writecsr); - } - - if( rc==SQLITE_OK ){ - rc = hctdbWriterSortFPKeys(pDb, eType, p, aOrig); - } - - if( rc==SQLITE_OK ){ - do { - assert( rc==SQLITE_OK || rc==SQLITE_LOCKED ); - rc = SQLITE_OK; - - while( iOrigbDiscard; - - pPg = &(bDel ? 
p->discardpg.aPg : p->writepg.aPg)[pOrig->iPg]; - if( eType==HCT_PAGETYPE_INTKEY ){ - iKey = hctDbIntkeyFPKey(pPg->aOld); - }else{ - rc = hctDbLoadRecord(pDb, &buf, pPg->aOld, 0, &nFP, &aFP); - if( rc!=SQLITE_OK ) break; - pRec = p->writecsr.pRec; - sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nFP, aFP, pRec); - sqlite3HctDbRecordTrim(pRec); - } - - rc = hctDbInsert( - pDb, &wr, iRoot, pRec, iKey, pPg->iPg, bDel, nFP, aFP - ); - } - - if( rc==SQLITE_OK ){ - rc = hctDbInsertFlushWrite(pDb, &wr); - } - if( rc==SQLITE_LOCKED ){ - assert( iOrig>=wr.nWriteKey ); - iOrig -= wr.nWriteKey; - pDb->nCasFail++; - pDb->stats.nInternalRetry++; - } - hctDbWriterCleanup(pDb, &wr, (rc!=SQLITE_OK)); - wr.nWriteKey = 0; - - }while( rc==SQLITE_LOCKED ); - } - - sqlite3HctBufferFree(&buf); - sqlite3_free(aDyn); - } - - if( rc==SQLITE_OK ){ - for(ii=0; iidiscardpg.nPg; ii++){ - sqlite3HctFileClearInUse(&p->discardpg.aPg[ii], 0); - } - } - - /* Clean up the Writer object */ - hctDbWriterCleanup(pDb, p, bUnevict); - return rc; -} - -SQLITE_PRIVATE void sqlite3HctDbRollbackMode(HctDatabase *pDb, int eRollback){ - assert( eRollback==0 || pDb->eMode==HCT_MODE_NORMAL ); - pDb->pa.nWriteKey = 0; - pDb->eMode = eRollback ? HCT_MODE_ROLLBACK : HCT_MODE_NORMAL; - if( eRollback>1 ){ - memset(&pDb->pa, 0, sizeof(pDb->pa)); - hctDbPageArrayReset(&pDb->pa.writepg); - hctDbPageArrayReset(&pDb->pa.discardpg); - - /* During recovery rollback the connection should read the latest - ** version of the db - no exceptions. Set these two to the largest - ** possible values to ensure that this happens. */ - pDb->iSnapshotId = LARGEST_TID-1; - pDb->iLocalMinTid = LARGEST_TID-1; - } -} - -SQLITE_PRIVATE i64 sqlite3HctDbNCasFail(HctDatabase *pDb){ - return pDb->nCasFail; -} - -#if 0 -static HctDbIntkeyEntry *hctDbIntkeyEntry(u8 *aPg, int iCell){ - return iCell<0 ? 
0 : (&((HctDbIntkeyLeaf*)aPg)->aEntry[iCell]); -} -#endif - -SQLITE_PRIVATE int sqlite3HctDbInsertFlush(HctDatabase *pDb, int *pnRetry){ - int rc = SQLITE_OK; - if( pDb->pa.writepg.nPg ){ - rc = hctDbInsertFlushWrite(pDb, &pDb->pa); - if( rc==SQLITE_LOCKED ){ - *pnRetry = pDb->pa.nWriteKey; - rc = SQLITE_OK; - pDb->nCasFail++; - }else{ - *pnRetry = 0; - } -#if 0 - { - sqlite3HctFileDebugPrint(pDb->pFile, - "%p: %s sqlite3HctDbInsertFlush() -> %d (nRetry=%d)\n", - pDb, (pDb->eMode==HCT_MODE_ROLLBACK ? "RB" : " "), rc, *pnRetry - ); - fflush(stdout); - } -#endif - pDb->pa.nWriteKey = 0; - } - return rc; -} - -/* -** If pRec is not NULL, it contains an unpacked index key. Compare this key -** with the write-fp-key in pDb->pa.aWriteFpKey. Return true if pRec is greater -** than or equal to the write-fp-key. -** -** Or, if pRec is NULL, iKey is the key and it is compared to -** pDb->iWriteFpKey. -*/ -static int hctDbTestWriteFpKey( - HctDbWriter *p, - RecordCompare xCompare, - UnpackedRecord *pRec, - i64 iKey -){ - if( pRec ){ - int r; - if( p->fp.aKey==0 ){ - r = 1; - }else{ - r = xCompare(p->fp.iKey, p->fp.aKey, pRec); - } - return (r <= 0); - } - return iKey>=p->fp.iKey; -} - -static int hctDbSetWriteFpKey(HctDatabase *pDb, HctDbWriter *p){ - int rc = SQLITE_OK; - HctDbPageHdr *pHdr = (HctDbPageHdr*)p->writepg.aPg[p->writepg.nPg-1].aNew; - - p->fp.aKey = 0; - p->fp.iKey = 0; - - if( pHdr->iPeerPg==0 ){ - if( hctPagetype(pHdr)==HCT_PAGETYPE_INTKEY ){ - p->fp.iKey = LARGEST_INT64; - } - }else{ - HctFilePage pg; - rc = sqlite3HctFilePageGet(pDb->pFile, pHdr->iPeerPg, &pg); - if( rc==SQLITE_OK ){ - if( hctPagetype(pHdr)==HCT_PAGETYPE_INTKEY ){ - p->fp.iKey = hctDbIntkeyFPKey(pg.aOld); - }else{ - rc = hctDbLoadRecordFP(pDb, pg.aOld, 0, &p->fp); - } - sqlite3HctFilePageRelease(&pg); - } - } - - return rc; -} - -/* -** Buffer aTarget[] contains a page that contains variable length keys -** (i.e. an intkey leaf or an index leaf or node). 
This function returns -** the offset of the aEntry[] array in aTarget. Before doing so, it sets -** output variable (*pszEntry) to the sizeof(aEntry[0]). -*/ -static int hctDbEntryArrayDim(const u8 *aTarget, int *pszEntry){ - int eType = hctPagetype(aTarget); - int nHeight = hctPageheight(aTarget); - int nRet; - - assert( eType==HCT_PAGETYPE_INTKEY || eType==HCT_PAGETYPE_INDEX ); - assert( eType==HCT_PAGETYPE_INDEX || nHeight==0 ); - if( eType==HCT_PAGETYPE_INTKEY ){ - *pszEntry = sizeof(HctDbIntkeyEntry); - nRet = sizeof(HctDbIntkeyLeaf); - }else if( nHeight==0 ){ - *pszEntry = sizeof(HctDbIndexEntry); - nRet = sizeof(HctDbIndexLeaf); - }else{ - *pszEntry = sizeof(HctDbIndexNodeEntry); - nRet = sizeof(HctDbIndexNode); - } - - return nRet; -} - -static int hctIsVarRecords(const u8 *aTarget){ - int eType = hctPagetype(aTarget); - int nHeight = hctPageheight(aTarget); - return (nHeight==0 || eType==HCT_PAGETYPE_INDEX); -} - -#ifdef SQLITE_DEBUG - -static void print_out_page(const char *zCaption, const u8 *aData, int nData){ - HctDbPageHdr *pPg = (HctDbPageHdr*)aData; - - if( hctPagetype(pPg)==HCT_PAGETYPE_INTKEY && pPg->nHeight==0 ){ - HctDbIntkeyLeaf *pLeaf = (HctDbIntkeyLeaf*)pPg; - char *zPrint = 0; - const char *zSep = ""; - int ii; - - for(ii=0; iipg.nEntry; ii++){ - HctDbIntkeyEntry *pEntry = &pLeaf->aEntry[ii]; - zPrint = sqlite3_mprintf("%z%s(k=%lld f=%.2x %d..%d)", zPrint, zSep, - pEntry->iKey, pEntry->flags, - pEntry->iOff, pEntry->iOff+ hctDbIntkeyEntrySize(pEntry, nData) - ); - zSep = ","; - } - - printf("%s: nFreeGap=%d nFreeBytes=%d (intkey leaf)\n", zCaption, - pLeaf->hdr.nFreeGap, - pLeaf->hdr.nFreeBytes - ); - printf("%s: %s\n", zCaption, zPrint); - sqlite3_free(zPrint); - } - - if( hctPagetype(pPg)==HCT_PAGETYPE_INDEX && pPg->nHeight==0 ){ - HctDbIndexLeaf *pLeaf = (HctDbIndexLeaf*)pPg; - char *zPrint = 0; - const char *zSep = ""; - int ii; - - for(ii=0; iipg.nEntry; ii++){ - HctDbIndexEntry *pEntry = &pLeaf->aEntry[ii]; - zPrint = 
sqlite3_mprintf("%z%s(%d..%d)", zPrint, zSep, - pEntry->iOff, pEntry->iOff + hctDbIndexEntrySize(pEntry, nData) - ); - zSep = ","; - } - - printf("%s: nFreeGap=%d nFreeBytes=%d (index leaf)\n", zCaption, - pLeaf->hdr.nFreeGap, - pLeaf->hdr.nFreeBytes - ); - printf("%s: %s\n", zCaption, zPrint); - fflush(stdout); - sqlite3_free(zPrint); - } - - -} - -#define assert_or_print(E) \ - if( !(E) ){ \ - print_out_page("page", aData, nData); \ - assert( E ); \ - } - -typedef struct VarCellReader VarCellReader; -struct VarCellReader { - const u8 *aData; - int nData; - int szEntry; - int iEntry0; -}; - -static void hctVCRInit(VarCellReader *p, const u8 *aData, int nData){ - p->aData = aData; - p->nData = nData; - p->iEntry0 = hctDbEntryArrayDim(aData, &p->szEntry); -} - -static int hctVCRFindCell(VarCellReader *p, int iCell, int *pnByte){ - HctDbIndexNodeEntry *pEntry; - - pEntry = (HctDbIndexNodeEntry*)&p->aData[p->iEntry0 + iCell*p->szEntry]; - *pnByte = hctDbLocalsize(p->aData, p->nData, pEntry->nSize) - + hctDbOffset(0, pEntry->flags); - - return pEntry->iOff; -} - -static void assert_page_is_ok(const u8 *aData, int nData){ - - if( aData && hctIsVarRecords(aData) ){ - HctDbIndexNode *p = (HctDbIndexNode*)aData; - VarCellReader vcr; - int iEnd = nData; - int iStart = 0; - int nRecTotal = 0; - int ii = 0; - int nFreeExpect; - - hctVCRInit(&vcr, aData, nData); - for(ii=0; iipg.nEntry; ii++){ - int sz = 0; - int iOff = hctVCRFindCell(&vcr, ii, &sz); - if( iOff ){ - assert_or_print( (iOff+sz)<=nData ); - iEnd = MIN(iEnd, iOff); - nRecTotal += sz; - }else{ - assert( sz==0 && ii==0 ); - } - } - - iStart = vcr.iEntry0 + vcr.szEntry * p->pg.nEntry; - nFreeExpect = nData - (iStart + nRecTotal); - - assert_or_print( p->hdr.nFreeGap==(iEnd - iStart) ); - assert_or_print( p->hdr.nFreeBytes==nFreeExpect); - } - -} -#else -# define assert_page_is_ok(x,y) -#endif - -#ifdef SQLITE_DEBUG -static void assert_all_pages_ok(HctDatabase *pDb, HctDbWriter *p){ - int ii; - return; - for(ii=0; 
iiwritepg.nPg; ii++){ - u8 *aPg = p->writepg.aPg[ii].aNew; - assert( aPg[0]!=0x00 ); - assert( hctIsVarRecords(aPg) ); - assert_page_is_ok(aPg, pDb->pgsz); - assert( ii==p->writepg.nPg-1 - || ((HctDbPageHdr*)aPg)->iPeerPg==p->writepg.aPg[ii+1].iPg - ); - } -} -static void assert_all_pages_nonempty(HctDatabase *pDb, HctDbWriter *p){ - return; - if( p->writepg.nPg>1 ){ - int ii; - for(ii=0; iiwritepg.nPg; ii++){ - HctDbPageHdr *pPg = (HctDbPageHdr*)p->writepg.aPg[ii].aNew; - assert( pPg->nEntry>0 ); - } - } -} -#else -# define assert_all_pages_ok(x,y) -# define assert_all_pages_nonempty(x,y) -#endif - - -/* -** HOW INSERT/DELETE OPERATIONS WORK: -** -** 1. If the page array is not empty, flush it to disk if required. It -** should be flushed to disk if either: -** -** a) the key being written (specified by iKey/pRec) is greater or -** equal to the FP key to the right of the page array (stored -** in HctDbWriter.iWriteFpKey/aWriteFpKey). -** -** b) there are more than HCTDB_MAX_DIRTY pages in the array. -** -** 2. If the page array is empty, either because it was flushed to disk -** in (1) or because it was empty when this function was called, seek -** the write-cursor (HctDbWriter.writecsr) to the key being written. -** The page the cursor seeks to becomes the first page of the page -** array. -** -** 3. Locate within the page array the page into which the new key -** or delete-key should be inserted. There are three possible outcomes: -** -** i) the new key may just be written to the page. -** -** ii) the new key fits on the page, but leaves it underfull. In this -** context, "underfull" means that the total amount of free space -** on the page is less than or equal to (pgsz*2/3). -** -** iii) the new key does not fit on the page. -** -** In cases (ii) or (iii), first ensure that that the page has two peers in -** the page array (unless there are fewer than three pages in the list, in -** which case the entire list should be loaded). 
Then redistribute the keys -** between the minimum number of pages, discarding or adding nodes as -** required. -*/ - -/* -** Insert nPg new pages at index iPg into the write-array of the HctDbWriter -** passed as the second argument and link them into the list. -*/ -static int hctDbExtendWriteArray( - HctDatabase *pDb, - HctDbWriter *p, - int iPg, - int nPg -){ - int rc = SQLITE_OK; - int ii; - - assert( iPg>0 ); - assert( (p->writepg.nPg+nPg)>0 ); - assert( p->writepg.nPg>0 ); - - /* Add any new pages required */ - for(ii=iPg; rc==SQLITE_OK && iiwritepg.nPgwritepg.aPg) ); - assert( ii>0 ); - if( iiwritepg.nPg ){ - int nByte = sizeof(HctFilePage) * (p->writepg.nPg-ii); - memmove(&p->writepg.aPg[ii+1], &p->writepg.aPg[ii], nByte); - } - p->writepg.nPg++; - memset(&p->writepg.aPg[ii], 0, sizeof(HctFilePage)); - rc = sqlite3HctFilePageNew(pDb->pFile, &p->writepg.aPg[ii]); - if( rc==SQLITE_OK ){ - HctDbPageHdr *pNew = (HctDbPageHdr*)p->writepg.aPg[ii].aNew; - HctDbPageHdr *pPrev = (HctDbPageHdr*)p->writepg.aPg[ii-1].aNew; - memset(pNew, 0, sizeof(HctDbPageHdr)); - pNew->hdrFlags = hctPagetype(pPrev); - pNew->nHeight = pPrev->nHeight; - pNew->iPeerPg = pPrev->iPeerPg; - pPrev->iPeerPg = p->writepg.aPg[ii].iPg; - } - } - - /* Remove pages that are not required */ - for(ii=nPg; ii<0; ii++){ - HctDbPageHdr *pPrev = (HctDbPageHdr*)(p->writepg.aPg[iPg-1].aNew); - HctDbPageHdr *pRem = (HctDbPageHdr*)(p->writepg.aPg[iPg].aNew); - pPrev->iPeerPg = pRem->iPeerPg; - assert( p->writepg.nPg>1 ); - p->writepg.nPg--; - - assert( iPg!=0 ); - assert( p->writepg.aPg[iPg].aOld==0 ); - sqlite3HctFilePageUnwrite(&p->writepg.aPg[iPg]); - - if( iPg!=p->writepg.nPg ){ - int nByte = sizeof(HctFilePage) * (p->writepg.nPg-iPg); - assert( nByte>0 ); - memmove(&p->writepg.aPg[iPg], &p->writepg.aPg[iPg+1], nByte); - } - } - - return rc; -} - -static int hctDbCsrLoadAndDecode( - HctDbCsr *pCsr, - int iCell, - UnpackedRecord **ppRec -){ - const u8 *aPg = pCsr->pg.aNew ? 
pCsr->pg.aNew : pCsr->pg.aOld; - int nData = 0; - const u8 *aData = 0; - int rc; - - rc = hctDbLoadRecord(pCsr->pDb, &pCsr->rec, aPg, iCell, &nData, &aData); - if( rc==SQLITE_OK ){ - rc = hctDbCsrAllocateUnpacked(pCsr); - } - if( rc==SQLITE_OK ){ - *ppRec = pCsr->pRec; - sqlite3VdbeRecordUnpack(pCsr->pKeyInfo, nData, aData, pCsr->pRec); - assert( pCsr->pRec->nField>0 ); - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctDbCsrLoadAndDecode(HctDbCsr *pCsr, UnpackedRecord **ppRec){ - return hctDbCsrLoadAndDecode(pCsr, pCsr->iCell, ppRec); -} - -/* -** -*/ -static int hctDbFindLhsPeer( - HctDatabase *pDb, - HctDbWriter *p, - HctFilePage *pPg, - HctFilePage *pOut -){ - HctDbCsr csr; - u8 *aLeft = pPg->aNew ? pPg->aNew : pPg->aOld; - int rc = SQLITE_OK; - - hctDbCsrInit(pDb, p->writecsr.iRoot, 0, &csr); - csr.pKeyInfo = p->writecsr.pKeyInfo; - if( hctPagetype(aLeft)==HCT_PAGETYPE_INTKEY ){ - i64 iKey = hctDbIntkeyFPKey(aLeft); - assert( iKey!=SMALLEST_INT64 ); - rc = hctDbCsrSeek(&csr, 0, p->iHeight, 0, 0, iKey-1, 0); - }else{ - UnpackedRecord *pRec = 0; - HctBuffer buf; - int nData = 0; - const u8 *aData = 0; - memset(&buf, 0, sizeof(buf)); - rc = hctDbLoadRecord(pDb, &buf, aLeft, 0, &nData, &aData); - if( rc==SQLITE_OK ){ - rc = hctDbCsrAllocateUnpacked(&p->writecsr); - } - if( rc==SQLITE_OK ){ - pRec = p->writecsr.pRec; - sqlite3VdbeRecordUnpack(p->writecsr.pKeyInfo, nData, aData, pRec); - sqlite3HctDbRecordTrim(pRec); - pRec->default_rc = 1; - rc = hctDbCsrSeek(&csr, 0, p->iHeight, 0, pRec, 0, 0); - pRec->default_rc = 0; - - assert( csr.pg.iPg!=pPg->iPg ); - } - sqlite3HctBufferFree(&buf); - } - - if( rc==SQLITE_OK - && ((HctDbPageHdr*)csr.pg.aOld)->iPeerPg==pPg->iPg - ){ - *pOut = csr.pg; - }else{ - memset(pOut, 0, sizeof(HctFilePage)); - rc = SQLITE_LOCKED_ERR(pPg->iPg, "peer"); - } - - return rc; -} - -static void hctDbIrrevocablyEvictPage(HctDatabase *pDb, HctDbWriter *p){ - int rc = SQLITE_OK; - u32 iLocked = p->iEvictLockedPgno; - int bDone = 0; - - KeyInfo 
*pKeyInfo = sqlite3KeyInfoRef(p->writecsr.pKeyInfo); - u32 iRoot = p->writecsr.iRoot; - - sqlite3HctFileDebugPrint(pDb->pFile,"BEGIN forced eviction of %d\n", iLocked); - - do { - HctFilePage pg1; - HctFilePage pg0; - memset(&pg1, 0, sizeof(pg1)); - if( p->writecsr.iRoot==0 ){ - hctDbCsrInit(pDb, iRoot, pKeyInfo, &p->writecsr); - } - rc = sqlite3HctFilePageGet(pDb->pFile, iLocked, &pg1); - while( rc==SQLITE_OK ){ - memset(&pg0, 0, sizeof(pg0)); - rc = hctDbFindLhsPeer(pDb, p, &pg1, &pg0); - if( rc ) break; - if( 0==sqlite3HctFilePageIsEvicted(pg0.pFile, pg0.iPg) ) break; - sqlite3HctFilePageRelease(&pg1); - pg1 = pg0; - memset(&pg0, 0, sizeof(pg0)); - } - - if( rc==SQLITE_OK ){ - bDone = (pg1.iPg==iLocked); - sqlite3HctFileDebugPrint( - pDb->pFile, "forcing write of %d->%d\n", pg0.iPg, pg1.iPg - ); - - rc = sqlite3HctFilePageEvict(&pg1, 1); - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageWrite(&pg0); - } - if( rc==SQLITE_OK ){ - hctMemcpy(pg0.aNew, pg0.aOld, pDb->pgsz); - } - if( rc==SQLITE_OK ){ - p->writepg.aPg[0] = pg0; - p->writepg.nPg = 1; - rc = hctDbExtendWriteArray(pDb, p, 1, 1); - } - if( rc==SQLITE_OK ){ - hctMemcpy(p->writepg.aPg[1].aNew, pg1.aOld, pDb->pgsz); - p->discardpg.aPg[0] = pg1; - p->discardpg.nPg = 1; - } - - p->bDoCleanup = 1; - if( rc==SQLITE_OK ){ - rc = hctDbInsertFlushWrite(pDb, p); - }else{ - hctDbWriterCleanup(pDb, p, 1); - } - - }else{ - sqlite3HctFilePageRelease(&pg0); - sqlite3HctFilePageRelease(&pg1); - } - }while( rc==SQLITE_OK && bDone==0 ); - - sqlite3KeyInfoUnref(pKeyInfo); - sqlite3HctFileDebugPrint(pDb->pFile,"END forced eviction of %d\n", iLocked); -} - -/* -** -*/ -static int hctDbLoadPeers(HctDatabase *pDb, HctDbWriter *p, int *piPg){ - int rc = SQLITE_OK; - int iPg = *piPg; - - if( p->writepg.nPg<3 ){ - HctFilePage *pLeft = &p->writepg.aPg[0]; - - if( p->writepg.nPg==1 && 0==hctIsLeftmost(pLeft->aNew) ){ - HctFilePage *pCopy = 0; - assert( iPg==0 ); - - /* First, evict the page currently in p->writepg.aPg[0]. 
If we - ** successfully evict the page here, then of course no other thread - ** can - which guarantees that the seek operation below really does - ** find the left-hand peer (assuming the db is not corrupt). */ - rc = hctDbFilePageEvict(p, pLeft); - - /* Assuming the LOGICAL_EVICTED flag was successfully set, seek - ** cursor csr to the leaf page immediately to the left of pLeft. */ - if( rc==SQLITE_OK ){ - if( p->discardpg.nPg>0 ){ - int nMove = p->discardpg.nPg * sizeof(HctFilePage); - memmove(&p->discardpg.aPg[1], &p->discardpg.aPg[0], nMove); - } - pCopy = &p->discardpg.aPg[0]; - p->discardpg.nPg++; - *pCopy = *pLeft; - rc = hctDbFindLhsPeer(pDb, p, pCopy, pLeft); - } - if( rc==SQLITE_OK ){ - assert( ((HctDbPageHdr*)pLeft->aOld)->iPeerPg==pCopy->iPg ); - rc = sqlite3HctFilePageWrite(pLeft); - } - - if( rc==SQLITE_OK ){ - hctMemcpy(pLeft->aNew, pLeft->aOld, pDb->pgsz); - rc = hctDbExtendWriteArray(pDb, p, 1, 1); - } - if( rc==SQLITE_OK ){ - hctMemcpy(p->writepg.aPg[1].aNew, pCopy->aNew, pDb->pgsz); - sqlite3HctFilePageUnwrite(pCopy); - *piPg = 1; - } - } - - if( rc==SQLITE_OK ){ - HctDbPageHdr *pHdr = (HctDbPageHdr*)p->writepg.aPg[p->writepg.nPg-1].aNew; - if( pHdr->iPeerPg ){ - HctFilePage *pCopy = &p->discardpg.aPg[p->discardpg.nPg]; - - rc = sqlite3HctFilePageGet(pDb->pFile, pHdr->iPeerPg, pCopy); - if( rc==SQLITE_OK ){ - /* Evict the page immediately */ - rc = hctDbFilePageEvict(p, pCopy); - if( rc!=SQLITE_OK ){ - sqlite3HctFilePageRelease(pCopy); - }else{ - p->discardpg.nPg++; - } - } - - if( rc==SQLITE_OK ){ - rc = hctDbExtendWriteArray(pDb, p, p->writepg.nPg, 1); - } - if( rc==SQLITE_OK ){ - HctFilePage *pPg = &p->writepg.aPg[p->writepg.nPg-1]; - hctMemcpy(pPg->aNew, pCopy->aOld, pDb->pgsz); - rc = hctDbSetWriteFpKey(pDb, p); - } - } - } - } - - return rc; -} - -static int hctDbOverflowArrayAppend(HctDbOverflowArray *p, u32 ovfl, int nOvfl){ - assert( p->nAlloc>=p->nEntry ); - assert( ovfl>0 && nOvfl>0 ); - - if( p->nAlloc==p->nEntry ){ - int nNew = 
p->nAlloc ? p->nAlloc*2 : 16; - int nByte = nNew*sizeof(HctDbOverflow); - HctDbOverflow *aNew = (HctDbOverflow*)sqlite3_realloc(p->aOvfl, nByte); - - if( aNew==0 ){ - return SQLITE_NOMEM_BKPT; - } - p->aOvfl = aNew; - p->nAlloc = nNew; - } - - p->aOvfl[p->nEntry].pgno = ovfl; - p->aOvfl[p->nEntry].nOvfl = nOvfl; - p->nEntry++; - - return SQLITE_OK; -} - - -/* -** Buffer aTarget[] must contain a page with variable sized records - an -** index leaf or node, or an intkey leaf. This function returns the offset -** of the record for entry iEntry, and populates output variable *pFlags -** with the entry flags. -*/ -static int hctDbFindEntry(u8 *aTarget, int iEntry, u8 *pFlags, int *pnSize){ - int iRet; - if( hctPagetype(aTarget)==HCT_PAGETYPE_INTKEY ){ - iRet = ((HctDbIntkeyLeaf*)aTarget)->aEntry[iEntry].iOff; - *pFlags = ((HctDbIntkeyLeaf*)aTarget)->aEntry[iEntry].flags; - *pnSize = ((HctDbIntkeyLeaf*)aTarget)->aEntry[iEntry].nSize; - }else if( hctPageheight(aTarget)==0 ){ - iRet = ((HctDbIndexLeaf*)aTarget)->aEntry[iEntry].iOff; - *pFlags = ((HctDbIndexLeaf*)aTarget)->aEntry[iEntry].flags; - *pnSize = ((HctDbIndexLeaf*)aTarget)->aEntry[iEntry].nSize; - }else{ - iRet = ((HctDbIndexNode*)aTarget)->aEntry[iEntry].iOff; - *pFlags = ((HctDbIndexNode*)aTarget)->aEntry[iEntry].flags; - *pnSize = ((HctDbIndexNode*)aTarget)->aEntry[iEntry].nSize; - } - return iRet; -} - -static int hctDbRemoveOverflow( - HctDatabase *pDb, - HctDbWriter *p, - u8 *aPage, - int iCell -){ - int rc = SQLITE_OK; - - int nSize = 0; - u8 flags = 0; - int iOff = hctDbFindEntry(aPage, iCell, &flags, &nSize); - if( flags & HCTDB_HAS_OVFL ){ - u32 ovfl = 0; - int nOvfl = 0; - const int nBytePerOvfl = pDb->pgsz - sizeof(HctDbPageHdr); - int nLocal = hctDbLocalsize(aPage, pDb->pgsz, nSize); - - if( flags & HCTDB_HAS_TID ) iOff += 8; - if( flags & HCTDB_HAS_RANGETID ) iOff += 8; - if( flags & HCTDB_HAS_RANGEOLD ) iOff += 4; - - ovfl = hctGetU32(&aPage[iOff]); - nOvfl = ((nSize - nLocal) + nBytePerOvfl - 1) / 
nBytePerOvfl; - - rc = hctDbOverflowArrayAppend(&p->delOvfl, ovfl, nOvfl); - } - - return rc; -} - -static void hctDbRemoveTids( - HctDbIndexNodeEntry *p, - u8 *aPg, - u64 iSafeTid -){ - if( (p->flags & HCTDB_HAS_TID)==HCTDB_HAS_TID ){ - u64 iTid; - memcpy(&iTid, &aPg[p->iOff], sizeof(u64)); - if( (iTid & HCT_TID_MASK)<=iSafeTid ){ - p->flags &= ~HCTDB_HAS_TID; - p->iOff += sizeof(u64); - } - } - if( (p->flags & (HCTDB_HAS_TID|HCTDB_HAS_RANGETID))==HCTDB_HAS_RANGETID ){ - u64 iTid; - assert( p->flags & HCTDB_HAS_RANGEOLD ); - memcpy(&iTid, &aPg[p->iOff], sizeof(u64)); - if( (iTid & HCT_TID_MASK)<=iSafeTid ){ - p->flags &= ~(HCTDB_HAS_RANGETID|HCTDB_HAS_RANGEOLD); - p->iOff += (sizeof(u64) + sizeof(u32)); - } - } -} - -/* -** Populate the aSz[] array with the sizes and locations of each cell -** -** (bClobber && nNewCell==0) -> full-delete -** (bClobber) -> clobber -** (bClobber==0) -> insert of new key -*/ -static void hctDbBalanceGetCellSz( - HctDatabase *pDb, - HctDbWriter *pWriter, - int iInsert, - int bClobber, - int nNewCell, /* Bytes stored on page for new cell */ - u8 *aPg, - HctDbCellSz *aSz, - int *pnSz /* OUT: number of entries in aSz[] */ -){ - HctDbPageHdr *pPg = (HctDbPageHdr*)aPg; - u64 iSafeTid = sqlite3HctFileSafeTID(pDb->pFile); - int szEntry; - int i0 = hctDbEntryArrayDim(aPg, &szEntry); - int iCell = 0; /* Current cell of aPgCopy[ii] */ - int iSz = 0; /* Current populated size of aSz[] */ - int iIns = iInsert; - - for(iSz=0; iCellnEntry || iCell==iIns; iSz++){ - HctDbCellSz *pSz = &aSz[iSz]; - - assert( pPg->nEntrypgsz ); - if( iCell==iIns ){ - assert( nNewCell>0 || bClobber ); - if( nNewCell ){ - pSz->nByte = szEntry + nNewCell; - pSz->aEntry = 0; - pSz->aCell = 0; - }else{ - iSz--; - } - if( bClobber ){ - iCell++; - } - iIns = -1; - }else{ - HctDbIndexNodeEntry *pE = (HctDbIndexNodeEntry*)&aPg[i0+iCell*szEntry]; - hctDbRemoveTids(pE, aPg, iSafeTid); - - pSz->nByte = szEntry + hctDbPageRecordSize(pPg, pDb->pgsz, iCell); - pSz->aEntry = (u8*)pE; 
- pSz->aCell = &aPg[pE->iOff]; - assert( pSz->nByte>0 ); - iCell++; - } - } - if( pnSz ) *pnSz = iSz; -} - -typedef struct HctDbInsertOp HctDbInsertOp; -struct HctDbInsertOp { - u8 entryFlags; /* Flags for page entry added by this call */ - u8 *aEntry; /* Buffer containing formatted entry */ - int nEntry; /* Size of aEntry[] */ - int nEntrySize; /* Value for page header nSize field */ - - int iPg; /* Index in HctDbWriter.writepg.aPg */ - int iInsert; /* Index in page to write to */ - - i64 iIntkey; /* Key to insert (if intkey page) */ - - int eBalance; /* True if balance routine must be called */ - int bFullDel; /* True to skip insert */ - - u32 iOldPg; - const u8 *aOldPg; -}; - -/* -** Values for HctDbInsertOp.eBalance -*/ -#define BALANCE_NONE 0 -#define BALANCE_OPTIONAL 1 -#define BALANCE_REQUIRED 2 - - -static int hctDbBalanceAppend( - HctDatabase *pDb, - HctDbWriter *p, - HctDbInsertOp *pOp -){ - int rc = hctDbExtendWriteArray(pDb, p, p->writepg.nPg, 1); - if( rc==SQLITE_OK ){ - HctDbLeaf *pLeaf = (HctDbLeaf*)p->writepg.aPg[p->writepg.nPg-1].aNew; - pLeaf->hdr.nFreeBytes = pDb->pgsz - sizeof(HctDbLeaf); - pLeaf->hdr.nFreeGap = pLeaf->hdr.nFreeBytes; - assert( p->iHeight==0 ); - assert_all_pages_ok(pDb, p); - pOp->iPg = p->writepg.nPg-1; - pOp->iInsert = 0; - } - return rc; -} - -static HctBalance *hctDbBalanceSpace(int *pRc, HctDatabase *pDb){ - if( pDb->pBalance==0 ){ - HctBalance *p = 0; - int nPg = ArraySize(p->aPg); - int nSzAlloc = (nPg * 2 * MAX_CELLS_PER_PAGE(pDb->pgsz)) + 1; - - pDb->pBalance = p = (HctBalance*)sqlite3HctMalloc(pRc, - sizeof(HctBalance) + - nPg * pDb->pgsz + - sizeof(HctDbCellSz) * nSzAlloc - ); - if( p ){ - u8 *aCsr = (u8*)&p[1]; - int ii; - for(ii=0; iiaPg[ii] = aCsr; - aCsr += pDb->pgsz; - } - p->aSz = (HctDbCellSz*)aCsr; - p->nSzAlloc = nSzAlloc; - } - } - return pDb->pBalance; -} - -/* -** Rebalance routine for pages with variably-sized records - intkey leaves, -** index leaves and index nodes. 
-*/ -static int hctDbBalance( - HctDatabase *pDb, - HctDbWriter *p, - HctDbInsertOp *pOp, - int bClobber -){ - int rc = SQLITE_OK; /* Return code */ - int iPg = pOp->iPg; - int iIns = pOp->iInsert; - - int iLeftPg; /* Index of leftmost page used in balance */ - int nIn = 1; /* Number of input peers for balance */ - int ii; /* Iterator used for various things */ - int nOut = 0; /* Number of output peers */ - int szEntry = 0; - int iEntry0 = 0; - HctDbCellSz *aSz = 0; - int nSz = 0; - u8 **aPgCopy = 0; - - int nRem; - - int aPgRem[5]; - int aPgFirst[6]; - - /* Grab the temporary space used by balance operations. */ - HctBalance *pBal = 0; - pBal = hctDbBalanceSpace(&rc, pDb); - if( pBal==0 ) return rc; - - /* Populate the aSz[] and aPgCopy[] arrays as if this were a single-page - ** rebalance only. */ - aSz = &pBal->aSz[MAX_CELLS_PER_PAGE(pDb->pgsz) * 2]; - aPgCopy = pBal->aPg; - hctMemcpy(aPgCopy[0], p->writepg.aPg[iPg].aNew, pDb->pgsz); - hctDbBalanceGetCellSz(pDb, p, iIns, bClobber,pOp->nEntry,aPgCopy[0],aSz,&nSz); - - if( pOp->eBalance==BALANCE_OPTIONAL ){ - int nTotal = 0; - for(ii=0; iipgsz - sizeof(HctDbIntkeyLeaf)) ){ - /* This is a single page balance */ - nIn = 1; - nOut = 1; - iLeftPg = iPg; - } - } - - if( nOut==0 ){ - HctDbPageHdr *pHdr = (HctDbPageHdr*)p->writepg.aPg[iPg].aNew; - if( p->iHeight==0 - && bClobber==0 && pOp->nEntry>0 - && pHdr->iPeerPg==0 && pHdr->nEntry==iIns - ){ - p->bAppend = 1; - rc = hctDbBalanceAppend(pDb, p, pOp); - return rc; - } - - /* If the HctDbWriter.writepg.aPg[] array still contains a single page, - ** load some peer pages into it. */ - assert( p->discardpg.nPg>=0 ); - if( IS_HCT_MIGRATE(pDb)==0 ){ - rc = hctDbLoadPeers(pDb, p, &iPg); - if( rc!=SQLITE_OK ){ - return rc; - } - } - assert_all_pages_ok(pDb, p); - - /* Determine the subset of HctDbWriter.writepg.aPg[] pages that will be - ** rebalanced. Variable nIn is set to the number of input pages, and - ** iLeftPg to the index of the leftmost of them. 
*/ - iLeftPg = iPg; - if( iPg==0 ){ - nIn = MIN(p->writepg.nPg, 3); - }else{ - if( iPg==p->writepg.nPg-1 ){ - nIn = MIN(p->writepg.nPg, 3); - iLeftPg -= (nIn-1); - }else{ - nIn = 3; - iLeftPg--; - } - SWAP(u8*, aPgCopy[0], aPgCopy[iPg-iLeftPg]); - } - - /* aPgCopy[iPg-iLeftPg] already contains a copy of page iPg at this - ** point. This loop takes copies of the other pages involved in the - ** balance operation. */ - for(ii=0; iiwritepg.aPg[iLeftPg+ii].aNew, pDb->pgsz); - } - - for(ii=(iPg-iLeftPg)-1; ii>=0; ii--){ - int nCell = hctPagenentry(aPgCopy[ii]); - aSz -= nCell; - nSz += nCell; - hctDbBalanceGetCellSz(pDb, p, -1, 0, 0, aPgCopy[ii], aSz, 0); - } - for(ii=(iPg-iLeftPg)+1; iiwritecsr.pKeyInfo==0 ){ - pDb->stats.nBalanceIntkey++; - }else{ - pDb->stats.nBalanceIndex++; - } - if( nIn==1 ){ - pDb->stats.nBalanceSingle++; - } - - /* Figure out how many output pages will be required. This loop calculates - ** a mapping heavily biased to the left. */ - aPgFirst[0] = 0; - if( nOut==0 ){ - assert( sizeof(HctDbIntkeyLeaf)==sizeof(HctDbIndexLeaf) ); - nRem = pDb->pgsz - sizeof(HctDbIntkeyLeaf); - nOut = 1; - for(ii=0; iinRem ){ - aPgRem[nOut-1] = nRem; - aPgFirst[nOut] = ii; - nOut++; - nRem = pDb->pgsz - sizeof(HctDbIntkeyLeaf); - assert( nOut<=ArraySize(aPgRem) ); - } - nRem -= aSz[ii].nByte; - } - aPgRem[nOut-1] = nRem; - } - aPgFirst[nOut] = nSz; - - /* Adjust the packing calculated by the previous loop. */ - for(ii=nOut-1; ii>0; ii--){ - /* Try to shift cells from output page (ii-1) to output page (ii). Shift - ** cells for as long as (a) there is more free space on page (ii) than on - ** page (ii-1), and (b) there is enough free space on page (ii) to fit - ** the last cell from page (ii-1). 
*/ - while( aPgRem[ii]>aPgRem[ii-1] ){ /* condition (a) */ - HctDbCellSz *pLast = &aSz[aPgFirst[ii]-1]; - if( pLast->nByte>aPgRem[ii] ) break; /* condition (b) */ - aPgRem[ii] -= pLast->nByte; - aPgRem[ii-1] += pLast->nByte; - aPgFirst[ii] = (pLast - aSz); - } - } - - /* Allocate any required new pages and link them into the list. */ - rc = hctDbExtendWriteArray(pDb, p, iLeftPg+1, nOut-nIn); - - /* Populate the output pages */ - iEntry0 = hctDbEntryArrayDim(aPgCopy[0], &szEntry); - for(ii=0; iiwritepg.aPg[iIdx].aNew; - HctDbIndexLeaf *pLeaf = (HctDbIndexLeaf*)aTarget; - int iOff = pDb->pgsz; /* Start of data area in aTarget[] */ - int iLast = (ii==(nOut-1) ? nSz : aPgFirst[ii+1]); - int nNewEntry = 0; /* Number of entries on this output page */ - int i2; - - for(i2=0; i2<(iLast - aPgFirst[ii]); i2++){ - HctDbCellSz *pSz = &aSz[aPgFirst[ii] + i2]; - if( pSz->aEntry ){ - u8 *aETo = &aTarget[iEntry0 + nNewEntry*szEntry]; - int nCopy = pSz->nByte - szEntry; - hctMemcpy(aETo, pSz->aEntry, szEntry); - iOff -= nCopy; - ((HctDbIndexEntry*)aETo)->iOff = iOff; - hctMemcpy(&aTarget[iOff], pSz->aCell, nCopy); - nNewEntry++; - }else{ - pOp->iPg = iIdx; - pOp->iInsert = i2; - } - } - - pLeaf->pg.nEntry = nNewEntry; - pLeaf->hdr.nFreeBytes = iOff - (iEntry0 + nNewEntry*szEntry); - pLeaf->hdr.nFreeGap = iOff - (iEntry0 + nNewEntry*szEntry); - } - - return rc; -} - - -static int hctDbBalanceIntkeyNode( - HctDatabase *pDb, - HctDbWriter *p, - int iPg, - int iInsert, /* Index in iPg for new key, if any */ - i64 iKey, /* Integer key value */ - u32 iChildPg /* The child pgno */ -){ - int nMax = hctDbMaxCellsPerIntkeyNode(pDb->pgsz); - int rc = SQLITE_OK; - int nIn; /* Number of input pages */ - int nOut; /* Number of output pages */ - int iLeftPg; /* Index of left-most page in balance */ - int ii; /* Iterator variable */ - int nTotal = 0; /* Total number of keys for balance */ - u8 *aPgCopy[3]; - u8 *pFree = 0; - - assert( p->writepg.aPg[p->writepg.nPg-1].aNew ); - if( 
IS_HCT_MIGRATE(pDb)==0 ){ - rc = hctDbLoadPeers(pDb, p, &iPg); - if( rc!=SQLITE_OK ){ - return rc; - } - } - - iLeftPg = iPg; - if( iPg==0 ){ - nIn = MIN(p->writepg.nPg, 3); - }else if( iPg==p->writepg.nPg-1 ){ - nIn = MIN(p->writepg.nPg, 3); - iLeftPg -= (nIn-1); - }else{ - nIn = MIN(p->writepg.nPg, 3); - iLeftPg--; - assert( iLeftPg+nIn<=p->writepg.nPg ); - } - - /* Take a copy of each input page. Make the buffer used to store each - ** copy larger than required by the size of one entry. Then, there is - ** a new entry to add in stack variables (iKey/iChildPg), add it to the - ** copy of its page. This is to make the loop that populates the output - ** pages below easier to write. A real candidate for optimization, this. */ - pFree = (u8*)sqlite3Malloc(nIn*(pDb->pgsz+sizeof(HctDbIntkeyNodeEntry))); - if( pFree==0 ) return SQLITE_NOMEM; - for(ii=0; iipgsz + sizeof(HctDbIntkeyNodeEntry)) * ii]; - hctMemcpy(aPgCopy[ii], p->writepg.aPg[iLeftPg+ii].aNew, pDb->pgsz); - } - if( iInsert>=0 ){ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)aPgCopy[iPg-iLeftPg]; - if( iInsertpg.nEntry ){ - int nByte = sizeof(HctDbIntkeyNodeEntry) * (pNode->pg.nEntry-iInsert); - memmove(&pNode->aEntry[iInsert+1], &pNode->aEntry[iInsert], nByte); - } - pNode->pg.nEntry++; - pNode->aEntry[iInsert].iKey = iKey; - pNode->aEntry[iInsert].iChildPg = iChildPg; - } - - /* Figure out how many entries there are, in total */ - for(ii=0; iipg.nEntry; - } - - /* Figure out how many output pages are required */ - nOut = (nTotal + (nMax-1)) / nMax; - rc = hctDbExtendWriteArray(pDb, p, iLeftPg+1, nOut-nIn); - assert( rc==SQLITE_OK ); /* todo */ - - /* Populate the output pages */ - if( rc==SQLITE_OK ){ - int nRem = nTotal; - int iIn = 0; - int iInEntry = 0; - - for(ii=0; iiwritepg.aPg[ii+iLeftPg].aNew; - for(pEntry=pNode->aEntry; pEntry<&pNode->aEntry[nCell]; pEntry++){ - HctDbIntkeyNode *pIn = (HctDbIntkeyNode*)aPgCopy[iIn]; - *pEntry = pIn->aEntry[iInEntry++]; - if( iInEntry>=pIn->pg.nEntry ){ - iInEntry = 
0; - iIn++; - } - } - pNode->pg.nEntry = nCell; - nRem -= nCell; - } - } - - sqlite3_free(pFree); - return rc; -} - -/* -** This function handles the second part of an insert or delete operation -** on an internal intkey node key. The implementation is separate from the -** usual insert/delete routine because internal intkey nodes use fixed size -** records. The other three types of pages found in lists - intkey leaves, -** index leaves and index nodes - all use variable sized entries. -*/ -static int hctDbInsertIntkeyNode( - HctDatabase *pDb, - HctDbWriter *p, - int iPg, - int iInsert, - i64 iKey, /* Integer key value */ - u32 iChildPg, /* The child pgno */ - int bClobber, /* True to clobber entry iInsert */ - int bDel /* True for a delete operation */ -){ - int nMax = hctDbMaxCellsPerIntkeyNode(pDb->pgsz); - int nMin = hctDbMinCellsPerIntkeyNode(pDb->pgsz); - HctDbIntkeyNode *pNode; - int rc = SQLITE_OK; - - /* If bDel is set, then bClobber must also be set. */ - assert( bDel==0 || bClobber ); - - pNode = (HctDbIntkeyNode*)p->writepg.aPg[iPg].aNew; - if( (pNode->pg.nEntry>=nMax && bClobber==0 && bDel==0 ) ){ - /* Need to do a balance operation to make room for the new entry */ - rc = hctDbBalanceIntkeyNode(pDb, p, iPg, iInsert, iKey, iChildPg); - }else if( bDel ){ - assert( iInsertpg.nEntry ); - if( iInsert==0 ){ - rc = hctDbLoadPeers(pDb, p, &iPg); - pNode = (HctDbIntkeyNode*)p->writepg.aPg[iPg].aNew; - } - if( rc==SQLITE_OK ){ - if( iInsert<(pNode->pg.nEntry-1) ){ - int nByte = sizeof(HctDbIntkeyNodeEntry) * (pNode->pg.nEntry-1-iInsert); - memmove(&pNode->aEntry[iInsert], &pNode->aEntry[iInsert+1], nByte); - } - pNode->pg.nEntry--; - if( iInsert==0 || pNode->pg.nEntrypg.nEntry ){ - int nByte = sizeof(HctDbIntkeyNodeEntry) * (pNode->pg.nEntry-iInsert); - memmove(&pNode->aEntry[iInsert+1], &pNode->aEntry[iInsert], nByte); - } - pNode->pg.nEntry++; - } - pNode->aEntry[iInsert].iKey = iKey; - pNode->aEntry[iInsert].iChildPg = iChildPg; - 
pNode->aEntry[iInsert].unused = 0; - } - - return rc; -} - - -/* -** The buffer passed as the first -*/ -static int hctDbFreegap(void *aPg){ - assert( - (hctPagetype(aPg)==HCT_PAGETYPE_INTKEY && hctPageheight(aPg)==0) - || (hctPagetype(aPg)==HCT_PAGETYPE_INDEX) - ); - return ((HctDbIndexNode*)aPg)->hdr.nFreeGap; -} - -static int hctDbFreebytes(void *aPg){ - assert( - (hctPagetype(aPg)==HCT_PAGETYPE_INTKEY && hctPageheight(aPg)==0) - || (hctPagetype(aPg)==HCT_PAGETYPE_INDEX) - ); - return ((HctDbIndexNode*)aPg)->hdr.nFreeBytes; -} - -static int hctDbInsertOverflow( - HctDatabase *pDb, - HctDbWriter *pWriter, - u8 *aTarget, - int nData, - const u8 *aData, - int *pnWrite, - u32 *ppgOvfl -){ - int rc = SQLITE_OK; - int nLocal = hctDbLocalsize(aTarget, pDb->pgsz, nData); - - if( nLocal==nData ){ - *pnWrite = nData; - *ppgOvfl = 0; - }else{ - const int sz = (pDb->pgsz - sizeof(HctDbPageHdr)); - int nRem; - int nCopy; - u32 iPg = 0; - int nOvfl = 0; - - nRem = nData; - nCopy = (nRem-nLocal) % sz; - if( nCopy==0 ) nCopy = sz; - while( rc==SQLITE_OK && nRem>nLocal ){ - HctFilePage pg; - nOvfl++; - rc = sqlite3HctFilePageNewPhysical(pDb->pFile, &pg); - if( rc==SQLITE_OK ){ - HctDbPageHdr *pPg = (HctDbPageHdr*)pg.aNew; - memset(pPg, 0, sizeof(HctDbPageHdr)); - pPg->iPeerPg = iPg; - pPg->nEntry = nCopy; - hctMemcpy(&pPg[1], &aData[nRem-nCopy], nCopy); - iPg = pg.iNewPg; - sqlite3HctFilePageRelease(&pg); - } - nRem -= nCopy; - nCopy = sz; - } - - *ppgOvfl = iPg; - *pnWrite = nLocal; - - if( rc==SQLITE_OK ){ - rc = hctDbOverflowArrayAppend(&pWriter->insOvfl, iPg, nOvfl); - } - } - - return rc; -} - -static void hctDbRemoveCell( - HctDatabase *pDb, - HctDbWriter *pWriter, - u8 *aTarget, - int iRem -){ - HctDbIndexNode *p = (HctDbIndexNode*)aTarget; - const int eType = hctPagetype(aTarget); - const int nHeight = hctPageheight(aTarget); - const int pgsz = pDb->pgsz; - - int szEntry = 0; /* Size of each entry in aEntry[] array */ - int iArrayOff = 0; /* Offset of aEntry array in 
aTarget */ - int iData = 0; /* Offset of cell in aTarget[] */ - int nData = 0; /* Local size of cell to remove */ - - /* Populate stack variables szEntry, iArrayOff, iData and nData. */ - assert( eType==HCT_PAGETYPE_INTKEY || eType==HCT_PAGETYPE_INDEX ); - assert( eType==HCT_PAGETYPE_INDEX || nHeight==0 ); - if( eType==HCT_PAGETYPE_INTKEY ){ - HctDbIntkeyEntry *pEntry = &((HctDbIntkeyLeaf*)aTarget)->aEntry[iRem]; - iData = pEntry->iOff; - nData = hctDbIntkeyEntrySize(pEntry, pgsz); - szEntry = sizeof(*pEntry); - iArrayOff = sizeof(HctDbIntkeyLeaf); - }else if( nHeight==0 ){ - HctDbIndexEntry *pEntry = &((HctDbIndexLeaf*)aTarget)->aEntry[iRem]; - iData = pEntry->iOff; - nData = hctDbIndexEntrySize(pEntry, pgsz); - szEntry = sizeof(*pEntry); - iArrayOff = sizeof(HctDbIndexLeaf); - }else{ - HctDbIndexNodeEntry *pEntry = &((HctDbIndexNode*)aTarget)->aEntry[iRem]; - iData = pEntry->iOff; - nData = hctDbIndexNodeEntrySize(pEntry, pgsz); - szEntry = sizeof(*pEntry); - iArrayOff = sizeof(HctDbIndexNode); - } - - /* Remove the aEntry[] array entry */ - if( iRempg.nEntry-1 ){ - u8 *aTo = &aTarget[iArrayOff + iRem*szEntry]; - memmove(aTo, &aTo[szEntry], (p->pg.nEntry-iRem-1) * szEntry); - } - p->pg.nEntry--; - p->hdr.nFreeBytes += szEntry; - p->hdr.nFreeGap += szEntry; - - /* Remove the cell from the data area */ - if( iData==(iArrayOff + szEntry*p->pg.nEntry + p->hdr.nFreeGap) ){ - int ii; - int iFirst = pDb->pgsz; - p->hdr.nFreeGap += nData; - for(ii=0; iipg.nEntry; ii++){ - int iOff = ((HctDbIndexEntry*)&aTarget[iArrayOff + szEntry*ii])->iOff; - if( iOff && iOffhdr.nFreeGap = iFirst - (iArrayOff + szEntry*p->pg.nEntry); - } - p->hdr.nFreeBytes += nData; - -} - - -/* -** This is called as part of a bulk insert of contiguous keys. At present -** this only occurs as part of a migrate, but in the future it could be -** auto-detected. 
-*/ -static int hctDbBalanceMigrate( - HctDatabase *pDb, - HctDbWriter *p, - HctDbInsertOp *pOp -){ - HctDbLeaf *pLeaf = (HctDbLeaf*)p->writepg.aPg[0].aNew; - int ii = 0; - - assert( p->writepg.nPg==1 ); - assert( p->bAppend==0 ); - assert( p->iHeight==0 ); - assert( pOp->iInsert<=pLeaf->pg.nEntry ); - assert( pOp->eBalance==BALANCE_REQUIRED || pOp->eBalance==BALANCE_OPTIONAL ); - - /* Set nMigrateKey to the number of keys to copy from p->writepg.aPg[0].aOld - ** before flushing the current array of pages to disk. */ - p->nMigrateKey = pLeaf->pg.nEntry - pOp->iInsert; - - /* Remove the last nMigrateKey cells from the page. */ - for(ii=0; iinMigrateKey; ii++){ - hctDbRemoveCell(pDb, 0, (u8*)pLeaf, pLeaf->pg.nEntry-1); - } - p->bAppend = 1; - - /* Use a regular balance to make space for the new key */ - pOp->eBalance = BALANCE_REQUIRED; - return hctDbBalance(pDb, p, pOp, 0); -} - -/* -** Buffer aTarget must contain the image of a page that uses variable -** length records - an intkey leaf, or an index leaf or node. This -** function does part of the job of inserting a new record into the -** page. -** -** Buffer aEntry[], size nEntry bytes, contains the sequence of bytes that -** will be stored in the data area of the page (i.e. any serialized -** tids, the old page number if any, any overflow page number and the -** portion of the database record that will be stored on the main -** page. Parameter iIns specifies the index within the page at which -** the new entry will be inserted. 
-*/ -static void hctDbInsertEntry( - HctDatabase *pDb, - u8 *aTarget, - int iIns, - const u8 *aEntry, - int nEntry -){ - HctDbIndexNode *p = (HctDbIndexNode*)aTarget; - int szEntry = 0; /* Size of each entry in aEntry[] array */ - int iEntry0 = 0; /* Offset of aEntry array in aTarget */ - int iOff = 0; /* Offset of new cell data in aTarget */ - u8 *aFrom = 0; - - iEntry0 = hctDbEntryArrayDim(aTarget, &szEntry); - - /* This might fail if the db is corrupt */ - assert( p->hdr.nFreeGap>=(nEntry + szEntry) ); - - /* Insert the new zeroed entry into the aEntry[] array */ - aFrom = &aTarget[iEntry0 + szEntry*iIns]; - if( iInspg.nEntry ){ - memmove(&aFrom[szEntry], aFrom, (p->pg.nEntry-iIns) * szEntry); - } - memset(aFrom, 0, szEntry); - p->hdr.nFreeBytes -= szEntry; - p->hdr.nFreeGap -= szEntry; - p->pg.nEntry++; - - /* Insert the cell into the data area */ - iOff = iEntry0 + p->pg.nEntry*szEntry + p->hdr.nFreeGap - nEntry; - hctMemcpy(&aTarget[iOff], aEntry, nEntry); - p->hdr.nFreeBytes -= nEntry; - p->hdr.nFreeGap -= nEntry; - - /* Set the aEntry[].iOff field */ - ((HctDbIndexEntry*)aFrom)->iOff = iOff; -} - - -static int hctDbMigrateReinsertKeys(HctDatabase *pDb, HctDbWriter *p){ - int rc = SQLITE_OK; - if( p->nMigrateKey>0 ){ - assert( p->iHeight==0 ); - - /* Append a page to the write-array */ - rc = hctDbExtendWriteArray(pDb, p, p->writepg.nPg, 1); - - - if( rc==SQLITE_OK ){ - int ii = 0; - HctDbInsertOp op; - HctDbLeaf *pOld = (HctDbLeaf*)p->writepg.aPg[0].aOld; - HctDbLeaf *pNew = (HctDbLeaf*)p->writepg.aPg[p->writepg.nPg-1].aNew; - - /* TODO: Might this not be a part of ExtendWriteArray() ? */ - pNew->hdr.nFreeBytes = pDb->pgsz - sizeof(HctDbLeaf); - pNew->hdr.nFreeGap = pNew->hdr.nFreeBytes; - - /* Loop through the last nMigrateKey on the old page, copying them - ** to the new page. 
*/ - for(ii=0; iinMigrateKey; ii++){ - int iOld = (pOld->pg.nEntry - p->nMigrateKey) + ii; - HctDbIndexEntry *pOldE = 0; - HctDbIndexEntry *pNewE = 0; - int nEntry = 0; - - pOldE = hctDbEntryEntry(pOld, iOld); - nEntry = hctDbPageRecordSize(pOld, pDb->pgsz, iOld); - hctDbInsertEntry(pDb, (u8*)pNew, ii, &((u8*)pOld)[pOldE->iOff], nEntry); - - pNewE = hctDbEntryEntry(pNew, ii); - pNewE->nSize = pOldE->nSize; - pNewE->flags = pOldE->flags; - if( hctPagetype(pOld)==HCT_PAGETYPE_INTKEY ){ - ((HctDbIntkeyEntry*)pNewE)->iKey = ((HctDbIntkeyEntry*)pOldE)->iKey; - } - } - - memset(&op, 0, sizeof(op)); - op.iPg = p->writepg.nPg-1; - op.iInsert = -1; - op.eBalance = BALANCE_OPTIONAL; - rc = hctDbBalance(pDb, p, &op, 0); - } - } - - return rc; -} - -/* -** Parameter aTarget points to a buffer containing an intkey or index -** internal node. Return the child-page number for entry iInsert on -** that page. -*/ -u32 hctDbGetChildPage(u8 *aTarget, int iInsert){ - const int eType = hctPagetype(aTarget); - u32 iChildPg; - if( eType==HCT_PAGETYPE_INTKEY ){ - iChildPg = ((HctDbIntkeyNode*)aTarget)->aEntry[iInsert].iChildPg; - }else{ - assert( eType==HCT_PAGETYPE_INDEX ); - iChildPg = ((HctDbIndexNode*)aTarget)->aEntry[iInsert].iChildPg; - } - return iChildPg; -} - -static void hctDbClobberEntry( - HctDatabase *pDb, - u8 *aTarget, - HctDbInsertOp *pOp -){ - HctDbIndexEntry *pEntry; /* Entry being clobbered */ - int nOld = hctDbPageRecordSize(aTarget, pDb->pgsz, pOp->iInsert); - - pEntry = hctDbEntryEntry(aTarget, pOp->iInsert); - pEntry->nSize = pOp->nEntrySize; - pEntry->flags = pOp->entryFlags; - - memcpy(&aTarget[pEntry->iOff], pOp->aEntry, pOp->nEntry); - ((HctDbIndexNode*)aTarget)->hdr.nFreeBytes += (nOld - pOp->nEntry); - - pDb->stats.nUpdateInPlace++; -} - -static int hctDbFindOldPage( - HctDatabase *pDb, - HctDbWriter *p, - UnpackedRecord *pKey, - i64 iKey, - u32 *piOld, - const u8 **paOld -){ - HctFilePage *pPg = 0; - int rc = SQLITE_OK; - int iTest; - - 
for(iTest=p->discardpg.nPg-1; iTest>=0; iTest--){ - pPg = &p->discardpg.aPg[iTest]; - if( pKey ){ - int bGe = 0; - rc = hctDbCompareFPKey(pDb, pKey, pPg->aOld, &bGe); - if( bGe || rc!=SQLITE_OK ) break; - }else{ - i64 iFP = hctDbIntkeyFPKey(pPg->aOld); - if( iKey>=iFP ) break; - } - pPg = 0; - } - - if( pPg==0 ){ - pPg = &p->writepg.aPg[0]; - } - assert( pPg->iOldPg!=0 ); - *piOld = pPg->iOldPg; - *paOld = pPg->aOld; - - return rc; -} - -static u64 hctDbGetRangeTidByIdx(HctDatabase *pDb, u8 *aTarget, int iIdx){ - HctDbCell cell; - hctDbCellGetByIdx(pDb, aTarget, iIdx, &cell); - return cell.iRangeTid; -} - -static u32 hctDbMakeFollowPtr( - int *pRc, - HctDatabase *pDb, - u64 iFollowTid, - u32 iPg -){ - int rc = *pRc; - HctFilePage pg; - u32 iRet = 0; - - memset(&pg, 0, sizeof(pg)); - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageNewPhysical(pDb->pFile, &pg); - iRet = pg.iNewPg; - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctFileClearPhysInUse(pDb->pFile, iRet, 0); - } - if( rc==SQLITE_OK ){ - HctDbHistoryFan *pFan = (HctDbHistoryFan*)pg.aNew; - memset(pFan, 0, sizeof(*pFan)); - pFan->pg.hdrFlags = HCT_PAGETYPE_HISTORY; - pFan->pg.nEntry = 1; - pFan->iRangeTid0 = pDb->iTid; - pFan->iFollowTid0 = iFollowTid; - pFan->pgOld0 = iPg; - rc = sqlite3HctFilePageRelease(&pg); - }else{ - sqlite3HctFilePageUnwrite(&pg); - sqlite3HctFilePageRelease(&pg); - } - - *pRc = rc; - return iRet; -} - -static int hctDbDelete( - HctDatabase *pDb, - HctDbWriter *p, - UnpackedRecord *pRec, - HctDbInsertOp *pOp -){ - u64 iTidOr = (pDb->eMode==HCT_MODE_ROLLBACK ? 
HCT_TID_ROLLBACK_OVERRIDE : 0); - u64 iSafeTid = sqlite3HctFileSafeTID(pDb->pFile); - u64 iTidValue = pDb->iTid | iTidOr; - u64 iDelRangeTid = 0; - int rc = SQLITE_OK; - u8 *aNull = 0; - int prevFlags = 0; - int nLocalSz = 0; - u8 *aTarget = p->writepg.aPg[pOp->iPg].aNew; - int bLeftmost = (hctIsLeftmost(aTarget) && pOp->iInsert==0); - - HctDbCell prev; /* Previous cell on page */ - - assert( pOp->bFullDel==0 ); - - if( pOp->iInsert==0 && !bLeftmost ){ - /* If deleting the first key on the first page, set the eBalance flag (as - ** deleting a FP key means the parent list must be adjusted) and load peer - ** pages into memory. */ - pOp->eBalance = BALANCE_REQUIRED; - if( pOp->iPg==0 ){ - rc = hctDbLoadPeers(pDb, p, &pOp->iPg); - if( rc!=SQLITE_OK ) return rc; - aTarget = p->writepg.aPg[pOp->iPg].aNew; - } - } - assert_page_is_ok(aTarget, pDb->pgsz); - - /* Deal with the case where the cell we are about to remove (cell iInsert) - ** has a range-tid greater than that of the current transaction (iTid) */ - iDelRangeTid = hctDbGetRangeTidByIdx(pDb, aTarget, pOp->iInsert); - if( (iDelRangeTid & HCT_TID_MASK)>pDb->iTid ){ - iTidValue = iDelRangeTid; - pOp->iOldPg = hctDbMakeFollowPtr(&rc, pDb, iDelRangeTid, pOp->iOldPg); - sqlite3HctFilePageRelease(&p->fanpg); - } - - if( bLeftmost ){ - int nNull = 0; - - memset(&prev, 0, sizeof(prev)); - prev.iTid = LARGEST_TID; - prevFlags |= HCTDB_HAS_TID; - - assert( pOp->iPg==0 ); - if( hctPagetype(aTarget)==HCT_PAGETYPE_INDEX ){ - int nField = p->writecsr.pKeyInfo->nAllField; - int nByte = nField + 9; - aNull = sqlite3HctMalloc(&rc, nByte); - if( rc!=SQLITE_OK ) return rc; - if( nField<=126 ){ - aNull[0] = nField+1; - nNull = nField+1; - } - else if( nField<=16382 ){ - sqlite3PutVarint(aNull, nField+2); - nNull = nField+2; - }else{ - assert( sqlite3VarintLen(nField+3)==3 ); - sqlite3PutVarint(aNull, nField+3); - nNull = nField+3; - } - prev.aPayload = aNull; - } - prev.iTid = LARGEST_TID; - prevFlags |= HCTDB_HAS_TID; - 
pOp->nEntrySize = nNull; - nLocalSz = hctDbLocalsize(aTarget, pDb->pgsz, pOp->nEntrySize); - - }else{ - HctDbIndexEntry *pPrev = 0; - - /* Remove the cell being deleted from the target page. This must be done - ** after hctDbLoadPeers() is called (if it is called). */ - assert_page_is_ok(aTarget, pDb->pgsz); - hctDbRemoveCell(pDb, p, aTarget, pOp->iInsert); - assert_page_is_ok(aTarget, pDb->pgsz); - if( pOp->iInsert==0 ){ - assert( pOp->iPg>0 ); - pOp->iPg--; - aTarget = p->writepg.aPg[pOp->iPg].aNew; - assert( hctPagenentry(aTarget)>0 ); - pOp->iInsert = ((HctDbPageHdr*)aTarget)->nEntry - 1; - }else{ - pOp->iInsert--; - } - - /* Load the cell immediately before the one just removed */ - pPrev = hctDbEntryEntry(aTarget, pOp->iInsert); - pOp->nEntrySize = pPrev->nSize; - prevFlags = pPrev->flags; - - hctDbCellGet(pDb, &aTarget[pPrev->iOff], pPrev->flags, &prev); - nLocalSz = hctDbLocalsize(aTarget, pDb->pgsz, pOp->nEntrySize); - } - - /* Update the range-tid and range-oldpg fields. There are several - ** possibilities: - ** - ** 1) The left-hand-cell already has the desired range-pointer values - ** (both TID and old-page-number). - ** - ** 2) The left-hand-cell does not have a range-pointer. Or else - ** has a range-pointer so old it can be overwritten with impunity. - ** - ** 3) The left-hand-cell has a range-pointer to a fan-page that was - ** created by the current HctDbWriter batch, and that fan-page - ** is not already full. - ** - ** 4) None of the above are true. A new fan-page must be created. 
- */ - if( prev.iRangeTid==iTidValue && prev.iRangeOld==pOp->iOldPg ){ - /* Possibility (1) */ - pOp->bFullDel = 1; - pOp->iInsert = -1; - } - else if( prev.iRangeTid==0 || (prev.iRangeTid & HCT_TID_MASK)<=iSafeTid ){ - /* Possibility (2) */ - prev.iRangeTid = iTidValue; - prev.iRangeOld = pOp->iOldPg; - }else if( prev.iRangeOld==p->fanpg.iNewPg ){ - /* Possibility (3) */ - HctDbHistoryFan *pFan = (HctDbHistoryFan*)p->fanpg.aNew; - assert( pFan->iRangeTid1==iTidValue ); - if( pFan->aPgOld1[pFan->pg.nEntry-2]!=pOp->iOldPg ){ - const int nMax = ((pDb->pgsz - sizeof(HctDbHistoryFan))/sizeof(u32)); - assert( pFan->pg.nEntryaPgOld1[pFan->pg.nEntry-1] = pOp->iOldPg; - pFan->pg.nEntry++; - if( pFan->pg.nEntry==nMax ){ - rc = sqlite3HctFilePageRelease(&p->fanpg); - } - } - pOp->bFullDel = 1; - pOp->iInsert = -1; - }else{ - /* Possibility (4) */ - rc = sqlite3HctFilePageRelease(&p->fanpg); - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageNewPhysical(pDb->pFile, &p->fanpg); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctFileClearPhysInUse(pDb->pFile, p->fanpg.iNewPg, 0); - } - if( rc==SQLITE_OK ){ - int bDummy = 0; - HctDbHistoryFan *pFan = (HctDbHistoryFan*)p->fanpg.aNew; - memset(pFan, 0, pDb->pgsz); - pFan->pg.hdrFlags = HCT_PAGETYPE_HISTORY; - pFan->pg.nEntry = 2; - pFan->iRangeTid0 = prev.iRangeTid; - pFan->iFollowTid0 = prev.iRangeTid; - pFan->pgOld0 = prev.iRangeOld; - rc = hctDbLeafSearch( - pDb, pOp->aOldPg, pOp->iIntkey, pRec, &pFan->iSplit0, &bDummy - ); - assert( bDummy ); - pFan->iRangeTid1 = iTidValue; - pFan->aPgOld1[0] = pOp->iOldPg; - prev.iRangeOld = p->fanpg.iNewPg; - if( (prev.iRangeTid & HCT_TID_MASK)<(iTidValue & HCT_TID_MASK) ){ - prev.iRangeTid = iTidValue; - } - } - } - - if( rc==SQLITE_OK && pOp->bFullDel==0 ){ - prev.iRangeTid |= iTidOr; - pOp->aEntry = pDb->aTmp; - pOp->nEntry = hctDbCellPut(pOp->aEntry, &prev, nLocalSz); - pOp->entryFlags = prevFlags | HCTDB_HAS_RANGETID | HCTDB_HAS_RANGEOLD; - if( hctPagetype(aTarget)==HCT_PAGETYPE_INTKEY ){ - if( 
bLeftmost ){ - pOp->iIntkey = SMALLEST_INT64; - }else{ - pOp->iIntkey = ((HctDbIntkeyLeaf*)aTarget)->aEntry[pOp->iInsert].iKey; - } - } - } - - assert_page_is_ok(aTarget, pDb->pgsz); - if( aNull ) sqlite3_free(aNull); - return rc; -} - -static int hctDbInsertFindPosition( - HctDatabase *pDb, - HctDbWriter *p, - u32 iRoot, - UnpackedRecord *pRec, - i64 iKey, - HctDbInsertOp *pOp, - int *pbClobber -){ - const RecordCompare xCompare = pRec ? sqlite3VdbeFindCompare(pRec) : 0; - int rc = SQLITE_OK; - - if( p->writepg.nPg==0 ){ - if( p->writecsr.iRoot!=iRoot ){ - hctDbCsrInit(pDb, iRoot, 0, &p->writecsr); - }else{ - hctDbCsrReset(&p->writecsr); - } - if( pRec ){ - p->writecsr.pKeyInfo = sqlite3KeyInfoRef(pRec->pKeyInfo); - } - rc = hctDbCsrSeek( - &p->writecsr, &p->fp, p->iHeight, xCompare, pRec, iKey, pbClobber - ); - if( rc ) return rc; - pOp->iInsert = p->writecsr.iCell; - if( *pbClobber==0 ) pOp->iInsert++; - - p->writepg.aPg[0] = p->writecsr.pg; - memset(&p->writecsr.pg, 0, sizeof(HctFilePage)); - - assert( p->bDoCleanup ); - p->writepg.nPg = 1; - rc = sqlite3HctFilePageWrite(&p->writepg.aPg[0]); - if( rc ) return rc; - hctMemcpy(p->writepg.aPg[0].aNew, p->writepg.aPg[0].aOld, pDb->pgsz); - if( p->fp.iKey==0 ){ - rc = hctDbSetWriteFpKey(pDb, p); - } - if( rc ) return rc; - }else if( pRec ){ - HctBuffer buf = {0,0,0}; - for(pOp->iPg=p->writepg.nPg-1; pOp->iPg>0; pOp->iPg--){ - const u8 *aK; - int nK; - rc = hctDbLoadRecord( - pDb, &buf, p->writepg.aPg[pOp->iPg].aNew, 0, &nK, &aK - ); - if( rc!=SQLITE_OK ){ - sqlite3HctBufferFree(&buf); - return rc; - } - if( xCompare(nK, aK, pRec)<=0 ) break; - } - sqlite3HctBufferFree(&buf); - rc = hctDbIndexSearch(pDb, - p->writepg.aPg[pOp->iPg].aNew, xCompare, pRec, &pOp->iInsert, pbClobber - ); - if( rc!=SQLITE_OK ) return rc; - }else{ - for(pOp->iPg=p->writepg.nPg-1; pOp->iPg>0; pOp->iPg--){ - if( hctDbIntkeyFPKey(p->writepg.aPg[pOp->iPg].aNew)<=iKey ) break; - } - if( p->iHeight==0 ){ - pOp->iInsert = hctDbIntkeyLeafSearch( - 
p->writepg.aPg[pOp->iPg].aNew, iKey, pbClobber - ); - }else{ - pOp->iInsert = hctDbIntkeyNodeSearch( - p->writepg.aPg[pOp->iPg].aNew, iKey, pbClobber - ); - } - } - - return rc; -} - -static int hctDbWriteWriteConflict( - HctDatabase *pDb, - HctDbWriter *p, - HctDbInsertOp *pOp, - UnpackedRecord *pKey, - i64 iKey, - int bClobber -){ - int rc = SQLITE_OK; - const u8 *aTarget = p->writepg.aPg[pOp->iPg].aNew; - - assert( p->iHeight==0 && pDb->eMode==HCT_MODE_NORMAL ); - - if( bClobber ){ - HctDbIndexEntry *pE; - if( pKey ){ - pE = &((HctDbIndexLeaf*)aTarget)->aEntry[pOp->iInsert]; - }else{ - pE = (HctDbIndexEntry*)&((HctDbIntkeyLeaf*)aTarget)->aEntry[pOp->iInsert]; - } - if( pE->flags & HCTDB_HAS_TID ){ - u64 iTid; - hctMemcpy(&iTid, &aTarget[pE->iOff], sizeof(u64)); - if( hctDbTidIsConflict(pDb, iTid) ){ - rc = HCT_SQLITE_BUSY; - } - } - }else if( pOp->iInsert>0 ){ - int iCell = 0; - int bMerge = 0; - HctRangePtr ptr; - - iCell = (pOp->iInsert - 1); - hctDbGetRange(aTarget, iCell, &ptr); - while( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ){ - HctFilePage pg; - const u8 *aOld = 0; - - if( ptr.iOld==pDb->pa.fanpg.iNewPg ){ - aOld = pDb->pa.fanpg.aNew; - memset(&pg, 0, sizeof(pg)); - }else{ - rc = hctDbGetPhysical(pDb, ptr.iOld, &pg); - aOld = pg.aOld; - } - - /* assert( bMerge==0 || iRangeTid!=pDb->iTid ); */ - if( rc==SQLITE_OK ){ - int iCell = 0; - if( hctPagetype(aOld)==HCT_PAGETYPE_HISTORY ){ - iCell = hctDbFanSearch(&rc, pDb, aOld, pKey, iKey); - }else{ - int bExact = 0; - rc = hctDbLeafSearch(pDb, aOld, iKey, pKey, &iCell, &bExact); - if( rc==SQLITE_OK && bExact ){ - if( bMerge ){ - HctDbCell cell; - hctDbCellGetByIdx(pDb, aOld, iCell, &cell); - if( hctDbTidIsVisible(pDb, cell.iTid, 0) ) rc = HCT_SQLITE_BUSY; - } - sqlite3HctFilePageRelease(&pg); - break; - }else{ - iCell--; - } - if( rc ){ - sqlite3HctFilePageRelease(&pg); - break; - } - } - - hctDbGetRange(aOld, iCell, &ptr); - sqlite3HctFilePageRelease(&pg); - }else{ - break; - } - } - } - - return rc; -} - 
-static int hctDbInsert( - HctDatabase *pDb, - HctDbWriter *p, - u32 iRoot, - UnpackedRecord *pRec, /* The key value for index tables */ - i64 iKey, /* For intkey tables, the key value */ - u32 iChildPg, /* For internal node ops, the child pgno */ - int bDel, /* True for a delete operation */ - int nData, const u8 *aData /* Record/key to insert */ -){ - const RecordCompare xCompare = pRec ? sqlite3VdbeFindCompare(pRec) : 0; - int rc = SQLITE_OK; - int bClobber = 0; - u8 *aTarget; /* Page to write new entry to */ - HctDbInsertOp op = {0,0,0,0,0,0,0,0,0,0,0}; - int bUpdateInPlace = 0; - - p->nWriteKey++; - - assert( pDb->eMode==HCT_MODE_NORMAL || pDb->eMode==HCT_MODE_ROLLBACK ); - - /* Check if any existing dirty pages need to be flushed to disk before - ** this key can be inserted. If they do, flush them. */ - assert( p->writepg.nPg==0 || iRoot==p->writecsr.iRoot ); - assert( p->writepg.nPg>0 || p->bAppend==0 ); - if( p->writepg.nPg ){ - assert( p->bDoCleanup ); - if( p->writepg.nPg>HCTDB_MAX_DIRTY - || p->discardpg.nPg>=HCTDB_MAX_DIRTY - || hctDbTestWriteFpKey(p, xCompare, pRec, iKey) - ){ - rc = hctDbInsertFlushWrite(pDb, p); - if( rc ) return rc; - p->nWriteKey = 1; - } - } - - p->bDoCleanup = 1; - rc = hctDbWriterGrow(p); - if( rc ) return rc; - - /* This block sets stack variables: - ** - ** op.iPg: Index of page in HctDbWriter.writepg.aPg[] to write to. - ** op.iInsert: The index of the new, overwritten, or deleted entry - ** within the page. - ** bClobber: True if this write clobbers (or deletes, if bDel) an - ** existing entry. - ** aTarget: The aNew[] buffer of the page that will be written. - ** - ** It also checks if the current key is a write-write conflict. And - ** returns early if so. 
- */ - if( p->bAppend ){ - assert( bClobber==0 ); - assert( p->writepg.nPg>0 ); - op.iPg = p->writepg.nPg-1; - aTarget = p->writepg.aPg[op.iPg].aNew; - op.iInsert = hctPagenentry(aTarget); - }else{ - /* If the page array is empty, seek the write cursor to find the leaf - ** page on which to insert this new entry or delete key. - ** - ** Otherwise, figure out which page in the HctDbWriter.aWritePg[] array the - ** new entry belongs on. */ - rc = hctDbInsertFindPosition(pDb, p, iRoot, pRec, iKey, &op, &bClobber); - if( rc ) return rc; - aTarget = p->writepg.aPg[op.iPg].aNew; - assert( aTarget ); - - /* If this is a write to a leaf page, and not part of a rollback, - ** check for a write-write conflict here. */ - if( 0==p->iHeight - && pDb->eMode==HCT_MODE_NORMAL - && (rc=hctDbWriteWriteConflict(pDb, p, &op, pRec, iKey, bClobber)) - ){ - return rc; - } - } - - if( bClobber==0 && bDel ){ - return SQLITE_OK; - } - - /* At this point, once the page that will be modified has been loaded - ** and marked as writable, if the operation is on an internal list: - ** - ** 1) For an insert, check if the child page has already been marked - ** as EVICTED by some other client. If so, return early. - ** - ** 2) For a delete, check that there is an entry to delete. And if so, - ** that the value of its child-page field matches iChildPg. If - ** not, return early. Note that the page marked as writable will - ** still be flushed to disk in this case - even though it may be - ** unmodified. - ** - ** This resolves a race condition that may occur if client B starts - ** removing page X from a list before client A has finished inserting - ** the corresponding entry into the parent list. Specifically: - ** - ** + when client A gets here, if the EVICTED flag is not set on page X, - ** then client B will try to delete the corresponding entry from - ** the parent list at some point in the future. 
This will either - ** occur after client A has updated the list, in which case no - ** problem, or it will cause client A's attempt to flush the modified - ** page to disk to fail. Client A will retry, see the EVICTED flag - ** is set, and continue. - ** - ** + or, if EVICTED is set, then there is no point in writing the - ** entry into the parent list. - */ - assert( rc==SQLITE_OK ); - if( p->iHeight>0 ){ - if( bDel==0 && sqlite3HctFilePageIsEvicted(pDb->pFile, iChildPg) ){ - return SQLITE_OK; - } - if( bDel ){ - u32 iChild = hctDbGetChildPage(aTarget, op.iInsert); - if( iChild!=iChildPg ) return SQLITE_OK; - } - } - - /* Writes to an intkey internal node are handled separately. They are - ** different because they used fixed size key/data pairs. All other types - ** of page use variably sized key/data entries. */ - if( pRec==0 && p->iHeight>0 ){ - return hctDbInsertIntkeyNode( - pDb, p, op.iPg, op.iInsert, iKey, iChildPg, bClobber, bDel - ); - } - - if( p->iHeight>0 ){ - op.bFullDel = bDel; - } - - if( rc ){ - assert( !"is this really possible?" ); - return rc; - } - - /* If this is a clobber or delete operation and the entry being removed - ** has an overflow chain, add an entry to HctDbWriter.delOvfl. */ - if( bClobber ){ - hctDbRemoveOverflow(pDb, p, aTarget, op.iInsert); - } - - /* Populate the following variables: - ** - ** entryFlags - ** aEntry - ** nEntry - ** nEntrySize - ** - ** This block populates the above variables. It also inserts overflow pages. 
- */ - op.iIntkey = iKey; - if( op.bFullDel==0 ){ - - if( p->iHeight==0 && (bClobber || bDel) ){ - rc = hctDbFindOldPage(pDb, p, pRec, iKey, &op.iOldPg, &op.aOldPg); - if( rc!=SQLITE_OK ) goto insert_out; - assert( op.iOldPg!=0 ); - } - - if( bDel && p->iHeight==0 ){ - assert( bClobber ); - rc = hctDbDelete(pDb, p, pRec, &op); - aTarget = p->writepg.aPg[op.iPg].aNew; - assert_page_is_ok(aTarget, pDb->pgsz); - if( op.bFullDel ) bClobber = 0; - }else{ - HctDbCell cell; - int nLocal = 0; - memset(&cell, 0, sizeof(cell)); - - if( p->iHeight==0 ){ - - /* There should never be a rollback operation while migrating a - ** database. */ - assert( IS_HCT_MIGRATE(pDb)==0 || pDb->eMode!=HCT_MODE_ROLLBACK ); - - if( IS_HCT_MIGRATE(pDb)==0 ){ - cell.iTid = pDb->iTid; - if( pDb->eMode==HCT_MODE_ROLLBACK ){ - cell.iTid |= HCT_TID_ROLLBACK_OVERRIDE; - } - } - - if( bClobber ){ - u64 iOldRangeTid = hctDbGetRangeTidByIdx(pDb, aTarget, op.iInsert); - if( (iOldRangeTid & HCT_TID_MASK)>pDb->iTid ){ - cell.iRangeOld = hctDbMakeFollowPtr(&rc,pDb,iOldRangeTid,op.iOldPg); - cell.iRangeTid = iOldRangeTid; - }else{ - cell.iRangeTid = pDb->iTid; - cell.iRangeOld = op.iOldPg; - } - }else if( op.iInsert>0 ){ - HctDbCell prev; - hctDbCellGetByIdx(pDb, aTarget, op.iInsert-1, &prev); - cell.iRangeTid = prev.iRangeTid; - cell.iRangeOld = prev.iRangeOld; - assert( cell.iRangeTid==0 || cell.iRangeOld!=0 ); - } - } - rc = hctDbInsertOverflow( - pDb, p, aTarget, nData, aData, &nLocal, &cell.iOvfl - ); - cell.aPayload = aData; - - op.aEntry = pDb->aTmp; - op.nEntry = hctDbCellPut(op.aEntry, &cell, nLocal); - op.nEntrySize = nData; - op.entryFlags = hctDbCellToFlags(&cell); - } - - assert( rc!=SQLITE_OK || op.bFullDel || op.aEntry==pDb->aTmp ); - if( rc!=SQLITE_OK ) goto insert_out; - } - - assert( op.aEntry==0 || op.aEntry==pDb->aTmp ); - - /* There are now two choices - either the aTarget[] page can be updated - ** directly (if the new entry fits on the page), or the balance-tree() - ** routine runs to 
redistribute cells between aTarget[] and its peers, - ** writing the new entry at the same time. A balance is required if: - ** - ** 1) there is insufficient space in the free-gap for any new - ** cell and array entry, or - ** - ** 2) this is a full-delete of the fpkey of the page (iInsert==0), or - ** - ** 3) this operation would leave the page underfull, and it is not - ** the only page in its list. - */ - if( op.eBalance==BALANCE_NONE ){ - int szEntry = hctDbPageEntrySize(aTarget); - int nFree = hctDbFreebytes(aTarget); - int nReq = 0; - int nSpace = 0; /* Space freed by removing cell */ - - if( bClobber ){ - nSpace = hctDbPageRecordSize(aTarget, pDb->pgsz, op.iInsert); - nFree += szEntry; - nFree += nSpace; - } - - if( op.bFullDel==0 ){ - if( nSpace>=op.nEntry ) bUpdateInPlace = 1; - nFree -= op.nEntry; - nFree -= szEntry; - nReq = op.nEntry + (bClobber ? 0 : szEntry); - } - - /* If (a) this is a clobber operation, and (b) either the first - ** key on the page is being deleted or else the page will be less - ** than 1/3 full following the update, and (c) the page is not - ** the only page in its linked list, rebalance! */ - if( (bClobber || bDel) /* (a) */ - && ((op.iInsert==0 && op.bFullDel) || (nFree>(2*pDb->pgsz/3))) /* (b) */ - && (hctIsLeftmost(aTarget)==0 || hctPagePeer(aTarget)!=0) /* (c) */ - ){ - /* Target page will be underfull following this op. Rebalance! 
*/ - op.eBalance = BALANCE_REQUIRED; - bUpdateInPlace = 0; - }else if( hctDbFreegap(aTarget)bAppend ){ - rc = hctDbBalanceAppend(pDb, p, &op); - }else if( IS_HCT_MIGRATE(pDb) && p->iHeight==0 ){ - rc = hctDbBalanceMigrate(pDb, p, &op); - }else{ - rc = hctDbBalance(pDb, p, &op, bClobber); - } - if( rc==SQLITE_OK ) assert_all_pages_ok(pDb, p); - aTarget = p->writepg.aPg[op.iPg].aNew; - }else if( bUpdateInPlace ){ - assert_page_is_ok(aTarget, pDb->pgsz); - hctDbClobberEntry(pDb, aTarget, &op); - assert_page_is_ok(aTarget, pDb->pgsz); - }else if( bClobber ){ - assert_page_is_ok(aTarget, pDb->pgsz); - hctDbRemoveCell(pDb, p, aTarget, op.iInsert); - assert_page_is_ok(aTarget, pDb->pgsz); - } - - /* Unless this is a full-delete operation, update rest of the aEntry[] - ** entry fields for the new cell. */ - if( rc==SQLITE_OK && op.bFullDel==0 ){ - int eType = hctPagetype(aTarget); - assert_page_is_ok(aTarget, pDb->pgsz); - assert( op.iInsert>=0 ); - - /* print_out_page("1", aTarget, pDb->pgsz); */ - if( bUpdateInPlace==0 ){ - hctDbInsertEntry(pDb, aTarget, op.iInsert, op.aEntry, op.nEntry); - } - - assert( (pRec==0)==(eType==HCT_PAGETYPE_INTKEY) ); - if( eType==HCT_PAGETYPE_INTKEY ){ - HctDbIntkeyEntry *pE = &((HctDbIntkeyLeaf*)aTarget)->aEntry[op.iInsert]; - pE->iKey = op.iIntkey; - pE->nSize = op.nEntrySize; - pE->flags = op.entryFlags; - }else if( p->iHeight==0 ){ - HctDbIndexEntry *pE = &((HctDbIndexLeaf*)aTarget)->aEntry[op.iInsert]; - pE->nSize = op.nEntrySize; - pE->flags = op.entryFlags; - }else{ - HctDbIndexNodeEntry *pE = &((HctDbIndexNode*)aTarget)->aEntry[op.iInsert]; - pE->nSize = op.nEntrySize; - pE->flags = op.entryFlags; - pE->iChildPg = iChildPg; - } - - /* print_out_page("2", aTarget, pDb->pgsz); */ - assert_page_is_ok(aTarget, pDb->pgsz); - } - - insert_out: - if( rc==SQLITE_OK ){ - assert_all_pages_ok(pDb, p); - assert_all_pages_nonempty(pDb, p); - } - return rc; -} - -SQLITE_PRIVATE int sqlite3HctDbInsert( - HctDatabase *pDb, /* Database to insert into 
or delete from */ - u32 iRoot, /* Root page of table to modify */ - UnpackedRecord *pRec, /* The key value for index tables */ - i64 iKey, /* For intkey tables, the key value */ - int bDel, /* True for a delete, false for insert */ - int nData, const u8 *aData, /* Record/key to insert */ - int *pnRetry /* OUT: number of operations to retry */ -){ - int rc = SQLITE_OK; - int nRecField = pRec ? pRec->nField : 0; - - /* If this operation is inserting an index entry, figure out how many of - ** the record fields to consider when determining if a potential write - ** collision is found in the data structure. */ - sqlite3HctDbRecordTrim(pRec); - -#if 0 - { - char *zText = sqlite3HctDbRecordToText(0, aData, nData); - sqlite3HctFileDebugPrint(pDb->pFile, - "%p: %s sqlite3HctDbInsert(bDel=%d, iKey=%lld, aData={%s}) iTid=%lld\n", - pDb, - (pDb->eMode==HCT_MODE_ROLLBACK ? "RB" : " "), - bDel, iKey, zText, (i64)pDb->iTid - ); - fflush(stdout); - } -#endif - - assert( pDb->eMode==HCT_MODE_NORMAL - || pDb->eMode==HCT_MODE_ROLLBACK - ); - if( pDb->eMode==HCT_MODE_ROLLBACK ){ - int op = 0; - - pDb->pa.bDoCleanup = 1; - if( pDb->rbackcsr.iRoot!=iRoot ){ - hctDbCsrInit(pDb, iRoot, 0, &pDb->rbackcsr); - if( pRec ){ - pDb->rbackcsr.pKeyInfo = sqlite3KeyInfoRef(pRec->pKeyInfo); - } - }else{ - hctDbCsrReset(&pDb->rbackcsr); - } - - rc = sqlite3HctDbCsrRollbackSeek(&pDb->rbackcsr, pRec, iKey, &op); - if( rc==SQLITE_OK ){ - if( op<0 ){ - bDel = 1; - aData = 0; - nData = 0; - }else if( op>0 ){ - rc = sqlite3HctDbCsrData(&pDb->rbackcsr, &nData, &aData); - bDel = 0; - }else{ - /* TODO: It would be nice to assert( op!=0 ) here, but this fails - ** if the original op being rolled back was a no-op delete. If - ** we could note these as they occur, we could bring a form - ** of this assert() back. 
*/ - /* assert( op!=0 ); */ - goto insert_done; - } - } - } - - if( rc==SQLITE_OK ){ - rc = hctDbInsert(pDb, &pDb->pa, iRoot, pRec, iKey, 0, bDel, nData, aData); - if( rc!=SQLITE_OK ){ - hctDbWriterCleanup(pDb, &pDb->pa, 1); - } - } - if( rc==SQLITE_LOCKED || (rc&0xFF)==SQLITE_BUSY ){ - if( rc==SQLITE_LOCKED ){ - rc = SQLITE_OK; - pDb->nCasFail++; - } - *pnRetry = pDb->pa.nWriteKey; - pDb->pa.nWriteKey = 0; - }else{ - *pnRetry = 0; - } - - insert_done: - if( pRec ) pRec->nField = nRecField; - return rc; -} - -/* -** Start the write-phase of a transaction. -*/ -SQLITE_PRIVATE int sqlite3HctDbStartWrite(HctDatabase *p, u64 *piTid){ - int rc = SQLITE_OK; - HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(p->pFile); - - assert( p->iTid==0 ); - assert( p->eMode==HCT_MODE_NORMAL ); - memset(&p->pa, 0, sizeof(p->pa)); - hctDbPageArrayReset(&p->pa.writepg); - hctDbPageArrayReset(&p->pa.discardpg); - - p->nWriteCount = sqlite3HctFileWriteCount(p->pFile); - p->iTid = sqlite3HctFileAllocateTransid(p->pFile); - rc = sqlite3HctTMapNewTID(pTMapClient, p->iTid, &p->pTmap); - *piTid = p->iTid; - return rc; -} - -SQLITE_PRIVATE i64 sqlite3HctDbTid(HctDatabase *p){ - return p->iTid; -} - -/* -** Set HctDatabase.iJrnlWriteCid. -*/ -SQLITE_PRIVATE void sqlite3HctDbJrnlWriteCid(HctDatabase *pDb, u64 iVal){ - pDb->iJrnlWriteCid = iVal; -} - -static u64 *hctDbFindTMapEntry(HctTMap *pTmap, u64 iTid){ - int iMap, iEntry; - assert( pTmap->iFirstTid<=iTid ); - assert( pTmap->iFirstTid+(pTmap->nMap*HCT_TMAP_PAGESIZE)>iTid ); - iMap = (iTid - pTmap->iFirstTid) / HCT_TMAP_PAGESIZE; - iEntry = (iTid - pTmap->iFirstTid) % HCT_TMAP_PAGESIZE; - - iEntry = HCT_TMAP_ENTRYSLOT(iEntry); - return &pTmap->aaMap[iMap][iEntry]; -} - -/* -** This is called once the current transaction has been completely -** written to disk and validated. The CID is passed as the second argument. -** Or, if the transaction was abandoned and rolled back, iCid is passed -** zero. 
-*/ -SQLITE_PRIVATE int sqlite3HctDbEndWrite(HctDatabase *p, u64 iCid, int bRollback){ - int rc = SQLITE_OK; - u64 *pEntry = hctDbFindTMapEntry(p->pTmap, p->iTid); - - assert( p->eMode==HCT_MODE_NORMAL ); - assert( p->pa.writepg.nPg==0 ); - - HctAtomicStore(pEntry, iCid|(bRollback?HCT_TMAP_ROLLBACK:HCT_TMAP_COMMITTED)); - p->iTid = 0; - return rc; -} - -static void hctDbFreeCsrList(HctDbCsr *pList){ - HctDbCsr *pNext = pList; - while( pNext ){ - HctDbCsr *pDel = pNext; - pNext = pNext->pNextScanner; - hctDbFreeCsr(pDel); - } -} - -SQLITE_PRIVATE int sqlite3HctDbEndRead(HctDatabase *pDb){ - HctTMapClient *pTMapClient = sqlite3HctFileTMapClient(pDb->pFile); - // assert( (pDb->iSnapshotId==0)==(pDb->pTmap==0) ); - hctDbFreeCsrList(pDb->pScannerList); - pDb->pScannerList = 0; - if( pDb->pTmap ){ - sqlite3HctTMapEnd(pTMapClient, pDb->iSnapshotId); - pDb->pTmap = 0; - pDb->iSnapshotId = 0; - pDb->bConcurrent = 0; - } - return SQLITE_OK; -} - -/* -** If recovery is still required, this function grabs the file-server -** mutex and returns non-zero. Or, if recovery is not required, returns -** zero without grabbing the mutex. -*/ -SQLITE_PRIVATE int sqlite3HctDbStartRecovery(HctDatabase *pDb, int iStage){ - assert( iStage==0 || iStage==1 ); - assert( pDb->eMode==HCT_MODE_NORMAL ); - if( sqlite3HctFileStartRecovery(pDb->pFile, iStage) ){ - memset(&pDb->pa, 0, sizeof(pDb->pa)); - hctDbPageArrayReset(&pDb->pa.writepg); - hctDbPageArrayReset(&pDb->pa.discardpg); - pDb->eMode = HCT_MODE_ROLLBACK; - - /* During recovery the connection should read the latest version of - ** the db - no exceptions. Set these two to the largest possible - ** values to ensure that this happens. */ - pDb->iSnapshotId = LARGEST_TID-1; - pDb->iLocalMinTid = LARGEST_TID-1; - } - return (pDb->eMode==HCT_MODE_ROLLBACK); -} - -SQLITE_PRIVATE void sqlite3HctDbRecoverTid(HctDatabase *pDb, u64 iTid){ - pDb->iTid = iTid; - pDb->iLocalMinTid = iTid ? 
iTid-1 : 0; -} - -SQLITE_PRIVATE int sqlite3HctDbFinishRecovery(HctDatabase *pDb, int iStage, int rc){ - /* assert( pDb->eMode==HCT_MODE_ROLLBACK ); */ - assert( iStage==0 || iStage==1 ); - assert( pDb->iSnapshotId>0 ); - - pDb->iTid = 0; - pDb->eMode = HCT_MODE_NORMAL; - pDb->iSnapshotId = 0; - pDb->iLocalMinTid = 0; - return sqlite3HctFileFinishRecovery(pDb->pFile, iStage, rc); -} - -/* -** Open a cursor. -*/ -SQLITE_PRIVATE int sqlite3HctDbCsrOpen( - HctDatabase *pDb, - KeyInfo *pKeyInfo, - u32 iRoot, - HctDbCsr **ppCsr -){ - int rc = SQLITE_OK; - HctDbCsr *p; - - assert( pDb->iSnapshotId!=0 ); - - /* Search for an existing cursor that can be reused. */ - HctDbCsr **pp; - for(pp=&pDb->pScannerList; *pp; pp=&(*pp)->pNextScanner){ - if( (*pp)->iRoot==iRoot ){ - *ppCsr = *pp; - *pp = (*pp)->pNextScanner; - return SQLITE_OK; - } - } - - /* If no existing cursor was found, allocate a new one */ - p = (HctDbCsr*)sqlite3MallocZero(sizeof(HctDbCsr)); - if( p==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - p->pDb = pDb; - p->iRoot = iRoot; - p->iCell = -1; - p->pKeyInfo = pKeyInfo; - sqlite3KeyInfoRef(pKeyInfo); - } - *ppCsr = p; - return rc; -} - -/* -** Set the "no-snapshot" flag on the cursor passed as the first argument. -*/ -SQLITE_PRIVATE void sqlite3HctDbCsrNosnap(HctDbCsr *pCsr, int bNosnap){ - if( pCsr ) pCsr->bNosnap = bNosnap; -} - -/* -** Close a cursor opened with sqlite3HctDbCsrOpen(). -*/ -SQLITE_PRIVATE void sqlite3HctDbCsrClose(HctDbCsr *pCsr){ - if( pCsr ){ - HctDatabase *pDb = pCsr->pDb; - hctDbCsrScanFinish(pCsr); - hctDbCsrReset(pCsr); - if( pDb->bConcurrent && pDb->iTid==0 ){ - pCsr->pNextScanner = pDb->pScannerList; - pDb->pScannerList = pCsr; - }else{ - hctDbFreeCsr(pCsr); - } - } -} - -/* -** The cursor passed as the first argument must be open on an intkey -** table and pointed at a valid entry. This function sets output variable -** (*piKey) to the integer key value associated with that entry before -** returning. 
-*/ -SQLITE_PRIVATE void sqlite3HctDbCsrKey(HctDbCsr *pCsr, i64 *piKey){ - int iCell = 0; - const u8 *aPg = 0; - - aPg = hctDbCsrPageAndCell(pCsr, &iCell); - *piKey = hctDbGetIntkey(aPg, iCell); -} - -/* -** Return true if the cursor is at EOF. Otherwise false. -*/ -SQLITE_PRIVATE int sqlite3HctDbCsrEof(HctDbCsr *pCsr){ - return pCsr==0 || pCsr->iCell<0; -} - -/* -** Set the cursor to point to the first entry in its table. If it is -** stepped, this cursor will be stepped with sqlite3HctDbCsrNext(). -*/ -SQLITE_PRIVATE int sqlite3HctDbCsrFirst(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - - rc = hctDbCsrScanFinish(pCsr); - if( rc==SQLITE_OK ){ - hctDbCsrReset(pCsr); - pCsr->eDir = BTREE_DIR_FORWARD; - rc = hctDbCsrScanStart(pCsr, 0, SMALLEST_INT64); - } - pCsr->eDir = BTREE_DIR_FORWARD; - - if( rc==SQLITE_OK ){ - rc = hctDbCsrFirstValid(pCsr); - } - - return rc; -} - -/* -** Set the cursor to point to the last entry in its table. If it is -** stepped, this cursor will be stepped with sqlite3HctDbCsrPrev(). -*/ -SQLITE_PRIVATE int sqlite3HctDbCsrLast(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - HctFile *pFile = pCsr->pDb->pFile; - u32 iPg = pCsr->iRoot; - HctDbPageHdr *pPg = 0; - HctFilePage pg; - - rc = hctDbCsrScanFinish(pCsr); - if( rc==SQLITE_OK ){ - hctDbCsrReset(pCsr); - pCsr->eDir = BTREE_DIR_REVERSE; - rc = hctDbCsrScanStart(pCsr, 0, LARGEST_INT64); - } - - /* Find the last page in the leaf page list. 
*/ - while( 1 ){ - rc = sqlite3HctFilePageGet(pFile, iPg, &pg); - if( rc!=SQLITE_OK ) break; - - pPg = (HctDbPageHdr*)pg.aOld; - if( pPg->iPeerPg ){ - iPg = pPg->iPeerPg; - }else if( pPg->nHeight==0 ){ - break; - }else if( hctPagetype(pPg)==HCT_PAGETYPE_INTKEY ){ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)pPg; - iPg = pNode->aEntry[pPg->nEntry-1].iChildPg; - }else{ - HctDbIndexNode *pNode = (HctDbIndexNode*)pPg; - iPg = pNode->aEntry[pPg->nEntry-1].iChildPg; - } - sqlite3HctFilePageRelease(&pg); - } - - /* Set the cursor to point to one position past the last entry on the - ** page located above. Then call sqlite3HctDbCsrPrev() to step back to - ** the first entry visible to the current transaction. */ - if( rc==SQLITE_OK ){ - assert( pPg->nHeight==0 && pPg->iPeerPg==0 ); - hctMemcpy(&pCsr->pg, &pg, sizeof(pg)); - if( pPg->nEntry==0 ){ - pCsr->iCell = -1; - }else{ - pCsr->iCell = pPg->nEntry; - rc = sqlite3HctDbCsrPrev(pCsr); - } - } - - return rc; -} - -/* -** Load the key associated with cell iCell1 on page aPg1[] and compare -** it to pKey2. Return an integer less than, equal to or greater than -** zero if the loaded key is less than, equal to or greater than pKey2, -** respectively. i.e. 
-** -** ret = key(aPg1, iCell1) - (*pKey2) -*/ -static int hctDbCompareCellKey( - int *pRc, - HctDatabase *pDb, - const u8 *aPg1, - int iCell1, - HctDbKey *pKey2 -){ - int ret = 0; - if( *pRc==SQLITE_OK ){ - - assert( hctPagetype(aPg1)==HCT_PAGETYPE_INTKEY - || hctPagetype(aPg1)==HCT_PAGETYPE_INDEX - ); - if( hctPagetype(aPg1)==HCT_PAGETYPE_INTKEY ){ - i64 iKey = hctDbGetIntkey(aPg1, iCell1); - if( iKeyiKey ){ - ret = -1; - }else if( iKey>pKey2->iKey ){ - ret = +1; - } - }else if( pKey2->pKey==0 ){ - ret = -1; - }else{ - int nRec = 0; - const u8 *aRec = 0; - HctBuffer buf = {0,0,0}; - int rc = hctDbLoadRecord(pDb, &buf, aPg1, iCell1, &nRec, &aRec); - if( rc!=SQLITE_OK ){ - *pRc = rc; - }else{ - ret = sqlite3VdbeRecordCompare(nRec, aRec, pKey2->pKey); - } - sqlite3HctBufferFree(&buf); - } - } - - return ret; -} - - -static int hctDbCsrNext(HctDbCsr *pCsr){ - HctDatabase *pDb = pCsr->pDb; - HctDbPageHdr *pPg = 0; - int rc = SQLITE_OK; - - /* Check if the current cell, be it on the linked list of leaves, or - ** on a history page, has an old-data pointer that should be followed. - ** - ** Except, don't do this if pCsr->iCell is less than zero. In that - ** case this call is supposed to jump to the first cell on the main - ** page. 
*/ - if( pCsr->iCell>=0 ){ - do { - int bMerge = 0; - HctRangePtr ptr; - - hctDbCsrGetRange(pCsr, &ptr); - if( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ){ - hctDbCsrDescendRange(&rc, pCsr, ptr.iRangeTid, ptr.iOld, bMerge); - if( rc==SQLITE_OK ){ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; - if( p->eRange==HCT_RANGE_FAN ){ - p->iCell = -1; - }else{ - int bExact = 0; - hctDbLeafSearch(pDb, - p->pg.aOld, p->lowkey.iKey, p->lowkey.pKey, &p->iCell, &bExact - ); - if( bExact==0 ) p->iCell--; - } - } - } - - while( pCsr->nRange ){ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; - - p->iCell++; - if( p->iCellpg.aOld) && ( - p->eRange==HCT_RANGE_FAN - || hctDbCompareCellKey(&rc, pDb, p->pg.aOld, p->iCell, &p->highkey)<0 - )){ - if( p->eRange==HCT_RANGE_MERGE ){ - return SQLITE_OK; - } - break; - } - sqlite3HctFilePageRelease(&p->pg); - hctDbCsrAscendRange(pCsr); - } - - }while( pCsr->nRange ); - - } - - pPg = (HctDbPageHdr*)pCsr->pg.aOld; - assert( pCsr->iCell>=-1 && pCsr->iCellnEntry ); - assert( pPg->nHeight==0 ); - - pCsr->iCell++; - if( pCsr->iCell==pPg->nEntry ){ - u32 iPeerPg = pPg->iPeerPg; - if( iPeerPg==0 ){ - /* Main cursor is now at EOF */ - pCsr->iCell = -1; - sqlite3HctFilePageRelease(&pCsr->pg); - }else{ - /* Jump to peer page */ - rc = sqlite3HctFilePageRelease(&pCsr->pg); - if( rc==SQLITE_OK ){ - rc = sqlite3HctFilePageGet(pDb->pFile, iPeerPg, &pCsr->pg); - pCsr->iCell = 0; - } - } - } - - return rc; -} - -static int hctDbCsrGoLeft(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - int nHeight = ((HctDbPageHdr*)pCsr->pg.aOld)->nHeight; - - if( pCsr->pKeyInfo ){ - UnpackedRecord *pRec = 0; - rc = hctDbCsrLoadAndDecode(pCsr, 0, &pRec); - if( rc==SQLITE_OK ){ - int bDummy; - HctFilePage pg = pCsr->pg; - memset(&pCsr->pg, 0, sizeof(HctFilePage)); - pRec->default_rc = 1; - hctDbCsrSeek(pCsr, 0, nHeight, 0, pRec, 0, &bDummy); - pRec->default_rc = 0; - sqlite3HctFilePageRelease(&pg); - } - }else if( hctIsLeftmost(pCsr->pg.aOld)==0 ){ - i64 iKey = 
hctDbIntkeyFPKey(pCsr->pg.aOld); - sqlite3HctFilePageRelease(&pCsr->pg); - rc = hctDbCsrSeek(pCsr, 0, nHeight, 0, 0, iKey-1, 0); - } - - return rc; -} - -static int hctDbCsrPrev(HctDbCsr *pCsr){ - HctDatabase *pDb = pCsr->pDb; - int rc = SQLITE_OK; - /* Advance the cursor */ - - if( pCsr->nRange ){ - HctDbRangeCsr *pRange = &pCsr->aRange[pCsr->nRange-1]; - pRange->iCell--; - }else{ - pCsr->iCell--; - if( pCsr->iCell<0 ){ - rc = hctDbCsrGoLeft(pCsr); - } - } - - if( pCsr->iCell>=0 ){ - do { - HctRangePtr ptr; - int bMerge = 0; - - hctDbCsrGetRange(pCsr, &ptr); - if( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ){ - do { - hctDbCsrDescendRange(&rc, pCsr, ptr.iRangeTid, ptr.iOld, bMerge); - memset(&ptr, 0, sizeof(ptr)); - if( rc==SQLITE_OK ){ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; - if( p->eRange==HCT_RANGE_FAN ){ - p->iCell = ((HctDbPageHdr*)p->pg.aOld)->nEntry-1; - }else{ - int bExact; - hctDbLeafSearch(pDb, p->pg.aOld, - p->highkey.iKey, p->highkey.pKey, &p->iCell, &bExact - ); - p->iCell--; - } - - if( p->iCell>=0 ){ - hctDbCsrGetRange(pCsr, &ptr); - } - } - }while( hctDbFollowRangeOld(pDb, &ptr, &bMerge) ); - } - - while( pCsr->nRange>0 ){ - HctDbRangeCsr *p = &pCsr->aRange[pCsr->nRange-1]; - if( p->iCell>=0 && ( - p->eRange==HCT_RANGE_FAN - || hctDbCompareCellKey(&rc, pDb, p->pg.aOld, p->iCell, &p->lowkey)>0 - )){ - if( p->eRange==HCT_RANGE_MERGE ){ - return SQLITE_OK; - } - p->iCell--; - break; - } - sqlite3HctFilePageRelease(&p->pg); - hctDbCsrAscendRange(pCsr); - } - }while( pCsr->nRange ); - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctDbCsrNext(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - - /* Should not be called while committing, validating or doing rollback. 
*/ - assert( pCsr->pDb->iTid==0 && pCsr->pDb->eMode==HCT_MODE_NORMAL ); - - do { - rc = hctDbCsrNext(pCsr); - }while( rc==SQLITE_OK && pCsr->iCell>=0 && hctDbCurrentIsVisible(pCsr)==0 ); - return rc; -} - -SQLITE_PRIVATE int sqlite3HctDbCsrPrev(HctDbCsr *pCsr){ - int rc = SQLITE_OK; - - assert( pCsr->pDb->eMode==HCT_MODE_NORMAL ); - do { - rc = hctDbCsrPrev(pCsr); - }while( rc==SQLITE_OK && pCsr->iCell>=0 && hctDbCurrentIsVisible(pCsr)==0 ); - return rc; -} - -SQLITE_PRIVATE void sqlite3HctDbCsrClear(HctDbCsr *pCsr){ - hctDbCsrScanFinish(pCsr); - hctDbCsrReset(pCsr); -} - - -SQLITE_PRIVATE int sqlite3HctDbCsrData(HctDbCsr *pCsr, int *pnData, const u8 **paData){ - const u8 *pPg; - int iCell; - - pPg = hctDbCsrPageAndCell(pCsr, &iCell); - assert( hctPageheight(pPg)==0 ); - -#if 0 - if( pCsr->nRange ){ - printf("%p: data from range page %d (from %d) (snapshotid=%lld)\n", - pCsr->pDb, - (int)pCsr->aRange[pCsr->nRange-1].pg.iOldPg, - (int)pCsr->pg.iOldPg, pCsr->pDb->iSnapshotId - ); - }else{ - printf("%p: data from page %d (snapshotid=%lld)\n", - pCsr->pDb, - (int)pCsr->pg.iOldPg, pCsr->pDb->iSnapshotId - ); - } - fflush(stdout); -#endif - - return hctDbLoadRecord(pCsr->pDb, &pCsr->rec, pPg, iCell, pnData, paData); -} - -static int hctDbValidateEntry(HctDatabase *pDb, HctDbCsr *pCsr){ - int rc = SQLITE_OK; - u8 flags; - - if( pCsr->nRange ){ - /* If the current entry is on a history page, it is not valid (as - ** it has already been deleted). Later: unless of course it was this - ** transaction that deleted it! 
*/ - if( pCsr->aRange[pCsr->nRange-1].iRangeTid!=pDb->iTid ){ - rc = HCT_SQLITE_BUSY; - } - }else{ - int iOff = hctDbCellOffset(pCsr->pg.aOld, pCsr->iCell, &flags); - if( flags & HCTDB_HAS_TID ){ - u64 iTid = hctGetU64(&pCsr->pg.aOld[iOff]); - if( hctDbTidIsConflict(pCsr->pDb, iTid) ){ - rc = HCT_SQLITE_BUSY; - } - } - } - return rc; -} - -static int hctDbValidateIntkey(HctDatabase *pDb, HctDbCsr *pCsr){ - int rc = SQLITE_OK; - HctCsrIntkeyOp *pOpList = pCsr->intkey.pOpList; - HctCsrIntkeyOp *pOp; - - pCsr->intkey.pOpList = 0; - assert( pCsr->intkey.pCurrentOp==0 ); - for(pOp=pOpList; pOp && rc==SQLITE_OK; pOp=pOp->pNextOp){ - int bDum = 0; - assert( pOp->iFirst<=pOp->iLast ); - - if( pOp->iLogical ){ - int bEvict = 0; - - /* If the physical page associated with the logical page containing - ** the current key has not changed, and the logical page has not been - ** evicted, then the current key itself may not have been modified. - ** Jump to the next iteration of the loop in this case. */ - u32 iPhys = sqlite3HctFilePageMapping(pDb->pFile, pOp->iLogical, &bEvict); - if( pOp->iPhysical==iPhys && bEvict==0 ) continue; - - /* Alternatively, if the logical page has not been evicted, load it - ** and seek to the desired key. If the key is found, or if it is not - ** found but the key would reside on the current page, then load - ** the page into the cursor. This is faster than the hctDbCsrSeek() - ** call below. 
*/ - if( bEvict==0 && pOp->iLogical!=pCsr->iRoot ){ - rc = hctDbGetPhysical(pDb, iPhys, &pCsr->pg); - if( rc==SQLITE_OK ){ - pCsr->eDir = BTREE_DIR_FORWARD; - pCsr->iCell = hctDbIntkeyLeafSearch(pCsr->pg.aOld, pOp->iFirst,&bDum); - if( pCsr->iCell>=((HctDbIntkeyLeaf*)pCsr->pg.aOld)->pg.nEntry ){ - hctDbCsrReset(pCsr); - } - } - } - } - - if( pCsr->pg.aOld==0 ){ - if( pOp->iFirst==SMALLEST_INT64 ){ - pCsr->eDir = BTREE_DIR_FORWARD; - rc = hctDbCsrFirst(pCsr); - }else{ - if( pOp->iFirst==pOp->iLast ){ - pCsr->eDir = BTREE_DIR_NONE; - }else{ - pCsr->eDir = BTREE_DIR_FORWARD; - } - rc = hctDbCsrSeekAndDescend(pCsr, 0, pOp->iFirst, 0, &bDum); - } - } - - while( rc==SQLITE_OK && !sqlite3HctDbCsrEof(pCsr) ){ - i64 iKey = 0; - sqlite3HctDbCsrKey(pCsr, &iKey); - if( iKey>=pOp->iFirst && iKey<=pOp->iLast ){ - rc = hctDbValidateEntry(pDb, pCsr); - } - if( rc!=SQLITE_OK || iKey>=pOp->iLast ) break; - rc = hctDbCsrNext(pCsr); - } - hctDbCsrReset(pCsr); - } - assert( pCsr->intkey.pOpList==0 && pCsr->intkey.pCurrentOp==0 ); - pCsr->intkey.pOpList = pOpList; - - return rc; -} - -static int hctDbValidateIndex(HctDatabase *pDb, HctDbCsr *pCsr){ - int rc = SQLITE_OK; - HctCsrIndexOp *pOpList = pCsr->index.pOpList; - HctCsrIndexOp *pOp; - - pCsr->index.pOpList = 0; - assert( pCsr->index.pCurrentOp==0 ); - rc = hctDbCsrAllocateUnpacked(pCsr); - for(pOp=pOpList; pOp && rc==SQLITE_OK; pOp=pOp->pNextOp){ - UnpackedRecord *pRec = pCsr->pRec; - int bDummy = 0; - - if( pOp->iLogical - && pOp->iPhysical==sqlite3HctFilePageMapping(pDb->pFile, pOp->iLogical, &bDummy) - ){ - continue; - } - - hctDbCsrReset(pCsr); - pCsr->eDir = (pOp->pFirst==pOp->pLast) ? 
BTREE_DIR_NONE : BTREE_DIR_FORWARD; - if( pOp->pFirst==0 ){ - rc = hctDbCsrFirst(pCsr); - }else{ - int bExact = 0; - sqlite3VdbeRecordUnpack(pCsr->pKeyInfo, pOp->nFirst, pOp->pFirst, pRec); - rc = hctDbCsrSeek(pCsr, 0, 0, 0, pRec, 0, &bExact); - if( rc==SQLITE_OK && bExact==0 ){ - rc = hctDbCsrNext(pCsr); - } - } - if( pOp->pLast && pOp->pLast!=pOp->pFirst ){ - sqlite3VdbeRecordUnpack(pCsr->pKeyInfo, pOp->nLast, pOp->pLast, pRec); - }else{ - pRec = 0; - } - if( rc!=SQLITE_OK ) break; - - if( pOp->pLast==pOp->pFirst ){ - assert( !sqlite3HctDbCsrEof(pCsr) ); - rc = hctDbValidateEntry(pDb, pCsr); - }else{ - while( !sqlite3HctDbCsrEof(pCsr) ){ - int res = -1; - if( pRec ){ - const u8 *aKey = 0; - int nKey = 0; - rc = sqlite3HctDbCsrData(pCsr, &nKey, &aKey); - if( rc!=SQLITE_OK ) break; - res = sqlite3VdbeRecordCompare(nKey, aKey, pRec); - if( res<0 ) break; - } - rc = hctDbValidateEntry(pDb, pCsr); - if( res==0 || rc!=SQLITE_OK ) break; - rc = hctDbCsrNext(pCsr); - if( rc!=SQLITE_OK ) break; - } - } - } - - assert( pCsr->index.pOpList==0 && pCsr->index.pCurrentOp==0 ); - pCsr->index.pOpList = pOpList; - return rc; -} - -SQLITE_PRIVATE void sqlite3HctDbTMapScan(HctDatabase *pDb){ - sqlite3HctTMapScan(sqlite3HctFileTMapClient(pDb->pFile)); -} - -int -__attribute__ ((noinline)) -sqlite3HctDbValidate( - sqlite3 *db, - HctDatabase *pDb, - u64 *piCid, - int *pbTmapscan -){ - HctDbCsr *pCsr = 0; - u64 *pEntry = hctDbFindTMapEntry(pDb->pTmap, pDb->iTid); - u64 iCid = *piCid; - u64 nFinalWrite = 0; - int rc = SQLITE_OK; - int nPageScan = pDb->pConfig->nPageScan; - - /* Set nWrite to the number of pages written by this transaction. This - ** is used for scheduling tmap scans only, so it doesn't matter if it - ** is slightly inaccurate in some cases. 
*/ - int nWrite = sqlite3HctFileWriteCount(pDb->pFile) - pDb->nWriteCount; - assert( nWrite>=0 ); - if( nWrite==0 ) nWrite = 1; - - assert( *pEntry==0 ); - if( iCid==0 ){ - HctAtomicStore(pEntry, HCT_TMAP_VALIDATING); - iCid = sqlite3HctFileAllocateCID(pDb->pFile, 1); - } - HctAtomicStore(pEntry, HCT_TMAP_VALIDATING | iCid); - - nFinalWrite = sqlite3HctFileIncrWriteCount(pDb->pFile, nWrite); - if( (nFinalWrite / nPageScan)!=((nFinalWrite-nWrite) / nPageScan) ){ - *pbTmapscan = 1; - } - - assert( pDb->eMode==HCT_MODE_NORMAL ); - - /* Invoke the SQLITE_TESTCTRL_HCT_MTCOMMIT hook, if applicable */ - if( db->xMtCommit ) db->xMtCommit(db->pMtCommitCtx, 2); - - /* If iCid is one more than pDb->iSnapshotId, then this transaction is - ** being applied against the snapshot that it was run against. In this - ** case we can skip validation entirely. */ - if( iCid!=pDb->iSnapshotId+1 ){ - if( pDb->bConcurrent ){ - pDb->eMode = HCT_MODE_VALIDATE; - if( hctDbValidateMeta(pDb) ){ - rc = HCT_SQLITE_BUSY; - }else{ - for(pCsr=pDb->pScannerList; pCsr; pCsr=pCsr->pNextScanner){ - if( pCsr->pKeyInfo==0 ){ - rc = hctDbValidateIntkey(pDb, pCsr); - }else{ - rc = hctDbValidateIndex(pDb, pCsr); - } - if( rc ) break; - } - } - pDb->eMode = HCT_MODE_NORMAL; - }else{ - rc = HCT_SQLITE_BUSY; - } - } - - *piCid = iCid; - return rc; -} - -/************************************************************************* -************************************************************************** -** Start of integrity-check implementation. -** -** The code here assumes that the database is quiescent. If it is invoked -** concurrently with database writers, false-positive errors may be reported. -*/ - -/* -** Walk the tree structure with logical root page iRoot, visiting every -** page and overflow page currently linked in. -** -** For each page in the tree, the supplied callback is invoked. The first -** argument passed to the callback is a copy of the fourth argument to -** this function. 
The second and third arguments are the logical and -** physical page number, respectively. If there is no logical page number, -** as for overflow pages, the second parameter is passed zero. -** -** It (presumably) makes little sense to call this function without -** somehow guaranteeing that the tree is not being currently written to. -*/ -SQLITE_PRIVATE int sqlite3HctDbWalkTree( - HctFile *pFile, /* File tree resides in */ - u32 iRoot, /* Root page of tree */ - int (*x)(void*, u32, u32), /* Callback function */ - void *pCtx /* First argument to pass to x() */ -){ - int rc = SQLITE_OK; - u32 pgno = iRoot; - - u32 iPhys = 0; - int dummy = 0; - - /* Special case - the root page is not mapped to any physical page. */ - iPhys = sqlite3HctFilePageMapping(pFile, iRoot, &dummy); - if( iPhys==0 ){ - return x(pCtx, iRoot, 0); - } - - /* This outer loop runs once for each list in the tree structure - once - ** for the list of leaves, once for the list of parent, and so on. - ** Starting from the root page and descending towards the leaves. */ - do { - HctFilePage pg; - int nHeight = 0; - int eType = 0; - u32 pgnoChild = 0; - - /* Load up page pgno - the leftmost of its list. Then, unless this - ** is the list of leaves, set pgnoChild to the leftmost child of - ** the page. Or, if this is a list of leaves, leave pgnoChild set - ** to zero. 
*/ - rc = sqlite3HctFilePageGet(pFile, pgno, &pg); - if( rc!=SQLITE_OK ){ - break; - }else{ - nHeight = hctPageheight(pg.aOld); - eType = hctPagetype(pg.aOld); - if( eType!=HCT_PAGETYPE_INTKEY && eType!=HCT_PAGETYPE_INDEX ){ - rc = SQLITE_CORRUPT_BKPT; - break; - } - else if( nHeight>0 ){ - if( eType==HCT_PAGETYPE_INTKEY ){ - pgnoChild = ((HctDbIntkeyNode*)pg.aOld)->aEntry[0].iChildPg; - }else{ - pgnoChild = ((HctDbIndexNode*)pg.aOld)->aEntry[0].iChildPg; - } - } - } - - while( pg.aOld ){ - u32 iPeerPg = ((HctDbPageHdr*)pg.aOld)->iPeerPg; - u32 iLogic = pg.iPg; - u32 iPhys = pg.iOldPg; - - rc = x(pCtx, iLogic, iPhys); - if( rc!=SQLITE_OK ) break; - - if( nHeight==0 || eType==HCT_PAGETYPE_INDEX ){ - int iCell = 0; - int nEntry = ((HctDbPageHdr*)pg.aOld)->nEntry; - for(iCell=0; iCelliPeerPg; - sqlite3HctFilePageRelease(&ov); - } - } - } - } - - sqlite3HctFilePageRelease(&pg); - if( iPeerPg ){ - rc = sqlite3HctFilePageGet(pFile, iPeerPg, &pg); - if( rc!=SQLITE_OK ) break; - } - } - - pgno = pgnoChild; - }while( rc==SQLITE_OK && pgno!=0 ); - - return rc; -} - -typedef struct IntCheckCtx IntCheckCtx; -struct IntCheckCtx { - u32 nLogic; /* Number of logical pages in db */ - u32 nPhys; /* Number of physical pages in db */ - u8 *aLogic; - u8 *aPhys; - int nErr; - int nMaxErr; - char *zErr; - i64 nEntry; /* Number of entries in table */ -}; - -static void hctDbICError( - IntCheckCtx *p, - char *zFmt, - ... -){ - va_list ap; - char *zErr; - va_start(ap, zFmt); - zErr = sqlite3_vmprintf(zFmt, ap); - p->zErr = sqlite3_mprintf("%z%s%z", p->zErr, (p->zErr ? 
"\n" : ""), zErr); - p->nErr++; - va_end(ap); -} - -static int hctDbIntegrityCheckCb( - void *pCtx, - u32 iLogic, - u32 iPhys -){ - IntCheckCtx *p = (IntCheckCtx*)pCtx; - if( iLogic ){ - if( p->aLogic[iLogic-1] ){ - hctDbICError(p, "multiple refs to logical page %d", (int)iLogic); - } - p->aLogic[iLogic-1] = 1; - } - if( iPhys ){ - if( p->aPhys[iPhys-1] ){ - hctDbICError(p, "multiple refs to physical page %d", (int)iPhys); - } - p->aPhys[iPhys-1] = 1; - } - - return (p->nErr>=p->nMaxErr) ? -1 : 0; -} - - -SQLITE_PRIVATE char *sqlite3HctDbIntegrityCheck( - HctDatabase *pDb, - u32 *aRoot, - Mem *aCnt, - int nRoot, - int *pnErr -){ - HctFile *pFile = pDb->pFile; - IntCheckCtx c; - u32 *aFileRoot = 0; - int nFileRoot = 0; - - int rc = sqlite3HctFileRootArray(pFile, &aFileRoot, &nFileRoot); - memset(&c, 0, sizeof(c)); - if( rc==SQLITE_OK ){ - c.nErr = *pnErr; - c.nMaxErr = 100; - sqlite3HctFileICArrays(pFile, &c.aLogic, &c.nLogic, &c.aPhys, &c.nPhys); - } - if( !c.aLogic ){ - c.nErr++; - }else{ - int ii; - - for(ii=0; c.nErr==0 && iistats.nBalanceIntkey; - break; - case 1: - *pzStat = "balance_index"; - iVal = pDb->stats.nBalanceIndex; - break; - case 2: - *pzStat = "balance_single"; - iVal = pDb->stats.nBalanceSingle; - break; - case 3: - *pzStat = "tmap_lookup"; - iVal = pDb->stats.nTMapLookup; - break; - case 4: - *pzStat = "update_in_place"; - iVal = pDb->stats.nUpdateInPlace; - break; - case 5: - *pzStat = "internal_retry"; - iVal = pDb->stats.nInternalRetry; - break; - default: - break; - } - - return iVal; -} - -/************************************************************************* -************************************************************************** -** Below are the virtual table implementations. These are debugging -** aids only. 
-*/ - -typedef struct hctdb_vtab hctdb_vtab; -struct hctdb_vtab { - sqlite3_vtab base; /* Base class - must be first */ - sqlite3 *db; -}; - -/* templatevtab_cursor is a subclass of sqlite3_vtab_cursor which will -** serve as the underlying representation of a cursor that scans -** over rows of the result -*/ -typedef struct hctdb_cursor hctdb_cursor; -struct hctdb_cursor { - sqlite3_vtab_cursor base; /* Base class - must be first */ - HctDatabase *pDb; /* Database to report on */ - u64 iMaxPgno; /* Maximum page number for this scan */ - - u64 pgno; /* The page-number/rowid value */ - const char *zPgtype; - u32 iPeerPg; - u32 nEntry; - u32 nHeight; - u32 nFree; - char *zFpKey; -}; - -/* -** The hctdbConnect() method is invoked to create a new -** template virtual table. -** -** Think of this routine as the constructor for hctdb_vtab objects. -** -** All this routine needs to do is: -** -** (1) Allocate the hctdb_vtab object and initialize all fields. -** -** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the -** result set of queries against the virtual table will look like. -*/ -static int hctdbConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - hctdb_vtab *pNew; - int rc; - - rc = sqlite3_declare_vtab(db, - "CREATE TABLE x(" - "pgno INTEGER, pgtype TEXT, nheight INTEGER, " - "peer INTEGER, nentry INTEGER, nfree INTEGER, fpkey TEXT" - ")" - ); - - if( rc==SQLITE_OK ){ - pNew = sqlite3MallocZero( sizeof(*pNew) ); - *ppVtab = (sqlite3_vtab*)pNew; - if( pNew==0 ) return SQLITE_NOMEM; - pNew->db = db; - } - return rc; -} - -/* -** This method is the destructor for hctdb_vtab objects. -*/ -static int hctdbDisconnect(sqlite3_vtab *pVtab){ - hctdb_vtab *p = (hctdb_vtab*)pVtab; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Constructor for a new hctdb_cursor object. 
-*/ -static int hctdbOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ - hctdb_cursor *pCur; - pCur = sqlite3MallocZero(sizeof(*pCur)); - if( pCur==0 ) return SQLITE_NOMEM; - *ppCursor = &pCur->base; - return SQLITE_OK; -} - -/* -** Destructor for a hctdb_cursor. -*/ -static int hctdbClose(sqlite3_vtab_cursor *cur){ - hctdb_cursor *pCur = (hctdb_cursor*)cur; - sqlite3_free(pCur->zFpKey); - sqlite3_free(pCur); - return SQLITE_OK; -} - -static char *hex_encode(const u8 *aIn, int nIn){ - char *zRet = sqlite3MallocZero(nIn*2+1); - if( zRet ){ - static const char aDigit[] = "0123456789ABCDEF"; - int i; - for(i=0; i> 4) ]; - zRet[i*2+1] = aDigit[ (aIn[i] & 0xF) ]; - } - } - return zRet; -} - - -SQLITE_PRIVATE char *sqlite3HctDbRecordToText(sqlite3 *db, const u8 *aRec, int nRec){ - char *zRet = 0; - const char *zSep = ""; - const u8 *pEndHdr; /* Points to one byte past record header */ - const u8 *pHdr; /* Current point in record header */ - const u8 *pBody; /* Current point in record data */ - u64 nHdr; /* Bytes in record header */ - - if( nRec==0 ){ - return sqlite3_mprintf(""); - } - - pHdr = aRec + sqlite3GetVarint(aRec, &nHdr); - pBody = pEndHdr = &aRec[nHdr]; - while( pHdrbase.pVtab)->db; - int eType; - - assert( 0==sqlite3_stricmp("intkey", azType[HCT_PAGETYPE_INTKEY]) ); - assert( 0==sqlite3_stricmp("index", azType[HCT_PAGETYPE_INDEX]) ); - assert( 0==sqlite3_stricmp("overflow", azType[HCT_PAGETYPE_OVERFLOW]) ); - - sqlite3_free(pCur->zFpKey); - pCur->zFpKey = 0; - - eType = hctPagetype(pHdr); - if( eTypezPgtype = azType[hctPagetype(pHdr)]; - }else{ - pCur->zPgtype = "!INVALID!"; - } - pCur->iPeerPg = pHdr->iPeerPg; - pCur->nEntry = pHdr->nEntry; - pCur->nHeight = pHdr->nHeight; - - if( eType==HCT_PAGETYPE_INTKEY ){ - if( pHdr->nHeight==0 ){ - HctDbIntkeyLeaf *pLeaf = (HctDbIntkeyLeaf*)aPg; - char *zFpKey = sqlite3_mprintf("%lld", pLeaf->aEntry[0].iKey); - if( zFpKey==0 ) rc = SQLITE_NOMEM_BKPT; - pCur->zFpKey = zFpKey; - pCur->nFree = 
(int)pLeaf->hdr.nFreeBytes; - }else{ - HctDbIntkeyNode *pNode = (HctDbIntkeyNode*)aPg; - char *zFpKey = sqlite3_mprintf("%lld", pNode->aEntry[0].iKey); - if( zFpKey==0 ) rc = SQLITE_NOMEM_BKPT; - pCur->zFpKey = zFpKey; - pCur->nFree = ( - hctDbMaxCellsPerIntkeyNode(pCur->pDb->pgsz) - pNode->pg.nEntry - ) * sizeof(HctDbIntkeyNodeEntry); - } - - }else if( eType==HCT_PAGETYPE_INDEX ){ - HctBuffer buf = {0,0,0}; - const u8 *aRec = 0; - int nRec = 0; - - rc = hctDbLoadRecord(pCur->pDb, &buf, aPg, 0, &nRec, &aRec); - if( rc==SQLITE_OK ){ - char *zFpKey = sqlite3HctDbRecordToText(db, aRec, nRec); - if( zFpKey==0 ) rc = SQLITE_NOMEM_BKPT; - pCur->zFpKey = zFpKey; - } - - pCur->nFree = (int)(((HctDbIndexNode*)pHdr)->hdr.nFreeBytes); - sqlite3HctBufferFree(&buf); - } - return rc; -} - -/* -** Return TRUE if the cursor has been moved off of the last -** row of output. -*/ -static int hctdbEof(sqlite3_vtab_cursor *cur){ - hctdb_cursor *pCur = (hctdb_cursor*)cur; - return pCur->pgno>pCur->iMaxPgno; -} - -/* -** Advance a hctdb_cursor to its next row of output. -*/ -static int hctdbNext(sqlite3_vtab_cursor *cur){ - hctdb_cursor *pCur = (hctdb_cursor*)cur; - int rc = SQLITE_OK; - HctFilePage pg; - - memset(&pg, 0, sizeof(pg)); - do { - sqlite3HctFilePageRelease(&pg); - pCur->pgno++; - if( hctdbEof(cur) ) return SQLITE_OK; - rc = sqlite3HctFilePageGetPhysical(pCur->pDb->pFile, pCur->pgno, &pg); - }while( rc==SQLITE_OK && pg.aOld==0 ); - - if( pg.aOld ){ - rc = hctdbLoadPage(pCur, pg.aOld); - } - return rc; -} - -/* -** Return values of columns for the row at which the hctdb_cursor -** is currently pointing. 
-*/ -static int hctdbColumn( - sqlite3_vtab_cursor *cur, /* The cursor */ - sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ - int i /* Which column to return */ -){ - hctdb_cursor *pCur = (hctdb_cursor*)cur; - assert( i>=0 && i<=5 ); - switch( i ){ - case 0: /* pgno */ - sqlite3_result_int64(ctx, (i64)pCur->pgno); - break; - case 1: /* pgtype */ - sqlite3_result_text(ctx, pCur->zPgtype, -1, SQLITE_TRANSIENT); - break; - case 2: /* nHeight */ - sqlite3_result_int64(ctx, (i64)pCur->nHeight); - break; - case 3: /* peer */ - sqlite3_result_int64(ctx, (i64)pCur->iPeerPg); - break; - case 4: /* nEntry */ - sqlite3_result_int64(ctx, (i64)pCur->nEntry); - break; - case 5: /* nfree */ - sqlite3_result_int64(ctx, (i64)pCur->nFree); - break; - case 6: /* fpkey */ - sqlite3_result_text(ctx, pCur->zFpKey, -1, SQLITE_TRANSIENT); - break; - } - return SQLITE_OK; -} - -/* -** Return the rowid for the current row. In this implementation, the -** rowid is the same as the output value. -*/ -static int hctdbRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ - hctdb_cursor *pCur = (hctdb_cursor*)cur; - *pRowid = pCur->pgno; - return SQLITE_OK; -} - -/* -** This method is called to "rewind" the hctdb_cursor object back -** to the first row of output. This method is always called at least -** once prior to any call to hctdbColumn() or hctdbRowid() or -** hctdbEof(). 
-*/ -static int hctdbFilter( - sqlite3_vtab_cursor *pVtabCursor, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv -){ - hctdb_cursor *pCur = (hctdb_cursor*)pVtabCursor; - hctdb_vtab *pTab = (hctdb_vtab*)(pCur->base.pVtab); - - pCur->pDb = sqlite3HctDbFind(pTab->db, 0); - if( argc==1 ){ - u32 iVal = (u32)sqlite3_value_int64(argv[0]); - pCur->iMaxPgno = iVal; - pCur->pgno = iVal-1; - }else{ - pCur->pgno = 0; - pCur->iMaxPgno = sqlite3HctFileMaxpage(pCur->pDb->pFile); - } - return hctdbNext(pVtabCursor); -} - -/* -** SQLite will invoke this method one or more times while planning a query -** that uses the virtual table. This routine needs to create -** a query plan for each invocation and compute an estimated cost for that -** plan. -*/ -static int hctdbBestIndex( - sqlite3_vtab *tab, - sqlite3_index_info *pIdxInfo -){ - int i; - pIdxInfo->estimatedCost = (double)10000; - pIdxInfo->estimatedRows = 10000; - - for(i=0; inConstraint; i++){ - struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[i]; - if( p->iColumn!=0 ) continue; - if( p->op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue; - if( !p->usable ) continue; - pIdxInfo->aConstraintUsage[i].argvIndex = 1; - pIdxInfo->idxNum = 1; - pIdxInfo->estimatedCost = (double)10; - pIdxInfo->estimatedRows = 10; - break; - } - - return SQLITE_OK; -} - -typedef struct hctentry_vtab hctentry_vtab; -struct hctentry_vtab { - sqlite3_vtab base; /* Base class - must be first */ - sqlite3 *db; -}; - -/* templatevtab_cursor is a subclass of sqlite3_vtab_cursor which will -** serve as the underlying representation of a cursor that scans -** over rows of the result -*/ -typedef struct hctentry_cursor hctentry_cursor; -struct hctentry_cursor { - sqlite3_vtab_cursor base; /* Base class - must be first */ - HctDatabase *pDb; /* Database to report on */ - int iEntry; - HctFilePage pg; - u32 iPg; /* Current physical page number */ - u32 iLastPg; /* Last physical page to report on */ -}; - -/* -** The hctentryConnect() method 
is invoked to create a new -** template virtual table. -** -** Think of this routine as the constructor for hctentry_vtab objects. -** -** All this routine needs to do is: -** -** (1) Allocate the hctentry_vtab object and initialize all fields. -** -** (2) Tell SQLite (via the sqlite3_declare_vtab() interface) what the -** result set of queries against the virtual table will look like. -*/ -static int hctentryConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - hctentry_vtab *pNew; - int rc; - - rc = sqlite3_declare_vtab(db, - "CREATE TABLE x(" - "pgno INTEGER, entry INTEGER, " - "ikey INTEGER, size INTEGER, offset INTEGER, " - "child INTEGER, " - "tid INTEGER, rangetid INTEGER, " - /* "oldpg INTEGER, " */ - "rangeoldpg INTEGER, ovfl INTEGER, record TEXT" - ")" - ); - - if( rc==SQLITE_OK ){ - pNew = sqlite3MallocZero( sizeof(*pNew) ); - *ppVtab = (sqlite3_vtab*)pNew; - if( pNew==0 ) return SQLITE_NOMEM; - pNew->db = db; - } - return rc; -} - -/* -** This method is the destructor for hctentry_vtab objects. -*/ -static int hctentryDisconnect(sqlite3_vtab *pVtab){ - hctentry_vtab *p = (hctentry_vtab*)pVtab; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** Constructor for a new hctentry_cursor object. -*/ -static int hctentryOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ - hctentry_cursor *pCur; - pCur = sqlite3MallocZero(sizeof(*pCur)); - if( pCur==0 ) return SQLITE_NOMEM; - *ppCursor = &pCur->base; - return SQLITE_OK; -} - -/* -** Destructor for a hctentry_cursor. -*/ -static int hctentryClose(sqlite3_vtab_cursor *cur){ - hctentry_cursor *pCur = (hctentry_cursor*)cur; - sqlite3HctFilePageRelease(&pCur->pg); - sqlite3_free(pCur); - return SQLITE_OK; -} - -/* -** Return TRUE if the cursor has been moved off of the last -** row of output. 
-*/ -static int hctentryEof(sqlite3_vtab_cursor *cur){ - hctentry_cursor *pCur = (hctentry_cursor*)cur; - return pCur->pg.aOld==0; -} - -/* -** Advance a hctentry_cursor to its next row of output. -*/ -static int hctentryNext(sqlite3_vtab_cursor *cur){ - int rc = SQLITE_OK; - hctentry_cursor *pCur = (hctentry_cursor*)cur; - - while( rc==SQLITE_OK ){ - HctDbPageHdr *pPg = (HctDbPageHdr*)pCur->pg.aOld; - int eType = hctPagetype(pPg); - if( eType==HCT_PAGETYPE_INTKEY - || eType==HCT_PAGETYPE_INDEX - || eType==HCT_PAGETYPE_HISTORY - ){ - pCur->iEntry++; - if( pCur->iEntrynEntry ) break; - } - pCur->iEntry = -1; - pCur->iPg++; - sqlite3HctFilePageRelease(&pCur->pg); - if( pCur->iPg>pCur->iLastPg ) break; - rc = sqlite3HctFilePageGetPhysical(pCur->pDb->pFile, pCur->iPg, &pCur->pg); - } - - return rc; -} - -/* -** Return values of columns for the row at which the hctentry_cursor -** is currently pointing. -*/ -static int hctentryColumn( - sqlite3_vtab_cursor *cur, /* The cursor */ - sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ - int i /* Which column to return */ -){ - hctentry_cursor *pCur = (hctentry_cursor*)cur; - int eType = hctPagetype(pCur->pg.aOld); - int nHeight = hctPageheight(pCur->pg.aOld); - - HctDbIntkeyEntry *pIntkey = 0; - HctDbIntkeyNodeEntry *pIntkeyNode = 0; - HctDbIndexEntry *pIndex = 0; - HctDbIndexNodeEntry *pIndexNode = 0; - HctDbHistoryFan *pFan = 0; - - switch( eType ){ - case HCT_PAGETYPE_INTKEY: - if( nHeight==0 ){ - pIntkey = &((HctDbIntkeyLeaf*)pCur->pg.aOld)->aEntry[pCur->iEntry]; - }else{ - pIntkeyNode = &((HctDbIntkeyNode*)pCur->pg.aOld)->aEntry[pCur->iEntry]; - } - break; - - case HCT_PAGETYPE_INDEX: - if( nHeight==0 ){ - pIndex = &((HctDbIndexLeaf*)pCur->pg.aOld)->aEntry[pCur->iEntry]; - }else{ - pIndexNode = &((HctDbIndexNode*)pCur->pg.aOld)->aEntry[pCur->iEntry]; - } - break; - - case HCT_PAGETYPE_HISTORY: - pFan = (HctDbHistoryFan*)pCur->pg.aOld; - break; - - } - - switch( i ){ - case 0: /* pgno */ - 
sqlite3_result_int64(ctx, (i64)pCur->iPg); - break; - case 1: /* iEntry */ - sqlite3_result_int64(ctx, (i64)pCur->iEntry); - break; - case 2: /* ikey */ - if( pIntkey ) sqlite3_result_int64(ctx, pIntkey->iKey); - if( pIntkeyNode ) sqlite3_result_int64(ctx, pIntkeyNode->iKey); - break; - case 3: /* size */ - if( pIntkey ) sqlite3_result_int64(ctx, pIntkey->nSize); - if( pIndex ) sqlite3_result_int64(ctx, pIndex->nSize); - if( pIndexNode ) sqlite3_result_int64(ctx, pIndexNode->nSize); - break; - case 4: /* offset */ - if( pIntkey ) sqlite3_result_int64(ctx, pIntkey->iOff); - if( pIndex ) sqlite3_result_int64(ctx, pIndex->iOff); - if( pIndexNode ) sqlite3_result_int64(ctx, pIndexNode->iOff); - break; - case 5: /* child */ - if( pIndexNode ) sqlite3_result_int64(ctx, pIndexNode->iChildPg); - if( pIntkeyNode ) sqlite3_result_int64(ctx, pIntkeyNode->iChildPg); - break; - - case 6: /* tid */ - case 7: /* rangetid */ - case 8: /* rangeoldpg */ - case 9: /* ovfl */ - if( pIntkey || pIndex || pIndexNode ){ - u8 *aPg = pCur->pg.aOld; - HctDbCell cell; - HctDbIndexEntry *p = hctDbEntryEntry(aPg, pCur->iEntry); - hctDbCellGet(pCur->pDb, &aPg[p->iOff], p->flags, &cell); - - if( i==6 && cell.iTid ){ - i64 iVal = (cell.iTid & HCT_TID_MASK); - if( cell.iTid & HCT_TID_ROLLBACK_OVERRIDE ) iVal = iVal*-1; - sqlite3_result_int64(ctx, iVal); - } - if( i==7 && cell.iRangeTid ){ - i64 iVal = (cell.iRangeTid & HCT_TID_MASK); - if( cell.iRangeTid & HCT_TID_ROLLBACK_OVERRIDE ) iVal = iVal*-1; - sqlite3_result_int64(ctx, iVal); - } - if( i==8 && cell.iRangeOld ){ - sqlite3_result_int64(ctx, (i64)cell.iRangeOld); - } - if( i==9 && cell.iOvfl ){ - sqlite3_result_int64(ctx, (i64)cell.iOvfl); - } - }else if( pFan ){ - if( i==7 ){ /* rangetid */ - u64 iVal = ((pCur->iEntry==0) ? 
pFan->iRangeTid0 : pFan->iRangeTid1); - if( iVal & HCT_TID_ROLLBACK_OVERRIDE ){ - sqlite3_result_int64(ctx, ((i64)(iVal & HCT_TID_MASK)) * -1); - }else{ - sqlite3_result_int64(ctx, (i64)iVal); - } - }else if( i==8 ){ /* rangeoldpg */ - u32 iRangeOldPg = - ((pCur->iEntry==0) ? pFan->pgOld0 : pFan->aPgOld1[pCur->iEntry-1]); - sqlite3_result_int64(ctx, (i64)iRangeOldPg); - } - } - break; - case 10: /* record */ - if( pIntkey || pIndex || pIndexNode ){ - sqlite3 *db = sqlite3_context_db_handle(ctx); - u8 *aPg = pCur->pg.aOld; - char *zRec; - int sz; - const u8 *aRec = 0; - HctBuffer buf = {0,0,0}; - - hctDbLoadRecord(pCur->pDb, &buf, aPg, pCur->iEntry, &sz, &aRec); - - zRec = sqlite3HctDbRecordToText(db, aRec, sz); - if( zRec ){ - sqlite3_result_text(ctx, zRec, -1, SQLITE_TRANSIENT); - sqlite3_free(zRec); - } - sqlite3HctBufferFree(&buf); - }else if( pFan ){ - char *zRec = sqlite3_mprintf("iSplit0=%d", pFan->iSplit0); - if( zRec ){ - sqlite3_result_text(ctx, zRec, -1, SQLITE_TRANSIENT); - sqlite3_free(zRec); - } - } - break; - } - - return SQLITE_OK; -} - -/* -** Return the rowid for the current row. In this implementation, the -** rowid is the same as the output value. -*/ -static int hctentryRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ - hctentry_cursor *pCur = (hctentry_cursor*)cur; - *pRowid = (((i64)pCur->iPg) << 32) + pCur->iEntry; - return SQLITE_OK; -} - -/* -** This method is called to "rewind" the hctentry_cursor object back -** to the first row of output. This method is always called at least -** once prior to any call to hctentryColumn() or hctentryRowid() or -** hctentryEof(). 
-*/ -static int hctentryFilter( - sqlite3_vtab_cursor *pVtabCursor, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv -){ - int rc; - hctentry_cursor *pCur = (hctentry_cursor*)pVtabCursor; - hctentry_vtab *pTab = (hctentry_vtab*)(pCur->base.pVtab); - u32 iLastPg; - - pCur->pDb = sqlite3HctDbFind(pTab->db, 0); - pCur->iEntry = -1; - iLastPg = sqlite3HctFileMaxpage(pCur->pDb->pFile); - - if( idxNum==1 ){ - u32 iPg = (u32)sqlite3_value_int64(argv[0]); - assert( argc==1 ); - if( iPg<1 || iPg>iLastPg ) return SQLITE_OK; - pCur->iPg = pCur->iLastPg = iPg; - }else{ - pCur->iPg = 1; - pCur->iLastPg = iLastPg; - } - - rc = sqlite3HctFilePageGetPhysical(pCur->pDb->pFile, pCur->iPg, &pCur->pg); - if( rc!=SQLITE_OK ){ - return rc; - } - return hctentryNext(pVtabCursor); -} - -/* -** SQLite will invoke this method one or more times while planning a query -** that uses the virtual table. This routine needs to create -** a query plan for each invocation and compute an estimated cost for that -** plan. -*/ -static int hctentryBestIndex( - sqlite3_vtab *tab, - sqlite3_index_info *pIdxInfo -){ - int i; - int iPgnoEq = -1; - - pIdxInfo->estimatedCost = (double)1000000; - pIdxInfo->estimatedRows = 1000000; - - /* Search for a pgno=? 
constraint */ - for(i=0; inConstraint; i++){ - struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[i]; - if( p->usable && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - iPgnoEq = i; - } - } - - if( iPgnoEq>=0 ){ - pIdxInfo->aConstraintUsage[iPgnoEq].argvIndex = 1; - pIdxInfo->idxNum = 1; - pIdxInfo->estimatedCost = (double)1000; - pIdxInfo->estimatedRows = 1000; - } - - return SQLITE_OK; -} - -typedef struct hctvalid_vtab hctvalid_vtab; -typedef struct hctvalid_cursor hctvalid_cursor; -struct hctvalid_vtab { - sqlite3_vtab base; /* Base class - must be first */ - sqlite3 *db; -}; -struct hctvalid_cursor { - sqlite3_vtab_cursor base; /* Base class - must be first */ - HctDatabase *pDb; /* Database to report on */ - int iEntry; /* Current entry (i.e. rowid) */ - - u32 rootpgno; /* Value of rootpgno column */ - char *zFirst; - char *zLast; - char *zPglist; -}; -static int hctvalidConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - hctvalid_vtab *pNew = 0; - int rc = SQLITE_OK; - - *ppVtab = 0; - rc = sqlite3_declare_vtab(db, - "CREATE TABLE x(rootpgno, first, last, pglist)" - ); - - if( rc==SQLITE_OK ){ - pNew = sqlite3MallocZero( sizeof(*pNew) ); - *ppVtab = (sqlite3_vtab*)pNew; - if( pNew==0 ) return SQLITE_NOMEM; - pNew->db = db; - } - return rc; -} -static int hctvalidBestIndex( - sqlite3_vtab *tab, - sqlite3_index_info *pIdxInfo -){ - pIdxInfo->estimatedCost = (double)10000; - pIdxInfo->estimatedRows = 10000; - return SQLITE_OK; -} -static int hctvalidDisconnect(sqlite3_vtab *pVtab){ - hctvalid_vtab *p = (hctvalid_vtab*)pVtab; - sqlite3_free(p); - return SQLITE_OK; -} -static int hctvalidOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ - hctvalid_cursor *pCur; - pCur = sqlite3MallocZero(sizeof(*pCur)); - if( pCur==0 ) return SQLITE_NOMEM; - *ppCursor = &pCur->base; - return SQLITE_OK; -} -static int hctvalidClose(sqlite3_vtab_cursor *cur){ - hctvalid_cursor *pCur = 
(hctvalid_cursor*)cur; - sqlite3_free(pCur); - return SQLITE_OK; -} -static int hctvalidNext(sqlite3_vtab_cursor *cur){ - hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; - hctvalid_vtab *pTab = (hctvalid_vtab*)(pCsr->base.pVtab); - int ii; - HctDbCsr *pDbCsr = 0; - HctCsrIntkeyOp *pIntkeyOp = 0; - HctCsrIndexOp *pIndexOp = 0; - - sqlite3_free(pCsr->zFirst); - sqlite3_free(pCsr->zLast); - sqlite3_free(pCsr->zPglist); - pCsr->zFirst = 0; - pCsr->zLast = 0; - pCsr->zPglist = 0; - pCsr->rootpgno = 0; - pCsr->iEntry++; - pDbCsr = pCsr->pDb->pScannerList; - pIntkeyOp = pDbCsr->intkey.pOpList; - pIndexOp = pDbCsr->index.pOpList; - ii = 0; - if( pIntkeyOp==0 && pIndexOp==0 ) ii--; - for(/*noop*/; pDbCsr && iiiEntry; ii++){ - if( pIntkeyOp ) pIntkeyOp = pIntkeyOp->pNextOp; - if( pIndexOp ) pIndexOp = pIndexOp->pNextOp; - if( pIntkeyOp==0 && pIndexOp==0 ){ - pDbCsr = pDbCsr->pNextScanner; - if( pDbCsr ){ - pIntkeyOp = pDbCsr->intkey.pOpList; - pIndexOp = pDbCsr->index.pOpList; - if( pIntkeyOp==0 && pIndexOp==0 ) ii--; - } - } - } - - if( pDbCsr ){ - pCsr->rootpgno = pDbCsr->iRoot; - if( pIntkeyOp ){ - if( pIntkeyOp->iFirst!=SMALLEST_INT64 ){ - pCsr->zFirst = sqlite3_mprintf("%lld", pIntkeyOp->iFirst); - } - if( pIntkeyOp->iFirst!=LARGEST_INT64 ){ - pCsr->zLast = sqlite3_mprintf("%lld", pIntkeyOp->iLast); - } - if( pIntkeyOp->iLogical ){ - pCsr->zPglist = sqlite3_mprintf( - "%lld/%lld", pIntkeyOp->iLogical, pIntkeyOp->iPhysical - ); - } - }else{ - if( pIndexOp->pFirst ){ - pCsr->zFirst = sqlite3HctDbRecordToText( - pTab->db, pIndexOp->pFirst, pIndexOp->nFirst - ); - } - if( pIndexOp->pLast ){ - pCsr->zLast = sqlite3HctDbRecordToText( - pTab->db, pIndexOp->pLast, pIndexOp->nLast - ); - } - if( pIndexOp->iLogical ){ - pCsr->zPglist = sqlite3_mprintf( - "%lld/%lld", pIndexOp->iLogical, pIndexOp->iPhysical - ); - } - } - } - - return SQLITE_OK; -} -static int hctvalidFilter( - sqlite3_vtab_cursor *cur, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv -){ - 
hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; - hctvalid_vtab *pTab = (hctvalid_vtab*)(pCsr->base.pVtab); - - pCsr->pDb = sqlite3HctDbFind(pTab->db, 0); - pCsr->iEntry = -1; - return hctvalidNext(cur); -} -static int hctvalidEof(sqlite3_vtab_cursor *cur){ - hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; - return (pCsr->rootpgno==0); -} -static int hctvalidColumn( - sqlite3_vtab_cursor *cur, /* The cursor */ - sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ - int i /* Which column to return */ -){ - hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; - switch( i ){ - case 0: - sqlite3_result_int64(ctx, (i64)pCsr->rootpgno); - break; - case 1: - sqlite3_result_text(ctx, pCsr->zFirst, -1, SQLITE_TRANSIENT); - break; - case 2: - sqlite3_result_text(ctx, pCsr->zLast, -1, SQLITE_TRANSIENT); - break; - case 3: - sqlite3_result_text(ctx, pCsr->zPglist, -1, SQLITE_TRANSIENT); - break; - } - return SQLITE_OK; -} -static int hctvalidRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ - hctvalid_cursor *pCsr = (hctvalid_cursor*)cur; - *pRowid = pCsr->iEntry; - return SQLITE_OK; -} - - - -SQLITE_PRIVATE int sqlite3HctVtabInit(sqlite3 *db){ - static sqlite3_module hctdbModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ hctdbConnect, - /* xBestIndex */ hctdbBestIndex, - /* xDisconnect */ hctdbDisconnect, - /* xDestroy */ 0, - /* xOpen */ hctdbOpen, - /* xClose */ hctdbClose, - /* xFilter */ hctdbFilter, - /* xNext */ hctdbNext, - /* xEof */ hctdbEof, - /* xColumn */ hctdbColumn, - /* xRowid */ hctdbRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0 - }; - - static sqlite3_module hctentryModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ hctentryConnect, - /* xBestIndex */ hctentryBestIndex, - /* xDisconnect */ hctentryDisconnect, - /* xDestroy */ 0, - /* 
xOpen */ hctentryOpen, - /* xClose */ hctentryClose, - /* xFilter */ hctentryFilter, - /* xNext */ hctentryNext, - /* xEof */ hctentryEof, - /* xColumn */ hctentryColumn, - /* xRowid */ hctentryRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0 - }; - - static sqlite3_module hctvalidModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ hctvalidConnect, - /* xBestIndex */ hctvalidBestIndex, - /* xDisconnect */ hctvalidDisconnect, - /* xDestroy */ 0, - /* xOpen */ hctvalidOpen, - /* xClose */ hctvalidClose, - /* xFilter */ hctvalidFilter, - /* xNext */ hctvalidNext, - /* xEof */ hctvalidEof, - /* xColumn */ hctvalidColumn, - /* xRowid */ hctvalidRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0 - }; - - int rc; - - rc = sqlite3_create_module(db, "hctdb", &hctdbModule, 0); - if( rc==SQLITE_OK ){ - rc = sqlite3_create_module(db, "hctentry", &hctentryModule, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3_create_module(db, "hctvalid", &hctvalidModule, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctFileVtabInit(db); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctPManVtabInit(db); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctStatsInit(db); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctJrnlInit(db); - } - return rc; -} - -/************** End of hct_database.c ****************************************/ -/************** Begin file hct_tmap.c ****************************************/ -/* -** 2021 February 28 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. 
-** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - -/* -** NOTES ON LOCKING -** -** Each time a new HctTMap object is allocated, the locking related -** variables are set: -** -** HctTMap.iMinTid -** HctTMap.iMinCid -** -** New HctTMap objects are always allocated by writers during the -** WRITING phase of a transaction. The iMinCid variable is set to -** the CID value associated with the snapshot on which the writer -** based its transaction. The iMinTid value is set to the largest -** TID value for which it and all smaller TID values map to fully -** committed transactions with CID values smaller than or equal -** to iMinCid. This means that: -** -** * The new object may be used by any client accessing a snapshot -** with a snapshot-id >= iMinCid. -** -** * So long as this object exists, it is not safe to reuse any -** page ids (logical or physical) freed by transactions with -** TID values > iMinTid. -** -** The HctTMap object may then be used to access any snapshot with -** a CID value greater than or equal to iMinCid. While the HctTMap -** is still in use, it is not safe to reuse any logical or physical -** page id freed by a transaction with a TID value greater than -** iMinTid. -** -** A new HctTMap object is created by a writer after it is allocated -** its TID iff: -** -** TODO: This all needs updating!!! -** -** * The expression (iNewTid % HctTMapServer.nTidStep)==0 is true, or -** * The existing transaction map is too small to contain an entry -** for iNewTid. -** -** The first time a client obtains a new HctTMap object, it remembers -** the CID of the first snapshot it accesses using it. The HctTMap -** is released at the end of the first transaction for which the CID is -** greater than or equal to (iFirstCid + HctTMapServer.nTidStep). This -** happens even if a new HctTMap has been obtained since then. 
TODO: There -** is probably a role for some randomness here. -** -** The above creates a problem - a single dormant connection can prevent -** all reuse of freed logical and physical pages. This is addressed by -** using smart reference objects of type HctTMapRef that support the -** reference being revoked by the server at any time. See comments above -** struct HctTMapRef for details. -*/ - -/* #include "hctInt.h" */ - -typedef struct HctTMapFull HctTMapFull; -typedef struct HctTMapRef HctTMapRef; - -/* -** The following object type represents a reference to an HctTMapFull -** object. The reference is taken and released under the cover of the -** associated HctTMapServer.mutex mutex. -** -** pRefNext/pRefPrev: -** These are used to link this object into the linked list at -** HctTMapFull.pRefList. They may only be accessed under the cover -** of the associated HctTMapServer.mutex mutex. -** -** pMap: -** Pointer to the HctTMapFull object, if any, that this reference -** currently points to. -** -** refMask: -** This may be set to one of four values. It is always modified using -** CAS instructions. -** -** Zero: -** HctTMapRef.pMap is not valid (always NULL). -** -** HCT_TMAPREF_SERVER: -** When the reference is first taken, under cover of the server mutex, -** refMask is set to this value. -** -** HCT_TMAPREF_SERVER|HCT_TMAPREF_CLIENT: -** When a client actually wishes to use the tmap indicated by this -** reference, it uses a CAS instruction to set refMask to this value. -** It may then use the tmap object. This does not require the mutex. -** -** If the client finds that refMask is not HCT_TMAPREF_SERVER, but -** has been set to 0, then the reference has been revoked. In this -** case it is not safe for the client to touch pMap. It must -** reinitialize the HctTmapRef object (under cover of the server -** mutex). 
-** -** When the read transaction is over, and the client does not need -** need the tmap object, it uses a CAS instruction to set refMask -** back to HCT_TMAPREF_SERVER. If, when doing so, it finds that the -** HCT_TMAPREF_SERVER bit has already been cleared, then it must -** release the reference immediately (under cover of the server -** mutex). -** -** HCT_TMAPREF_CLIENT: -*/ -struct HctTMapRef { - u32 refMask; - HctTMapFull *pMap; - HctTMapRef *pRefNext; - HctTMapRef *pRefPrev; -}; - -/* -** Bits from HctTMapRef.refMask. -*/ -#define HCT_TMAPREF_CLIENT 0x01 -#define HCT_TMAPREF_SERVER 0x02 -#define HCT_TMAPREF_BOTH 0x03 - -/* -** Event counters used by the hctstats virtual table. -*/ -typedef struct HctTMapStats HctTMapStats; -struct HctTMapStats { - i64 nMutex; - i64 nMutexBlock; -}; - - -/* -** iLockValue: -** This field contains two things - a flag and a safe-tid value. The flag -** is set whenever a read transaction is active, and clear otherwise. -** The safe-tid value is set to a TID value for which itself an all smaller -** TID values are included in the connection's transactions - current and -** future. -** -** Pages freed by the transaction with the safe-tid value may be reused -** without disturbing this client. -** -** pNextClient: -** Linked list of all clients associated with pServer. -** -** pBuild: -** This is used by the sqlite3HctTMapRecoveryXXX() API when constructing -** a new tmap object as part of sqlite_hct_journal recovery. 
-*/ -struct HctTMapClient { - HctTMapServer *pServer; - HctConfig *pConfig; - u64 iLockValue; - HctTMapClient *pNextClient; - HctTMapFull *pMap; - HctTMapStats stats; - - HctTMapFull *pBuild; - u64 iBuildMin; /* Min TID value explicitly set in pBuild */ -}; - -#define HCT_LOCKVALUE_ACTIVE (((u64)0x01) << 56) - -/* -** Values for HctTMapClient.eState -*/ -#define HCT_CLIENT_NONE 0 -#define HCT_CLIENT_OPEN 1 -#define HCT_CLIENT_UP 2 - -/* -** iMinMinTid: -** This value is set only when the mutex is held, using HctAtomicStore(). -** It may be read, using HctAtomicLoad(), at any time. -*/ -struct HctTMapServer { - sqlite3_mutex *pMutex; /* Mutex to protect this object */ - int nClient; /* Number of connected clients */ - u64 iMinMinTid; /* Smallest iMinTid value in pList */ - HctTMapFull *pList; /* List of tmaps. Newest first */ - HctTMapClient *pClientList; /* List of clients */ -}; - -/* -** nRef: -** Number of clients that hold a pointer to this object. -*/ -struct HctTMapFull { - HctTMap m; - int nRef; /* Number of pointers to this object */ - HctTMapFull *pNext; /* Next entry in HctTMapServer.pList */ -}; - -/* -** ENTER_TMAP_MUTEX(pClient) implementation. -** -** Grab the server mutex. And update client-stats as required at the same -** time. 
-*/ -static void hctTMapMutexEnter(HctTMapClient *pClient){ - sqlite3_mutex *pMutex = pClient->pServer->pMutex; - pClient->stats.nMutex++; - if( sqlite3_mutex_try(pMutex)!=SQLITE_OK ){ - pClient->stats.nMutexBlock++; - sqlite3_mutex_enter(pMutex); - } -} - -#if 0 -#define ENTER_TMAP_MUTEX(pClient) sqlite3_mutex_enter(pClient->pServer->pMutex) -#endif -#define ENTER_TMAP_MUTEX(pClient) hctTMapMutexEnter(pClient) -#define LEAVE_TMAP_MUTEX(pClient) sqlite3_mutex_leave(pClient->pServer->pMutex) - -/* -** Atomic version of: -** -** if( *pPtr!=iOld ){ -** return 0; -** } -** *pPtr = iNew; -** return 1; -*/ -#if 0 -static int hctTMapBoolCAS32(u32 *pPtr, u32 iOld, u32 iNew){ - return HctCASBool(pPtr, iOld, iNew); -} -#endif -static int hctTMapBoolCAS64(u64 *pPtr, u64 iOld, u64 iNew){ - return HctCASBool(pPtr, iOld, iNew); -} - -/* -** Return a pointer to the slot in pMap associated with TID iTid. -*/ -static u64 *hctTMapFind(HctTMapFull *pMap, u64 iTid){ - int iOff = iTid - pMap->m.iFirstTid; - int iMap = iOff / HCT_TMAP_PAGESIZE; - iOff = HCT_TMAP_ENTRYSLOT( (iOff % HCT_TMAP_PAGESIZE) ); - return &pMap->m.aaMap[iMap][iOff % HCT_TMAP_PAGESIZE]; -} - -/* -** Allocate the initial HctTMapFull object for the server passed as the -** only argument. This is called as part of sqlite3HctTMapServerNew(). 
-*/ -static int hctTMapInit(HctTMapServer *p, u64 iFirstTid, u64 iLastTid){ - int rc = SQLITE_OK; - int nMap = 0; - int nByte = 0; - u64 iFirst = (iFirstTid / HCT_TMAP_PAGESIZE) * HCT_TMAP_PAGESIZE; - HctTMapFull *pNew = 0; - - assert( p->pList==0 ); - assert( (iFirstTid & HCT_TMAP_CID_MASK)==iFirstTid ); - - nMap = (iLastTid / HCT_TMAP_PAGESIZE) - (iFirst / HCT_TMAP_PAGESIZE) + 3; - nByte = sizeof(HctTMapFull) + sizeof(u64*)*nMap; - pNew = (HctTMapFull*)sqlite3HctMalloc(&rc, nByte); - if( pNew ){ - int i; - pNew->m.iFirstTid = iFirst; - pNew->m.nMap = nMap; - pNew->m.aaMap = (u64**)&pNew[1]; - for(i=0; im.nMap; i++){ - u64 *a = (u64*)sqlite3HctMalloc(&rc, sizeof(u64)*HCT_TMAP_PAGESIZE); - pNew->m.aaMap[i] = a; - } - - if( rc!=SQLITE_OK ){ - assert( 0 ); /* OOM case */ - for(i=0; im.nMap; i++){ - sqlite3_free(pNew->m.aaMap[i]); - } - sqlite3_free(pNew); - }else{ - u64 t; - for(t=iFirst; tpList = pNew; - pNew->nRef = 1; /* Server reference */ - } - } - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctTMapServerNew(u64 iFirstTid, u64 iLastTid, HctTMapServer **pp){ - int rc = SQLITE_OK; - HctTMapServer *pNew; - - pNew = sqlite3MallocZero(sizeof(HctTMapServer)); - if( pNew==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - pNew->pMutex = sqlite3_mutex_alloc(SQLITE_MUTEX_FAST); - if( pNew->pMutex==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - pNew->iMinMinTid = iFirstTid-1; - rc = hctTMapInit(pNew, iFirstTid, iLastTid); - } - } - - if( rc!=SQLITE_OK ){ - sqlite3HctTMapServerFree(pNew); - pNew = 0; - } - - *pp = pNew; - return rc; -} - -SQLITE_PRIVATE int sqlite3HctTMapServerSet(HctTMapServer *pServer, u64 iTid, u64 iCid){ - u64 *pEntry = hctTMapFind(pServer->pList, iTid); - *pEntry = iCid; - return SQLITE_OK; -} - -/* -** Argument pMap is an HctTMapFull object that is currently linked -** into the list at HctTMapServer.pList. This function removes pMap -** from that list and frees all associated allocations. 
-*/ -static void hctTMapFreeMap(HctTMapServer *p, HctTMapFull *pMap){ - int iFirst = 0; /* First in pMap->m.aaMap[] to free */ - int iSave = 0; /* First in pMap->m.aaMap[] to preserve */ - int ii; - - assert( pMap && pMap->nRef==0 ); - if( pMap==p->pList ){ - if( pMap->pNext==0 ) iSave = pMap->m.nMap; - p->pList = pMap->pNext; - }else{ - HctTMapFull *pPrev; - HctTMapFull *pNext = pMap->pNext; - - for(pPrev=p->pList; pPrev->pNext!=pMap; pPrev=pPrev->pNext); - for(iSave=0; iSavem.nMap; iSave++){ - if( pMap->m.aaMap[iSave]==pPrev->m.aaMap[0] ) break; - } - - if( pNext ){ - u64 *aDoNotDel = pNext->m.aaMap[pNext->m.nMap-1]; - for(iFirst=pMap->m.nMap; iFirst>0; iFirst--){ - if( pMap->m.aaMap[iFirst-1]==aDoNotDel ) break; - } - } - - pPrev->pNext = pMap->pNext; - } - - for(ii=iFirst; iim.aaMap[ii]); - } - sqlite3_free(pMap); - -} - -/* -** Free a tmap-server object. -*/ -SQLITE_PRIVATE void sqlite3HctTMapServerFree(HctTMapServer *p){ - if( p ){ - assert( p->pClientList==0 ); - sqlite3_mutex_free(p->pMutex); - - assert( p->pList==0 || p->pList->nRef==1 ); - if( p->pList ) p->pList->nRef--; - while( p->pList ){ - HctTMapFull *pMap = p->pList; - while( pMap->pNext ) pMap = pMap->pNext; - hctTMapFreeMap(p, pMap); - } - - sqlite3_free(p); - } -} - -SQLITE_PRIVATE int sqlite3HctTMapClientNew( - HctTMapServer *p, - HctConfig *pConfig, - HctTMapClient **ppClient -){ - int rc = SQLITE_OK; - HctTMapClient *pNew; - - pNew = (HctTMapClient*)sqlite3HctMalloc(&rc, sizeof(HctTMapClient)); - if( pNew ){ - pNew->pServer = p; - pNew->pConfig = pConfig; - ENTER_TMAP_MUTEX(pNew); - /* Under cover of the server mutex, link this new client into the - ** list of clients associated with the server. The minimum TID value - ** for the client is set to the current global minimum. 
*/ - pNew->iLockValue = p->iMinMinTid; - pNew->pNextClient = p->pClientList; - pNew->pMap = p->pList; - pNew->pMap->nRef++; - p->pClientList = pNew; - LEAVE_TMAP_MUTEX(pNew); - } - *ppClient = pNew; - return rc; -} - -SQLITE_PRIVATE void sqlite3HctTMapClientFree(HctTMapClient *pClient){ - if( pClient ){ - HctTMapClient **pp; - ENTER_TMAP_MUTEX(pClient); - - pClient->pMap->nRef--; - if( pClient->pMap->nRef==0 ){ - hctTMapFreeMap(pClient->pServer, pClient->pMap); - } - - /* Remove this client from the HctTMapServer.pClientList list */ - for(pp=&pClient->pServer->pClientList;*pp!=pClient;pp=&(*pp)->pNextClient); - *pp = pClient->pNextClient; - - LEAVE_TMAP_MUTEX(pClient); - sqlite3_free(pClient); - } -} - - -SQLITE_PRIVATE int sqlite3HctTMapBegin(HctTMapClient *pClient, u64 iSnapshot, HctTMap **ppMap){ - HctTMapFull *pMap = pClient->pMap; - u64 iEof = pMap->m.iFirstTid + pMap->m.nMap*HCT_TMAP_PAGESIZE; - - while( 1 ){ - u64 iOrigLockValue = HctAtomicLoad(&pClient->iLockValue); - u64 iLockValue; - - /* Find the new "safe-tid" value */ - u64 iSafe = (iOrigLockValue & HCT_TMAP_CID_MASK); - u64 iMinMinTid = HctAtomicLoad(&pClient->pServer->iMinMinTid); - if( iSafeiSnapshot ) break; - iSafe++; - } - - /* Set the lock-value. If this fails, it means some writer process - ** has increased the safe-tid value for us. 
*/ - assert( (iOrigLockValue & HCT_LOCKVALUE_ACTIVE)==0 ); - iLockValue = iSafe | HCT_LOCKVALUE_ACTIVE; - if( hctTMapBoolCAS64(&pClient->iLockValue, iOrigLockValue, iLockValue) ){ - break; - } - } - - *ppMap = (HctTMap*)pMap; - return SQLITE_OK; -} - -SQLITE_PRIVATE u64 sqlite3HctTMapCommitedTID(HctTMapClient *pClient){ - return (pClient->iLockValue & HCT_TMAP_CID_MASK); -} - -static void hctTMapUpdateSafe(HctTMapClient *pClient){ - assert( sqlite3_mutex_held(pClient->pServer->pMutex) ); - if( pClient->pMap!=pClient->pServer->pList ){ - pClient->pMap->nRef--; - if( pClient->pMap->nRef==0 ){ - hctTMapFreeMap(pClient->pServer, pClient->pMap); - } - pClient->pMap = pClient->pServer->pList; - pClient->pMap->nRef++; - } -} - -/* -** This is called by a reader if it needs to look-up a TID for which its -** current HctTMap object is not large enough. This function sets output -** parameter (*ppMap) to point to the latest HctTMap object, which, -** unless the db is corrupt, is guaranteed to be large enough. -** -** SQLITE_OK is returned if successful. -*/ -SQLITE_PRIVATE int sqlite3HctTMapUpdate(HctTMapClient *pClient, HctTMap **ppMap){ - ENTER_TMAP_MUTEX(pClient); - hctTMapUpdateSafe(pClient); - LEAVE_TMAP_MUTEX(pClient); - *ppMap = (HctTMap*)pClient->pMap; - return SQLITE_OK; -} - -/* -** Called to signal the end of a read or write a transaction. Parameter -** iCID is passed the CID of the snapshot on which the transaction was -** based. -*/ -SQLITE_PRIVATE int sqlite3HctTMapEnd(HctTMapClient *pClient, u64 iCID){ - while( 1 ){ - u64 iOrigLockValue = pClient->iLockValue; - u64 iLockValue; - - assert( (iOrigLockValue & HCT_LOCKVALUE_ACTIVE)!=0 ); - iLockValue = (iOrigLockValue & ~HCT_LOCKVALUE_ACTIVE); - if( hctTMapBoolCAS64(&pClient->iLockValue, iOrigLockValue, iLockValue) ){ - break; - } - } - return SQLITE_OK; -} - -/* -** Allocate a new HctTMapFull object and link it into the list -** belonging to server pServer. 
The new map object is based on -** the server's current newest - pServer->pList. Relative to this -** object, the new map: -** -** * appends one mapping page to the end of the map, and -** -** * may remove one or more mapping pages from the start of the -** map, based on the current value of HctTMapServer.iMinMinTid. -** -** The server mutex must be held to call this function. -*/ -static int hctTMapNewObject(HctTMapServer *pServer){ - u64 iFirst = (pServer->iMinMinTid / HCT_TMAP_PAGESIZE) * HCT_TMAP_PAGESIZE; - HctTMapFull *pOld = pServer->pList; - HctTMapFull *pNew = 0; - int nMap = 0; - int nDiscard = 0; - int nByte = 0; - int rc = SQLITE_OK; - - assert( sqlite3_mutex_held(pServer->pMutex) ); - assert( (iFirst % HCT_TMAP_PAGESIZE)==0 ); - assert( (pOld->m.iFirstTid % HCT_TMAP_PAGESIZE)==0 ); - assert( (pServer->iMinMinTid & HCT_TMAP_CID_MASK)==pServer->iMinMinTid ); - assert( (iFirst & HCT_TMAP_CID_MASK)==iFirst ); - - nDiscard = (iFirst - pOld->m.iFirstTid) / HCT_TMAP_PAGESIZE; - nMap = pOld->m.nMap + 1 - nDiscard; - nByte = sizeof(HctTMapFull) + nMap*sizeof(u64*); - pNew = (HctTMapFull*)sqlite3HctMalloc(&rc, nByte); - - if( pNew ){ - int ii; - pNew->m.iFirstTid = iFirst; - pNew->m.nMap = nMap; - pNew->m.aaMap = (u64**)&pNew[1]; - pNew->nRef = 1; - for(ii=0; ii<(nMap-1); ii++){ - pNew->m.aaMap[ii] = pOld->m.aaMap[ii+nDiscard]; - } - pNew->m.aaMap[ii] = (u64*)sqlite3HctMalloc( - &rc, sizeof(u64)*HCT_TMAP_PAGESIZE - ); - - pServer->pList->nRef--; - if( pServer->pList->nRef==0 ){ - hctTMapFreeMap(pServer, pServer->pList); - } - pNew->pNext = pServer->pList; - pServer->pList = pNew; - } - - return rc; -} - -/* -** Return the largest TID for which it is safe to reuse freed pages. -*/ -SQLITE_PRIVATE u64 sqlite3HctTMapSafeTID(HctTMapClient *p){ - /* TODO: -1? */ - return HctAtomicLoad(&p->pServer->iMinMinTid); -} - -/* -** This is called by write transactions immediately after obtaining -** the transaction's TID value (at the start of the commit process). 
-*/ -SQLITE_PRIVATE int sqlite3HctTMapNewTID( - HctTMapClient *p, /* Transaction map client */ - u64 iTid, /* TID for write transaction */ - HctTMap **ppMap /* OUT: (possibly) new transaction map */ -){ - int rc = SQLITE_OK; - HctTMapFull *pMap = p->pMap; - u64 iEof = pMap->m.iFirstTid + ((u64)pMap->m.nMap*HCT_TMAP_PAGESIZE); - - /* If it is time to do so, allocate a new transaction-map */ - if( iTid>=iEof || iTid==(iEof - HCT_TMAP_PAGESIZE/2) ){ - ENTER_TMAP_MUTEX(p); - hctTMapUpdateSafe(p); - pMap = p->pMap; - iEof = pMap->m.iFirstTid + ((u64)pMap->m.nMap*HCT_TMAP_PAGESIZE); - if( iTid>=iEof || iTid==(iEof - HCT_TMAP_PAGESIZE/2) ){ - hctTMapNewObject(p->pServer); - hctTMapUpdateSafe(p); - } - LEAVE_TMAP_MUTEX(p); - } - - *ppMap = (HctTMap*)p->pMap; - return rc; -} - -SQLITE_PRIVATE void sqlite3HctTMapScan(HctTMapClient *p){ - HctTMapClient *pClient = 0; - u64 iSafe = p->iLockValue & HCT_TMAP_CID_MASK; - - ENTER_TMAP_MUTEX(p); - for(pClient=p->pServer->pClientList; pClient; pClient=pClient->pNextClient){ - u64 iVal = HctAtomicLoad(&pClient->iLockValue); - u64 iTid = (iVal & HCT_TMAP_CID_MASK); - - if( (iVal & HCT_LOCKVALUE_ACTIVE)==0 && iTidiLockValue, iVal, iSafe); - iVal = HctAtomicLoad(&pClient->iLockValue); - iTid = (iVal & HCT_TMAP_CID_MASK); - } - - iSafe = MIN(iSafe, iTid); - } - HctAtomicStore(&p->pServer->iMinMinTid, iSafe); - LEAVE_TMAP_MUTEX(p); -} - -SQLITE_PRIVATE i64 sqlite3HctTMapStats(sqlite3 *db, int iStat, const char **pzStat){ - HctTMapClient *pClient = 0; - i64 iVal = -1; - - pClient = sqlite3HctFileTMapClient(sqlite3HctDbFile(sqlite3HctDbFind(db, 0))); - switch( iStat ){ - case 0: - *pzStat = "mutex_attempt"; - iVal = pClient->stats.nMutex; - break; - case 1: - *pzStat = "mutex_block"; - iVal = pClient->stats.nMutexBlock; - break; - default: - break; - } - - return iVal; -} - -SQLITE_PRIVATE int sqlite3HctTMapRecoverySet(HctTMapClient *p, u64 iTid, u64 iCid){ - int rc = SQLITE_OK; - HctTMapFull *pNew = p->pBuild; - if( pNew==0 ){ - u64 iFirst 
= 1; - u64 iEof = p->pServer->pList->m.iFirstTid; - u64 iLast = iEof + (HCT_TMAP_PAGESIZE*2); - int nMap = 0; - if( iTid>=HCT_TMAP_PAGESIZE ){ - iFirst = 1 + ((iTid / HCT_TMAP_PAGESIZE) - 1) * HCT_TMAP_PAGESIZE; - } - nMap = ((iLast - iFirst) + HCT_TMAP_PAGESIZE-1) / HCT_TMAP_PAGESIZE; - assert( nMap>0 ); - - p->pBuild = pNew = (HctTMapFull*)sqlite3HctMalloc(&rc, - sizeof(HctTMapFull) + nMap*sizeof(u64*) - ); - p->iBuildMin = iTid; - if( pNew ){ - int ii; - pNew->m.iFirstTid = iFirst; - pNew->m.nMap = nMap; - pNew->m.aaMap = (u64**)&pNew[1]; - pNew->nRef = 1; - for(ii=0; iim.aaMap[ii] = aMap; - } - if( rc==SQLITE_OK ){ - u64 ee; - for(ee=iFirst; eem.aaMap[iMap][iOff] = ((u64)1 | HCT_TMAP_COMMITTED); - } - } - } - } - p->iBuildMin = MIN(p->iBuildMin, iTid); - - while( rc==SQLITE_OK && pNew->m.iFirstTid>iTid ){ - int ii; - HctTMapFull *pAlloc = 0; - int nMap = pNew->m.nMap + 1; - - pAlloc = (HctTMapFull*)sqlite3HctMalloc(&rc, - sizeof(HctTMapFull) + nMap*sizeof(u64*) - ); - pAlloc->nRef = 1; - pAlloc->m.nMap = nMap; - pAlloc->m.aaMap = (u64**)&pAlloc[1]; - pAlloc->m.iFirstTid = pNew->m.iFirstTid - HCT_TMAP_PAGESIZE; - memcpy(&pAlloc->m.aaMap[1], pNew->m.aaMap, pNew->m.nMap*sizeof(u64*)); - pAlloc->m.aaMap[0] = (u64*)sqlite3HctMalloc(&rc, - sizeof(u64) * HCT_TMAP_PAGESIZE - ); - for(ii=0; iim.aaMap[0][ii] = ((u64)1 | HCT_TMAP_COMMITTED); - } - - assert( pNew->nRef==1 ); - sqlite3_free(pNew); - p->pBuild = pNew = pAlloc; - } - - if( rc==SQLITE_OK ){ - int iMap = (iTid - pNew->m.iFirstTid) / HCT_TMAP_PAGESIZE; - int iOff = (iTid - pNew->m.iFirstTid) % HCT_TMAP_PAGESIZE; - pNew->m.aaMap[iMap][iOff] = (iCid | HCT_TMAP_COMMITTED); - } - - return rc; -} - -SQLITE_PRIVATE void sqlite3HctTMapRecoveryFinish(HctTMapClient *p, int rc){ - HctTMapFull *pNew = p->pBuild; - if( pNew ){ - p->pBuild = 0; - if( rc==SQLITE_OK ){ - pNew->pNext = p->pServer->pList; - p->pServer->pList = pNew; - p->pServer->iMinMinTid = p->iBuildMin; - if( pNew->pNext ){ - pNew->pNext->nRef--; - if( 
pNew->pNext->nRef==0 ){ - hctTMapFreeMap(p->pServer, pNew->pNext); - } - } - }else{ - int ii; - for(ii=0; iim.nMap; ii++){ - sqlite3_free(pNew->m.aaMap[ii]); - } - sqlite3_free(pNew); - } - p->iBuildMin = 0; - } -} - - -/************** End of hct_tmap.c ********************************************/ -/************** Begin file hct_record.c **************************************/ -/* -** 2022 May 19 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - -/* #include "hctInt.h" */ -/* #include "sqliteInt.h" */ -/* #include "vdbeInt.h" */ - -/* #include */ -/* #include */ - -/* -** Write the serialized data blob for the value stored in pMem into -** buf. It is assumed that the caller has allocated sufficient space. -** Return the number of bytes written. -** -** nBuf is the amount of space left in buf[]. The caller is responsible -** for allocating enough space to buf[] to hold the entire field, exclusive -** of the pMem->u.nZero bytes for a MEM_Zero value. -** -** Return the number of bytes actually written into buf[]. The number -** of bytes in the zero-filled tail is included in the return value only -** if those bytes were zeroed in buf[]. 
-*/ -static u32 hctRecordSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ - u32 len; - - /* Integer and Real */ - if( serial_type<=7 && serial_type>0 ){ - u64 v; - u32 i; - if( serial_type==7 ){ - assert( sizeof(v)==sizeof(pMem->u.r) ); - memcpy(&v, &pMem->u.r, sizeof(v)); - swapMixedEndianFloat(v); - }else{ - v = pMem->u.i; - } - len = i = sqlite3SmallTypeSizes[serial_type]; - assert( i>0 ); - do{ - buf[--i] = (u8)(v&0xFF); - v >>= 8; - }while( i ); - return len; - } - - /* String or blob */ - if( serial_type>=12 ){ - assert( pMem->n + ((pMem->flags & MEM_Zero)?pMem->u.nZero:0) - == (int)sqlite3VdbeSerialTypeLen(serial_type) ); - len = pMem->n; - if( len>0 ) memcpy(buf, pMem->z, len); - return len; - } - - /* NULL or constants 0 or 1 */ - return 0; -} - -/* -** Return the serial-type for the value stored in pMem. -** -** This routine might convert a large MEM_IntReal value into MEM_Real. -*/ -static u32 hctRecordSerialType(Mem *pMem, u32 *pLen){ - int flags = pMem->flags; - u32 n; - - assert( pLen!=0 ); - if( flags&MEM_Null ){ - *pLen = 0; - return 0; - } - if( flags&(MEM_Int|MEM_IntReal) ){ - /* Figure out whether to use 1, 2, 4, 6 or 8 bytes. 
*/ -# define MAX_6BYTE ((((i64)0x00008000)<<32)-1) - i64 i = pMem->u.i; - u64 u; - testcase( flags & MEM_Int ); - testcase( flags & MEM_IntReal ); - if( i<0 ){ - u = ~i; - }else{ - u = i; - } - if( u<=127 ){ - if( (i&1)==i ){ - *pLen = 0; - return 8+(u32)u; - }else{ - *pLen = 1; - return 1; - } - } - if( u<=32767 ){ *pLen = 2; return 2; } - if( u<=8388607 ){ *pLen = 3; return 3; } - if( u<=2147483647 ){ *pLen = 4; return 4; } - if( u<=MAX_6BYTE ){ *pLen = 6; return 5; } - *pLen = 8; - if( flags&MEM_IntReal ){ - /* If the value is IntReal and is going to take up 8 bytes to store - ** as an integer, then we might as well make it an 8-byte floating - ** point value */ - pMem->u.r = (double)pMem->u.i; - pMem->flags &= ~MEM_IntReal; - pMem->flags |= MEM_Real; - return 7; - } - return 6; - } - if( flags&MEM_Real ){ - *pLen = 8; - return 7; - } - assert( pMem->db->mallocFailed || flags&(MEM_Str|MEM_Blob) ); - assert( pMem->n>=0 ); - n = (u32)pMem->n; - if( flags & MEM_Zero ){ - n += pMem->u.nZero; - } - *pLen = n; - return ((n*2) + 12 + ((flags&MEM_Str)!=0)); -} - - -/* -** -*/ -SQLITE_PRIVATE int sqlite3HctSerializeRecord( - UnpackedRecord *pRec, /* Record to serialize */ - u8 **ppRec, /* OUT: buffer containing serialization */ - int *pnRec /* OUT: size of (*ppRec) in bytes */ -){ - int ii; - int nData = 0; - int nHdr = 0; - u8 *pOut = 0; - int iOffHdr = 0; - int iOffData = 0; - - for(ii=0; iinField; ii++){ - u32 n; - u32 stype = hctRecordSerialType(&pRec->aMem[ii], &n); - nData += n; - nHdr += sqlite3VarintLen(stype); - pRec->aMem[ii].uTemp = stype; - } - - if( nHdr<=126 ){ - /* The common case */ - nHdr += 1; - }else{ - /* Rare case of a really large header */ - int nVarint = sqlite3VarintLen(nHdr); - nHdr += nVarint; - if( nVarintnField; ii++){ - u32 stype = pRec->aMem[ii].uTemp; - iOffHdr += putVarint32(&pOut[iOffHdr], stype); - iOffData += hctRecordSerialPut(&pOut[iOffData], &pRec->aMem[ii], stype); - } - assert( iOffData==(nHdr+nData) ); - - *ppRec = pOut; - *pnRec 
= iOffData; - - return SQLITE_OK; -} - - -/************** End of hct_record.c ******************************************/ -/************** Begin file hct_stats.c ***************************************/ -/* -** 2022 September 28 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - - -/* #include "hctInt.h" */ - -typedef struct hctstats_vtab hctstats_vtab; -typedef struct hctstats_cursor hctstats_cursor; -struct hctstats_vtab { - sqlite3_vtab base; /* Base class - must be first */ - sqlite3 *db; -}; -struct hctstats_cursor { - sqlite3_vtab_cursor base; /* Base class - must be first */ - int iSubsys; - int iStat; - - i64 iRowid; - const char *zStat; /* Value for "stat" column. NULL for EOF. */ - i64 iVal; /* Value for "val" column. */ -}; - -typedef struct HctStatsSubsys HctStatsSubsys; -struct HctStatsSubsys { - const char *zSubsys; - i64 (*xStat)(sqlite3*, int iStat, const char **pzStat); -}; - -static HctStatsSubsys aHctStatGlobal[] = { - { "file", sqlite3HctFileStats }, - { "db", sqlite3HctDbStats }, - { "tmap", sqlite3HctTMapStats }, - { "pman", sqlite3HctPManStats }, - { "hct", sqlite3HctMainStats } -}; - -#define HCTSTATS_SCHEMA "CREATE TABLE x(subsys, stat, val)" - -/* -** xConnect() callback for hctstats table. 
-*/ -static int hctstatsConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - hctstats_vtab *pNew = 0; - int rc = SQLITE_OK; - - *ppVtab = 0; - rc = sqlite3_declare_vtab(db, HCTSTATS_SCHEMA); - - if( rc==SQLITE_OK ){ - pNew = sqlite3MallocZero( sizeof(*pNew) ); - *ppVtab = (sqlite3_vtab*)pNew; - if( pNew==0 ) return SQLITE_NOMEM; - pNew->db = db; - } - return rc; -} - -/* -** xBestIndex() callback for hctstats table. -*/ -static int hctstatsBestIndex( - sqlite3_vtab *tab, - sqlite3_index_info *pIdxInfo -){ - pIdxInfo->estimatedCost = (double)10000; - pIdxInfo->estimatedRows = 10000; - return SQLITE_OK; -} - -/* -** xDisconnect() callback for hctstats table. Free the vtab handle. -*/ -static int hctstatsDisconnect(sqlite3_vtab *pVtab){ - hctstats_vtab *p = (hctstats_vtab*)pVtab; - sqlite3_free(p); - return SQLITE_OK; -} - -/* -** xOpen() callback for hctstats table. Free the vtab handle. -*/ -static int hctstatsOpen(sqlite3_vtab *p, sqlite3_vtab_cursor **ppCursor){ - hctstats_cursor *pCur; - pCur = sqlite3MallocZero(sizeof(*pCur)); - if( pCur==0 ) return SQLITE_NOMEM; - *ppCursor = &pCur->base; - return SQLITE_OK; -} - -/* -** xClose() callback for hctstats table. Free the vtab handle. 
-*/ -static int hctstatsClose(sqlite3_vtab_cursor *cur){ - hctstats_cursor *pCur = (hctstats_cursor*)cur; - sqlite3_free(pCur); - return SQLITE_OK; -} - -static int hctstatsNext(sqlite3_vtab_cursor *cur){ - hctstats_cursor *pCsr = (hctstats_cursor*)cur; - hctstats_vtab *pTab = (hctstats_vtab*)(pCsr->base.pVtab); - - pCsr->zStat = 0; - pCsr->iStat++; - - while( pCsr->zStat==0 && pCsr->iSubsysiSubsys]; - pCsr->iVal = p->xStat(pTab->db, pCsr->iStat, &pCsr->zStat); - if( pCsr->zStat==0 ){ - pCsr->iStat = 0; - pCsr->iSubsys++; - } - } - - return SQLITE_OK; -} - -static int hctstatsFilter( - sqlite3_vtab_cursor *cur, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv -){ - hctstats_cursor *pCsr = (hctstats_cursor*)cur; - - if( sqlite3HctDbFind(((hctstats_vtab*)cur->pVtab)->db, 0)==0 ){ - /* Main database is not an hctree db */ - return SQLITE_OK; - } - - pCsr->iStat = -1; - pCsr->iSubsys = 0; - pCsr->iRowid = 0; - return hctstatsNext(cur); -} - -static int hctstatsEof(sqlite3_vtab_cursor *cur){ - hctstats_cursor *pCsr = (hctstats_cursor*)cur; - return (pCsr->zStat==0); -} - -static int hctstatsColumn( - sqlite3_vtab_cursor *cur, /* The cursor */ - sqlite3_context *ctx, /* First argument to sqlite3_result_...() */ - int i /* Which column to return */ -){ - hctstats_cursor *pCsr = (hctstats_cursor*)cur; - - assert( i==0 || i==1 || i==2 ); - switch( i ){ - case 0: { - HctStatsSubsys *p = &aHctStatGlobal[pCsr->iSubsys]; - sqlite3_result_text(ctx, p->zSubsys, -1, SQLITE_STATIC); - break; - } - - case 1: - sqlite3_result_text(ctx, pCsr->zStat, -1, SQLITE_STATIC); - break; - - default: - assert( i==2 ); - sqlite3_result_int64(ctx, pCsr->iVal); - break; - } - return SQLITE_OK; -} - -static int hctstatsRowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ - hctstats_cursor *pCsr = (hctstats_cursor*)cur; - *pRowid = pCsr->iRowid; - return SQLITE_OK; -} - - -/* -** Register the hct_stats virtual table module with the supplied -** SQLite database handle. 
-*/ -SQLITE_PRIVATE int sqlite3HctStatsInit(sqlite3 *db){ - static sqlite3_module hctstatsModule = { - /* iVersion */ 0, - /* xCreate */ 0, - /* xConnect */ hctstatsConnect, - /* xBestIndex */ hctstatsBestIndex, - /* xDisconnect */ hctstatsDisconnect, - /* xDestroy */ 0, - /* xOpen */ hctstatsOpen, - /* xClose */ hctstatsClose, - /* xFilter */ hctstatsFilter, - /* xNext */ hctstatsNext, - /* xEof */ hctstatsEof, - /* xColumn */ hctstatsColumn, - /* xRowid */ hctstatsRowid, - /* xUpdate */ 0, - /* xBegin */ 0, - /* xSync */ 0, - /* xCommit */ 0, - /* xRollback */ 0, - /* xFindMethod */ 0, - /* xRename */ 0, - /* xSavepoint */ 0, - /* xRelease */ 0, - /* xRollbackTo */ 0, - /* xShadowName */ 0 - }; - - return sqlite3_create_module(db, "hctstats", &hctstatsModule, 0); -} - - - -/************** End of hct_stats.c *******************************************/ -/************** Begin file hct_journal.c *************************************/ -/* -** 2020 October 13 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -*/ - -/* #include "hctInt.h" */ -/* #include "vdbeInt.h" */ - -#define HCT_JOURNAL_SCHEMA \ -"CREATE TABLE sqlite_hct_journal(" \ - "cid INTEGER PRIMARY KEY," \ - "schema TEXT," \ - "data BLOB," \ - "schemacid INTEGER," \ - "hash BLOB," \ - "tid INTEGER," \ - "validcid INTEGER" \ -");" - - -#define HCT_BASELINE_SCHEMA \ -"CREATE TABLE sqlite_hct_baseline(" \ - "cid INTEGER," \ - "schemacid INTEGER," \ - "hash BLOB" \ -");" - -/* -** In follower mode, it is not possible to call sqlite3_hct_journal_write() -** for the transaction with CID (N + HCT_MAX_LEADING_WRITE) until all -** transactions with CID values of N or less have been committed. 
-*/ -#define HCT_MAX_LEADING_WRITE (8*1024) - -typedef struct HctJrnlServer HctJrnlServer; -typedef struct HctJrnlPendingHook HctJrnlPendingHook; - -/* -** One object of this type is shared by all connections to the same -** database. Managed by the HctFileServer object (see functions -** sqlite3HctFileGetJrnlPtr() and SetJrnlPtr()). -** -** iSchemaCid: -** This contains the current schema version of the database. Even though -** this value may be concurrently accessed, there is no need for an -** advanced or versioned data structure. Because: -** -** 1) In LEADER mode, this value is only accessed when writing an entry -** to the journal table, from within sqlite3HctJrnlLog(). It is only -** written to if the transaction has modified the database schema. -** -** The call to sqlite3HctJrnlLog() comes after the transaction has been -** successfully validated. And a transaction that modifies the schema -** only passes validation if there have been no writes at all to the -** the database since its snapshot was opened - i.e. if the CID for the -** transaction is one greater than the CID of its snapshot. This -** guarantees that there are no transactions with CID values less than -** that of the schema transaction concurrently accessing iSchemaCid. -** -** Also, since schema transactions modify the schema cookie, and all other -** transactions check the schema cookie during validation, it is -** guaranteed that no transaction started before the schema transaction -** is committed may successfully validate with a CID value greater than -** that of the schema transaction. -** -** Therefore, if a schema transaction has passed validation, it is -** guaranteed exclusive access to the iSchemaCid variable. -** -** 2) In FOLLOWER mode, the value is: -** -** * read from within sqlite3_hct_journal_write(), just after opening -** a snapshot, and -** -** * written from within the same call, following successful validation -** of a schema transaction. 
-** -** A schema transaction is only started once all transactions with CID -** values less than that of the schema transaction have finished -** committing. This alone ensures that there is at most a single -** writer to the iSchemaCid variable at any one time. -** -** eMode: -** The current database mode - either SQLITE_HCT_JOURNAL_MODE_FOLLOWER or -** SQLITE_HCT_JOURNAL_MODE_LEADER. -** -** iSnapshot: -** This is meaningful in FOLLOWER mode only. -** -** This is set to a CID value for which it and all prior transactions are -** committed. It may be written by any client using an atomic CAS operation, -** but may only be increased, never decreased. No transaction with a CID -** greater than (iSnapshot + HCT_MAX_LEADING_WRITE) may be started - -** iSnapshot must be increased first. -** -** nCommit: -** Size of aCommit[] array. -** -** aCommit: -** This array is only populated if the object is in FOLLOWER mode. -** -** Say the size of the array is N (actually HctJrnlServer.nCommit). Then, -** when transaction X is committed, slot aCommit[X % N] is set to X. Or, -** if transaction X is committed but no snapshot is valid until Y (for Y>X), -** then instead slot aCommit[X % N] is set to Y. -*/ -struct HctJrnlServer { - u64 iSchemaCid; - int eMode; - u64 iSnapshot; - int nSchemaVersionIncr; - int nCommit; - u64 *aCommit; /* Array of size nCommit */ -}; - -struct HctJrnlPendingHook { - u64 iCid; - u64 iSCid; - HctBuffer data; - HctBuffer schema; -}; - -/* -** There is one instance of this structure for each database handle (HBtree*) -** open on a replication-enabled hctree database. -** -** eInWrite: -** Set to true while the database connection is in a call to -** sqlite3_hct_journal_write(). 
-*/ -struct HctJournal { - u64 iJrnlRoot; /* Root page of journal table */ - u64 iBaseRoot; /* Root page of base table */ - int eInWrite; - u64 iWriteTid; - u64 iWriteCid; - u64 iRollbackSnapshot; - HctDatabase *pDb; - HctTree *pTree; - HctJrnlServer *pServer; - HctJrnlPendingHook pending; -}; - -#define HCT_JOURNAL_NONE 0 -#define HCT_JOURNAL_INWRITE 1 -#define HCT_JOURNAL_INROLLBACK 2 - -static void hctJournalSetDbError( - sqlite3 *db, /* Database on which to set error */ - int rc, /* Error code */ - const char *zFormat, ... /* Printf() error string and arguments */ -){ - char *zErr = 0; - sqlite3_mutex_enter( sqlite3_db_mutex(db) ); - if( zFormat ){ - va_list ap; - va_start(ap, zFormat); - zErr = sqlite3_vmprintf(zFormat, ap); - va_end(ap); - } - if( zErr ){ - sqlite3ErrorWithMsg(db, rc, "%s", zErr); - sqlite3_free(zErr); - }else{ - sqlite3ErrorWithMsg(db, rc, 0, 0); - } - sqlite3_mutex_leave( sqlite3_db_mutex(db) ); -} - -/* -** Initialize the main database for replication. -*/ -SQLITE_API int sqlite3_hct_journal_init(sqlite3 *db){ - const char *zTest1 = "PRAGMA hct_ndbfile"; - const char *zTest2 = "SELECT 1 WHERE (SELECT count(*) FROM sqlite_schema)=0"; - sqlite3_stmt *pTest = 0; - int rc = SQLITE_OK; - - /* Test that there is not already an open transaction on this database. */ - if( sqlite3_get_autocommit(db)==0 ){ - hctJournalSetDbError(db, SQLITE_ERROR, "open transaction on database"); - return SQLITE_ERROR; - } - - /* Test that the main db really is an hct database. Leave rc set to - ** something other than SQLITE_OK and an error message in the database - ** handle if it is not. 
*/ - if( rc==SQLITE_OK ){ - rc = sqlite3_prepare_v2(db, zTest1, -1, &pTest, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3_step(pTest); - sqlite3_finalize(pTest); - if( rc==SQLITE_DONE ){ - hctJournalSetDbError(db, SQLITE_ERROR, "not an hct database"); - }else if( rc==SQLITE_ROW ){ - rc = SQLITE_OK; - } - } - - /* Open a transaction on the db */ - if( rc==SQLITE_OK ){ - rc = sqlite3_exec(db, "BEGIN", 0, 0, 0); - } - - /* Test that the main db really is empty */ - if( rc==SQLITE_OK ){ - rc = sqlite3_prepare_v2(db, zTest2, -1, &pTest, 0); - } - if( rc==SQLITE_OK ){ - rc = sqlite3_step(pTest); - sqlite3_finalize(pTest); - if( rc==SQLITE_DONE ){ - hctJournalSetDbError(db, SQLITE_ERROR, "not an empty database"); - rc = SQLITE_ERROR; - }else if( rc==SQLITE_ROW ){ - rc = SQLITE_OK; - } - } - - if( rc==SQLITE_OK ){ - rc = sqlite3_exec(db, - "PRAGMA writable_schema = 1;" - HCT_JOURNAL_SCHEMA ";" - HCT_BASELINE_SCHEMA ";" - "INSERT INTO sqlite_hct_baseline VALUES(6, 0, zeroblob(16));" - "PRAGMA writable_schema = 0;" - ,0 ,0 ,0 - ); - } - - if( rc==SQLITE_OK ){ - rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); - } - if( rc!=SQLITE_OK ){ - char *zErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); - sqlite3_exec(db, "ROLLBACK", 0, 0, 0); - hctJournalSetDbError(db, rc, "%s", zErr); - sqlite3_free(zErr); - }else{ - rc = sqlite3HctDetectJournals(db); - } - - return rc; -} - -/* -** Register a custom validation callback with the database handle. -*/ -SQLITE_API int sqlite3_hct_journal_hook( - sqlite3 *db, - void *pArg, - int(*xValidate)( - void *pCopyOfArg, - sqlite3_int64 iCid, - const char *zSchema, - const void *pData, int nData, - sqlite3_int64 iSchemaCid - ) -){ - db->xValidate = xValidate; - db->pValidateArg = pArg; - return SQLITE_OK; -} - -SQLITE_API void sqlite3_hct_migrate_mode(sqlite3 *db, int bActivate){ - db->bHctMigrate = bActivate; -} - -/* -** Value iVal is to be stored as an integer in an SQLite record. 
This -** function returns the number of bytes that it will use for storage. -*/ -static int hctJrnlIntSize(u64 iVal){ -#define MAX_6BYTE ((((i64)0x00008000)<<32)-1) - if( iVal<=127 ) return 1; - if( iVal<=32767 ) return 2; - if( iVal<=8388607 ) return 3; - if( iVal<=2147483647 ) return 4; - if( iVal<=MAX_6BYTE ) return 6; - return 8; -} - -/* -** Store an (nByte*8) bit big-endian integer, value iVal, in buffer a[]. -*/ -static void hctJrnlIntPut(u8 *a, u64 iVal, int nByte){ - int i; - for(i=1; i<=nByte; i++){ - a[nByte-i] = (iVal & 0xFF); - iVal = (iVal >> 8); - } -} - -/* -** Return the byte value that should be stored in the SQLite record -** header for an nSize byte integer field. -*/ -static u8 hctJrnlIntHdr(int nSize){ - if( nSize==8 ) return 6; - if( nSize==6 ) return 5; - return nSize; -} - -/* -** Compose an SQLite record suitable for the sqlite_hct_journal table. -*/ -static u8 *hctJrnlComposeRecord( - u64 iCid, - const char *zSchema, - const u8 *pData, int nData, - u64 iSchemaCid, - u64 iTid, - u64 iValidCid, - int *pnRec -){ - u8 *pRec = 0; - int nRec = 0; - int nHdr = 0; - int nBody = 0; - int nSchema = 0; /* Length of zSchema, in bytes */ - int nTidByte = 0; - int nSchemaCidByte = 0; - int nValidCidByte = 0; - u8 aHash[SQLITE_HCT_JOURNAL_HASHSIZE]; - - nSchema = sqlite3Strlen30(zSchema); - nTidByte = hctJrnlIntSize(iTid); - nSchemaCidByte = hctJrnlIntSize(iSchemaCid); - nValidCidByte = hctJrnlIntSize(iValidCid); - - sqlite3_hct_journal_hashentry( - aHash, iCid, zSchema, pData, nData, iSchemaCid - ); - - /* First figure out how large the eventual record will be */ - nHdr = 1 /* size of header varint */ - + 1 /* "cid" - always NULL */ - + sqlite3VarintLen((nSchema * 2) + 13) /* "schema" - TEXT */ - + sqlite3VarintLen((nData * 2) + 12) /* "data" - BLOB */ - + 1 /* "schemacid" - INTEGER */ - + 1 /* "hash" - BLOB */ - + 1 /* "tid" - INTEGER */ - + 1; /* "validcid" - INTEGER */ - - nBody = 0 /* "cid" - always NULL */ - + nSchema /* "schema" - TEXT */ - + 
nData /* "data" - BLOB */ - + nSchemaCidByte /* "schemacid" - INTEGER */ - + SQLITE_HCT_JOURNAL_HASHSIZE /* "hash" - BLOB */ - + nTidByte /* "tid" - INTEGER */ - + nValidCidByte; /* "validcid" - INTEGER */ - - nRec = nBody+nHdr; - pRec = (u8*)sqlite3_malloc(nRec); - if( pRec ){ - u8 *pHdr = pRec; - u8 *pBody = &pRec[nHdr]; - - *pHdr++ = (u8)nHdr; /* size-of-header varint */ - *pHdr++ = 0x00; /* "cid" - NULL */ - - /* "schema" field - TEXT */ - pHdr += sqlite3PutVarint(pHdr, (nSchema*2) + 13); - if( nSchema>0 ){ - memcpy(pBody, zSchema, nSchema); - pBody += nSchema; - } - - /* "data" field - BLOB */ - pHdr += sqlite3PutVarint(pHdr, (nData*2) + 12); - if( nData>0 ){ - memcpy(pBody, pData, nData); - pBody += nData; - } - - /* "schemacid" field - INTEGER */ - *pHdr++ = hctJrnlIntHdr(nSchemaCidByte); - hctJrnlIntPut(pBody, iSchemaCid, nSchemaCidByte); - pBody += nSchemaCidByte; - - /* "hash" field - SQLITE_HCT_JOURNAL_HASHSIZE byte BLOB */ - *pHdr++ = (u8)((SQLITE_HCT_JOURNAL_HASHSIZE * 2) + 12); - memcpy(pBody, aHash, SQLITE_HCT_JOURNAL_HASHSIZE); - pBody += SQLITE_HCT_JOURNAL_HASHSIZE; - - /* "tid" field - INTEGER */ - *pHdr++ = hctJrnlIntHdr(nTidByte); - hctJrnlIntPut(pBody, iTid, nTidByte); - pBody += nTidByte; - - /* "validcid" field - INTEGER */ - *pHdr++ = hctJrnlIntHdr(nValidCidByte); - hctJrnlIntPut(pBody, iValidCid, nValidCidByte); - pBody += nValidCidByte; - - assert( pHdr==&pRec[nHdr] ); - assert( pBody==&pRec[nRec] ); - }else{ - nRec = 0; - } - - *pnRec = nRec; - return pRec; -} - -typedef struct JrnlCtx JrnlCtx; -struct JrnlCtx { - Schema *pSchema; - HctTree *pTree; - HctBuffer *pBuf; - HctBuffer *pSchemaSql; -}; - -typedef struct JrnlTree JrnlTree; -struct JrnlTree { - const char *zName; -}; - -static int hctJrnlFindTree(Schema *pSchema, u32 iRoot, JrnlTree *pJTree){ - HashElem *k; - if( iRoot==1 ) return 0; - for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ - Table *pTab = (Table*)sqliteHashData(k); - if( pTab->tnum==iRoot ){ - 
pJTree->zName = pTab->zName; - return 1; - } - } - return 0; -} - -static void hctJrnlRecordPrefix( - HctBuffer *pBuf, - int nData, /* Size of buffer aData[] in bytes */ - const u8 *aData, /* Buffer containing SQLite record */ - int nField /* Number of prefix fields requested */ -){ - int iHdr = 0; - int iBody = 0; - int ii = 0; - int szHdr = 0; /* Size of output header */ - int szBody = 0; /* Size of output record body */ - u8 *aHdrOut = 0; - u8 *aBodyOut = 0; - - iHdr = getVarint32(aData, iBody); - - /* Figure out the aggregate sizes of the header and body fields for the - ** required number of prefix fields. */ - for(ii=0; ii126 ){ - int nVarint = sqlite3VarintLen(szHdr); - szHdr += nVarint; - if( sqlite3VarintLen(szHdr)!=nVarint ) szHdr++; - } - - /* Size of record field */ - pBuf->nBuf += sqlite3PutVarint(&pBuf->aBuf[pBuf->nBuf], szHdr+szBody); - - aHdrOut = &pBuf->aBuf[pBuf->nBuf]; - aBodyOut = &aHdrOut[szHdr]; - - /* Write the size-of-header field for the output record */ - aHdrOut += sqlite3PutVarint(aHdrOut, szHdr); - - /* Write the other fields to both the header and body of the output record */ - for(ii=0; ii0 ){ - memcpy(aBodyOut, &aData[iBody], nBody); - iBody += nBody; - aBodyOut += nBody; - } - } - - pBuf->nBuf = (aBodyOut - pBuf->aBuf); -} - -static int hctBufferExtend(HctBuffer *pBuf, int nExtend){ - i64 nDesire = pBuf->nBuf + nExtend; - if( pBuf->nAllocaBuf[pBuf->nBuf], zApp, nApp+1); - pBuf->nBuf += nApp; - sqlite3_free(zApp); - return SQLITE_OK; -} - - -static int hctJrnlLogTree(void *pCtx, u32 iRoot, KeyInfo *pKeyInfo){ - int rc = SQLITE_OK; - JrnlCtx *pJrnl = (JrnlCtx*)pCtx; - HctBuffer *pBuf = pJrnl->pBuf; - - if( iRoot==HCT_TREE_SCHEMAOP_ROOT ){ - HctTreeCsr *pCsr = 0; - rc = sqlite3HctTreeCsrOpen(pJrnl->pTree, iRoot, &pCsr); - if( rc==SQLITE_OK ){ - for(rc=sqlite3HctTreeCsrFirst(pCsr); - rc==SQLITE_OK && sqlite3HctTreeCsrEof(pCsr)==0; - rc=sqlite3HctTreeCsrNext(pCsr) - ){ - int nData = 0; - const u8 *aData = 0; - sqlite3HctTreeCsrData(pCsr, 
&nData, &aData); - rc = hctBufferAppend(pJrnl->pSchemaSql, "%s%.*s", - (pJrnl->pSchemaSql->nBuf>0 ? ";" : ""), nData, (const char*)aData - ); - } - sqlite3HctTreeCsrClose(pCsr); - } - }else{ - JrnlTree jrnltree; - memset(&jrnltree, 0, sizeof(jrnltree)); - if( hctJrnlFindTree(pJrnl->pSchema, iRoot, &jrnltree) ){ - int nName = sqlite3Strlen30(jrnltree.zName); - - rc = hctBufferExtend(pBuf, 1+nName+1); - if( rc==SQLITE_OK ){ - HctTreeCsr *pCsr = 0; - - pBuf->aBuf[pBuf->nBuf++] = 'T'; - memcpy(&pBuf->aBuf[pBuf->nBuf], jrnltree.zName, nName+1); - pBuf->nBuf += nName+1; - rc = sqlite3HctTreeCsrOpen(pJrnl->pTree, iRoot, &pCsr); - - if( rc==SQLITE_OK ){ - for(rc=sqlite3HctTreeCsrFirst(pCsr); - rc==SQLITE_OK && sqlite3HctTreeCsrEof(pCsr)==0; - rc=sqlite3HctTreeCsrNext(pCsr) - ){ - i64 iKey = 0; - int nData = 0; - const u8 *aData = 0; - int bDel = 0; - - sqlite3HctTreeCsrKey(pCsr, &iKey); - sqlite3HctTreeCsrData(pCsr, &nData, &aData); - bDel = sqlite3HctTreeCsrIsDelete(pCsr); - - rc = hctBufferExtend(pBuf, 1+9+9+nData); - if( rc!=SQLITE_OK ) break; - - if( pKeyInfo==0 ){ - pBuf->aBuf[pBuf->nBuf++] = bDel ? 'd' : 'i'; - pBuf->nBuf += sqlite3PutVarint(&pBuf->aBuf[pBuf->nBuf], iKey); - }else{ - pBuf->aBuf[pBuf->nBuf++] = bDel ? 
'D' : 'I'; - if( bDel ){ - hctJrnlRecordPrefix(pBuf, nData, aData, pKeyInfo->nUniqField); - } - } - if( bDel==0 ){ - pBuf->nBuf += sqlite3PutVarint(&pBuf->aBuf[pBuf->nBuf], nData); - memcpy(&pBuf->aBuf[pBuf->nBuf], aData, nData); - pBuf->nBuf += nData; - } - } - } - - sqlite3HctTreeCsrClose(pCsr); - } - } - } - - return rc; -} - -static int hctJrnlWriteRecord( - HctJournal *pJrnl, - u64 iCid, - const char *zSchema, - const void *pData, int nData, - u64 iSchemaCid, - u64 iTid -){ - int rc = SQLITE_OK; - u8 *pRec = 0; - int nRec = 0; - - pRec = hctJrnlComposeRecord( - iCid, zSchema, pData, nData, iSchemaCid, iTid, 0, &nRec - ); - if( pRec==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - int nRetry = 0; - do { - nRetry = 0; - rc = sqlite3HctDbInsert( - pJrnl->pDb, (u32)pJrnl->iJrnlRoot, 0, iCid, 0, nRec, pRec, &nRetry - ); - if( rc!=SQLITE_OK ) break; - assert( nRetry==0 || nRetry==1 ); - if( nRetry==0 ){ - rc = sqlite3HctDbInsertFlush(pJrnl->pDb, &nRetry); - if( rc!=SQLITE_OK ) break; - } - }while( nRetry ); - } - sqlite3_free(pRec); - - return rc; -} - -SQLITE_PRIVATE int sqlite3HctJrnlWriteEmpty( - HctJournal *pJrnl, - u64 iCid, - u64 iTid, - sqlite3 *db /* If non-NULL, invoke custom validation */ -){ - int rc = SQLITE_OK; - if( pJrnl->eInWrite==HCT_JOURNAL_NONE ){ - rc = hctJrnlWriteRecord(pJrnl, iCid, "", 0, 0, 0, iTid); - - /* If argument db is not NULL and there is a custom validation hook - ** configured, invoke it now. This is just to propagate the empty - ** transaction to any follower databases, not to actually validate - ** an empty transaction - the return code is ignored. 
*/ - if( rc==SQLITE_OK && db && db->xValidate ){ - // (void)db->xValidate(db->pValidateArg, iCid, "", 0, 0, 0); - pJrnl->pending.iCid = iCid; - pJrnl->pending.iSCid = 0; - pJrnl->pending.data.nBuf = 0; - pJrnl->pending.schema.nBuf = 0; - } - } - return rc; -} - -SQLITE_PRIVATE void sqlite3HctJrnlInvokeHook(HctJournal *pJrnl, sqlite3 *db){ - if( pJrnl ){ - HctJrnlPendingHook *pPending = &pJrnl->pending; - if( pPending->iCid>0 ){ - if( db->xValidate ){ - const char *zSchema = ""; - if( pJrnl->pending.schema.nBuf>0 ){ - zSchema = (const char*)pJrnl->pending.schema.aBuf; - } - (void)db->xValidate( - db->pValidateArg, pPending->iCid, - zSchema, pPending->data.aBuf, pPending->data.nBuf, - pPending->iSCid - ); - } - - pPending->iCid = 0; - } - } -} - -SQLITE_PRIVATE int sqlite3HctJrnlLog( - HctJournal *pJrnl, - sqlite3 *db, - Schema *pSchema, - u64 iCid, - u64 iTid, - int *pbValidateCalled -){ - int rc = SQLITE_OK; - JrnlCtx jrnlctx; - const char *zSchema = ""; - u64 iSchemaCid = HctAtomicLoad(&pJrnl->pServer->iSchemaCid); - - assert( *pbValidateCalled==0 ); - if( pJrnl->eInWrite!=HCT_JOURNAL_NONE ) return SQLITE_OK; - - memset(&jrnlctx, 0, sizeof(jrnlctx)); - jrnlctx.pSchema = pSchema; - jrnlctx.pTree = pJrnl->pTree; - jrnlctx.pBuf = &pJrnl->pending.data; - jrnlctx.pSchemaSql = &pJrnl->pending.schema; - - jrnlctx.pBuf->nBuf = 0; - jrnlctx.pSchemaSql->nBuf = 0; - - rc = sqlite3HctTreeForeach(pJrnl->pTree, 1, (void*)&jrnlctx, hctJrnlLogTree); - if( jrnlctx.pSchemaSql->nBuf ){ - zSchema =(const char*)jrnlctx.pSchemaSql->aBuf; - } - - if( rc==SQLITE_OK ){ - rc = hctJrnlWriteRecord(pJrnl, iCid, zSchema, - jrnlctx.pBuf->aBuf, jrnlctx.pBuf->nBuf, iSchemaCid, iTid - ); - } - - /* If one is registered, invoke the validation hook */ - if( rc==SQLITE_OK && db->xValidate ){ -#if 0 - int res = db->xValidate(db->pValidateArg, iCid, zSchema, - jrnlctx.buf.aBuf, jrnlctx.buf.nBuf, iSchemaCid - ); - if( res!=0 ){ - rc = SQLITE_BUSY_SNAPSHOT; - } - *pbValidateCalled = 1; -#endif - 
pJrnl->pending.iCid = iCid; - pJrnl->pending.iSCid = iSchemaCid; - } - - if( zSchema[0] && rc==SQLITE_OK ){ - HctAtomicStore(&pJrnl->pServer->iSchemaCid, iCid); - } - - return rc; -} - -static void hctJrnlDelServer(void *p){ - if( p ){ - HctJrnlServer *pServer = (HctJrnlServer*)p; - sqlite3_free(pServer->aCommit); - sqlite3_free(pServer); - } -} - -typedef struct HctJournalRecord HctJournalRecord; -struct HctJournalRecord { - i64 iCid; - const char *zSchema; int nSchema; - const void *pData; int nData; - i64 iSchemaCid; - const void *pHash; - i64 iTid; - i64 iValidCid; -}; - -/* -** Structure containing values read from the sqlite_hct_baseline table. -*/ -typedef struct HctBaselineRecord HctBaselineRecord; -struct HctBaselineRecord { - i64 iCid; - u8 aHash[SQLITE_HCT_JOURNAL_HASHSIZE]; - i64 iSchemaCid; -}; - - -typedef struct HctRecordReader HctRecordReader; -struct HctRecordReader { - const u8 *aRec; - int nRec; - int nHdr; - const u8 *pHdr; - const u8 *pBody; -}; - -static void hctJrnlReadInit( - HctRecordReader *p, - int nRec, - const u8 *aRec -){ - memset(p, 0, sizeof(*p)); - p->aRec = aRec; - p->nRec = nRec; - p->pHdr = p->aRec + getVarint32(aRec, p->nHdr); - p->pBody = &p->aRec[p->nHdr]; -} - -static const u8 *hctJrnlReadBlobText( - int *pRc, - HctRecordReader *p, - int bText, - int *pnData -){ - const u8 *pRet = 0; - if( *pRc==SQLITE_OK ){ - u64 iType = 0; - p->pHdr += sqlite3GetVarint(p->pHdr, &iType); - if( iType<12 || (iType % 2)!=bText ){ - *pRc = SQLITE_CORRUPT_BKPT; - }else{ - *pnData = (iType - 12) / 2; - pRet = p->pBody; - p->pBody += (*pnData); - } - } - return pRet; -} - -static const char *hctJrnlReadText( - int *pRc, - HctRecordReader *p, - int *pnText -){ - return (const char*)hctJrnlReadBlobText(pRc, p, 1, pnText); -} -static const u8 *hctJrnlReadBlob( - int *pRc, - HctRecordReader *p, - int *pnText -){ - return hctJrnlReadBlobText(pRc, p, 0, pnText); -} - -static i64 hctJrnlReadInteger(int *pRc, HctRecordReader *p){ - i64 iRet = 0; - if( 
*pRc==SQLITE_OK ){ - u64 iType = 0; - p->pHdr += sqlite3GetVarint(p->pHdr, &iType); - switch( iType ){ - case 1: - iRet = p->pBody[0]; - p->pBody++; - break; - case 2: - iRet = ((u64)p->pBody[0] << 8) - + ((u64)p->pBody[1] << 0); - p->pBody += 2; - break; - case 3: - iRet = ((u64)p->pBody[0] << 16) - + ((u64)p->pBody[1] << 8) - + ((u64)p->pBody[2] << 0); - p->pBody += 3; - break; - case 4: - iRet = ((u64)p->pBody[0] << 24) - + ((u64)p->pBody[1] << 16) - + ((u64)p->pBody[2] << 8) - + ((u64)p->pBody[3] << 0); - p->pBody += 4; - break; - case 5: - iRet = ((u64)p->pBody[0] << 40) - + ((u64)p->pBody[1] << 32) - + ((u64)p->pBody[2] << 24) - + ((u64)p->pBody[3] << 16) - + ((u64)p->pBody[4] << 8) - + ((u64)p->pBody[5] << 0); - p->pBody += 6; - break; - case 6: - iRet = ((u64)p->pBody[0] << 56) - + ((u64)p->pBody[1] << 48) - + ((u64)p->pBody[2] << 40) - + ((u64)p->pBody[3] << 32) - + ((u64)p->pBody[4] << 24) - + ((u64)p->pBody[5] << 16) - + ((u64)p->pBody[6] << 8) - + ((u64)p->pBody[7] << 0); - p->pBody += 6; - break; - case 8: - iRet = 0; - break; - case 9: - iRet = 1; - break; - default: - *pRc = SQLITE_CORRUPT_BKPT; - break; - } - } - - return iRet; -} - -static void hctJrnlReadHash( - int *pRc, /* IN/OUT: Error code */ - HctRecordReader *p, /* Record reader */ - u8 *aHash /* Pointer to buffer to populate */ -){ - int nHash = 0; - const u8 *a = 0; - a = hctJrnlReadBlob(pRc, p, &nHash); - if( *pRc==SQLITE_OK && nHash!=SQLITE_HCT_JOURNAL_HASHSIZE ){ - *pRc = SQLITE_CORRUPT_BKPT; - } - if( *pRc==SQLITE_OK ){ - memcpy(aHash, a, SQLITE_HCT_JOURNAL_HASHSIZE); - } -} - -static int hctJrnlReadJournalRecord(HctDbCsr *pCsr, HctJournalRecord *pRec){ - int rc = SQLITE_OK; - int nData = 0; - const u8 *aData = 0; - - memset(pRec, 0, sizeof(*pRec)); - - sqlite3HctDbCsrKey(pCsr, (i64*)&pRec->iCid); - rc = sqlite3HctDbCsrData(pCsr, &nData, &aData); - if( rc==SQLITE_OK ){ - int nHash = 0; - HctRecordReader rdr; - hctJrnlReadInit(&rdr, nData, aData); - - /* "cid" field - always NULL */ - 
if( *rdr.pHdr++!=0 ) return SQLITE_CORRUPT_BKPT; - - /* "schema" field - always TEXT. */ - pRec->zSchema = hctJrnlReadText(&rc, &rdr, &pRec->nSchema); - - /* "data" field - always BLOB */ - pRec->pData = hctJrnlReadBlob(&rc, &rdr, &pRec->nData); - - /* "schemacid" field - always INTEGER */ - pRec->iSchemaCid = hctJrnlReadInteger(&rc, &rdr); - - /* "hash" field - SQLITE_HCT_JOURNAL_HASHSIZE byte BLOB */ - pRec->pHash = (const void*)hctJrnlReadBlob(&rc, &rdr, &nHash); - if( nHash!=SQLITE_HCT_JOURNAL_HASHSIZE ) rc = SQLITE_CORRUPT_BKPT; - - /* "tid" field - an INTEGER */ - pRec->iTid = hctJrnlReadInteger(&rc, &rdr); - - /* "valid_cid" field - an INTEGER */ - pRec->iValidCid = hctJrnlReadInteger(&rc, &rdr); - } - return rc; -} - -/* -** Read the contents of the sqlite_hct_baseline table into structure -** (*pRec). Return SQLITE_OK if successful, or an SQLite error code -** otherwise. -*/ -static int hctJrnlReadBaseline( - HctJournal *pJrnl, /* Database to read from */ - HctBaselineRecord *pRec /* Populate this structure before returning */ -){ - HctDbCsr *pCsr = 0; - int rc = SQLITE_OK; - - memset(pRec, 0, sizeof(HctBaselineRecord)); - - /* Open a cursor on the baseline table */ - rc = sqlite3HctDbCsrOpen(pJrnl->pDb, 0, (u32)pJrnl->iBaseRoot, &pCsr); - - /* Move the cursor to the first record in the table. 
*/ - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbCsrFirst(pCsr); - } - if( rc==SQLITE_OK && sqlite3HctDbCsrEof(pCsr) ){ - rc = SQLITE_CORRUPT_BKPT; - } - - if( rc==SQLITE_OK ){ - int nData = 0; - const u8 *aData = 0; - - rc = sqlite3HctDbCsrData(pCsr, &nData, &aData); - if( rc==SQLITE_OK ){ - HctRecordReader rdr; - hctJrnlReadInit(&rdr, nData, aData); - - /* "cid" field - an INTEGER */ - pRec->iCid = hctJrnlReadInteger(&rc, &rdr); - - /* "schemacid" field - an INTEGER */ - pRec->iSchemaCid = hctJrnlReadInteger(&rc, &rdr); - - /* "hash" field - SQLITE_HCT_JOURNAL_HASHSIZE byte BLOB */ - hctJrnlReadHash(&rc, &rdr, pRec->aHash); - } - } - sqlite3HctDbCsrClose(pCsr); - - return rc; -} - -static int hctJrnlGetJrnlShape( - sqlite3 *db, - i64 *piLast, /* Out: Last entry in journal */ - i64 *piLastCont /* Out: Last contiguous entry in journal */ -){ - const char *z1 = "SELECT max(cid) FROM sqlite_hct_journal"; - const char *z2 = "SELECT cid FROM sqlite_hct_journal ORDER BY 1 DESC"; - - int rc = SQLITE_OK; - sqlite3_stmt *pStmt = 0; - i64 iLast = 0; - i64 iLastCont = 0; - - rc = sqlite3_prepare_v2(db, z1, -1, &pStmt, 0); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - iLast = sqlite3_column_int64(pStmt, 0); - } - rc = sqlite3_finalize(pStmt); - } - - if( rc==SQLITE_OK ){ - rc = sqlite3_prepare_v2(db, z2, -1, &pStmt, 0); - } - if( rc==SQLITE_OK ){ - i64 iPrev = iLast; - iLastCont = iLast; - while( sqlite3_step(pStmt)==SQLITE_ROW ){ - i64 iThis = sqlite3_column_int64(pStmt, 0); - if( iThis!=iPrev-1 ){ - iLastCont = iThis; - } - if( (iLast-iThis)>HCT_MAX_LEADING_WRITE*2 ) break; - iPrev = iThis; - } - rc = sqlite3_finalize(pStmt); - } - - *piLast = iLast; - *piLastCont = iLastCont; - return rc; -} - -static sqlite3_stmt *hctPreparePrintf( - int *pRc, - sqlite3 *db, - const char *zFmt, ... 
-){ - sqlite3_stmt *pRet = 0; - va_list ap; - char *zSql = 0; - - va_start(ap, zFmt); - zSql = sqlite3_vmprintf(zFmt, ap); - va_end(ap); - - if( *pRc==SQLITE_OK ){ - if( zSql==0 ){ - *pRc = SQLITE_NOMEM; - }else{ - *pRc = sqlite3_prepare_v2(db, zSql, -1, &pRet, 0); - } - } - sqlite3_free(zSql); - return pRet; -} - -/* -** Iterator for reading a blob from the "data" column of a journal entry. -*/ -typedef struct HctDataReader HctDataReader; -struct HctDataReader { - const u8 *aData; - int nData; - int iData; - - int bEof; - char eType; - - /* Valid for all values of eType */ - const char *zTab; - - /* For eType==HCT_TYPE_INSERT_ROWID, HCT_TYPE_DELETE_ROWID */ - i64 iRowid; - - /* For eType==HCT_TYPE_INSERT_ROWID */ - int nRecord; - const u8 *aRecord; -}; - -#define HCT_TYPE_TABLE 'T' -#define HCT_TYPE_INSERT_ROWID 'i' -#define HCT_TYPE_DELETE_ROWID 'd' - -static int hctDataReaderNext(HctDataReader *p){ - if( p->iData>=p->nData ){ - p->bEof = 1; - }else{ - p->eType = (char)(p->aData[p->iData++]); - switch( p->eType ){ - case 'T': { - p->zTab = (const char*)&p->aData[p->iData]; - p->iData += sqlite3Strlen30(p->zTab) + 1; - break; - } - - case 'd': { - p->iData += sqlite3GetVarint(&p->aData[p->iData], (u64*)&p->iRowid); - break; - } - - case 'i': { - p->iData += sqlite3GetVarint(&p->aData[p->iData], (u64*)&p->iRowid); - p->iData += getVarint32(&p->aData[p->iData], p->nRecord); - p->aRecord = &p->aData[p->iData]; - p->iData += p->nRecord; - break; - } - - default: { - return SQLITE_CORRUPT_BKPT; - } - } - } - - return SQLITE_OK; -} - -/* -** Initialize an HctDataReader object to iterate through the nData byte -** 'data' blob in buffer pData. Leave the iterator pointing at the first -** entry in the blob. 
-*/ -static int hctDataReaderInit(const void *pData, int nData, HctDataReader *pRdr){ - memset(pRdr, 0, sizeof(*pRdr)); - pRdr->aData = (const u8*)pData; - pRdr->nData = nData; - return hctDataReaderNext(pRdr); -} - -SQLITE_PRIVATE int sqlite3HctJrnlSavePhysical( - sqlite3 *db, - HctJournal *pJrnl, - int (*xSave)(void*, i64 iPhys), - void *pSave -){ - const char *zSql = "SELECT data FROM sqlite_hct_journal WHERE cid>?"; - int rc = SQLITE_OK; - i64 iLast = 0; - i64 iLastCont = 0; - sqlite3_stmt *pStmt = 0; - - rc = hctJrnlGetJrnlShape(db, &iLast, &iLastCont); - if( rc==SQLITE_OK ){ - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - } - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iLastCont); - while( rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW ){ - const void *pData = sqlite3_column_blob(pStmt, 0); - int nData = sqlite3_column_bytes(pStmt, 0); - sqlite3_stmt *pQuery = 0; - HctDataReader rdr; - - sqlite3HctDbSetSavePhysical(pJrnl->pDb, xSave, pSave); - for(rc=hctDataReaderInit(pData, nData, &rdr); - rc==SQLITE_OK && rdr.bEof==0; - rc=hctDataReaderNext(&rdr) - ){ - switch( rdr.eType ){ - case HCT_TYPE_TABLE: { - rc = sqlite3_finalize(pQuery); - pQuery = hctPreparePrintf( - &rc, db, "SELECT * FROM %Q WHERE _rowid_=?", rdr.zTab - ); - break; - } - - case HCT_TYPE_INSERT_ROWID: - case HCT_TYPE_DELETE_ROWID: { - sqlite3_bind_int64(pQuery, 1, rdr.iRowid); - sqlite3_step(pQuery); - rc = sqlite3_reset(pQuery); - break; - } - - default: assert( 0 ); - } - if( rc ) break; - } - sqlite3HctDbSetSavePhysical(pJrnl->pDb, 0, 0); - sqlite3_finalize(pQuery); - } - rc = sqlite3_finalize(pStmt); - } - - return rc; -} - -/* -** Do special recovery (startup) processing for replication-enabled databases. -** This function is called during stage 1 recovery - after any log files have -** been processed (and the database schema + contents restored), but before the -** free-page-lists are recovered. 
-*/ -SQLITE_PRIVATE int sqlite3HctJrnlRecovery(HctJournal *pJrnl, HctDatabase *pDb){ - HctBaselineRecord base; /* sqlite_hct_baseline data */ - HctJrnlServer *pServer = 0; - HctFile *pFile = sqlite3HctDbFile(pDb); - int rc = SQLITE_OK; - HctDbCsr *pCsr = 0; - - i64 iMaxCid = 0; - i64 iSchemaCid = 0; - - /* Read the contents of the sqlite_hct_baseline table. */ - rc = hctJrnlReadBaseline(pJrnl, &base); - - /* Allocate the new HctJrnlServer structure */ - pServer = (HctJrnlServer*)sqlite3HctMalloc(&rc, sizeof(HctJrnlServer)); - - /* Read the last record of the sqlite_hct_journal table. Specifically, - ** the value of fields "cid" and "schema_version". Store these values - ** in stack variables iMaxCid and aSchema, respectively. Or, if the - ** sqlite_hct_journal table is empty, populate iMaxCid and aSchema[] with - ** values from the baseline table. */ - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbCsrOpen(pDb, 0, (u32)pJrnl->iJrnlRoot, &pCsr); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbCsrLast(pCsr); - } - if( rc==SQLITE_OK ){ - if( sqlite3HctDbCsrEof(pCsr)==0 ){ - HctJournalRecord rec; - rc = hctJrnlReadJournalRecord(pCsr, &rec); - if( rc==SQLITE_OK ){ - iMaxCid = rec.iCid; - iSchemaCid = (rec.zSchema[0] ? rec.iCid : rec.iSchemaCid); - } - }else{ - iMaxCid = base.iCid; - iSchemaCid = base.iSchemaCid; - } - } - - /* Scan the sqlite_hct_journal table from beginning to end. When - ** the first missing entry is found, calculate the size of the - ** HctJrnlServer.aCommit[] API and allocate it. Then continue - ** scanning the sqlite_hct_journal table, populating aCommit[] along - ** the way. */ - if( rc==SQLITE_OK ){ - HctTMapClient *pTClient = sqlite3HctFileTMapClient(pFile); - i64 iPrev = base.iCid; - int nTrans = 0; - u64 *aCommit = 0; - - /* Scan until the first missing entry. Set nTrans to the number of - ** number of entries between the first missing one and the last - ** present, or to HCT_MAX_LEADING_WRITE, whichever is greater. 
- ** Set iPrev to the largest CID value for which it and all previous - ** CIDs have been written into the journal table. */ - for(rc = sqlite3HctDbCsrFirst(pCsr); - rc==SQLITE_OK && 0==sqlite3HctDbCsrEof(pCsr); - rc = sqlite3HctDbCsrNext(pCsr) - ){ - i64 iCid = 0; - sqlite3HctDbCsrKey(pCsr, &iCid); - if( iPrev!=0 && iCid!=iPrev+1 ){ - nTrans = iMaxCid - iPrev; - break; - } - iPrev = iCid; - } - nTrans = MAX(HCT_MAX_LEADING_WRITE, nTrans); - - pServer->nCommit = nTrans*2; - aCommit = (u64*)sqlite3HctMalloc(&rc, pServer->nCommit*sizeof(u64)); - pServer->aCommit = aCommit; - pServer->iSnapshot = iPrev; - - /* Scan through whatever is left of the sqlite_hct_journal table, - ** populating the aCommit[] array and the transaction-map (hct_tmap.c) - ** along the way. */ - while( rc==SQLITE_OK && 0==sqlite3HctDbCsrEof(pCsr) ){ - HctJournalRecord rec; - rc = hctJrnlReadJournalRecord(pCsr, &rec); - if( rc==SQLITE_OK ){ - i64 iVal = rec.iValidCid ? rec.iValidCid : rec.iCid; - pServer->aCommit[rec.iCid % pServer->nCommit] = iVal; - rc = sqlite3HctTMapRecoverySet(pTClient, rec.iTid, rec.iCid); - } - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbCsrNext(pCsr); - } - } - sqlite3HctTMapRecoveryFinish(pTClient, rc); - } - - if( rc==SQLITE_OK ){ - HctAtomicStore(&pServer->iSchemaCid, iSchemaCid); - pJrnl->pServer = pServer; - sqlite3HctFileSetJrnlPtr(pFile, (void*)pServer, hctJrnlDelServer); - if( iMaxCid>0 ) sqlite3HctFileSetCID(pFile, iMaxCid); - }else{ - hctJrnlDelServer((void*)pServer); - } - - sqlite3HctDbCsrClose(pCsr); - return rc; -} - -static u64 hctFindRootByName(Schema *pSchema, const char *zName){ - u64 iRet = 0; - Table *pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, zName); - if( pTab ){ - iRet = pTab->tnum; - } - return iRet; -} - -SQLITE_PRIVATE int sqlite3HctJournalNewIf( - Schema *pSchema, - HctTree *pTree, - HctDatabase *pDb, - HctJournal **pp -){ - int rc = SQLITE_OK; - u64 iJrnlRoot = hctFindRootByName(pSchema, "sqlite_hct_journal"); - u64 iBaseRoot = 
hctFindRootByName(pSchema, "sqlite_hct_baseline"); - - assert( *pp==0 ); - - if( (iJrnlRoot==0)!=(iBaseRoot==0) ){ - return SQLITE_CORRUPT_BKPT; - } - if( iJrnlRoot ){ - HctJournal *pNew = sqlite3HctMalloc(&rc, sizeof(HctJournal)); - if( pNew ){ - HctFile *pFile = sqlite3HctDbFile(pDb); - pNew->iJrnlRoot = iJrnlRoot; - pNew->iBaseRoot = iBaseRoot; - pNew->pDb = pDb; - pNew->pTree = pTree; - pNew->pServer = (HctJrnlServer*)sqlite3HctFileGetJrnlPtr(pFile); - *pp = pNew; - } - } - - return rc; -} - -SQLITE_PRIVATE void sqlite3HctJournalClose(HctJournal *pJrnl){ - sqlite3_free(pJrnl); -} - -/* -** See description in hctJrnlInt.h. -*/ -SQLITE_PRIVATE int sqlite3HctJournalIsReadonly( - HctJournal *pJrnl, - u64 iTable, - int *pbNosnap -){ - if( pJrnl ){ - HctJrnlServer *p = pJrnl->pServer; - int bNosnap = (pJrnl->iJrnlRoot==iTable || pJrnl->iBaseRoot==iTable); - *pbNosnap = bNosnap; - return (pJrnl->eInWrite==HCT_JOURNAL_NONE && ( - bNosnap || !p || p->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER - )); - } - return 0; -} - -/* -** Called during log file recovery to remove the entry with "tid" (not CID!) -** value iTid from the sqlite_hct_journal table. -*/ -SQLITE_PRIVATE int sqlite3HctJrnlRollbackEntry(HctJournal *pJrnl, i64 iTid){ - i64 iDel = 0; - HctDbCsr *pCsr = 0; - int rc = SQLITE_OK; - - rc = sqlite3HctDbCsrOpen(pJrnl->pDb, 0, (u32)pJrnl->iJrnlRoot, &pCsr); - if( rc==SQLITE_OK ){ - HctJournalRecord rec; - sqlite3HctDbCsrNosnap(pCsr, 1); - for(rc=sqlite3HctDbCsrLast(pCsr); - iDel==0 && rc==SQLITE_OK && sqlite3HctDbCsrEof(pCsr)==0; - rc=sqlite3HctDbCsrPrev(pCsr) - ){ - hctJrnlReadJournalRecord(pCsr, &rec); - if( rec.iTid==iTid ) iDel = rec.iCid; - } - - if( iDel!=0 && rc==SQLITE_OK ){ - rc = hctJrnlWriteRecord(pJrnl, iDel, "", 0, 0, 0, iTid); - } - - sqlite3HctDbCsrClose(pCsr); - } - - return rc; -} - -/* -** Find the HctJournal object associated with the "main" database of the -** connection passed as the only argument. 
If successful, set (*ppJrnl) -** to point to said object and return SQLITE_OK. Or, if the database is -** not a replication-enabled db, set (*ppJrnl) to NULL and return SQLITE_OK. -** Or, if an error occurs, return an SQLite error code. The final value -** of (*ppJrnl) is undefined in this case. -*/ -static int hctJrnlFind(sqlite3 *db, HctJournal **ppJrnl){ - int rc = SQLITE_OK; - HctJournal *pJrnl = sqlite3HctJrnlFind(db); - - if( pJrnl==0 ){ - /* If the journal was not found, it might be because the database is - ** not yet initialized. Run a query to ensure it is, then try to retrieve - ** the journal object again. */ - rc = sqlite3_exec(db, "SELECT 1 FROM sqlite_schema LIMIT 1", 0, 0, 0); - if( rc==SQLITE_OK ){ - pJrnl = sqlite3HctJrnlFind(db); - } - } - - if( rc==SQLITE_OK && pJrnl==0 ){ - hctJournalSetDbError(db, SQLITE_ERROR, "not a journaled hct database"); - rc = SQLITE_ERROR; - } - - *ppJrnl = pJrnl; - return rc; -} - - -/* -** Return the current journal mode - SQLITE_HCT_JOURNAL_MODE_FOLLOWER or -** SQLITE_HCT_JOURNAL_MODE_LEADER - for the main database of the connection -** passed as the only argument. Or, if the main database is not a -** replication-enabled hct database, return -1; -*/ -SQLITE_API int sqlite3_hct_journal_mode(sqlite3 *db){ - int eRet = -1; - HctJournal *pJrnl = sqlite3HctJrnlFind(db); - if( pJrnl ){ - eRet = pJrnl->pServer->eMode; - } - return eRet; -} - -/* -** Return true if the journal is complete - contains no holes. Or false -** otherwise. This function is not threadsafe. Results are undefined -** if there are concurrent transactions running on the database. 
-*/ -static int hctJrnlIsComplete(HctJournal *pJrnl){ - HctJrnlServer *pServer = pJrnl->pServer; - u64 iSnapshot = pServer->iSnapshot; - int ii; - - assert( pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER ); - - /* Set iSnapshot to the CID of the last contiguous commit */ - while( 1 ){ - int iNext = (iSnapshot+1) % pServer->nCommit; - u64 iVal = HctAtomicLoad(&pServer->aCommit[iNext]); - if( iVal<=iSnapshot ) break; - iSnapshot++; - } - - /* See if there are any transactions yet committed with CID values greater - ** than iSnapshot. If there are, then the journal is not complete. */ - for(ii=0; iinCommit; ii++){ - u64 iVal = HctAtomicLoad(&pServer->aCommit[ii]); - if( iVal>iSnapshot ){ - return 0; - } - } - - return 1; -} - -/* -** Set the LEADER/FOLLOWER setting of the main database of the connection -** passed as the first argument. -*/ -SQLITE_API int sqlite3_hct_journal_setmode(sqlite3 *db, int eMode){ - int rc = SQLITE_OK; - HctJournal *pJrnl = sqlite3HctJrnlFind(db); - - if( pJrnl==0 ){ - rc = sqlite3_exec(db, "SELECT 1 FROM sqlite_schema LIMIT 1", 0, 0, 0); - if( rc==SQLITE_OK ){ - pJrnl = sqlite3HctJrnlFind(db); - } - } - - if( rc==SQLITE_OK ){ - if( eMode!=SQLITE_HCT_JOURNAL_MODE_LEADER - && eMode!=SQLITE_HCT_JOURNAL_MODE_FOLLOWER - ){ - return SQLITE_MISUSE_BKPT; - }else if( pJrnl==0 ){ - hctJournalSetDbError(db, SQLITE_ERROR, "not a journaled hct database"); - rc = SQLITE_ERROR; - }else{ - HctFile *pFile = sqlite3HctDbFile(pJrnl->pDb); - HctJrnlServer *pServer = pJrnl->pServer; - if( eMode!=pServer->eMode ){ - if( eMode==SQLITE_HCT_JOURNAL_MODE_LEADER ){ - /* Switch from FOLLOWER to LEADER mode. This is only allowed if - ** there are no holes in the journal. 
*/ - if( hctJrnlIsComplete(pJrnl)==0 ){ - hctJournalSetDbError(db, SQLITE_ERROR, "incomplete journal"); - rc = SQLITE_ERROR; - }else{ - u64 iCid = sqlite3HctJournalSnapshot(pJrnl); - pServer->eMode = SQLITE_HCT_JOURNAL_MODE_LEADER; - if( iCid>0 ){ - sqlite3HctFileSetCID(sqlite3HctDbFile(pJrnl->pDb), iCid); - } - } - pServer->nSchemaVersionIncr++; - }else{ - /* Switch from LEADER to FOLLOWER mode. This is always possible. */ - void *pSchema = sqlite3HctBtreeSchema(db->aDb[0].pBt, 0, 0); - u64 iSnapshotId = sqlite3HctFileGetSnapshotid(pFile); - memset(pServer->aCommit, 0, pServer->nCommit*sizeof(u64)); - pServer->iSnapshot = iSnapshotId; - pServer->eMode = SQLITE_HCT_JOURNAL_MODE_FOLLOWER; - sqlite3HctJournalFixSchema(pJrnl, db, pSchema); - } - } - } - } - - return rc; -} - -static void hctJrnlFixTable(Table *pTab){ - Index *pIdx; - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - if( pIdx->idxType==SQLITE_IDXTYPE_UNIQUE - || pIdx->idxType==SQLITE_IDXTYPE_PRIMARYKEY - ){ - pIdx->idxType = SQLITE_IDXTYPE_APPDEF; - } - pIdx->uniqNotNull = 0; - pIdx->onError = OE_None; - } - - -} - -/* -** This function is used to "fix" a schema so that it can be used in -** a FOLLOWER mode database. Specifically: -** -** * All UNIQUE indexes are marked as not-unique. -** * All triggers are removed from the schema. -** * All FK definitions are removed from the schema. 
-*/ -SQLITE_PRIVATE void sqlite3HctJournalFixSchema(HctJournal *pJrnl, sqlite3 *db, void *pS){ - HctJrnlServer *pServer = pJrnl->pServer; - if( pServer==0 || pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER ){ - Schema *pSchema = (Schema*)pS; - HashElem *k; - - for(k=sqliteHashFirst(&pSchema->tblHash); k; k=sqliteHashNext(k)){ - Table *pTab = (Table*)sqliteHashData(k); - hctJrnlFixTable(pTab); - while( pTab->pTrigger ){ - Trigger *pTrig = pTab->pTrigger; - pTab->pTrigger = pTrig->pNext; - sqlite3DeleteTrigger(db, pTrig); - } - if( IsOrdinaryTable(pTab) ){ - sqlite3FkDelete(db, pTab); - } - } - sqlite3HashClear(&pSchema->trigHash); - } -} - -SQLITE_PRIVATE void sqlite3HctJournalSchemaVersion(HctJournal *pJrnl, u32 *pSchemaVersion){ - if( pJrnl && pJrnl->pServer ){ - *pSchemaVersion += HctAtomicLoad(&pJrnl->pServer->nSchemaVersionIncr); - } -} - -#ifdef SQLITE_DEBUG -/* -** assert() that the schema associated with table pTab has been "fixed", -** according to the definition used by sqlite3HctJournalFixSchema(). -*/ -static void assert_schema_is_fixed(Table *pTab){ - Index *pIdx; - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - assert( pIdx->idxType==SQLITE_IDXTYPE_APPDEF ); - assert( pIdx->uniqNotNull==0 ); - assert( pIdx->onError==OE_None ); - } - assert( pTab->pTrigger==0 ); - assert( pTab->u.tab.pFKey==0 ); -} -#else -# define assert_schema_is_fixed(x) -#endif - -static int hctJrnlGetInsertStmt( - sqlite3 *db, - const char *zTab, - int *piPk, - sqlite3_stmt **ppStmt -){ - sqlite3_str *pStr; - Schema *pSchema = db->aDb[0].pSchema; - Table *pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, zTab); - char *zSql = 0; - int rc = SQLITE_OK; - int ii; - - assert( pTab ); - assert_schema_is_fixed(pTab); - - *ppStmt = 0; - pStr = sqlite3_str_new(0); - sqlite3_str_appendf(pStr, "REPLACE INTO main.%Q(", zTab); - if( pTab->iPKey<0 ){ - sqlite3_str_appendf(pStr, "_rowid_, "); - } - for(ii=0; iinCol; ii++){ - const char *zSep = (ii==pTab->nCol-1) ? 
") VALUES (" : ","; - sqlite3_str_appendf(pStr, "%Q%s ", pTab->aCol[ii].zCnName, zSep); - } - if( pTab->iPKey<0 ){ - sqlite3_str_appendf(pStr, "?%d, ", pTab->nCol+1); - *piPk = pTab->nCol+1; - }else{ - *piPk = pTab->iPKey+1; - } - for(ii=0; iinCol; ii++){ - const char *zSep = (ii==pTab->nCol-1) ? ")" : ", "; - sqlite3_str_appendf(pStr, "?%d%s", ii+1, zSep); - } - - zSql = sqlite3_str_finish(pStr); - if( zSql==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0); - sqlite3_free(zSql); - } - - return rc; -} - -static int hctJrnlGetDeleteStmt( - sqlite3 *db, - const char *zTab, - sqlite3_stmt **ppStmt -){ - Schema *pSchema = db->aDb[0].pSchema; - Table *pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, zTab); - int rc = SQLITE_OK; - char *zSql = 0; - const char *zRowid = "_rowid_"; - - assert( pTab ); - assert_schema_is_fixed(pTab); - - if( pTab->iPKey>=0 ){ - zRowid = pTab->aCol[pTab->iPKey].zCnName; - } - - *ppStmt = 0; - zSql = sqlite3_mprintf( - "DELETE FROM main.%Q WHERE main.%Q.%Q = ?", - pTab->zName, pTab->zName, zRowid - ); - if( zSql==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0); - sqlite3_free(zSql); - } - - return rc; -} - -/* -** Parameter aData[] points to a record encoded in SQLite format. Bind -** each value in the record to the statement passed as the second argument. 
-*/ -static int hctJrnlBindRecord(int *pRc, sqlite3_stmt *pStmt, const u8 *aData){ - int rc = *pRc; - int ret = 0; - if( rc==SQLITE_OK ){ - const u8 *pHdr = aData; - const u8 *pData = 0; - int nHdr; - int iBind; - - pHdr += getVarint32(pHdr, nHdr); - pData = &aData[nHdr]; - for(iBind=1; pHdr<&aData[nHdr]; iBind++){ - u32 t; - pHdr += getVarint32(pHdr, t); - switch( t ){ - case 10: - case 11: - case 0: /* NULL */ - sqlite3_bind_null(pStmt, iBind); - break; - - case 1: { /* 1 byte integer */ - i64 iVal = pData[0]; - pData += 1; - sqlite3_bind_int64(pStmt, iBind, iVal); - break; - } - case 2: { /* 2 byte integer */ - i64 iVal = ((i64)pData[0]<<8) + (i64)pData[1]; - pData += 2; - sqlite3_bind_int64(pStmt, iBind, iVal); - break; - } - case 3: { /* 3 byte integer */ - i64 iVal = ((i64)pData[0]<<16) + ((i64)pData[1]<<8) + (i64)pData[2]; - pData += 3; - sqlite3_bind_int64(pStmt, iBind, iVal); - break; - } - case 4: { /* 4 byte integer */ - i64 iVal = ((i64)pData[0]<<24) - + ((i64)pData[1]<<16) - + ((i64)pData[2]<<8) - + (i64)pData[3]; - pData += 4; - sqlite3_bind_int64(pStmt, iBind, iVal); - break; - } - case 5: { /* 6 byte integer */ - i64 iVal = ((i64)pData[0]<<40) - + ((i64)pData[1]<<32) - + ((i64)pData[2]<<24) - + ((i64)pData[3]<<16) - + ((i64)pData[4]<<8) - + (i64)pData[5]; - pData += 6; - sqlite3_bind_int64(pStmt, iBind, iVal); - break; - } - - case 6: case 7: { /* 8 byte integer, 8 byte real value */ - u64 iVal = ((u64)pData[0]<<56) - + ((u64)pData[1]<<48) - + ((u64)pData[2]<<40) - + ((u64)pData[3]<<32) - + ((u64)pData[4]<<24) - + ((u64)pData[5]<<16) - + ((u64)pData[6]<<8) - + (u64)pData[7]; - pData += 8; - if( t==6 ){ - i64 iVal2; - memcpy(&iVal2, &iVal, sizeof(iVal)); - sqlite3_bind_int64(pStmt, iBind, iVal2); - }else{ - double rVal2; - memcpy(&rVal2, &iVal, sizeof(iVal)); - sqlite3_bind_double(pStmt, iBind, rVal2); - } - break; - } - - case 8: /* integer value 0 */ - sqlite3_bind_int(pStmt, iBind, 0); - break; - - case 9: /* integer value 1 */ - 
sqlite3_bind_int(pStmt, iBind, 1); - break; - - default: { - int nByte = (t - 12) / 2; - if( t & 0x01 ){ - sqlite3_bind_text( - pStmt, iBind, (const char*)pData, nByte, SQLITE_TRANSIENT - ); - }else{ - sqlite3_bind_blob( - pStmt, iBind, (const void*)pData, nByte, SQLITE_TRANSIENT - ); - } - pData += nByte; - break; - }; - } - } - - ret = pData - aData; - } - return ret; -} - -SQLITE_PRIVATE u64 sqlite3HctJrnlWriteTid(HctJournal *pJrnl, u64 *piCid){ - u64 iRet = 0; - assert( *piCid==0 ); - if( pJrnl && pJrnl->eInWrite!=HCT_JOURNAL_NONE ){ - iRet = pJrnl->iWriteTid; - *piCid = pJrnl->iWriteCid; - } - return iRet; -} - -SQLITE_PRIVATE u64 sqlite3HctJournalSnapshot(HctJournal *pJrnl){ - u64 iRet = 0; - if( pJrnl ){ - if( pJrnl->eInWrite==HCT_JOURNAL_INROLLBACK ){ - return pJrnl->iRollbackSnapshot; - } - HctJrnlServer *pServer = pJrnl->pServer; - if( pServer && pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER ){ - u64 iTest = 0; - u64 iValid = 0; - u64 iSnap = HctAtomicLoad(&pServer->iSnapshot); - iRet = iSnap; - for(iTest=iRet+1; 1; iTest++){ - u64 iVal = HctAtomicLoad(&pServer->aCommit[iTest % pServer->nCommit]); - if( iVal=iValid ) iRet = iTest; - }else{ - iValid = MAX(iVal, iValid); - } - } - - /* Update HctJrnlServer.iSnapshot if required */ - if( iRet>=iSnap+16 ){ - (void)HctCASBool(&pServer->iSnapshot, iSnap, iRet); - } - - /* If we are in an sqlite3_hct_journal_write() call, it is fine (and - ** necessary) to read snapshots that are invalid to the application. - ** So ignore any entries in the aCommit[] array that indicate such. */ - if( pJrnl->eInWrite==HCT_JOURNAL_INWRITE ){ - assert( (iTest-1)>=iRet ); - iRet = (iTest-1); - } - } - } - return iRet; -} - -/* -** Set output variable (*piCid) to the CID of the newest available -** database snapshot. Return SQLITE_OK if successful, or an SQLite -** error code if something goes wrong. 
-*/ -SQLITE_API int sqlite3_hct_journal_snapshot(sqlite3 *db, sqlite3_int64 *piCid){ - int rc = SQLITE_OK; - HctJournal *pJrnl = 0; - - rc = hctJrnlFind(db, &pJrnl); - if( rc==SQLITE_OK ){ - *piCid = (i64)sqlite3HctJournalSnapshot(pJrnl); - }else{ - *piCid = 0; - } - return rc; -} - -static sqlite3_stmt *hctJrnlPrepare(int *pRc, sqlite3 *db, const char *zSql){ - sqlite3_stmt *pStmt = 0; - if( *pRc==SQLITE_OK ){ - *pRc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - } - return pStmt; -} - -static void hctJrnlFinalize(int *pRc, sqlite3_stmt *pStmt){ - int rc = sqlite3_finalize(pStmt); - if( *pRc==SQLITE_OK ){ - *pRc = rc; - } -} - -SQLITE_API int sqlite3_hct_journal_truncate(sqlite3 *db, i64 iMinCid){ - int rc = SQLITE_OK; - HctJournal *pJrnl = 0; - sqlite3_stmt *pSelJrnl = 0; - sqlite3_stmt *pSelBaseline = 0; - sqlite3_stmt *pDelete = 0; - sqlite3_stmt *pUpdate = 0; - - if( 0==sqlite3_get_autocommit(db) ){ - hctJournalSetDbError(db, SQLITE_ERROR, - "cannot truncate journal from within a transaction" - ); - return SQLITE_ERROR; - } - - rc = hctJrnlFind(db, &pJrnl); - if( rc==SQLITE_OK - && pJrnl->pServer->eMode==SQLITE_HCT_JOURNAL_MODE_FOLLOWER - ){ - u64 iCid = sqlite3HctJournalSnapshot(pJrnl); - if( iCideInWrite = HCT_JOURNAL_INWRITE; - rc = sqlite3_exec(db, "BEGIN CONCURRENT", 0, 0, 0); - } - - pSelBaseline = hctJrnlPrepare(&rc, db, - "SELECT cid, schemacid, hash FROM sqlite_hct_baseline" - ); - pSelJrnl = hctJrnlPrepare(&rc, db, - "SELECT cid, schemacid, hash FROM sqlite_hct_journal WHERE cidpDb, &pJrnl->iWriteTid); - pJrnl->iWriteCid = 1; - } - if( rc==SQLITE_OK ){ - rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); - } - if( rc!=SQLITE_OK ){ - sqlite3_exec(db, "ROLLBACK", 0, 0, 0); - } - } - - hctJrnlFinalize(&rc, pSelJrnl); - hctJrnlFinalize(&rc, pSelBaseline); - hctJrnlFinalize(&rc, pDelete); - hctJrnlFinalize(&rc, pUpdate); - pJrnl->eInWrite = HCT_JOURNAL_NONE; - pJrnl->iWriteTid = 0; - pJrnl->iWriteCid = 0; - return rc; -} - -static int hctBufferAppendInsert( - 
HctBuffer *pBuf, - i64 iRowid, - Table *pTab, - sqlite3_stmt *pQuery -){ - int ii; - int rc = SQLITE_OK; - - rc = hctBufferAppend(pBuf, "REPLACE INTO %Q(_rowid_", pTab->zName); - for(ii=0; rc==SQLITE_OK && iinCol; ii++){ - if( ii!=pTab->iPKey ){ - rc = hctBufferAppend(pBuf, ", %Q", pTab->aCol[ii].zCnName); - } - } - - if( rc==SQLITE_OK ){ - rc = hctBufferAppend(pBuf, ") VALUES(%lld", iRowid); - } - - for(ii=0; rc==SQLITE_OK && iinCol; ii++){ - if( ii!=pTab->iPKey ){ - rc = hctBufferAppend(&buf, "%squote(x.%Q)", zSep, pTab->aCol[ii].zCnName); - zSep = ", "; - } - } - if( rc==SQLITE_OK ){ - rc = hctBufferAppend(&buf, "FROM %Q AS x WHERE _rowid_=?", pTab->zName); - } - - if( rc==SQLITE_OK ){ - rc = sqlite3_prepare_v2(db, (const char*)buf.aBuf, -1, &pRet, 0); - } - sqlite3_free(buf.aBuf); - - *pRc = rc; - return pRet; -} - -/* -** Rollback transactions that follow the first hole in the journal. -*/ -SQLITE_API int sqlite3_hct_journal_rollback(sqlite3 *db, sqlite3_int64 iCid){ - int rc = SQLITE_OK; - HctJournal *pJrnl = 0; - i64 iLast = 0; - i64 iLastCont = 0; - sqlite3_stmt *pStmt = 0; - Schema *pSchema = 0; - - rc = hctJrnlFind(db, &pJrnl); - if( rc!=SQLITE_OK ) return rc; - pSchema = db->aDb[0].pSchema; - - /* - ** 1. Find the location of the first hole in the journal. - ** - ** 2. Loop through journal entries, from the newest back to the - ** first hole in the journal. - ** - ** 3. Work through each of the transactions identified in step (1). - ** For each, write a log file, make the required modifications to - ** the db and journal file, then delete the log file. - */ - - /* Cannot call this with an open transaction. */ - if( 0==sqlite3_get_autocommit(db) ){ - hctJournalSetDbError(db, SQLITE_ERROR, - "cannot rollback journal from within a transaction" - ); - return SQLITE_ERROR; - } - - /* Cannot call this in LEADER mode. 
*/ - if( pJrnl->pServer->eMode==SQLITE_HCT_JOURNAL_MODE_LEADER ){ - hctJournalSetDbError(db, SQLITE_ERROR, - "cannot rollback journal in leader database" - ); - return SQLITE_ERROR; - } - - /* Find the location of the first hole in the journal. If there are no - ** holes in the journal, this call is a no-op. */ - rc = hctJrnlGetJrnlShape(db, &iLast, &iLastCont); - assert( iLastCont<=iLast ); - if( rc!=SQLITE_OK || iLastCont>=iLast ) return rc; - - /* Loop through all of the journal entries that will be rolled back. - ** For each, extract the primary keys from the "data" blob. Query the - ** current database snapshot for each of these keys, generating an SQL - ** script with a "REPLACE INTO" for each row present in the db and a - ** "DELETE" for each not. */ - rc = sqlite3_prepare_v2(db, - "SELECT data FROM sqlite_hct_journal WHERE cid>?", -1, &pStmt, 0 - ); - if( rc==SQLITE_OK ){ - HctBuffer sql = {0, 0, 0}; - sqlite3_bind_int64(pStmt, 1, iLastCont); - - rc = hctBufferAppend(&sql, "BEGIN CONCURRENT;\n"); - while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ - const void *pData = sqlite3_column_blob(pStmt, 0); - int nData = sqlite3_column_bytes(pStmt, 0); - sqlite3_stmt *pQuery = 0; - Table *pTab = 0; - HctDataReader rdr; - - for(rc=hctDataReaderInit(pData, nData, &rdr); - rc==SQLITE_OK && rdr.bEof==0; - rc=hctDataReaderNext(&rdr) - ){ - switch( rdr.eType ){ - case HCT_TYPE_TABLE: { - pTab = (Table*)sqlite3HashFind(&pSchema->tblHash, rdr.zTab); - if( pTab==0 ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = sqlite3_finalize(pQuery); - pQuery = hctGetQuoteQuery(&rc, db, pTab); - } - break; - } - - case HCT_TYPE_INSERT_ROWID: - case HCT_TYPE_DELETE_ROWID: { - sqlite3_bind_int64(pQuery, 1, rdr.iRowid); - if( SQLITE_ROW==sqlite3_step(pQuery) ){ - rc = hctBufferAppendInsert(&sql, rdr.iRowid, pTab, pQuery); - }else{ - rc = hctBufferAppend(&sql, - "DELETE FROM %Q WHERE _rowid_=%lld;\n", rdr.zTab, rdr.iRowid - ); - } - rc = sqlite3_reset(pQuery); - break; - } - - 
default: assert( 0 ); - } - if( rc ) break; - } - sqlite3_finalize(pQuery); - } - if( rc==SQLITE_OK ){ - rc = hctBufferAppend(&sql, - "DELETE FROM sqlite_hct_journal WHERE cid>%lld;\n", iLastCont - ); - } - - if( rc==SQLITE_OK ){ - assert( pJrnl->eInWrite==HCT_JOURNAL_NONE ); - pJrnl->eInWrite = HCT_JOURNAL_INROLLBACK; - rc = sqlite3_exec(db, (const char*)sql.aBuf, 0, 0, 0); - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbStartWrite(pJrnl->pDb, &pJrnl->iWriteTid); - } - if( rc==SQLITE_OK ){ - pJrnl->iWriteCid = iLastCont; - pJrnl->iRollbackSnapshot = iLast; - rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); - } - if( rc!=SQLITE_OK ){ - sqlite3_exec(db, "ROLLBACK", 0, 0, 0); - } - pJrnl->eInWrite = HCT_JOURNAL_NONE; - } - - sqlite3_free(sql.aBuf); - sqlite3_finalize(pStmt); - } - - return rc; -} - -static u64 hctJournalFindLastWrite( - int *pRc, /* IN/OUT: Error code */ - HctJournal *pJrnl, /* Journal object */ - u64 iRoot, /* Root page of table */ - i64 iRowid /* Key (for rowid tables) */ -){ - int rc = *pRc; - u64 iRet = 0; - if( rc==SQLITE_OK ){ - HctDbCsr *pCsr = 0; - rc = sqlite3HctDbCsrOpen(pJrnl->pDb, 0, iRoot, &pCsr); - if( rc==SQLITE_OK ){ - rc = sqlite3HctDbCsrFindLastWrite(pCsr, 0, iRowid, &iRet); - sqlite3HctDbCsrClose(pCsr); - } - *pRc = rc; - } - return iRet; -} - -/* -** Write a transaction into the database. 
-*/ -SQLITE_API int sqlite3_hct_journal_write( - sqlite3 *db, /* Write to "main" db of this handle */ - sqlite3_int64 iCid, - const char *zSchema, - const void *pData, int nData, - sqlite3_int64 iSchemaCid -){ - int rc = SQLITE_OK; - char *zErr = 0; /* Error message, if any */ - HctJournal *pJrnl = 0; - u64 iValidCid = 0; - u64 iSnapshotId = 0; - Btree *pBt = db->aDb[0].pBt; - Schema *pSchema = db->aDb[0].pSchema; - u64 iRoot = 0; /* Root page of zTab */ - HctJrnlServer *pServer = 0; - - HctDataReader rdr; /* For iterating through pData/nData */ - - rc = hctJrnlFind(db, &pJrnl); - if( rc!=SQLITE_OK ) return rc; - pJrnl->eInWrite = HCT_JOURNAL_INWRITE; - pServer = pJrnl->pServer; - - /* Check that the journal is in follower mode */ - if( pServer->eMode!=SQLITE_HCT_JOURNAL_MODE_FOLLOWER ){ - hctJournalSetDbError(db, SQLITE_ERROR, "database is not in FOLLOWER mode"); - return SQLITE_ERROR; - } - - /* Check that there is no transaction open on the connection */ - if( rc==SQLITE_OK && sqlite3_get_autocommit(db)==0 ){ - hctJournalSetDbError(db, SQLITE_ERROR, "open transaction on database"); - return SQLITE_ERROR; - } - - /* Open a concurrent transaction on the db handle. Then ensure that the - ** snapshot on the main database has also been opened. */ - rc = sqlite3_exec(db, "BEGIN CONCURRENT", 0, 0, 0); - if( rc==SQLITE_OK ){ - int dummy = 0; - rc = sqlite3BtreeBeginTrans(pBt, 1, &dummy); - } - - /* Check that the snapshot that was just opened has a schema new enough - ** for this transaction to be applied. 
*/ - if( rc==SQLITE_OK ){ - iSnapshotId = sqlite3HctBtreeSnapshotId(pBt); - if( iSchemaCid>iSnapshotId ){ - rc = SQLITE_BUSY; - zErr = sqlite3_mprintf( - "change may not be applied yet (requires newer schema)" - ); - }else if( (iSnapshotId+HCT_MAX_LEADING_WRITE)aDb[0].pBt, iRoot)==0 ){ - iLastCid = hctJournalFindLastWrite(&rc, pJrnl, iRoot, rdr.iRowid); - } - if( iLastCid>iSnapshotId && iLastCidaDb[0].pBt, iRoot)==0 ){ - u64 iLastCid = 0; - iLastCid = hctJournalFindLastWrite(&rc, pJrnl, iRoot, rdr.iRowid); - if( iLastCid>iSnapshotId && iLastCidpDb, &pJrnl->iWriteTid); - sqlite3HctDbJrnlWriteCid(pJrnl->pDb, iCid); - pJrnl->iWriteCid = iCid; - } - - /* Write the sqlite_hct_journal record directly into the HctTree - ** structure. We don't write via the SQL interface here, because - ** writing to the db once sqlite3HctDbStartWrite() has been called - ** causes assert() failures. And we don't write directly to the db - ** either, because the write needs to be rolled back if there is - ** a conflict. */ - if( rc==SQLITE_OK ){ - u8 *pRec = 0; - int nRec = 0; - - /* TODO: "validcid" value */ - pRec = hctJrnlComposeRecord(iCid, zSchema, - pData, nData, iSchemaCid, pJrnl->iWriteTid, iValidCid, &nRec - ); - if( pRec==0 ){ - rc = SQLITE_NOMEM_BKPT; - }else{ - HctTreeCsr *pCsr = 0; - u64 root = hctFindRootByName(db->aDb[0].pSchema, "sqlite_hct_journal"); - - rc = sqlite3HctTreeCsrOpen(pJrnl->pTree, root, &pCsr); - if( rc==SQLITE_OK ){ - rc = sqlite3HctTreeInsert(pCsr, 0, iCid, nRec, pRec, 0); - sqlite3HctTreeCsrClose(pCsr); - } - } - sqlite3_free(pRec); - - if( rc==SQLITE_OK ){ - rc = sqlite3_exec(db, "COMMIT", 0, 0, 0); - } - if( rc==SQLITE_OK ){ - i64 iVal = iValidCid ? iValidCid : iCid; - i64 *pPtr = (i64*)&pServer->aCommit[iCid % pServer->nCommit]; - - /* If this transaction updated the schema, update the Server.iSchemaCid - ** field as well. This field is not used in FOLLOWER mode, but may be - ** if this process switches to LEADER later on. 
*/ - if( zSchema[0] ){ - HctAtomicStore(&pServer->iSchemaCid, iCid); - } - - assert( iVal>=iCid ); - while( 1 ){ - i64 iExist = *pPtr; - if( iExist>=iVal ) break; - if( HctCASBool(pPtr, iExist, iVal) ) break; - } - assert( *pPtr>=iVal ); - - if( HctAtomicLoad(&pServer->iSnapshot)==0 ){ - (void)HctCASBool(&pServer->iSnapshot, (u64)0, (u64)iCid); - } - } - } - - if( rc!=SQLITE_OK ){ - sqlite3_exec(db, "ROLLBACK", 0, 0, 0); - if( zErr ){ - hctJournalSetDbError(db, rc, "%s", zErr); - sqlite3_free(zErr); - }else{ - hctJournalSetDbError(db, rc, 0); - } - } - pJrnl->eInWrite = HCT_JOURNAL_NONE; - sqlite3HctDbJrnlWriteCid(pJrnl->pDb, 0); - return rc; -} - -static int hctBufferAppendIf(HctBuffer *pBuf, const char *zSep){ - int rc = SQLITE_OK; - if( pBuf->nBuf>0 ){ - rc = hctBufferAppend(pBuf, "%s", zSep); - } - return rc; -} - -static void hctJournalEntryFunc( - sqlite3_context *pCtx, - int nArg, - sqlite3_value **apArg -){ - sqlite3 *db = sqlite3_context_db_handle(pCtx); - const u8 *aEntry = 0; - int nEntry = 0; - int ii = 0; - HctBuffer buf; - const char *zTab = "!"; - const char *zSep = " "; - - assert( nArg==1 ); - memset(&buf, 0, sizeof(buf)); - - nEntry = sqlite3_value_bytes(apArg[0]); - aEntry = (const u8*)sqlite3_value_blob(apArg[0]); - - while( ii */ - -/* - * If compiled on a machine that doesn't have a 32-bit integer, - * you just set "uint32" to the appropriate datatype for an - * unsigned 32-bit integer. For example: - * - * cc -Duint32='unsigned long' md5.c - * - */ -#ifndef uint32 -# define uint32 unsigned int -#endif - -struct MD5Context { - int isInit; - uint32 buf[4]; - uint32 bits[2]; - unsigned char in[64]; -}; -typedef struct MD5Context MD5Context; - -/* - * Note: this code is harmless on little-endian machines. 
- */ -static void byteReverse (unsigned char *buf, unsigned longs){ - uint32 t; - do { - t = (uint32)((unsigned)buf[3]<<8 | buf[2]) << 16 | - ((unsigned)buf[1]<<8 | buf[0]); - *(uint32 *)buf = t; - buf += 4; - } while (--longs); -} -/* The four core functions - F1 is optimized somewhat */ - -/* #define F1(x, y, z) (x & y | ~x & z) */ -#define F1(x, y, z) (z ^ (x & (y ^ z))) -#define F2(x, y, z) F1(z, x, y) -#define F3(x, y, z) (x ^ y ^ z) -#define F4(x, y, z) (y ^ (x | ~z)) - -/* This is the central step in the MD5 algorithm. */ -#define MD5STEP(f, w, x, y, z, data, s) \ - ( w += f(x, y, z) + data, w = w<>(32-s), w += x ) - -/* - * The core of the MD5 algorithm, this alters an existing MD5 hash to - * reflect the addition of 16 longwords of new data. MD5Update blocks - * the data and converts bytes into longwords for this routine. - */ -static void MD5Transform(uint32 buf[4], const uint32 in[16]){ - register uint32 a, b, c, d; - - a = buf[0]; - b = buf[1]; - c = buf[2]; - d = buf[3]; - - MD5STEP(F1, a, b, c, d, in[ 0]+0xd76aa478, 7); - MD5STEP(F1, d, a, b, c, in[ 1]+0xe8c7b756, 12); - MD5STEP(F1, c, d, a, b, in[ 2]+0x242070db, 17); - MD5STEP(F1, b, c, d, a, in[ 3]+0xc1bdceee, 22); - MD5STEP(F1, a, b, c, d, in[ 4]+0xf57c0faf, 7); - MD5STEP(F1, d, a, b, c, in[ 5]+0x4787c62a, 12); - MD5STEP(F1, c, d, a, b, in[ 6]+0xa8304613, 17); - MD5STEP(F1, b, c, d, a, in[ 7]+0xfd469501, 22); - MD5STEP(F1, a, b, c, d, in[ 8]+0x698098d8, 7); - MD5STEP(F1, d, a, b, c, in[ 9]+0x8b44f7af, 12); - MD5STEP(F1, c, d, a, b, in[10]+0xffff5bb1, 17); - MD5STEP(F1, b, c, d, a, in[11]+0x895cd7be, 22); - MD5STEP(F1, a, b, c, d, in[12]+0x6b901122, 7); - MD5STEP(F1, d, a, b, c, in[13]+0xfd987193, 12); - MD5STEP(F1, c, d, a, b, in[14]+0xa679438e, 17); - MD5STEP(F1, b, c, d, a, in[15]+0x49b40821, 22); - - MD5STEP(F2, a, b, c, d, in[ 1]+0xf61e2562, 5); - MD5STEP(F2, d, a, b, c, in[ 6]+0xc040b340, 9); - MD5STEP(F2, c, d, a, b, in[11]+0x265e5a51, 14); - MD5STEP(F2, b, c, d, a, in[ 0]+0xe9b6c7aa, 20); - 
MD5STEP(F2, a, b, c, d, in[ 5]+0xd62f105d, 5); - MD5STEP(F2, d, a, b, c, in[10]+0x02441453, 9); - MD5STEP(F2, c, d, a, b, in[15]+0xd8a1e681, 14); - MD5STEP(F2, b, c, d, a, in[ 4]+0xe7d3fbc8, 20); - MD5STEP(F2, a, b, c, d, in[ 9]+0x21e1cde6, 5); - MD5STEP(F2, d, a, b, c, in[14]+0xc33707d6, 9); - MD5STEP(F2, c, d, a, b, in[ 3]+0xf4d50d87, 14); - MD5STEP(F2, b, c, d, a, in[ 8]+0x455a14ed, 20); - MD5STEP(F2, a, b, c, d, in[13]+0xa9e3e905, 5); - MD5STEP(F2, d, a, b, c, in[ 2]+0xfcefa3f8, 9); - MD5STEP(F2, c, d, a, b, in[ 7]+0x676f02d9, 14); - MD5STEP(F2, b, c, d, a, in[12]+0x8d2a4c8a, 20); - - MD5STEP(F3, a, b, c, d, in[ 5]+0xfffa3942, 4); - MD5STEP(F3, d, a, b, c, in[ 8]+0x8771f681, 11); - MD5STEP(F3, c, d, a, b, in[11]+0x6d9d6122, 16); - MD5STEP(F3, b, c, d, a, in[14]+0xfde5380c, 23); - MD5STEP(F3, a, b, c, d, in[ 1]+0xa4beea44, 4); - MD5STEP(F3, d, a, b, c, in[ 4]+0x4bdecfa9, 11); - MD5STEP(F3, c, d, a, b, in[ 7]+0xf6bb4b60, 16); - MD5STEP(F3, b, c, d, a, in[10]+0xbebfbc70, 23); - MD5STEP(F3, a, b, c, d, in[13]+0x289b7ec6, 4); - MD5STEP(F3, d, a, b, c, in[ 0]+0xeaa127fa, 11); - MD5STEP(F3, c, d, a, b, in[ 3]+0xd4ef3085, 16); - MD5STEP(F3, b, c, d, a, in[ 6]+0x04881d05, 23); - MD5STEP(F3, a, b, c, d, in[ 9]+0xd9d4d039, 4); - MD5STEP(F3, d, a, b, c, in[12]+0xe6db99e5, 11); - MD5STEP(F3, c, d, a, b, in[15]+0x1fa27cf8, 16); - MD5STEP(F3, b, c, d, a, in[ 2]+0xc4ac5665, 23); - - MD5STEP(F4, a, b, c, d, in[ 0]+0xf4292244, 6); - MD5STEP(F4, d, a, b, c, in[ 7]+0x432aff97, 10); - MD5STEP(F4, c, d, a, b, in[14]+0xab9423a7, 15); - MD5STEP(F4, b, c, d, a, in[ 5]+0xfc93a039, 21); - MD5STEP(F4, a, b, c, d, in[12]+0x655b59c3, 6); - MD5STEP(F4, d, a, b, c, in[ 3]+0x8f0ccc92, 10); - MD5STEP(F4, c, d, a, b, in[10]+0xffeff47d, 15); - MD5STEP(F4, b, c, d, a, in[ 1]+0x85845dd1, 21); - MD5STEP(F4, a, b, c, d, in[ 8]+0x6fa87e4f, 6); - MD5STEP(F4, d, a, b, c, in[15]+0xfe2ce6e0, 10); - MD5STEP(F4, c, d, a, b, in[ 6]+0xa3014314, 15); - MD5STEP(F4, b, c, d, a, in[13]+0x4e0811a1, 21); - 
MD5STEP(F4, a, b, c, d, in[ 4]+0xf7537e82, 6); - MD5STEP(F4, d, a, b, c, in[11]+0xbd3af235, 10); - MD5STEP(F4, c, d, a, b, in[ 2]+0x2ad7d2bb, 15); - MD5STEP(F4, b, c, d, a, in[ 9]+0xeb86d391, 21); - - buf[0] += a; - buf[1] += b; - buf[2] += c; - buf[3] += d; -} - -/* - * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious - * initialization constants. - */ -static void MD5Init(MD5Context *ctx){ - ctx->isInit = 1; - ctx->buf[0] = 0x67452301; - ctx->buf[1] = 0xefcdab89; - ctx->buf[2] = 0x98badcfe; - ctx->buf[3] = 0x10325476; - ctx->bits[0] = 0; - ctx->bits[1] = 0; -} - -/* - * Update context to reflect the concatenation of another buffer full - * of bytes. - */ -static -void MD5Update(MD5Context *ctx, const unsigned char *buf, unsigned int len){ - uint32 t; - - /* Update bitcount */ - - t = ctx->bits[0]; - if ((ctx->bits[0] = t + ((uint32)len << 3)) < t) - ctx->bits[1]++; /* Carry from low to high */ - ctx->bits[1] += len >> 29; - - t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ - - /* Handle any leading odd-sized chunks */ - - if ( t ) { - unsigned char *p = (unsigned char *)ctx->in + t; - - t = 64-t; - if (len < t) { - if( len ) memcpy(p, buf, len); - return; - } - memcpy(p, buf, t); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (uint32 *)ctx->in); - buf += t; - len -= t; - } - - /* Process data in 64-byte chunks */ - - while (len >= 64) { - memcpy(ctx->in, buf, 64); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (uint32 *)ctx->in); - buf += 64; - len -= 64; - } - - /* Handle any remaining bytes of data. */ - - memcpy(ctx->in, buf, len); -} - -/* - * Final wrapup - pad to 64-byte boundary with the bit pattern - * 1 0* (64-bit count of bits processed, MSB-first) - */ -static void MD5Final(unsigned char digest[16], MD5Context *ctx){ - unsigned count; - unsigned char *p; - - /* Compute number of bytes mod 64 */ - count = (ctx->bits[0] >> 3) & 0x3F; - - /* Set the first char of padding to 0x80. 
This is safe since there is - always at least one byte free */ - p = ctx->in + count; - *p++ = 0x80; - - /* Bytes of padding needed to make 64 bytes */ - count = 64 - 1 - count; - - /* Pad out to 56 mod 64 */ - if (count < 8) { - /* Two lots of padding: Pad the first block to 64 bytes */ - memset(p, 0, count); - byteReverse(ctx->in, 16); - MD5Transform(ctx->buf, (uint32 *)ctx->in); - - /* Now fill the next block with 56 bytes */ - memset(ctx->in, 0, 56); - } else { - /* Pad block to 56 bytes */ - memset(p, 0, count-8); - } - byteReverse(ctx->in, 14); - - /* Append length in bits and transform */ - memcpy(ctx->in + 14*4, ctx->bits, 8); - - MD5Transform(ctx->buf, (uint32 *)ctx->in); - byteReverse((unsigned char *)ctx->buf, 4); - memcpy(digest, ctx->buf, 16); -} - -/*************************************************************************/ -/*************************************************************************/ -/*************************************************************************/ - - -/* -** Both arguments are assumed to point to SQLITE_HCT_JOURNAL_HASHSIZE -** byte buffers. This function updates the hash stored in buffer pHash -** based on the contents of buffer pData. -*/ -SQLITE_API void sqlite3_hct_journal_hash(void *pHash, const void *pData){ - MD5Context ctx; - MD5Init(&ctx); - MD5Update(&ctx, pHash, SQLITE_HCT_JOURNAL_HASHSIZE); - MD5Update(&ctx, pData, SQLITE_HCT_JOURNAL_HASHSIZE); - MD5Final(pHash, &ctx); -} - -static void md5U64(MD5Context *pCtx, sqlite3_uint64 iVal){ - u8 aVal[8]; - aVal[0] = (iVal >> 56) & 0xFF; - aVal[1] = (iVal >> 48) & 0xFF; - aVal[2] = (iVal >> 40) & 0xFF; - aVal[3] = (iVal >> 32) & 0xFF; - aVal[4] = (iVal >> 24) & 0xFF; - aVal[5] = (iVal >> 16) & 0xFF; - aVal[6] = (iVal >> 8) & 0xFF; - aVal[7] = (iVal >> 0) & 0xFF; - MD5Update(pCtx, aVal, sizeof(aVal)); -} - -/* -** It is assumed that buffer pHash points to a buffer -** SQLITE_HCT_JOURNAL_HASHSIZE bytes in size. 
This function populates this -** buffer with a hash based on the remaining arguments. -*/ -SQLITE_API void sqlite3_hct_journal_hashentry( - void *pHash, /* OUT: Hash of other arguments */ - sqlite3_int64 iCid, - const char *zSchema, - const void *pData, int nData, - sqlite3_int64 iSchemaCid -){ - MD5Context ctx; - MD5Init(&ctx); - - md5U64(&ctx, (sqlite3_uint64)iCid); - MD5Update(&ctx, (const u8*)zSchema, sqlite3Strlen30(zSchema)); - MD5Update(&ctx, pData, nData); - md5U64(&ctx, (sqlite3_uint64)iSchemaCid); - - MD5Final(pHash, &ctx); -} - - -/************** End of hct_journalhash.c *************************************/ /* Return the source-id for this library */ SQLITE_API const char *sqlite3_sourceid(void){ return SQLITE_SOURCE_ID; } -#endif /* SQLITE_AMALGAMATION */ /************************** End of sqlite3.c ******************************/ diff --git a/libstuff/sqlite3.h b/libstuff/sqlite3.h index 12fe5fda2..9827d4007 100644 --- a/libstuff/sqlite3.h +++ b/libstuff/sqlite3.h @@ -146,9 +146,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.48.0" -#define SQLITE_VERSION_NUMBER 3048000 -#define SQLITE_SOURCE_ID "2024-11-15 19:25:39 ed829bf2b069a48c644ae5706399dad7486e5abb87dc1225764038ac258ea4dc" +#define SQLITE_VERSION "3.47.0" +#define SQLITE_VERSION_NUMBER 3047000 +#define SQLITE_SOURCE_ID "2024-12-20 19:37:41 b40cd7395c44b1f2d019d8e809e03de0e083c93693322a72ddb250a85640528f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -652,13 +652,6 @@ SQLITE_API int sqlite3_exec( ** filesystem supports doing multiple write operations atomically when those ** write operations are bracketed by [SQLITE_FCNTL_BEGIN_ATOMIC_WRITE] and ** [SQLITE_FCNTL_COMMIT_ATOMIC_WRITE]. -** -** The SQLITE_IOCAP_SUBPAGE_READ property means that it is ok to read -** from the database file in amounts that are not a multiple of the -** page size and that do not begin at a page boundary. 
Without this -** property, SQLite is careful to only do full-page reads and write -** on aligned pages, with the one exception that it will do a sub-page -** read of the first page to access the database header. */ #define SQLITE_IOCAP_ATOMIC 0x00000001 #define SQLITE_IOCAP_ATOMIC512 0x00000002 @@ -675,7 +668,6 @@ SQLITE_API int sqlite3_exec( #define SQLITE_IOCAP_POWERSAFE_OVERWRITE 0x00001000 #define SQLITE_IOCAP_IMMUTABLE 0x00002000 #define SQLITE_IOCAP_BATCH_ATOMIC 0x00004000 -#define SQLITE_IOCAP_SUBPAGE_READ 0x00008000 /* ** CAPI3REF: File Locking Levels @@ -822,7 +814,6 @@ struct sqlite3_file { **
  • [SQLITE_IOCAP_POWERSAFE_OVERWRITE] **
  • [SQLITE_IOCAP_IMMUTABLE] **
  • [SQLITE_IOCAP_BATCH_ATOMIC] -**
  • [SQLITE_IOCAP_SUBPAGE_READ] ** ** ** The SQLITE_IOCAP_ATOMIC property means that all writes of @@ -1100,11 +1091,6 @@ struct sqlite3_io_methods { ** pointed to by the pArg argument. This capability is used during testing ** and only needs to be supported when SQLITE_TEST is defined. ** -**
  • [[SQLITE_FCNTL_NULL_IO]] -** The [SQLITE_FCNTL_NULL_IO] opcode sets the low-level file descriptor -** or file handle for the [sqlite3_file] object such that it will no longer -** read or write to the database file. -** **
  • [[SQLITE_FCNTL_WAL_BLOCK]] ** The [SQLITE_FCNTL_WAL_BLOCK] is a signal to the VFS layer that it might ** be advantageous to block on the next WAL lock if the lock is not immediately @@ -1258,7 +1244,6 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_EXTERNAL_READER 40 #define SQLITE_FCNTL_CKSM_FILE 41 #define SQLITE_FCNTL_RESET_CACHE 42 -#define SQLITE_FCNTL_NULL_IO 43 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -2637,14 +2622,10 @@ SQLITE_API void sqlite3_set_last_insert_rowid(sqlite3*,sqlite3_int64); ** deleted by the most recently completed INSERT, UPDATE or DELETE ** statement on the database connection specified by the only parameter. ** The two functions are identical except for the type of the return value -** and that if the number of rows modified by the most recent INSERT, UPDATE, +** and that if the number of rows modified by the most recent INSERT, UPDATE ** or DELETE is greater than the maximum value supported by type "int", then ** the return value of sqlite3_changes() is undefined. ^Executing any other ** type of SQL statement does not modify the value returned by these functions. -** For the purposes of this interface, a CREATE TABLE AS SELECT statement -** does not count as an INSERT, UPDATE or DELETE statement and hence the rows -** added to the new table by the CREATE TABLE AS SELECT statement are not -** counted. ** ** ^Only changes made directly by the INSERT, UPDATE or DELETE statement are ** considered - auxiliary changes caused by [CREATE TRIGGER | triggers], @@ -4241,17 +4222,13 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal); ** and sqlite3_prepare16_v3() use UTF-16. ** ** ^If the nByte argument is negative, then zSql is read up to the -** first zero terminator. ^If nByte is positive, then it is the maximum -** number of bytes read from zSql. 
When nByte is positive, zSql is read -** up to the first zero terminator or until the nByte bytes have been read, -** whichever comes first. ^If nByte is zero, then no prepared +** first zero terminator. ^If nByte is positive, then it is the +** number of bytes read from zSql. ^If nByte is zero, then no prepared ** statement is generated. ** If the caller knows that the supplied string is nul-terminated, then ** there is a small performance advantage to passing an nByte parameter that ** is the number of bytes in the input string including ** the nul-terminator. -** Note that nByte measure the length of the input in bytes, not -** characters, even for the UTF-16 interfaces. ** ** ^If pzTail is not NULL then *pzTail is made to point to the first byte ** past the end of the first SQL statement in zSql. These routines only @@ -5622,7 +5599,7 @@ SQLITE_API int sqlite3_create_window_function( ** This flag instructs SQLite to omit some corner-case optimizations that ** might disrupt the operation of the [sqlite3_value_subtype()] function, ** causing it to return zero rather than the correct subtype(). -** All SQL functions that invoke [sqlite3_value_subtype()] should have this +** SQL functions that invokes [sqlite3_value_subtype()] should have this ** property. If the SQLITE_SUBTYPE property is omitted, then the return ** value from [sqlite3_value_subtype()] might sometimes be zero even though ** a non-zero subtype was specified by the function argument expression. 
@@ -8387,9 +8364,8 @@ SQLITE_API int sqlite3_test_control(int op, ...); #define SQLITE_TESTCTRL_TRACEFLAGS 31 #define SQLITE_TESTCTRL_TUNE 32 #define SQLITE_TESTCTRL_LOGEST 33 -#define SQLITE_TESTCTRL_USELONGDOUBLE 34 /* NOT USED */ -#define SQLITE_TESTCTRL_HCT_MTCOMMIT 35 -#define SQLITE_TESTCTRL_LAST 35 /* Largest TESTCTRL */ +#define SQLITE_TESTCTRL_USELONGDOUBLE 34 +#define SQLITE_TESTCTRL_LAST 34 /* Largest TESTCTRL */ /* ** CAPI3REF: SQL Keyword Checking @@ -9364,16 +9340,6 @@ typedef struct sqlite3_backup sqlite3_backup; ** APIs are not strictly speaking threadsafe. If they are invoked at the ** same time as another thread is invoking sqlite3_backup_step() it is ** possible that they return invalid values. -** -** Alternatives To Using The Backup API -** -** Other techniques for safely creating a consistent backup of an SQLite -** database include: -** -**
      -**
    • The [VACUUM INTO] command. -**
    • The [sqlite3_rsync] utility program. -**
    */ SQLITE_API sqlite3_backup *sqlite3_backup_init( sqlite3 *pDest, /* Destination database handle */ @@ -10573,14 +10539,6 @@ typedef struct sqlite3_snapshot { ** If there is not already a read-transaction open on schema S when ** this function is called, one is opened automatically. ** -** If a read-transaction is opened by this function, then it is guaranteed -** that the returned snapshot object may not be invalidated by a database -** writer or checkpointer until after the read-transaction is closed. This -** is not guaranteed if a read-transaction is already open when this -** function is called. In that case, any subsequent write or checkpoint -** operation on the database may invalidate the returned snapshot handle, -** even while the read-transaction remains open. -** ** The following must be true for this function to succeed. If any of ** the following statements are false when sqlite3_snapshot_get() is ** called, SQLITE_ERROR is returned. The final value of *P is undefined @@ -11003,9 +10961,6 @@ SQLITE_API int sqlite3_commit_status( # undef double #endif -SQLITE_API void sqlite3_hct_cas_failure(int nCASFailCnt, int nCASFailReset); -SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt); - #if defined(__wasi__) # undef SQLITE_WASI # define SQLITE_WASI 1 @@ -11020,7 +10975,7 @@ SQLITE_API void sqlite3_hct_proc_failure(int nProcFailCnt); #ifdef __cplusplus } /* End of the 'extern "C"' block */ #endif -/* #endif for SQLITE3_H will be added by mksqlite3.tcl */ +#endif /* SQLITE3_H */ /******** Begin file sqlite3rtree.h *********/ /* @@ -13399,6 +13354,7 @@ struct Fts5ExtensionApi { ** Applications may also register custom tokenizer types. A tokenizer ** is registered by providing fts5 with a populated instance of the ** following structure. All structure methods must be defined, setting +** ** any member of the fts5_tokenizer struct to NULL leads to undefined ** behaviour. 
The structure methods are expected to function as follows: ** @@ -13742,4 +13698,3 @@ struct fts5_api { #endif /* _FTS5_H */ /******** End of fts5.h *********/ -#endif /* SQLITE3_H */ diff --git a/test/lib/BedrockTester.cpp b/test/lib/BedrockTester.cpp index 424c2e0e0..f0cfd5c83 100644 --- a/test/lib/BedrockTester.cpp +++ b/test/lib/BedrockTester.cpp @@ -17,7 +17,7 @@ PortMap BedrockTester::ports; mutex BedrockTester::_testersMutex; set BedrockTester::_testers; -const bool BedrockTester::ENABLE_HCTREE{true}; +const bool BedrockTester::ENABLE_HCTREE{false}; string BedrockTester::getTempFileName(string prefix) { string templateStr = "/tmp/" + prefix + "bedrocktest_XXXXXX.db"; From 7d26f017cefe9fca7710d688110e46940da59ef5 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 8 Jan 2025 12:17:04 -0800 Subject: [PATCH 120/127] Fix fork --- sqlitecluster/SQLiteNode.cpp | 91 +++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 42 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index a87e25f96..c17d55cb8 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -166,27 +166,27 @@ SQLiteNode::~SQLiteNode() { } void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIndex, uint64_t threadAttemptStartTimestamp) { - // Notify the sync thread that this thread has begun. + bool goSearchingOnExit = false; { - unique_lock lock(_replicateStartMutex); - _replicateThreadStarted = true; - } - _replicateStartCV.notify_all(); + // Make sure when this thread exits we decrement our thread counter. + ScopedDecrement decrementer(_replicationThreadCount); - // Initialize each new thread with a new number. - SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); + // Notify the sync thread that this thread has begun. + { + unique_lock lock(_replicateStartMutex); + _replicateThreadStarted = true; + } + _replicateStartCV.notify_all(); - // Actual thread startup time. 
- uint64_t threadStartTime = STimeNow(); + // Initialize each new thread with a new number. + SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); - // Allow the DB handle to be returned regardless of how this function exits. - SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); - SQLite& db = dbScope.db(); + // Actual thread startup time. + uint64_t threadStartTime = STimeNow(); - bool goSearchingOnExit = false; - { - // Make sure when this thread exits we decrement our thread counter. - ScopedDecrement decrementer(_replicationThreadCount); + // Allow the DB handle to be returned regardless of how this function exits. + SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); + SQLite& db = dbScope.db(); SDEBUG("Replicate thread started: " << command.methodLine); if (SIEquals(command.methodLine, "BEGIN_TRANSACTION")) { @@ -293,9 +293,6 @@ void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIn _handleRollbackTransaction(db, peer, command); --_concurrentReplicateTransactions; goSearchingOnExit = true; - } else if (SIEquals(command.methodLine, "COMMIT_TRANSACTION")) { - SINFO("[performance] Notifying threads that leader has committed transaction " << command.calcU64("CommitCount")); - _leaderCommitNotifier.notifyThrough(command.calcU64("CommitCount")); } } if (goSearchingOnExit) { @@ -1656,33 +1653,43 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (_replicationThreadsShouldExit) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { - auto threadID = _replicationThreadCount.fetch_add(1); - SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); - try { - uint64_t threadAttemptStartTimestamp = STimeNow(); - _replicateThreadStarted = false; - thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); - { - unique_lock lock(_replicateStartMutex); - while 
(!_replicateThreadStarted) { - _replicateStartCV.wait(lock); - if (!_replicateThreadStarted) { - SINFO("condition variable finished waiting but replicate thread not started."); + if (SIEquals(message.methodLine, "COMMIT_TRANSACTION")) { + // For COMMIT_TRANSACTION messages, we do not start a new thread. This aoids a race condition where we could spin up the + // COMMIT thread, but not yet have called `_leaderCommitNotifier.notifyThrough` for the current transaction number while + // the sync thread changes states. Particularly, if the sync thread drops out of FOLLOWING before this happens, + // It may mean that we drop commits that leader had sent us, because we haven't recorded that we received them. + // When leader is standing down this can ultimately lead to a fork. + SINFO("[performance] Notifying threads that leader has committed transaction " << message.calcU64("CommitCount")); + _leaderCommitNotifier.notifyThrough(message.calcU64("CommitCount")); + } else { + try { + auto threadID = _replicationThreadCount.fetch_add(1); + SDEBUG("Spawning concurrent replicate thread (blocks until DB handle available): " << threadID); + uint64_t threadAttemptStartTimestamp = STimeNow(); + _replicateThreadStarted = false; + thread(&SQLiteNode::_replicate, this, peer, message, _dbPool->getIndex(false), threadAttemptStartTimestamp).detach(); + { + unique_lock lock(_replicateStartMutex); + while (!_replicateThreadStarted) { + _replicateStartCV.wait(lock); + if (!_replicateThreadStarted) { + SINFO("condition variable finished waiting but replicate thread not started."); + } } } + SDEBUG("Done spawning concurrent replicate thread: " << threadID); + } catch (const system_error& e) { + // If the server is strugling and falling behind on replication, we might have too many threads + // causing a resource exhaustion. If that happens, all the transactions that are already threaded + // and waiting for the transaction that failed will be stuck in an infinite loop. 
To prevent that + // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads + // that depend on the transaction that failed to be threaded. + _replicationThreadCount.fetch_sub(1); + SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. e.what()=" << e.what()); + _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); + STHROW("Error starting replicate thread so giving up and reconnecting."); } - } catch (const system_error& e) { - // If the server is strugling and falling behind on replication, we might have too many threads - // causing a resource exhaustion. If that happens, all the transactions that are already threaded - // and waiting for the transaction that failed will be stuck in an infinite loop. To prevent that - // we're changing the state to SEARCHING and sending the cancelAfter property to drop all threads - // that depend on the transaction that failed to be threaded. - _replicationThreadCount.fetch_sub(1); - SWARN("Caught system_error starting _replicate thread with " << _replicationThreadCount.load() << " threads. 
e.what()=" << e.what()); - _changeState(SQLiteNodeState::SEARCHING, message.calcU64("NewCount") - 1); - STHROW("Error starting replicate thread so giving up and reconnecting."); } - SDEBUG("Done spawning concurrent replicate thread: " << threadID); } } else if (SIEquals(message.methodLine, "APPROVE_TRANSACTION") || SIEquals(message.methodLine, "DENY_TRANSACTION")) { // APPROVE_TRANSACTION: Sent to the leader by a follower when it confirms it was able to begin a transaction and From 014be2cb8d24a5fcd2f5efe1a5b80b7ec921960b Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 8 Jan 2025 12:23:03 -0800 Subject: [PATCH 121/127] Don't need this part of the change --- sqlitecluster/SQLiteNode.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index c17d55cb8..128b1fcc1 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -166,27 +166,27 @@ SQLiteNode::~SQLiteNode() { } void SQLiteNode::_replicate(SQLitePeer* peer, SData command, size_t sqlitePoolIndex, uint64_t threadAttemptStartTimestamp) { - bool goSearchingOnExit = false; + // Notify the sync thread that this thread has begun. { - // Make sure when this thread exits we decrement our thread counter. - ScopedDecrement decrementer(_replicationThreadCount); + unique_lock lock(_replicateStartMutex); + _replicateThreadStarted = true; + } + _replicateStartCV.notify_all(); - // Notify the sync thread that this thread has begun. - { - unique_lock lock(_replicateStartMutex); - _replicateThreadStarted = true; - } - _replicateStartCV.notify_all(); + // Initialize each new thread with a new number. + SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); - // Initialize each new thread with a new number. - SInitialize("replicate" + to_string(currentReplicateThreadID.fetch_add(1))); + // Actual thread startup time. 
+ uint64_t threadStartTime = STimeNow(); - // Actual thread startup time. - uint64_t threadStartTime = STimeNow(); + // Allow the DB handle to be returned regardless of how this function exits. + SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); + SQLite& db = dbScope.db(); - // Allow the DB handle to be returned regardless of how this function exits. - SQLiteScopedHandle dbScope(*_dbPool, sqlitePoolIndex); - SQLite& db = dbScope.db(); + bool goSearchingOnExit = false; + { + // Make sure when this thread exits we decrement our thread counter. + ScopedDecrement decrementer(_replicationThreadCount); SDEBUG("Replicate thread started: " << command.methodLine); if (SIEquals(command.methodLine, "BEGIN_TRANSACTION")) { From bc1aa0a57981ed2eb008ab3cd60ffc4fcbf33b2a Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 8 Jan 2025 14:26:05 -0800 Subject: [PATCH 122/127] Update sqlitecluster/SQLiteNode.cpp Co-authored-by: Daniel Silva --- sqlitecluster/SQLiteNode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 128b1fcc1..044fdf414 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1654,7 +1654,7 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { SINFO("Discarding replication message, stopping FOLLOWING"); } else { if (SIEquals(message.methodLine, "COMMIT_TRANSACTION")) { - // For COMMIT_TRANSACTION messages, we do not start a new thread. This aoids a race condition where we could spin up the + // For COMMIT_TRANSACTION messages, we do not start a new thread. This avoids a race condition where we could spin up the // COMMIT thread, but not yet have called `_leaderCommitNotifier.notifyThrough` for the current transaction number while // the sync thread changes states. 
Particularly, if the sync thread drops out of FOLLOWING before this happens, // It may mean that we drop commits that leader had sent us, because we haven't recorded that we received them. From feeb5bab0ce942536e1f09335c9a597527fed323 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Wed, 8 Jan 2025 14:29:41 -0800 Subject: [PATCH 123/127] Updated comment --- sqlitecluster/SQLiteNode.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sqlitecluster/SQLiteNode.cpp b/sqlitecluster/SQLiteNode.cpp index 044fdf414..047a0fd78 100644 --- a/sqlitecluster/SQLiteNode.cpp +++ b/sqlitecluster/SQLiteNode.cpp @@ -1656,9 +1656,9 @@ void SQLiteNode::_onMESSAGE(SQLitePeer* peer, const SData& message) { if (SIEquals(message.methodLine, "COMMIT_TRANSACTION")) { // For COMMIT_TRANSACTION messages, we do not start a new thread. This avoids a race condition where we could spin up the // COMMIT thread, but not yet have called `_leaderCommitNotifier.notifyThrough` for the current transaction number while - // the sync thread changes states. Particularly, if the sync thread drops out of FOLLOWING before this happens, - // It may mean that we drop commits that leader had sent us, because we haven't recorded that we received them. - // When leader is standing down this can ultimately lead to a fork. + // the sync thread changes states. Particularly, if the sync thread dropped out of FOLLOWING before this happened, + // We could have dropped commits that leader had sent us, because we hadn't recorded that we received them. + // When leader is standing down this could have ultimately led to a fork because no other node saved those commits. 
SINFO("[performance] Notifying threads that leader has committed transaction " << message.calcU64("CommitCount")); _leaderCommitNotifier.notifyThrough(message.calcU64("CommitCount")); } else { From 0860abd510f793ceee819ab795f9e2fb94076678 Mon Sep 17 00:00:00 2001 From: Jasper Huang Date: Wed, 8 Jan 2025 21:54:42 -0800 Subject: [PATCH 124/127] Add recipientEmail to the whitelist --- libstuff/SLog.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index 2e4f662a5..d92e5b47b 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -62,6 +62,7 @@ static set PARAMS_WHITELIST = { "employees", "mergeFromEmail", "mergeToEmail", + "recipientEmail", }; string addLogParams(string&& message, const STable& params) { From 0b0c09b87aeee9914c75e5f3881e87684159b9af Mon Sep 17 00:00:00 2001 From: Jasper Huang Date: Thu, 9 Jan 2025 11:16:32 -0800 Subject: [PATCH 125/127] Revert "Add recipientEmail to the whitelist" --- libstuff/SLog.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index d92e5b47b..2e4f662a5 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -62,7 +62,6 @@ static set PARAMS_WHITELIST = { "employees", "mergeFromEmail", "mergeToEmail", - "recipientEmail", }; string addLogParams(string&& message, const STable& params) { From 16768f41133352b40030547a96f932e8bba55cbc Mon Sep 17 00:00:00 2001 From: Andrew Rosiclair Date: Thu, 9 Jan 2025 16:07:18 -0500 Subject: [PATCH 126/127] Revert "Add log params" --- libstuff/SLog.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/libstuff/SLog.cpp b/libstuff/SLog.cpp index 2e4f662a5..ff94e5a9b 100644 --- a/libstuff/SLog.cpp +++ b/libstuff/SLog.cpp @@ -60,8 +60,6 @@ static set PARAMS_WHITELIST = { "approver", "approvers", "employees", - "mergeFromEmail", - "mergeToEmail", }; string addLogParams(string&& message, const STable& params) { From 7232f77babf6247209b3c6b9d0b437fb5ff1af20 Mon Sep 17 00:00:00 2001 From: Tyler Karaszewski Date: Thu, 9 
Jan 2025 16:13:11 -0800 Subject: [PATCH 127/127] Fix timeouts not worknig when not leading or follwoing --- BedrockCore.cpp | 20 +++++++++++--------- BedrockCore.h | 6 +++--- BedrockServer.cpp | 10 +++++++++- sqlitecluster/SQLiteCore.h | 5 +++++ test/lib/BedrockTester.cpp | 9 ++++++--- test/lib/BedrockTester.h | 5 +++-- 6 files changed, 37 insertions(+), 18 deletions(-) diff --git a/BedrockCore.cpp b/BedrockCore.cpp index 5103612b8..602e05a95 100644 --- a/BedrockCore.cpp +++ b/BedrockCore.cpp @@ -52,12 +52,12 @@ uint64_t BedrockCore::_getRemainingTime(const unique_ptr& comman return isProcessing ? min(processTimeout, adjustedTimeout) : adjustedTimeout; } -bool BedrockCore::isTimedOut(unique_ptr& command) { +bool BedrockCore::isTimedOut(unique_ptr& command, SQLite* db, const BedrockServer* server) { try { _getRemainingTime(command, false); } catch (const SException& e) { // Yep, timed out. - _handleCommandException(command, e); + _handleCommandException(command, e, db, server); command->complete = true; return true; } @@ -104,7 +104,7 @@ void BedrockCore::prePeekCommand(unique_ptr& command, bool isBlo STHROW("555 Timeout prePeeking command"); } } catch (const SException& e) { - _handleCommandException(command, e); + _handleCommandException(command, e, &_db, &_server); command->complete = true; } catch (...) 
{ SALERT("Unhandled exception typename: " << SGetCurrentExceptionName() << ", command: " << request.methodLine); @@ -186,7 +186,7 @@ BedrockCore::RESULT BedrockCore::peekCommand(unique_ptr& command } } catch (const SException& e) { command->repeek = false; - _handleCommandException(command, e); + _handleCommandException(command, e, &_db, &_server); } catch (const SHTTPSManager::NotLeading& e) { command->repeek = false; returnValue = RESULT::SHOULD_PROCESS; @@ -284,7 +284,7 @@ BedrockCore::RESULT BedrockCore::processCommand(unique_ptr& comm } } } catch (const SException& e) { - _handleCommandException(command, e); + _handleCommandException(command, e, &_db, &_server); _db.rollback(); needsCommit = false; } catch (const SQLite::constraint_error& e) { @@ -353,7 +353,7 @@ void BedrockCore::postProcessCommand(unique_ptr& command, bool i STHROW("555 Timeout postProcessing command"); } } catch (const SException& e) { - _handleCommandException(command, e); + _handleCommandException(command, e, &_db, &_server); } catch (...) { SALERT("Unhandled exception typename: " << SGetCurrentExceptionName() << ", command: " << request.methodLine); command->response.methodLine = "500 Unhandled Exception"; @@ -367,7 +367,7 @@ void BedrockCore::postProcessCommand(unique_ptr& command, bool i _db.setQueryOnly(false); } -void BedrockCore::_handleCommandException(unique_ptr& command, const SException& e) { +void BedrockCore::_handleCommandException(unique_ptr& command, const SException& e, SQLite* db, const BedrockServer* server) { string msg = "Error processing command '" + command->request.methodLine + "' (" + e.what() + "), ignoring."; if (!e.body.empty()) { msg = msg + " Request body: " + e.body; @@ -396,9 +396,11 @@ void BedrockCore::_handleCommandException(unique_ptr& command, c } // Add the commitCount header to the response. 
- command->response["commitCount"] = to_string(_db.getCommitCount()); + if (db) { + command->response["commitCount"] = to_string(db->getCommitCount()); + } - if (_server.args.isSet("-extraExceptionLogging")) { + if (server && server->args.isSet("-extraExceptionLogging")) { auto stack = e.details(); command->response["exceptionSource"] = stack.back(); } diff --git a/BedrockCore.h b/BedrockCore.h index d072b8397..e1eeaf869 100644 --- a/BedrockCore.h +++ b/BedrockCore.h @@ -34,7 +34,7 @@ class BedrockCore : public SQLiteCore { // Checks if a command has already timed out. Like `peekCommand` without doing any work. Returns `true` and sets // the same command state as `peekCommand` would if the command has timed out. Returns `false` and does nothing if // the command hasn't timed out. - bool isTimedOut(unique_ptr& command); + static bool isTimedOut(unique_ptr& command, SQLite* db = nullptr, const BedrockServer* server = nullptr); void prePeekCommand(unique_ptr& command, bool isBlockingCommitThread); @@ -71,8 +71,8 @@ class BedrockCore : public SQLiteCore { // Gets the amount of time remaining until this command times out. This is the difference between the command's // 'timeout' value (or the default timeout, if not set) and the time the command was initially scheduled to run. 
If // this time is already expired, this throws `555 Timeout` - uint64_t _getRemainingTime(const unique_ptr& command, bool isProcessing); + static uint64_t _getRemainingTime(const unique_ptr& command, bool isProcessing); - void _handleCommandException(unique_ptr& command, const SException& e); + static void _handleCommandException(unique_ptr& command, const SException& e, SQLite* db = nullptr, const BedrockServer* server = nullptr); const BedrockServer& _server; }; diff --git a/BedrockServer.cpp b/BedrockServer.cpp index c0cad65f3..9183f0d19 100644 --- a/BedrockServer.cpp +++ b/BedrockServer.cpp @@ -801,6 +801,14 @@ void BedrockServer::runCommand(unique_ptr&& _command, bool isBlo // We just spin until the node looks ready to go. Typically, this doesn't happen expect briefly at startup. size_t waitCount = 0; while (_upgradeInProgress || (getState() != SQLiteNodeState::LEADING && getState() != SQLiteNodeState::FOLLOWING)) { + + // It's feasible that our command times out in this loop. In this case, we do not have a DB object to pass. + // The only implication of this is the response does not get the commitCount attached to it. + if (BedrockCore::isTimedOut(command, nullptr, this)) { + _reply(command); + return; + } + // This sleep call is pretty ugly, but it should almost never happen. We're accepting the potential // looping sleep call for the general case where we just check some bools and continue, instead of // avoiding the sleep call but having every thread lock a mutex here on every loop. @@ -880,7 +888,7 @@ void BedrockServer::runCommand(unique_ptr&& _command, bool isBlo // to be returned to the main queue, where they would have timed out in `peek`, but it was never called // because the commands already had a HTTPS request attached, and then they were immediately re-sent to the // sync queue, because of the QUORUM consistency requirement, resulting in an endless loop. 
- if (core.isTimedOut(command)) { + if (core.isTimedOut(command, &db, this)) { _reply(command); return; } diff --git a/sqlitecluster/SQLiteCore.h b/sqlitecluster/SQLiteCore.h index ebefb20ae..1342a2f77 100644 --- a/sqlitecluster/SQLiteCore.h +++ b/sqlitecluster/SQLiteCore.h @@ -2,6 +2,11 @@ class SQLite; class SQLiteNode; +#include +#include + +using namespace std; + class SQLiteCore { public: // Constructor that stores the database object we'll be working on. diff --git a/test/lib/BedrockTester.cpp b/test/lib/BedrockTester.cpp index f0cfd5c83..bd0ae951c 100644 --- a/test/lib/BedrockTester.cpp +++ b/test/lib/BedrockTester.cpp @@ -546,10 +546,10 @@ void BedrockTester::freeDB() { _db = nullptr; } -string BedrockTester::readDB(const string& query, bool online) +string BedrockTester::readDB(const string& query, bool online, int64_t timeoutMS) { SQResult result; - bool success = readDB(query, result, online); + bool success = readDB(query, result, online, timeoutMS); if (!success) { return ""; } @@ -565,7 +565,7 @@ string BedrockTester::readDB(const string& query, bool online) return result.rows[0][0]; } -bool BedrockTester::readDB(const string& query, SQResult& result, bool online) +bool BedrockTester::readDB(const string& query, SQResult& result, bool online, int64_t timeoutMS) { if (ENABLE_HCTREE && online) { string fixedQuery = query; @@ -576,6 +576,9 @@ bool BedrockTester::readDB(const string& query, SQResult& result, bool online) SData command("Query"); command["Query"] = fixedQuery; command["Format"] = "JSON"; + if (timeoutMS) { + command["timeout"] = to_string(timeoutMS); + } auto commandResult = executeWaitMultipleData({command}, 1); auto row0 = SParseJSONObject(commandResult[0].content)["rows"]; auto headerString = SParseJSONObject(commandResult[0].content)["headers"]; diff --git a/test/lib/BedrockTester.h b/test/lib/BedrockTester.h index 6aa005954..dedbfa3be 100644 --- a/test/lib/BedrockTester.h +++ b/test/lib/BedrockTester.h @@ -79,8 +79,9 @@ class 
BedrockTester { // Read from the DB file, without going through the bedrock server. Two interfaces are provided to maintain // compatibility with the `SQLite` class. - string readDB(const string& query, bool online = true); - bool readDB(const string& query, SQResult& result, bool online = true); + // Note that timeoutMS only applies in HC-Tree mode. It is ignored in WAL2 mode. + string readDB(const string& query, bool online = true, int64_t timeoutMS = 0); + bool readDB(const string& query, SQResult& result, bool online = true, int64_t timeoutMS = 0); // Closes and releases any existing DB file. void freeDB();