Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

making the option --limit in pull usable. #165

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ Lieer can be configured using `gmi set`. Use without any options to get a list o

**`lastmod`** is the latest synced Notmuch database revision. Anything changed after this revision will be pushed on [`gmi push`](#ush).

**`limit`** is the parameter which limits the number of messages to be saved in the local database. It affects all the pull and push commands and removes older messages to keep the number of messages less than the limit specified. Further, the older messages are removed in a way as to not leave any incomplete threads. This limit can be removed using `gmi set --unset-limit`.

*Important:* If the limit parameter is changed to a value which is greater than one set previously, the next pull will not be able to fetch extra mails to fill upto the limit. So, a forced pull `gmi pull -f` needs to be performed. This situation would not arise if the new limit parameter is less than the older limit, as the partial pull will limit the number of messages by itself.

**`Timeout`** is the timeout in seconds used for the HTTP connection to GMail. `0` means the forever or system error/timeout, [whichever occurs first](https://github.com/gauteh/lieer/issues/83#issuecomment-396487919).

**`File extension`** is an optional argument to include the specified extension in local file names (e.g., `mbox`) which can be useful for indexing them with third-party programs.
Expand Down
82 changes: 58 additions & 24 deletions lieer/gmailieer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@ def main (self):
parser_pull.add_argument ('-t', '--list-labels', action='store_true', default = False,
help = 'list all remote labels (pull)')

parser_pull.add_argument ('--limit', type = int, default = None,
help = 'Maximum number of messages to pull (soft limit, GMail may return more), note that this may upset the tally of synchronized messages.')


parser_pull.add_argument ('-d', '--dry-run', action='store_true',
default = False, help = 'do not make any changes')

Expand All @@ -76,9 +72,6 @@ def main (self):
description = 'push',
help = 'push local tag-changes')

parser_push.add_argument ('--limit', type = int, default = None,
help = 'Maximum number of messages to push, note that this may upset the tally of synchronized messages.')

parser_push.add_argument ('-d', '--dry-run', action='store_true',
default = False, help = 'do not make any changes')

Expand Down Expand Up @@ -119,9 +112,6 @@ def main (self):
description = 'sync',
help = 'sync changes (flags have same meaning as for push and pull)')

parser_sync.add_argument ('--limit', type = int, default = None,
help = 'Maximum number of messages to sync, note that this may upset the tally of synchronized messages.')

parser_sync.add_argument ('-d', '--dry-run', action='store_true',
default = False, help = 'do not make any changes')

Expand Down Expand Up @@ -193,7 +183,9 @@ def main (self):
help = 'Remove messages that have been deleted on the remote (default is on)')
parser_set.add_argument ('--no-remove-local-messages', action = 'store_true', default = False,
help = 'Do not remove messages that have been deleted on the remote')

parser_set.add_argument ('--limit', type = int, default = None,
help = 'Maximum number of messages to sync with the local database')
parser_set.add_argument ('--unset-limit',action = 'store_true',default = False, help = 'Do not limit the number of messages to be synced in the local database')
parser_set.set_defaults (func = self.set)


Expand Down Expand Up @@ -275,7 +267,6 @@ def setup (self, args, dry_run = False, load = False, block = False):
def sync (self, args):
self.setup (args, args.dry_run, True)
self.force = args.force
self.limit = args.limit
self.list_labels = False

self.remote.get_labels ()
Expand All @@ -293,7 +284,6 @@ def push (self, args, setup = False):
self.setup (args, args.dry_run, True)

self.force = args.force
self.limit = args.limit

self.remote.get_labels ()

Expand All @@ -313,8 +303,8 @@ def push (self, args, setup = False):
query = notmuch.Query (db, qry)

messages = list(query.search_messages ())
if self.limit is not None and len(messages) > self.limit:
messages = messages[:self.limit]
if self.local.config.limit is not None and len(messages) > self.local.config.limit:
messages = messages[:self.local.config.limit]

# get gids and filter out messages outside this repository
messages, gids = self.local.messages_to_gids (messages)
Expand Down Expand Up @@ -344,8 +334,8 @@ def _got_msgs (ms):
actions = [ a for a in actions if a ]

# limit
if self.limit is not None and len(actions) >= self.limit:
actions = actions[:self.limit]
if self.local.config.limit is not None and len(actions) >= self.local.config.limit:
Copy link
Owner

@gauteh gauteh Jul 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

>= ?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have not checked this part of the code actually. This does not seem necessary now anyways, if the number of messages itself is limited.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actions is not the same as number of messages. i think if this PR is going to be considered for merging it needs to be thoroughly tested, over a longer time, and shown to work well. many users rely on lieer to work correctly, and we have to make sure to not break setups or cause data loss. it also needs to be tested with different limits and also with no limit. I do not guarantee that it will be merged in the end.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the >= comment refers to mismatching use of sometimes > and sometimes >=.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I did go through the mismatching operations. I did not change any of them and it is your call as they remain the same as in the original repository. On that note in the push function, I do not think the if loops to limit the messages and actions are necessary now. They can be removed. Should I do that in the next commit?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actions is not the same as number of messages. i think if this PR is going to be considered for merging it needs to be thoroughly tested, over a longer time, and shown to work well. many users rely on lieer to work correctly, and we have to make sure to not break setups or cause data loss. it also needs to be tested with different limits and also with no limit. I do not guarantee that it will be merged in the end.

And, yes I will test it for the said scenarios to see if it works well.

actions = actions[:self.local.config.limit]

# push changes
if len(actions) > 0:
Expand Down Expand Up @@ -386,7 +376,6 @@ def pull (self, args, setup = False):

self.list_labels = args.list_labels
self.force = args.force
self.limit = args.limit

self.remote.get_labels () # to make sure label map is initialized

Expand Down Expand Up @@ -422,7 +411,7 @@ def partial_pull (self):

self.bar_update (len(hist))

if self.limit is not None and len(history) >= self.limit:
if self.local.config.limit is not None and len(history) >= self.local.config.limit:
break

except googleapiclient.errors.HttpError as excep:
Expand Down Expand Up @@ -551,6 +540,44 @@ def remove_from_list (lst, m):

changed = True

#limiting the number of messages in the database to local.config.limit parameter if it is set
if self.local.config.limit is not None:
with notmuch.Database (mode = notmuch.Database.MODE.READ_WRITE) as db:
query = notmuch.Query(db,'')
query.set_sort(notmuch.Query.SORT.NEWEST_FIRST)
thdlist = list(query.search_threads())
msglist = []
for t in thdlist:
msglist += list(t.get_messages())
n_keep = len(msglist) #initiating the parameter
i = 0
while n_keep > self.local.config.limit: #number of messages to keep, avoiding incomplete threads
n_keep -= thdlist[-1-i].get_total_messages()
i += 1

if len(msglist) > self.local.config.limit:
self.bar_create (total = len(msglist)-n_keep, leave = True, desc = 'Removing older messages (0)')
delete_msgs = msglist[n_keep-len(msglist):] # list of older messages to be deleted

delete_gids=[] # getting the gids for messages to be deleted
for m in delete_msgs:
for fname in m.get_filenames ():
if self.local.contains (fname):
# get gmail id
gid = self.local.__filename_to_gid__ (os.path.basename (fname))
if gid:
delete_gids.append (gid)

deleted = 0
for m in delete_gids:
self.local.remove (m,db)
deleted += 1
if not self.args.quiet and self.bar:
self.bar.set_description ('Removing older messages (%d)' % deleted)
self.bar_update (1)
self.bar_close ()
changed = True

if len (labels_changed) > 0:
lchanged = 0
with notmuch.Database (mode = notmuch.Database.MODE.READ_WRITE) as db:
Expand Down Expand Up @@ -580,6 +607,9 @@ def remove_from_list (lst, m):
def full_pull (self):
total = 1

if self.local.config.limit is not None:
print("Limit parameter set, number of messages that will be fetched:",self.local.config.limit)

self.bar_create (leave = True, total = total, desc = 'fetching messages')

# NOTE:
Expand All @@ -588,24 +618,21 @@ def full_pull (self):
# simple metadata like message ids.
message_gids = []
last_id = self.remote.get_current_history_id (self.local.state.last_historyId)

for mset in self.remote.all_messages ():
(total, gids) = mset

self.bar.total = total
self.bar_update (len(gids))

for m in gids:
message_gids.append (m['id'])

if self.limit is not None and len(message_gids) >= self.limit:
if self.local.config.limit is not None and len(message_gids) >= self.local.config.limit:
break

self.bar_close ()

if self.local.config.remove_local_messages:
if self.limit and not self.dry_run:
raise ValueError('--limit with "remove_local_messages" will cause lots of messages to be deleted')

# removing files that have been deleted remotely
all_remote = set (message_gids)
Expand Down Expand Up @@ -785,6 +812,12 @@ def set (self, args):
if args.remove_local_messages:
self.local.config.set_remove_local_messages (True)

if args.limit is not None:
Lattitude75 marked this conversation as resolved.
Show resolved Hide resolved
self.local.config.set_limit (args.limit)

if args.unset_limit:
self.local.config.set_limit (0)

if args.no_remove_local_messages:
self.local.config.set_remove_local_messages (False)

Expand All @@ -802,6 +835,7 @@ def set (self, args):
print ("historyId .........: %d" % self.local.state.last_historyId)
print ("lastmod ...........: %d" % self.local.state.lastmod)
print ("Timeout ...........: %f" % self.local.config.timeout)
print ("Limit .............:",self.local.config.limit)
print ("File extension ....: %s" % self.local.config.file_extension)
print ("Remove local messages .....:", self.local.config.remove_local_messages)
print ("Drop non existing labels...:", self.local.config.drop_non_existing_label)
Expand Down
10 changes: 10 additions & 0 deletions lieer/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def __init__ (self, config_f):
self.ignore_tags = set(self.json.get ('ignore_tags', []))
self.ignore_remote_labels = set(self.json.get ('ignore_remote_labels', Remote.DEFAULT_IGNORE_LABELS))
self.file_extension = self.json.get ('file_extension', '')
self.limit = self.json.get ('limit', None)

def write (self):
self.json = {}
Expand All @@ -115,6 +116,7 @@ def write (self):
self.json['ignore_remote_labels'] = list(self.ignore_remote_labels)
self.json['remove_local_messages'] = self.remove_local_messages
self.json['file_extension'] = self.file_extension
self.json['limit'] = self.limit

if os.path.exists (self.config_f):
shutil.copyfile (self.config_f, self.config_f + '.bak')
Expand Down Expand Up @@ -174,6 +176,13 @@ def set_file_extension (self, t):
print ("Failed creating test file with file extension: " + t + ", not set.")
raise

def set_limit (self,l):
if l != 0:
self.limit = l
else:
self.limit = None
self.write()


class State:
# last historyid of last synchronized message, anything that has happened
Expand Down Expand Up @@ -413,6 +422,7 @@ def messages_to_gids (self, msgs):

return (messages, gids)


def __filename_to_gid__ (self, fname):
ext = ''
if self.config.file_extension:
Expand Down