From 8d90a51566e68a1cf2486e9f39d79045a038e530 Mon Sep 17 00:00:00 2001 From: Murali Krishna Date: Sat, 18 Jul 2020 11:16:30 +0530 Subject: [PATCH 1/4] HackerEarth scraper has similarly been reworked to be functional; Bugfixes for CodeChef scraper --- scrapers/codechef.py | 6 +++--- scrapers/hackerearth.py | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 86c661a..98fdc2b 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -55,6 +55,7 @@ def get_rankings(site, contest_code): if page == total_pages-1: # Reached Last Page break driver.get(site + f'&page={page+2}') # go to next page + return scraped_scoreboard def scrape(contest_codes): scoreboard_base_url:str = "https://www.codechef.com/rankings" @@ -70,11 +71,10 @@ def scrape(contest_codes): easy_points=100000 # Initial value set to points per problem for Division in divisions: # Build the scraped scoreboard - divisions[Division].problems = get_problems(f"{site_url}/{contest_code}{Division}") - divisions[Division].scraped_scoreboard = get_rankings(f"{scoreboard_base_url}/{contest_code}{Division}{scoreboard_filter_query}", contest_code) + divisions[Division] = divisions[Division]._replace(problems = get_problems(f"{site_url}/{contest_code}{Division}")) + divisions[Division] = divisions[Division]._replace(scraped_scoreboard = get_rankings(f"{scoreboard_base_url}/{contest_code}{Division}{scoreboard_filter_query}", contest_code)) easy_points=len(divisions['B'].problems-divisions['A'].problems)*easy_points # Points to add to div-A participants - for i in range(len(divisions['A'].scraped_scoreboard)): # Add easy points to all div-A participants divisions['A'].scraped_scoreboard[i]=divisions['A'].scraped_scoreboard[i][0],divisions['A'].scraped_scoreboard[i][1]+easy_points diff --git a/scrapers/hackerearth.py b/scrapers/hackerearth.py index c82670f..1e47651 100644 --- a/scrapers/hackerearth.py +++ b/scrapers/hackerearth.py @@ -1,13 +1,18 @@ import requests +import sys +import os +sys.path.append( # Add absolute path of utils to sys.path + os.path.join( os.path.dirname( os.path.realpath( __file__ )), + '../../student-ratings' )) from bs4 import BeautifulSoup from tinydb import TinyDB, where from database.db_tools import DB_FILE, HACKEREARTH -import logging +from utils.log import * # 0 - event_id # 1 - page number -leaderboard_base_url = 'https://www.hackerearth.com/AJAX/feed/newsfeed/icpc-leaderboard/event/{0}/{1}/' - +API = 'https://www.hackerearth.com/AJAX/feed/newsfeed/icpc-leaderboard/event/{0}/{1}/' +leaderboard_base_url = 'https://www.hackerearth.com/challenges/competitive/{}/leaderboard/' def get_handles(html_doc): soup = BeautifulSoup(html_doc, 'html.parser') @@ -28,24 +33,42 @@ def get_leaderboard(event_id): leaderboard = [] while True: - r = requests.get(leaderboard_base_url.format(event_id, page_num)) - handles = get_handles(r.text) + + handles = get_handles(requests.get(API.format(event_id, page_num)).text) # url returns last page for page_num greater than last page number - if leaderboard[-len(handles):] == handles: + if leaderboard[-len(handles):] == handles or page_num > 10: break leaderboard.extend(handles) + info(f'Handles retrieved for page {page_num}') page_num += 1 - logging.info(page_num) return leaderboard +def get_contest_IDs(contest_codes): + contest_IDs = dict() + for contest_code in contest_codes: + # Gets the ICPC contest ID from the url; We use this ID to send the request + contest_IDs[contest_code] = 
BeautifulSoup(requests.get(leaderboard_base_url.format(contest_code)).text, 'html.parser').find('div', class_='event-id').text # + + return contest_IDs -if __name__ == "__main__": +def scrape(contest_codes): + contest_IDs = get_contest_IDs(contest_codes) + leaderboards = [] + for contest in contest_IDs: + info(f'HackerEarth contest {contest}:') + leaderboard = get_leaderboard(contest_IDs[contest]) + #contest_ranks_file = f'database/contest_ranks/hackerearth-{contest}.in' + leaderboards.append(leaderboard) + return leaderboards + +'''if __name__ == "__main__": logging.basicConfig(level='INFO') event_id = '814357' leaderboard = get_leaderboard(event_id) - print(*leaderboard, sep='\n') + print(*leaderboard, sep='\n')''' + """Uncomment to output mapped PES handles only with TinyDB(DB_FILE) as database: From f60efa959ff30b7c9dc5de5e4cc41542d8d3f651 Mon Sep 17 00:00:00 2001 From: Murali Krishna Date: Sat, 18 Jul 2020 20:04:58 +0530 Subject: [PATCH 2/4] Created a main file; It performs all the functionalities covered by executor.sh. Updated README and requirements --- .gitignore | 6 +- README.md | 28 +-- ...estsToProcess.in => contest_names_file.in} | 0 database/db_tools.py | 30 +-- ratings/processor.py | 56 +++--- requirements.txt | 10 +- run.py | 172 ++++++++++++++++++ scrapers/codechef.py | 4 - scrapers/hackerearth.py | 9 +- 9 files changed, 252 insertions(+), 63 deletions(-) rename database/{contestsToProcess.in => contest_names_file.in} (100%) create mode 100644 run.py diff --git a/.gitignore b/.gitignore index 6115d9f..9bad418 100644 --- a/.gitignore +++ b/.gitignore @@ -43,4 +43,8 @@ pip-delete-this-directory.txt .pytest_cache/ # Mypy (static type checking) -.mypy_cache/ \ No newline at end of file +.mypy_cache/ + +# Credentials for calendar +credentials.json +token.pickle \ No newline at end of file diff --git a/README.md b/README.md index bbd8faf..570e589 100644 --- a/README.md +++ b/README.md @@ -3,28 +3,28 @@ [![Build Status](https://travis-ci.com/varunvora/alcoding.svg?branch=master)](https://travis-ci.com/varunvora/alcoding) -Alcoding Club of [PES University](https://pes.edu/) maintains ratings of its students who are active in [competitive programming](https://en.wikipedia.org/wiki/Competitive_programming). This repository contains the ratings and the code which generates it. +Alcoding Club of [PES University](https://pes.edu/) maintains ratings of its students who are active in [competitive programming](https://en.wikipedia.org/wiki/Competitive_programming). This repository contains the ratings and the code that generates it. ## Purpose -An intra-college rating is maintained so that the club can identify good coders. The club will group these students and help them improve at competitive programming by organizing meet-ups, providing resources, arranging contests and develop a coding community in the University. +An intra-college rating is maintained to aid the club in identifying good coders. The club aims to help these students improve their competitive programming skills by organizing meet-ups, providing resources, arranging contests and developing a coding community in the University. ## Ratings -The ratings are calculated by students' performances in [specified contests](database/README.md). +The ratings are calculated using students' performances in [specified contests](database/README.md). ### Mechanism -A [rank list](database/contest_ranks) of registered students is generated at the end of each contest. 
A rating is computed from the rank list, which indicates their relative performance. The implementation is almost the same as [Codechef's Rating Mechanism](https://www.codechef.com/ratings) which is a modified version of [Elo rating system](https://en.wikipedia.org/wiki/Elo_rating_system). To avoid students from [protecting their ratings](https://en.wikipedia.org/wiki/Elo_rating_system#Game_activity_versus_protecting_one's_rating) and encourage participation, a decay rule is also added which decrements a student's rating by 1% if she does not take part in 5 consecutive contests.
+A [rank list](database/contest_ranks) of registered students is generated at the end of each contest. A rating, which indicates each student's relative performance, is computed from this rank list. The implementation is very similar to [Codechef's Rating Mechanism](https://www.codechef.com/ratings), which is a modified version of the [Elo rating system](https://en.wikipedia.org/wiki/Elo_rating_system). To prevent students from [protecting their ratings](https://en.wikipedia.org/wiki/Elo_rating_system#Game_activity_versus_protecting_one's_rating) and to encourage participation, a decay rule is also added: it decrements a student's rating by 1% if they do not take part in 5 consecutive rated contests.
 
 ### Verification
 
-The [code that generates the rating](ratings/processor.py) is open. Along with that we have provided [a script with which you can verify](executor.sh) that the displayed ratings are correct. This script resets all students' ratings, and computes the ratings after all the contest ranks are considered. You may [report an issue](https://github.com/varunvora/alcoding/issues) if you find any discrepancy.
+The [code that generates the rating](ratings/processor.py) is open. We also provide [a method with which you can verify](run.py) the displayed ratings. This method resets all students' ratings and recomputes every student's rating after considering all contest ranks. Please do [report an issue](https://github.com/pes-alcoding-club/student-ratings/issues) if you find any discrepancy.
 
 ## Calendar
 
-Alcoding Club maintains a [Google calendar for competitive programming](https://calendar.google.com/calendar?cid=N3RsZGt1dXEwcW1mOW9ub2Jxb3ByZ2Z1cDRAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ). Contests that are marked as "Rated" will be considered for these ratings.
+Alcoding Club maintains a [Google Calendar for competitive programming](https://calendar.google.com/calendar?cid=N3RsZGt1dXEwcW1mOW9ub2Jxb3ByZ2Z1cDRAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ). Contests that are marked "Rated" will be considered for these ratings.
 
 ## Contribute
 
-This project is still very small so there are no strict guidelines for contribution. For now we are following [PEP 8 -- Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/).
+At the moment, there are no strict guidelines for contribution. As a standard, we follow the [PEP 8 -- Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/).
 
-You can [report an issue](https://github.com/varunvora/alcoding/issues) if you find a bug or any other change you would like to make. You may also make a [pull request](https://github.com/varunvora/alcoding/pulls). It would be helpful if you use [our Github labels](https://github.com/varunvora/alcoding/labels) for all issues and pull requests. Be sure to clearly document and describe any issues or changes. 
+Feel free to [report an issue](https://github.com/pes-alcoding-club/student-ratings/issues) if you find a bug, or have any other change you would like to see. You may also create a [pull request](https://github.com/pes-alcoding-club/student-ratings/pulls). It would be helpful if you use [our Github labels](https://github.com/pes-alcoding-club/student-ratings/labels) for all issues and pull requests. Be sure to clearly document and describe any issues or changes. ## FAQ @@ -37,11 +37,11 @@ You can [report an issue](https://github.com/varunvora/alcoding/issues) if you f 1. Which contests are taken into account for rating? Contests in ['Competitive Programming PESU' Calendar](https://calendar.google.com/calendar?cid=N3RsZGt1dXEwcW1mOW9ub2Jxb3ByZ2Z1cDRAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) are considered for ratings. -1. How can I tell if these ratings are legitimate? +1. How can I tell whether these ratings are legitimate? - You can verify the ratings yourself by running [this script](executor.sh). It resets all students' ratings to default values and recomputes it for all contests so far in chronological order. - -1. How can I get the scoreboard only for some particular contest(s)? - - Clone this repository, open [executor.sh](executor.sh) and remove the contests you do not want the scoreboard for. Run this script and check [scoreboard.csv](scoreboard.csv). + You can verify the ratings yourself by calling the [make_scoreboard] function in [run.py](run.py). It resets all students' ratings to default values and recomputes it for all contests so far in chronological order. +1. How can I make a scoreboard for a few particular contests? + Firstly, clone this repository. + Create your own [contest_names_file.in](database/contest_names_file.in) and add the contest names in the format [platform]-[month]-[contest_code]. In [run.py](run.py), change the [contest_names_file_path] variable's value to your file's path. + Now call the [make_scoreboard] function in [run.py](run.py) with the required parameters and check [scoreboard.csv](scoreboard.csv). \ No newline at end of file diff --git a/database/contestsToProcess.in b/database/contest_names_file.in similarity index 100% rename from database/contestsToProcess.in rename to database/contest_names_file.in diff --git a/database/db_tools.py b/database/db_tools.py index c48796e..40c4874 100644 --- a/database/db_tools.py +++ b/database/db_tools.py @@ -1,7 +1,6 @@ import re import sys import csv -import logging from os import listdir from os.path import join from collections import Counter @@ -9,6 +8,7 @@ from typing import List, Set, Tuple, Dict, Callable, Any from tinydb import TinyDB, where from ratings import elo +from utils.log import * DB_FILE: str = 'database/db.json' CONTEST_RANKS_DIR: str = 'database/contest_ranks' @@ -57,7 +57,7 @@ def reset_database(db_file: str = DB_FILE) -> None: BEST: elo.DEFAULT_RATING, TIMES_PLAYED: 0, LAST_FIVE: 5}) - logging.info(f'Successfully reset database and stored in {db_file}') + info(f'Successfully reset database and stored in {db_file}') def get_site_name_from_file_name(file_name: str) -> str: @@ -68,8 +68,8 @@ def get_site_name_from_file_name(file_name: str) -> str: """ file_name_parts = file_name.split("-") if len(file_name_parts) < 2 or file_name_parts[0] not in SITES: - logging.error(f"Invalid filename '{file_name}' in contest ranks. File name convention is" - f"'site-contest-details.in'") + error(f"Invalid filename '{file_name}' in contest ranks. 
File name convention is" + f"'site-month-contestCode.in'") quit() return file_name_parts[0] @@ -144,7 +144,7 @@ def log_unmapped_handles(site_username_tuple_list: List[Tuple[str, str]]) -> Non log_unmapped_handles(site_handle_tuple_list) - logging.info('Mapped ') + info('Mapped usernames to SRNs') def remove_unmapped_handles_from_rank_file(file_name: str) -> None: @@ -152,10 +152,11 @@ def remove_unmapped_handles_from_rank_file(file_name: str) -> None: Removes unmapped handles from outdated rank files to reduce space and time it takes for the script to run """ - with open(join(CONTEST_RANKS_DIR, file_name), 'r') as rank_file: + with open(file_name, 'r') as rank_file: input_data: str = rank_file.read() - - with open(join(CONTEST_RANKS_DIR, file_name), 'w') as rank_file: + #print(len(input_data)) + count = 0 + with open(file_name, 'w') as rank_file: for user_name_line in input_data.split("\n"): check_occurrence_in_line: bool = False for user_name in user_name_line.split(): @@ -164,7 +165,10 @@ def remove_unmapped_handles_from_rank_file(file_name: str) -> None: rank_file.write(user_name + " ") if check_occurrence_in_line: rank_file.write("\n") - logging.info(f'Cleaned {file_name}') + count+=1 + #print(count) + loginfo = file_name.split('/')[2] + info(f'Cleaned {loginfo}') def export_to_csv(db_file: str = DB_FILE, scoreboard_file: str = SCOREBOARD_FILE) -> None: @@ -190,7 +194,7 @@ def export_to_csv(db_file: str = DB_FILE, scoreboard_file: str = SCOREBOARD_FILE wr = csv.writer(fp) wr.writerows(csv_table) - logging.info(f'Successfully exported database from {db_file} to {scoreboard_file}') + info(f'Successfully exported database from {db_file} to {scoreboard_file}') def prettify(db_file: str = DB_FILE) -> None: @@ -201,11 +205,11 @@ def prettify(db_file: str = DB_FILE) -> None: fp.write_back(fp.all()) -if __name__ == "__main__": +'''if __name__ == "__main__": # While executing this script, you can specify which function to execute func_str: str = sys.argv[1] try: func_obj: Callable = globals()[func_str] func_obj(*sys.argv[2:]) # Arguments to specified function can be passed - except KeyError: - logging.error(f'Provided invalid argument. No function {func_str}') + except KeyError:' + error(f'Provided invalid argument. 
No function {func_str}')''' \ No newline at end of file diff --git a/ratings/processor.py b/ratings/processor.py index 17c302b..0aaab6c 100644 --- a/ratings/processor.py +++ b/ratings/processor.py @@ -1,22 +1,23 @@ import sys -import logging from time import time from ratings import elo from database import db_tools as db from tinydb import TinyDB, where - +from utils.log import * class RatingProcessor: - def __init__(self, database: TinyDB, rank_file): + def __init__(self, database: TinyDB, rank_file_path): self.database: TinyDB = database self.N: int = 0 self.Cf: float = 0.0 self.Rb_Vb_list: list = [] self.usn_rank_dict: dict = {} + self.rank_file_path = rank_file_path + self.rank_file = open(rank_file_path) - self.read_contest_ranks(rank_file) # sets usn_rank_dict + self.read_contest_ranks(self.rank_file) # sets usn_rank_dict self.set_contest_details() # sets N, Cf and Rb_Vb_list self.process_competition() # uses the set attributes to compute new ratings @@ -35,9 +36,9 @@ def read_contest_ranks(self, rank_file) -> None: self.usn_rank_dict[usn] = current_rank same_rank_count += 1 else: - logging.info(f'Ignoring usn {usn}') + info(f'Ignoring SRN {usn}') current_rank += same_rank_count # ranks are not 1, 1, 1, 2 but 1, 1, 1, 4 - logging.debug(self.usn_rank_dict) + debug(self.usn_rank_dict) def set_contest_details(self) -> None: """ @@ -54,12 +55,12 @@ def set_contest_details(self) -> None: self.N = len(self.usn_rank_dict) self.Cf = elo.Cf(rating_list, vol_list, self.N) self.Rb_Vb_list = list(zip(rating_list, vol_list)) - logging.debug(f'Contest: {rank_file_path}\nPlayers: {self.N}\nCompetition Factor: {self.Cf}') + debug(f'Contest: {self.rank_file_path}\nPlayers: {self.N}\nCompetition Factor: {self.Cf}') @staticmethod def _decay_player(player_dict: dict) -> None: """ - Reduces ratings by 10% for those who have competed at least once + Reduces ratings by 1% for those who have competed at least once but have not taken part in the past 5 contests :param player_dict: dict with all details of a player """ @@ -76,7 +77,7 @@ def _decay_player(player_dict: dict) -> None: player_dict[db.RATING] = rating player_dict[db.LAST_FIVE] = max(1, last_five) - logging.debug('Successfully decayed ratings') + debug('Successfully decayed ratings') def _update_player(self, player_dict: dict, actual_rank: int) -> None: """ @@ -99,27 +100,27 @@ def _update_player(self, player_dict: dict, actual_rank: int) -> None: player_dict[db.BEST] = max(old_best, new_rating) player_dict[db.LAST_FIVE] = 5 - logging.debug('Successfully updated ratings') + debug('Successfully updated ratings') def process_competition(self) -> None: rows = self.database.all() for row in rows: - logging.debug(f'Before: {row}') + debug(f'Before: {row}') if row[db.USN] in self.usn_rank_dict: actual_rank = self.usn_rank_dict[row[db.USN]] self._update_player(row, actual_rank) else: self._decay_player(row) - logging.debug(f'After: {row}') + debug(f'After: {row}') self.database.write_back(rows) -def read_argv(argv_format_alert: str): - """ +"""def read_argv(argv_format_alert: str): + ''' :param argv_format_alert: An error message on what the command line arguments should be :return: rank file if argv is valid - """ + ''' try: assert len(sys.argv) == 2 rank_file = sys.argv[1] @@ -128,15 +129,28 @@ def read_argv(argv_format_alert: str): return rank_file except IOError or FileNotFoundError: - logging.error(f'Invalid file path for rank file: {rank_file}\n{argv_format_alert}') + error(f'Invalid file path for rank file: {rank_file}\n{argv_format_alert}') 
quit() except AssertionError: - logging.error(f'Invalid command line arguments.\n{argv_format_alert}') - quit() + error(f'Invalid command line arguments.\n{argv_format_alert}') + quit()""" +def process(rank_file_path): + start_time = time() + # Main logic starts here + database_obj = TinyDB(db.DB_FILE) + RatingProcessor(database_obj, rank_file_path) + database_obj.close() -if __name__ == "__main__": + duration = time()-start_time + debug(f'Updated ratings for {rank_file_path}') + if duration > 10: + critical(f'Ratings update for {rank_file_path} took {duration} seconds.\n' + f'Consider removing unnecessary handles or optimize ratings algorithm') + + +'''if __name__ == "__main__": start_time = time() argv_format = 'processor.py rank_file_path' @@ -149,7 +163,7 @@ def read_argv(argv_format_alert: str): database_obj.close() duration = time()-start_time - logging.debug(f'Updated ratings for {rank_file_path}') + debug(f'Updated ratings for {rank_file_path}') if duration > 10: logging.critical(f'Ratings update for {rank_file_path} took {duration} seconds.\n' - f'Consider removing unnecessary handles or optimize ratings algorithm') + f'Consider removing unnecessary handles or optimize ratings algorithm')''' diff --git a/requirements.txt b/requirements.txt index c731e28..3d32c79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ -tinydb -requests -bs4 -selenium \ No newline at end of file +requests==2.22.0 +beautifulsoup4==4.9.1 +google_api_python_client==1.10.0 +google_auth_oauthlib==0.4.1 +selenium==3.141.0 +tinydb==3.15.2 diff --git a/run.py b/run.py new file mode 100644 index 0000000..8cb0b83 --- /dev/null +++ b/run.py @@ -0,0 +1,172 @@ +import datetime +import pickle +import os.path +from googleapiclient.discovery import build +from google_auth_oauthlib.flow import InstalledAppFlow +from google.auth.transport.requests import Request +import sys +import os +sys.path.append( # Add absolute path of utils to sys.path + os.path.join( os.path.dirname( os.path.realpath( __file__ )), + '../student-ratings' )) +from scrapers import codechef, hackerearth +from database import db_tools as tools +from ratings import processor +from pathlib import Path +from collections import defaultdict +from utils.log import * + +PATH_TO_RANK_FILES = 'database/contest_ranks/' # Change this path to 'database/[YOUR_CUSTOM_RANKS_DIR]' to calculate ratings for only a few contests +contest_names_file_path = 'database/contest_names_file.in' # Change this path to 'database/[YOUR_CUSTOM_CONTEST_NAMES_FILE.in]' and add required (supported) contests to calculate ratings for only those +SCOPES = ['https://www.googleapis.com/auth/calendar.readonly'] +months = ['jan', 'feb', 'march', 'april', 'may', 'june', 'july', 'aug', 'sept', 'oct', 'nov', 'dec'] + +class contest_details(): + def __init__(self, url): + self.website = str(url[0].split('.')[1]) # Fetch the platform name + self.contest_code = str(url[-1]) + self.file_name = self.make_file_name() + + def make_file_name(self): + if self.website == 'codechef': + if self.contest_code[0:5] == 'LTIME': + #Specific formula to determine month and year for Lunchtime based on numeric ID + month, year = months[(int(self.contest_code[5:]) + 4) % 12], int((int(self.contest_code[5:]) + 5) / 12) + 13 + return f'codechef-{month}-lunchtime-{year}.in' + #Specific formula to determine month and year for Cookoff based on numeric ID + elif self.contest_code[0:4]=="COOK": + month, year = months[(int(self.contest_code[4:]) + 6) % 12], int((int(self.contest_code[4:]) + 7) / 12) + 10 + 
return f'codechef-{month}-cookoff-{year}.in' + else: + month = self.contest_code[:-2].lower() + return f'codechef-{month}-long-{self.contest_code[-2:]}.in' + elif self.website == 'hackerearth': + return f'hackerearth-{self.contest_code}.in' + + def set_leaderboard(self, leaderboard): + self.leaderboard = leaderboard + + +def get_calendar_events(DAYS): + """This block of code is to allow OAuth""" + creds = None + if os.path.exists('token.pickle'): + with open('token.pickle', 'rb') as token: + creds = pickle.load(token) + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file( + 'credentials.json', SCOPES) + creds = flow.run_local_server(port=0) + with open('token.pickle', 'wb') as token: + pickle.dump(creds, token) + + service = build('calendar', 'v3', credentials=creds) + + """ Call the Calendar API + Z indicates UTC time, as Google requires + the input timezones to be consistent """ + now = datetime.utcnow().isoformat() + 'Z' + tmin = (datetime.utcnow() - timedelta(days=DAYS)).isoformat() + 'Z' + response = service.events().list(calendarId='7tldkuuq0qmf9onobqoprgfup4@group.calendar.google.com', timeMin=tmin, + timeMax=now, singleEvents=True, + orderBy='startTime').execute() + + calendar_response = response.get('items', []) + return calendar_response + + +def get_all_contests(DAYS): + calendar_response = get_calendar_events(DAYS) + contests = defaultdict(list) + contest_names_file = open(contest_names_file_path, 'r+') + existing_contests = list(contest_names_file.read().split('\n')) + if not calendar_response: + info('No upcoming contests found.') + contest_names_file.close() + return calendar_response + else: + for event in calendar_response: + try: + url = event['location'].replace('https://', '').split('/') + except: + error('The contest {} does not have an associated website and is hence ignored.'.format(event['summary'])) + continue + try: + url.remove('') # To remove any unexpected blank items caused by a trailing slash + except: + pass + + contest = contest_details(url) # Create a contest_details object for the contest + if contest.website not in ['codechef', 'hackerearth']: # Only codechef and hackerearth scrapers are compatible as of now + continue + if contest.file_name not in existing_contests: # Checks whether the contest has already been scraped + contest_names_file.write(contest.file_name+'\n') + contests[contest.website].append(contest) + else: + warn(f'{contest.file_name} already exists, ignoring; To re-scrape, delete the file and remove this entry.') + + contest_names_file.close() + return contests + + +""" The scrapers take in a list of contest id's at a go to avoid the overhead of repeatedly calling it. 
This means that the output + leaderboards have to be reverse mapped back to the contest_details objects; Since they are in a list and the order is preserved, + we use the index of the leaderboard and map it to the object of the same index """ +def scrape(DAYS=30): + contests = get_all_contests(DAYS) + if contests: + leaderboards = codechef.scrape(list(contest.contest_code for contest in contests['codechef'])) + assert len(leaderboards) == len(contests['codechef']) # Make sure the number of leaderboards is the same as number of contests + for i in range(len(leaderboards)): + contests['codechef'][i].set_leaderboard(leaderboards[i]) + + leaderboards = hackerearth.scrape(list(contest.contest_code for contest in contests['hackerearth'])) + assert len(leaderboards) == len(contests['hackerearth']) # Make sure the number of leaderboards is the same as number of contests + for i in range(len(leaderboards)): + contests['hackerearth'][i].set_leaderboard(leaderboards[i]) + + for platform in contests: + for contest in contests[platform]: + file_path = PATH_TO_RANK_FILES + contest.file_name + with open (file_path, 'w+') as rank_file: + for rank in contest.leaderboard: + rank_file.write(rank + '\n') + info('Wrote to {file_path}') + + +def recalculate(clean=False): + contest_names_file = open(contest_names_file_path, 'r') + contest_names = list(contest_names_file.read().split('\n')) + try: + contest_names.remove('') # Removes trailing newline in case the input file had it + except: + pass + info('Built list of files to process') + for contest in contest_names: + if clean: + """ Removes handles that couldn't be mapped to a USN + Usually required in a contest where we couldn't obtain handles of only required students """ + tools.remove_unmapped_handles_from_rank_file(f'{PATH_TO_RANK_FILES}{contest}') + processor.process(f'{PATH_TO_RANK_FILES}{contest}') # Call the processor for each contest + info(f'Processed contest: {contest}') + tools.export_to_csv() + tools.prettify() + contest_names_file.close() + + +def make_scoreboard(map_USN=True, clean=False): + tools.reset_database() + if map_USN: + tools.map_username_to_usn() + recalculate(clean) + + +def execute(DAYS=30, map_USN=True, clean=False): + scrape(DAYS=DAYS) + make_scoreboard(map_USN=map_USN, clean=clean) + +#execute(clean=True) +make_scoreboard(map_USN=True, clean=True) \ No newline at end of file diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 98fdc2b..e85cf7c 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -3,9 +3,6 @@ from datetime import datetime import sys import os -sys.path.append( # Add absolute path of utils to sys.path - os.path.join( os.path.dirname( os.path.realpath( __file__ )), - '../../student-ratings' )) from utils import selenium_utils from utils.log import info @@ -17,7 +14,6 @@ division = namedtuple('division',['problems','scraped_scoreboard']) divisions: dict = {'A':division(set(), list()),'B':division(set(), list())} -month = ['jan', 'feb', 'march', 'april', 'may', 'june', 'july', 'aug', 'sept', 'oct', 'nov', 'dec'] def get_problems(site): diff --git a/scrapers/hackerearth.py b/scrapers/hackerearth.py index 1e47651..5bf62a8 100644 --- a/scrapers/hackerearth.py +++ b/scrapers/hackerearth.py @@ -1,9 +1,6 @@ import requests import sys import os -sys.path.append( # Add absolute path of utils to sys.path - os.path.join( os.path.dirname( os.path.realpath( __file__ )), - '../../student-ratings' )) from bs4 import BeautifulSoup from tinydb import TinyDB, where from database.db_tools import DB_FILE, 
HACKEREARTH @@ -36,11 +33,11 @@ def get_leaderboard(event_id): handles = get_handles(requests.get(API.format(event_id, page_num)).text) # url returns last page for page_num greater than last page number - if leaderboard[-len(handles):] == handles or page_num > 10: + if leaderboard[-len(handles):] == handles: break leaderboard.extend(handles) - info(f'Handles retrieved for page {page_num}') + debug(f'Handles retrieved for page {page_num}') page_num += 1 return leaderboard @@ -49,7 +46,7 @@ def get_contest_IDs(contest_codes): contest_IDs = dict() for contest_code in contest_codes: # Gets the ICPC contest ID from the url; We use this ID to send the request - contest_IDs[contest_code] = BeautifulSoup(requests.get(leaderboard_base_url.format(contest_code)).text, 'html.parser').find('div', class_='event-id').text # + contest_IDs[contest_code] = BeautifulSoup(requests.get(leaderboard_base_url.format(contest_code)).text, 'html.parser').find('div', class_='event-id').text return contest_IDs From 9e1cbd8d5dff39534c1f9838ab8468c2a19a790e Mon Sep 17 00:00:00 2001 From: Murali Krishna Date: Mon, 20 Jul 2020 21:35:32 +0530 Subject: [PATCH 3/4] Removed stray comment, changed namespace for log --- database/db_tools.py | 16 +++++++--------- ratings/processor.py | 24 ++++++++++++------------ run.py | 14 +++++++------- scrapers/codechef.py | 9 ++++----- scrapers/hackerearth.py | 10 +++++----- 5 files changed, 35 insertions(+), 38 deletions(-) diff --git a/database/db_tools.py b/database/db_tools.py index 40c4874..ee15c90 100644 --- a/database/db_tools.py +++ b/database/db_tools.py @@ -8,7 +8,7 @@ from typing import List, Set, Tuple, Dict, Callable, Any from tinydb import TinyDB, where from ratings import elo -from utils.log import * +from utils import log DB_FILE: str = 'database/db.json' CONTEST_RANKS_DIR: str = 'database/contest_ranks' @@ -57,7 +57,7 @@ def reset_database(db_file: str = DB_FILE) -> None: BEST: elo.DEFAULT_RATING, TIMES_PLAYED: 0, LAST_FIVE: 5}) - info(f'Successfully reset database and stored in {db_file}') + log.info(f'Successfully reset database and stored in {db_file}') def get_site_name_from_file_name(file_name: str) -> str: @@ -68,7 +68,7 @@ def get_site_name_from_file_name(file_name: str) -> str: """ file_name_parts = file_name.split("-") if len(file_name_parts) < 2 or file_name_parts[0] not in SITES: - error(f"Invalid filename '{file_name}' in contest ranks. File name convention is" + log.error(f"Invalid filename '{file_name}' in contest ranks. 
File name convention is" f"'site-month-contestCode.in'") quit() return file_name_parts[0] @@ -144,7 +144,7 @@ def log_unmapped_handles(site_username_tuple_list: List[Tuple[str, str]]) -> Non log_unmapped_handles(site_handle_tuple_list) - info('Mapped usernames to SRNs') + log.info('Mapped usernames to SRNs') def remove_unmapped_handles_from_rank_file(file_name: str) -> None: @@ -154,7 +154,6 @@ def remove_unmapped_handles_from_rank_file(file_name: str) -> None: """ with open(file_name, 'r') as rank_file: input_data: str = rank_file.read() - #print(len(input_data)) count = 0 with open(file_name, 'w') as rank_file: for user_name_line in input_data.split("\n"): @@ -166,9 +165,8 @@ def remove_unmapped_handles_from_rank_file(file_name: str) -> None: if check_occurrence_in_line: rank_file.write("\n") count+=1 - #print(count) loginfo = file_name.split('/')[2] - info(f'Cleaned {loginfo}') + log.info(f'Cleaned {loginfo}') def export_to_csv(db_file: str = DB_FILE, scoreboard_file: str = SCOREBOARD_FILE) -> None: @@ -194,7 +192,7 @@ def export_to_csv(db_file: str = DB_FILE, scoreboard_file: str = SCOREBOARD_FILE wr = csv.writer(fp) wr.writerows(csv_table) - info(f'Successfully exported database from {db_file} to {scoreboard_file}') + log.info(f'Successfully exported database from {db_file} to {scoreboard_file}') def prettify(db_file: str = DB_FILE) -> None: @@ -212,4 +210,4 @@ def prettify(db_file: str = DB_FILE) -> None: func_obj: Callable = globals()[func_str] func_obj(*sys.argv[2:]) # Arguments to specified function can be passed except KeyError:' - error(f'Provided invalid argument. No function {func_str}')''' \ No newline at end of file + log.error(f'Provided invalid argument. No function {func_str}')''' \ No newline at end of file diff --git a/ratings/processor.py b/ratings/processor.py index 0aaab6c..0893039 100644 --- a/ratings/processor.py +++ b/ratings/processor.py @@ -3,7 +3,7 @@ from ratings import elo from database import db_tools as db from tinydb import TinyDB, where -from utils.log import * +from utils import log class RatingProcessor: @@ -36,9 +36,9 @@ def read_contest_ranks(self, rank_file) -> None: self.usn_rank_dict[usn] = current_rank same_rank_count += 1 else: - info(f'Ignoring SRN {usn}') + log.info(f'Ignoring SRN {usn}') current_rank += same_rank_count # ranks are not 1, 1, 1, 2 but 1, 1, 1, 4 - debug(self.usn_rank_dict) + log.debug(self.usn_rank_dict) def set_contest_details(self) -> None: """ @@ -55,7 +55,7 @@ def set_contest_details(self) -> None: self.N = len(self.usn_rank_dict) self.Cf = elo.Cf(rating_list, vol_list, self.N) self.Rb_Vb_list = list(zip(rating_list, vol_list)) - debug(f'Contest: {self.rank_file_path}\nPlayers: {self.N}\nCompetition Factor: {self.Cf}') + log.debug(f'Contest: {self.rank_file_path}\nPlayers: {self.N}\nCompetition Factor: {self.Cf}') @staticmethod def _decay_player(player_dict: dict) -> None: @@ -77,7 +77,7 @@ def _decay_player(player_dict: dict) -> None: player_dict[db.RATING] = rating player_dict[db.LAST_FIVE] = max(1, last_five) - debug('Successfully decayed ratings') + log.debug('Successfully decayed ratings') def _update_player(self, player_dict: dict, actual_rank: int) -> None: """ @@ -100,19 +100,19 @@ def _update_player(self, player_dict: dict, actual_rank: int) -> None: player_dict[db.BEST] = max(old_best, new_rating) player_dict[db.LAST_FIVE] = 5 - debug('Successfully updated ratings') + log.debug('Successfully updated ratings') def process_competition(self) -> None: rows = self.database.all() for row in rows: - debug(f'Before: 
{row}')
+            log.debug(f'Before: {row}')
             if row[db.USN] in self.usn_rank_dict:
                 actual_rank = self.usn_rank_dict[row[db.USN]]
                 self._update_player(row, actual_rank)
             else:
                 self._decay_player(row)
-            debug(f'After: {row}')
+            log.debug(f'After: {row}')
 
         self.database.write_back(rows)
 
@@ -144,9 +144,9 @@ def process(rank_file_path):
     database_obj.close()
 
     duration = time()-start_time
-    debug(f'Updated ratings for {rank_file_path}')
+    log.debug(f'Updated ratings for {rank_file_path}')
     if duration > 10:
-        critical(f'Ratings update for {rank_file_path} took {duration} seconds.\n'
+        log.critical(f'Ratings update for {rank_file_path} took {duration} seconds.\n'
                  f'Consider removing unnecessary handles or optimize ratings algorithm')
 
 
@@ -163,7 +163,7 @@ def process(rank_file_path):
     database_obj.close()
 
     duration = time()-start_time
-    debug(f'Updated ratings for {rank_file_path}')
+    log.debug(f'Updated ratings for {rank_file_path}')
     if duration > 10:
-        logging.critical(f'Ratings update for {rank_file_path} took {duration} seconds.\n'
+        log.critical(f'Ratings update for {rank_file_path} took {duration} seconds.\n'
                  f'Consider removing unnecessary handles or optimize ratings algorithm')'''
diff --git a/run.py b/run.py
index e8389ad..e170edf 100644
--- a/run.py
+++ b/run.py
@@ -14,7 +14,7 @@
 from ratings import processor
 from pathlib import Path
 from collections import defaultdict
-from utils.log import *
+from utils import log
 
 PATH_TO_RANK_FILES = 'database/contest_ranks/' # Change this path to 'database/[YOUR_CUSTOM_RANKS_DIR]' to calculate ratings for only a few contests
 contest_names_file_path = 'database/contest_names_file.in' # Change this path to 'database/[YOUR_CUSTOM_CONTEST_NAMES_FILE.in]' and add required (supported) contests to calculate ratings for only those
@@ -85,7 +85,7 @@ def get_all_contests(DAYS):
     contest_names_file = open(contest_names_file_path, 'r+') # Contains list of all contests scraped till now
     existing_contests = list(contest_names_file.read().split('\n'))
     if not calendar_response:
-        error('No upcoming contests found.')
+        log.error('No upcoming contests found.')
         contest_names_file.close()
         return calendar_response
     else:
@@ -93,7 +93,7 @@
         try:
             url = event['location'].replace('https://', '').split('/') # Remove the https and make the parts of the url a list
         except:
-            error('The contest {} does not have an associated website and is hence ignored.'.format(event['summary']))
+            log.error('The contest {} does not have an associated website and is hence ignored.'.format(event['summary']))
             continue
         try:
             url.remove('') # To remove any unexpected blank items caused by a trailing slash
@@ -107,7 +107,7 @@
             contest_names_file.write(contest.file_name+'\n')
             contests[contest.website].append(contest)
         else:
-            warn(f'{contest.file_name} already exists, ignoring; To re-scrape, delete the file and remove this entry.')
+            log.warn(f'{contest.file_name} already exists, ignoring; To re-scrape, delete the file and remove this entry.')
 
     contest_names_file.close()
     return contests
@@ -138,7 +138,7 @@ def scrape(DAYS=30):
         with open (file_path, 'w+') as rank_file:
             for rank in contest.leaderboard:
                 rank_file.write(rank + '\n')
-        info('Wrote to {file_path}')
+        log.info(f'Wrote to {file_path}')
 
 
 def recalculate(clean=False): # Recalculates the ratings from ground-up; This is to ensure integrity and to allow for later joinees
     contest_names_file = open(contest_names_file_path, 'r')
     contest_names = list(contest_names_file.read().split('\n'))
     try:
         contest_names.remove('') # Removes 
trailing newline in case the input file had it except: pass - info('Built list of files to process') + log.info('Built list of files to process') for contest in contest_names: if clean: """ Removes handles that couldn't be mapped to a USN Usually required in a contest where we couldn't obtain handles of only required students, such as HackerEarth """ tools.remove_unmapped_handles_from_rank_file(f'{PATH_TO_RANK_FILES}{contest}') processor.process(f'{PATH_TO_RANK_FILES}{contest}') # Call the processor for each contest - info(f'Processed contest: {contest}') + log.info(f'Processed contest: {contest}') tools.export_to_csv() tools.prettify() contest_names_file.close() diff --git a/scrapers/codechef.py b/scrapers/codechef.py index 8f2046e..1a935aa 100644 --- a/scrapers/codechef.py +++ b/scrapers/codechef.py @@ -3,8 +3,7 @@ from datetime import datetime import sys import os -from utils import selenium_utils -from utils.log import info +from utils import selenium_utils, log driver = selenium_utils.make_driver() load_all = selenium_utils.load_all(driver) @@ -18,7 +17,7 @@ def get_problems(site): driver.get(site) - info(f'Initialised website: {site}') + log.info(f'Initialised website: {site}') problem_list = list(load(r'tbody', 'tag').text.split('\n'))[1::4] problems = set() for question in problem_list: @@ -27,7 +26,7 @@ def get_problems(site): def get_rankings(site, contest_code): driver.get(site) - info(f'Initialised website: {site}') + log.info(f'Initialised website: {site}') total_pages = int(load_all(r'jump', 'class')[-1].text) scraped_scoreboard = [] for page in range(total_pages): @@ -59,7 +58,7 @@ def scrape(contest_codes): scoreboard_filter_query:str = "?filterBy=Institution%3DPES%20University&itemsPerPage=100&order=asc&sortBy=rank" leaderboards = [] for contest_code in contest_codes: - info(f'Codechef contest {contest_code}:') + log.info(f'Codechef contest {contest_code}:') final_scoreboard:list=list() easy_points:int=100 # Points to add to division A participants assuming they can solve all easy div B problems diff --git a/scrapers/hackerearth.py b/scrapers/hackerearth.py index 82de558..74cf596 100644 --- a/scrapers/hackerearth.py +++ b/scrapers/hackerearth.py @@ -4,7 +4,7 @@ from bs4 import BeautifulSoup from tinydb import TinyDB, where from database.db_tools import DB_FILE, HACKEREARTH -from utils.log import * +from utils import log # 0 - event_id # 1 - page number @@ -15,13 +15,13 @@ def get_handles(html_doc): soup = BeautifulSoup(html_doc, 'html.parser') ''' -
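The HackerEarth pagination pattern these patches converge on is worth spelling out once in isolation: the AJAX leaderboard endpoint keeps serving its final page for any page number past the end, so the scraper cannot ask for a page count the way codechef.py reads the last pager button; instead it paginates until a fetch merely repeats the tail of what it has already collected. Below is a minimal, self-contained sketch of that loop. The endpoint URL and the repeat-tail stop condition are taken from the patches above; the body of get_handles is an assumption (the real selector is elided by the diff context), and the empty-page guard is an addition not present in the original code.

    import requests
    from bs4 import BeautifulSoup

    # Endpoint from scrapers/hackerearth.py: {0} is the numeric event id, {1} the page number.
    API = 'https://www.hackerearth.com/AJAX/feed/newsfeed/icpc-leaderboard/event/{0}/{1}/'

    def get_handles(html_doc):
        # Hypothetical parse step: the real selector lives in the elided body of
        # get_handles above; adjust it to whatever markup the AJAX response returns.
        soup = BeautifulSoup(html_doc, 'html.parser')
        return [node.text.strip() for node in soup.find_all('a')]

    def get_leaderboard(event_id):
        leaderboard, page_num = [], 1
        while True:
            handles = get_handles(requests.get(API.format(event_id, page_num)).text)
            # The endpoint serves its last page for any page_num past the end, so stop
            # once a fetch only repeats the tail we already have. The empty-page guard
            # also prevents an infinite loop: leaderboard[-0:] is the whole list, so
            # the repeat test could never fire once handles came back empty.
            if not handles or leaderboard[-len(handles):] == handles:
                break
            leaderboard.extend(handles)
            page_num += 1
        return leaderboard

This sentinel-based stop is the natural design when the server exposes no page count; codechef.py, by contrast, reads total_pages from the pager widget and loops a known number of times.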