diff --git a/README.md b/README.md index 80a2808..a544982 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ *PyMedium* is an unofficial Medium API written in python flask. It provides developers to access to user, post list and detail information from [Medium]( https://medium.com/) website. This is a read-only API to access public information from Medium, you can customize this API to fit your requirements and deploy on your own server. +Now we provide another way: python client SDK to let you integrate into your application. More detail go to [wiki page](https://github.com/enginebai/PyMedium/wiki/Python-SDK). + ## Installation Before running PyMedium API, you have to clone the code from this repository, install requirements at first. diff --git a/pymedium/api.py b/pymedium/api.py index 69265d8..2fbff0f 100644 --- a/pymedium/api.py +++ b/pymedium/api.py @@ -4,13 +4,11 @@ import requests from flask import Flask, jsonify, Response, request -from selenium import webdriver from pymedium.parser import parse_user, parse_publication, parse_post, parse_post_detail from pymedium.model import OutputFormat import pymedium.constant as const app = Flask(__name__) -driver = webdriver.Chrome("driver/chromedriver") @app.route("/", methods=["GET"]) @@ -55,7 +53,7 @@ def send_request(url, headers=const.ACCEPT_HEADER, param=None, parse_function=No if req.status_code == requests.codes.ok: if parse_function is None: parse_function = parse_post - model_dict = parse_function(json.loads(req.text.replace(const.ESCAPE_CHARACTERS, "").strip())) + model_dict = parse_function(json.loads(req.text.replace(const.ESCAPE_CHARACTERS, "").strip()), return_dict=True) return jsonify(model_dict) else: return Response(status=req.status_code) @@ -77,7 +75,7 @@ def get_post(): if not output_format: output_format = OutputFormat.PLAIN_TEXT.value if url: - detail_str = parse_post_detail(url, output_format, driver) + detail_str = parse_post_detail(url, output_format) status_code = 200 mime_type = "text/html" if output_format == OutputFormat.JSON.value: diff --git a/pymedium/medium.py b/pymedium/medium.py new file mode 100644 index 0000000..8ae7cc9 --- /dev/null +++ b/pymedium/medium.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# -*- encoding: utf-8 -*- +import json + +import requests +from pymedium.parser import parse_user, parse_publication, parse_post, parse_post_detail +from pymedium.constant import ROOT_URL, ACCEPT_HEADER, ESCAPE_CHARACTERS, COUNT +from pymedium.model import Sort + + +class Medium(object): + def __init__(self): + pass + + def get_user_profile(self, username): + url = "{}@{}/latest".format(ROOT_URL, username) + return self._send_request(url, parse_user) + + def get_publication_profile(self, publication_name): + url = "{}{}/latest".format(ROOT_URL, publication_name) + return self._send_request(url, parse_publication) + + def get_user_posts(self, username, n=COUNT): + return self._send_post_request(ROOT_URL + "@{0}/latest?limit={count}".format(username, count=n)) + + def get_publication_posts(self, publication_name, n=COUNT): + return self._send_post_request(ROOT_URL + "{0}/latest?limit={count}".format(publication_name, count=n)) + + def get_top_posts(self, n=COUNT): + return self._send_post_request(ROOT_URL + "browse/top?limit={count}".format(count=n)) + + def get_posts_by_tag(self, tag, n=COUNT, sort=Sort.TOP): + url = "{}tag/{tag}".format(ROOT_URL, tag=tag) + if sort == Sort.LATEST: + url += "/latest" + url += "?limit={}".format(n) + return self._send_post_request(url) + + def parse_post_content(self, url): + pass + + @staticmethod + def _send_request(url, parse_function): + req = requests.get(url, headers=ACCEPT_HEADER) + print(url, req.status_code) + if req.status_code == requests.codes.ok: + return parse_function(json.loads(req.text.replace(ESCAPE_CHARACTERS, "").strip())) + else: + return None + + @staticmethod + def _send_post_request(url): + return Medium._send_request(url, parse_post) diff --git a/pymedium/model.py b/pymedium/model.py index 0bf5122..d044367 100644 --- a/pymedium/model.py +++ b/pymedium/model.py @@ -97,6 +97,12 @@ def author_tags(self): def author_tags(self, tags): self._author_tags = tags + def __str__(self, *args, **kwargs): + return str(to_dict(self)) + + def __repr__(self, *args, **kwargs): + return str(to_dict(self)) + class Post: def __init__(self, post_id): @@ -190,6 +196,12 @@ def post_tags(self): def post_tags(self, tags): self._post_tags = tags + def __str__(self, *args, **kwargs): + return str(to_dict(self)) + + def __repr__(self, *args, **kwargs): + return str(to_dict(self)) + class Publication: def __init__(self, publication_id): @@ -267,6 +279,12 @@ def post_count(self): def post_count(self, count): self._post_count = count + def __str__(self, *args, **kwargs): + return str(to_dict(self)) + + def __repr__(self, *args, **kwargs): + return str(to_dict(self)) + class Tag: @property @@ -309,6 +327,12 @@ def image(self): def image(self, image): self._image = image + def __str__(self, *args, **kwargs): + return str(to_dict(self)) + + def __repr__(self, *args, **kwargs): + return str(to_dict(self)) + class Image: def __init__(self, image_id): @@ -338,6 +362,12 @@ def original_width(self, width): # def url(self, url): # self._url = url + def __str__(self, *args, **kwargs): + return str(to_dict(self)) + + def __repr__(self, *args, **kwargs): + return str(to_dict(self)) + class OutputFormat(Enum): PLAIN_TEXT = "text" @@ -346,10 +376,16 @@ class OutputFormat(Enum): MARKDOWN = "md" +class Sort(Enum): + TOP = "top" + LATEST = "latest" + + def to_dict(model): return dict((get_key(key), value) for key, value in model.__dict__.items() if not callable(value) and not key.startswith("__")) + def get_key(key): return key.replace("_", "", 1) if key.startswith("_") else key \ No newline at end of file diff --git a/pymedium/parser.py b/pymedium/parser.py index 47ef4c8..44487c8 100644 --- a/pymedium/parser.py +++ b/pymedium/parser.py @@ -12,7 +12,7 @@ __author__ = 'enginebai' -def parse_user(payload): +def parse_user(payload, return_dict=False): user_dict = payload["payload"]["user"] user_id = user_dict["userId"] user = User(user_id) @@ -27,15 +27,15 @@ def parse_user(payload): ref_dict = payload["payload"]["references"] # interest_tags = user_meta_dict["interestTags"] - # user.interest_tags = parse_tags(interest_tags) + # user.interest_tags = parse_tags(interest_tags, return_dict) # author_tags = user_meta_dict["authorTags"] - # user.author_tags = parse_tags(author_tags) + # user.author_tags = parse_tags(author_tags, return_dict) publication_ids = ref_dict["Collection"] if publication_ids is not None and len(publication_ids.keys()) > 0: publication_list = [] for pub_id in publication_ids.keys(): - publication = parse_publication(payload, pub_id) + publication = parse_publication(payload, pub_id, return_dict) publication_list.append(publication) if len(publication_list) > 0: user.publications = publication_list @@ -54,10 +54,13 @@ def parse_user(payload): user.following_count = following_count user.followedby_count = followby_count - return to_dict(user) + if return_dict: + return to_dict(user) + else: + return user -def parse_publication(payload, pub_id=None): +def parse_publication(payload, pub_id=None, return_dict=False): if pub_id is None: pub_id = payload["payload"]["collection"]["id"] publication_dict = payload["payload"]["references"]["Collection"][pub_id] @@ -66,25 +69,29 @@ def parse_publication(payload, pub_id=None): publication.description = publication_dict["description"] publication.creator_user_id = publication_dict["creatorId"] image_dict = publication_dict["image"] - image = parse_images(image_dict) + image = parse_images(image_dict, return_dict) if image is not None: publication.image = image logo_dict = publication_dict["logo"] - logo = parse_images(logo_dict) + logo = parse_images(logo_dict, return_dict) if logo is not None: publication.logo = logo publication.follower_count = publication_dict["metadata"]["followerCount"] - publication.post_count = publication_dict["metadata"]["postCount"] + if "postCount" in publication_dict["metadata"]: + publication.post_count = publication_dict["metadata"]["postCount"] if "domain" in publication_dict: publication.url = "http://" + publication_dict["domain"] else: publication.url = ROOT_URL + publication_dict["slug"] publication.name = publication_dict["slug"] - return to_dict(publication) + if return_dict: + return to_dict(publication) + else: + return publication -def parse_post(payload): +def parse_post(payload, return_dict=False): # get the different parsing keys post_detail_parsing_keys = ("payload", "references", "Post") if post_detail_parsing_keys is None: @@ -93,6 +100,12 @@ def parse_post(payload): for key in post_detail_parsing_keys: post_list_payload = post_list_payload.get(key) + if post_list_payload is None: + post_detail_parsing_keys = ("payload", "posts") + post_list_payload = payload + for key in post_detail_parsing_keys: + post_list_payload = post_list_payload.get(key) + def parse_post_dict(post_dict, post_id=None): if post_id is None: post_id = post_dict["id"] @@ -129,7 +142,7 @@ def parse_post_dict(post_dict, post_id=None): image_count = virtual_dict["imageCount"] preview_image = virtual_dict["previewImage"] # post_tags = virtual_dict["tags"] - # post.post_tags = parse_tags(post_tags) + # post.post_tags = parse_tags(post_tags, return_dict) # post.unique_slug = unique_slug post.title = title @@ -140,17 +153,20 @@ def parse_post_dict(post_dict, post_id=None): post.read_time = read_time post.word_count = word_count post.image_count = image_count - image = parse_images(preview_image) + image = parse_images(preview_image, return_dict) if image is not None: post.preview_image = image # print("{id}, {title}".format(id=post_id, title=title)) # print("{recommend}, {response}, {read}".format( # recommend=recommend_count, response=response_count, read=read_time)) - return to_dict(post) + if return_dict: + return to_dict(post) + else: + return post post_list = [] - # print(post_list_payload) + print(post_list_payload) # payload -> references -> Post if type(post_list_payload) is dict: for post_id in post_list_payload.keys(): @@ -164,7 +180,7 @@ def parse_post_dict(post_dict, post_id=None): return post_list -def parse_tags(tags_list_dict): +def parse_tags(tags_list_dict, return_dict=False): if tags_list_dict is not None and len(tags_list_dict) > 0: tags_list = [] for tag_dict in tags_list_dict: @@ -175,11 +191,14 @@ def parse_tags(tags_list_dict): metadata_dict = tag_dict["metadata"] if metadata_dict is not None: tag.follower_count = metadata_dict["followerCount"] - tags_list.append(to_dict(tag)) + if return_dict: + tags_list.append(to_dict(tag)) + else: + tags_list.append(tag) return tags_list -def parse_images(image_dict): +def parse_images(image_dict, return_dict=False): if image_dict is not None: image_id = image_dict["imageId"] if "imageId" in image_dict else image_dict["id"] if image_id: @@ -191,12 +210,15 @@ def parse_images(image_dict): # .format(width=image.original_width, # height=image.original_height, # id=image.image_id) - return to_dict(image) + if return_dict: + return to_dict(image) + else: + return image else: return None -def parse_post_detail(post_url, output_format, driver): +def parse_post_detail(post_url, output_format): # driver = webdriver.Remote(desired_capabilities=DesiredCapabilities.CHROME) # for json format, just return medium json response if output_format == OutputFormat.JSON.value: @@ -206,17 +228,14 @@ def parse_post_detail(post_url, output_format, driver): else: return None else: - # for else formats, use Selenium to render page to get actual content and parse it - driver.get(post_url) - content_elements = driver.find_element_by_class_name("postArticle-content") - inner_html = BeautifulSoup(content_elements.get_attribute("innerHTML"), HTML_PARSER) + inner_html = BeautifulSoup(requests.get(post_url).text, HTML_PARSER) content_tags = inner_html.find_all() response = "" if output_format == OutputFormat.MARKDOWN.value: for i in range(0, len(content_tags)): tag = content_tags[i] - md = to_markdown(tag, driver) + md = to_markdown(tag) if md is not None and md: response += md + "\n" elif output_format == OutputFormat.HTML.value: @@ -234,7 +253,7 @@ def strip_space(text, trim_space=True): return text -def to_markdown(medium_tag, driver): +def to_markdown(medium_tag): text = strip_space(medium_tag.text) if medium_tag.name == 'h3': return '\n## {}'.format(text) diff --git a/requirements.txt b/requirements.txt index 9b67658..eba6d91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ appdirs==1.4.0 beautifulsoup4==4.5.1 bs4==0.0.1 -click==6.7 Flask==0.12 itsdangerous==0.24 Jinja2==2.9.5 diff --git a/setup.py b/setup.py index 1f0dad1..d2b275b 100644 --- a/setup.py +++ b/setup.py @@ -1,16 +1,21 @@ #!/usr/bin/python # -*- encoding: utf-8 -*- -from distutils.core import setup +from setuptools import setup setup( name='PyMedium', - version='1.0.0', - packages=['PyMedium', ], + version='1.0.3', + packages=['pymedium', ], license='The MIT License (MIT) Copyright © 2017 Engine Bai.', description='PyMedium - Unofficial Medium API', long_description=open('README', 'r').read(), author='Engine Bai', author_email='enginebai@gmail.com', url='https://github.com/enginebai/PyMedium', + install_requires=[ + 'flask', + 'bs4', + 'requests' + ], ) diff --git a/tests/test_regression.py b/tests/test_api_regression.py similarity index 96% rename from tests/test_regression.py rename to tests/test_api_regression.py index 1c6e97f..5dc68fa 100644 --- a/tests/test_regression.py +++ b/tests/test_api_regression.py @@ -12,8 +12,8 @@ class RegressionTest(unittest.TestCase): def setUp(self): self.users = ( - "sitapati", "enginebai", "101", "mobiscroll", "richard.yang.uw", "tzhongg", "jon.moore", "JonDeng", - "waymo", "quincylarson", "benjaminhardy", "jsaito", "lindacaroll", "jasonfried") + "sitapati", "enginebai", "101", "mobiscroll", "richard.yang.uw", "tzhongg", "jon.moore", "JonDeng", + "waymo", "quincylarson", "benjaminhardy", "jsaito", "lindacaroll", "jasonfried") def test_user_api(self): for user in self.users: diff --git a/tests/test_medium.py b/tests/test_medium.py new file mode 100644 index 0000000..83568b8 --- /dev/null +++ b/tests/test_medium.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 +# -*- coding: utf8 -*- +import string +import unittest +import random + +from pymedium.medium import Medium +from pymedium.model import Sort + +__author__ = "Engine Bai" + + +class TestMedium(unittest.TestCase): + def setUp(self): + self.medium = Medium() + + def test_user(self): + user = "enginebai" + self.assertIsNotNone(self.medium.get_user_profile(user)) + self.assertIsNone(self.medium.get_user_profile( + "".join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6)))) + + def test_publication(self): + publication = "dualcores-studio" + self.assertIsNotNone(self.medium.get_publication_profile(publication)) + self.assertIsNotNone(self.medium.get_publication_posts(publication)) + + def test_public_posts(self): + self.assertIsNotNone(self.medium.get_top_posts()) + self.assertIsNotNone(self.medium.get_posts_by_tag("android")) + self.assertIsNotNone(self.medium.get_posts_by_tag("android", sort=Sort.LATEST)) + + +if __name__ == "__main__": + unittest.main()