Skip to content

Feature/sdk #14

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
*PyMedium* is an unofficial Medium API written in Python with Flask. It gives developers access to user, post list, and post detail information from the [Medium](https://medium.com/) website.
This is a read-only API for accessing public information from Medium; you can customize this API to fit your requirements and deploy it on your own server.

We now also provide a Python client SDK that you can integrate directly into your application. For more details, see the [wiki page](https://github.com/enginebai/PyMedium/wiki/Python-SDK).

## Installation
Before running the PyMedium API, clone the code from this repository and install the requirements first.

Expand Down
6 changes: 2 additions & 4 deletions pymedium/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@

import requests
from flask import Flask, jsonify, Response, request
from selenium import webdriver
from pymedium.parser import parse_user, parse_publication, parse_post, parse_post_detail
from pymedium.model import OutputFormat
import pymedium.constant as const

app = Flask(__name__)
driver = webdriver.Chrome("driver/chromedriver")


@app.route("/<name>", methods=["GET"])
Expand Down Expand Up @@ -55,7 +53,7 @@ def send_request(url, headers=const.ACCEPT_HEADER, param=None, parse_function=No
if req.status_code == requests.codes.ok:
if parse_function is None:
parse_function = parse_post
model_dict = parse_function(json.loads(req.text.replace(const.ESCAPE_CHARACTERS, "").strip()))
model_dict = parse_function(json.loads(req.text.replace(const.ESCAPE_CHARACTERS, "").strip()), return_dict=True)
return jsonify(model_dict)
else:
return Response(status=req.status_code)
Expand All @@ -77,7 +75,7 @@ def get_post():
if not output_format:
output_format = OutputFormat.PLAIN_TEXT.value
if url:
detail_str = parse_post_detail(url, output_format, driver)
detail_str = parse_post_detail(url, output_format)
status_code = 200
mime_type = "text/html"
if output_format == OutputFormat.JSON.value:
Expand Down
53 changes: 53 additions & 0 deletions pymedium/medium.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/python3
# -*- encoding: utf-8 -*-
import json

import requests
from pymedium.parser import parse_user, parse_publication, parse_post, parse_post_detail
from pymedium.constant import ROOT_URL, ACCEPT_HEADER, ESCAPE_CHARACTERS, COUNT
from pymedium.model import Sort


class Medium(object):
    """Read-only client SDK for Medium's public JSON endpoints.

    Every ``get_*`` method builds a URL under ``ROOT_URL``, issues a GET
    request with ``ACCEPT_HEADER`` and hands the decoded JSON payload to one
    of the ``pymedium.parser`` functions. Each method returns whatever that
    parser returns, or ``None`` when the HTTP status is not 200 OK.
    """

    # Seconds to wait for Medium before giving up. ``requests`` has no default
    # timeout, so without one a stalled connection would block forever.
    REQUEST_TIMEOUT = 10

    def get_user_profile(self, username):
        """Return the parsed profile of the user ``username`` (without "@")."""
        return self._send_request(ROOT_URL + "@{}/latest".format(username), parse_user)

    def get_publication_profile(self, publication_name):
        """Return the parsed profile of the publication ``publication_name``."""
        return self._send_request(ROOT_URL + "{}/latest".format(publication_name), parse_publication)

    def get_user_posts(self, username, n=COUNT):
        """Return the latest posts of ``username``, requesting at most ``n``."""
        return self._send_post_request(ROOT_URL + "@{}/latest?limit={}".format(username, n))

    def get_publication_posts(self, publication_name, n=COUNT):
        """Return the latest posts of ``publication_name``, requesting at most ``n``."""
        return self._send_post_request(ROOT_URL + "{}/latest?limit={}".format(publication_name, n))

    def get_top_posts(self, n=COUNT):
        """Return the current top posts, requesting at most ``n``."""
        return self._send_post_request(ROOT_URL + "browse/top?limit={}".format(n))

    def get_posts_by_tag(self, tag, n=COUNT, sort=Sort.TOP):
        """Return posts carrying ``tag``, requesting at most ``n``.

        ``sort`` selects the listing: ``Sort.LATEST`` (or its string value
        ``"latest"``) requests the newest posts; any other value requests the
        tag's default (top) listing.
        """
        url = ROOT_URL + "tag/{}".format(tag)
        # Accept the raw string value as well, so that sort="latest" is not
        # silently treated as the top listing.
        if sort in (Sort.LATEST, Sort.LATEST.value):
            url += "/latest"
        return self._send_post_request(url + "?limit={}".format(n))

    def parse_post_content(self, url, output_format=None):
        """Return the content of the post at ``url`` via ``parse_post_detail``.

        ``output_format`` may be an ``OutputFormat`` member or its string
        value; when omitted, ``OutputFormat.PLAIN_TEXT`` is used.
        """
        # Imported locally because this module only imports ``Sort`` from
        # pymedium.model at file level.
        from pymedium.model import OutputFormat

        if output_format is None:
            output_format = OutputFormat.PLAIN_TEXT
        if isinstance(output_format, OutputFormat):
            # parse_post_detail compares against the enum's string values.
            output_format = output_format.value
        return parse_post_detail(url, output_format)

    @staticmethod
    def _send_request(url, parse_function):
        """GET ``url`` and return ``parse_function(payload)``, or ``None`` if not 200 OK."""
        import logging

        response = requests.get(url, headers=ACCEPT_HEADER, timeout=Medium.REQUEST_TIMEOUT)
        # Log instead of print so that library users control the output.
        logging.getLogger(__name__).debug("GET %s -> %s", url, response.status_code)
        if response.status_code != requests.codes.ok:
            return None
        # ESCAPE_CHARACTERS is removed from the body before it is decoded as JSON.
        payload = json.loads(response.text.replace(ESCAPE_CHARACTERS, "").strip())
        return parse_function(payload)

    @staticmethod
    def _send_post_request(url):
        """GET a post-list ``url`` and parse the response with ``parse_post``."""
        return Medium._send_request(url, parse_post)
36 changes: 36 additions & 0 deletions pymedium/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@ def author_tags(self):
def author_tags(self, tags):
self._author_tags = tags

def __str__(self, *args, **kwargs):
    """Return the string form of the dictionary built by ``to_dict(self)``."""
    attributes = to_dict(self)
    return str(attributes)

def __repr__(self, *args, **kwargs):
    """Return the same text as ``__str__``: the stringified ``to_dict(self)``."""
    as_mapping = to_dict(self)
    return str(as_mapping)


class Post:
def __init__(self, post_id):
Expand Down Expand Up @@ -190,6 +196,12 @@ def post_tags(self):
def post_tags(self, tags):
self._post_tags = tags

def __str__(self, *args, **kwargs):
    """Return the string form of the dictionary built by ``to_dict(self)``."""
    attributes = to_dict(self)
    return str(attributes)

def __repr__(self, *args, **kwargs):
    """Return the same text as ``__str__``: the stringified ``to_dict(self)``."""
    as_mapping = to_dict(self)
    return str(as_mapping)


class Publication:
def __init__(self, publication_id):
Expand Down Expand Up @@ -267,6 +279,12 @@ def post_count(self):
def post_count(self, count):
self._post_count = count

def __str__(self, *args, **kwargs):
    """Return the string form of the dictionary built by ``to_dict(self)``."""
    attributes = to_dict(self)
    return str(attributes)

def __repr__(self, *args, **kwargs):
    """Return the same text as ``__str__``: the stringified ``to_dict(self)``."""
    as_mapping = to_dict(self)
    return str(as_mapping)


class Tag:
@property
Expand Down Expand Up @@ -309,6 +327,12 @@ def image(self):
def image(self, image):
self._image = image

def __str__(self, *args, **kwargs):
    """Return the string form of the dictionary built by ``to_dict(self)``."""
    attributes = to_dict(self)
    return str(attributes)

def __repr__(self, *args, **kwargs):
    """Return the same text as ``__str__``: the stringified ``to_dict(self)``."""
    as_mapping = to_dict(self)
    return str(as_mapping)


class Image:
def __init__(self, image_id):
Expand Down Expand Up @@ -338,6 +362,12 @@ def original_width(self, width):
# def url(self, url):
# self._url = url

def __str__(self, *args, **kwargs):
    """Return the string form of the dictionary built by ``to_dict(self)``."""
    attributes = to_dict(self)
    return str(attributes)

def __repr__(self, *args, **kwargs):
    """Return the same text as ``__str__``: the stringified ``to_dict(self)``."""
    as_mapping = to_dict(self)
    return str(as_mapping)


class OutputFormat(Enum):
PLAIN_TEXT = "text"
Expand All @@ -346,10 +376,16 @@ class OutputFormat(Enum):
MARKDOWN = "md"


class Sort(Enum):
    """Orderings that can be requested for a post listing."""

    # Each member's value is the lowercase form of its name.
    TOP = "top"
    LATEST = "latest"


def to_dict(model):
    """Build a plain dictionary from the instance attributes of ``model``.

    Attributes whose value is callable, or whose name starts with a double
    underscore, are left out. Every remaining attribute name is passed
    through ``get_key`` before being used as the dictionary key.
    """
    result = {}
    for attr_name, attr_value in model.__dict__.items():
        if callable(attr_value) or attr_name.startswith("__"):
            continue
        result[get_key(attr_name)] = attr_value
    return result


def get_key(key):
    """Return ``key`` without its first leading underscore, if it has one."""
    if not key.startswith("_"):
        return key
    # Only the first underscore is dropped: "__x" becomes "_x".
    return key[1:]
71 changes: 45 additions & 26 deletions pymedium/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
__author__ = 'enginebai'


def parse_user(payload):
def parse_user(payload, return_dict=False):
user_dict = payload["payload"]["user"]
user_id = user_dict["userId"]
user = User(user_id)
Expand All @@ -27,15 +27,15 @@ def parse_user(payload):
ref_dict = payload["payload"]["references"]

# interest_tags = user_meta_dict["interestTags"]
# user.interest_tags = parse_tags(interest_tags)
# user.interest_tags = parse_tags(interest_tags, return_dict)
# author_tags = user_meta_dict["authorTags"]
# user.author_tags = parse_tags(author_tags)
# user.author_tags = parse_tags(author_tags, return_dict)

publication_ids = ref_dict["Collection"]
if publication_ids is not None and len(publication_ids.keys()) > 0:
publication_list = []
for pub_id in publication_ids.keys():
publication = parse_publication(payload, pub_id)
publication = parse_publication(payload, pub_id, return_dict)
publication_list.append(publication)
if len(publication_list) > 0:
user.publications = publication_list
Expand All @@ -54,10 +54,13 @@ def parse_user(payload):
user.following_count = following_count
user.followedby_count = followby_count

return to_dict(user)
if return_dict:
return to_dict(user)
else:
return user


def parse_publication(payload, pub_id=None):
def parse_publication(payload, pub_id=None, return_dict=False):
if pub_id is None:
pub_id = payload["payload"]["collection"]["id"]
publication_dict = payload["payload"]["references"]["Collection"][pub_id]
Expand All @@ -66,25 +69,29 @@ def parse_publication(payload, pub_id=None):
publication.description = publication_dict["description"]
publication.creator_user_id = publication_dict["creatorId"]
image_dict = publication_dict["image"]
image = parse_images(image_dict)
image = parse_images(image_dict, return_dict)
if image is not None:
publication.image = image
logo_dict = publication_dict["logo"]
logo = parse_images(logo_dict)
logo = parse_images(logo_dict, return_dict)
if logo is not None:
publication.logo = logo
publication.follower_count = publication_dict["metadata"]["followerCount"]
publication.post_count = publication_dict["metadata"]["postCount"]
if "postCount" in publication_dict["metadata"]:
publication.post_count = publication_dict["metadata"]["postCount"]

if "domain" in publication_dict:
publication.url = "http://" + publication_dict["domain"]
else:
publication.url = ROOT_URL + publication_dict["slug"]
publication.name = publication_dict["slug"]
return to_dict(publication)
if return_dict:
return to_dict(publication)
else:
return publication


def parse_post(payload):
def parse_post(payload, return_dict=False):
# get the different parsing keys
post_detail_parsing_keys = ("payload", "references", "Post")
if post_detail_parsing_keys is None:
Expand All @@ -93,6 +100,12 @@ def parse_post(payload):
for key in post_detail_parsing_keys:
post_list_payload = post_list_payload.get(key)

if post_list_payload is None:
post_detail_parsing_keys = ("payload", "posts")
post_list_payload = payload
for key in post_detail_parsing_keys:
post_list_payload = post_list_payload.get(key)

def parse_post_dict(post_dict, post_id=None):
if post_id is None:
post_id = post_dict["id"]
Expand Down Expand Up @@ -129,7 +142,7 @@ def parse_post_dict(post_dict, post_id=None):
image_count = virtual_dict["imageCount"]
preview_image = virtual_dict["previewImage"]
# post_tags = virtual_dict["tags"]
# post.post_tags = parse_tags(post_tags)
# post.post_tags = parse_tags(post_tags, return_dict)

# post.unique_slug = unique_slug
post.title = title
Expand All @@ -140,17 +153,20 @@ def parse_post_dict(post_dict, post_id=None):
post.read_time = read_time
post.word_count = word_count
post.image_count = image_count
image = parse_images(preview_image)
image = parse_images(preview_image, return_dict)
if image is not None:
post.preview_image = image

# print("{id}, {title}".format(id=post_id, title=title))
# print("{recommend}, {response}, {read}".format(
# recommend=recommend_count, response=response_count, read=read_time))
return to_dict(post)
if return_dict:
return to_dict(post)
else:
return post

post_list = []
# print(post_list_payload)
print(post_list_payload)
# payload -> references -> Post
if type(post_list_payload) is dict:
for post_id in post_list_payload.keys():
Expand All @@ -164,7 +180,7 @@ def parse_post_dict(post_dict, post_id=None):
return post_list


def parse_tags(tags_list_dict):
def parse_tags(tags_list_dict, return_dict=False):
if tags_list_dict is not None and len(tags_list_dict) > 0:
tags_list = []
for tag_dict in tags_list_dict:
Expand All @@ -175,11 +191,14 @@ def parse_tags(tags_list_dict):
metadata_dict = tag_dict["metadata"]
if metadata_dict is not None:
tag.follower_count = metadata_dict["followerCount"]
tags_list.append(to_dict(tag))
if return_dict:
tags_list.append(to_dict(tag))
else:
tags_list.append(tag)
return tags_list


def parse_images(image_dict):
def parse_images(image_dict, return_dict=False):
if image_dict is not None:
image_id = image_dict["imageId"] if "imageId" in image_dict else image_dict["id"]
if image_id:
Expand All @@ -191,12 +210,15 @@ def parse_images(image_dict):
# .format(width=image.original_width,
# height=image.original_height,
# id=image.image_id)
return to_dict(image)
if return_dict:
return to_dict(image)
else:
return image
else:
return None


def parse_post_detail(post_url, output_format, driver):
def parse_post_detail(post_url, output_format):
# driver = webdriver.Remote(desired_capabilities=DesiredCapabilities.CHROME)
# for json format, just return medium json response
if output_format == OutputFormat.JSON.value:
Expand All @@ -206,17 +228,14 @@ def parse_post_detail(post_url, output_format, driver):
else:
return None
else:
# for else formats, use Selenium to render page to get actual content and parse it
driver.get(post_url)
content_elements = driver.find_element_by_class_name("postArticle-content")
inner_html = BeautifulSoup(content_elements.get_attribute("innerHTML"), HTML_PARSER)
inner_html = BeautifulSoup(requests.get(post_url).text, HTML_PARSER)
content_tags = inner_html.find_all()

response = ""
if output_format == OutputFormat.MARKDOWN.value:
for i in range(0, len(content_tags)):
tag = content_tags[i]
md = to_markdown(tag, driver)
md = to_markdown(tag)
if md is not None and md:
response += md + "\n"
elif output_format == OutputFormat.HTML.value:
Expand All @@ -234,7 +253,7 @@ def strip_space(text, trim_space=True):
return text


def to_markdown(medium_tag, driver):
def to_markdown(medium_tag):
text = strip_space(medium_tag.text)
if medium_tag.name == 'h3':
return '\n## {}'.format(text)
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
appdirs==1.4.0
beautifulsoup4==4.5.1
bs4==0.0.1
click==6.7
Flask==0.12
itsdangerous==0.24
Jinja2==2.9.5
Expand Down
Loading