From aa1599de669d3aa5e5428a589d4230c9bb5cd613 Mon Sep 17 00:00:00 2001 From: mberaha Date: Sat, 29 May 2021 15:44:42 +0200 Subject: [PATCH 01/57] somehow documenting protos --- docs/Makefile | 5 +- docs/conf.py | 20 +- docs/source/modules.rst | 7 + docs/source/protos.html | 2634 +++++++++++++++++++++++++++++ docs/source/pybmix.core.rst | 45 + docs/source/pybmix.estimators.rst | 29 + docs/source/pybmix.proto.rst | 5 + docs/source/pybmix.rst | 32 + docs/source/pybmix.utils.rst | 29 + 9 files changed, 2799 insertions(+), 7 deletions(-) create mode 100644 docs/source/modules.rst create mode 100644 docs/source/protos.html create mode 100644 docs/source/pybmix.core.rst create mode 100644 docs/source/pybmix.estimators.rst create mode 100644 docs/source/pybmix.proto.rst create mode 100644 docs/source/pybmix.rst create mode 100644 docs/source/pybmix.utils.rst diff --git a/docs/Makefile b/docs/Makefile index d4bb2cbb..d8696e95 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -6,7 +6,8 @@ SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . -BUILDDIR = _build +BUILDDIR = $(PWD)/_build +PROTO_DIR = $(PWD)/../pybmix/core/pybmixcpp/bayesmix/proto # Put it first so that "make" without argument is like "make help". help: @@ -18,3 +19,5 @@ help: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + docker run --rm -v $(PWD)/source:/out -v $(PROTO_DIR):/protos \ + pseudomuto/protoc-gen-doc --doc_opt=html,protos.html diff --git a/docs/conf.py b/docs/conf.py index f77b76bc..01a2c97f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,9 +10,10 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) +import os +import sys +sys.path.insert(0, os.path.abspath('.')) +sys.path.insert(0, os.path.abspath('../')) # -- Project information ----------------------------------------------------- @@ -31,6 +32,9 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.intersphinx', + 'sphinx.ext.napoleon' ] # Add any paths that contain templates here, relative to this directory. @@ -39,7 +43,11 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = [ + '_build', 'Thumbs.db', '.DS_Store', '../lib', + '../pybmix/core/pybmixcpp'] + +# html_extra_path = ["protos.html"] # -- Options for HTML output ------------------------------------------------- @@ -47,9 +55,9 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'alabaster' +html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] \ No newline at end of file +html_static_path = ['_static'] diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 00000000..af935c06 --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +pybmix +====== + +.. toctree:: + :maxdepth: 4 + + pybmix diff --git a/docs/source/protos.html b/docs/source/protos.html new file mode 100644 index 00000000..8fe3ec47 --- /dev/null +++ b/docs/source/protos.html @@ -0,0 +1,2634 @@ + + + + + Protocol Documentation + + + + + + + + + + +

Protocol Documentation

+ +

Table of Contents

+ +
+ +
+ + + +
+

algorithm_id.proto

Top +
+

+ + + + +

AlgorithmId

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameNumberDescription
UNKNOWN_ALGORITHM0

Neal21

Neal32

Neal83

BlockedGibbs4

SplitMerge5

Slice6

+ + + + + + + +
+

algorithm_params.proto

Top +
+

+ + +

AlgorithmParams

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
algo_idstring

rng_seeduint32

iterationsuint32

burninuint32

init_num_clustersuint32

neal8_n_auxuint32

+ + + + + + + + + + + + + +
+

algorithm_state.proto

Top +
+

+ + +

AlgorithmState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
cluster_statesAlgorithmState.ClusterStaterepeated

cluster_allocsint32repeated

mixing_stateMixingState

iteration_numint32

+ + + + + +

AlgorithmState.ClusterState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
uni_ls_stateUniLSState

multi_ls_stateMultiLSState

lin_reg_uni_ls_stateLinRegUniLSState

cardinalityint32

+ + + + + + + + + + + + + +
+

distribution.proto

Top +
+

+ + +

BetaDistribution

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
shape_adouble

shape_bdouble

+ + + + + +

GammaDistribution

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
shapedouble

ratedouble

+ + + + + +

InvWishartDistribution

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
deg_freedouble

scaleMatrix

+ + + + + +

MultiNormalDistribution

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meanVector

varMatrix

+ + + + + +

UniNormalDistribution

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meandouble

vardouble

+ + + + + + + + + + + + + +
+

hierarchy_id.proto

Top +
+

+ + + + +

HierarchyId

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameNumberDescription
UNKNOWN_HIERARCHY0

NNIG1

NNW2

LinRegUni3

+ + + + + + + +
+

hierarchy_prior.proto

Top +
+

+ + +

LinRegUniPrior

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
fixed_valuesLinRegUniPrior.FixedValues

+ + + + + +

LinRegUniPrior.FixedValues

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meanVector

var_scalingMatrix

shapedouble

scaledouble

+ + + + + +

NNIGPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
fixed_valuesNNIGPrior.FixedValues

normal_mean_priorNNIGPrior.NormalMeanPrior

ngg_priorNNIGPrior.NGGPrior

+ + + + + +

NNIGPrior.FixedValues

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meandouble

var_scalingdouble

shapedouble

scaledouble

+ + + + + +

NNIGPrior.NGGPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
mean_priorUniNormalDistribution

var_scaling_priorGammaDistribution

shapedouble

scale_priorGammaDistribution

+ + + + + +

NNIGPrior.NormalMeanPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
mean_priorUniNormalDistribution

var_scalingdouble

shapedouble

scaledouble

+ + + + + +

NNWPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
fixed_valuesNNWPrior.FixedValues

normal_mean_priorNNWPrior.NormalMeanPrior

ngiw_priorNNWPrior.NGIWPrior

+ + + + + +

NNWPrior.FixedValues

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meanVector

var_scalingdouble

deg_freedouble

scaleMatrix

+ + + + + +

NNWPrior.NGIWPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
mean_priorMultiNormalDistribution

var_scaling_priorGammaDistribution

deg_freedouble

scale_priorInvWishartDistribution

+ + + + + +

NNWPrior.NormalMeanPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
mean_priorMultiNormalDistribution

var_scalingdouble

deg_freedouble

scaleMatrix

+ + + + + + + + + + + + + +
+

ls_state.proto

Top +
+

+ + +

LinRegUniLSState

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
regression_coeffsVector

vardouble

+ + + + + +

MultiLSState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meanVector

precMatrix

prec_cholMatrix

+ + + + + +

UniLSState

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
meandouble

vardouble

+ + + + + + + + + + + + + +
+

matrix.proto

Top +
+

+ + +

Matrix

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
rowsint32

colsint32

datadoublerepeated

rowmajorbool

+ + + + + +

Vector

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
sizeint32

datadoublerepeated

+ + + + + + + + + + + + + +
+

mixing_id.proto

Top +
+

+ + + + +

MixingId

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameNumberDescription
UNKNOWN_MIXING0

DP1

PY2

LogSB3

TruncSB4

+ + + + + + + +
+

mixing_prior.proto

Top +
+

+ + +

DPPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
fixed_valueDPPrior.FixedValue

gamma_priorDPPrior.GammaPrior

+ + + + + +

DPPrior.FixedValue

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
totalmassdouble

+ + + + + +

DPPrior.GammaPrior

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
totalmass_priorGammaDistribution

+ + + + + +

LogSBPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
normal_priorMultiNormalDistribution

step_sizedouble

TODO move?

num_componentsuint32

+ + + + + +

PYPrior

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
fixed_valuesPYPrior.FixedValues

+ + + + + +

PYPrior.FixedValues

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
strengthdouble

discountdouble

+ + + + + +

TruncSBPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
beta_priorsTruncSBPrior.BetaPriors

dp_priorTruncSBPrior.DPPrior

py_priorTruncSBPrior.PYPrior

num_componentsuint32

+ + + + + +

TruncSBPrior.BetaPriors

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
beta_distributionsBetaDistributionrepeated

+ + + + + +

TruncSBPrior.DPPrior

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
totalmassdouble

+ + + + + +

TruncSBPrior.PYPrior

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
strengthdouble

discountdouble

+ + + + + + + + + + + + + +
+

mixing_state.proto

Top +
+

+ + +

DPState

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
totalmassdouble

+ + + + + +

LogSBState

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
regression_coeffsMatrix

+ + + + + +

MixingState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
dp_stateDPState

py_statePYState

log_sb_stateLogSBState

trunc_sb_stateTruncSBState

+ + + + + +

PYState

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
strengthdouble

discountdouble

+ + + + + +

TruncSBState

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
sticksVector

logweightsVector

+ + + + + + + + + + + + + +
+

semihdp.proto

Top +
+

+ + +

SemiHdpParams

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
pseudo_priorSemiHdpParams.PseudoPriorParams

dirichlet_concentrationdouble

rest_allocs_updatestring

Either "full", "metro_base", "metro_dist"

totalmass_restdouble

totalmass_hdpdouble

w_priorSemiHdpParams.WPriorParams

+ + + + + +

SemiHdpParams.PseudoPriorParams

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
card_weightdouble

mean_perturb_sddouble

var_perturb_fracdouble

+ + + + + +

SemiHdpParams.WPriorParams

+

+ + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
shape1double

shape2double

+ + + + + +

SemiHdpState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
restaurantsSemiHdpState.RestaurantStaterepeated

groupsSemiHdpState.GroupStaterepeated

tausSemiHdpState.ClusterStaterepeated

cint32repeated

wdouble

+ + + + + +

SemiHdpState.ClusterState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
uni_ls_stateUniLSState

multi_ls_stateMultiLSState

lin_reg_uni_ls_stateLinRegUniLSState

cardinalityint32

+ + + + + +

SemiHdpState.GroupState

+

+ + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
cluster_allocsint32repeated

+ + + + + +

SemiHdpState.RestaurantState

+

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FieldTypeLabelDescription
theta_starsSemiHdpState.ClusterStaterepeated

n_by_clusint32repeated

table_to_sharedint32repeated

table_to_idioint32repeated

+ + + + + + + + + + + + + +

Scalar Value Types

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
.proto TypeNotesC++JavaPythonGoC#PHPRuby
doubledoubledoublefloatfloat64doublefloatFloat
floatfloatfloatfloatfloat32floatfloatFloat
int32Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint32 instead.int32intintint32intintegerBignum or Fixnum (as required)
int64Uses variable-length encoding. Inefficient for encoding negative numbers – if your field is likely to have negative values, use sint64 instead.int64longint/longint64longinteger/stringBignum
uint32Uses variable-length encoding.uint32intint/longuint32uintintegerBignum or Fixnum (as required)
uint64Uses variable-length encoding.uint64longint/longuint64ulonginteger/stringBignum or Fixnum (as required)
sint32Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int32s.int32intintint32intintegerBignum or Fixnum (as required)
sint64Uses variable-length encoding. Signed int value. These more efficiently encode negative numbers than regular int64s.int64longint/longint64longinteger/stringBignum
fixed32Always four bytes. More efficient than uint32 if values are often greater than 2^28.uint32intintuint32uintintegerBignum or Fixnum (as required)
fixed64Always eight bytes. More efficient than uint64 if values are often greater than 2^56.uint64longint/longuint64ulonginteger/stringBignum
sfixed32Always four bytes.int32intintint32intintegerBignum or Fixnum (as required)
sfixed64Always eight bytes.int64longint/longint64longinteger/stringBignum
boolboolbooleanbooleanboolboolbooleanTrueClass/FalseClass
stringA string must always contain UTF-8 encoded or 7-bit ASCII text.stringStringstr/unicodestringstringstringString (UTF-8)
bytesMay contain any arbitrary sequence of bytes.stringByteStringstr[]byteByteStringstringString (ASCII-8BIT)
+ + + diff --git a/docs/source/pybmix.core.rst b/docs/source/pybmix.core.rst new file mode 100644 index 00000000..aca84b01 --- /dev/null +++ b/docs/source/pybmix.core.rst @@ -0,0 +1,45 @@ +pybmix.core package +=================== + +Submodules +---------- + +pybmix.core.chain module +------------------------ + +.. automodule:: pybmix.core.chain + :members: + :undoc-members: + :show-inheritance: + +pybmix.core.hierarchy module +---------------------------- + +.. automodule:: pybmix.core.hierarchy + :members: + :undoc-members: + :show-inheritance: + +pybmix.core.mixing module +------------------------- + +.. automodule:: pybmix.core.mixing + :members: + :undoc-members: + :show-inheritance: + +pybmix.core.mixture\_model module +--------------------------------- + +.. automodule:: pybmix.core.mixture_model + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: pybmix.core + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pybmix.estimators.rst b/docs/source/pybmix.estimators.rst new file mode 100644 index 00000000..2b07f6dd --- /dev/null +++ b/docs/source/pybmix.estimators.rst @@ -0,0 +1,29 @@ +pybmix.estimators package +========================= + +Submodules +---------- + +pybmix.estimators.cluster\_estimator module +------------------------------------------- + +.. automodule:: pybmix.estimators.cluster_estimator + :members: + :undoc-members: + :show-inheritance: + +pybmix.estimators.density\_estimator module +------------------------------------------- + +.. automodule:: pybmix.estimators.density_estimator + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: pybmix.estimators + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pybmix.proto.rst b/docs/source/pybmix.proto.rst new file mode 100644 index 00000000..94f8ae4a --- /dev/null +++ b/docs/source/pybmix.proto.rst @@ -0,0 +1,5 @@ +pybmix.proto package +=================== + +.. raw:: html + :file: protos.html \ No newline at end of file diff --git a/docs/source/pybmix.rst b/docs/source/pybmix.rst new file mode 100644 index 00000000..04d9e32f --- /dev/null +++ b/docs/source/pybmix.rst @@ -0,0 +1,32 @@ +pybmix package +============== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + pybmix.core + pybmix.estimators + pybmix.proto + pybmix.utils + +Submodules +---------- + +pybmix.example module +--------------------- + +.. automodule:: pybmix.example + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. automodule:: pybmix + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/pybmix.utils.rst b/docs/source/pybmix.utils.rst new file mode 100644 index 00000000..f1e4bc84 --- /dev/null +++ b/docs/source/pybmix.utils.rst @@ -0,0 +1,29 @@ +pybmix.utils package +==================== + +Submodules +---------- + +pybmix.utils.combinatorials module +---------------------------------- + +.. automodule:: pybmix.utils.combinatorials + :members: + :undoc-members: + :show-inheritance: + +pybmix.utils.proto\_utils module +-------------------------------- + +.. automodule:: pybmix.utils.proto_utils + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: pybmix.utils + :members: + :undoc-members: + :show-inheritance: From 0043f9bfdfbbd348bb3930505540226b1a401594 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 16:36:33 +0200 Subject: [PATCH 02/57] using notebooks for documentation --- docs/conf.py | 13 +- .../examples/estimate_univ_density.ipynb | 159 +++++++----------- .../examples/prior_elicitation.ipynb | 0 docs/index.rst | 6 + docs/source/pybmix.proto.rst | 2 +- docs/source/pybmix.rst | 1 + 6 files changed, 79 insertions(+), 102 deletions(-) rename examples/Estimate Univariate Density.ipynb => docs/examples/estimate_univ_density.ipynb (51%) rename examples/Compare Prior Number of Clusters.ipynb => docs/examples/prior_elicitation.ipynb (100%) diff --git a/docs/conf.py b/docs/conf.py index 01a2c97f..90a74f0a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,6 +14,9 @@ import sys sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../')) +sys.path.insert(0, os.path.abspath('../examples')) + + # -- Project information ----------------------------------------------------- @@ -32,9 +35,15 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', - 'sphinx.ext.napoleon' + 'sphinx.ext.mathjax', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + 'matplotlib.sphinxext.plot_directive', + 'nbsphinx', + 'sphinx_rtd_theme', ] # Add any paths that contain templates here, relative to this directory. 
diff --git a/examples/Estimate Univariate Density.ipynb b/docs/examples/estimate_univ_density.ipynb similarity index 51% rename from examples/Estimate Univariate Density.ipynb rename to docs/examples/estimate_univ_density.ipynb index 0a04a116..4d25d122 100644 --- a/examples/Estimate Univariate Density.ipynb +++ b/docs/examples/estimate_univ_density.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Univariate Density Estimation via Dirichlet Process Mixture" + ] + }, { "cell_type": "code", "execution_count": null, @@ -17,12 +24,15 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "import pybmix.proto.algorithm_id_pb2 as algo_id" + "## Data Generation\n", + "\n", + "We generate data from a two-component mixture model\n", + "$$\n", + "y_i \\sim \\frac{1}{2} \\mathcal N(-3, 1) + \\frac{1}{2} \\mathcal N(3, 1), \\quad i=1, \\ldots, 200\n", + "$$" ] }, { @@ -31,14 +41,10 @@ "metadata": {}, "outputs": [], "source": [ - "algo_id.DESCRIPTOR.message_types_by_name()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "generate data" + "def sample_from_mixture(weigths, means, sds, n_data):\n", + " n_comp = len(weigths)\n", + " clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data)\n", + " return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc])" ] }, { @@ -47,8 +53,8 @@ "metadata": {}, "outputs": [], "source": [ - "y = np.concatenate(\n", - " [np.random.normal(loc=3, size=100), np.random.normal(loc=-3, size=100)])\n", + "y = sample_from_mixture(\n", + " np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200)\n", "plt.hist(y)\n", "plt.show()" ] @@ -57,7 +63,38 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "define the mixture model" + "## The statistical model\n", + "\n", + "We assume the following model\n", + "$$\n", + "y_i | \\tilde{p} \\sim f(\\cdot) = \\int_{R 
\\times R^+} \\mathcal{N}(\\cdot | \\mu, \\sigma^2) \\tilde{p}(d\\mu, d\\sigma^2)\n", + "$$\n", + "$$\n", + "\\tilde{p} \\sim DP(\\alpha, G_0)\n", + "$$\n", + "where $DP(\\alpha, G_0)$ is the Dirichlet Process with base measure $\\alpha G_0$. \n", + "\n", + "Given the stick-breaking representation of the Dirichlet Process, the model is equivalently written as\n", + "$$\n", + "y_i | \\{w_h\\}_h \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim f(\\cdot) = \\sum_{h=1}^\\infty w_h \\mathcal{N}(\\cdot | \\mu_h, \\sigma_h^2)\n", + "$$\n", + "$$\n", + "\\{w_h\\}_h \\sim GEM(\\alpha)\n", + "$$\n", + "$$\n", + " \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim G_0\n", + "$$\n", + "\n", + "In pybmix we take advantage of the second representation, and specify a MixtureModel in terms of a Mixing and a Hierarchy. The Mixing is the prior for the weights, while the Hierarchy combines the base measure $G_0$ with the kernel of the mixture (in this case, the univariate Gaussian distribution)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we assume that $\\alpha = 5$ and $G_0(d\\mu, d\\sigma^2) = \\mathcal N(d\\mu | \\mu_0, \\lambda \\sigma^2) \\times IG(d\\sigma^2 | a, b)$, i.e., $G_0$ is a normal-inverse gamma distribution. 
\n", + "\n", + "The parameters $(\\mu_0, \\lambda, a , b)$ of $G_0$ can be set automatically by the method 'make_default_fixed_params' which takes as input the observations and a \"guess\" on the number of clusters" ] }, { @@ -66,7 +103,7 @@ "metadata": {}, "outputs": [], "source": [ - "mixing = DirichletProcessMixing(5)\n", + "mixing = DirichletProcessMixing(total_mass=5)\n", "hierarchy = UnivariateNormal()\n", "hierarchy.make_default_fixed_params(y, 2)\n", "mixture = MixtureModel(mixing, hierarchy)" @@ -76,7 +113,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "run mcmc" + "## Run MCMC simulations" ] }, { @@ -85,14 +122,18 @@ "metadata": {}, "outputs": [], "source": [ - "mixture.run_mcmc(y, algorithm=\"BlockedGibbs\", niter=2000, nburn=1000)" + "mixture.run_mcmc(y, algorithm=\"Neal2\", niter=2000, nburn=1000)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "get the density estimates: fix a grid where to estimate the densities; the method 'estimate_density' returns a matrix of shape [niter - nburn, len(grid)]" + "## Get the density estimates\n", + "\n", + "1) fix a grid where to estimate the densities\n", + "\n", + "2) the method 'estimate_density' returns a matrix of shape [niter - nburn, len(grid)]" ] }, { @@ -137,86 +178,6 @@ "plt.legend()\n", "plt.show()" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "plot the chain of the number of clusters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mcmc_chain = mixture.get_chain()\n", - "\n", - "# extract the cluster allocations\n", - "cluster_alloc_chain = mcmc_chain.extract(\"cluster_allocs\")\n", - "\n", - "# cluster alloc chain is a matrix of shape [niter - nburn, ndata], we must count at\n", - "# each row the number of unique values\n", - "n_clust_chain = np.apply_along_axis(lambda x: len(np.unique(x)), 1, \n", - " cluster_alloc_chain)\n", - "\n", - "fig, axes = plt.subplots(nrows=1, ncols=2, 
figsize=(8, 4))\n", - "axes[0].vlines(np.arange(len(n_clust_chain)), n_clust_chain - 0.3, n_clust_chain + 0.3)\n", - "axes[0].set_title(\"Traceplot\")\n", - "\n", - "clusgrid = np.arange(1, 10)\n", - "probas = np.zeros_like(clusgrid)\n", - "for i, c in enumerate(clusgrid):\n", - " probas[i] = np.sum(n_clust_chain == c)\n", - "\n", - "probas = probas / np.sum(probas)\n", - "axes[1].bar(clusgrid, probas)\n", - "axes[1].set_title(\"Posterior number of clusters\")\n", - " \n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Find a point estimate for the clustering" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybmix.estimators.cluster_estimator import ClusterEstimator\n", - "\n", - "clus_est = ClusterEstimator(mixture)\n", - "best_clust = clus_est.get_point_estimate()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(y, density=True, alpha=0.3)\n", - "plt.plot(grid, np.mean(densities, axis=0), lw=3, label=\"predictive density\")\n", - "for cluster_idx in clus_est.group_by_cluster(best_clust):\n", - " data = y[cluster_idx]\n", - " plt.scatter(data, np.zeros_like(data) + 5e-3)\n", - " \n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note how the posterior mode of the number of clusters is 3, but the point estimate for the best clustering consists of 2 clusters" - ] } ], "metadata": { diff --git a/examples/Compare Prior Number of Clusters.ipynb b/docs/examples/prior_elicitation.ipynb similarity index 100% rename from examples/Compare Prior Number of Clusters.ipynb rename to docs/examples/prior_elicitation.ipynb diff --git a/docs/index.rst b/docs/index.rst index 55c7053a..67e7da13 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,7 +10,13 @@ Welcome to pybmix's documentation! :maxdepth: 2 :caption: Contents: +.. 
toctree:: + :maxdepth: 1 + :caption: Tutorials: + examples/estimate_univ_density.ipynb + examples/clustering_univ_data.ipynb + examples/prior_elicitation.ipynb Indices and tables ================== diff --git a/docs/source/pybmix.proto.rst b/docs/source/pybmix.proto.rst index 94f8ae4a..a7e44764 100644 --- a/docs/source/pybmix.proto.rst +++ b/docs/source/pybmix.proto.rst @@ -1,5 +1,5 @@ pybmix.proto package -=================== +==================== .. raw:: html :file: protos.html \ No newline at end of file diff --git a/docs/source/pybmix.rst b/docs/source/pybmix.rst index 04d9e32f..d4b003a7 100644 --- a/docs/source/pybmix.rst +++ b/docs/source/pybmix.rst @@ -1,6 +1,7 @@ pybmix package ============== + Subpackages ----------- From ea7ffef415e9ffcecd19837e8c3ebab9e58d18da Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 16:37:16 +0200 Subject: [PATCH 03/57] added example --- docs/examples/clustering_univ_data.ipynb | 219 +++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 docs/examples/clustering_univ_data.ipynb diff --git a/docs/examples/clustering_univ_data.ipynb b/docs/examples/clustering_univ_data.ipynb new file mode 100644 index 00000000..503a473f --- /dev/null +++ b/docs/examples/clustering_univ_data.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clustering of univariate data via Dirichlet Process Mixture" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a continuation of 'estimate_univ_density'. Make sure to check it before going through this tutorial!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pybmix.core.mixing import DirichletProcessMixing, StickBreakMixing\n", + "from pybmix.core.hierarchy import UnivariateNormal\n", + "from pybmix.core.mixture_model import MixtureModel\n", + "\n", + "np.random.seed(2021)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DP and clustering\n", + "\n", + "Recall that $\\tilde p \\sim DP(\\alpha, G_0)$ means that $\\tilde p = \\sum_{h=1}^\\infty w_h \\delta_{\\tau_h}$ with $\\{w_h\\}_h \\sim GEM(\\alpha)$ and $\\{\\tau_h\\}_h \\sim G_0$. Hence, realizations from a DP are almost surely discrete probability measures.\n", + "\n", + "Hence, sampling \n", + "$$\n", + "\\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim \\tilde{p}\n", + "$$\n", + "$$\n", + "\\tilde{p} \\sim DP(\\alpha, G_0)\n", + "$$\n", + "entails that with positive probability $\\theta_i = \\theta_j$ (with $i \\neq j$). In a sample of size $n$ there will be $k \\leq n$ unique values $\\theta^*_1, \\ldots, \\theta^*_k$ among the $\\theta_i$'s and clusters are defined as $C_j = \\{i : \\theta_i = \\theta^*_j \\}$.\n", + "\n", + "When considering a mixture model, the $\\theta_i$'s are not observations but latent variables. In the case of a univariate normal mixture model, $\\theta_i = (\\mu_i, \\sigma^2_i)$ and the model can be written as\n", + "$$\n", + " y_i | \\theta_i = (\\mu_i, \\sigma^2_i) \\sim \\mathcal N(\\mu_i, \\sigma^2_i)\n", + "$$\n", + "$$\n", + " \\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim \\tilde{p}\n", + "$$\n", + "$$\n", + " \\tilde{p} \\sim DP(\\alpha, G_0)\n", + "$$\n", + "and the clustering among the observations $y_i$'s is inherited by the clustering among the $\\theta_i$'s." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's go back to the previous example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sample_from_mixture(weigths, means, sds, n_data):\n", + " n_comp = len(weigths)\n", + " clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data)\n", + " return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc])\n", + "\n", + "y = sample_from_mixture(\n", + " np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200)\n", + "\n", + "mixing = DirichletProcessMixing(total_mass=5)\n", + "hierarchy = UnivariateNormal()\n", + "hierarchy.make_default_fixed_params(y, 2)\n", + "mixture = MixtureModel(mixing, hierarchy)\n", + "\n", + "mixture.run_mcmc(y, algorithm=\"Neal2\", niter=2000, nburn=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can extract the cluster allocation MCMC chain very easily" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mcmc_chain = mixture.get_chain()\n", + "cluster_alloc_chain = mcmc_chain.extract(\"cluster_allocs\")\n", + "print(cluster_alloc_chain.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "cluster_alloc_chain is a matrix of shape [niter - nburn, ndata]. 
\n", + "\n", + "To get the posterior distribution of the number of clusters, we count in each row the number of unique values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_clust_chain = np.apply_along_axis(lambda x: len(np.unique(x)), 1, \n", + " cluster_alloc_chain)\n", + "\n", + "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))\n", + "axes[0].vlines(np.arange(len(n_clust_chain)), n_clust_chain - 0.3, n_clust_chain + 0.3)\n", + "axes[0].set_title(\"Traceplot\")\n", + "\n", + "clusgrid = np.arange(1, 10)\n", + "probas = np.zeros_like(clusgrid)\n", + "for i, c in enumerate(clusgrid):\n", + " probas[i] = np.sum(n_clust_chain == c)\n", + "\n", + "probas = probas / np.sum(probas)\n", + "axes[1].bar(clusgrid, probas)\n", + "axes[1].set_xticks(clusgrid)\n", + "axes[1].set_title(\"Posterior number of clusters\")\n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's inspect two iterations: the first one and the last one, and look at the cluster allocations of the first 5 observations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"First iteration: \", cluster_alloc_chain[0][:5])\n", + "print(\"Last iteration: \", cluster_alloc_chain[-1][:5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Observe that the clustering are identicals: the one is made of observations $\\{1, 2, 5\\}$ and the other cluster of observations $\\{3, 4\\}$. However the labels associated to each cluster are differend depending on the iterations: in the first iteration, $\\{1, 2, 5\\}$ are the first cluster (0th cluster) and $\\{3, 4\\}$ are the second cluster, while in the last iteration the opposite happens.\n", + "\n", + "This is due to the so-called \"label-switching\". 
Usually to interpret the clustering result, a suitable point-estimate is chosen to minimize a loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pybmix.estimators.cluster_estimator import ClusterEstimator\n", + "\n", + "clus_est = ClusterEstimator(mixture)\n", + "best_clust = clus_est.get_point_estimate()\n", + "\n", + "plt.hist(y, density=True, alpha=0.3)\n", + "for cluster_idx in clus_est.group_by_cluster(best_clust):\n", + " data = y[cluster_idx]\n", + " plt.scatter(data, np.zeros_like(data) + 5e-3)\n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note how the posterior mode of the number of clusters is 3, but the point estimate for the best clustering consists of 2 clusters" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 55dd3672ead8fd7d2b1506cff22cbad92a2bf24d Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 16:37:40 +0200 Subject: [PATCH 04/57] added requirements for docs --- docs/requirements.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..dcc9d4b7 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,9 @@ +matplotlib>=3.3.4 +pandoc>=1.0.2 +sphinx>=3.5.1 +nbsphinx>=0.8.0 +recommonmark>=0.7.1 +sphinx_rtd_theme>=0.5.1 +sphinx_autodoc_typehints>=1.11.1 +ipython>=7.20.0 +ipykernel>=5.5.0 From 164aa3c66c3793fe0ed3654abc05c2b85d35a643 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 
16:45:40 +0200 Subject: [PATCH 05/57] updated tutorials --- docs/examples/prior_elicitation.ipynb | 22 ++++++++++++---------- docs/index.rst | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/examples/prior_elicitation.ipynb b/docs/examples/prior_elicitation.ipynb index 7efa0592..e138a7b8 100644 --- a/docs/examples/prior_elicitation.ipynb +++ b/docs/examples/prior_elicitation.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prior elicitation\n", + "\n", + "What is the meaning of the parameters in the various processes? Let's study the prior distribution of the number of clusters!" + ] + }, { "cell_type": "code", "execution_count": null, @@ -69,14 +78,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Stick Breaking prior" + "## Stick Breaking prior" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Truncation of a Dirichlet Process" + "### Truncation of a Dirichlet Process" ] }, { @@ -106,7 +115,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Truncation of a Pitman-Yor Process" + "### Truncation of a Pitman-Yor Process" ] }, { @@ -133,13 +142,6 @@ "axes[1, 1].legend(ncol=2, fontsize=14, bbox_to_anchor=(1.2, -0.1))\n", "plt.show()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/docs/index.rst b/docs/index.rst index 67e7da13..e042170b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -15,7 +15,7 @@ Welcome to pybmix's documentation! 
:caption: Tutorials: examples/estimate_univ_density.ipynb - examples/clusterng_univ_data.ipynb + examples/clustering_univ_data.ipynb examples/prior_elicitation.ipynb Indices and tables From 3abba0afe2d59cc2f4ceb9408fa35819c5ac0b6e Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 17:02:58 +0200 Subject: [PATCH 06/57] moved cmake out of pip install --- .readthedocs.yaml | 5 +++++ docs/conf.py | 3 --- setup.py | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 8f260d8d..9b0649f6 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -9,6 +9,11 @@ version: 1 sphinx: configuration: docs/conf.py +build: + apt_packages: + - python3-dev + - cmake + # Optionally set the version of Python and requirements required to build your docs python: version: 3.8 diff --git a/docs/conf.py b/docs/conf.py index 90a74f0a..463b60d3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,9 +14,6 @@ import sys sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../')) -sys.path.insert(0, os.path.abspath('../examples')) - - # -- Project information ----------------------------------------------------- diff --git a/setup.py b/setup.py index f79d464f..97aec964 100644 --- a/setup.py +++ b/setup.py @@ -173,7 +173,6 @@ def build_extension(self, ext): }, install_requires=[ "2to3", - "cmake", "ninja", "numpy", "scipy", From a4a76cf1e351240c4b497ba2da40b1814f837cb5 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 17:09:09 +0200 Subject: [PATCH 07/57] moving stuff around --- .readthedocs.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9b0649f6..4a5af7bb 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -5,15 +5,15 @@ # Required version: 1 -# Build documentation in the docs/ directory with Sphinx -sphinx: - configuration: docs/conf.py - build: apt_packages: - - python3-dev + - libclang - cmake +# Build 
documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + # Optionally set the version of Python and requirements required to build your docs python: version: 3.8 From a37ae0d059b6ff081689b00c81ec87438507c3c2 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 17:47:27 +0200 Subject: [PATCH 08/57] updated version --- .readthedocs.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4a5af7bb..57a246de 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,13 +3,13 @@ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details # Required -version: 1 +version: 2 build: apt_packages: - libclang - cmake - + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py From e9915c6fbf4f430bf6a74164980266295ea2e1a8 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 17:48:45 +0200 Subject: [PATCH 09/57] updated version --- .readthedocs.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 57a246de..ef56e692 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,9 +7,8 @@ version: 2 build: apt_packages: - - libclang - cmake - + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/conf.py From 2b4f06581f05c3c85031960fa855f3a364b90bd5 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 17:53:50 +0200 Subject: [PATCH 10/57] added docs requirements in main setup --- setup.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 97aec964..7212b118 100644 --- a/setup.py +++ b/setup.py @@ -155,6 +155,16 @@ def build_extension(self, ext): if __name__ == "__main__": + folder = os.path.dirname(__file__) + + install_requires = ["2to3", "ninja", "numpy", "scipy", "protobuf==3.14.0"] + + # with open(os.path.join(folder, 'requirements.txt')) as fp: + # 
install_requires.extend([line.strip() for line in fp]) + + with open(os.path.join(folder, "docs", 'requirements.txt')) as fp: + install_requires.extend([line.strip() for line in fp]) + # Build tbb before setup if needed maybe_build_tbb() @@ -171,12 +181,6 @@ def build_extension(self, ext): "clean": clean, "build_ext": CMakeBuild, }, - install_requires=[ - "2to3", - "ninja", - "numpy", - "scipy", - "protobuf==3.14.0" - ], + install_requires=install_requires, zip_safe=False, ) From cec9b235a41aae9b49a6bf187bdf28e52a1b2952 Mon Sep 17 00:00:00 2001 From: mberaha Date: Mon, 31 May 2021 17:58:30 +0200 Subject: [PATCH 11/57] tracking requirements --- .readthedocs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index ef56e692..c3de0c38 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -16,6 +16,8 @@ sphinx: # Optionally set the version of Python and requirements required to build your docs python: version: 3.8 + install: + - requirements: docs/requirements.txt submodules: exclude: all From d9f9f6d7f1cdaf29acc370de4a282779be931079 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:01:40 +0200 Subject: [PATCH 12/57] maybe building --- .readthedocs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index c3de0c38..2cbdf900 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,6 +18,8 @@ python: version: 3.8 install: - requirements: docs/requirements.txt + - method: setuptools + - path: . 
submodules: exclude: all From f8a8cb092921f6f731adc3cf01553c5b23a9d5f3 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:02:23 +0200 Subject: [PATCH 13/57] maybe building --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 2cbdf900..628d3350 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -19,7 +19,7 @@ python: install: - requirements: docs/requirements.txt - method: setuptools - - path: . + path: . submodules: exclude: all From 04c125dafc7968068f310b71b275d27f9c5022e7 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:06:03 +0200 Subject: [PATCH 14/57] build_tbb --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7212b118..2685c03f 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ from setuptools.command.egg_info import egg_info as _egg_info from distutils.command.install import install as _install -sys.path.append("pybmix/core/pybmixcpp/bayesmix") +sys.path.append(os.path.join[os.path.abspath('.'), "pybmix/core/pybmixcpp/bayesmix"]) from build_tbb import maybe_build_tbb From a19fcde3af34a0acb2056737a89cb06774858a8d Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:08:09 +0200 Subject: [PATCH 15/57] using abs path --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2685c03f..9cec19f8 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ from setuptools.command.egg_info import egg_info as _egg_info from distutils.command.install import install as _install -sys.path.append(os.path.join[os.path.abspath('.'), "pybmix/core/pybmixcpp/bayesmix"]) +sys.path.append(os.path.join(os.path.abspath('.'), "pybmix/core/pybmixcpp/bayesmix")) from build_tbb import maybe_build_tbb From 7fdd1d277d96ba9697c6cfb0c696590865894a13 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:15:41 +0200 Subject: [PATCH 16/57] installing with pip 
--- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 628d3350..bda996b1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,7 +18,7 @@ python: version: 3.8 install: - requirements: docs/requirements.txt - - method: setuptools + - method: pip path: . submodules: From 2733c70554094aa3ea533623da3294b4d473236d Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:18:55 +0200 Subject: [PATCH 17/57] insert instead of append? --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9cec19f8..f8ba4b26 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,8 @@ from setuptools.command.egg_info import egg_info as _egg_info from distutils.command.install import install as _install -sys.path.append(os.path.join(os.path.abspath('.'), "pybmix/core/pybmixcpp/bayesmix")) +sys.path.insert(0, os.path.join(os.path.abspath('.'), + "pybmix/core/pybmixcpp/bayesmix")) from build_tbb import maybe_build_tbb From 5d10ab76f96bc25dfd9e4fea084d1f4afb79f843 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:45:11 +0200 Subject: [PATCH 18/57] fixed install issue --- CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d031e18f..5931dad7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,9 +44,15 @@ foreach(PROTO_FILE IN LISTS ProtoFiles) list(APPEND PROTO_PYS ${PROTO_PY}) endforeach() +set(PYBMIX_LINK_LIBRARIES + pthread + protobuf::libprotobuf + tbb +) + target_include_directories(pybmixcpp PUBLIC ${BAYESMIX_INCLUDE_PATHS}) target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) -target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${BAYESMIX_LINK_LIBRARIES}) +target_link_libraries(pybmixcpp PUBLIC "-L${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/" bayesmixlib ${PYBMIX_LINK_LIBRARIES}) 
target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) From dc001ca8cd4b88206a8c81ce50330c2eae85e49b Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:48:27 +0200 Subject: [PATCH 19/57] debug print --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f8ba4b26..216cd3bf 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ sys.path.insert(0, os.path.join(os.path.abspath('.'), "pybmix/core/pybmixcpp/bayesmix")) +print(sys.path) from build_tbb import maybe_build_tbb From 674a553547062bc9fd6ba8d115327676c3b723bf Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:54:36 +0200 Subject: [PATCH 20/57] adding path in conf.py --- docs/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/conf.py b/docs/conf.py index 463b60d3..1350200a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,6 +14,9 @@ import sys sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('../')) +sys.path.insert(0, os.path.join(os.path.abspath('../'), + "pybmix/core/pybmixcpp/bayesmix")) + # -- Project information ----------------------------------------------------- From 47e3b8186692a4033ccf7233285f42947bed7c44 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:56:25 +0200 Subject: [PATCH 21/57] copy build_tbb file --- build_tbb.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 build_tbb.py diff --git a/build_tbb.py b/build_tbb.py new file mode 100644 index 00000000..fdd51ffa --- /dev/null +++ b/build_tbb.py @@ -0,0 +1,62 @@ +import os +import platform +import shutil +import subprocess + +# If on Linux and this script gives a "busy file" error, please run +# bash/cleanup_tbb.sh + +def maybe_build_tbb(): + """Build tbb. 
This function is taken from + https://github.com/stan-dev/pystan/blob/develop/setup.py""" + + stan_math_lib = os.path.abspath(os.path.join(os.path.dirname( + __file__), 'lib', 'math', 'lib')) + + tbb_dir = os.path.join(stan_math_lib, 'tbb') + tbb_dir = os.path.abspath(tbb_dir) + if os.path.exists(tbb_dir): + return + + make = 'make' if platform.system() != 'Windows' else 'mingw32-make' + cmd = [make] + + tbb_root = os.path.join(stan_math_lib, 'tbb_2019_U8').replace("\\", "/") + + cmd.extend(['-C', tbb_root]) + cmd.append('tbb_build_dir={}'.format(stan_math_lib)) + cmd.append('tbb_build_prefix=tbb') + cmd.append('tbb_root={}'.format(tbb_root)) + + cmd.append('stdver=c++14') + + cmd.append('compiler=gcc') + + cwd = os.path.abspath(os.path.dirname(__file__)) + + subprocess.check_call(cmd, cwd=cwd) + + tbb_debug = os.path.join(stan_math_lib, "tbb_debug") + tbb_release = os.path.join(stan_math_lib, "tbb_release") + tbb_dir = os.path.join(stan_math_lib, "tbb") + + if not os.path.exists(tbb_dir): + os.makedirs(tbb_dir) + + if os.path.exists(tbb_debug): + shutil.rmtree(tbb_debug) + + shutil.move(os.path.join(tbb_root, 'include'), tbb_dir) + shutil.rmtree(tbb_root) + + for name in os.listdir(tbb_release): + srcname = os.path.join(tbb_release, name) + dstname = os.path.join(tbb_dir, name) + shutil.move(srcname, dstname) + + if os.path.exists(tbb_release): + shutil.rmtree(tbb_release) + + +if __name__ == "__main__": + maybe_build_tbb() From 245a56043909b15673fbb70ffe27814074c77f3e Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 08:59:56 +0200 Subject: [PATCH 22/57] update apt requirements --- .readthedocs.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index bda996b1..238967ba 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,6 +8,8 @@ version: 2 build: apt_packages: - cmake + - build-essential + - g++ # Build documentation in the docs/ directory with Sphinx sphinx: From 
90c0e55bb21f1c2fafb38d5b5338d55af36b8382 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 09:16:50 +0200 Subject: [PATCH 23/57] updated path --- build_tbb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build_tbb.py b/build_tbb.py index fdd51ffa..1c4329d5 100644 --- a/build_tbb.py +++ b/build_tbb.py @@ -11,7 +11,7 @@ def maybe_build_tbb(): https://github.com/stan-dev/pystan/blob/develop/setup.py""" stan_math_lib = os.path.abspath(os.path.join(os.path.dirname( - __file__), 'lib', 'math', 'lib')) + __file__), 'pybmix', 'core', 'pybmixcpp', 'bayesmix' 'lib', 'math', 'lib')) tbb_dir = os.path.join(stan_math_lib, 'tbb') tbb_dir = os.path.abspath(tbb_dir) From 8030ea7a74bd41c13390c8ffda01506e8b821431 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 09:22:41 +0200 Subject: [PATCH 24/57] debug string --- build_tbb.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/build_tbb.py b/build_tbb.py index 1c4329d5..f692f524 100644 --- a/build_tbb.py +++ b/build_tbb.py @@ -23,6 +23,9 @@ def maybe_build_tbb(): tbb_root = os.path.join(stan_math_lib, 'tbb_2019_U8').replace("\\", "/") + import glob + print("\n".join(glob.glob(tbb_root + "/*"))) + cmd.extend(['-C', tbb_root]) cmd.append('tbb_build_dir={}'.format(stan_math_lib)) cmd.append('tbb_build_prefix=tbb') From 776bf781448f147b563a10ee37a43a25f953798c Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 09:43:18 +0200 Subject: [PATCH 25/57] debug string --- build_tbb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build_tbb.py b/build_tbb.py index f692f524..f745927a 100644 --- a/build_tbb.py +++ b/build_tbb.py @@ -24,6 +24,7 @@ def maybe_build_tbb(): tbb_root = os.path.join(stan_math_lib, 'tbb_2019_U8').replace("\\", "/") import glob + print("******** LET'S SEE WHAT'S INSIDE ********") print("\n".join(glob.glob(tbb_root + "/*"))) cmd.extend(['-C', tbb_root]) From 644c4761f3c4f51f7db761dec994e5af8449c75c Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 
09:45:37 +0200 Subject: [PATCH 26/57] cloning submodules recursively --- .readthedocs.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 238967ba..3595364e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -24,4 +24,5 @@ python: path: . submodules: - exclude: all + include: all + recursive: true From aa0c4972cf67d7d880db5350e2c3cd22e6934861 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 09:51:49 +0200 Subject: [PATCH 27/57] updated cmake version --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 3595364e..6a3c53b1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,7 +7,7 @@ version: 2 build: apt_packages: - - cmake + - cmake=3.18.4 - build-essential - g++ From 9c3f10e659b704f9952b975ddd4139ce53d19631 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 10:03:44 +0200 Subject: [PATCH 28/57] updating cmake via pip --- .readthedocs.yaml | 2 +- docs/requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 6a3c53b1..3595364e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -7,7 +7,7 @@ version: 2 build: apt_packages: - - cmake=3.18.4 + - cmake - build-essential - g++ diff --git a/docs/requirements.txt b/docs/requirements.txt index dcc9d4b7..4243dfd4 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -7,3 +7,4 @@ sphinx_rtd_theme>=0.5.1 sphinx_autodoc_typehints>=1.11.1 ipython>=7.20.0 ipykernel>=5.5.0 +cmake>=.15.0 From 8776d43077fae486f9e2da73ffb2a41201417c87 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 10:05:49 +0200 Subject: [PATCH 29/57] updating cmake via pip --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 4243dfd4..bc52bed8 100644 --- a/docs/requirements.txt +++ 
b/docs/requirements.txt @@ -7,4 +7,4 @@ sphinx_rtd_theme>=0.5.1 sphinx_autodoc_typehints>=1.11.1 ipython>=7.20.0 ipykernel>=5.5.0 -cmake>=.15.0 +cmake>=3.15.0 From 3422a6b617b1ab050cd0b7ecf545a16d44212ada Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 10:18:48 +0200 Subject: [PATCH 30/57] building tbb only via bayesmix --- CMakeLists.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5931dad7..1cddc58a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ set(PYBMIX_LINK_LIBRARIES target_include_directories(pybmixcpp PUBLIC ${BAYESMIX_INCLUDE_PATHS}) target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) -target_link_libraries(pybmixcpp PUBLIC "-L${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/" bayesmixlib ${PYBMIX_LINK_LIBRARIES}) +target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${BAYESMIX_LINK_LIBRARIES}) target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) diff --git a/setup.py b/setup.py index 216cd3bf..5113c6bf 100644 --- a/setup.py +++ b/setup.py @@ -168,7 +168,7 @@ def build_extension(self, ext): install_requires.extend([line.strip() for line in fp]) # Build tbb before setup if needed - maybe_build_tbb() + # maybe_build_tbb() setup( name="pybmix", From 05ce4d8ffb32ac7376f0cd4ec9e0b7f3eb6a4ab8 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 10:27:57 +0200 Subject: [PATCH 31/57] debug string --- build_tbb.py | 2 ++ setup.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/build_tbb.py b/build_tbb.py index f745927a..bcd53223 100644 --- a/build_tbb.py +++ b/build_tbb.py @@ -9,6 +9,8 @@ def maybe_build_tbb(): """Build tbb. 
This function is taken from https://github.com/stan-dev/pystan/blob/develop/setup.py""" + + print("******* BUILD TBB ***********") stan_math_lib = os.path.abspath(os.path.join(os.path.dirname( __file__), 'pybmix', 'core', 'pybmixcpp', 'bayesmix' 'lib', 'math', 'lib')) diff --git a/setup.py b/setup.py index 5113c6bf..216cd3bf 100644 --- a/setup.py +++ b/setup.py @@ -168,7 +168,7 @@ def build_extension(self, ext): install_requires.extend([line.strip() for line in fp]) # Build tbb before setup if needed - # maybe_build_tbb() + maybe_build_tbb() setup( name="pybmix", From f5466dc46c60b2d5723ea76abedb33e36d290940 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 10:42:17 +0200 Subject: [PATCH 32/57] hacking around --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 216cd3bf..959b1cdb 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,9 @@ def run(self): except OSError: msg = "CMake missing - probably upgrade to a newer version of Pip?" raise RuntimeError(msg) - + + maybe_build_tbb() + # To support Python 2, we have to avoid super(), since distutils is all # old-style classes. 
build_ext.run(self) From 4112a5ea52d84a9ee0315f4967bfb01d3cea7417 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 12:09:47 +0200 Subject: [PATCH 33/57] hacking around --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cddc58a..5931dad7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ set(PYBMIX_LINK_LIBRARIES target_include_directories(pybmixcpp PUBLIC ${BAYESMIX_INCLUDE_PATHS}) target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) -target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${BAYESMIX_LINK_LIBRARIES}) +target_link_libraries(pybmixcpp PUBLIC "-L${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/" bayesmixlib ${PYBMIX_LINK_LIBRARIES}) target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) From cbf38a82f108a142359afb28d869022953efeb08 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 12:28:54 +0200 Subject: [PATCH 34/57] changed bayesmix branch --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index 8baf9823..c715806a 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit 8baf98232123cd6850f5ad60fd667514cf3aa914 +Subproject commit c715806ad92f7d18ca0e17083f5cf21999f79f1d From 5709531e3ef9a399136681ee17befb37c95f4b25 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 12:31:00 +0200 Subject: [PATCH 35/57] updated bayesmix --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index c715806a..5e59ec26 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit 
c715806ad92f7d18ca0e17083f5cf21999f79f1d +Subproject commit 5e59ec26b794a140b8199d85b822b5bc27129fb4 From 2de50dc97cdf074b02f0718d2cc0050fc24e0202 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 15:03:47 +0200 Subject: [PATCH 36/57] update bayesmix --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index 5e59ec26..f5908666 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit 5e59ec26b794a140b8199d85b822b5bc27129fb4 +Subproject commit f5908666245273d319031ac34f7116579925f5c2 From 1aaa1fb003aa5ed2d355cf8d5ace896c1d8435c6 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 16:23:01 +0200 Subject: [PATCH 37/57] update bayesmix --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index f5908666..2737b9b2 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit f5908666245273d319031ac34f7116579925f5c2 +Subproject commit 2737b9b2507ecadefe00b494073f274264c15e6d From 53c77074353505b8b02175949e7705d4a8cecc66 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 16:28:04 +0200 Subject: [PATCH 38/57] update bayesmix --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index 2737b9b2..5c742f27 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit 2737b9b2507ecadefe00b494073f274264c15e6d +Subproject commit 5c742f27a66af71df0998a5e3447f67877819277 From 5bc9ebd986a754cbfe8feafb4bad3342ede811a6 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 16:43:32 +0200 Subject: [PATCH 39/57] update bayesmix --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index 5c742f27..ad01d517 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit 5c742f27a66af71df0998a5e3447f67877819277 +Subproject commit ad01d51737aaa0f911d3faaab2cf363751e545d5 From f2e890078adcf87afe27c8efdfbd6f58673f31e7 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 17:11:48 +0200 Subject: [PATCH 40/57] using bayesmix link libraries --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5931dad7..1cddc58a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,7 +52,7 @@ set(PYBMIX_LINK_LIBRARIES target_include_directories(pybmixcpp PUBLIC ${BAYESMIX_INCLUDE_PATHS}) target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) -target_link_libraries(pybmixcpp PUBLIC "-L${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/" bayesmixlib ${PYBMIX_LINK_LIBRARIES}) +target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${BAYESMIX_LINK_LIBRARIES}) target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) From 60bcebb02f157bbb4f58942e7d67514aed497a68 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 17:19:37 +0200 Subject: [PATCH 41/57] linking libtbb.so.2 --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cddc58a..f36883af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,12 +47,12 @@ endforeach() set(PYBMIX_LINK_LIBRARIES pthread protobuf::libprotobuf - tbb + ${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/libtbb.so.2 ) target_include_directories(pybmixcpp PUBLIC ${BAYESMIX_INCLUDE_PATHS}) target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) 
-target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${BAYESMIX_LINK_LIBRARIES}) +target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${PYBMIX_LINK_LIBRARIES}) target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) From 0d086bbb55f1140f0be0a17ff5226b40b59c9b85 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 17:54:52 +0200 Subject: [PATCH 42/57] updated requirements and test notebook --- docs/index.rst | 4 +--- docs/requirements.txt | 3 +++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index e042170b..c7179f49 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,9 +14,7 @@ Welcome to pybmix's documentation! :maxdepth: 1 :caption: Tutorials: - examples/estimate_univ_density.ipynb - examples/clustering_univ_data.ipynb - examples/prior_elicitation.ipynb + examples/test.ipynb Indices and tables ================== diff --git a/docs/requirements.txt b/docs/requirements.txt index bc52bed8..4e6c7425 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,5 @@ +numpy>=1.18.0 +joblib>=0.14.1 matplotlib>=3.3.4 pandoc>=1.0.2 sphinx>=3.5.1 @@ -8,3 +10,4 @@ sphinx_autodoc_typehints>=1.11.1 ipython>=7.20.0 ipykernel>=5.5.0 cmake>=3.15.0 + From f31372ec9914563950fe707d4352a706510c28b9 Mon Sep 17 00:00:00 2001 From: mberaha Date: Tue, 1 Jun 2021 18:08:10 +0200 Subject: [PATCH 43/57] removed autosummary --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 1350200a..a342eff4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,7 +36,7 @@ # ones. 
extensions = [ 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', + # 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.napoleon', From acd0d64a2999d4945f2585ff6104fd5ea0e87af4 Mon Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 08:39:16 +0200 Subject: [PATCH 44/57] only test notebook --- docs/examples/clustering_univ_data.ipynb | 219 ---------------------- docs/examples/estimate_univ_density.ipynb | 204 -------------------- docs/examples/prior_elicitation.ipynb | 168 ----------------- docs/examples/test.ipynb | 58 ++++++ 4 files changed, 58 insertions(+), 591 deletions(-) delete mode 100644 docs/examples/clustering_univ_data.ipynb delete mode 100644 docs/examples/estimate_univ_density.ipynb delete mode 100644 docs/examples/prior_elicitation.ipynb create mode 100644 docs/examples/test.ipynb diff --git a/docs/examples/clustering_univ_data.ipynb b/docs/examples/clustering_univ_data.ipynb deleted file mode 100644 index 503a473f..00000000 --- a/docs/examples/clustering_univ_data.ipynb +++ /dev/null @@ -1,219 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Clustering of univariate data via Dirichlet Process Mixture" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "this is a continuation of 'estimate_univ_density'. Make sure to check it before going through this tutorial!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from pybmix.core.mixing import DirichletProcessMixing, StickBreakMixing\n", - "from pybmix.core.hierarchy import UnivariateNormal\n", - "from pybmix.core.mixture_model import MixtureModel\n", - "\n", - "np.random.seed(2021)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## DP and clustering\n", - "\n", - "Recall that $\\tilde p \\sim DP(\\alpha, G_0)$ means that $\\tilde p = \\sum_{h=1}^\\infty w_h \\delta_{\\tau_h}$ with $\\{w_h\\}_h \\sim GEM(\\alpha)$ and $\\{\\tau_h\\}_h \\sim G_0$. Hence, realizations from a DP are almost surely discrete probability measures.\n", - "\n", - "Hence, sampling \n", - "$$\n", - "\\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim p\n", - "$$\n", - "$$\n", - "\\tilde{p} \\sim DP(\\alpha, G_0)\n", - "$$\n", - "entails that with positive probability $\\theta_i = \\theta_j$ (with $i \\neq j$). In a sample of size $n$ there will be $k \\geq n$ unique values $\\theta^*_1, \\ldots, \\theta^*_k$ among the $\\theta_i$'s and clusters are defined as $C_j = \\{i : \\theta_i = \\theta^*_j \\}.\n", - "\n", - "When considering a mixture model, the $\\theta_i$'s are not observations but latent variables. In the case of a univariate normal mizture models, $\\theta_i = (\\mu_i, \\sigma^2_i)$ and the model can be written as\n", - "$$\n", - " y_i | \\theta_i = (\\mu_i, \\sigma^2_i) \\sim \\mathcal N(\\mu_i, \\sigma^2_i)\n", - "$$\n", - "$$\n", - " \\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim \\tilde{p}\n", - "$$\n", - "$$\n", - " \\tilde{p} \\sim DP(\\alpha, G_0)\n", - "$$\n", - "and the clustering among the observations $y_i$'s is inherited by the clustering among the $\\theta_i$'s." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's go back to the previous example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def sample_from_mixture(weigths, means, sds, n_data):\n", - " n_comp = len(weigths)\n", - " clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data)\n", - " return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc])\n", - "\n", - "y = sample_from_mixture(\n", - " np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200)\n", - "\n", - "mixing = DirichletProcessMixing(total_mass=5)\n", - "hierarchy = UnivariateNormal()\n", - "hierarchy.make_default_fixed_params(y, 2)\n", - "mixture = MixtureModel(mixing, hierarchy)\n", - "\n", - "mixture.run_mcmc(y, algorithm=\"Neal2\", niter=2000, nburn=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can extract the cluster allocation MCMC chain very easily" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mcmc_chain = mixture.get_chain()\n", - "cluster_alloc_chain = mcmc_chain.extract(\"cluster_allocs\")\n", - "print(cluster_alloc_chain.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "cluster_alloc_chain is a matrix of shape [niter - nburn, ndata]. 
\n", - "\n", - "To get the posterior distribution of the number of clusters, we count in each row the number of unique values" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n_clust_chain = np.apply_along_axis(lambda x: len(np.unique(x)), 1, \n", - " cluster_alloc_chain)\n", - "\n", - "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))\n", - "axes[0].vlines(np.arange(len(n_clust_chain)), n_clust_chain - 0.3, n_clust_chain + 0.3)\n", - "axes[0].set_title(\"Traceplot\")\n", - "\n", - "clusgrid = np.arange(1, 10)\n", - "probas = np.zeros_like(clusgrid)\n", - "for i, c in enumerate(clusgrid):\n", - " probas[i] = np.sum(n_clust_chain == c)\n", - "\n", - "probas = probas / np.sum(probas)\n", - "axes[1].bar(clusgrid, probas)\n", - "axes[1].set_xticks(clusgrid)\n", - "axes[1].set_title(\"Posterior number of clusters\")\n", - " \n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's inspect two iterations: the first one and the last one, and look at the cluster allocations of the first 5 observations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"First iteration: \", cluster_alloc_chain[0][:5])\n", - "print(\"Last iteration: \", cluster_alloc_chain[-1][:5])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Observe that the clustering are identicals: the one is made of observations $\\{1, 2, 5\\}$ and the other cluster of observations $\\{3, 4\\}$. However the labels associated to each cluster are differend depending on the iterations: in the first iteration, $\\{1, 2, 5\\}$ are the first cluster (0th cluster) and $\\{3, 4\\}$ are the second cluster, while in the last iteration the opposite happens.\n", - "\n", - "This is due to the so-called \"label-switching\". 
Usually to interpret the clustering result, a suitable point-estimate is chosen to minimize a loss function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybmix.estimators.cluster_estimator import ClusterEstimator\n", - "\n", - "clus_est = ClusterEstimator(mixture)\n", - "best_clust = clus_est.get_point_estimate()\n", - "\n", - "plt.hist(y, density=True, alpha=0.3)\n", - "for cluster_idx in clus_est.group_by_cluster(best_clust):\n", - " data = y[cluster_idx]\n", - " plt.scatter(data, np.zeros_like(data) + 5e-3)\n", - " \n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note how the posterior mode of the number of clusters is 3, but the point estimate for the best clustering consists of 2 clusters" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/examples/estimate_univ_density.ipynb b/docs/examples/estimate_univ_density.ipynb deleted file mode 100644 index 4d25d122..00000000 --- a/docs/examples/estimate_univ_density.ipynb +++ /dev/null @@ -1,204 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Univariate Density Estimation via Dirichlet Process Mixture" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from pybmix.core.mixing import DirichletProcessMixing, StickBreakMixing\n", - "from pybmix.core.hierarchy import UnivariateNormal\n", - "from pybmix.core.mixture_model import 
MixtureModel\n", - "\n", - "np.random.seed(2021)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Generation\n", - "\n", - "We generate data from a two-component mixture model\n", - "$$\n", - "y_i \\sim \\frac{1}{2} \\mathcal N(-3, 1) + \\frac{1}{2} \\mathcal N(3, 1), \\quad i=1, \\ldots, 200\n", - "$$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def sample_from_mixture(weigths, means, sds, n_data):\n", - " n_comp = len(weigths)\n", - " clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data)\n", - " return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y = sample_from_mixture(\n", - " np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200)\n", - "plt.hist(y)\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The statistical model\n", - "\n", - "We assume the following model\n", - "$$\n", - "y_i | \\tilde{p} \\sim f(\\cdot) = \\int_{R \\times R^+} \\mathcal{N}(\\cdot | \\mu, \\sigma^2) \\tilde{p}(d\\mu, d\\sigma^2)\n", - "$$\n", - "$$\n", - "\\tilde{p} \\sim DP(\\alpha, G_0)\n", - "$$\n", - "where $DP(\\alpha, G_0)$ is the Dirichlet Process with base measure $\\alpha G_0$. \n", - "\n", - "Given the stick-breaking represetation of the Dirichlet Process, the model is equivalently written as\n", - "$$\n", - "y_i | \\{w_h\\}_h \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim f(\\cdot) = \\sum_{h=1}^\\infty w_h \\mathcal{N}(\\cdot | \\mu_h, \\sigma_h^2)\n", - "$$\n", - "$$\n", - "\\{w_h\\}_h \\sim GEM(\\alpha)\n", - "$$\n", - "$$\n", - " \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim G_0\n", - "$$\n", - "\n", - "In pybmix we take advantage of the second representation, and specify a MixtureModel in terms of a Mixing and a Hierarchy. 
The Mixing is the prior for the weights, while the Hierarchy combines the base measure $G_0$ with the kernel of the mixture (in this case, the univariate Gaussian distribution)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we assume that $alpha = 5$ and $G_0(d\\mu, d\\sigma^2) = \\mathcal N(d\\mu | \\mu_0, \\lambda \\sigma^2) \\times IG(d\\sigma^2 | a, b)$, i.e., $G_0$ is a normal-inverse gamma distribution. \n", - "\n", - "The parameters $(\\mu_0, \\lambda, a , b)$ of $G_0$ can be set automatically by the method 'make_default_fixed_params' which takes as input the observations and a \"guess\" on the number of clusters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mixing = DirichletProcessMixing(total_mass=5)\n", - "hierarchy = UnivariateNormal()\n", - "hierarchy.make_default_fixed_params(y, 2)\n", - "mixture = MixtureModel(mixing, hierarchy)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run MCMC simulations" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mixture.run_mcmc(y, algorithm=\"Neal2\", niter=2000, nburn=1000)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Get the density estimates\n", - "\n", - "1) fix a grid where to estimate the densities\n", - "\n", - "2) the method 'estimate_density' returns a matrix of shape [niter - nburn, len(grid)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybmix.estimators.density_estimator import DensityEstimator" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "grid = np.linspace(-6, 6, 500)\n", - "dens_est = DensityEstimator(mixture)\n", - "densities = dens_est.estimate_density(grid)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "Plot some of the densities and their mean" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.hist(y, density=True)\n", - "plt.plot(grid, np.mean(densities, axis=0), lw=3, label=\"predictive density\")\n", - "idxs = [5, 100, 300]\n", - "for idx in idxs:\n", - " plt.plot(grid, densities[idx, :], \"--\", label=\"iteration: {0}\".format(idx))\n", - " \n", - "plt.legend()\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/examples/prior_elicitation.ipynb b/docs/examples/prior_elicitation.ipynb deleted file mode 100644 index e138a7b8..00000000 --- a/docs/examples/prior_elicitation.ipynb +++ /dev/null @@ -1,168 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Prior elicitation\n", - "\n", - "What is the meaning of the parameters in the various processes? Let's study the prior distribution of the number of clusters!" 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from pybmix.core.mixing import DirichletProcessMixing, PitmanYorMixing, StickBreakMixing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dirichlet Process\n", - "\n", - "Compare how the prior distribution varies for a DP for different values of the 'total_mass' parameter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "nsamples = 100\n", - "grid = np.arange(1, 30)\n", - "\n", - "for total_mass in [0.25, 1, 2, 3, 5, 7]:\n", - " dp = DirichletProcessMixing(total_mass=total_mass)\n", - " probs = dp.prior_cluster_distribution(grid, nsamples)\n", - " plt.plot(grid, probs, label=\"total_mass={0}\".format(total_mass))\n", - " \n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Pitman-Yor Process\n", - "\n", - "The PY process is controlled by two parameters: 'strength' (plays the same role of 'total_mass' in the Dirichlet Process) and 'discount'.\n", - "For a fixed value of 'strenght', see how the distribution varies for different values of 'discount'." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "strength = 1\n", - "for discount in [0.001, 0.1, 0.2, 0.5]:\n", - " py = PitmanYorMixing(strength=strength, discount=discount)\n", - " probs = py.prior_cluster_distribution(grid, nsamples)\n", - " plt.plot(grid, probs, label=\"strength={0}, discount={1}\".format(strength, discount))\n", - " \n", - "plt.legend()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stick Breaking prior" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Truncation of a Dirichlet Process" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n = [50, 1000]\n", - "H = [10, 20, 50]\n", - "grid = np.arange(1, 50)\n", - "\n", - "fig, axes = plt.subplots(nrows=len(n), ncols=len(H), figsize=(15, 10))\n", - "for i, nval in enumerate(n):\n", - " for j, hval in enumerate(H):\n", - " for total_mass in [0.25, 1, 2, 3, 5, 7]:\n", - " sb = StickBreakMixing(n_comp=hval, strength=total_mass)\n", - " probs = sb.prior_cluster_distribution(grid, nval, mc_iter=10000)\n", - " axes[i][j].plot(grid, probs, label=\"total_mass={0}\".format(total_mass))\n", - " axes[i][j].set_title(\"N: {0}, H: {1}\".format(nval, hval), fontsize=18)\n", - "\n", - "axes[1, 1].legend(ncol=3, fontsize=14, bbox_to_anchor=(1.5, -0.1))\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Truncation of a Pitman-Yor Process" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "n = [200, 1000]\n", - "H = [10, 20, 50]\n", - "strength = 1\n", - "discount = np.linspace(0.2, 0.9, 4)\n", - "\n", - "fig, axes = plt.subplots(nrows=len(n), ncols=len(H), figsize=(15, 10))\n", - "\n", - "for i, nval in enumerate(n):\n", - " for j, hval in enumerate(H):\n", - " for d in discount:\n", - " sb = 
StickBreakMixing(n_comp=hval, strength=strength, discount=d)\n", - " probs = sb.prior_cluster_distribution(grid, nval, mc_iter=10000)\n", - " axes[i][j].plot(grid, probs, label=\"discount={0:.2f}\".format(d))\n", - " axes[i][j].set_title(\"N: {0}, H: {1}\".format(nval, hval), fontsize=18)\n", - "\n", - "axes[1, 1].legend(ncol=2, fontsize=14, bbox_to_anchor=(1.2, -0.1))\n", - "plt.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/examples/test.ipynb b/docs/examples/test.ipynb new file mode 100644 index 00000000..2af73b7d --- /dev/null +++ b/docs/examples/test.ipynb @@ -0,0 +1,58 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Test Stuff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.plot(np.random.normal(size=100))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 387865fec50628e83a9ff38882620ae63d0575c6 Mon 
Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 09:03:34 +0200 Subject: [PATCH 45/57] readded autosummary + some test imports --- docs/conf.py | 2 +- docs/examples/test.ipynb | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index a342eff4..1350200a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -36,7 +36,7 @@ # ones. extensions = [ 'sphinx.ext.autodoc', - # 'sphinx.ext.autosummary', + 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.napoleon', diff --git a/docs/examples/test.ipynb b/docs/examples/test.ipynb index 2af73b7d..ff35ff29 100644 --- a/docs/examples/test.ipynb +++ b/docs/examples/test.ipynb @@ -26,6 +26,24 @@ "plt.plot(np.random.normal(size=100))" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pybmix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pybmix.core.mixing import DirichletProcessMixing" + ] + }, { "cell_type": "code", "execution_count": null, From 53a6bb4750dfd566bb8e6a6c35e5b05e4dbed785 Mon Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 09:50:06 +0200 Subject: [PATCH 46/57] debugging --- .readthedocs.yaml | 1 + CMakeLists.txt | 2 +- docs/requirements.txt | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 3595364e..116deb0e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,6 +10,7 @@ build: - cmake - build-essential - g++ + - ninja-build # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/CMakeLists.txt b/CMakeLists.txt index f36883af..c2df0ef0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,10 +30,10 @@ set(PY_PROTO_DIR "${CMAKE_CURRENT_LIST_DIR}/pybmix/proto") set(PROTO_PYS) message("ProtoFiles: ${ProtoFiles}") foreach(PROTO_FILE IN LISTS ProtoFiles) - message(STATUS 
"GENERATING PYTHON protoc proto(cc): ${PROTO_FILE}") get_filename_component(PROTO_DIR ${PROTO_FILE} DIRECTORY) get_filename_component(PROTO_NAME ${PROTO_FILE} NAME_WE) set(PROTO_PY ${PY_PROTO_DIR}/${PROTO_NAME}_pb2.py) + message(STATUS "GENERATING PYTHON protoc proto(cc): ${PROTO_FILE} --> ${PROTO_PY}") add_custom_command( OUTPUT ${PROTO_PY} COMMAND protobuf::protoc "--proto_path=${SOURCE_DIR}/bayesmix/proto" diff --git a/docs/requirements.txt b/docs/requirements.txt index 4e6c7425..62520aa9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,4 +10,8 @@ sphinx_autodoc_typehints>=1.11.1 ipython>=7.20.0 ipykernel>=5.5.0 cmake>=3.15.0 +ninja>=1.10 +protobuf==3.14.0 +2to3==1.0 +scipy>=1.4.1 From 73d1340561688d63adbf588c3abfa09223481f14 Mon Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 10:27:31 +0200 Subject: [PATCH 47/57] building extension first --- setup.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 959b1cdb..35f754a7 100644 --- a/setup.py +++ b/setup.py @@ -17,11 +17,11 @@ print(sys.path) from build_tbb import maybe_build_tbb - -PYBMIXCPP_PATH = os.path.join("pybmix", "core", "pybmixcpp") +HERE = os.path.abspath('.') +PYBMIXCPP_PATH = os.path.join(HERE, "pybmix", "core", "pybmixcpp") BAYEXMIX_PATH = os.path.join(PYBMIXCPP_PATH , "bayesmix") PROTO_IN_DIR = os.path.join(BAYEXMIX_PATH, "proto") -PROTO_OUT_DIR = os.path.join("pybmix", "proto/") +PROTO_OUT_DIR = os.path.join(HERE, "pybmix", "proto/") # Convert distutils Windows platform specifiers to CMake -A arguments PLAT_TO_CMAKE = { @@ -157,6 +157,12 @@ def build_extension(self, ext): sys.exit(-1) +class build_py(_build_py): + def run(self): + self.run_command("build_ext") + return super().run() + + if __name__ == "__main__": folder = os.path.dirname(__file__) @@ -184,6 +190,7 @@ def build_extension(self, ext): cmdclass={ "clean": clean, "build_ext": CMakeBuild, + "build_py": build_py }, install_requires=install_requires, 
zip_safe=False, From 02551e61a2000fc3fdd96c6e6be0f589c70a6d40 Mon Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 11:59:17 +0200 Subject: [PATCH 48/57] triggering after github down --- docs/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 62520aa9..63e52a81 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -14,4 +14,3 @@ ninja>=1.10 protobuf==3.14.0 2to3==1.0 scipy>=1.4.1 - From cb299eba5788e6d6227ac5a6a3c6e95ae1f0963f Mon Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 12:15:14 +0200 Subject: [PATCH 49/57] reverting to old examples --- docs/examples/clustering_univ_data.ipynb | 219 ++++++++++++++++++++++ docs/examples/estimate_univ_density.ipynb | 204 ++++++++++++++++++++ docs/examples/prior_elicitation.ipynb | 168 +++++++++++++++++ docs/examples/test.ipynb | 76 -------- docs/index.rst | 4 +- 5 files changed, 594 insertions(+), 77 deletions(-) create mode 100644 docs/examples/clustering_univ_data.ipynb create mode 100644 docs/examples/estimate_univ_density.ipynb create mode 100644 docs/examples/prior_elicitation.ipynb delete mode 100644 docs/examples/test.ipynb diff --git a/docs/examples/clustering_univ_data.ipynb b/docs/examples/clustering_univ_data.ipynb new file mode 100644 index 00000000..503a473f --- /dev/null +++ b/docs/examples/clustering_univ_data.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clustering of univariate data via Dirichlet Process Mixture" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "this is a continuation of 'estimate_univ_density'. Make sure to check it before going through this tutorial!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pybmix.core.mixing import DirichletProcessMixing, StickBreakMixing\n", + "from pybmix.core.hierarchy import UnivariateNormal\n", + "from pybmix.core.mixture_model import MixtureModel\n", + "\n", + "np.random.seed(2021)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DP and clustering\n", + "\n", + "Recall that $\\tilde p \\sim DP(\\alpha, G_0)$ means that $\\tilde p = \\sum_{h=1}^\\infty w_h \\delta_{\\tau_h}$ with $\\{w_h\\}_h \\sim GEM(\\alpha)$ and $\\{\\tau_h\\}_h \\sim G_0$. Hence, realizations from a DP are almost surely discrete probability measures.\n", + "\n", + "Hence, sampling \n", + "$$\n", + "\\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim p\n", + "$$\n", + "$$\n", + "\\tilde{p} \\sim DP(\\alpha, G_0)\n", + "$$\n", + "entails that with positive probability $\\theta_i = \\theta_j$ (with $i \\neq j$). In a sample of size $n$ there will be $k \\geq n$ unique values $\\theta^*_1, \\ldots, \\theta^*_k$ among the $\\theta_i$'s and clusters are defined as $C_j = \\{i : \\theta_i = \\theta^*_j \\}.\n", + "\n", + "When considering a mixture model, the $\\theta_i$'s are not observations but latent variables. In the case of a univariate normal mizture models, $\\theta_i = (\\mu_i, \\sigma^2_i)$ and the model can be written as\n", + "$$\n", + " y_i | \\theta_i = (\\mu_i, \\sigma^2_i) \\sim \\mathcal N(\\mu_i, \\sigma^2_i)\n", + "$$\n", + "$$\n", + " \\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim \\tilde{p}\n", + "$$\n", + "$$\n", + " \\tilde{p} \\sim DP(\\alpha, G_0)\n", + "$$\n", + "and the clustering among the observations $y_i$'s is inherited by the clustering among the $\\theta_i$'s." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's go back to the previous example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sample_from_mixture(weigths, means, sds, n_data):\n", + " n_comp = len(weigths)\n", + " clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data)\n", + " return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc])\n", + "\n", + "y = sample_from_mixture(\n", + " np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200)\n", + "\n", + "mixing = DirichletProcessMixing(total_mass=5)\n", + "hierarchy = UnivariateNormal()\n", + "hierarchy.make_default_fixed_params(y, 2)\n", + "mixture = MixtureModel(mixing, hierarchy)\n", + "\n", + "mixture.run_mcmc(y, algorithm=\"Neal2\", niter=2000, nburn=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can extract the cluster allocation MCMC chain very easily" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mcmc_chain = mixture.get_chain()\n", + "cluster_alloc_chain = mcmc_chain.extract(\"cluster_allocs\")\n", + "print(cluster_alloc_chain.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "cluster_alloc_chain is a matrix of shape [niter - nburn, ndata]. 
\n", + "\n", + "To get the posterior distribution of the number of clusters, we count in each row the number of unique values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n_clust_chain = np.apply_along_axis(lambda x: len(np.unique(x)), 1, \n", + " cluster_alloc_chain)\n", + "\n", + "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))\n", + "axes[0].vlines(np.arange(len(n_clust_chain)), n_clust_chain - 0.3, n_clust_chain + 0.3)\n", + "axes[0].set_title(\"Traceplot\")\n", + "\n", + "clusgrid = np.arange(1, 10)\n", + "probas = np.zeros_like(clusgrid)\n", + "for i, c in enumerate(clusgrid):\n", + " probas[i] = np.sum(n_clust_chain == c)\n", + "\n", + "probas = probas / np.sum(probas)\n", + "axes[1].bar(clusgrid, probas)\n", + "axes[1].set_xticks(clusgrid)\n", + "axes[1].set_title(\"Posterior number of clusters\")\n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's inspect two iterations: the first one and the last one, and look at the cluster allocations of the first 5 observations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"First iteration: \", cluster_alloc_chain[0][:5])\n", + "print(\"Last iteration: \", cluster_alloc_chain[-1][:5])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Observe that the clustering are identicals: the one is made of observations $\\{1, 2, 5\\}$ and the other cluster of observations $\\{3, 4\\}$. However the labels associated to each cluster are differend depending on the iterations: in the first iteration, $\\{1, 2, 5\\}$ are the first cluster (0th cluster) and $\\{3, 4\\}$ are the second cluster, while in the last iteration the opposite happens.\n", + "\n", + "This is due to the so-called \"label-switching\". 
Usually to interpret the clustering result, a suitable point-estimate is chosen to minimize a loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pybmix.estimators.cluster_estimator import ClusterEstimator\n", + "\n", + "clus_est = ClusterEstimator(mixture)\n", + "best_clust = clus_est.get_point_estimate()\n", + "\n", + "plt.hist(y, density=True, alpha=0.3)\n", + "for cluster_idx in clus_est.group_by_cluster(best_clust):\n", + " data = y[cluster_idx]\n", + " plt.scatter(data, np.zeros_like(data) + 5e-3)\n", + " \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note how the posterior mode of the number of clusters is 3, but the point estimate for the best clustering consists of 2 clusters" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/examples/estimate_univ_density.ipynb b/docs/examples/estimate_univ_density.ipynb new file mode 100644 index 00000000..4d25d122 --- /dev/null +++ b/docs/examples/estimate_univ_density.ipynb @@ -0,0 +1,204 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Univariate Density Estimation via Dirichlet Process Mixture" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pybmix.core.mixing import DirichletProcessMixing, StickBreakMixing\n", + "from pybmix.core.hierarchy import UnivariateNormal\n", + "from pybmix.core.mixture_model import 
MixtureModel\n", + "\n", + "np.random.seed(2021)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Generation\n", + "\n", + "We generate data from a two-component mixture model\n", + "$$\n", + "y_i \\sim \\frac{1}{2} \\mathcal N(-3, 1) + \\frac{1}{2} \\mathcal N(3, 1), \\quad i=1, \\ldots, 200\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def sample_from_mixture(weigths, means, sds, n_data):\n", + " n_comp = len(weigths)\n", + " clus_alloc = np.random.choice(np.arange(n_comp), p=[0.5, 0.5], size=n_data)\n", + " return np.random.normal(loc=means[clus_alloc], scale=sds[clus_alloc])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y = sample_from_mixture(\n", + " np.array([0.5, 0.5]), np.array([-3, 3]), np.array([1, 1]), 200)\n", + "plt.hist(y)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The statistical model\n", + "\n", + "We assume the following model\n", + "$$\n", + "y_i | \\tilde{p} \\sim f(\\cdot) = \\int_{R \\times R^+} \\mathcal{N}(\\cdot | \\mu, \\sigma^2) \\tilde{p}(d\\mu, d\\sigma^2)\n", + "$$\n", + "$$\n", + "\\tilde{p} \\sim DP(\\alpha, G_0)\n", + "$$\n", + "where $DP(\\alpha, G_0)$ is the Dirichlet Process with base measure $\\alpha G_0$. \n", + "\n", + "Given the stick-breaking represetation of the Dirichlet Process, the model is equivalently written as\n", + "$$\n", + "y_i | \\{w_h\\}_h \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim f(\\cdot) = \\sum_{h=1}^\\infty w_h \\mathcal{N}(\\cdot | \\mu_h, \\sigma_h^2)\n", + "$$\n", + "$$\n", + "\\{w_h\\}_h \\sim GEM(\\alpha)\n", + "$$\n", + "$$\n", + " \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim G_0\n", + "$$\n", + "\n", + "In pybmix we take advantage of the second representation, and specify a MixtureModel in terms of a Mixing and a Hierarchy. 
The Mixing is the prior for the weights, while the Hierarchy combines the base measure $G_0$ with the kernel of the mixture (in this case, the univariate Gaussian distribution)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we assume that $alpha = 5$ and $G_0(d\\mu, d\\sigma^2) = \\mathcal N(d\\mu | \\mu_0, \\lambda \\sigma^2) \\times IG(d\\sigma^2 | a, b)$, i.e., $G_0$ is a normal-inverse gamma distribution. \n", + "\n", + "The parameters $(\\mu_0, \\lambda, a , b)$ of $G_0$ can be set automatically by the method 'make_default_fixed_params' which takes as input the observations and a \"guess\" on the number of clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mixing = DirichletProcessMixing(total_mass=5)\n", + "hierarchy = UnivariateNormal()\n", + "hierarchy.make_default_fixed_params(y, 2)\n", + "mixture = MixtureModel(mixing, hierarchy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run MCMC simulations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mixture.run_mcmc(y, algorithm=\"Neal2\", niter=2000, nburn=1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the density estimates\n", + "\n", + "1) fix a grid where to estimate the densities\n", + "\n", + "2) the method 'estimate_density' returns a matrix of shape [niter - nburn, len(grid)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pybmix.estimators.density_estimator import DensityEstimator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "grid = np.linspace(-6, 6, 500)\n", + "dens_est = DensityEstimator(mixture)\n", + "densities = dens_est.estimate_density(grid)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Plot some of the densities and their mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.hist(y, density=True)\n", + "plt.plot(grid, np.mean(densities, axis=0), lw=3, label=\"predictive density\")\n", + "idxs = [5, 100, 300]\n", + "for idx in idxs:\n", + " plt.plot(grid, densities[idx, :], \"--\", label=\"iteration: {0}\".format(idx))\n", + " \n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/examples/prior_elicitation.ipynb b/docs/examples/prior_elicitation.ipynb new file mode 100644 index 00000000..e138a7b8 --- /dev/null +++ b/docs/examples/prior_elicitation.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prior elicitation\n", + "\n", + "What is the meaning of the parameters in the various processes? Let's study the prior distribution of the number of clusters!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from pybmix.core.mixing import DirichletProcessMixing, PitmanYorMixing, StickBreakMixing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dirichlet Process\n", + "\n", + "Compare how the prior distribution varies for a DP for different values of the 'total_mass' parameter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nsamples = 100\n", + "grid = np.arange(1, 30)\n", + "\n", + "for total_mass in [0.25, 1, 2, 3, 5, 7]:\n", + " dp = DirichletProcessMixing(total_mass=total_mass)\n", + " probs = dp.prior_cluster_distribution(grid, nsamples)\n", + " plt.plot(grid, probs, label=\"total_mass={0}\".format(total_mass))\n", + " \n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pitman-Yor Process\n", + "\n", + "The PY process is controlled by two parameters: 'strength' (plays the same role of 'total_mass' in the Dirichlet Process) and 'discount'.\n", + "For a fixed value of 'strength', see how the distribution varies for different values of 'discount'."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "strength = 1\n", + "for discount in [0.001, 0.1, 0.2, 0.5]:\n", + " py = PitmanYorMixing(strength=strength, discount=discount)\n", + " probs = py.prior_cluster_distribution(grid, nsamples)\n", + " plt.plot(grid, probs, label=\"strength={0}, discount={1}\".format(strength, discount))\n", + " \n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stick Breaking prior" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Truncation of a Dirichlet Process" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = [50, 1000]\n", + "H = [10, 20, 50]\n", + "grid = np.arange(1, 50)\n", + "\n", + "fig, axes = plt.subplots(nrows=len(n), ncols=len(H), figsize=(15, 10))\n", + "for i, nval in enumerate(n):\n", + " for j, hval in enumerate(H):\n", + " for total_mass in [0.25, 1, 2, 3, 5, 7]:\n", + " sb = StickBreakMixing(n_comp=hval, strength=total_mass)\n", + " probs = sb.prior_cluster_distribution(grid, nval, mc_iter=10000)\n", + " axes[i][j].plot(grid, probs, label=\"total_mass={0}\".format(total_mass))\n", + " axes[i][j].set_title(\"N: {0}, H: {1}\".format(nval, hval), fontsize=18)\n", + "\n", + "axes[1, 1].legend(ncol=3, fontsize=14, bbox_to_anchor=(1.5, -0.1))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Truncation of a Pitman-Yor Process" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = [200, 1000]\n", + "H = [10, 20, 50]\n", + "strength = 1\n", + "discount = np.linspace(0.2, 0.9, 4)\n", + "\n", + "fig, axes = plt.subplots(nrows=len(n), ncols=len(H), figsize=(15, 10))\n", + "\n", + "for i, nval in enumerate(n):\n", + " for j, hval in enumerate(H):\n", + " for d in discount:\n", + " sb = 
StickBreakMixing(n_comp=hval, strength=strength, discount=d)\n", + " probs = sb.prior_cluster_distribution(grid, nval, mc_iter=10000)\n", + " axes[i][j].plot(grid, probs, label=\"discount={0:.2f}\".format(d))\n", + " axes[i][j].set_title(\"N: {0}, H: {1}\".format(nval, hval), fontsize=18)\n", + "\n", + "axes[1, 1].legend(ncol=2, fontsize=14, bbox_to_anchor=(1.2, -0.1))\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/examples/test.ipynb b/docs/examples/test.ipynb deleted file mode 100644 index ff35ff29..00000000 --- a/docs/examples/test.ipynb +++ /dev/null @@ -1,76 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Test Stuff" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plt.plot(np.random.normal(size=100))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pybmix" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pybmix.core.mixing import DirichletProcessMixing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - 
"name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/index.rst b/docs/index.rst index c7179f49..e042170b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,7 +14,9 @@ Welcome to pybmix's documentation! :maxdepth: 1 :caption: Tutorials: - examples/test.ipynb + examples/estimate_univ_density.ipynb + examples/clustering_univ_data.ipynb + examples/prior_elicitation.ipynb Indices and tables ================== From fce72dc644b082d250f7049da883f3475c83c914 Mon Sep 17 00:00:00 2001 From: mberaha Date: Wed, 2 Jun 2021 12:53:25 +0200 Subject: [PATCH 50/57] changed latex --- docs/examples/clustering_univ_data.ipynb | 56 +++++++++++++-------- docs/examples/estimate_univ_density.ipynb | 59 +++++++++++++++-------- 2 files changed, 77 insertions(+), 38 deletions(-) diff --git a/docs/examples/clustering_univ_data.ipynb b/docs/examples/clustering_univ_data.ipynb index 503a473f..90a669d7 100644 --- a/docs/examples/clustering_univ_data.ipynb +++ b/docs/examples/clustering_univ_data.ipynb @@ -38,25 +38,43 @@ "\n", "Recall that $\\tilde p \\sim DP(\\alpha, G_0)$ means that $\\tilde p = \\sum_{h=1}^\\infty w_h \\delta_{\\tau_h}$ with $\\{w_h\\}_h \\sim GEM(\\alpha)$ and $\\{\\tau_h\\}_h \\sim G_0$. Hence, realizations from a DP are almost surely discrete probability measures.\n", "\n", - "Hence, sampling \n", - "$$\n", - "\\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim p\n", - "$$\n", - "$$\n", - "\\tilde{p} \\sim DP(\\alpha, G_0)\n", - "$$\n", - "entails that with positive probability $\\theta_i = \\theta_j$ (with $i \\neq j$). 
In a sample of size $n$ there will be $k \\geq n$ unique values $\\theta^*_1, \\ldots, \\theta^*_k$ among the $\\theta_i$'s and clusters are defined as $C_j = \\{i : \\theta_i = \\theta^*_j \\}.\n", - "\n", - "When considering a mixture model, the $\\theta_i$'s are not observations but latent variables. In the case of a univariate normal mizture models, $\\theta_i = (\\mu_i, \\sigma^2_i)$ and the model can be written as\n", - "$$\n", - " y_i | \\theta_i = (\\mu_i, \\sigma^2_i) \\sim \\mathcal N(\\mu_i, \\sigma^2_i)\n", - "$$\n", - "$$\n", - " \\theta_1, \\ldots, \\theta_n | \\tilde{p} \\sim \\tilde{p}\n", - "$$\n", - "$$\n", - " \\tilde{p} \\sim DP(\\alpha, G_0)\n", - "$$\n", + "Hence, sampling " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\\begin{equation}\n", + "\\begin{aligned}\n", + "\\theta_1, \\ldots, \\theta_n | \\tilde{p} & \\sim \\tilde{p} \\\\\n", + "\\tilde{p} &\\sim DP(\\alpha, G_0)\n", + "\\end{aligned}\n", + "\\end{equation}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "entails that with positive probability $\\theta_i = \\theta_j$ (with $i \\neq j$). In a sample of size $n$ there will be $k \\leq n$ unique values $\\theta^*_1, \\ldots, \\theta^*_k$ among the $\\theta_i$'s and clusters are defined as $C_j = \\{i : \\theta_i = \\theta^*_j \\}$.\n", + "\n", + "When considering a mixture model, the $\\theta_i$'s are not observations but latent variables.
In the case of a univariate normal mixture model, $\\theta_i = (\\mu_i, \\sigma^2_i)$ and the model can be written as" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\\begin{equation}\n", + "\\begin{aligned}\n", + " y_i | \\theta_i = (\\mu_i, \\sigma^2_i) &\\sim \\mathcal N(\\mu_i, \\sigma^2_i) \\\\\n", + " \\theta_1, \\ldots, \\theta_n | \\tilde{p} &\\sim \\tilde{p} \\\\\n", + " \\tilde{p} &\\sim DP(\\alpha, G_0)\n", + "\\end{aligned}\n", + "\\end{equation}\n", + "\n", + "and the clustering among the observations $y_i$'s is inherited by the clustering among the $\\theta_i$'s." ] }, diff --git a/docs/examples/estimate_univ_density.ipynb b/docs/examples/estimate_univ_density.ipynb index 4d25d122..103b5332 100644 --- a/docs/examples/estimate_univ_density.ipynb +++ b/docs/examples/estimate_univ_density.ipynb @@ -65,26 +65,47 @@ "source": [ "## The statistical model\n", "\n", - "We assume the following model\n", - "$$\n", - "y_i | \\tilde{p} \\sim f(\\cdot) = \\int_{R \\times R^+} \\mathcal{N}(\\cdot | \\mu, \\sigma^2) \\tilde{p}(d\\mu, d\\sigma^2)\n", - "$$\n", - "$$\n", - "\\tilde{p} \\sim DP(\\alpha, G_0)\n", - "$$\n", + "We assume the following model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\\begin{equation}\n", + "\\begin{aligned}\n", + "y_i | \\tilde{p} &\\sim f(\\cdot) = \\int_{R \\times R^+} \\mathcal{N}(\\cdot | \\mu, \\sigma^2) \\tilde{p}(d\\mu, d\\sigma^2) \\\\\n", + "\\tilde{p} &\\sim DP(\\alpha, G_0)\n", + "\\end{aligned}\n", + "\\end{equation}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "where $DP(\\alpha, G_0)$ is the Dirichlet Process with base measure $\\alpha G_0$.
\n", "\n", - "Given the stick-breaking represetation of the Dirichlet Process, the model is equivalently written as\n", - "$$\n", - "y_i | \\{w_h\\}_h \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim f(\\cdot) = \\sum_{h=1}^\\infty w_h \\mathcal{N}(\\cdot | \\mu_h, \\sigma_h^2)\n", - "$$\n", - "$$\n", - "\\{w_h\\}_h \\sim GEM(\\alpha)\n", - "$$\n", - "$$\n", - " \\{(\\mu_h, \\sigma^2_h)\\}_h \\sim G_0\n", - "$$\n", - "\n", + "Given the stick-breaking representation of the Dirichlet Process, the model is equivalently written as" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\\begin{equation}\n", + "\\begin{aligned}\n", + "y_i | \\{w_h\\}_h \\{(\\mu_h, \\sigma^2_h)\\}_h & \\sim f(\\cdot) = \\sum_{h=1}^\\infty w_h \\mathcal{N}(\\cdot | \\mu_h, \\sigma_h^2) \\\\\n", + "\\{w_h\\}_h &\\sim GEM(\\alpha) \\\\\n", + " \\{(\\mu_h, \\sigma^2_h)\\}_h &\\sim G_0 \\\\\n", + "\\end{aligned}\n", + "\\end{equation}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "In pybmix we take advantage of the second representation, and specify a MixtureModel in terms of a Mixing and a Hierarchy. The Mixing is the prior for the weights, while the Hierarchy combines the base measure $G_0$ with the kernel of the mixture (in this case, the univariate Gaussian distribution)" ] }, @@ -92,7 +113,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here, we assume that $alpha = 5$ and $G_0(d\\mu, d\\sigma^2) = \\mathcal N(d\\mu | \\mu_0, \\lambda \\sigma^2) \\times IG(d\\sigma^2 | a, b)$, i.e., $G_0$ is a normal-inverse gamma distribution. \n", + "Here, we assume that $\\alpha = 5$ and $G_0(d\\mu, d\\sigma^2) = \\mathcal N(d\\mu | \\mu_0, \\lambda \\sigma^2) \\times IG(d\\sigma^2 | a, b)$, i.e., $G_0$ is a normal-inverse gamma distribution.
\n", "\n", "The parameters $(\\mu_0, \\lambda, a , b)$ of $G_0$ can be set automatically by the method 'make_default_fixed_params' which takes as input the observations and a \"guess\" on the number of clusters" ] From 9d49fed191d0a09c2e254d0cb5fde37d8f98b250 Mon Sep 17 00:00:00 2001 From: mberaha Date: Fri, 4 Jun 2021 18:42:20 +0200 Subject: [PATCH 51/57] cleanup --- CMakeLists.txt | 8 ++++++-- build_tbb.py | 8 +------- setup.py | 6 ------ 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c2df0ef0..98a2d648 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,12 +47,16 @@ endforeach() set(PYBMIX_LINK_LIBRARIES pthread protobuf::libprotobuf - ${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/libtbb.so.2 + tbb +) + +SET(PYBMIX_LINK_OPTIONS + "-L${CMAKE_CURRENT_LIST_DIR}/pybmix/core/pybmixcpp/bayesmix/lib/math/lib/tbb/" ) target_include_directories(pybmixcpp PUBLIC ${BAYESMIX_INCLUDE_PATHS}) target_compile_definitions(pybmixcpp PRIVATE VERSION_INFO=${EXAMPLE_VERSION_INFO}) -target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${PYBMIX_LINK_LIBRARIES}) +target_link_libraries(pybmixcpp PUBLIC bayesmixlib ${PYBMIX_LINK_OPTIONS} ${PYBMIX_LINK_LIBRARIES}) target_compile_options(pybmixcpp PUBLIC ${BAYESMIX_COMPILE_OPTIONS}) add_custom_target(genterate_protos ALL DEPENDS ${PROTO_PYS}) diff --git a/build_tbb.py b/build_tbb.py index bcd53223..c7d5a811 100644 --- a/build_tbb.py +++ b/build_tbb.py @@ -10,10 +10,8 @@ def maybe_build_tbb(): """Build tbb. 
This function is taken from https://github.com/stan-dev/pystan/blob/develop/setup.py""" - print("******* BUILD TBB ***********") - stan_math_lib = os.path.abspath(os.path.join(os.path.dirname( - __file__), 'pybmix', 'core', 'pybmixcpp', 'bayesmix' 'lib', 'math', 'lib')) + __file__), 'pybmix', 'core', 'pybmixcpp', 'bayesmix', 'lib', 'math', 'lib')) tbb_dir = os.path.join(stan_math_lib, 'tbb') tbb_dir = os.path.abspath(tbb_dir) @@ -25,10 +23,6 @@ def maybe_build_tbb(): tbb_root = os.path.join(stan_math_lib, 'tbb_2019_U8').replace("\\", "/") - import glob - print("******** LET'S SEE WHAT'S INSIDE ********") - print("\n".join(glob.glob(tbb_root + "/*"))) - cmd.extend(['-C', tbb_root]) cmd.append('tbb_build_dir={}'.format(stan_math_lib)) cmd.append('tbb_build_prefix=tbb') diff --git a/setup.py b/setup.py index 35f754a7..aba3d564 100644 --- a/setup.py +++ b/setup.py @@ -8,13 +8,7 @@ from distutils.command.build_py import build_py as _build_py from distutils.command.clean import clean as _clean from distutils.spawn import find_executable -from setuptools.command.develop import develop as _develop -from setuptools.command.egg_info import egg_info as _egg_info -from distutils.command.install import install as _install -sys.path.insert(0, os.path.join(os.path.abspath('.'), - "pybmix/core/pybmixcpp/bayesmix")) -print(sys.path) from build_tbb import maybe_build_tbb HERE = os.path.abspath('.') From a96948aa83804d8dd30245256bff33f6365627b5 Mon Sep 17 00:00:00 2001 From: mberaha Date: Fri, 4 Jun 2021 18:45:22 +0200 Subject: [PATCH 52/57] changed bayesmix to master --- pybmix/core/pybmixcpp/bayesmix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pybmix/core/pybmixcpp/bayesmix b/pybmix/core/pybmixcpp/bayesmix index ad01d517..98e8e624 160000 --- a/pybmix/core/pybmixcpp/bayesmix +++ b/pybmix/core/pybmixcpp/bayesmix @@ -1 +1 @@ -Subproject commit ad01d51737aaa0f911d3faaab2cf363751e545d5 +Subproject commit 98e8e6244183c6d04c8f8667640918712589603e From 
80ff0d6ae863eca7df806343e02922a6e8e3779b Mon Sep 17 00:00:00 2001 From: mberaha Date: Fri, 4 Jun 2021 19:03:06 +0200 Subject: [PATCH 53/57] updated readme --- README.md | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index b41e1b6b..37f7436b 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,6 @@ -# Bayesian Mixture Models in Python +# pybmix: Bayesian Mixture Models in Python - -## Prerequisites - -- Protocol buffers -- cmake +A Python interface to [bayesmix](https://github.com/bayesmix-dev/bayesmix/) ## Installation @@ -19,12 +15,10 @@ pip3 install -e . ``` from the root folder of this repo. +## Getting Started -# Structure - -This repo contains two main directories: `pybmix` and `pybmixcpp`. +See our [tutorials](https://pybmix.readthedocs.io/en/docs/)! -`pybmixcpp/` contains a copy of the C++ library `bayesmix` and the code for -a small python package that is the raw interface between `bayesmix` and Python. +## Contributing -`pybmix/` contains the Python package. +Just get in touch with us (email in setup.cfg), open a PR or an Issue! 
From 36c82cfece3e965548c2d18323125a2221af07ce Mon Sep 17 00:00:00 2001 From: mberaha Date: Fri, 4 Jun 2021 19:03:53 +0200 Subject: [PATCH 54/57] added config files --- pyproject.toml | 6 ++++++ setup.cfg | 23 +++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 pyproject.toml create mode 100644 setup.cfg diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..b5a3c468 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 00000000..9c24f08c --- /dev/null +++ b/setup.cfg @@ -0,0 +1,23 @@ + +[metadata] +name = pybmix +version = 0.0.1 +license = BSD 3-Clause +license_files = LICENSE +author = Mario Beraha +author_email = berahamario@gmail.com +description = pybmix: a Python interface to bayesmix. +keywords = mixture models, bayesian, nonparametrics, clustering, density +long_description = file: README.md +long_description_content_type = text/markdown +url = https://github.com/bayesmix-dev/pybmix +classifiers = + Programming Language :: Python :: 3 + +[options] +packages = find: +python_requires = >=3.6 + +[options.packages.find] +exclude = + tests, docs, images \ No newline at end of file From e880b6aeafd413ba9c275692e6082a80f13d670a Mon Sep 17 00:00:00 2001 From: Mario beraha Date: Thu, 2 Dec 2021 09:33:42 +0100 Subject: [PATCH 55/57] building wheel --- pyproject.toml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fbc84c21..76b3fb21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,4 @@ requires = [ "ninja>=1.10.0", "2to3" ] -build-backend = "setuptools.build_meta" - "wheel" -] +build-backend = "wheel" From a7f9395781a1786ac3cccefa6a59dbd040a05f96 Mon Sep 17 00:00:00 2001 From: Mario beraha Date: Thu, 2 Dec 2021 09:37:24 +0100 Subject: [PATCH 56/57] build_beta 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 76b3fb21..d86b9a99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,4 +6,4 @@ requires = [ "ninja>=1.10.0", "2to3" ] -build-backend = "wheel" +build-backend = "setuptools.build_meta" From 28e05042188c305b54fad08c2c5e82d65ff61787 Mon Sep 17 00:00:00 2001 From: Mario beraha Date: Thu, 2 Dec 2021 09:43:30 +0100 Subject: [PATCH 57/57] idk --- setup.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/setup.py b/setup.py index 5d5b1a12..1928c70c 100644 --- a/setup.py +++ b/setup.py @@ -11,11 +11,6 @@ from distutils.command.clean import clean as _clean from distutils.spawn import find_executable -<<<<<<< HEAD -======= -from build_tbb import maybe_build_tbb - ->>>>>>> 36c82cfece3e965548c2d18323125a2221af07ce HERE = os.path.abspath('.') PYBMIXCPP_PATH = os.path.join(HERE, "pybmix", "core", "pybmixcpp") BAYEXMIX_PATH = os.path.join(PYBMIXCPP_PATH , "bayesmix")