.. code:: python
from mxnet import np
from mxnet.metric import NegativeLogLikelihood
from mxnet.ndarray import nansum
import random
def self_information(p):
    """Self-information (surprisal) of an outcome with probability ``p``, in bits."""
    surprisal = np.log2(p)
    return -surprisal

self_information(1 / 64)
.. parsed-literal::
:class: output
6.0
.. raw:: html

.. raw:: html
.. code:: python
import torch
from torch.nn import NLLLoss
def nansum(x):
    """Sum the entries of ``x`` while skipping NaNs (PyTorch analogue of np.nansum)."""
    finite_mask = ~torch.isnan(x)
    return x[finite_mask].sum()
def self_information(p):
    """Self-information -log2(p) in bits, returned as a Python float."""
    return float(-torch.log2(torch.tensor(p)))

self_information(1 / 64)
.. parsed-literal::
:class: output
6.0
.. raw:: html

.. raw:: html
.. code:: python
import tensorflow as tf
def log2(x):
    """Base-2 logarithm via the change-of-base formula (TF offers no log2)."""
    ln_two = tf.math.log(2.)
    return tf.math.log(x) / ln_two
def nansum(x):
    """NaN-safe sum over the last axis: NaN entries are replaced by zero first."""
    cleaned = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)
    return tf.reduce_sum(cleaned, axis=-1)
def self_information(p):
    """Self-information -log2(p) in bits, as a NumPy scalar."""
    bits = -log2(tf.constant(p))
    return bits.numpy()

self_information(1 / 64)
.. parsed-literal::
:class: output
6.0
.. raw:: html

.. raw:: html
.. code:: python
def entropy(p):
    """Shannon entropy in bits: -sum_i p_i * log2(p_i) over the distribution."""
    plogp = - p * np.log2(p)
    # nansum skips the NaN produced by 0 * log2(0)
    total = nansum(plogp.as_nd_ndarray())
    return total

entropy(np.array([0.1, 0.5, 0.1, 0.3]))
.. parsed-literal::
:class: output
[1.6854753]
.. raw:: html

.. raw:: html
.. code:: python
def entropy(p):
    """Shannon entropy in bits: -sum_i p_i * log2(p_i), skipping NaN terms."""
    terms = - p * torch.log2(p)
    # Inlined nansum: drop NaNs (from 0 * log2(0)) before summing.
    return terms[~torch.isnan(terms)].sum()

entropy(torch.tensor([0.1, 0.5, 0.1, 0.3]))
.. parsed-literal::
:class: output
tensor(1.6855)
.. raw:: html

.. raw:: html
.. code:: python
def entropy(p):
    """Entropy in bits; nansum drops NaN terms arising from zero probabilities."""
    plogp = p * log2(p)
    return nansum(-plogp)

entropy(tf.constant([0.1, 0.5, 0.1, 0.3]))
.. parsed-literal::
:class: output
.. raw:: html

.. raw:: html
.. code:: python
def joint_entropy(p_xy):
    """Joint entropy H(X, Y) in bits from the joint distribution ``p_xy``."""
    terms = -p_xy * np.log2(p_xy)
    # nansum skips NaN cells produced by zero joint probabilities
    total = nansum(terms.as_nd_ndarray())
    return total

joint_entropy(np.array([[0.1, 0.5], [0.1, 0.3]]))
.. parsed-literal::
:class: output
[1.6854753]
.. raw:: html

.. raw:: html
.. code:: python
def joint_entropy(p_xy):
    """Joint entropy H(X, Y) in bits from the joint distribution ``p_xy``."""
    terms = -p_xy * torch.log2(p_xy)
    # Inlined nansum: skip NaN cells (zero-probability entries give 0 * -inf).
    return terms[~torch.isnan(terms)].sum()

joint_entropy(torch.tensor([[0.1, 0.5], [0.1, 0.3]]))
.. parsed-literal::
:class: output
tensor(1.6855)
.. raw:: html

.. raw:: html
.. code:: python
def joint_entropy(p_xy):
    """Joint entropy H(X, Y) in bits from the joint distribution ``p_xy``."""
    terms = -p_xy * log2(p_xy)
    # NaN-safe sum via the module-level nansum helper
    result = nansum(terms)
    return result

joint_entropy(tf.constant([[0.1, 0.5], [0.1, 0.3]]))
.. parsed-literal::
:class: output
.. raw:: html

.. raw:: html
.. code:: python
def conditional_entropy(p_xy, p_x):
    """Conditional entropy H(Y | X) in bits from joint ``p_xy`` and marginal ``p_x``."""
    p_y_given_x = p_xy/p_x
    terms = -p_xy * np.log2(p_y_given_x)
    # nansum skips NaN terms from zero joint probabilities
    result = nansum(terms.as_nd_ndarray())
    return result

conditional_entropy(np.array([[0.1, 0.5], [0.2, 0.3]]), np.array([0.2, 0.8]))
.. parsed-literal::
:class: output
[0.8635472]
.. raw:: html

.. raw:: html
.. code:: python
def conditional_entropy(p_xy, p_x):
    """Conditional entropy H(Y | X) in bits, given joint ``p_xy`` and marginal ``p_x``."""
    p_y_given_x = p_xy/p_x
    terms = -p_xy * torch.log2(p_y_given_x)
    # Inlined nansum: NaN-safe sum over all cells.
    return terms[~torch.isnan(terms)].sum()

conditional_entropy(torch.tensor([[0.1, 0.5], [0.2, 0.3]]),
                    torch.tensor([0.2, 0.8]))
.. parsed-literal::
:class: output
tensor(0.8635)
.. raw:: html

.. raw:: html
.. code:: python
def conditional_entropy(p_xy, p_x):
    """Conditional entropy H(Y | X) in bits from joint ``p_xy`` and marginal ``p_x``."""
    p_y_given_x = p_xy/p_x
    terms = -p_xy * log2(p_y_given_x)
    # NaN-safe sum via the module-level nansum helper
    result = nansum(terms)
    return result

conditional_entropy(tf.constant([[0.1, 0.5], [0.2, 0.3]]),
                    tf.constant([0.2, 0.8]))
.. parsed-literal::
:class: output
.. raw:: html

.. raw:: html
.. code:: python
def mutual_information(p_xy, p_x, p_y):
    """Mutual information I(X; Y) in bits from the joint and marginal distributions."""
    ratio = p_xy / (p_x * p_y)
    terms = p_xy * np.log2(ratio)
    # nansum skips NaN terms from zero joint probabilities
    result = nansum(terms.as_nd_ndarray())
    return result

mutual_information(np.array([[0.1, 0.5], [0.1, 0.3]]),
                   np.array([0.2, 0.8]), np.array([[0.75, 0.25]]))
.. parsed-literal::
:class: output
[0.71946025]
.. raw:: html

.. raw:: html
.. code:: python
def mutual_information(p_xy, p_x, p_y):
    """Mutual information I(X; Y) in bits from the joint and marginal distributions."""
    ratio = p_xy / (p_x * p_y)
    terms = p_xy * torch.log2(ratio)
    # Inlined nansum: NaN-safe sum over all cells.
    return terms[~torch.isnan(terms)].sum()

mutual_information(torch.tensor([[0.1, 0.5], [0.1, 0.3]]),
                   torch.tensor([0.2, 0.8]), torch.tensor([[0.75, 0.25]]))
.. parsed-literal::
:class: output
tensor(0.7195)
.. raw:: html

.. raw:: html
.. code:: python
def mutual_information(p_xy, p_x, p_y):
    """Mutual information I(X; Y) in bits from the joint and marginal distributions."""
    ratio = p_xy / (p_x * p_y)
    terms = p_xy * log2(ratio)
    # NaN-safe sum via the module-level nansum helper
    result = nansum(terms)
    return result

mutual_information(tf.constant([[0.1, 0.5], [0.1, 0.3]]),
                   tf.constant([0.2, 0.8]), tf.constant([[0.75, 0.25]]))
.. parsed-literal::
:class: output
.. raw:: html

.. raw:: html
.. code:: python
def kl_divergence(p, q):
    """KL divergence D_KL(p || q) in bits, NaN terms skipped, as a Python scalar."""
    terms = p * np.log2(p / q)
    total = nansum(terms.as_nd_ndarray())
    return total.abs().asscalar()
.. raw:: html

.. raw:: html
.. code:: python
def kl_divergence(p, q):
    """KL divergence D_KL(p || q) in bits, NaN terms skipped, as a Python float."""
    terms = p * torch.log2(p / q)
    # Inlined nansum: drop NaN terms before summing.
    nan_free_sum = terms[~torch.isnan(terms)].sum()
    return nan_free_sum.abs().item()
.. raw:: html

.. raw:: html
.. code:: python
def kl_divergence(p, q):
    """KL divergence D_KL(p || q) in bits, NaN terms skipped, as a NumPy value."""
    terms = p * log2(p / q)
    total = nansum(terms)
    return tf.abs(total).numpy()
.. raw:: html

.. raw:: html
.. code:: python
# Draw 10k samples from three Gaussians: p ~ N(0, 1), q1 ~ N(-1, 1), q2 ~ N(1, 1).
# NOTE(review): random.seed seeds Python's stdlib RNG, which may not affect
# mxnet's np.random draws below — confirm reproducibility is intended.
random.seed(1)
nd_len = 10000
p = np.random.normal(loc=0, scale=1, size=(nd_len, ))
q1 = np.random.normal(loc=-1, scale=1, size=(nd_len, ))
q2 = np.random.normal(loc=1, scale=1, size=(nd_len, ))
# Sort each sample (via a NumPy round-trip) so the arrays can be compared
# elementwise in the KL-divergence experiments below.
p = np.array(sorted(p.asnumpy()))
q1 = np.array(sorted(q1.asnumpy()))
q2 = np.array(sorted(q2.asnumpy()))
.. raw:: html

.. raw:: html
.. code:: python
# Draw three reproducible Gaussian samples: p ~ N(0, 1), q1 ~ N(-1, 1), q2 ~ N(1, 1).
torch.manual_seed(1)
tensor_len = 10000
p = torch.normal(0, 1, (tensor_len, ))
q1 = torch.normal(-1, 1, (tensor_len, ))
q2 = torch.normal(1, 1, (tensor_len, ))
# Sort each sample so the tensors can be compared elementwise below.
p, q1, q2 = (torch.sort(t)[0] for t in (p, q1, q2))
.. raw:: html

.. raw:: html
.. code:: python
# Draw 10k samples: p ~ N(0, 1), q1 ~ N(-1, 1), q2 ~ N(1, 1).
# NOTE(review): no RNG seed is set here, so the results below vary per run.
tensor_len = 10000
p = tf.random.normal((tensor_len, ), 0, 1)
q1 = tf.random.normal((tensor_len, ), -1, 1)
q2 = tf.random.normal((tensor_len, ), 1, 1)
# Sort each sample so the tensors can be compared elementwise below.
p = tf.sort(p)
q1 = tf.sort(q1)
q2 = tf.sort(q2)
.. raw:: html

.. raw:: html
.. code:: python
# Compare D_KL(p || q1) with D_KL(p || q2); q1 and q2 are shifted symmetrically
# about p's mean, so the two divergences are expected to be close.
kl_pq1 = kl_divergence(p, q1)
kl_pq2 = kl_divergence(p, q2)
# Percentage difference relative to the mean of the two divergences.
similar_percentage = abs(kl_pq1 - kl_pq2) / ((kl_pq1 + kl_pq2) / 2) * 100
kl_pq1, kl_pq2, similar_percentage
.. parsed-literal::
:class: output
(8470.638, 8664.999, 2.268504302642314)
.. raw:: html

.. raw:: html
.. code:: python
# Compare D_KL(p || q1) with D_KL(p || q2); q1 and q2 are shifted symmetrically
# about p's mean, so the two divergences are expected to be close.
kl_pq1 = kl_divergence(p, q1)
kl_pq2 = kl_divergence(p, q2)
# Percentage difference relative to the mean of the two divergences.
similar_percentage = abs(kl_pq1 - kl_pq2) / ((kl_pq1 + kl_pq2) / 2) * 100
kl_pq1, kl_pq2, similar_percentage
.. parsed-literal::
:class: output
(8582.033203125, 8828.3115234375, 2.829103319669021)
.. raw:: html

.. raw:: html
.. code:: python
# Compare D_KL(p || q1) with D_KL(p || q2); q1 and q2 are shifted symmetrically
# about p's mean, so the two divergences are expected to be close.
kl_pq1 = kl_divergence(p, q1)
kl_pq2 = kl_divergence(p, q2)
# Percentage difference relative to the mean of the two divergences.
similar_percentage = abs(kl_pq1 - kl_pq2) / ((kl_pq1 + kl_pq2) / 2) * 100
kl_pq1, kl_pq2, similar_percentage
.. parsed-literal::
:class: output
(8642.627, 8359.184, 3.3342726481221496)
.. raw:: html

.. raw:: html
.. code:: python
# KL divergence is asymmetric: D_KL(q2 || p) differs markedly from D_KL(p || q2).
kl_q2p = kl_divergence(q2, p)
differ_percentage = abs(kl_q2p - kl_pq2) / ((kl_q2p + kl_pq2) / 2) * 100
kl_q2p, differ_percentage
.. parsed-literal::
:class: output
(13536.835, 43.88678828000115)
.. raw:: html

.. raw:: html
.. code:: python
# KL divergence is asymmetric: D_KL(q2 || p) differs markedly from D_KL(p || q2).
kl_q2p = kl_divergence(q2, p)
differ_percentage = abs(kl_q2p - kl_pq2) / ((kl_q2p + kl_pq2) / 2) * 100
kl_q2p, differ_percentage
.. parsed-literal::
:class: output
(14130.125, 46.18618930039122)
.. raw:: html

.. raw:: html
.. code:: python
# KL divergence is asymmetric: D_KL(q2 || p) differs markedly from D_KL(p || q2).
kl_q2p = kl_divergence(q2, p)
differ_percentage = abs(kl_q2p - kl_pq2) / ((kl_q2p + kl_pq2) / 2) * 100
kl_q2p, differ_percentage
.. parsed-literal::
:class: output
(13414.008, 46.431633511465954)
.. raw:: html

.. raw:: html
.. code:: python
def cross_entropy(y_hat, y):
    """Average negative log-likelihood of the true labels ``y`` under ``y_hat``."""
    # Pick y_hat[i, y[i]] for each example i via fancy indexing.
    picked = y_hat[range(len(y_hat)), y]
    return (-np.log(picked)).mean()
.. raw:: html

.. raw:: html
.. code:: python
def cross_entropy(y_hat, y):
    """Average negative log-likelihood of the true labels ``y`` under ``y_hat``."""
    # Pick y_hat[i, y[i]] for each example i via fancy indexing.
    picked = y_hat[range(len(y_hat)), y]
    return -torch.log(picked).mean()
.. raw:: html

.. raw:: html
.. code:: python
def cross_entropy(y_hat, y):
    """Average negative log-likelihood of labels ``y`` under predictions ``y_hat``.

    Fix: the previous body computed ``-tf.math.log(y_hat[:, :len(y)])``, i.e.
    the first ``len(y)`` columns of every row, rather than the probability each
    example assigns to its true class — so it disagreed with the mxnet/torch
    implementations, which index ``y_hat[range(len(y_hat)), y]``.
    """
    # Select y_hat[i, y[i]] for every row i (batched gather along axis 1).
    picked = tf.gather(y_hat, y, axis=1, batch_dims=1)
    return tf.reduce_mean(-tf.math.log(picked))
.. raw:: html

.. raw:: html
.. code:: python
# Two examples with true classes 0 and 2, plus predicted class probabilities.
labels = np.array([0, 2])
preds = np.array([[0.3, 0.6, 0.1], [0.2, 0.3, 0.5]])
# Mean of -log(0.3) and -log(0.5), approximately 0.9486.
cross_entropy(preds, labels)
.. parsed-literal::
:class: output
array(0.94856)
.. raw:: html

.. raw:: html
.. code:: python
# Two examples with true classes 0 and 2, plus predicted class probabilities.
labels = torch.tensor([0, 2])
preds = torch.tensor([[0.3, 0.6, 0.1], [0.2, 0.3, 0.5]])
# Mean of -log(0.3) and -log(0.5), approximately 0.9486.
cross_entropy(preds, labels)
.. parsed-literal::
:class: output
tensor(0.9486)
.. raw:: html

.. raw:: html
.. code:: python
# Two examples with true classes 0 and 2, plus predicted class probabilities.
labels = tf.constant([0, 2])
preds = tf.constant([[0.3, 0.6, 0.1], [0.2, 0.3, 0.5]])
cross_entropy(preds, labels)
.. parsed-literal::
:class: output
.. raw:: html

.. raw:: html
.. code:: python
# Cross-check the hand-written cross_entropy against mxnet's built-in
# NegativeLogLikelihood metric on the same labels/preds.
nll_loss = NegativeLogLikelihood()
nll_loss.update(labels.as_nd_ndarray(), preds.as_nd_ndarray())
nll_loss.get()  # ('nll-loss', value) pair
.. parsed-literal::
:class: output
('nll-loss', 0.9485599994659424)
.. raw:: html

.. raw:: html
.. code:: python
# Implementation of CrossEntropy loss in pytorch combines nn.LogSoftmax() and
# nn.NLLLoss()
# NLLLoss expects log-probabilities, so preds is passed through torch.log first.
nll_loss = NLLLoss()
loss = nll_loss(torch.log(preds), labels)
loss
.. parsed-literal::
:class: output
tensor(0.9486)
.. raw:: html

.. raw:: html
.. code:: python
def nll_loss(y_hat, y):
    """Negative log-likelihood of integer labels ``y`` given predictions ``y_hat``."""
    # Convert labels to binary class matrix.
    y = tf.keras.utils.to_categorical(y, num_classes=3)
    # Since tf.keras.losses.binary_crossentropy returns the mean
    # over the last axis, we calculate the sum here.
    # NOTE(review): from_logits=True treats y_hat as unnormalized logits, but the
    # caller below passes log-probabilities — confirm this matches the intended loss.
    return tf.reduce_sum(tf.keras.losses.binary_crossentropy(y, y_hat, from_logits=True))

loss = nll_loss(tf.math.log(preds), labels)
loss
.. parsed-literal::
:class: output
.. raw:: html

.. raw:: html
`Discussions `__
.. raw:: html

.. raw:: html
`Discussions `__
.. raw:: html

.. raw:: html
`Discussions `__
.. raw:: html

.. raw:: html