Speaker Recognition Loss

Sept. 17, 2020, 4:25 p.m.

AM-Softmax Loss

import tensorflow as tf
import math
import torch
import torch.nn as nn
from torch import Tensor
import numpy as np

# weight = np.random.randn(1991, 512).astype(np.float32)
# np.save('weight.npy', weight)
# weight = np.load('weight.npy')

class AMSoftmaxLossTF(tf.keras.losses.Loss):
    """Additive-margin softmax (AM-Softmax) loss, TensorFlow implementation.

    Both the input embeddings and the class weight columns are L2-normalised,
    so their product is a cosine similarity. The margin ``m`` is subtracted
    from the target-class cosine, the result is scaled by ``s`` and fed to a
    softmax cross-entropy.

    Args:
        nOut: Embedding dimension (number of rows of the class weight matrix).
        nClasses: Number of classes (number of columns of the weight matrix).
        m: Additive cosine margin applied to the target class.
        s: Scale factor applied to the logits before the softmax.
        name: Name passed to the Keras ``Loss`` base class and used as the
            prefix of the weight variable name.
    """

    def __init__(self, nOut: int, nClasses: int, m: float = 0.3, s: float = 15, name="AMSoftmaxLoss") -> None:
        # Initialise the Keras base class so its attributes (name, reduction)
        # exist on the instance.
        super().__init__(name=name)
        self.embedding_size = nOut
        self.num_classes = nClasses
        self.m = m
        self.s = s
        initializer = tf.initializers.GlorotNormal()
        # Trainable class weights, stored as (nOut, nClasses).
        self.weight = tf.Variable(
            initializer(shape=[nOut, nClasses], dtype=tf.float32),
            name="%s_weight" % name)
        self.criterion = tf.nn.softmax_cross_entropy_with_logits

    def __call__(self, input: Tensor, label: Tensor, logit_length=None) -> Tensor:
        """Compute the mean AM-Softmax loss over a batch.

        Args:
            input: Embeddings, normalised along axis 1 and multiplied with the
                (nOut, nClasses) weight matrix.
            label: Integer class indices, one per row of ``input``.
            logit_length: Unused; accepted so callers that pass it keep working.

        Returns:
            Scalar tensor: the batch mean of the per-sample cross-entropy.
        """
        outputs_norm = tf.math.l2_normalize(input, axis=1)
        weight_norm = tf.math.l2_normalize(self.weight, axis=0)
        costh = tf.matmul(outputs_norm, weight_norm)

        label_onehot = tf.one_hot(label, self.num_classes)
        # The margin is only subtracted at the target-class position.
        costh_m_s = self.s * (costh - self.m * label_onehot)

        per_sample = self.criterion(labels=label_onehot, logits=costh_m_s)
        return tf.reduce_mean(per_sample)

class AMSoftmaxLossTorch(nn.Module):
    """Additive-margin softmax (AM-Softmax) loss, PyTorch implementation.

    Embeddings and class weight columns are L2-normalised so that their
    product is a cosine similarity; the margin ``m`` is subtracted from the
    target-class cosine and the result is scaled by ``s`` before the
    cross-entropy.

    Args:
        nOut: Embedding dimension.
        nClasses: Number of classes.
        m: Additive cosine margin applied to the target class.
        s: Scale factor applied to the logits.
    """

    def __init__(self, nOut: int, nClasses: int, m: float = 0.3, s: float = 15) -> None:
        super(AMSoftmaxLossTorch, self).__init__()
        self.test_normalize = True

        self.m = m
        self.s = s
        self.in_feats = nOut
        # Class weights stored as (nOut, nClasses); the random values are
        # overwritten by the Xavier initialisation below.
        self.W = torch.nn.Parameter(torch.randn(nOut, nClasses), requires_grad=True)
        self.ce = nn.CrossEntropyLoss()
        nn.init.xavier_normal_(self.W, gain=1)
        print('Initialised AMSoftmax m=%.3f s=%.3f' % (self.m, self.s))

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        """Compute the mean AM-Softmax loss over a batch.

        Args:
            input: Embeddings of shape (batch, nOut).
            label: Integer (int64) class indices of shape (batch,).

        Returns:
            Scalar loss tensor.
        """
        assert input.size()[0] == label.size()[0], "batch size of input and label differ"
        assert input.size()[1] == self.in_feats, "embedding dimension does not match nOut"

        # The clamp avoids a division by zero for all-zero rows / columns.
        x_unit = input / torch.norm(input, p=2, dim=1, keepdim=True).clamp(min=1e-12)
        w_unit = self.W / torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-12)
        costh = torch.mm(x_unit, w_unit)

        # Build the margin directly on the device/dtype of the logits instead
        # of round-tripping the labels through the CPU and calling .cuda(),
        # which forced a host sync and only worked for the default CUDA device.
        delt_costh = torch.zeros_like(costh).scatter_(1, label.view(-1, 1), self.m)

        costh_m_s = self.s * (costh - delt_costh)
        return self.ce(costh_m_s, label)

if __name__ == '__main__':
    # Smoke test: feed the same random batch to the TensorFlow and PyTorch
    # AM-Softmax implementations. The two classes initialise their weights
    # independently, so the printed losses are only expected to match when
    # both are given the same weight matrix.
    features = np.random.randn(10, 512)
    # np.save('input.npy', features)
    # features = np.load('input.npy')

    features_torch = torch.from_numpy(features).float()
    features_tf = tf.convert_to_tensor(value=features, dtype=tf.float32)

    label = np.array([99, 11, 22, 33, 44, 88, 47, 478, 982, 1990]).astype(np.int32)
    label_torch = torch.from_numpy(label).long()
    label_tf = tf.convert_to_tensor(value=label, dtype=tf.int32)

    am_loss_1 = AMSoftmaxLossTF(nOut=512, nClasses=1991)
    am_loss_2 = AMSoftmaxLossTorch(nOut=512, nClasses=1991)

    loss_1 = am_loss_1(features_tf, label_tf)
    loss_2 = am_loss_2(features_torch, label_torch)

    # Show the results; previously they were computed and silently discarded.
    print('AM-Softmax TF loss: %.6f' % float(loss_1))
    print('AM-Softmax Torch loss: %.6f' % float(loss_2))


AAM-Softmax Loss

import tensorflow as tf
import math
import torch
import torch.nn as nn
from torch import Tensor
import torch.nn.functional as F
import numpy as np

# weight = np.random.randn(1991, 512).astype(np.float32)
# np.save('weight.npy', weight)
# weight = np.load('weight.npy')

class AAMSoftmaxLossTF(tf.keras.losses.Loss):
    """Additive angular margin softmax (AAM-Softmax) loss, TensorFlow version.

    The target-class logit ``cos(theta)`` is replaced by ``cos(theta + m)``;
    all logits are then scaled by ``s`` and fed to a softmax cross-entropy.

    Args:
        nOut: Embedding dimension (number of rows of the class weight matrix).
        nClasses: Number of classes (number of columns of the weight matrix).
        m: Angular margin in radians.
        s: Scale factor applied to the logits.
        easy_margin: If True, the margin is applied only where
            ``cos(theta) > 0`` and the plain cosine is used elsewhere. If
            False, ``cos(theta) - mm`` is used where ``cos(theta) <= th``.
        name: Name passed to the Keras ``Loss`` base class and used as the
            prefix of the weight variable name.
    """

    def __init__(self, nOut: int, nClasses: int,
                 m: float = 0.3, s: float = 15,
                 easy_margin: bool = False, name: str = "AAMSoftmaxLoss") -> None:
        # Initialise the Keras base class so its attributes (name, reduction)
        # exist on the instance.
        super().__init__(name=name)
        self.embedding_size = nOut
        self.num_classes = nClasses
        self.m = m
        self.s = s
        initializer = tf.initializers.GlorotNormal()
        # Trainable class weights, stored as (nOut, nClasses).
        self.weight = tf.Variable(
            initializer(shape=[nOut, nClasses], dtype=tf.float32),
            name="%s_weight" % name)

        self.criterion = tf.nn.softmax_cross_entropy_with_logits
        self.easy_margin = easy_margin
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)

        # Keep cos(theta + m) monotonically decreasing for theta in [0, pi]:
        # below the threshold ``th`` the linear fallback cos(theta) - mm is used.
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

    def __call__(self, input: Tensor, label: Tensor) -> Tensor:
        """Compute the mean AAM-Softmax loss over a batch.

        Args:
            input: Embeddings, normalised along axis 1 and multiplied with the
                (nOut, nClasses) weight matrix.
            label: Integer class indices, one per row of ``input``.

        Returns:
            Scalar tensor: the batch mean of the per-sample cross-entropy.
        """
        outputs_norm = tf.math.l2_normalize(input, axis=1)
        weight_norm = tf.math.l2_normalize(self.weight, axis=0)
        cosine = tf.matmul(outputs_norm, weight_norm)
        # Clip BEFORE the square root: rounding can push 1 - cos^2 slightly
        # below zero, and sqrt of a negative value is NaN (clipping afterwards
        # does not remove the NaN).
        sine = tf.math.sqrt(tf.clip_by_value(1.0 - tf.math.square(cosine), 0.0, 1.0))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        phi = cosine * self.cos_m - sine * self.sin_m

        # The two margin rules are alternatives, not a sequence.
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where((cosine - self.th) > 0, phi, cosine - self.mm)

        label_onehot = tf.one_hot(label, self.num_classes)
        # Margin logit at the target class, plain cosine everywhere else.
        output = (label_onehot * phi) + ((1.0 - label_onehot) * cosine)
        output = output * self.s
        return tf.reduce_mean(self.criterion(labels=label_onehot, logits=output))

class AAMSoftmaxLossTorch(nn.Module):
    """Additive angular margin softmax (AAM-Softmax) loss, PyTorch version.

    The target-class logit ``cos(theta)`` is replaced by ``cos(theta + m)``;
    all logits are then scaled by ``s`` and fed to a cross-entropy.

    Args:
        nOut: Embedding dimension.
        nClasses: Number of classes.
        m: Angular margin in radians.
        s: Scale factor applied to the logits.
        easy_margin: If True, the margin is applied only where
            ``cos(theta) > 0`` and the plain cosine is used elsewhere. If
            False, ``cos(theta) - mm`` is used where ``cos(theta) <= th``.
    """

    def __init__(self, nOut: int, nClasses: int, m: float = 0.3, s: float = 15,
                 easy_margin: bool = False) -> None:
        super(AAMSoftmaxLossTorch, self).__init__()
        self.test_normalize = True

        self.m = m
        self.s = s
        self.in_feats = nOut
        # Class weights stored as (nClasses, nOut); the uninitialised storage
        # is filled by the Xavier initialisation below.
        self.weight = torch.nn.Parameter(torch.FloatTensor(nClasses, nOut), requires_grad=True)

        self.ce = nn.CrossEntropyLoss()
        nn.init.xavier_normal_(self.weight, gain=1)

        self.easy_margin = easy_margin
        self.cos_m = math.cos(self.m)
        self.sin_m = math.sin(self.m)

        # Keep cos(theta + m) monotonically decreasing for theta in [0, pi]:
        # below the threshold ``th`` the linear fallback cos(theta) - mm is used.
        self.th = math.cos(math.pi - self.m)
        self.mm = math.sin(math.pi - self.m) * self.m

        print('Initialised AAMSoftmax margin %.3f scale %.3f' % (self.m, self.s))

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        """Compute the mean AAM-Softmax loss over a batch.

        Args:
            input: Embeddings of shape (batch, nOut).
            label: Integer (int64) class indices of shape (batch,).

        Returns:
            Scalar loss tensor.
        """
        assert input.size()[0] == label.size()[0], "batch size of input and label differ"
        assert input.size()[1] == self.in_feats, "embedding dimension does not match nOut"

        # cos(theta)
        cosine = F.linear(F.normalize(input), F.normalize(self.weight))
        # cos(theta + m) = cos(theta) cos(m) - sin(theta) sin(m)
        sine = torch.sqrt((1.0 - torch.pow(cosine, 2)).clamp(0, 1))
        phi = cosine * self.cos_m - sine * self.sin_m

        # The two margin rules are alternatives, not a sequence: without the
        # ``else`` the fallback was skipped entirely for easy_margin=False and
        # applied on top of the easy rule for easy_margin=True.
        if self.easy_margin:
            phi = torch.where(cosine > 0, phi, cosine)
        else:
            phi = torch.where((cosine - self.th) > 0, phi, cosine - self.mm)

        one_hot = torch.zeros_like(cosine)
        one_hot.scatter_(1, label.view(-1, 1), 1)
        # Margin logit at the target class, plain cosine everywhere else.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output = output * self.s

        return self.ce(output, label)

if __name__ == '__main__':
    # Smoke test: feed the same random batch to the TensorFlow and PyTorch
    # AAM-Softmax implementations. The two classes initialise their weights
    # independently, so the printed losses are only expected to match when
    # both are given the same weight matrix.
    features = np.random.randn(10, 512)
    # np.save('input.npy', features)
    # features = np.load('input.npy')

    features_torch = torch.from_numpy(features).float()
    features_tf = tf.convert_to_tensor(value=features, dtype=tf.float32)

    label = np.array([99, 11, 22, 33, 44, 88, 47, 478, 982, 1990]).astype(np.int32)
    label_torch = torch.from_numpy(label).long()
    label_tf = tf.convert_to_tensor(value=label, dtype=tf.int32)

    am_loss_1 = AAMSoftmaxLossTF(nOut=512, nClasses=1991)
    am_loss_2 = AAMSoftmaxLossTorch(nOut=512, nClasses=1991)

    loss_1 = am_loss_1(features_tf, label_tf)
    loss_2 = am_loss_2(features_torch, label_torch)

    # Show the results; previously they were computed and silently discarded.
    print('AAM-Softmax TF loss: %.6f' % float(loss_1))
    print('AAM-Softmax Torch loss: %.6f' % float(loss_2))


Circle Loss

import torch
import torch.nn as nn
from torch import Tensor
from torch.nn import Parameter
import torch.nn.functional as F

class CosineLinearLayer(nn.Module):
    """Bias-free linear layer whose output is ``|x| * cos(theta)``.

    The weight matrix has shape (in_features, out_features). Its columns are
    re-normalised on every forward pass, the cosine between each input row and
    each column is computed and clamped to [-1, 1], and the result is scaled
    back by the L2 norm of the input row.
    """

    def __init__(self, in_features: int, out_features: int) -> None:
        super().__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.Tensor(in_features, out_features))
        # Uniform init in [-1, 1]; renorm to 1e-5 and scale by 1e5 so that
        # every column ends up with (approximately) unit L2 norm.
        self.weight.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)

    def forward(self, input: Tensor) -> Tensor:
        """Return a (batch, out_features) tensor of norm-scaled cosines."""
        # Column-normalised copy of the weights.
        unit_cols = self.weight.renorm(2, 1, 1e-5).mul(1e5)

        # L2 norms shaped for broadcasting: (batch, 1) and (1, out_features).
        feat_len = input.pow(2).sum(1).pow(0.5).view(-1, 1)
        col_len = unit_cols.pow(2).sum(0).pow(0.5).view(1, -1)

        # Dot products divided by both norms give the cosine; clamp guards
        # against values marginally outside [-1, 1] from rounding.
        cos_theta = (input.mm(unit_cols) / feat_len / col_len).clamp(-1.0, 1.0)

        # The output is scaled back by the input norm.
        return cos_theta * feat_len

class CircleCore(nn.Module):
    """Circle-loss re-weighting of cosine logits followed by cross-entropy.

    Args:
        m: Relaxation margin; the positive margin is ``1 - m`` and the
            negative margin is ``m``.
        s: Scale factor applied to the re-weighted logits.
    """

    def __init__(self, m: float = 0.35, s: float = 256) -> None:
        super().__init__()
        self.s = s
        self.m = m
        self.criteria = nn.CrossEntropyLoss()

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        """Return the cross-entropy of the circle-weighted logits.

        Args:
            input: (batch, classes) similarity scores.
            label: Class index per row.
        """
        pos_margin = 1 - self.m
        neg_margin = self.m

        # Weighting factors are detached so they act as constants in backward.
        pos_weight = F.relu(1 + self.m - input).detach()
        neg_weight = F.relu(input + self.m).detach()

        pos_logits = pos_weight * (input - pos_margin)
        neg_logits = neg_weight * (input - neg_margin)

        # 1.0 at each row's target class, 0.0 elsewhere.
        target_mask = torch.zeros(input.size()).to(label.device)
        target_mask.scatter_(1, label.view(-1, 1).long(), 1)

        logits = target_mask * pos_logits + (1.0 - target_mask) * neg_logits
        logits.mul_(self.s)
        return self.criteria(logits, label)

class CircleLossV1(nn.Module):
    """Circle loss built from a cosine classifier layer plus ``CircleCore``.

    Args:
        nOut: Embedding dimension fed to the classifier layer.
        nClasses: Number of classes.
        m: Margin forwarded to ``CircleCore``.
        s: Scale forwarded to ``CircleCore``.
    """

    def __init__(self, nOut: int, nClasses: int, m: float = 0.35, s: float = 256) -> None:
        super().__init__()
        self.classifier_linear = CosineLinearLayer(in_features=nOut, out_features=nClasses)
        self.circle_core = CircleCore(m=m, s=s)

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        """Project the embeddings to class logits and return the circle loss."""
        return self.circle_core(self.classifier_linear(input), label)

class CircleLossV2(nn.Module):
    """Circle loss over cosine similarities to learnable class weights.

    Args:
        nOut: Embedding dimension.
        nClasses: Number of classes.
        m: Relaxation margin; the positive margin is ``1 - m`` and the
            negative margin is ``m``.
        s: Scale factor (gamma) applied to the weighted logits.
        use_cuda: Kept for backward compatibility. The target mask is now
            always created on the device of the similarity matrix, so this
            flag no longer affects the computation.
    """

    def __init__(self, nOut: int, nClasses: int, m: float = 0.35, s: float = 256, use_cuda: bool = False) -> None:
        super(CircleLossV2, self).__init__()
        self.margin = m
        self.gamma = s
        self.soft_plus = nn.Softplus()
        self.class_num = nClasses
        self.emdsize = nOut

        # Class weights stored as (nClasses, nOut). They are explicitly
        # initialised: the storage was previously left as uninitialised
        # memory, which can contain arbitrary values including NaN/inf.
        self.weight = nn.Parameter(torch.empty(self.class_num, self.emdsize))
        nn.init.xavier_normal_(self.weight, gain=1)
        self.use_cuda = use_cuda

    def forward(self, input: Tensor, label: Tensor) -> Tensor:
        """Compute the mean circle loss over a batch.

        Args:
            input: Embeddings of shape (batch, nOut).
            label: Class index per row, shape (batch,).

        Returns:
            Scalar loss tensor.
        """
        similarity_matrix = nn.functional.linear(nn.functional.normalize(input, p=2, dim=1, eps=1e-12),
                                                 nn.functional.normalize(self.weight, p=2, dim=1, eps=1e-12))

        # Boolean mask of the target class per row, created on the same
        # device as the similarities. The previous code left ``one_hot``
        # undefined when use_cuda was False and replaced the CUDA tensor with
        # a CPU one when it was True.
        one_hot = torch.zeros_like(similarity_matrix)
        one_hot.scatter_(1, label.view(-1, 1).long(), 1)
        one_hot = one_hot.type(dtype=torch.bool)

        batch = input.size()[0]
        # sp: similarity to the target class, (batch, 1);
        # sn: similarities to all other classes, (batch, nClasses - 1).
        sp = similarity_matrix[one_hot].view(batch, -1)
        sn = similarity_matrix[one_hot.logical_not()].view(batch, -1)

        # Weighting factors are detached so they act as constants in backward.
        ap = torch.clamp_min(-sp.detach() + 1 + self.margin, min=0.)
        an = torch.clamp_min(sn.detach() + self.margin, min=0.)

        delta_p = 1 - self.margin
        delta_n = self.margin

        logit_p = - ap * (sp - delta_p) * self.gamma
        logit_n = an * (sn - delta_n) * self.gamma

        loss = self.soft_plus(torch.logsumexp(logit_n, dim=1) + torch.logsumexp(logit_p, dim=1))

        return loss.mean()

if __name__ == '__main__':
    # Smoke test: evaluate both circle-loss variants on one random batch and
    # print the ratio of the two losses.
    feat = torch.randn(10, 512)
    # NOTE(review): labels are drawn from [0, 1990) while the losses are built
    # with 1991 classes, so the last class is never sampled - confirm intent.
    lbl = torch.randint(high=1990, size=(10,))

    # Circle Loss 1
    circle_v1 = CircleLossV1(nOut=512, nClasses=1991, m=0.25, s=256)
    circle_loss_1 = circle_v1(feat, lbl)

    # Circle Loss 2
    circle_v2 = CircleLossV2(nOut=512, nClasses=1991, m=0.25, s=256)
    circle_loss_2 = circle_v2(feat, lbl)

    print(circle_loss_1 / circle_loss_2)

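CircleLossV1 and CircleLossV2 differ only in the normalisation step; everything else is the same. In our tests on text-independent tasks, V2 performed worse than V1.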


