# roma_unsb/models/util/lr_decay.py

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# --------------------------------------------------------
# References:
# ELECTRA https://github.com/google-research/electra
# BEiT: https://github.com/microsoft/unilm/tree/master/beit
# --------------------------------------------------------

import json


def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=None, layer_decay=.75):
    """
    Parameter groups for layer-wise lr decay
    Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58

    Every trainable parameter of ``model`` is put into a group keyed by its
    layer id (see ``get_layer_id_for_vit``) and by whether weight decay
    applies to it.

    Args:
        model: object exposing ``blocks`` (its ``len`` is used) and
            ``named_parameters()``.
        weight_decay: weight decay stored on the "decay" groups.
        no_weight_decay_list: container of parameter names that must get a
            weight decay of 0 in addition to all 1-D parameters. ``None``
            (the default) is treated as an empty container.
        layer_decay: base of the per-layer learning-rate multiplier.

    Returns:
        A list of dicts with the keys ``"lr_scale"``, ``"weight_decay"`` and
        ``"params"``, one per (layer id, decay / no_decay) combination that
        owns at least one trainable parameter, ordered by first appearance
        in ``model.named_parameters()``. ``"lr_scale"`` is only recorded
        here; applying it to the learning rate is left to the caller.
    """
    # A None default avoids sharing one mutable list object between calls.
    if no_weight_decay_list is None:
        no_weight_decay_list = ()

    param_groups = {}

    num_layers = len(model.blocks) + 1

    # layer_scales[i] == layer_decay ** (num_layers - i): layer id
    # ``num_layers`` gets a scale of 1, layer id 0 gets the smallest scale
    # (for layer_decay < 1).
    layer_scales = [layer_decay ** (num_layers - i) for i in range(num_layers + 1)]

    for n, p in model.named_parameters():
        if not p.requires_grad:
            continue

        # no decay: all 1D parameters and model specific ones
        if p.ndim == 1 or n in no_weight_decay_list:
            g_decay = "no_decay"
            this_decay = 0.
        else:
            g_decay = "decay"
            this_decay = weight_decay

        layer_id = get_layer_id_for_vit(n, num_layers)
        group_name = "layer_%d_%s" % (layer_id, g_decay)

        # Create the group lazily the first time this (layer, decay) pair is seen.
        if group_name not in param_groups:
            param_groups[group_name] = {
                "lr_scale": layer_scales[layer_id],
                "weight_decay": this_decay,
                "params": [],
            }

        param_groups[group_name]["params"].append(p)

    return list(param_groups.values())


def get_layer_id_for_vit(name, num_layers):
    """
    Map a parameter name to the id of the layer it belongs to.
    Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33

    * ``cls_token``, ``pos_embed`` and anything starting with ``patch_embed``
      map to layer 0.
    * Names starting with ``blocks`` map to the integer found in the second
      dot-separated component, plus one (``blocks.3.x`` -> 4).
    * Every other name maps to ``num_layers``.
    """
    # Embedding-side parameters all share the first layer id.
    if name in ('cls_token', 'pos_embed') or name.startswith('patch_embed'):
        return 0

    if name.startswith('blocks'):
        # "blocks.<index>.<rest>": shift by one because id 0 is taken above.
        block_index = name.split('.')[1]
        return int(block_index) + 1

    # Anything that is neither an embedding nor a block parameter.
    return num_layers
first commit 2025-02-22 14:21:54 +08:00			`# Copyright (c) Meta Platforms, Inc. and affiliates.`
			`# All rights reserved.`

			`# This source code is licensed under the license found in the`
			`# LICENSE file in the root directory of this source tree.`
			`# --------------------------------------------------------`
			`# References:`
			`# ELECTRA https://github.com/google-research/electra`
			`# BEiT: https://github.com/microsoft/unilm/tree/master/beit`
			`# --------------------------------------------------------`

			`import json`


			`def param_groups_lrd(model, weight_decay=0.05, no_weight_decay_list=[], layer_decay=.75):`
			`"""`
			`Parameter groups for layer-wise lr decay`
			`Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L58`
			`"""`
			`param_group_names = {}`
			`param_groups = {}`

			`num_layers = len(model.blocks) + 1`

			`layer_scales = list(layer_decay ** (num_layers - i) for i in range(num_layers + 1))`

			`for n, p in model.named_parameters():`
			`if not p.requires_grad:`
			`continue`

			`# no decay: all 1D parameters and model specific ones`
			`if p.ndim == 1 or n in no_weight_decay_list:`
			`g_decay = "no_decay"`
			`this_decay = 0.`
			`else:`
			`g_decay = "decay"`
			`this_decay = weight_decay`

			`layer_id = get_layer_id_for_vit(n, num_layers)`
			`group_name = "layer_%d_%s" % (layer_id, g_decay)`

			`if group_name not in param_group_names:`
			`this_scale = layer_scales[layer_id]`

			`param_group_names[group_name] = {`
			`"lr_scale": this_scale,`
			`"weight_decay": this_decay,`
			`"params": [],`
			`}`
			`param_groups[group_name] = {`
			`"lr_scale": this_scale,`
			`"weight_decay": this_decay,`
			`"params": [],`
			`}`

			`param_group_names[group_name]["params"].append(n)`
			`param_groups[group_name]["params"].append(p)`

			`# print("parameter groups: \n%s" % json.dumps(param_group_names, indent=2))`

			`return list(param_groups.values())`


			`def get_layer_id_for_vit(name, num_layers):`
			`"""`
			`Assign a parameter with its layer id`
			`Following BEiT: https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33`
			`"""`
			`if name in ['cls_token', 'pos_embed']:`
			`return 0`
			`elif name.startswith('patch_embed'):`
			`return 0`
			`elif name.startswith('blocks'):`
			`return int(name.split('.')[1]) + 1`
			`else:`
			`return num_layers`