# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import errno
import logging
import os
import re
import shutil
import tempfile
import time

import numpy as np
import paddle.fluid as fluid

from .download import get_weights_path

logger = logging.getLogger(__name__)

__all__ = [
    'load_checkpoint',
    'load_and_fusebn',
    'load_params',
    'save',
]


def is_url(path):
    """
    Whether the given path is a URL.

    Args:
        path (string): the path to check.
    """
    return path.startswith('http://') or path.startswith('https://')


def _get_weight_path(path):
    env = os.environ
    if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        num_trainers = int(env['PADDLE_TRAINERS_NUM'])
        if num_trainers <= 1:
            path = get_weights_path(path)
        else:
            from ppdet.utils.download import map_path, WEIGHTS_HOME
            weight_path = map_path(path, WEIGHTS_HOME)
            lock_path = weight_path + '.lock'
            if not os.path.exists(weight_path):
                try:
                    os.makedirs(os.path.dirname(weight_path))
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise
                with open(lock_path, 'w'):  # touch
                    os.utime(lock_path, None)
                if trainer_id == 0:
                    get_weights_path(path)
                    os.remove(lock_path)
                else:
                    while os.path.exists(lock_path):
                        time.sleep(1)
            path = weight_path
    else:
        path = get_weights_path(path)
    return path
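
# Note: the multi-trainer branch above is a simple file-lock handshake. Every
# trainer touches the lock file; trainer 0 downloads the weights and then
# removes the lock, while the remaining trainers poll until the lock
# disappears, so none of them reads a partially downloaded weight file.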


def _load_state(path):
    if os.path.exists(path + '.pdopt'):
        # XXX another hack to ignore the optimizer state: load_program_state
        # would also pick up `path + '.pdopt'`, so copy only the .pdparams
        # file into a temporary directory and load from there.
        tmp = tempfile.mkdtemp()
        dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
        shutil.copy(path + '.pdparams', dst + '.pdparams')
        state = fluid.io.load_program_state(dst)
        shutil.rmtree(tmp)
    else:
        state = fluid.io.load_program_state(path)
    return state


def _strip_postfix(path):
    path, ext = os.path.splitext(path)
    assert ext in ['', '.pdparams', '.pdopt', '.pdmodel'], \
        "Unknown postfix {} from weights".format(ext)
    return path


def load_params(exe, prog, path, ignore_params=None):
    """
    Load model parameters from the given path.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
        ignore_params (list): regex patterns of variables to ignore when
            finetuning. It can be specified by finetune_exclude_pretrained_params
            and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md
    """
    if is_url(path):
        path = _get_weight_path(path)

    path = _strip_postfix(path)
    if not (os.path.isdir(path) or os.path.isfile(path) or
            os.path.exists(path + '.pdparams')):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(path))

    logger.debug('Loading parameters from {}...'.format(path))

    ignore_set = set()
    state = _load_state(path)

    # ignore the parameters whose shape mismatches between the model
    # and the pretrained weights.
    all_var_shape = {}
    for block in prog.blocks:
        for param in block.all_parameters():
            all_var_shape[param.name] = param.shape
    ignore_set.update([
        name for name, shape in all_var_shape.items()
        if name in state and shape != state[name].shape
    ])

    if ignore_params:
        all_var_names = [var.name for var in prog.list_vars()]
        ignore_list = filter(
            lambda var: any([re.match(name, var) for name in ignore_params]),
            all_var_names)
        ignore_set.update(list(ignore_list))

    if len(ignore_set) > 0:
        for k in ignore_set:
            if k in state:
                logger.warning('variable {} is not loaded'.format(k))
                del state[k]

    fluid.io.set_program_state(prog, state)
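
# Usage sketch (illustrative only; the URL and the 'fc.*' pattern below are
# placeholders): load pretrained weights while excluding the final FC layer
# when finetuning.
#
#     exe = fluid.Executor(fluid.CPUPlace())
#     exe.run(fluid.default_startup_program())
#     load_params(exe, fluid.default_main_program(),
#                 'https://example.com/pretrained/resnet50.pdparams',
#                 ignore_params=['fc.*'])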


def load_checkpoint(exe, prog, path):
    """
    Load model from the given path.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
    """
    if is_url(path):
        path = _get_weight_path(path)

    path = _strip_postfix(path)
    if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(path))
    fluid.load(prog, path, executor=exe)
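
# Usage sketch (illustrative; the checkpoint path is a placeholder): resume
# training from a snapshot written by `save` below. Unlike load_params, this
# goes through fluid.load and so can also restore optimizer variables that
# were saved alongside the parameters.
#
#     load_checkpoint(exe, fluid.default_main_program(),
#                     'output/faster_rcnn/model_iter_10000')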


def global_step(scope=None):
    """
    Load global step in scope.

    Args:
        scope (fluid.Scope): load global step from which scope. If None,
            from default global_scope().

    Returns:
        global step: int.
    """
    if scope is None:
        scope = fluid.global_scope()
    v = scope.find_var('@LR_DECAY_COUNTER@')
    step = np.array(v.get_tensor())[0] if v else 0
    return step


def save(exe, prog, path):
    """
    Save model to the given path.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): save weight from which Program object.
        path (string): the path to save model.
    """
    if os.path.isdir(path):
        shutil.rmtree(path)
    logger.info('Save model to {}.'.format(path))
    fluid.save(prog, path)
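
# Usage sketch (illustrative; the output path is a placeholder): name each
# snapshot after the current global step so it is easy to resume from later.
#
#     step = global_step()
#     save(exe, fluid.default_main_program(),
#          'output/model_iter_{}'.format(step))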


def load_and_fusebn(exe, prog, path):
    """
    Fuse batch norm parameters into the scale and bias of affine_channel ops.

    Args:
        exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
    """
    logger.debug('Loading model from {} and fusing batch norm '
                 'parameters if present...'.format(path))

    if is_url(path):
        path = _get_weight_path(path)

    if not os.path.exists(path):
        raise ValueError("Model path {} does not exist.".format(path))

    # Since the program uses affine-channel, there is no running mean and
    # variance in the program, so append them here.
    # NOTE: the batch norm params should be named like:
    #   x_scale
    #   x_offset
    #   x_mean
    #   x_variance
    # where x is any prefix
    mean_variances = set()
    bn_vars = []
    state = _load_state(path)

    def check_mean_and_variance(prefix):
        m = prefix + 'mean'
        v = prefix + 'variance'
        return v in state and m in state

    has_mean_variance = True

    with fluid.program_guard(prog, fluid.Program()):
        for block in prog.blocks:
            ops = list(block.ops)
            if not has_mean_variance:
                break
            for op in ops:
                if op.type == 'affine_channel':
                    # strip the '_scale' suffix to recover the prefix
                    scale_name = op.input('Scale')[0]  # x_scale
                    bias_name = op.input('Bias')[0]  # x_offset
                    prefix = scale_name[:-5]
                    mean_name = prefix + 'mean'
                    variance_name = prefix + 'variance'
                    if not check_mean_and_variance(prefix):
                        has_mean_variance = False
                        break

                    bias = block.var(bias_name)
                    mean_vb = block.create_var(
                        name=mean_name,
                        type=bias.type,
                        shape=bias.shape,
                        dtype=bias.dtype)
                    variance_vb = block.create_var(
                        name=variance_name,
                        type=bias.type,
                        shape=bias.shape,
                        dtype=bias.dtype)

                    mean_variances.add(mean_vb)
                    mean_variances.add(variance_vb)

                    bn_vars.append(
                        [scale_name, bias_name, mean_name, variance_name])

    if not has_mean_variance:
        fluid.io.set_program_state(prog, state)
        logger.warning(
            "There are no batch norm parameters in model {}. "
            "Skipped fusing batch norm; parameters were loaded "
            "as-is.".format(path))
        return

    fluid.load(prog, path, exe)

    eps = 1e-5
    for names in bn_vars:
        scale_name, bias_name, mean_name, var_name = names

        scale = fluid.global_scope().find_var(scale_name).get_tensor()
        bias = fluid.global_scope().find_var(bias_name).get_tensor()
        mean = fluid.global_scope().find_var(mean_name).get_tensor()
        var = fluid.global_scope().find_var(var_name).get_tensor()

        scale_arr = np.array(scale)
        bias_arr = np.array(bias)
        mean_arr = np.array(mean)
        var_arr = np.array(var)

        bn_std = np.sqrt(np.add(var_arr, eps))
        new_scale = np.float32(np.divide(scale_arr, bn_std))
        new_bias = bias_arr - mean_arr * new_scale

        # fuse into the scale and bias of affine_channel
        scale.set(new_scale, exe.place)
        bias.set(new_bias, exe.place)
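
# A minimal numpy check (illustrative) of the fusion math above: applying the
# fused scale and bias as an affine channel, y = x * new_scale + new_bias,
# matches the batch norm inference formula
# y = (x - mean) / sqrt(var + eps) * scale + bias.
#
#     x = np.random.rand(8).astype('float32')
#     scale, bias, mean, var, eps = 1.5, 0.3, 0.2, 4.0, 1e-5
#     new_scale = scale / np.sqrt(var + eps)
#     new_bias = bias - mean * new_scale
#     assert np.allclose(x * new_scale + new_bias,
#                        (x - mean) / np.sqrt(var + eps) * scale + bias)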
|