# Source code for nlcpy.prof.prof

#
# * The source code in this file is developed independently by NEC Corporation.
#
# # NLCPy License #
#
#     Copyright (c) 2020 NEC Corporation
#     All rights reserved.
#
#     Redistribution and use in source and binary forms, with or without
#     modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice,
#       this list of conditions and the following disclaimer in the documentation
#       and/or other materials provided with the distribution.
#     * Neither NEC Corporation nor the names of its contributors may be
#       used to endorse or promote products derived from this software
#       without specific prior written permission.
#
#     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
#     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

import time
import functools
import contextlib
import warnings
import numpy
import nlcpy
from nlcpy import veo

# profiling status
NOT_PROFILING = 0    # no measurement in progress (initial state / after clear())
UNDER_PROFILING = 1  # between start() and stop()
END_PROFILING = 2    # stop() has run; results may be read


class Profiling():
    """Accumulator for call counts and elapsed times of VEO operations.

    For each tracked operation (``alloc_mem``, ``free_mem``, ``write_mem``,
    ``read_mem``, ``wait_result``) the instance keeps a call counter
    ``n_<op>`` and an elapsed-time total ``t_<op>`` in seconds.  The counters
    are updated from outside this class while ``status`` is
    ``UNDER_PROFILING``.

    ``total_runtime`` is the wall-clock time between :meth:`start` and
    :meth:`stop`; ``vh_runtime`` is what remains of it after subtracting all
    ``t_<op>`` totals.
    """

    def __init__(self):
        # clear() is the single place that defines the initial state.
        self.clear()

    def start(self):
        """Resets all counters and begins a measurement.

        Raises
        ------
        Exception
            If a measurement is already in progress.
        """
        if self.status == UNDER_PROFILING:
            raise Exception('under profiling')
        self.clear()
        self.status = UNDER_PROFILING
        # Holds the start timestamp until stop() turns it into a duration.
        self.total_runtime = time.time()

    def stop(self):
        """Ends the measurement and computes the total and VH runtimes.

        Raises
        ------
        Exception
            If :meth:`start` has not been called beforehand.
        """
        if self.status != UNDER_PROFILING:
            raise Exception('not started profiling')
        self.total_runtime = time.time() - self.total_runtime
        self.vh_runtime = self.total_runtime - (
            self.t_alloc_mem + self.t_free_mem + self.t_write_mem
            + self.t_read_mem + self.t_wait_result)
        self.status = END_PROFILING

    def get_stats(self):
        """Returns the results of the finished measurement as a dict.

        Returns
        -------
        out : dict
            Maps ``'veo_alloc_mem'``, ``'veo_free_mem'``, ``'veo_write_mem'``,
            ``'veo_read_mem'`` and ``'veo_wait_result'`` to a dict with the
            keys ``'elapsed_time'`` and ``'number_of_call'``, and
            ``'vh_runtime'`` and ``'total_runtime'`` to a dict with the key
            ``'elapsed_time'``.

        Raises
        ------
        Exception
            If the measurement has not been finished with :meth:`stop`.
        """
        # Read from self, not from a module-level instance, so that the
        # method reports the state of the object it is called on.
        if self.status != END_PROFILING:
            raise Exception('not finished profiling')
        return {
            'veo_alloc_mem': {
                'elapsed_time': self.t_alloc_mem,
                'number_of_call': self.n_alloc_mem,
            },
            'veo_free_mem': {
                'elapsed_time': self.t_free_mem,
                'number_of_call': self.n_free_mem,
            },
            'veo_write_mem': {
                'elapsed_time': self.t_write_mem,
                'number_of_call': self.n_write_mem,
            },
            'veo_read_mem': {
                'elapsed_time': self.t_read_mem,
                'number_of_call': self.n_read_mem,
            },
            # VE runtime
            'veo_wait_result': {
                'elapsed_time': self.t_wait_result,
                'number_of_call': self.n_wait_result,
            },
            'vh_runtime': {
                'elapsed_time': self.vh_runtime,
            },
            'total_runtime': {
                'elapsed_time': self.total_runtime,
            },
        }

    def clear(self):
        """Resets every counter and timer and returns to ``NOT_PROFILING``."""
        self.n_alloc_mem = 0
        self.t_alloc_mem = 0
        self.n_free_mem = 0
        self.t_free_mem = 0
        self.n_write_mem = 0
        self.t_write_mem = 0
        self.n_read_mem = 0
        self.t_read_mem = 0
        self.n_wait_result = 0
        self.t_wait_result = 0
        # Derived results are reset too so that no stale values from a
        # previous measurement survive a clear().
        self.vh_runtime = 0
        self.total_runtime = 0
        # Not read anywhere in this class; kept so that existing attribute
        # access from outside keeps working.
        self.total_s = 0
        self.total_e = 0
        self.in_analyze = 0
        self.status = NOT_PROFILING


# Module-level Profiling instance shared by the profile_* decorators and the
# start/stop/print/get routines defined in this module.
_prof = Profiling()


def start_profiling():
    """Starts profiling.

    Profiling the code block between :func:`nlcpy.prof.start_profiling` and
    :func:`nlcpy.prof.stop_profiling`.

    Notes
    -----
    .. deprecated:: 2.0.0

    Raises
    ------
    Exception
        If profiling has already been started and not yet stopped.

    See Also
    --------
    nlcpy.prof.print_run_stats : Prints NLCPy run stats.
    nlcpy.prof.get_run_stats : Gets dict of NLCPy run stats.
    """
    # stacklevel=2 makes the warning point at the caller's line instead of
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().',
                  UserWarning, stacklevel=2)
    _prof.start()


def stop_profiling():
    """Stops profiling.

    Profiling the code block between :func:`nlcpy.prof.start_profiling` and
    :func:`nlcpy.prof.stop_profiling`.

    Notes
    -----
    .. deprecated:: 2.0.0

    Raises
    ------
    Exception
        If profiling has not been started.

    See Also
    --------
    nlcpy.prof.print_run_stats : Prints NLCPy run stats.
    nlcpy.prof.get_run_stats : Gets dict of NLCPy run stats.
    """
    # stacklevel=2 makes the warning point at the caller's line instead of
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().',
                  UserWarning, stacklevel=2)
    _prof.stop()


def profile_alloc_mem(func):
    """Decorator that records calls of *func* as ``alloc_mem`` activity.

    While profiling is active, each call increments ``_prof.n_alloc_mem`` and
    adds its wall-clock duration to ``_prof.t_alloc_mem``.  Time that was
    booked as ``wait_result`` or ``write_mem`` during the call is subtracted
    from the alloc total again.  When profiling is not active, *func* is
    called directly.
    """
    @functools.wraps(func)
    def _timed_alloc(*args, **kwargs):
        if _prof.status != UNDER_PROFILING:
            return func(*args, **kwargs)

        # Snapshot the timers that may advance while func runs.
        wait_before = _prof.t_wait_result
        write_before = _prof.t_write_mem
        free_before = _prof.t_free_mem

        began = time.time()
        result = func(*args, **kwargs)
        ended = time.time()

        _prof.n_alloc_mem += 1
        _prof.t_alloc_mem += (ended - began)

        # Remove time already booked under other categories during the call.
        if _prof.t_wait_result != wait_before:
            _prof.t_alloc_mem -= (_prof.t_wait_result - wait_before)
        if _prof.t_write_mem != write_before:
            _prof.t_alloc_mem -= (_prof.t_write_mem - write_before)
        # NOTE(review): unlike the two adjustments above, this one changes
        # t_free_mem (rolling it back to its pre-call value) rather than
        # t_alloc_mem.  Possibly a copy/paste slip; confirm the intended
        # attribution before changing it.
        if _prof.t_free_mem != free_before:
            _prof.t_free_mem -= (_prof.t_free_mem - free_before)
        return result
    return _timed_alloc


def profile_free_mem(func):
    """Decorator that records calls of *func* as ``free_mem`` activity.

    While profiling is active, each call increments ``_prof.n_free_mem`` and
    adds its wall-clock duration to ``_prof.t_free_mem``; otherwise *func* is
    called directly.
    """
    @functools.wraps(func)
    def _timed_free(*args, **kwargs):
        if _prof.status == UNDER_PROFILING:
            began = time.time()
            result = func(*args, **kwargs)
            elapsed = time.time() - began
            _prof.n_free_mem += 1
            _prof.t_free_mem += elapsed
            return result
        return func(*args, **kwargs)
    return _timed_free


def profile_write_mem(func):
    """Decorator that records calls of *func* as ``write_mem`` activity.

    While profiling is active, each call increments ``_prof.n_write_mem`` and
    adds its wall-clock duration to ``_prof.t_write_mem``; otherwise *func*
    is called directly.
    """
    @functools.wraps(func)
    def _timed_write(*args, **kwargs):
        if _prof.status == UNDER_PROFILING:
            began = time.time()
            result = func(*args, **kwargs)
            elapsed = time.time() - began
            _prof.n_write_mem += 1
            _prof.t_write_mem += elapsed
            return result
        return func(*args, **kwargs)
    return _timed_write


def profile_read_mem(func):
    """Decorator that records calls of *func* as ``read_mem`` activity.

    While profiling is active, each call increments ``_prof.n_read_mem`` and
    adds its wall-clock duration to ``_prof.t_read_mem``; otherwise *func* is
    called directly.
    """
    @functools.wraps(func)
    def _timed_read(*args, **kwargs):
        if _prof.status == UNDER_PROFILING:
            began = time.time()
            result = func(*args, **kwargs)
            elapsed = time.time() - began
            _prof.n_read_mem += 1
            _prof.t_read_mem += elapsed
            return result
        return func(*args, **kwargs)
    return _timed_read


def profile_wait_result(func):
    """Decorator that records calls of *func* as ``wait_result`` activity.

    While profiling is active, each call increments ``_prof.n_wait_result``
    and adds its wall-clock duration to ``_prof.t_wait_result``; otherwise
    *func* is called directly.
    """
    @functools.wraps(func)
    def _timed_wait(*args, **kwargs):
        if _prof.status == UNDER_PROFILING:
            began = time.time()
            result = func(*args, **kwargs)
            elapsed = time.time() - began
            _prof.n_wait_result += 1
            _prof.t_wait_result += elapsed
            return result
        return func(*args, **kwargs)
    return _timed_wait


def _print_impl(msg, val, is_exp):
    """Prints one line of the run-stats report.

    Parameters
    ----------
    msg : str
        Text placed in front of the value.
    val : number
        Elapsed time when *is_exp* is true, otherwise a call count.
    is_exp : bool
        If true, *val* is printed in exponent notation followed by
        ``[sec]``; otherwise it is printed as-is followed by ``times``.
    """
    template = "{} {:.3e} [sec]" if is_exp else "{} {} times"
    print(template.format(msg, val))


def print_run_stats():
    """Prints NLCPy run stats.

    Notes
    -----
    .. deprecated:: 2.0.0

    Raises
    ------
    Exception
        If profiling has not been finished with
        :func:`nlcpy.prof.stop_profiling`.

    Examples
    --------
    Sample Program::

        # sample.py
        import nlcpy as vp
        vp.prof.start_profiling()
        for i in range(10):
            vp.random.rand(10000)
        vp.prof.stop_profiling()
        vp.prof.print_run_stats()

    Execution::

        $ python sample.py

        ----------- NLCPy Run Stats ------------
        alloc memory on VE:
          total: 1.097e-05 [sec]
          veo_alloc_mem was called 10 times
        free memory on VE:
          total: 9.298e-06 [sec]
          veo_free_mem was called 10 times
        write memory on VE:
          total: 0.000e+00 [sec]
          veo_write_mem was called 0 times
        read memory from VE:
          total: 0.000e+00 [sec]
          veo_read_mem was called 0 times
        VE runtime(include offload overhead):
          total: 3.016e-04 [sec]
          veo_wait_result was called 10 times
        other VH runtime:
          total: 2.632e-04 [sec]
        total runtime:
          total: 5.851e-04 [sec]
        ----------------------------------------

    """
    # stacklevel=2 makes the warning point at the caller's line instead of
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().',
                  UserWarning, stacklevel=2)
    if _prof.status != END_PROFILING:
        raise Exception('profiling not finished')

    # (heading, elapsed time, API name, call count) for each tracked
    # operation, in report order.
    per_call_sections = (
        ("alloc memory on VE:",
         _prof.t_alloc_mem, "veo_alloc_mem", _prof.n_alloc_mem),
        ("free memory on VE:",
         _prof.t_free_mem, "veo_free_mem", _prof.n_free_mem),
        ("write memory on VE:",
         _prof.t_write_mem, "veo_write_mem", _prof.n_write_mem),
        ("read memory from VE:",
         _prof.t_read_mem, "veo_read_mem", _prof.n_read_mem),
        ("VE runtime(include offload overhead):",
         _prof.t_wait_result, "veo_wait_result", _prof.n_wait_result),
    )
    # Sections that only have an elapsed time.
    time_only_sections = (
        ("other VH runtime:", _prof.vh_runtime),
        ("total runtime:", _prof.total_runtime),
    )

    print("\n----------- NLCPy Run Stats ------------")
    for heading, elapsed, api_name, n_calls in per_call_sections:
        print(heading)
        _print_impl("  total:", elapsed, True)
        _print_impl("  {} was called".format(api_name), n_calls, False)
    for heading, elapsed in time_only_sections:
        print(heading)
        _print_impl("  total:", elapsed, True)
    print("----------------------------------------\n")


def get_run_stats():
    """Gets dict of NLCPy run stats.

    Notes
    -----
    .. deprecated:: 2.0.0

    Returns
    -------
    out : dict
        Elapsed times and call counts keyed by operation name, as shown in
        the example below.

    Raises
    ------
    Exception
        If profiling has not been finished with
        :func:`nlcpy.prof.stop_profiling`.

    Examples
    --------
    Sample Program::

        # sample.py
        import nlcpy as vp
        from pprint import pprint
        vp.prof.start_profiling()
        for i in range(10):
            vp.random.rand(10000)
        vp.prof.stop_profiling()
        stats = vp.prof.get_run_stats()
        pprint(stats)

    Execution::

        $ python sample.py
        {'total_runtime': {'elapsed_time': 0.004348278045654297},
        'veo_alloc_mem': {'elapsed_time': 2.574920654296875e-05, 'number_of_call': 10},
        'veo_free_mem': {'elapsed_time': 4.100799560546875e-05, 'number_of_call': 10},
        'veo_read_mem': {'elapsed_time': 0, 'number_of_call': 0},
        'veo_wait_result': {'elapsed_time': 0.0034487247467041016,
                            'number_of_call': 10},
        'veo_write_mem': {'elapsed_time': 0, 'number_of_call': 0},
        'vh_runtime': {'elapsed_time': 0.0008327960968017578}}
    """
    # stacklevel=2 makes the warning point at the caller's line instead of
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().',
                  UserWarning, stacklevel=2)
    return _prof.get_stats()


def ftrace_region_begin(message):
    """Begins an ftrace region.

    A file ftrace.out is generated after running your program that invokes
    this routine.
    The ftrace.out includes performance information of your program.

    Notes
    -----
    It is necessary to specify an identical string *message* to
    :func:`ftrace_region_begin` and :func:`ftrace_region_end`.

    Parameters
    ----------
    message : str
        Any string can be specified to distinguish a user-specified region.
        A ``bytes`` object is passed through unchanged; anything else is
        encoded as UTF-8.

    See Also
    --------
    ftrace_region : Enables an ftrace region.
    ftrace_region_end : Ends an ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> vp.prof.ftrace_region_begin('dgemm')
    >>> # something you want to profile
    >>> _ = x @ x
    >>> vp.prof.ftrace_region_end('dgemm')
    """

    # Flush pending requests before the region is opened.
    nlcpy.request.flush()
    venode = nlcpy.venode.VE()
    if not isinstance(message, bytes):
        message = message.encode('utf-8')
    # NOTE(review): the buffer holds exactly the message bytes with no
    # trailing NUL; confirm the VE-side routine does not rely on one.
    buff = numpy.frombuffer(message, dtype=numpy.uint8)
    req = venode.lib_prof.func[b"nlcpy_profiling_region_begin"](
        venode.ctx, veo.OnStack(buff))
    req.wait_result()


def ftrace_region_end(message):
    """Ends an ftrace region.

    A file ftrace.out is generated after running your program that invokes
    this routine.
    The ftrace.out includes performance information of your program.

    Notes
    -----
    It is necessary to specify an identical string *message* to
    :func:`ftrace_region_begin` and :func:`ftrace_region_end`.

    Parameters
    ----------
    message : str
        Any string can be specified to distinguish a user-specified region.
        A ``bytes`` object is passed through unchanged; anything else is
        encoded as UTF-8.

    See Also
    --------
    ftrace_region : Enables an ftrace region.
    ftrace_region_begin : Begins an ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> vp.prof.ftrace_region_begin('dgemm')
    >>> # something you want to profile
    >>> _ = x @ x
    >>> vp.prof.ftrace_region_end('dgemm')
    """

    # Flush pending requests before the region is closed.
    nlcpy.request.flush()
    venode = nlcpy.venode.VE()
    if not isinstance(message, bytes):
        message = message.encode('utf-8')
    # NOTE(review): the buffer holds exactly the message bytes with no
    # trailing NUL; confirm the VE-side routine does not rely on one.
    buff = numpy.frombuffer(message, dtype=numpy.uint8)
    req = venode.lib_prof.func[b"nlcpy_profiling_region_end"](
        venode.ctx, veo.OnStack(buff))
    req.wait_result()


@contextlib.contextmanager
def ftrace_region(message):
    """Enables profiling with an ftrace region during 'with' statement.

    A file ftrace.out is generated after running your program that invokes
    this routine.
    The ftrace.out includes performance information of your program.

    Parameters
    ----------
    message : str
        Any string can be specified to distinguish a user-specified region.

    See Also
    --------
    ftrace_region_begin : Begins ftrace region.
    ftrace_region_end : Ends ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> with vp.prof.ftrace_region('dgemm'):
    ...     # something you want to profile
    ...     _ = x @ x
    """

    ftrace_region_begin(message)
    try:
        yield
    finally:
        # The region is closed even when the body of the with statement
        # raises.
        ftrace_region_end(message)