# Source code for nlcpy.prof.prof

#
# * The source code in this file is developed independently by NEC Corporation.
#
# # NLCPy License #
#
#     Copyright (c) 2020 NEC Corporation
#     All rights reserved.
#
#     Redistribution and use in source and binary forms, with or without
#     modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright notice,
#       this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright notice,
#       this list of conditions and the following disclaimer in the documentation
#       and/or other materials provided with the distribution.
#     * Neither NEC Corporation nor the names of its contributors may be
#       used to endorse or promote products derived from this software
#       without specific prior written permission.
#
#     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
#     ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
#     FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#     ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#     SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#

import time
import functools
import contextlib
import warnings
import numpy
import nlcpy
from nlcpy import veo

# Profiling status values stored in ``Profiling.status``.
NOT_PROFILING = 0
UNDER_PROFILING = 1
END_PROFILING = 2


class Profiling:
    """Holds call counters and elapsed-time accumulators for one session.

    For each tracked category (alloc_mem, free_mem, write_mem, read_mem,
    wait_result) the instance keeps a call counter ``n_<category>`` and an
    elapsed-time accumulator ``t_<category>`` in seconds. ``total_runtime``
    is the wall-clock time between :meth:`start` and :meth:`stop`, and
    ``vh_runtime`` is that total minus the sum of the five accumulators.
    """

    def __init__(self):
        # All state, including ``status``, is initialised by ``clear`` so that
        # a fresh instance and a cleared instance look exactly the same.
        self.clear()

    def start(self):
        """Reset all counters and begin a profiling session.

        Raises
        ------
        Exception
            If a session is already in progress.
        """
        if self.status == UNDER_PROFILING:
            raise Exception('under profiling')
        self.clear()
        self.status = UNDER_PROFILING
        # Holds the start timestamp until ``stop`` turns it into a duration.
        self.total_runtime = time.time()

    def stop(self):
        """Finish the session and compute ``total_runtime`` / ``vh_runtime``.

        Raises
        ------
        Exception
            If no session is in progress.
        """
        if self.status != UNDER_PROFILING:
            raise Exception('not started profiling')
        self.total_runtime = time.time() - self.total_runtime
        tracked = (
            self.t_alloc_mem + self.t_free_mem + self.t_write_mem
            + self.t_read_mem + self.t_wait_result)
        self.vh_runtime = self.total_runtime - tracked
        self.status = END_PROFILING

    def get_stats(self):
        """Return the statistics of the finished session as a nested dict.

        Returns
        -------
        dict
            One entry per tracked category with ``'elapsed_time'`` and
            ``'number_of_call'``, plus ``'vh_runtime'`` and
            ``'total_runtime'`` entries carrying ``'elapsed_time'`` only.

        Raises
        ------
        Exception
            If the session has not been stopped yet.
        """
        # Read from ``self`` (not a module-level instance) so that every
        # Profiling object reports its own data.
        if self.status != END_PROFILING:
            raise Exception('not finished profiling')
        return {
            'veo_alloc_mem': {
                'elapsed_time': self.t_alloc_mem,
                'number_of_call': self.n_alloc_mem,
            },
            'veo_free_mem': {
                'elapsed_time': self.t_free_mem,
                'number_of_call': self.n_free_mem,
            },
            'veo_write_mem': {
                'elapsed_time': self.t_write_mem,
                'number_of_call': self.n_write_mem,
            },
            'veo_read_mem': {
                'elapsed_time': self.t_read_mem,
                'number_of_call': self.n_read_mem,
            },
            'veo_wait_result': {
                'elapsed_time': self.t_wait_result,
                'number_of_call': self.n_wait_result,
            },
            'vh_runtime': {
                'elapsed_time': self.vh_runtime,
            },
            'total_runtime': {
                'elapsed_time': self.total_runtime,
            },
        }

    def clear(self):
        """Reset every counter and timer and mark the object as not profiling."""
        self.n_alloc_mem = 0
        self.t_alloc_mem = 0
        self.n_free_mem = 0
        self.t_free_mem = 0
        self.n_write_mem = 0
        self.t_write_mem = 0
        self.n_read_mem = 0
        self.t_read_mem = 0
        self.n_wait_result = 0
        self.t_wait_result = 0
        # Derived timings are reset too, so no value from a previous session
        # survives a clear.
        self.vh_runtime = 0
        self.total_runtime = 0
        # Not read anywhere in this class; kept so existing attribute access
        # from other code keeps working.
        self.total_s = 0
        self.total_e = 0
        self.in_analyze = 0
        self.status = NOT_PROFILING


# Module-level Profiling instance shared by the functions in this module.
_prof = Profiling()


def start_profiling():
    """Starts profiling.

    Profiling the code block between :func:`nlcpy.prof.start_profiling` and
    :func:`nlcpy.prof.stop_profiling`.

    Notes
    -----

    .. deprecated:: 2.0.0

    See Also
    --------
    nlcpy.prof.print_run_stats : Prints NLCPy run stats.
    nlcpy.prof.get_run_stats : Gets dict of NLCPy run stats.

    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().', UserWarning,
                  stacklevel=2)
    _prof.start()
def stop_profiling():
    """Stops profiling.

    Profiling the code block between :func:`nlcpy.prof.start_profiling` and
    :func:`nlcpy.prof.stop_profiling`.

    Notes
    -----

    .. deprecated:: 2.0.0

    See Also
    --------
    nlcpy.prof.print_run_stats : Prints NLCPy run stats.
    nlcpy.prof.get_run_stats : Gets dict of NLCPy run stats.

    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().', UserWarning,
                  stacklevel=2)
    _prof.stop()
def profile_alloc_mem(func):
    """Decorator that counts calls to *func* and times them as alloc_mem.

    While profiling is active, the elapsed time of each call is added to
    ``_prof.t_alloc_mem``. Any time that was recorded in the wait_result,
    write_mem or free_mem accumulators during the call is subtracted again,
    so that nested time is counted in one category only.
    """
    @functools.wraps(func)
    def wrap_func(*args, **kwargs):
        if _prof.status != UNDER_PROFILING:
            return func(*args, **kwargs)
        # Snapshot the accumulators that may grow while func runs.
        pre_wait_result = _prof.t_wait_result
        pre_write_mem = _prof.t_write_mem
        pre_free_mem = _prof.t_free_mem
        s = time.time()
        res = func(*args, **kwargs)
        e = time.time()
        _prof.n_alloc_mem += 1
        _prof.t_alloc_mem += (e - s)
        # Remove nested time already attributed to other categories. Each
        # difference is zero when nothing nested happened.
        _prof.t_alloc_mem -= (_prof.t_wait_result - pre_wait_result)
        _prof.t_alloc_mem -= (_prof.t_write_mem - pre_write_mem)
        # The free_mem correction is applied to t_alloc_mem, like the two
        # above; the free_mem accumulator itself is left untouched.
        _prof.t_alloc_mem -= (_prof.t_free_mem - pre_free_mem)
        return res
    return wrap_func


def profile_free_mem(func):
    """Decorator that counts calls to *func* and times them as free_mem."""
    @functools.wraps(func)
    def wrap_func(*args, **kwargs):
        if _prof.status != UNDER_PROFILING:
            return func(*args, **kwargs)
        s = time.time()
        res = func(*args, **kwargs)
        e = time.time()
        _prof.n_free_mem += 1
        _prof.t_free_mem += (e - s)
        return res
    return wrap_func


def profile_write_mem(func):
    """Decorator that counts calls to *func* and times them as write_mem."""
    @functools.wraps(func)
    def wrap_func(*args, **kwargs):
        if _prof.status != UNDER_PROFILING:
            return func(*args, **kwargs)
        s = time.time()
        res = func(*args, **kwargs)
        e = time.time()
        _prof.n_write_mem += 1
        _prof.t_write_mem += (e - s)
        return res
    return wrap_func


def profile_read_mem(func):
    """Decorator that counts calls to *func* and times them as read_mem."""
    @functools.wraps(func)
    def wrap_func(*args, **kwargs):
        if _prof.status != UNDER_PROFILING:
            return func(*args, **kwargs)
        s = time.time()
        res = func(*args, **kwargs)
        e = time.time()
        _prof.n_read_mem += 1
        _prof.t_read_mem += (e - s)
        return res
    return wrap_func


def profile_wait_result(func):
    """Decorator that counts calls to *func* and times them as wait_result."""
    @functools.wraps(func)
    def wrap_func(*args, **kwargs):
        if _prof.status != UNDER_PROFILING:
            return func(*args, **kwargs)
        s = time.time()
        res = func(*args, **kwargs)
        e = time.time()
        _prof.n_wait_result += 1
        _prof.t_wait_result += (e - s)
        return res
    return wrap_func


def _print_impl(msg, val, is_exp):
    """Print *msg* followed by *val*.

    When *is_exp* is true, *val* is printed as seconds in exponent notation;
    otherwise it is printed as a call count.
    """
    if is_exp:
        print("{} {:.3e} [sec]".format(msg, val))
    else:
        print("{} {} times".format(msg, val))
def get_run_stats():
    """Gets dict of NLCPy run stats.

    Notes
    -----

    .. deprecated:: 2.0.0

    Returns
    -------
    out : dict

    Examples
    --------
    Sample Program::

        # sample.py
        import nlcpy as vp
        from pprint import pprint
        vp.prof.start_profiling()
        for i in range(10):
            vp.random.rand(10000)
        vp.prof.stop_profiling()
        stats = vp.prof.get_run_stats()
        pprint(stats)

    Execution::

        $ python sample.py
        {'total_runtime': {'elapsed_time': 0.004348278045654297},
         'veo_alloc_mem': {'elapsed_time': 2.574920654296875e-05,
                           'number_of_call': 10},
         'veo_free_mem': {'elapsed_time': 4.100799560546875e-05,
                          'number_of_call': 10},
         'veo_read_mem': {'elapsed_time': 0, 'number_of_call': 0},
         'veo_wait_result': {'elapsed_time': 0.0034487247467041016,
                             'number_of_call': 10},
         'veo_write_mem': {'elapsed_time': 0, 'number_of_call': 0},
         'vh_runtime': {'elapsed_time': 0.0008327960968017578}}

    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module.
    warnings.warn('This routine is deprecated since version 2.0.0. '
                  'Please use nlcpy.prof.ftrace_region().', UserWarning,
                  stacklevel=2)
    return _prof.get_stats()
def ftrace_region_begin(message):
    """Begins an ftrace region.

    A file ftrace.out is generated after running your program that invokes
    this routine. The ftrace.out includes performance information of your
    program.

    Notes
    -----
    It is necessary to specify an identical string *message* to
    :func:`ftrace_region_begin` and :func:`ftrace_region_end`.

    Parameters
    ----------
    message : str or bytes
        Any string can be specified to distinguish a user-specified region.
        A ``str`` is encoded as UTF-8; ``bytes`` are passed through as is.

    See Also
    --------
    ftrace_region : Enables an ftrace region.
    ftrace_region_end : Ends an ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> vp.prof.ftrace_region_begin('dgemm')
    >>> # something you want to profile
    >>> _ = x @ x
    >>> vp.prof.ftrace_region_end('dgemm')

    """
    # Flush first; presumably so that requests queued earlier are not
    # attributed to this region -- TODO confirm.
    nlcpy.request.flush()
    venode = nlcpy.venode.VE()
    if not isinstance(message, bytes):
        message = message.encode('utf-8')
    # NOTE(review): the buffer carries no trailing NUL byte; confirm the
    # VE-side routine does not expect a NUL-terminated string.
    buff = numpy.frombuffer(message, dtype=numpy.uint8)
    req = venode.lib_prof.func[b"nlcpy_profiling_region_begin"](
        venode.ctx, veo.OnStack(buff))
    req.wait_result()
def ftrace_region_end(message):
    """Ends an ftrace region.

    A file ftrace.out is generated after running your program that invokes
    this routine. The ftrace.out includes performance information of your
    program.

    Notes
    -----
    It is necessary to specify an identical string *message* to
    :func:`ftrace_region_begin` and :func:`ftrace_region_end`.

    Parameters
    ----------
    message : str or bytes
        Any string can be specified to distinguish a user-specified region.
        A ``str`` is encoded as UTF-8; ``bytes`` are passed through as is.

    See Also
    --------
    ftrace_region : Enables an ftrace region.
    ftrace_region_begin : Begins an ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> vp.prof.ftrace_region_begin('dgemm')
    >>> # something you want to profile
    >>> _ = x @ x
    >>> vp.prof.ftrace_region_end('dgemm')

    """
    # Flush first; presumably so that requests queued inside the region are
    # issued before it is closed -- TODO confirm.
    nlcpy.request.flush()
    venode = nlcpy.venode.VE()
    if not isinstance(message, bytes):
        message = message.encode('utf-8')
    # NOTE(review): the buffer carries no trailing NUL byte; confirm the
    # VE-side routine does not expect a NUL-terminated string.
    buff = numpy.frombuffer(message, dtype=numpy.uint8)
    req = venode.lib_prof.func[b"nlcpy_profiling_region_end"](
        venode.ctx, veo.OnStack(buff))
    req.wait_result()
@contextlib.contextmanager
def ftrace_region(message):
    """Enables profiling with an ftrace region during 'with' statement.

    A file ftrace.out is generated after running your program that invokes
    this routine. The ftrace.out includes performance information of your
    program.

    Parameters
    ----------
    message : str
        Any string can be specified to distinguish a user-specified region.

    See Also
    --------
    ftrace_region_begin : Begins ftrace region.
    ftrace_region_end : Ends ftrace region.

    Examples
    --------
    >>> import nlcpy as vp
    >>> x = vp.random.rand(10000, 10000)
    >>> with vp.prof.ftrace_region('dgemm'):
    ...     # something you want to profile
    ...     _ = x @ x

    """
    ftrace_region_begin(message)
    try:
        yield
    finally:
        # Close the region even when the body of the 'with' block raises.
        ftrace_region_end(message)