[pytorchsourceread][12]setup.py

作者：云龙破月56 | 来源：互联网 | 2023-10-12 16:19

这篇文章是接着上一篇文章的，由于知乎字数的限制，我被迫分成了两篇文章。

这篇文章是接着上一篇文章的，由于知乎字数的限制，我被迫分成了两篇文章。

################################################################################
# Configure compile flags
################################################################################

# Accumulators shared by every extension declared later in this script.
include_dirs = []
library_dirs = []
extra_link_args = []

if IS_WINDOWS:
    # MSVC flags:
    #   /Z7        turns on symbolic debugging information in .obj files
    #   /EHa       is about native C++ catch support for asynchronous
    #              structured exception handling (SEH)
    #   /DNOMINMAX removes builtin min/max functions
    #   /wdXXXX    disables warning no. XXXX
    # Each warning number is listed exactly once ('/wd4522' used to appear
    # twice, which was redundant).
    extra_compile_args = [
        '/Z7', '/EHa', '/DNOMINMAX',
        '/wd4267', '/wd4251', '/wd4522', '/wd4838', '/wd4305',
        '/wd4244', '/wd4190', '/wd4101', '/wd4996', '/wd4275',
    ]
    if sys.version_info[0] == 2:
        # /bigobj increases number of sections in .obj file, which is needed
        # to link against libraries in Python 2.7 under Windows
        extra_compile_args.append('/bigobj')
else:
    # GCC / Clang flags.
    extra_compile_args = [
        '-std=c++11',
        '-Wall',
        '-Wextra',
        '-Wno-unused-parameter',
        '-Wno-missing-field-initializers',
        '-Wno-write-strings',
        '-Wno-zero-length-array',
        # This is required for Python 2 declarations that are deprecated in 3.
        '-Wno-deprecated-declarations',
        # Python 2.6 requires -fno-strict-aliasing, see
        # http://legacy.python.org/dev/peps/pep-3123/
        # We also depend on it in our code (even Python 3).
        '-fno-strict-aliasing',
        # Clang has an unfixed bug leading to spurious missing
        # braces warnings, see
        # https://bugs.llvm.org/show_bug.cgi?id=21629
        '-Wno-missing-braces',
    ]
    # NOTE(review): -Werror is a GCC/Clang spelling, so this check is placed
    # in the non-Windows branch; the flattened source lost its indentation --
    # confirm against the real file.
    if check_env_flag('WERROR'):
        extra_compile_args.append('-Werror')

# Directory layout, all relative to the directory containing this script.
cwd = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(cwd, "torch", "lib")
third_party_path = os.path.join(cwd, "third_party")
tmp_install_path = lib_path + "/tmp_install"

include_dirs += [
    cwd,
    os.path.join(cwd, "torch", "csrc"),
    third_party_path + "/pybind11/include",
    tmp_install_path + "/include",
    tmp_install_path + "/include/TH",
    tmp_install_path + "/include/THNN",
    tmp_install_path + "/include/ATen",
]

library_dirs.append(lib_path)

# we specify exact lib names to avoid conflict with lua-torch installs
ATEN_LIB = os.path.join(lib_path, 'libATen.so')
THD_LIB = os.path.join(lib_path, 'libTHD.a')
NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1')

# static library only
NANOPB_STATIC_LIB = os.path.join(lib_path, 'libprotobuf-nanopb.a')

# Platform-specific overrides of the default (Linux-style) file names above.
if IS_DARWIN:
    ATEN_LIB = os.path.join(lib_path, 'libATen.dylib')
    NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib')

if IS_WINDOWS:
    ATEN_LIB = os.path.join(lib_path, 'ATen.lib')
    if DEBUG:
        NANOPB_STATIC_LIB = os.path.join(lib_path, 'protobuf-nanopbd.lib')
    else:
        NANOPB_STATIC_LIB = os.path.join(lib_path, 'protobuf-nanopb.lib')

这一部分比较冗长，就是对编译选项（configure）以及一些库的位置的说明。

普及一下.so文件和.a文件

.so文件是动态链接库, 动态链接所调用的函数代码并没有被拷贝到应用程序的可执行文件中去，而是仅仅在其中加入了所调用函数的描述信息（往往是一些重定位信息），仅当应用程序被装入内存开始运行时，在操作系统的管理下，才在应用程序与相应的.so之间建立链接关系

.a文件是静态链接库文件, 指把要调用的函数或者过程链接到可执行文件中，成为可执行文件的一部分。当多个程序都调用相同函数时，内存中就会存在这个函数的多个拷贝，这样就浪费了宝贵的内存资源

可以这么理解：.a文件是多个.o文件打包在一起的组合。

# Flags, libraries and link inputs specific to the main extension module.
main_compile_args = ['-D_THP_CORE']
main_libraries = ['shm']
main_link_args = [ATEN_LIB, NANOPB_STATIC_LIB]

# C++ translation units of the main extension, one per line.
# The order of the entries is kept exactly as in the original list.
main_sources = [
    # core
    "torch/csrc/PtrWrapper.cpp",
    "torch/csrc/Module.cpp",
    "torch/csrc/Generator.cpp",
    "torch/csrc/Size.cpp",
    "torch/csrc/Dtype.cpp",
    "torch/csrc/Device.cpp",
    "torch/csrc/Exceptions.cpp",
    "torch/csrc/Layout.cpp",
    "torch/csrc/Storage.cpp",
    "torch/csrc/DataLoader.cpp",
    "torch/csrc/DynamicTypes.cpp",
    "torch/csrc/assertions.cpp",
    "torch/csrc/byte_order.cpp",
    "torch/csrc/torch.cpp",
    "torch/csrc/utils.cpp",
    # utils
    "torch/csrc/utils/cuda_lazy_init.cpp",
    "torch/csrc/utils/device.cpp",
    "torch/csrc/utils/invalid_arguments.cpp",
    "torch/csrc/utils/object_ptr.cpp",
    "torch/csrc/utils/python_arg_parser.cpp",
    "torch/csrc/utils/tensor_list.cpp",
    "torch/csrc/utils/tensor_new.cpp",
    "torch/csrc/utils/tensor_numpy.cpp",
    "torch/csrc/utils/tensor_dtypes.cpp",
    "torch/csrc/utils/tensor_layouts.cpp",
    "torch/csrc/utils/tensor_types.cpp",
    "torch/csrc/utils/tuple_parser.cpp",
    "torch/csrc/utils/tensor_apply.cpp",
    "torch/csrc/utils/tensor_conversion_dispatch.cpp",
    "torch/csrc/utils/tensor_flatten.cpp",
    "torch/csrc/utils/variadic.cpp",
    "torch/csrc/allocators.cpp",
    "torch/csrc/serialization.cpp",
    # jit
    "torch/csrc/jit/init.cpp",
    "torch/csrc/jit/interpreter.cpp",
    "torch/csrc/jit/ir.cpp",
    "torch/csrc/jit/fusion_compiler.cpp",
    "torch/csrc/jit/graph_executor.cpp",
    "torch/csrc/jit/python_ir.cpp",
    "torch/csrc/jit/test_jit.cpp",
    "torch/csrc/jit/tracer.cpp",
    "torch/csrc/jit/tracer_state.cpp",
    "torch/csrc/jit/python_tracer.cpp",
    "torch/csrc/jit/passes/shape_analysis.cpp",
    "torch/csrc/jit/interned_strings.cpp",
    "torch/csrc/jit/type.cpp",
    "torch/csrc/jit/export.cpp",
    "torch/csrc/jit/import.cpp",
    "torch/csrc/jit/autodiff.cpp",
    "torch/csrc/jit/interpreter_autograd_function.cpp",
    "torch/csrc/jit/python_arg_flatten.cpp",
    "torch/csrc/jit/python_compiled_function.cpp",
    "torch/csrc/jit/variable_flags.cpp",
    "torch/csrc/jit/passes/create_autodiff_subgraphs.cpp",
    "torch/csrc/jit/passes/graph_fuser.cpp",
    "torch/csrc/jit/passes/onnx.cpp",
    "torch/csrc/jit/passes/dead_code_elimination.cpp",
    "torch/csrc/jit/passes/lower_tuples.cpp",
    "torch/csrc/jit/passes/common_subexpression_elimination.cpp",
    "torch/csrc/jit/passes/peephole.cpp",
    "torch/csrc/jit/passes/inplace_check.cpp",
    "torch/csrc/jit/passes/canonicalize.cpp",
    "torch/csrc/jit/passes/batch_mm.cpp",
    "torch/csrc/jit/passes/onnx/peephole.cpp",
    "torch/csrc/jit/passes/onnx/fixup_onnx_loop.cpp",
    "torch/csrc/jit/generated/aten_dispatch.cpp",
    "torch/csrc/jit/script/lexer.cpp",
    "torch/csrc/jit/script/compiler.cpp",
    "torch/csrc/jit/script/module.cpp",
    "torch/csrc/jit/script/init.cpp",
    "torch/csrc/jit/script/python_tree_views.cpp",
    # autograd
    "torch/csrc/autograd/init.cpp",
    "torch/csrc/autograd/grad_mode.cpp",
    "torch/csrc/autograd/engine.cpp",
    "torch/csrc/autograd/function.cpp",
    "torch/csrc/autograd/variable.cpp",
    "torch/csrc/autograd/saved_variable.cpp",
    "torch/csrc/autograd/input_buffer.cpp",
    "torch/csrc/autograd/profiler.cpp",
    "torch/csrc/autograd/python_function.cpp",
    "torch/csrc/autograd/python_cpp_function.cpp",
    "torch/csrc/autograd/python_variable.cpp",
    "torch/csrc/autograd/python_variable_indexing.cpp",
    "torch/csrc/autograd/python_legacy_variable.cpp",
    "torch/csrc/autograd/python_engine.cpp",
    "torch/csrc/autograd/python_hook.cpp",
    "torch/csrc/autograd/generated/VariableType.cpp",
    "torch/csrc/autograd/generated/Functions.cpp",
    "torch/csrc/autograd/generated/python_torch_functions.cpp",
    "torch/csrc/autograd/generated/python_variable_methods.cpp",
    "torch/csrc/autograd/generated/python_functions.cpp",
    "torch/csrc/autograd/generated/python_nn_functions.cpp",
    "torch/csrc/autograd/functions/basic_ops.cpp",
    "torch/csrc/autograd/functions/tensor.cpp",
    "torch/csrc/autograd/functions/accumulate_grad.cpp",
    "torch/csrc/autograd/functions/special.cpp",
    "torch/csrc/autograd/functions/utils.cpp",
    "torch/csrc/autograd/functions/init.cpp",
    # nn / tensor / onnx
    "torch/csrc/nn/THNN.cpp",
    "torch/csrc/tensor/python_tensor.cpp",
    "torch/csrc/onnx/onnx.pb.cpp",
    "torch/csrc/onnx/onnx.cpp",
    "torch/csrc/onnx/init.cpp",
]

这部分没有太多意思, 就是src文件的罗列

# Optional NumPy support: only the import itself is guarded, so an unrelated
# failure while collecting the include path is not mistaken for "NumPy missing".
try:
    import numpy as np
except ImportError:
    WITH_NUMPY = False
else:
    include_dirs.append(np.get_include())
    extra_compile_args.append('-DWITH_NUMPY')
    WITH_NUMPY = True

# NOTE(review): the nesting below is reconstructed from a flattened copy of
# the script in which indentation was lost -- confirm against the real file.
if WITH_DISTRIBUTED:
    extra_compile_args += ['-DWITH_DISTRIBUTED']
    main_sources += [
        "torch/csrc/distributed/Module.cpp",
    ]
    if WITH_DISTRIBUTED_MW:
        main_sources += [
            "torch/csrc/distributed/Tensor.cpp",
            "torch/csrc/distributed/Storage.cpp",
        ]
        extra_compile_args += ['-DWITH_DISTRIBUTED_MW']
    include_dirs += [tmp_install_path + "/include/THD"]
    main_link_args += [THD_LIB]

if WITH_CUDA:
    nvtoolext_lib_name = None
    if IS_WINDOWS:
        cuda_lib_path = CUDA_HOME + '/lib/x64/'
        nvtoolext_lib_path = NVTOOLEXT_HOME + '/lib/x64/'
        nvtoolext_include_path = os.path.join(NVTOOLEXT_HOME, 'include')

        library_dirs.append(nvtoolext_lib_path)
        include_dirs.append(nvtoolext_include_path)

        nvtoolext_lib_name = 'nvToolsExt64_1'

        # MSVC doesn't support runtime symbol resolving, `nvrtc` and `cuda` should be linked
        main_libraries += ['nvrtc', 'cuda']
    else:
        cuda_lib_dirs = ['lib64', 'lib']

        # Use the first candidate directory that exists; if none exists the
        # last candidate is kept, exactly as before.
        for lib_dir in cuda_lib_dirs:
            cuda_lib_path = os.path.join(CUDA_HOME, lib_dir)
            if os.path.exists(cuda_lib_path):
                break
        extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)

        nvtoolext_lib_name = 'nvToolsExt'

    library_dirs.append(cuda_lib_path)
    cuda_include_path = os.path.join(CUDA_HOME, 'include')
    include_dirs.append(cuda_include_path)
    include_dirs.append(tmp_install_path + "/include/THCUNN")
    extra_compile_args += ['-DWITH_CUDA']
    extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
    main_libraries += ['cudart', nvtoolext_lib_name]
    main_sources += [
        "torch/csrc/cuda/Module.cpp",
        "torch/csrc/cuda/Storage.cpp",
        "torch/csrc/cuda/Stream.cpp",
        "torch/csrc/cuda/utils.cpp",
        "torch/csrc/cuda/comm.cpp",
        "torch/csrc/cuda/python_comm.cpp",
        "torch/csrc/cuda/serialization.cpp",
        "torch/csrc/nn/THCUNN.cpp",
    ]

if WITH_NCCL:
    if WITH_SYSTEM_NCCL:
        main_link_args += [NCCL_SYSTEM_LIB]
        include_dirs.append(NCCL_INCLUDE_DIR)
    else:
        main_link_args += [NCCL_LIB]
    extra_compile_args += ['-DWITH_NCCL']
    main_sources += [
        "torch/csrc/cuda/nccl.cpp",
        "torch/csrc/cuda/python_nccl.cpp",
    ]

if WITH_CUDNN:
    main_libraries += [CUDNN_LIBRARY]
    # NOTE: these are at the front, in case there's another cuDNN in CUDA path
    include_dirs.insert(0, CUDNN_INCLUDE_DIR)
    if not IS_WINDOWS:
        extra_link_args.insert(0, '-Wl,-rpath,' + CUDNN_LIB_DIR)
    extra_compile_args += ['-DWITH_CUDNN']

if DEBUG:
    if IS_WINDOWS:
        extra_link_args.append('/DEBUG:FULL')
    else:
        extra_compile_args += ['-O0', '-g']
        extra_link_args += ['-O0', '-g']

if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
    print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
    # get path of libstdc++ and link manually.
    # for reasons unknown, -static-libstdc++ doesn't fully link some symbols
    CXXNAME = os.getenv('CXX', 'g++')
    STDCPP_LIB = subprocess.check_output([CXXNAME, '-print-file-name=libstdc++.a'])
    # Strip surrounding whitespace instead of blindly dropping the last
    # character, so the path survives output with no (or a CRLF) line ending.
    STDCPP_LIB = STDCPP_LIB.strip()
    if not isinstance(STDCPP_LIB, str):  # python 3: check_output returns bytes
        # sys.stdout.encoding can be None; fall back to UTF-8 rather than
        # crashing in decode().
        STDCPP_LIB = STDCPP_LIB.decode(sys.stdout.encoding or 'utf-8')
    main_link_args += [STDCPP_LIB]
    version_script = os.path.abspath("tools/pytorch.version")
    extra_link_args += ['-Wl,--version-script=' + version_script]


def make_relative_rpath(path):
    """Return the linker flag that adds `path` as a loader-relative rpath.

    Args:
        path: directory, relative to the location of the built binary.

    Returns:
        A ``-Wl,-rpath,...`` flag using ``@loader_path`` when IS_DARWIN is
        set, an empty string when IS_WINDOWS is set, and a flag using
        ``$ORIGIN`` otherwise.
    """
    if IS_DARWIN:
        return '-Wl,-rpath,@loader_path/' + path
    elif IS_WINDOWS:
        return ''
    else:
        return '-Wl,-rpath,$ORIGIN/' + path

这一部分的代码的作用是对extra_compile_args和extra_link_args做一些修正

################################################################################
# Declare extensions and package
################################################################################

# NOTE(review): nesting is reconstructed from a flattened copy of the script
# in which indentation was lost -- confirm against the real file.
extensions = []
packages = find_packages(
    exclude=('tools', 'tools.*', 'caffe2', 'caffe2.*', 'caffe', 'caffe.*'))

# Main extension module; built on every platform.
C = Extension(
    "torch._C",
    libraries=main_libraries,
    sources=main_sources,
    language='c++',
    extra_compile_args=main_compile_args + extra_compile_args,
    include_dirs=include_dirs,
    library_dirs=library_dirs,
    extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
)
extensions.append(C)

# torch._dl is only declared when not building on Windows.
if not IS_WINDOWS:
    DL = Extension(
        "torch._dl",
        sources=["torch/csrc/dl.c"],
        language='c',
    )
    extensions.append(DL)

if WITH_CUDA:
    # `+` builds a fresh list, so the in-place additions below never leak
    # back into the shared extra_link_args.
    thnvrtc_link_flags = extra_link_args + [make_relative_rpath('lib')]
    if IS_LINUX:
        thnvrtc_link_flags = thnvrtc_link_flags + ['-Wl,--no-as-needed']
    # these have to be specified as -lcuda in link_flags because they
    # have to come right after the `no-as-needed` option
    if IS_WINDOWS:
        thnvrtc_link_flags += ['cuda.lib', 'nvrtc.lib']
    else:
        thnvrtc_link_flags += ['-lcuda', '-lnvrtc']

    if IS_DARWIN:
        # on macOS this is where the CUDA stub is installed according to the manual
        cuda_stub_path = ["/usr/local/cuda/lib"]
    else:
        cuda_stub_path = [cuda_lib_path + '/stubs']

    THNVRTC = Extension(
        "torch._nvrtc",
        sources=['torch/csrc/nvrtc.cpp'],
        language='c++',
        include_dirs=include_dirs,
        library_dirs=library_dirs + cuda_stub_path,
        extra_link_args=thnvrtc_link_flags,
    )
    extensions.append(THNVRTC)

# Version string: explicit release info from the environment when provided,
# otherwise the base version plus the short git commit hash.
version = '0.5.0a0'
if os.getenv('PYTORCH_BUILD_VERSION'):
    assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
    build_number = int(os.getenv('PYTORCH_BUILD_NUMBER'))
    version = os.getenv('PYTORCH_BUILD_VERSION')
    if build_number > 1:
        version += '.post' + str(build_number)
else:
    try:
        head = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd)
        sha = head.decode('ascii').strip()
        version += '+' + sha[:7]
    except Exception:
        # Best effort: any failure in the git lookup leaves the plain base
        # version in place.
        pass

参考资料：Python 官方文档《Building C and C++ Extensions with distutils》

这部分是加入extensions

# Map setup.py command names to the command classes that implement them,
# then merge in the per-dependency build commands.
cmdclass = dict(
    build=build,
    build_py=build_py,
    build_ext=build_ext,
    build_deps=build_deps,
    build_module=build_module,
    develop=develop,
    install=install,
    clean=clean,
)
cmdclass.update(build_dep_cmds)

if __name__ == '__main__':
    setup(
        name="torch",
        version=version,
        description=("Tensors and Dynamic neural networks in "
                     "Python with strong GPU acceleration"),
        ext_modules=extensions,
        cmdclass=cmdclass,
        packages=packages,
        # Glob patterns for files shipped inside the `torch` package.
        package_data={
            'torch': [
                'lib/*.so*',
                'lib/*.dylib*',
                'lib/*.dll',
                'lib/*.lib',
                'lib/torch_shm_manager',
                'lib/*.h',
                'lib/include/ATen/*.h',
                'lib/include/ATen/cuda/*.h',
                'lib/include/ATen/cuda/*.cuh',
                'lib/include/ATen/cudnn/*.h',
                'lib/include/ATen/cuda/detail/*.cuh',
                'lib/include/pybind11/*.h',
                'lib/include/pybind11/detail/*.h',
                'lib/include/TH/*.h',
                'lib/include/TH/generic/*.h',
                'lib/include/THC/*.h',
                'lib/include/THC/*.cuh',
                'lib/include/THC/generic/*.h',
                'lib/include/THCUNN/*.cuh',
                'lib/include/torch/csrc/*.h',
                'lib/include/torch/csrc/autograd/*.h',
                'lib/include/torch/csrc/jit/*.h',
                'lib/include/torch/csrc/utils/*.h',
                'lib/include/torch/csrc/cuda/*.h',
                'lib/include/torch/torch.h',
            ],
        },
    )

最后执行setup

python setup.py install

就可以运行了……

真是不知道读pytorch源码是不是一个正确的选择,
希望能坚持下去吧,
我之所以这么做
是因为
不是很喜欢现在的风气,
做DL的很多人,
连API都看不全,
更何况底层的实现呢?
我怕将来自己也会成为那些人中的一员
所以……

推荐阅读

string
如何自行分析定位SAP BSP错误

The “BSP tag” I mentioned in the blog title means for example the tag chtmlb:configCellerator below which i ... [详细]

蜡笔小新 2023-12-14 19:58:05
go
单击后为什么远程通知操作无效？ - Why remote notification action is doing nothing after clicking?

I have configured an action for a remote notification when it arrives to my iOS app. I want two diff ... [详细]

蜡笔小新 2023-12-14 15:57:44
go
差分约束系统求解House Man跳跃问题的思路与方法

本文讨论了使用差分约束系统求解House Man跳跃问题的思路与方法。给定一组不同高度，要求从最低点跳跃到最高点，每次跳跃的距离不超过D，并且不能改变给定的顺序。通过建立差分约束系统，将问题转化为图的建立和查询距离的问题。文章详细介绍了建立约束条件的方法，并使用SPFA算法判环并输出结果。同时还讨论了建边方向和跳跃顺序的关系。 ... [详细]

蜡笔小新 2023-12-14 11:49:51
string
JVM 学习总结（三）——对象存活判定算法的两种实现

本文介绍了垃圾收集器在回收堆内存前确定对象存活的两种算法：引用计数算法和可达性分析算法。引用计数算法通过计数器判定对象是否存活，虽然简单高效，但无法解决循环引用的问题；可达性分析算法通过判断对象是否可达来确定存活对象，是主流的Java虚拟机内存管理算法。 ... [详细]

蜡笔小新 2023-12-13 18:59:46
go
Python正则表达式学习记录及常用方法

本文记录了学习Python正则表达式的过程，介绍了re模块的常用方法re.search，并解释了rawstring的作用。正则表达式是一种方便检查字符串匹配模式的工具，通过本文的学习可以掌握Python中使用正则表达式的基本方法。 ... [详细]

蜡笔小新 2023-12-13 16:37:19
dll
Android源码深入理解JNI技术的概述和应用

本文介绍了Android源码中的JNI技术，包括概述和应用。JNI是Java Native Interface的缩写，是一种技术，可以实现Java程序调用Native语言写的函数，以及Native程序调用Java层的函数。在Android平台上，JNI充当了连接Java世界和Native世界的桥梁。本文通过分析Android源码中的相关文件和位置，深入探讨了JNI技术在Android开发中的重要性和应用场景。 ... [详细]

蜡笔小新 2023-12-13 10:00:57
string
C++字符字符串处理及字符集编码方案

本文介绍了C++中字符字符串处理的问题，并详细解释了字符集编码方案，包括UNICODE、Windows apps采用的UTF-16编码、ASCII、SBCS和DBCS编码方案。同时说明了ANSI C标准和Windows中的字符/字符串数据类型实现。文章还提到了在编译时需要定义UNICODE宏以支持unicode编码，否则将使用windows code page编译。最后，给出了相关的头文件和数据类型定义。 ... [详细]

蜡笔小新 2023-12-13 04:59:58
go
hdu 5439（找规律）的数列求和问题

本文讨论了一个数列求和问题，该数列按照一定规律生成。通过观察数列的规律，我们可以得出求解该问题的算法。具体算法为计算前n项i*f[i]的和，其中f[i]表示数列中有i个数字。根据参考的思路，我们可以将算法的时间复杂度控制在O(n)，即计算到5e5即可满足1e9的要求。 ... [详细]

蜡笔小新 2023-12-12 14:05:58
string
李逍遥寻找仙药的迷阵之旅

本文讲述了少年李逍遥为了救治婶婶的病情，前往仙灵岛寻找仙药的故事。他需要穿越一个由M×N个方格组成的迷阵，有些方格内有怪物，有些方格是安全的。李逍遥需要避开有怪物的方格，并经过最少的方格，找到仙药。在寻找的过程中，他还会遇到神秘人物。本文提供了一个迷阵样例及李逍遥找到仙药的路线。 ... [详细]

蜡笔小新 2023-12-12 13:59:33
go
Open judge C16H: Magical Balls 快速幂+逆元问题解析

本文主要解析了Open judge C16H问题中涉及到的Magical Balls的快速幂和逆元算法，并给出了问题的解析和解决方法。详细介绍了问题的背景和规则，并给出了相应的算法解析和实现步骤。通过本文的解析，读者可以更好地理解和解决Open judge C16H问题中的Magical Balls部分。 ... [详细]

蜡笔小新 2023-12-14 12:03:27
go
Go GUIlxn/walk 学习3.菜单栏和工具栏的具体实现

本文介绍了使用Go语言的GUI库lxn/walk实现菜单栏和工具栏的具体方法，包括消息窗口的产生、文件放置动作响应和提示框的应用。部分代码来自上一篇博客和lxn/walk官方示例。文章提供了学习GUI开发的实际案例和代码示例。 ... [详细]

蜡笔小新 2023-12-12 20:56:55
dll
C#制作Java+Mysql+Tomcat环境安装程序，一键式安装教程

本文介绍了如何使用C#制作Java+Mysql+Tomcat环境安装程序，实现一键式安装。通过将JDK、Mysql、Tomcat三者制作成一个安装包，解决了客户在安装软件时的复杂配置和繁琐问题，便于管理软件版本和系统集成。具体步骤包括配置JDK环境变量和安装Mysql服务，其中使用了MySQL Server 5.5社区版和my.ini文件。安装方法为通过命令行将目录转到mysql的bin目录下，执行mysqld --install MySQL5命令。 ... [详细]

蜡笔小新 2023-12-12 19:29:55
config
【openwrt】设备mt7628关于wan侧eth0.1 mac地址固定的问题

本文讨论了在openwrt-17.01版本中，mt7628设备上初始化启动时eth0的mac地址总是随机生成的问题。每次随机生成的eth0的mac地址都会写到/sys/class/net/eth0/address目录下，而openwrt-17.01原版的SDK会根据随机生成的eth0的mac地址再生成eth0.1、eth0.2等，生成后的mac地址会保存在/etc/config/network下。 ... [详细]

蜡笔小新 2023-12-12 17:47:48
string
深入理解Kafka服务端请求队列中请求的处理

本文深入分析了Kafka服务端请求队列中请求的处理过程，详细介绍了请求的封装和放入请求队列的过程，以及处理请求的线程池的创建和容量设置。通过场景分析、图示说明和源码分析，帮助读者更好地理解Kafka服务端的工作原理。 ... [详细]

蜡笔小新 2023-12-12 16:14:59
string
java boolean 大小_java boolean 大小

先看官方文档：The Java Tutorials have been written for JDK 8. Examples and practices described in this page don't ta ... [详细]

蜡笔小新 2023-12-12 13:36:56

云龙破月56

这个家伙很懒，什么也没留下！

Tags | 热门标签

RankList | 热门文章