pytorch图像处理的问题

有段时间读取数据的时候都是用下面的命令：

img = cv2.imread(os.path.join(path, file, pic))
img = Image.fromarray(img.astype(np.uint8))

后来有一天突然反应过来：为什么要搞得这么麻烦？直接使用：

img = Image.open(os.path.join(path, file, pic))

就好了呀

在pytorch中使用torchvision的transform等操作时，输入需要是PIL的Image对象（例如用Image.open读入的数据）；而cv2.imread返回的是numpy数组，这也是为什么上面还要用Image.fromarray转换的原因

补充一点知识：

使用 scipy.misc.imread 读取的图片数据是 RGB 格式；
使用 cv2.imread 读取的图片数据是 BGR 格式；
使用 PIL.Image.open 读取的图片数据是RGB格式；

所以做小波变换的时候就出了问题：

#coding:utf-8
import torch.nn as nn
import torch

def dwt_init(x):
    """Apply one level of a 2-D Haar wavelet decomposition.

    Every 2x2 cell of the last two axes is split into its four samples
    (each scaled by 1/2), which are then combined into four sub-bands.
    The sub-bands are concatenated along dimension 1 in the order
    LL, HL, LH, HH.

    Args:
        x: 4-D tensor indexed as ``[:, :, row, col]``. Both spatial sizes
            are expected to be even so that the even- and odd-indexed
            slices have matching shapes.

    Returns:
        Tensor whose dimension 1 is four times that of ``x`` and whose
        last two dimensions are half as long.
    """
    half = x / 2
    even_rows, odd_rows = half[:, :, 0::2, :], half[:, :, 1::2, :]

    a = even_rows[:, :, :, 0::2]  # even row, even column
    b = odd_rows[:, :, :, 0::2]   # odd row,  even column
    c = even_rows[:, :, :, 1::2]  # even row, odd column
    d = odd_rows[:, :, :, 1::2]   # odd row,  odd column

    sub_bands = (
        a + b + c + d,    # LL
        -a - b + c + d,   # HL
        -a + b - c + d,   # LH
        a - b - c + d,    # HH
    )
    return torch.cat(sub_bands, 1)


# Inverse 2-D discrete wavelet transform implemented with the Haar wavelet.
def iwt_init(x):
    """Rebuild a full-resolution tensor from four stacked Haar sub-bands.

    Dimension 1 of ``x`` is read as four consecutive, equally sized groups
    in the order produced by ``dwt_init`` (LL, HL, LH, HH). Each group is
    scaled by 1/2 and the four are recombined into the four positions of
    every 2x2 output cell, which undoes ``dwt_init``.

    Args:
        x: 4-D tensor of shape ``(batch, 4 * C, height, width)``.

    Returns:
        Tensor of shape ``(batch, C, 2 * height, 2 * width)``. It lives on
        the same device as ``x`` and has the dtype of ``x / 2`` (so a
        float input keeps its precision).
    """
    r = 2
    in_batch, in_channel, in_height, in_width = x.size()
    out_channel = in_channel // (r ** 2)
    out_height, out_width = r * in_height, r * in_width

    x1 = x[:, 0:out_channel, :, :] / 2
    x2 = x[:, out_channel:out_channel * 2, :, :] / 2
    x3 = x[:, out_channel * 2:out_channel * 3, :, :] / 2
    x4 = x[:, out_channel * 3:out_channel * 4, :, :] / 2

    # Allocate the result next to the input instead of hard-coding a CPU
    # float32 buffer; otherwise GPU inputs are silently copied back to the
    # CPU and float64 inputs are downcast.
    h = torch.zeros(
        [in_batch, out_channel, out_height, out_width],
        dtype=x1.dtype,
        device=x1.device,
    )

    h[:, :, 0::2, 0::2] = x1 - x2 - x3 + x4
    h[:, :, 1::2, 0::2] = x1 - x2 + x3 - x4
    h[:, :, 0::2, 1::2] = x1 + x2 - x3 - x4
    h[:, :, 1::2, 1::2] = x1 + x2 + x3 + x4

    return h


# 2-D discrete wavelet transform
class DWT(nn.Module):
    """``nn.Module`` wrapper that applies ``dwt_init`` in its forward pass."""

    def __init__(self):
        super().__init__()
        # Signal-processing step rather than a convolution, so no gradient
        # computation is intended. This is a plain instance attribute; the
        # module itself registers no parameters.
        self.requires_grad = False

    def forward(self, x):
        """Return the wavelet decomposition ``dwt_init(x)``."""
        decomposed = dwt_init(x)
        return decomposed


# Inverse 2-D discrete wavelet transform
class IWT(nn.Module):
    """``nn.Module`` wrapper that applies ``iwt_init`` in its forward pass."""

    def __init__(self):
        super().__init__()
        # Plain instance attribute marking that no gradient computation is
        # intended; the module itself registers no parameters.
        self.requires_grad = False

    def forward(self, x):
        """Return the reconstruction ``iwt_init(x)``."""
        reconstructed = iwt_init(x)
        return reconstructed

# Demo: decompose ./1.jpg with DWT and save each group of three output
# channels (one wavelet sub-band of the RGB image) as its own JPEG.
if __name__ == '__main__':
    import os
    import cv2
    import torchvision
    from PIL import Image
    import numpy as np
    from torchvision import transforms as trans

    # Alternative loading path kept for comparison: cv2.imread yields BGR
    # data, so wrapping it with Image.fromarray swaps the colour channels.
    # img = cv2.imread('./1.jpg')
    # print(img.shape)
    # img = Image.fromarray(img.astype(np.uint8))
    img = Image.open('./1.jpg')
    transform = trans.Compose([
        trans.ToTensor()
    ])
    # Add a leading batch dimension: dwt_init indexes its input as 4-D.
    img = transform(img).unsqueeze(0)
    dwt = DWT()
    change_img_tensor = dwt(img)
    print(change_img_tensor.shape)

    for i in range(change_img_tensor.size(1) // 3):
        print(i * 3, i * 3 + 3)
        torchvision.utils.save_image(
            change_img_tensor[:, i * 3:i * 3 + 3, :, :],
            os.path.join('./', 'change_{}.jpg'.format(i)))

使用第一种方法（cv2.imread读入后再用Image.fromarray转换）得到的数据为：

用第二种方法得到的数据是：

这是因为第一种方法读进来的是BGR格式，而Image.fromarray以及之后的处理都是按RGB的通道顺序来解释数据的，所以红色和蓝色通道被对调了；实际要的是第二种方法得到的这种RGB格式，正常情况下存储也是RGB格式

为什么深度学习中普遍用BGR描述图像，而非RGB通道？

因为caffe，作为最早最流行的一批库的代表，用了opencv，而opencv默认通道是bgr的。这是opencv的入门大坑之一，bgr是个历史遗留问题，为了兼容早年的某些硬件。其实你自己训练完全可以用rgb，新库也基本没了bgr还是rgb这个问题，就是切换下顺序。但如果你要用一些老的训练好的模型，就得兼容老模型的bgr。可以进行一下外部转换

比如我之前有处理过一个数据：

from pathlib import Path
from config import get_config
import argparse

from PIL import Image
from tqdm import tqdm
import mxnet as mx
import cv2
import numpy as np

def load_mx_rec(rec_path):
    """Unpack an MXNet indexed record file into per-label JPEG folders.

    Reads ``train.idx`` / ``train.rec`` from ``rec_path`` and writes every
    image to ``rec_path/imgs/<label>/<idx>.jpg``.

    Args:
        rec_path: ``pathlib.Path`` of the directory holding the record
            files; the ``imgs`` output directory is created inside it.
    """
    save_path = rec_path/'imgs'
    save_path.mkdir(exist_ok=True)
    imgrec = mx.recordio.MXIndexedRecordIO(
        str(rec_path/'train.idx'), str(rec_path/'train.rec'), 'r')
    # Record 0 carries only a header; its first label value is used as the
    # exclusive upper bound of the image indices.
    img_info = imgrec.read_idx(0)
    header, _ = mx.recordio.unpack(img_info)
    max_idx = int(header.label[0])
    for idx in tqdm(range(1, max_idx)):
        img_info = imgrec.read_idx(idx)
        header, img = mx.recordio.unpack_img(img_info)
        label = int(header.label)
        # unpack_img decodes through OpenCV, so colour images arrive in BGR
        # order, while Image.fromarray interprets a 3-channel array as RGB.
        # Swap the channels first, otherwise red and blue are exchanged in
        # the saved JPEG.
        if img.ndim == 3 and img.shape[2] == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        label_path = save_path/str(label)
        label_path.mkdir(exist_ok=True)
        img.save(label_path/'{}.jpg'.format(idx), quality=95)

# Command-line entry point: resolve the record directory below the
# configured data path and unpack it into image folders.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='for face verification')
    parser.add_argument("-r", "--rec_path", help="mxnet record file path", default='faces_emore', type=str)
    args = parser.parse_args()
    conf = get_config()
    # NOTE(review): conf.data_path is presumably a pathlib.Path, since it is
    # combined with "/" here - confirm against config.get_config.
    rec_path = conf.data_path/args.rec_path
    load_mx_rec(rec_path)  # the training data lives in train.idx and train.rec

如果这里我没有添加：

img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

那么得到的图片颜色就很奇怪：

如果我添加了这一个处理后，得到的图像就正常了：

之前有错误写成：

img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

但是发现效果是一样的：因为COLOR_BGR2RGB和COLOR_RGB2BGR做的都是把第1个通道和第3个通道对调（即交换R和B），所以两种写法得到的结果完全相同

pytorch图像处理的问题

【机器学习】生成式对抗网络模型综述

VScode格式化文档换行或不换行的设置方法

【openwrt】设备mt7628关于wan侧eth0.1 mac地址固定的问题

logistic回归（线性和非线性）的开发笔记

YOLOv7基于自己的数据集从零构建模型完整训练、推理计算超详细教程

rhel5.5搭建网关+LAMP+postfix+dhcp的步骤和配置方法

Oracle中tnsnames.ora的作用和配置方法

推荐系统遇上深度学习(十七）详解推荐系统中的常用评测指标

python创建一个窗口_等一个大佬啊要求用python创建一个窗口，窗口按钮功能是创建一个球体或立方体。明天上课之前交给我...

C#制作Java+Mysql+Tomcat环境安装程序，一键式安装教程

web.py开发web 第八章 Formalchemy 服务端验证方法

CentOS安装Python2.7.2的步骤和注意事项

Swing组件及其用法，图标接口的定义和创建方法

VUE2.0+ElementUI2.0表格el-table循环动态列渲染的写法详解

Python张量流中的device spec make_merged_spec()方法使用说明