
The OCR algorithm before optimization

Posted: 2024-04-02 17:22:18

import torch.nn as nn
from collections import OrderedDict
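
# The baseline CRNN text recognizer (conv feature extractor feeding two
# stacked bidirectional LSTMs) plus two variants defined below: CRNN_v2,
# a deeper conv stack that keeps a height-2 feature map, and CRNN_res,
# which swaps the plain conv stack for basic residual blocks.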
 
class BidirectionalLSTM(nn.Module):
 
    def __init__(self, nIn, nHidden, nOut):
        super(BidirectionalLSTM, self).__init__()
 
        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
        self.embedding = nn.Linear(nHidden * 2, nOut)
 
    def forward(self, input):
        # input: [T, b, nIn] -> recurrent: [T, b, nHidden * 2]
        recurrent, _ = self.rnn(input)
        T, b, h = recurrent.size()
        t_rec = recurrent.view(T * b, h)  # merge time and batch for the linear layer

        output = self.embedding(t_rec)  # [T * b, nOut]
        output = output.view(T, b, -1)  # restore [T, b, nOut]
        return output
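
# Hypothetical usage (sizes for illustration only):
#   BidirectionalLSTM(512, 256, 37) maps [T, b, 512] -> [T, b, 37],
# i.e. per-time-step class scores over the sequence.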
 
 
class CRNN(nn.Module):
 
    def __init__(self, imgH, nc, nclass, nh, leakyRelu=False):
        super(CRNN, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
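        # Note: imgH % 16 == 0 is necessary but not sufficient; with the
        # pooling scheme and the 2x2 conv5 below, the h == 1 assert in
        # forward() holds only for imgH == 32.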
 
        # 1x32x128 input; stage comments below are approximate C x H x W
        self.conv1 = nn.Conv2d(nc, 64, 3, 1, 1)
        self.relu1 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(2, 2)
 
        # 64x16x64
        self.conv2 = nn.Conv2d(64, 128, 3, 1, 1)
        self.relu2 = nn.ReLU(True)
        self.pool2 = nn.MaxPool2d(2, 2)
 
        # 128x8x32
        self.conv3_1 = nn.Conv2d(128, 256, 3, 1, 1)
        self.bn3 = nn.BatchNorm2d(256)
        self.relu3_1 = nn.ReLU(True)
        self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1)
        self.relu3_2 = nn.ReLU(True)
        self.pool3 = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
 
        # 256x4x16
        self.conv4_1 = nn.Conv2d(256, 512, 3, 1, 1)
        self.bn4 = nn.BatchNorm2d(512)
        self.relu4_1 = nn.ReLU(True)
        self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1)
        self.relu4_2 = nn.ReLU(True)
        self.pool4 = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
 
        # 512x2x16
        self.conv5 = nn.Conv2d(512, 512, 2, 1, 0)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU(True)
 
        # 512x1x16
 
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))
 
 
    def forward(self, input):
        # conv features
        x = self.pool1(self.relu1(self.conv1(input)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3_2(self.conv3_2(self.relu3_1(self.bn3(self.conv3_1(x))))))
        x = self.pool4(self.relu4_2(self.conv4_2(self.relu4_1(self.bn4(self.conv4_1(x))))))
        conv = self.relu5(self.bn5(self.conv5(x)))
        # print(conv.size())
 
        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)        # drop the unit height dim: [b, c, w]
        conv = conv.permute(2, 0, 1)  # [w, b, c]
 
        # rnn features
        output = self.rnn(conv)
 
        return output
 
 
class CRNN_v2(nn.Module):
 
    def __init__(self, imgH, nc, nclass, nh, leakyRelu=False):
        super(CRNN_v2, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
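        # As in CRNN above, the h == 2 assert in forward() effectively
        # requires imgH == 32 with this pooling scheme.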
 
        # 1x32x128
        self.conv1_1 = nn.Conv2d(nc, 32, 3, 1, 1)
        self.bn1_1 = nn.BatchNorm2d(32)
        self.relu1_1 = nn.ReLU(True)
 
        self.conv1_2 = nn.Conv2d(32, 64, 3, 1, 1)
        self.bn1_2 = nn.BatchNorm2d(64)
        self.relu1_2 = nn.ReLU(True)
        self.pool1 = nn.MaxPool2d(2, 2)
 
        # 64x16x64
        self.conv2_1 = nn.Conv2d(64, 64, 3, 1, 1)
        self.bn2_1 = nn.BatchNorm2d(64)
        self.relu2_1 = nn.ReLU(True)
 
        self.conv2_2 = nn.Conv2d(64, 128, 3, 1, 1)
        self.bn2_2 = nn.BatchNorm2d(128)
        self.relu2_2 = nn.ReLU(True)
        self.pool2 = nn.MaxPool2d(2, 2)
 
        # 128x8x32
        self.conv3_1 = nn.Conv2d(128, 96, 3, 1, 1)
        self.bn3_1 = nn.BatchNorm2d(96)
        self.relu3_1 = nn.ReLU(True)
 
        self.conv3_2 = nn.Conv2d(96, 192, 3, 1, 1)
        self.bn3_2 = nn.BatchNorm2d(192)
        self.relu3_2 = nn.ReLU(True)
        self.pool3 = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
 
        # 192x4x32
        self.conv4_1 = nn.Conv2d(192, 128, 3, 1, 1)
        self.bn4_1 = nn.BatchNorm2d(128)
        self.relu4_1 = nn.ReLU(True)
        self.conv4_2 = nn.Conv2d(128, 256, 3, 1, 1)
        self.bn4_2 = nn.BatchNorm2d(256)
        self.relu4_2 = nn.ReLU(True)
        self.pool4 = nn.MaxPool2d((2, 2), (2, 1), (0, 1))
 
        # 256x2x32
        self.bn5 = nn.BatchNorm2d(256)
 
 
        # 256x2x32
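        # forward() folds this height-2 map into the channel dim
        # (256 * 2 = 512), which is why the first BiLSTM takes 512 inputs.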
 
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))
 
 
    def forward(self, input):
        # conv features
        x = self.pool1(self.relu1_2(self.bn1_2(self.conv1_2(self.relu1_1(self.bn1_1(self.conv1_1(input)))))))
        x = self.pool2(self.relu2_2(self.bn2_2(self.conv2_2(self.relu2_1(self.bn2_1(self.conv2_1(x)))))))
        x = self.pool3(self.relu3_2(self.bn3_2(self.conv3_2(self.relu3_1(self.bn3_1(self.conv3_1(x)))))))
        x = self.pool4(self.relu4_2(self.bn4_2(self.conv4_2(self.relu4_1(self.bn4_1(self.conv4_1(x)))))))
        conv = self.bn5(x)
        # print(conv.size())
 
        b, c, h, w = conv.size()
        assert h == 2, "the height of conv must be 2"
        conv = conv.reshape(b, c * h, w)  # fold height into channels: [b, 512, w]
        conv = conv.permute(2, 0, 1)      # [w, b, c]
 
        # rnn features
        output = self.rnn(conv)
 
        return output
 
 
def conv3x3(nIn, nOut, stride=1):
    """3x3 convolution with padding."""
    return nn.Conv2d(nIn, nOut, kernel_size=3, stride=stride, padding=1, bias=False)
 
 
class basic_res_block(nn.Module):
    """Standard basic residual block: conv-bn-relu-conv-bn plus identity."""

    def __init__(self, nIn, nOut, stride=1, downsample=None):
        super(basic_res_block, self).__init__()
        m = OrderedDict()
        m['conv1'] = conv3x3(nIn, nOut, stride)
        m['bn1'] = nn.BatchNorm2d(nOut)
        m['relu1'] = nn.ReLU(inplace=True)
        m['conv2'] = conv3x3(nOut, nOut)
        m['bn2'] = nn.BatchNorm2d(nOut)
        self.group1 = nn.Sequential(m)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        # project the identity when stride or channel count changes
        if self.downsample is not None:
            residual = self.downsample(x)
        else:
            residual = x
        out = self.group1(x) + residual
        out = self.relu(out)
        return out
 
 
class CRNN_res(nn.Module):
 
    def __init__(self, imgH, nc, nclass, nh):
        super(CRNN_res, self).__init__()
        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
 
        self.conv1 = nn.Conv2d(nc, 64, 3, 1, 1)
        self.relu1 = nn.ReLU(True)
        self.res1 = basic_res_block(64, 64)
        # 1x32x128
 
        down1 = nn.Sequential(nn.Conv2d(64, 128, kernel_size=1, stride=2, bias=False), nn.BatchNorm2d(128))
        self.res2_1 = basic_res_block(64, 128, 2, down1)
        self.res2_2 = basic_res_block(128, 128)
        # 64x16x64

        down2 = nn.Sequential(nn.Conv2d(128, 256, kernel_size=1, stride=2, bias=False), nn.BatchNorm2d(256))
        self.res3_1 = basic_res_block(128, 256, 2, down2)
        self.res3_2 = basic_res_block(256, 256)
        self.res3_3 = basic_res_block(256, 256)
        # 128x8x32

        down3 = nn.Sequential(nn.Conv2d(256, 512, kernel_size=1, stride=(2, 1), bias=False), nn.BatchNorm2d(512))
        self.res4_1 = basic_res_block(256, 512, (2, 1), down3)
        self.res4_2 = basic_res_block(512, 512)
        self.res4_3 = basic_res_block(512, 512)
        # 256x4x16
 
        self.pool = nn.AvgPool2d((2, 2), (2, 1), (0, 1))
        # 512x2x16
 
        self.conv5 = nn.Conv2d(512, 512, 2, 1, 0)
        self.bn5 = nn.BatchNorm2d(512)
        self.relu5 = nn.ReLU(True)
        # 512x1x16
 
        self.rnn = nn.Sequential(
            BidirectionalLSTM(512, nh, nh),
            BidirectionalLSTM(nh, nh, nclass))
 
    def forward(self, input):
        # conv features
        x = self.res1(self.relu1(self.conv1(input)))
        x = self.res2_2(self.res2_1(x))
        x = self.res3_3(self.res3_2(self.res3_1(x)))
        x = self.res4_3(self.res4_2(self.res4_1(x)))
        x = self.pool(x)
        conv = self.relu5(self.bn5(self.conv5(x)))
        # print(conv.size())
        b, c, h, w = conv.size()
        assert h == 1, "the height of conv must be 1"
        conv = conv.squeeze(2)
        conv = conv.permute(2, 0, 1)  # [w, b, c]
 
        # rnn features
        output = self.rnn(conv)
 
        return output
 
if __name__ == '__main__':
    pass
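
    # A minimal smoke test, not part of the original post: the sizes are
    # illustrative assumptions (1-channel 32x128 crops, 37 classes, nh=256).
    import torch

    x = torch.randn(4, 1, 32, 128)  # b=4, c=1, H=32, W=128
    for Net in (CRNN, CRNN_v2, CRNN_res):
        net = Net(32, 1, 37, 256).eval()
        with torch.no_grad():
            y = net(x)
        print(Net.__name__, tuple(y.shape))  # -> (W', 4, 37)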

 

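All three heads return raw scores of shape [w, b, nclass]; CRNN-style recognizers train this output with CTC and read it back with best-path decoding. Below is a minimal greedy decoding sketch; the blank index (0 here) and the absence of a label map are assumptions, since the post defines neither.

import torch

def greedy_ctc_decode(logits, blank=0):
    # logits: [T, b, nclass] raw scores from the recurrent head
    best = logits.argmax(dim=2)  # [T, b] best class index per time step
    decoded = []
    for seq in best.t():  # one column per batch element
        out, prev = [], blank
        for idx in seq.tolist():
            # best-path CTC rule: collapse repeated symbols, then drop blanks
            if idx != prev and idx != blank:
                out.append(idx)
            prev = idx
        decoded.append(out)
    return decoded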

From: https://www.cnblogs.com/wajava/p/18111090
