Use python realize warpPerspective with interpolation method bilinear

i try to use python realize warpPerspective with interpolation method bilinear
but got mse 0.12 with the cv2.warpPerspective

how can i get the same result , some one can help me ,i will be very grateful

def pective_transformation(img,warpMatrix,size,flag =0):
    height,weight,channel=img.shape
    #source=np.ones((3,1))
    cv2_invert = cv2.invert(warpMatrix)[1]
    print(cv2_invert)
    new_img=np.zeros((size[1],size[0],channel))#创建透视变换output图像(矩阵)
    M = warpMatrix.copy().reshape((1,-1))
    warpMatrix = np.mat(warpMatrix)
    warpMatrix = warpMatrix.reshape((3, 3))

    
    #print(M.shape)
    #print(M[0])
    D = M[0][0]*M[0][4] - M[0][1]*M[0][3]
    if( D != 0):
        D =1./D 
    A11 = M[0][4]*D ; A22 = M[0][0]*D
    M[0][0] = A11   ; M[0][1] *= -D
    M[0][3] *= -D   ; M[0][4] = A22
    b1 = -M[0][0]*M[0][2] - M[0][1]*M[0][5]
    b2 = -M[0][3]*M[0][2] - M[0][4]*M[0][5]
    M[0][2] = b1 ; M[0][5] = b2

    M = M.reshape((3,3))
    print(M)
    #print(warpMatrix.I)

    cv_m = np.mat([ [1.341463406454491, 0.513128077358975, -93.90243845181439],
                     [0, 1, 0],
                     [0, -0, 1]])
    
    XY = np.zeros((size[1],size[0],2))

    #Y = np.zeros((size[1],size[0]))
    
    for i in range(size[0]):#遍历新图像的列
        for j in range(size[1]):#遍历新图像的行
            #从新图像的左上角到右下角依次插值
            goal = [i,j,1]#目标点坐标(X,Y,Z)
            goal = np.mat(goal).astype(np.float64)
            goal = goal.reshape((3,1))
            #print(goal1)
            img_point = M * goal.astype(np.float64)#利用目标点反推原图像点坐标,得到的坐标为(x,y,z),shape=(3,1)   float64
            # print(img_point.dtype)  # float64

            #print("warpMatrix.I:",warpMatrix.I)
            img_point = img_point.tolist()
            #其中img_point[0][0]代表原图像x坐标
            # 其中img_point[1][0]代表原图像y坐标
            # 其中img_point[2][0]代表原图像z坐标
            #实际使用中z会无限趋近于1,但不会等于1,在这里相当于一个修正参数,修正x和y坐标

            if flag == 0 :

                x = int( round(img_point[0][0] / img_point[2][0] ) )#计算原图像x坐标
                y = int( round(img_point[1][0] / img_point[2][0] ))#计算原图像y坐标
                
                XY[j,i] = (img_point[0][0] / img_point[2][0],img_point[1][0] / img_point[2][0])
                
                # 用边界补齐
                '''
                if y>=img.shape[0]:#防止溢出原图像边界
                    y=img.shape[0]-1
                if x>=img.shape[1]:
                    x=img.shape[1]-1
                if y<0:
                    y=0
                if x<0:
                    x=0
                '''
                # 留黑
                
                if y>=img.shape[0]:#防止溢出原图像边界
                    continue
                if x>=img.shape[1]:
                    continue
                if y<0:
                    continue
                if x<0:
                    continue
                new_img[j,i] = img[y,x]

            if flag == 1:
                x = img_point[0][0] / img_point[2][0] #计算原图像x坐标
                y = img_point[1][0] / img_point[2][0] #计算原图像y坐标
                '''
                int_x = int(np.floor(x)) ; s_x = x - int_x                
                int_y = int(np.floor(y)) ; s_y = y - int_y
                '''
                data_x = round( (img_point[0][0] / img_point[2][0]) * (2**5) ) #计算原图像x坐标
                data_y = round( (img_point[1][0] / img_point[2][0]) * (2**5) )  #计算原图像y坐标
                
                s_x = float( ( data_x & ( 2**5 - 1 ) ) / 2**5 )
                #print(f_x)
                s_y = float( ( data_y & ( 2**5 - 1 ) ) / 2**5 )
                int_x =  int( round( data_x / 2**5 ) )                
                int_y =  int( round( data_y / 2**5 ) )
                
                #print(x,y,int_x,int_y,s_x,s_y)
                if int_y>=img.shape[0]:#防止溢出原图像边界
                    continue
                if int_x>=img.shape[1]:
                    continue
                if int_y+1<=0:
                    continue
                if int_x+1<=0:
                    continue
                
                int_x1 = cv2.borderInterpolate(int_x,size[0],cv2.BORDER_CONSTANT)
                int_x2 = cv2.borderInterpolate(int_x+1,size[0],cv2.BORDER_CONSTANT)
                int_y1 = cv2.borderInterpolate(int_y,size[1],cv2.BORDER_CONSTANT)
                int_y2 = cv2.borderInterpolate(int_y+1,size[1],cv2.BORDER_CONSTANT)


                '''
                if y >= img.shape[0]-1 :
                    if x> img.shape[1]-1:
                        new_img[j,i] =  (1.0-s_x)*(1.0-s_y)*img[int_y,int_x]
                    else:
                        new_img[j,i] =  (1.0-s_x)*(1.0-s_y)*img[int_y,int_x] + s_x*(1.0-s_y)*img[int_y,(int_x+1)] 
                    continue

                if x >= img.shape[1]-1:
                    if y > img.shape[0]-1 :
                        new_img[j,i] =  (1.0-s_x)*(1.0-s_y)*img[int_y,int_x]
                    else:
                        #print(int_x,int_y)
                        new_img[j,i] =  (1.0-s_x)*(1.0-s_y)*img[int_y,int_x] + (1.0-s_x)*s_y*img[(int_y+1),int_x] 
                    continue
                
                                    
                if y <= 0:                    
                    s_y = 1 - s_y
                    if x < 0:
                        s_x = 1 -s_x
                        new_img[j,i] = s_x*s_y*img[(int_y+1),(int_x+1)]
                    else:
                        new_img[j,i] = (1.0-s_x)*s_y*img[(int_y+1),int_x] + s_x*s_y*img[(int_y+1),(int_x+1)]
                    continue
                
                if x <= 0:         
                    if y < 0:
                        print(s_y,s_x)
                        s_y = 1 - s_y
                        new_img[j,i] = s_x*s_y*img[(int_y+1),(int_x+1)]
                    else:
                        
                        new_img[j,i] = s_x*(1.0-s_y)*img[int_y,(int_x+1)] + s_x*s_y*img[(int_y+1),(int_x+1)]
                        #print("xy < 0",s_y,s_x,new_img[j,i],i,j,int_x,int_y)
                    continue
                '''

                if int_x1 >=0  and int_y1>=0 :
                    v0 = img[int_y1,int_x1]
                else:
                    v0 = np.array([0,0,0])
                if int_x2 >=0  and int_y1>=0 :
                    v1 = img[int_y1,int_x2]
                else:
                    v1 = np.array([0,0,0])
                if int_x1 >=0  and int_y2>=0 :
                    v2 = img[int_y2,int_x1]
                else:
                    v2 = np.array([0,0,0])
                if int_x2 >=0  and int_y2>=0 :
                    v3 = img[int_y2,int_x2]
                else:
                    v3 = np.array([0,0,0])

                new_img[j,i] =  (1.0-s_x)*(1.0-s_y)*v0 +  \
                                (1.0-s_x)*s_y*v2 +    \
                                s_x*(1.0-s_y)*v1 +    \
                                s_x*s_y*v3

      #print(new_img.reshape(6,6))
          #print(new_img[0])
          ''''''
          for i in range(new_img.shape[0]):
              for j in range(new_img.shape[1]):
                  new_img[i][j][0] = int(round(new_img[i][j][0]) )
                  new_img[i][j][1] = int(round(new_img[i][j][1]) )
                  new_img[i][j][2] = int(round(new_img[i][j][2]) )
          #print(new_img[0])
          new_img = (new_img).astype(np.uint8)
          #print(new_img)
          #print(new)
          
          #new_img_1 = cv2.remap(img,XY.astype(np.float32),map2.astype(np.float32),interpolation = cv2.INTER_LINEAR)
          return new_img