初步完成png图片识别

main
amuliang 2023-10-03 01:25:40 +08:00
parent d1e0061c14
commit 6ebd0c35ef
1 changed files with 417 additions and 0 deletions

View File

@ -0,0 +1,417 @@
import binascii
import json
import os
import random
import re
import sys
import time
from PIL import Image
class PNG:
    """Detection and repair helpers for PNG files.

    Every helper is static and works on the complete file content held in a
    ``bytearray``. Progress and findings are printed to stdout.
    """

    # 8-byte signature that opens every PNG file.
    SIGNATURE = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
    # IEND chunk type followed by the CRC of that (empty) chunk.
    IEND = b'\x49\x45\x4E\x44\xAE\x42\x60\x82'

    def __init__(self):
        pass

    @staticmethod
    def test(data: bytearray):
        """Return the confidence (in percent) that ``data`` is a PNG file.

        Placeholder: the data is not inspected and 100 is always returned.
        """
        return 100

    @staticmethod
    def get_info(data: bytearray):
        """Print and return the payload of the first ``tEXt`` chunk.

        Returns:
            The chunk payload as a slice of ``data``, or ``None`` when no
            ``tEXt`` chunk type is found past offset 32.
        """
        # Default to None so that files without a tEXt chunk do not raise
        # UnboundLocalError on return.
        info = None
        index = data.find(b'tEXt')
        if index > 32:
            # The 4 bytes preceding the chunk type hold the payload length.
            length = int.from_bytes(data[index - 4:index], byteorder='big')
            chunk_data = data[index + 4:index + 4 + length]
            print('[*] 图像信息:')
            print(bytes(chunk_data).split(b'\x00'))
            info = chunk_data
        return info

    @staticmethod
    def check_tail(data: bytearray):
        """Detect bytes that follow the IEND chunk and strip them.

        The full ``IEND`` + CRC sequence is searched first. If only the
        ``IEND`` type is present, everything after the type is treated as
        tail and the canonical CRC is re-appended to the kept data.

        Returns:
            ``(changed, data, tail)``. ``tail`` is ``None`` when no usable
            IEND was located, otherwise the bytes found after it (possibly
            empty). ``data`` is a new, truncated ``bytearray`` when
            ``changed`` is true; the input is not mutated.
        """
        changed = False
        tail = None
        crc_damaged = False
        index = data.find(PNG.IEND)
        if index != -1:
            index += 8
        else:
            index = data.find(PNG.IEND[:4])
            if index != -1:
                index += 4
                crc_damaged = True
        if index == -1:
            print('[x] 缺少png文件格式结束标识符IEND')
        elif index < 32:
            print('[x] 缺少png文件格式结束标识符IEND位置貌似不太对')
        else:
            tail = data[index:]
            if len(tail) > 0:
                changed = True
                # Actually remove the trailing bytes; previously the message
                # claimed a repair while ``data`` was returned untouched.
                data = data[:index]
                if crc_damaged:
                    data += PNG.IEND[4:]
                print('[x] 检测到文件尾部有垃圾数据,已尝试修复')
        return changed, data, tail

    @staticmethod
    def repair_sig(data: bytearray):
        """Make sure ``data`` starts with the PNG signature.

        Bytes found before the first signature are cut off and returned as
        ``head``. If no signature exists anywhere, the first 8 bytes are
        overwritten in place.

        Returns:
            ``(changed, data, head)`` where ``head`` is the removed prefix
            as ``bytes`` or ``None``.
        """
        changed = False
        head = None
        print('[*] 检测文件头')
        sig = PNG.SIGNATURE
        index = data.find(sig)
        if index > 0:
            print('[x] 检测到文件头前有垃圾数据,已尝试修复')
            head = bytes(data[:index])
            data = data[index:]
            changed = True
        elif data[0:8] != sig:
            print('[x] 检测到文件头异常,已尝试修复')
            data[0:8] = sig
            changed = True
        return changed, data, head

    @staticmethod
    def _search_dimension(chunk, offset, crc):
        """Brute-force the 4-byte field at ``offset`` until the CRC matches.

        Values 1..2047 are tried on a private copy of ``chunk``.

        Returns:
            The matching value, or ``None`` if none of the candidates fits.
        """
        candidate = bytearray(chunk)
        for value in range(1, 2048):
            candidate[offset:offset + 4] = value.to_bytes(4, byteorder='big')
            if PNG.test_ihdr_chunk(candidate, crc):
                return value
        return None

    @staticmethod
    def repair_size(data: bytearray):
        """Validate the IHDR width/height against the stored CRC and repair.

        The chunk at offset 8 is read as IHDR. When its CRC does not match,
        the width is brute-forced first, then the height (each on its own,
        the other field left as stored). If both searches fail the user is
        asked for a height to write.

        Returns:
            ``(changed, data, width, height)``; ``data`` is patched in place
            when a repair happens.
        """
        changed = False
        print('[*] 检测文件尺寸')
        chunk_begin = 8
        data_begin = chunk_begin + 8
        width_begin = data_begin
        height_begin = data_begin + 4
        length = int.from_bytes(data[chunk_begin:chunk_begin + 4], byteorder='big')
        # Expected to be b'IHDR'.
        ctype = data[chunk_begin + 4:chunk_begin + 8]
        chunk_data = data[data_begin:data_begin + length]
        width = int.from_bytes(chunk_data[0:4], byteorder='big')
        height = int.from_bytes(chunk_data[4:8], byteorder='big')
        crc = int.from_bytes(
            data[data_begin + length:data_begin + length + 4], byteorder='big')
        # The CRC covers the chunk type followed by the chunk payload.
        chunk = ctype + chunk_data

        if PNG.test_ihdr_chunk(chunk, crc):
            print(' - 图片尺寸设置正确')
            return changed, data, width, height

        print('[x] 尝试修复图片宽度...')
        found = PNG._search_dimension(chunk, 4, crc)
        if found is not None:
            width = found
            print(f'[*] 图片宽度修复成功,宽度值为{width}')
            data[width_begin:width_begin + 4] = width.to_bytes(4, byteorder='big')
            return True, data, width, height

        print('[x] 尝试修复图片高度...')
        found = PNG._search_dimension(chunk, 8, crc)
        if found is not None:
            height = found
            print(f'[*] 图片高度修复成功,高度值为{height}')
            # Write the recovered *height* (the old code wrote the width here).
            data[height_begin:height_begin + 4] = height.to_bytes(4, byteorder='big')
            return True, data, width, height

        temp = input(f'当前宽度值为{width},高度值为{height},是否尝试将高度值设置为更高?可输入高度值或直接回车:')
        if temp.isdigit():
            height = int(temp)
            data[height_begin:height_begin + 4] = height.to_bytes(4, byteorder='big')
            print(f'[*] 已将图片高度设置为{height}')
            changed = True
        return changed, data, width, height

    @staticmethod
    def test_ihdr_chunk(chunk_data, crc):
        """Return True when the CRC-32 of ``chunk_data`` equals ``crc``."""
        crc32 = binascii.crc32(chunk_data) & 0xffffffff
        return crc32 == crc
class JPG:
    """Detection and repair helpers for JPEG files.

    Mirrors the ``PNG`` helper interface (same method names and return
    shapes) but uses JPEG structures: the SOI marker as signature and the
    SOF segment for the image dimensions. JPEG carries no checksum, so the
    size cannot be validated automatically.
    """

    # Start-of-image marker plus the 0xFF that opens the first segment.
    SIGNATURE = b'\xFF\xD8\xFF'

    def __init__(self):
        pass

    @staticmethod
    def test(data: bytearray):
        """Return the confidence (in percent) that ``data`` is a JPEG file.

        Placeholder: the data is not inspected and 100 is always returned.
        """
        return 100

    @staticmethod
    def get_info(data: bytearray):
        """Return image metadata; currently always an empty dict."""
        info = {}
        return info

    @staticmethod
    def check_tail(data: bytearray):
        """Placeholder tail check: nothing is inspected or changed.

        Returns:
            ``(False, data, None)``.
        """
        changed = False
        tail = None
        return changed, data, tail

    @staticmethod
    def repair_sig(data: bytearray):
        """Make sure ``data`` starts with the JPEG SOI marker.

        Bytes before the first occurrence of the marker are cut off and
        returned as ``head``. If the marker is absent, the first 3 bytes are
        overwritten in place.

        Returns:
            ``(changed, data, head)`` where ``head`` is the removed prefix
            as ``bytes`` or ``None``.
        """
        changed = False
        head = None
        print('[*] 检测文件头')
        sig = JPG.SIGNATURE
        index = data.find(sig)
        # Only a strictly positive offset means there is leading garbage;
        # offset 0 is a healthy header.
        if index > 0:
            print('[x] 检测到文件头前有垃圾数据,已尝试修复')
            head = bytes(data[:index])
            data = data[index:]
            changed = True
        elif data[0:len(sig)] != sig:
            print('[x] 检测到文件头异常,已尝试修复')
            data[0:len(sig)] = sig
            changed = True
        return changed, data, head

    @staticmethod
    def _find_sof(data):
        """Return the offset of the first SOF marker, or -1 if none is found.

        Segments are walked from offset 2 (right after SOI) using their
        length fields; the walk stops at SOS/EOI or on malformed data.
        """
        pos = 2
        end = len(data)
        while pos + 4 <= end:
            if data[pos] != 0xFF:
                return -1
            marker = data[pos + 1]
            if marker == 0xFF:
                # Fill byte before a marker.
                pos += 1
                continue
            if marker == 0x01 or 0xD0 <= marker <= 0xD8:
                # Stand-alone markers carry no length field.
                pos += 2
                continue
            if marker in (0xD9, 0xDA):
                return -1
            # SOF0..SOF15, excluding DHT (C4), JPG (C8) and DAC (CC).
            if 0xC0 <= marker <= 0xCF and marker not in (0xC4, 0xC8, 0xCC):
                return pos
            seg_len = int.from_bytes(data[pos + 2:pos + 4], byteorder='big')
            if seg_len < 2:
                return -1
            pos += 2 + seg_len
        return -1

    @staticmethod
    def repair_size(data: bytearray):
        """Read the dimensions from the SOF segment and offer a new height.

        Because no checksum exists to verify the stored size, the user is
        always asked whether a different height should be written.

        Returns:
            ``(changed, data, width, height)`` - the same shape as
            ``PNG.repair_size``. Width and height are 0 when no SOF segment
            could be located.
        """
        changed = False
        width = 0
        height = 0
        print('[*] 检测文件尺寸')
        sof = JPG._find_sof(data)
        if sof == -1 or sof + 9 > len(data):
            print('[x] 未找到JPEG帧头(SOF),无法读取图片尺寸')
            return changed, data, width, height
        # SOF layout: marker(2) length(2) precision(1) height(2) width(2).
        height_begin = sof + 5
        width_begin = sof + 7
        height = int.from_bytes(data[height_begin:height_begin + 2], byteorder='big')
        width = int.from_bytes(data[width_begin:width_begin + 2], byteorder='big')
        temp = input(f'当前宽度值为{width},高度值为{height},是否尝试将高度值设置为更高?可输入高度值或直接回车:')
        if temp.isdigit():
            new_height = int(temp)
            # The field is 16 bits wide.
            if 0 < new_height <= 0xFFFF:
                height = new_height
                data[height_begin:height_begin + 2] = height.to_bytes(2, byteorder='big')
                print(f'[*] 已将图片高度设置为{height}')
                changed = True
            else:
                print('[x] 高度值超出范围(1-65535),未作修改')
        return changed, data, width, height

    @staticmethod
    def test_ihdr_chunk(chunk_data, crc):
        """Return True when the CRC-32 of ``chunk_data`` equals ``crc``.

        Kept for interface parity with ``PNG``; not used by the JPEG helpers.
        """
        crc32 = binascii.crc32(chunk_data) & 0xffffffff
        return crc32 == crc
class GIF:
    """Detection and repair helpers for GIF files.

    Mirrors the ``PNG`` helper interface (same method names and return
    shapes) but uses GIF structures: the ``GIF87a``/``GIF89a`` magic and the
    Logical Screen Descriptor for the dimensions. GIF carries no checksum,
    so the size cannot be validated automatically.
    """

    # Both published versions of the 6-byte header.
    SIGNATURES = (b'GIF87a', b'GIF89a')

    def __init__(self):
        pass

    @staticmethod
    def test(data: bytearray):
        """Return the confidence (in percent) that ``data`` is a GIF file.

        Placeholder: the data is not inspected and 100 is always returned.
        """
        return 100

    @staticmethod
    def get_info(data: bytearray):
        """Return image metadata; currently always an empty dict."""
        info = {}
        return info

    @staticmethod
    def check_tail(data: bytearray):
        """Placeholder tail check: nothing is inspected or changed.

        Returns:
            ``(False, data, None)``.
        """
        changed = False
        tail = None
        return changed, data, tail

    @staticmethod
    def repair_sig(data: bytearray):
        """Make sure ``data`` starts with a GIF header.

        Bytes before the first header occurrence are cut off and returned
        as ``head``. If neither header variant is present, the first 6 bytes
        are overwritten in place with ``GIF89a``.

        Returns:
            ``(changed, data, head)`` where ``head`` is the removed prefix
            as ``bytes`` or ``None``.
        """
        changed = False
        head = None
        print('[*] 检测文件头')
        hits = [pos for pos in (data.find(sig) for sig in GIF.SIGNATURES)
                if pos != -1]
        index = min(hits) if hits else -1
        # Only a strictly positive offset means there is leading garbage;
        # offset 0 is a healthy header.
        if index > 0:
            print('[x] 检测到文件头前有垃圾数据,已尝试修复')
            head = bytes(data[:index])
            data = data[index:]
            changed = True
        elif index == -1:
            print('[x] 检测到文件头异常,已尝试修复')
            data[0:6] = GIF.SIGNATURES[1]
            changed = True
        return changed, data, head

    @staticmethod
    def repair_size(data: bytearray):
        """Read the logical screen size and offer to write a new height.

        Because no checksum exists to verify the stored size, the user is
        always asked whether a different height should be written. Only the
        Logical Screen Descriptor is patched; per-frame image descriptors
        are left as they are.

        Returns:
            ``(changed, data, width, height)`` - the same shape as
            ``PNG.repair_size``. Width and height are 0 when the data is too
            short to contain the descriptor.
        """
        changed = False
        width = 0
        height = 0
        print('[*] 检测文件尺寸')
        # Header(6) is followed by width(2) and height(2), little-endian.
        width_begin = 6
        height_begin = 8
        if len(data) < height_begin + 2:
            print('[x] 文件过短,无法读取图片尺寸')
            return changed, data, width, height
        width = int.from_bytes(data[width_begin:width_begin + 2], byteorder='little')
        height = int.from_bytes(data[height_begin:height_begin + 2], byteorder='little')
        temp = input(f'当前宽度值为{width},高度值为{height},是否尝试将高度值设置为更高?可输入高度值或直接回车:')
        if temp.isdigit():
            new_height = int(temp)
            # The field is 16 bits wide.
            if 0 < new_height <= 0xFFFF:
                height = new_height
                data[height_begin:height_begin + 2] = height.to_bytes(2, byteorder='little')
                print(f'[*] 已将图片高度设置为{height}')
                changed = True
            else:
                print('[x] 高度值超出范围(1-65535),未作修改')
        return changed, data, width, height

    @staticmethod
    def test_ihdr_chunk(chunk_data, crc):
        """Return True when the CRC-32 of ``chunk_data`` equals ``crc``.

        Kept for interface parity with ``PNG``; not used by the GIF helpers.
        """
        crc32 = binascii.crc32(chunk_data) & 0xffffffff
        return crc32 == crc
def check_image_type(filename: str, data: bytearray):
    """Decide which image format handler should process the file.

    Two guesses are combined: one from the file name extension and one from
    the content, where each format class reports a confidence via ``test``.
    The extension wins when present. If neither yields a result the user is
    asked to pick a format.

    Args:
        filename: Path or name of the file; only its suffix is examined.
        data: Complete file content.

    Returns:
        ``'png'``, ``'jpg'``, ``'gif'`` or ``None`` when no format was chosen.
    """
    # Guess from the extension. Plain suffix checks replace the former regex
    # literals, which contained the invalid escape sequence '\.'.
    lowered = filename.lower()
    file_type1 = None
    for suffix, name in (('.png', 'png'), ('.jpg', 'jpg'),
                         ('.jpeg', 'jpg'), ('.gif', 'gif')):
        if lowered.endswith(suffix):
            file_type1 = name
            break

    # Guess from the content: the first format with the strictly highest
    # confidence wins.
    file_type2 = None
    weight = 0
    for name, model in (('png', PNG), ('jpg', JPG), ('gif', GIF)):
        weight2 = model.test(data)
        if weight2 > weight:
            file_type2 = name
            weight = weight2

    if file_type1 is not None:
        if file_type1 == file_type2:
            print(f'看起来文件格式是{file_type1}')
        else:
            print(f'文件名中格式是{file_type1},但是解析文件头格式发现{weight}%是{file_type2}')
    elif file_type2 is not None:
        file_type1 = file_type2
        print(f'检测文件头认为文件格式{weight}%是{file_type2}')
    else:
        print('未检测到任何图片格式特征,该文件可能不是图片文件')
        index = input('要指定文件格式吗?或者直接回车。(1、png 2、jpg 3、gif)')
        if index.isdigit():
            file_type1 = {1: 'png', 2: 'jpg', 3: 'gif'}.get(int(index))
    return file_type1
# Main routine
def main(filename: str):
    """Analyse one image file, repair what can be repaired and save the result.

    The file is read completely, its format is determined, and the matching
    format class checks the tail, the signature and the dimensions. When
    anything changed, a result directory is created next to the input file
    holding the repaired image plus any stripped ``head``/``tail`` bytes,
    and an attempt is made to display the repaired image.

    Args:
        filename: Path of the image file to analyse.
    """
    # Read the image data.
    with open(filename, 'rb') as f:
        data = bytearray(f.read())

    # Determine the file format.
    file_type = check_image_type(filename, data)
    model = {'png': PNG, 'jpg': JPG, 'gif': GIF}.get(file_type)

    # Detect and repair.
    changed = False
    changed1 = changed2 = False
    tail = head = None
    if model is not None:
        model.get_info(data)
        changed1, data, tail = model.check_tail(data)
        changed2, data, head = model.repair_sig(data)
        # Only the first two items are needed here; indexing instead of a
        # fixed 4-way unpack tolerates handlers that return fewer values.
        size_result = model.repair_size(data)
        changed3, data = size_result[0], size_result[1]
        changed = changed1 or changed2 or changed3

    if changed:
        # Create the result directory next to the input file. The name is
        # derived from the base name only, so dots or directory parts in
        # the given path cannot produce a non-existent parent directory.
        basename = os.path.basename(filename)
        dirname = basename.replace('.', '_') + str(int(time.time()))
        dirpath = os.path.join(os.path.dirname(os.path.abspath(filename)), dirname)
        os.mkdir(dirpath)

        # Save the results.
        imagepath = os.path.join(dirpath, basename)
        with open(imagepath, 'wb') as f:
            f.write(data)
        if changed1 and tail:
            with open(os.path.join(dirpath, 'tail'), 'wb') as f:
                f.write(tail)
        if changed2 and head:
            with open(os.path.join(dirpath, 'head'), 'wb') as f:
                f.write(head)
        print(f'[*] 分析结果已保存到 {dirpath}')

        # Best effort: try to open the repaired image for a quick look.
        try:
            im = Image.open(imagepath)
            im.show()
        except Exception as e:
            print('[x] 尝试打开图片失败,失败原因:' + str(e))
    else:
        print('[*] 未进行任何处理')
    print('[*] 分析完成!')
# Script entry point: run only when executed directly, and report a missing
# argument instead of failing with IndexError.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print(f'用法: python {os.path.basename(sys.argv[0])} <图片文件路径>')
        sys.exit(1)
    filename = sys.argv[1]
    main(filename)