OpenCV 介绍和使用
OpenCV (Open Source Computer Vision Library) 是一个开源的计算机视觉和机器学习软件库。本教程将带你从基础到高级全面学习 OpenCV 的 Python 接口。
目录
- 安装与环境配置
- 图像基础操作
- 图像处理技术
- 特征检测与描述
- 视频处理
- 对象检测
- 深度学习与OpenCV
- 实战项目
安装与环境配置
安装OpenCV
# Install the main OpenCV package
pip install opencv-python
# Alternatively, install the build that also bundles the contrib modules
# (needed for cv2.xfeatures2d and cv2.face, which this tutorial uses later).
# NOTE(review): both packages provide the same cv2 module, so install one or the other, not both - confirm against the opencv-python README.
pip install opencv-contrib-python
验证安装
import cv2
print(cv2.__version__) # Print the installed OpenCV version to confirm the import works
图像基础操作
读取和显示图像
import cv2
import numpy as np
# Read the image. An optional second argument selects the decode mode:
# cv2.IMREAD_COLOR, cv2.IMREAD_GRAYSCALE or cv2.IMREAD_UNCHANGED.
img = cv2.imread('image.jpg')
# cv2.imread does not raise on failure: it returns None when the file is
# missing or cannot be decoded, so fail early with a clear message.
if img is None:
    raise FileNotFoundError("cv2.imread could not read 'image.jpg'")
# Show the image
cv2.imshow('Image Window', img)
cv2.waitKey(0)  # block until any key is pressed
cv2.destroyAllWindows()  # close every window opened by imshow
# Save the image; imwrite reports failure through its boolean return value
if not cv2.imwrite('output.jpg', img):
    raise OSError("cv2.imwrite could not write 'output.jpg'")
图像属性
print("图像形状:", img.shape) # (height, width, channels) for a colour image
print("图像大小:", img.size) # total number of array elements (height * width * channels), not the pixel count
print("数据类型:", img.dtype) # element data type of the image array
颜色空间转换
# Convert the BGR image to single-channel grayscale
gray = cv2.cvtColor(img, code=cv2.COLOR_BGR2GRAY)
# Convert the BGR image to the HSV colour space
hsv = cv2.cvtColor(img, code=cv2.COLOR_BGR2HSV)
# Show both conversion results, then wait for a key press
for title, view in (('Gray Image', gray), ('HSV Image', hsv)):
    cv2.imshow(title, view)
cv2.waitKey(0)
图像缩放与裁剪
# Example target size and crop window, derived from the image itself so the
# snippet runs as written. Replace them with the values your application needs.
height, width = img.shape[:2]
new_width, new_height = max(1, width // 2), max(1, height // 2)
x1, x2 = width // 4, 3 * width // 4
y1, y2 = height // 4, 3 * height // 4
# Resize to an explicit size; note that dsize is (width, height)
resized = cv2.resize(img, (new_width, new_height))
# Resize by scale factors instead: dsize is None and fx/fy give the ratios
resized = cv2.resize(img, None, fx=0.5, fy=0.5)
# Crop with array slicing: the row (y) range comes first, then the column (x) range
cropped = img[y1:y2, x1:x2]
图像处理技术
图像阈值处理
# Simple global threshold at 127 with a maximum value of 255
ret, thresh1 = cv2.threshold(gray, thresh=127, maxval=255, type=cv2.THRESH_BINARY)
# Adaptive threshold: mean of an 11x11 neighbourhood minus the constant 2
thresh2 = cv2.adaptiveThreshold(
    gray,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=11,
    C=2,
)
# Otsu binarisation: the threshold argument (0) is ignored and chosen automatically
ret, thresh3 = cv2.threshold(gray, thresh=0, maxval=255, type=cv2.THRESH_BINARY | cv2.THRESH_OTSU)
图像滤波
# Gaussian blur with a 5x5 kernel; sigmaX=0 lets OpenCV derive sigma from the kernel size
blur = cv2.GaussianBlur(img, ksize=(5, 5), sigmaX=0)
# Median filter with aperture size 5
median = cv2.medianBlur(img, ksize=5)
# Bilateral filter: neighbourhood diameter 9, colour sigma 75, spatial sigma 75
bilateral = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)
边缘检测
# Canny edge detection with lower hysteresis threshold 100 and upper threshold 200
edges = cv2.Canny(img, threshold1=100, threshold2=200)
# Sobel derivatives with a 5x5 kernel and 64-bit float output:
# first along x (dx=1, dy=0), then along y (dx=0, dy=1)
sobelx = cv2.Sobel(gray, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5)
sobely = cv2.Sobel(gray, ddepth=cv2.CV_64F, dx=0, dy=1, ksize=5)
形态学操作
# 5x5 all-ones structuring element shared by every operation below
kernel = np.ones((5, 5), dtype=np.uint8)
# Erosion, one pass
erosion = cv2.erode(img, kernel, iterations=1)
# Dilation, one pass
dilation = cv2.dilate(img, kernel, iterations=1)
# Opening: erosion followed by dilation
opening = cv2.morphologyEx(img, op=cv2.MORPH_OPEN, kernel=kernel)
# Closing: dilation followed by erosion
closing = cv2.morphologyEx(img, op=cv2.MORPH_CLOSE, kernel=kernel)
特征检测与描述
Harris角点检测
# cornerHarris is given a float32 copy of the grayscale image. The copy lives
# in its own variable so that `gray` stays 8-bit for the snippets that follow
# (the SIFT detector and the Haar cascade are both fed `gray` later on).
gray_f32 = np.float32(gray)
# Arguments: blockSize=2, Sobel aperture ksize=3, Harris parameter k=0.04
dst = cv2.cornerHarris(gray_f32, 2, 3, 0.04)
# Dilate the response map so each detected corner covers a few pixels
dst = cv2.dilate(dst, None)
# Mark every pixel whose response exceeds 1% of the maximum in red (BGR order).
# Note that this draws directly on `img`.
img[dst > 0.01 * dst.max()] = [0, 0, 255]
SIFT特征检测
# Build a SIFT detector and compute keypoints plus descriptors in one call
# (no mask is supplied)
sift = cv2.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(gray, mask=None)
# Draw the keypoints onto a new image and show it
img_sift = cv2.drawKeypoints(img, keypoints, outImage=None)
cv2.imshow('SIFT Features', img_sift)
cv2.waitKey(0)
SURF特征检测
# SURF is a patented ("non-free") algorithm in the contrib xfeatures2d module.
# Builds without non-free support either lack the factory or raise cv2.error
# when it is called, so turn that into an explicit, actionable error.
try:
    surf = cv2.xfeatures2d.SURF_create(400)  # 400 is the Hessian threshold
except (AttributeError, cv2.error) as exc:
    raise RuntimeError(
        "SURF is not available in this OpenCV build; use SIFT or ORB instead"
    ) from exc
keypoints, descriptors = surf.detectAndCompute(img, None)
# Draw the keypoints in blue (BGR); the rich flag (numeric value 4) also draws
# each keypoint's size and orientation
img_surf = cv2.drawKeypoints(
    img, keypoints, None, (255, 0, 0), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
)
cv2.imshow('SURF Features', img_surf)
cv2.waitKey(0)
ORB特征检测
# Build an ORB detector and compute keypoints plus descriptors (no mask)
orb = cv2.ORB_create()
keypoints, descriptors = orb.detectAndCompute(img, mask=None)
# Draw the keypoints in green (BGR) using the default drawing flags (value 0)
img_orb = cv2.drawKeypoints(
    img,
    keypoints,
    None,
    color=(0, 255, 0),
    flags=cv2.DRAW_MATCHES_FLAGS_DEFAULT,
)
cv2.imshow('ORB Features', img_orb)
cv2.waitKey(0)
特征匹配
# NOTE(review): img1/kp1/des1 and img2/kp2/des2 are not defined in this
# snippet; presumably they are the images, keypoints and descriptors of two
# pictures computed beforehand. The Hamming norm suits binary descriptors
# such as ORB's - confirm.
# Brute-force matcher with cross-checking enabled
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
# Match the two descriptor sets and order the matches by ascending distance
matches = list(bf.match(des1, des2))
matches.sort(key=lambda m: m.distance)
# Draw the ten best matches, skipping keypoints that have no match (flag value 2)
best_matches = matches[:10]
img_matches = cv2.drawMatches(
    img1, kp1, img2, kp2, best_matches, None,
    flags=cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS,
)
cv2.imshow('Feature Matches', img_matches)
cv2.waitKey(0)
视频处理
读取和显示视频
cap = cv2.VideoCapture('video.mp4')  # pass 0 instead to read from the default camera
try:
    # isOpened() is False when the source could not be opened, so the loop is skipped
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned: end of stream or read error
            break
        # Per-frame processing: convert to grayscale and display
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cv2.imshow('Video', gray)
        # Wait 25 ms between frames; pressing 'q' stops playback
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break
finally:
    # Release the source and close the windows even if processing raised
    cap.release()
    cv2.destroyAllWindows()
保存视频
# Open a fresh capture: the one used by the playback snippet has been released.
# 0 selects the default camera; pass a file path to re-encode a video instead.
cap = cv2.VideoCapture(0)
fourcc = cv2.VideoWriter_fourcc(*'XVID')
# The writer's frame size must match the frames passed to write(); take it
# from the capture instead of hard-coding it.
frame_size = (
    int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
    int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
)
out = cv2.VideoWriter('output.avi', fourcc, 20.0, frame_size)
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Per-frame processing would go here; the frame is written unchanged
        out.write(frame)
        cv2.imshow('frame', frame)
        # Pressing 'q' stops recording
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and the writer so the output file is finalised
    cap.release()
    out.release()
    cv2.destroyAllWindows()
背景减除
# 创建背景减除器
fgbg = cv2.createBackgroundSubtractorMOG2()
while True:
ret, frame = cap.read()
if not ret:
break
# 应用背景减除
fgmask = fgbg.apply(frame)
cv2.imshow('Original', frame)
cv2.imshow('Foreground', fgmask)
if cv2.waitKey(30) & 0xFF == ord('q'):
break
对象检测
人脸检测
# 加载预训练的人脸检测模型
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# 检测人脸
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
# 绘制矩形框
for (x,y,w,h) in faces:
cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)
cv2.imshow('Face Detection', img)
cv2.waitKey(0)
行人检测
# 加载HOG描述子和SVM分类器
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())
# 检测行人
boxes, weights = hog.detectMultiScale(img, winStride=(4,4), padding=(8,8), scale=1.05)
# 绘制检测结果
for (x,y,w,h) in boxes:
cv2.rectangle(img, (x,y), (x+w,y+h), (0,255,0), 2)
cv2.imshow('Pedestrian Detection', img)
cv2.waitKey(0)
使用深度学习模型进行对象检测
# 加载预训练的MobileNet SSD模型
net = cv2.dnn.readNetFromCaffe('deploy.prototxt', 'mobilenet_iter_73000.caffemodel')
classes = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
# 准备输入图像
blob = cv2.dnn.blobFromImage(cv2.resize(img, (300,300)), 0.007843, (300,300), 127.5)
# 进行预测
net.setInput(blob)
detections = net.forward()
# 解析检测结果
for i in range(detections.shape[2]):
confidence = detections[0, 0, i, 2]
if confidence > 0.5: # 置信度阈值
idx = int(detections[0, 0, i, 1])
box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
(startX, startY, endX, endY) = box.astype("int")
# 绘制边界框和标签
label = "{}: {:.2f}%".format(classes[idx], confidence * 100)
cv2.rectangle(img, (startX, startY), (endX, endY), (0,255,0), 2)
cv2.putText(img, label, (startX, startY-10),
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)
cv2.imshow("Object Detection", img)
cv2.waitKey(0)
深度学习与OpenCV
加载和使用预训练模型
# 加载TensorFlow模型
net = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')
# 加载PyTorch模型
net = cv2.dnn.readNetFromTorch('model.pt')
# 使用模型进行预测
blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(224,224),
mean=(104,117,123), swapRB=False, crop=False)
net.setInput(blob)
output = net.forward()
实时风格迁移
# 加载风格迁移模型
net = cv2.dnn.readNetFromTorch('models/instance_norm/mosaic.t7')
while True:
ret, frame = cap.read()
if not ret:
break
# 准备输入
blob = cv2.dnn.blobFromImage(frame, 1.0, (frame.shape[1], frame.shape[0]),
(103.939, 116.779, 123.680), swapRB=False, crop=False)
# 应用风格迁移
net.setInput(blob)
output = net.forward()
# 后处理
output = output.reshape((3, output.shape[2], output.shape[3]))
output[0] += 103.939
output[1] += 116.779
output[2] += 123.680
output = output.transpose(1, 2, 0)
output = np.clip(output, 0, 255).astype('uint8')
cv2.imshow('Styled', output)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
实战项目
1. 实时人脸识别
# 加载人脸检测和识别模型
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer.yml') # 预训练的人脸识别模型
while True:
ret, frame = cap.read()
if not ret:
break
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)
for (x,y,w,h) in faces:
roi_gray = gray[y:y+h, x:x+w]
# 进行识别
id_, confidence = recognizer.predict(roi_gray)
# 绘制结果
cv2.rectangle(frame, (x,y), (x+w,y+h), (255,0,0), 2)
cv2.putText(frame, f"ID: {id_} Conf: {confidence:.2f}",
(x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,0,0), 2)
cv2.imshow('Face Recognition', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
2. 文档扫描仪
def order_points(pts):
    """Order four 2-D points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Four (x, y) points as an array-like of shape (4, 2). Contour-style
            input of shape (4, 1, 2) is accepted and flattened.

    Returns:
        A float32 array of shape (4, 2) holding the points in the order
        [top-left, top-right, bottom-right, bottom-left].

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts, dtype="float32")
    if pts.shape[-1:] != (2,) or pts.size != 8:
        raise ValueError(
            f"expected four (x, y) points, got array of shape {pts.shape}"
        )
    pts = pts.reshape(4, 2)

    rect = np.zeros((4, 2), dtype="float32")
    # x + y is smallest at the top-left corner and largest at the bottom-right
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]
    # y - x is smallest at the top-right corner and largest at the bottom-left
    diff = pts[:, 1] - pts[:, 0]
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect
def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` to an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four (x, y) corner points of the region, in any order; they are
            ordered with ``order_points`` before use.

    Returns:
        The perspective-corrected image, ``maxWidth`` wide and ``maxHeight``
        high, where those are the longer of each pair of opposite edges.
    """
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: the longer of the bottom and top edges, truncated to int
    widthA = np.linalg.norm(br - bl)
    widthB = np.linalg.norm(tr - tl)
    maxWidth = max(int(widthA), int(widthB))

    # Output height: the longer of the right and left edges, truncated to int
    heightA = np.linalg.norm(tr - br)
    heightB = np.linalg.norm(tl - bl)
    maxHeight = max(int(heightA), int(heightB))

    # Destination corners in the same order as rect: tl, tr, br, bl
    dst = np.array(
        [
            [0, 0],
            [maxWidth - 1, 0],
            [maxWidth - 1, maxHeight - 1],
            [0, maxHeight - 1],
        ],
        dtype="float32",
    )

    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    return warped
# 读取图像
image = cv2.imread('document.jpg')
ratio = image.shape[0] / 500.0
orig = image.copy()
image = cv2.resize(image, (int(image.shape[1]/ratio), 500))
# 预处理
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5,5), 0)
edged = cv2.Canny(gray, 75, 200)
# 寻找轮廓
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
# 寻找文档轮廓
for c in cnts:
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)
if len(approx) == 4:
screenCnt = approx
break
# 应用透视变换
warped = four_point_transform(orig, screenCnt.reshape(4,2) * ratio)
# 二值化
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
warped = cv2.adaptiveThreshold(warped, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
cv2.THRESH_BINARY, 11, 2)
cv2.imshow("Original", cv2.resize(orig, (500, int(orig.shape[0]/ratio))))
cv2.imshow("Scanned", warped)
cv2.waitKey(0)
3. 车牌识别
# 加载预训练的车牌检测模型
plate_cascade = cv2.CascadeClassifier('haarcascade_russian_plate_number.xml')
while True:
ret, frame = cap.read()
if not ret:
break
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
plates = plate_cascade.detectMultiScale(gray, 1.1, 4)
for (x,y,w,h) in plates:
# 提取车牌区域
plate = frame[y:y+h, x:x+w]
# 预处理车牌图像
plate_gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
_, plate_thresh = cv2.threshold(plate_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# 使用Tesseract OCR识别文本
text = pytesseract.image_to_string(plate_thresh, config='--psm 7')
# 绘制结果
cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
cv2.putText(frame, text.strip(), (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
cv2.imshow('License Plate Recognition', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
总结
本教程涵盖了OpenCV Python接口的主要功能,包括:
- 图像基础操作(读取、显示、保存)
- 图像处理技术(阈值处理、滤波、边缘检测、形态学操作)
- 特征检测与匹配(Harris、SIFT、SURF、ORB)
- 视频处理(读取、保存、背景减除)
- 对象检测(人脸、行人、车牌等)
- 深度学习与OpenCV结合
- 实战项目(人脸识别、文档扫描、车牌识别)
要深入学习OpenCV,建议:
- 实践更多项目
- 阅读OpenCV官方文档
- 探索OpenCV的contrib模块
- 学习计算机视觉基础理论
- 关注OpenCV的更新和新功能
希望本教程能帮助你掌握OpenCV Python编程!