Introduction to OpenCV and How to Use It

OpenCV (Open Source Computer Vision Library) is an open-source computer vision and machine learning software library. This tutorial walks you through the Python interface of OpenCV, from the basics to more advanced topics.

Contents

  1. Installation and Environment Setup
  2. Basic Image Operations
  3. Image Processing Techniques
  4. Feature Detection and Description
  5. Video Processing
  6. Object Detection
  7. Deep Learning with OpenCV
  8. Hands-on Projects

Installation and Environment Setup

Installing OpenCV

# Install the main OpenCV module
pip install opencv-python

# Install the build that also includes the extra (contrib) modules, e.g. SIFT
pip install opencv-contrib-python

Verifying the installation

import cv2

print(cv2.__version__)  # Print the OpenCV version
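The two pip packages provide the same cv2 module, so only one of them should be installed at a time. A quick, hedged way to check whether the contrib build is the one in use is to look for a contrib-only submodule (just a heuristic sketch):

import cv2

# xfeatures2d only exists in opencv-contrib-python builds, so its presence is a
# rough indicator that the contrib modules are available
print(hasattr(cv2, 'xfeatures2d'))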

Basic Image Operations

Reading and displaying images

import cv2

# Read an image
img = cv2.imread('image.jpg')  # Arguments: file path, flag (cv2.IMREAD_COLOR / IMREAD_GRAYSCALE / IMREAD_UNCHANGED)

# Display the image
cv2.imshow('Image Window', img)
cv2.waitKey(0)  # Wait indefinitely for a key press
cv2.destroyAllWindows()  # Close all windows

# Save the image
cv2.imwrite('output.jpg', img)
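One pitfall worth knowing: cv2.imread does not raise an error when the file is missing or unreadable; it silently returns None. A small defensive sketch, checking the result right after reading:

# imread returns None instead of raising when the file cannot be read
img = cv2.imread('image.jpg')
if img is None:
    raise FileNotFoundError("Could not read 'image.jpg'")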

Image properties

print("Shape:", img.shape)      # (height, width, channels); grayscale images have no channel axis
print("Size:", img.size)        # Total number of elements (pixels x channels)
print("Data type:", img.dtype)  # Element type, typically uint8

Color space conversion

# BGR to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# BGR to HSV
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

# Show the converted results
cv2.imshow('Gray Image', gray)
cv2.imshow('HSV Image', hsv)
cv2.waitKey(0)

Resizing and cropping

# Resize
resized = cv2.resize(img, (new_width, new_height))  # Pass an explicit (width, height) target size
resized = cv2.resize(img, None, fx=0.5, fy=0.5)     # Or scale by a factor along each axis

# Crop (plain NumPy slicing)
cropped = img[y1:y2, x1:x2]  # y1:y2 is the row (height) range, x1:x2 the column (width) range
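cv2.resize also accepts an interpolation flag; as a rule of thumb, cv2.INTER_AREA tends to work best when shrinking and cv2.INTER_LINEAR or cv2.INTER_CUBIC when enlarging. A short sketch using those standard flags:

# Downscale with INTER_AREA, upscale with INTER_CUBIC
smaller = cv2.resize(img, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
larger = cv2.resize(img, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC)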

Image Processing Techniques

Thresholding

# Simple (global) thresholding
ret, thresh1 = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Adaptive thresholding (threshold computed per neighbourhood)
thresh2 = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, 
                               cv2.THRESH_BINARY, 11, 2)

# Otsu's binarization (threshold chosen automatically from the histogram)
ret, thresh3 = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

Image filtering

# Gaussian blur
blur = cv2.GaussianBlur(img, (5,5), 0)

# Median blur
median = cv2.medianBlur(img, 5)

# Bilateral filtering (edge-preserving smoothing)
bilateral = cv2.bilateralFilter(img, 9, 75, 75)

Edge detection

# Canny edge detection
edges = cv2.Canny(img, 100, 200)  # Arguments: lower threshold, upper threshold

# Sobel operator (horizontal and vertical gradients)
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=5)
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=5)
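The two Sobel images are signed gradients along each axis; to display a single edge image they are commonly combined into a gradient magnitude and converted back to 8-bit. A minimal sketch:

# Combine horizontal and vertical gradients into one magnitude image
magnitude = cv2.magnitude(sobelx, sobely)
magnitude = cv2.convertScaleAbs(magnitude)  # scale/clip back to uint8 for display
cv2.imshow('Gradient Magnitude', magnitude)
cv2.waitKey(0)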

Morphological operations

import numpy as np

kernel = np.ones((5,5), np.uint8)

# Erosion
erosion = cv2.erode(img, kernel, iterations=1)

# Dilation
dilation = cv2.dilate(img, kernel, iterations=1)

# Opening (erosion followed by dilation)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

Feature Detection and Description

Harris corner detection

gray = np.float32(gray)
dst = cv2.cornerHarris(gray, 2, 3, 0.04)
dst = cv2.dilate(dst, None)
img[dst > 0.01 * dst.max()] = [0,0,255]  # Mark corners in red (BGR)

SIFT feature detection

# SIFT is part of the main module since OpenCV 4.4; older versions need opencv-contrib-python
sift = cv2.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(gray, None)

# Draw the keypoints
img_sift = cv2.drawKeypoints(img, keypoints, None)
cv2.imshow('SIFT Features', img_sift)
cv2.waitKey(0)

SURF feature detection

# SURF is patented and lives in the contrib xfeatures2d module; recent pip builds ship
# without it (using it requires an OpenCV build with OPENCV_ENABLE_NONFREE)
surf = cv2.xfeatures2d.SURF_create(400)  # Hessian threshold of 400
keypoints, descriptors = surf.detectAndCompute(img, None)

# Draw the keypoints
img_surf = cv2.drawKeypoints(img, keypoints, None, (255,0,0), 4)
cv2.imshow('SURF Features', img_surf)
cv2.waitKey(0)

ORB feature detection

orb = cv2.ORB_create()
keypoints, descriptors = orb.detectAndCompute(img, None)

# Draw the keypoints
img_orb = cv2.drawKeypoints(img, keypoints, None, color=(0,255,0), flags=0)
cv2.imshow('ORB Features', img_orb)
cv2.waitKey(0)

Feature matching

# Create a brute-force matcher (NORM_HAMMING suits binary descriptors such as ORB)
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# Match the descriptors of the two images (des1, des2)
matches = bf.match(des1, des2)

# Sort the matches by distance (best first)
matches = sorted(matches, key=lambda x: x.distance)

# Draw the 10 best matches
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:10], None, flags=2)
cv2.imshow('Feature Matches', img_matches)
cv2.waitKey(0)
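For float descriptors such as SIFT, a common alternative is k-nearest-neighbour matching with L2 distance followed by Lowe's ratio test. A sketch, assuming des1/des2 and kp1/kp2 come from SIFT on img1/img2:

# k-NN matching, then keep matches clearly better than their runner-up (ratio test)
bf = cv2.BFMatcher(cv2.NORM_L2)
matches = bf.knnMatch(des1, des2, k=2)
good = [m for m, n in matches if m.distance < 0.75 * n.distance]

img_good = cv2.drawMatches(img1, kp1, img2, kp2, good[:10], None, flags=2)
cv2.imshow('Ratio-Test Matches', img_good)
cv2.waitKey(0)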

Video Processing

Reading and displaying video

cap = cv2.VideoCapture('video.mp4')  # Pass 0 instead of a path to open the default camera

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Process the frame
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    cv2.imshow('Video', gray)

    if cv2.waitKey(25) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
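The capture object also exposes the stream's metadata through cap.get, which is handy for choosing a matching waitKey delay or a VideoWriter frame size. A short sketch using the standard property constants:

fps = cap.get(cv2.CAP_PROP_FPS)
frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"{width}x{height} @ {fps:.1f} fps, {frames} frames")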

Saving video

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, 20.0, (640,480))  # Frame size must match the frames written

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Write the (optionally processed) frame to the output file
    out.write(frame)

    cv2.imshow('frame', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()

Background subtraction

# Create a background subtractor
fgbg = cv2.createBackgroundSubtractorMOG2()

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Apply background subtraction to obtain a foreground mask
    fgmask = fgbg.apply(frame)

    cv2.imshow('Original', frame)
    cv2.imshow('Foreground', fgmask)

    if cv2.waitKey(30) & 0xFF == ord('q'):
        break
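The foreground mask can be turned into moving-object detections by cleaning it up and extracting contours. A minimal sketch, meant to sit inside the loop right after fgbg.apply, assuming numpy is imported as np and OpenCV 4.x (where findContours returns two values):

    # Remove small noise from the mask, then box any sufficiently large region
    fgmask = cv2.morphologyEx(fgmask, cv2.MORPH_OPEN, np.ones((3,3), np.uint8))
    contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    for c in contours:
        if cv2.contourArea(c) > 500:  # area threshold is an arbitrary choice
            x, y, w, h = cv2.boundingRect(c)
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 2)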

Object Detection

Face detection

# Load the pretrained Haar cascade face detector bundled with OpenCV
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Detect faces in the grayscale image
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

# Draw a rectangle around each detected face
for (x,y,w,h) in faces:
    cv2.rectangle(img, (x,y), (x+w,y+h), (255,0,0), 2)

cv2.imshow('Face Detection', img)
cv2.waitKey(0)
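The same cascade approach extends to other objects; for instance, eyes can be searched only inside each detected face region, which is faster and less error-prone. A sketch using the bundled haarcascade_eye.xml:

eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_eye.xml')

for (x, y, w, h) in faces:
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)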

Pedestrian detection

# Load the HOG descriptor with the built-in default people-detector SVM
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# Detect pedestrians
boxes, weights = hog.detectMultiScale(img, winStride=(4,4), padding=(8,8), scale=1.05)

# Draw the detections
for (x,y,w,h) in boxes:
    cv2.rectangle(img, (x,y), (x+w,y+h), (0,255,0), 2)

cv2.imshow('Pedestrian Detection', img)
cv2.waitKey(0)

Object detection with a deep learning model

# Load a pretrained MobileNet-SSD Caffe model
net = cv2.dnn.readNetFromCaffe('deploy.prototxt', 'mobilenet_iter_73000.caffemodel')
classes = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]

# Prepare the input blob; keep the original frame size for rescaling the boxes later
(h, w) = img.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(img, (300,300)), 0.007843, (300,300), 127.5)

# Run the forward pass
net.setInput(blob)
detections = net.forward()

# Parse the detection results
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > 0.5:  # Confidence threshold
        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # Draw the bounding box and label
        label = "{}: {:.2f}%".format(classes[idx], confidence * 100)
        cv2.rectangle(img, (startX, startY), (endX, endY), (0,255,0), 2)
        cv2.putText(img, label, (startX, startY-10), 
                   cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 2)

cv2.imshow("Object Detection", img)
cv2.waitKey(0)

Deep Learning with OpenCV

Loading and using pretrained models

# Load a TensorFlow model
net = cv2.dnn.readNetFromTensorflow('frozen_inference_graph.pb', 'graph.pbtxt')

# Load a Torch7 model (.t7). Note that readNetFromTorch reads Torch7 files, not PyTorch
# .pt files; PyTorch models are typically exported to ONNX and loaded with readNetFromONNX
net = cv2.dnn.readNetFromTorch('model.t7')

# Run the model on an image
blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(224,224), 
                            mean=(104,117,123), swapRB=False, crop=False)
net.setInput(blob)
output = net.forward()
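What output contains depends on the network; for a plain classification model it is typically a 1xN array of class scores, so the prediction is simply the index with the highest score. A sketch, where class_names is a hypothetical label list matching the model:

import numpy as np

class_id = int(np.argmax(output[0]))   # index of the highest-scoring class
score = float(output[0][class_id])
print(class_names[class_id], score)    # class_names is a hypothetical label list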

Real-time style transfer

# Load a style-transfer model (Torch7 .t7 format)
net = cv2.dnn.readNetFromTorch('models/instance_norm/mosaic.t7')

cap = cv2.VideoCapture(0)  # read frames from the default camera

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Prepare the input blob (full frame size, mean-subtracted)
    blob = cv2.dnn.blobFromImage(frame, 1.0, (frame.shape[1], frame.shape[0]),
                               (103.939, 116.779, 123.680), swapRB=False, crop=False)

    # Apply the style transfer network
    net.setInput(blob)
    output = net.forward()

    # Post-process: add the per-channel mean back and convert to a displayable uint8 image
    output = output.reshape((3, output.shape[2], output.shape[3]))
    output[0] += 103.939
    output[1] += 116.779
    output[2] += 123.680
    output = output.transpose(1, 2, 0)
    output = np.clip(output, 0, 255).astype('uint8')

    cv2.imshow('Styled', output)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

Hands-on Projects

1. Real-time face recognition

# Load the face detector and the face recognizer (cv2.face requires opencv-contrib-python)
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer.yml')  # A previously trained face-recognition model

cap = cv2.VideoCapture(0)  # read frames from the default camera

while True:
    ret, frame = cap.read()
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5)

    for (x,y,w,h) in faces:
        roi_gray = gray[y:y+h, x:x+w]

        # Run the recognizer (for LBPH, a lower confidence value means a closer match)
        id_, confidence = recognizer.predict(roi_gray)

        # Draw the result
        cv2.rectangle(frame, (x,y), (x+w,y+h), (255,0,0), 2)
        cv2.putText(frame, f"ID: {id_} Conf: {confidence:.2f}", 
                   (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255,0,0), 2)

    cv2.imshow('Face Recognition', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
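For completeness, a trainer.yml file like the one loaded above can be produced with the same LBPH recognizer. A hedged sketch, where face_images is a hypothetical list of grayscale face crops and labels the matching integer IDs:

import numpy as np

recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.train(face_images, np.array(labels))  # face_images: list of grayscale crops
recognizer.write('trainer.yml')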

2. Document scanner

import cv2
import numpy as np

def order_points(pts):
    # Order the four corners as top-left, top-right, bottom-right, bottom-left
    rect = np.zeros((4, 2), dtype="float32")
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect

def four_point_transform(image, pts):
    # Warp the quadrilateral given by pts to a top-down, rectangular view
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
    return warped

# Read the image, remember the scale factor, and resize to a working height of 500 px
image = cv2.imread('document.jpg')
ratio = image.shape[0] / 500.0
orig = image.copy()
image = cv2.resize(image, (int(image.shape[1]/ratio), 500))

# Preprocess: grayscale, blur, edge detection
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (5,5), 0)
edged = cv2.Canny(gray, 75, 200)

# Find contours and keep the five largest
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

# Look for a contour that approximates to four points: the document outline
for c in cnts:
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)

    if len(approx) == 4:
        screenCnt = approx
        break

# Apply the perspective transform on the full-resolution original
warped = four_point_transform(orig, screenCnt.reshape(4,2) * ratio)

# Binarize to give the result a clean, scanned look
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
warped = cv2.adaptiveThreshold(warped, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                              cv2.THRESH_BINARY, 11, 2)

cv2.imshow("Original", cv2.resize(orig, (500, int(orig.shape[0]/ratio))))
cv2.imshow("Scanned", warped)
cv2.waitKey(0)

3. License plate recognition

import pytesseract  # Python wrapper for the Tesseract OCR engine (installed separately)

# Load a pretrained Haar cascade for license plates
plate_cascade = cv2.CascadeClassifier('haarcascade_russian_plate_number.xml')

cap = cv2.VideoCapture(0)  # read frames from the default camera

while True:
    ret, frame = cap.read()
    if not ret:
        break

    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    plates = plate_cascade.detectMultiScale(gray, 1.1, 4)

    for (x,y,w,h) in plates:
        # Extract the plate region
        plate = frame[y:y+h, x:x+w]

        # Preprocess the plate image
        plate_gray = cv2.cvtColor(plate, cv2.COLOR_BGR2GRAY)
        _, plate_thresh = cv2.threshold(plate_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Recognize the text with Tesseract OCR (--psm 7 treats the image as one text line)
        text = pytesseract.image_to_string(plate_thresh, config='--psm 7')

        # Draw the result
        cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0), 2)
        cv2.putText(frame, text.strip(), (x,y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)

    cv2.imshow('License Plate Recognition', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

Summary

This tutorial covered the main features of the OpenCV Python interface, including:

  1. Basic image operations (reading, displaying, saving)
  2. Image processing techniques (filtering, edge detection, morphological operations)
  3. Feature detection and matching (SIFT, SURF, ORB)
  4. Video processing (reading, saving, background subtraction)
  5. Object detection (faces, pedestrians, license plates, etc.)
  6. Combining deep learning with OpenCV
  7. Hands-on projects (face recognition, document scanning, license plate recognition)

To go further with OpenCV, consider:

  1. Practicing on more projects
  2. Reading the official OpenCV documentation
  3. Exploring the OpenCV contrib modules
  4. Studying the fundamentals of computer vision
  5. Following OpenCV releases and new features

We hope this tutorial helps you become productive with OpenCV in Python!








