# deep_learning_with_opencv.py

# USAGE
# python deep_learning_with_opencv.py --prototxt bvlc_googlenet.prototxt --model bvlc_googlenet.caffemodel --labels synset_words.txt

# import the necessary packages
import numpy as np
import argparse
import time
import cv2

# Build the command-line interface. Every option listed here is mandatory
# and names a file on disk. The former -i/--image option is intentionally
# not registered: this script reads its frames from a video capture device
# instead of a still image.
ap = argparse.ArgumentParser()
for _short_flag, _long_flag, _help_text in (
    ("-p", "--prototxt", "path to Caffe 'deploy' prototxt file"),
    ("-m", "--model", "path to Caffe pre-trained model"),
    ("-l", "--labels", "path to ImageNet labels (i.e., syn-sets)"),
):
    ap.add_argument(_short_flag, _long_flag, required=True, help=_help_text)

# expose the parsed options as a plain dict keyed by the long option name
args = vars(ap.parse_args())

## load the input image from disk
#image = cv2.imread(args["image"])

# Load the class labels from disk. The file is read inside a ``with`` block
# so the handle is closed as soon as the contents are in memory.
with open(args["labels"]) as labels_file:
    rows = labels_file.read().strip().split("\n")

# For every row, drop everything up to and including the first space and
# keep only the first comma-separated name that follows (presumably rows
# look like "<synset id> <name>, <alias>, ..." -- confirm against the
# labels file in use). A row without a space is kept whole.
classes = [r[r.find(" ") + 1:].split(",")[0] for r in rows]

# load our serialized Caffe model (network definition + trained weights)
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])


# Live classification loop: grab frames from a capture device, classify each
# one with the network, draw the top prediction on it and show it in a
# preview window until ESC is pressed or the device stops delivering frames.
CAMERA_INDEX = 1   # index of the capture device handed to cv2.VideoCapture
ESC_KEY = 27       # key code that ends the preview loop
WINDOW_NAME = "preview"

# open the window to see the grabbed video
cv2.namedWindow(WINDOW_NAME)
vc = cv2.VideoCapture(CAMERA_INDEX)

try:
    # if the device could not be opened the loop body never runs
    while vc.isOpened():
        rval, frame = vc.read()
        if not rval or frame is None:
            # A failed grab yields no image; stop instead of feeding an
            # empty frame to the network.
            break

        # our CNN requires fixed spatial dimensions for our input image(s)
        # so we need to ensure it is resized to 224x224 pixels while
        # performing mean subtraction (104, 117, 123) to normalize the
        # input; after executing this command our "blob" now has the shape:
        # (1, 3, 224, 224)
        blob = cv2.dnn.blobFromImage(frame, 1, (224, 224), (104, 117, 123))

        # set the blob as input to the network and perform a forward-pass
        # to obtain our output classification
        net.setInput(blob)
        preds = net.forward()

        # sort the indexes of the probabilities in descending order (higher
        # probability first) and grab the top-5 predictions
        idxs = np.argsort(preds[0])[::-1][:5]

        # draw the top prediction on the frame that was just classified
        if len(idxs) > 0:
            best = idxs[0]
            text = "Label: {}, {:.2f}%".format(classes[best],
                                               preds[0][best] * 100)
            cv2.putText(frame, text, (5, 25), cv2.FONT_HERSHEY_SIMPLEX,
                        0.7, (0, 0, 255), 2)

        # Show the frame only after it has been annotated, so the label on
        # screen always belongs to the image being displayed.
        cv2.imshow(WINDOW_NAME, frame)

        # waitKey also gives the GUI time to repaint the window
        if cv2.waitKey(20) == ESC_KEY:  # exit on ESC
            break
finally:
    # always tear down the window and free the capture device, even when
    # an error interrupts the loop
    cv2.destroyWindow(WINDOW_NAME)
    vc.release()


## display the output image
#cv2.imshow("Image", image)
#cv2.waitKey(0)