Skip to content

hoover-snoop2

Image classification

liquidinvestigations/hoover-snoop2

Image classification

`snoop.data.analyzers.image_classification` #

Task to call a service that runs object detection and/or image classification on images

Attributes#

`CLASSIFICATION_MIN_SPEED_BPS` #

Minimum reference speed for this task. Saved as 10% of the Average Success Speed in the Admin UI. The timeout is calculated using this value, the request file size, and the `CLASSIFICATION_TIMEOUT_BASE` constant.

`CLASSIFICATION_TIMEOUT_BASE` #

Minimum number of seconds to wait for this service.

`CLASSIFICATION_TIMEOUT_MAX` #

Maximum number of seconds to wait for this service.

`DETECT_OBJECTS_MIN_SPEED_BPS` #

Minimum reference speed for this task. Saved as 10% of the Average Success Speed in the Admin UI. The timeout is calculated using this value, the request file size, and the `DETECT_OBJECTS_TIMEOUT_BASE` constant.

`DETECT_OBJECTS_TIMEOUT_BASE` #

Minimum number of seconds to wait for this service.

`DETECT_OBJECTS_TIMEOUT_MAX` #

Maximum number of seconds to wait for this service.

`IMAGE_CLASSIFICATION_MIME_TYPES` #

Based on pillow.readthedocs.io/en/stable/handbook/image-file-formats.html#image-file-formats

Functions#

`call_image_classification_service(imagedata, filename, data_size)` #

Executes an HTTP POST request to the image classification service.

Source code in snoop/data/analyzers/image_classification.py

def call_image_classification_service(imagedata, filename, data_size):
    """Executes an HTTP POST request to the image classification service.

    The image is uploaded as the multipart form field ``image``. The request
    timeout grows with ``data_size`` and is capped at
    ``CLASSIFICATION_TIMEOUT_MAX``.

    Args:
        imagedata: file-like object (or bytes) holding the image to upload.
        filename: name sent along with the uploaded file.
        data_size: size used to scale the timeout.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        SnoopTaskBroken: if the service does not answer with status 200 and
            a Content-Type of exactly ``application/json``.
    """

    url = settings.SNOOP_IMAGE_CLASSIFICATION_URL
    timeout = min(
        CLASSIFICATION_TIMEOUT_MAX,
        int(CLASSIFICATION_TIMEOUT_BASE + data_size / CLASSIFICATION_MIN_SPEED_BPS)
    )

    resp = requests.post(url, files={'image': (filename, imagedata)}, timeout=timeout)

    # Use .get() so a response without a Content-Type header is reported as a
    # broken task instead of escaping as a bare KeyError.
    content_type = resp.headers.get('Content-Type')
    if resp.status_code != 200 or content_type != 'application/json':
        log.error(resp.content)
        raise SnoopTaskBroken('Image classification service could not process the image',
                              f'image_classification_http_{resp.status_code}')

    return resp.json()

`call_object_detection_service(imagedata, filename, data_size)` #

Executes an HTTP POST request to the object detection service.

Source code in snoop/data/analyzers/image_classification.py

def call_object_detection_service(imagedata, filename, data_size):
    """Executes an HTTP POST request to the object detection service.

    The image is uploaded as the multipart form field ``image``. The request
    timeout grows with ``data_size`` and is capped at
    ``DETECT_OBJECTS_TIMEOUT_MAX``.

    Args:
        imagedata: file-like object (or bytes) holding the image to upload.
        filename: name sent along with the uploaded file.
        data_size: size used to scale the timeout.

    Returns:
        The decoded JSON body of the service response.

    Raises:
        SnoopTaskBroken: if the service does not answer with status 200 and
            a Content-Type of exactly ``application/json``.
    """

    url = settings.SNOOP_OBJECT_DETECTION_URL
    timeout = min(
        DETECT_OBJECTS_TIMEOUT_MAX,
        int(DETECT_OBJECTS_TIMEOUT_BASE + data_size / DETECT_OBJECTS_MIN_SPEED_BPS)
    )

    resp = requests.post(url, files={'image': (filename, imagedata)}, timeout=timeout)

    # Use .get() so a response without a Content-Type header is reported as a
    # broken task instead of escaping as a bare KeyError.
    content_type = resp.headers.get('Content-Type')
    if resp.status_code != 200 or content_type != 'application/json':
        log.error(resp.content)
        raise SnoopTaskBroken('Object detection service could not process the image',
                              f'object_detection_http_{resp.status_code}')

    return resp.json()

`can_detect(blob)` #

Return true if the image type is supported.

This will return true for all image types that can be converted into .jpg.

Source code in snoop/data/analyzers/image_classification.py

def can_detect(blob):
    """Return True if the image type is supported, False otherwise.

    This will return true for all image types that can be converted into .jpg,
    i.e. those whose mime type is listed in IMAGE_CLASSIFICATION_MIME_TYPES.
    """
    # Return the membership test itself so unsupported types yield an explicit
    # False rather than an implicit None.
    return blob.mime_type in IMAGE_CLASSIFICATION_MIME_TYPES

`classify_image(blob)` #

Calls the image classification service for an image blob.

Filters the results by probability. The limit is given by PROBABILITY_LIMIT.

Source code in snoop/data/analyzers/image_classification.py

@snoop_task('image_classification.classify_image', queue='img-cls')
@returns_json_blob
def classify_image(blob):
    """Run the image classification service on an image blob.

    JPEG blobs are streamed to the service as-is; any other supported type is
    first converted to JPEG in memory. Only predictions whose score reaches
    PROBABILITY_LIMIT are kept, each as a ``{'class': ..., 'score': ...}`` dict.

    Raises SnoopTaskBroken when classification is disabled for the current
    collection or the blob's mime type is not supported.
    """
    enabled = current_collection().image_classification_classify_images_enabled
    if not (enabled and can_detect(blob)):
        raise SnoopTaskBroken('image classification disabled', 'img_classification_disabled')

    # The upload is named after the first File record pointing at this blob.
    filename = models.File.objects.filter(original=blob.pk)[0].name

    if blob.mime_type != 'image/jpeg':
        jpeg_stream = io.BytesIO(convert_image(blob))
        predictions = call_image_classification_service(jpeg_stream, filename, blob.size)
    else:
        with blob.open() as f:
            predictions = call_image_classification_service(f, filename, blob.size)

    kept = []
    for hit in predictions:
        score = int(hit[1])
        if score < PROBABILITY_LIMIT:
            continue
        kept.append({'class': hit[0], 'score': score})
    return kept

`convert_image(blob)` #

Convert image to jpg

Source code in snoop/data/analyzers/image_classification.py

def convert_image(blob):
    """Re-encode the blob's image as JPEG and return the encoded bytes.

    Images that are not already in RGB mode are converted to RGB before
    saving. Raises SnoopTaskBroken if Pillow cannot identify the image.
    """
    output = io.BytesIO()
    with blob.open() as source:
        try:
            picture = Image.open(source)
        except UnidentifiedImageError:
            raise SnoopTaskBroken('Cannot convert image to jpg.',
                                  'image_classification_jpg_conversion_error')
        rgb_picture = picture if picture.mode == 'RGB' else picture.convert('RGB')
        # Save while the source file is still open.
        rgb_picture.save(output, format='JPEG')
    return output.getvalue()

`detect_objects(blob)` #

Calls the object detection service for an image blob.

Filters the results by probability. The limit is given by PROBABILITY_LIMIT.

Source code in snoop/data/analyzers/image_classification.py

@snoop_task('image_classification.detect_objects', queue='img-cls')
@returns_json_blob
def detect_objects(blob):
    """Run the object detection service on an image blob.

    JPEG blobs are streamed to the service as-is; any other supported type is
    first converted to JPEG in memory. Only detections whose score reaches
    PROBABILITY_LIMIT are kept, each as an ``{'object': ..., 'score': ...}`` dict.

    Raises SnoopTaskBroken when object detection is disabled for the current
    collection or the blob's mime type is not supported.
    """
    enabled = current_collection().image_classification_object_detection_enabled
    if not (enabled and can_detect(blob)):
        raise SnoopTaskBroken('image object detection disabled', 'img_obj_detection_disabled')

    # The upload is named after the first File record pointing at this blob.
    filename = models.File.objects.filter(original=blob.pk)[0].name

    if blob.mime_type != 'image/jpeg':
        jpeg_stream = io.BytesIO(convert_image(blob))
        detections = call_object_detection_service(jpeg_stream, filename, blob.size)
    else:
        with blob.open() as f:
            detections = call_object_detection_service(f, filename, blob.size)

    kept = []
    for hit in detections:
        score = int(hit.get('percentage_probability'))
        if score < PROBABILITY_LIMIT:
            continue
        kept.append({'object': hit.get('name'), 'score': score})
    return kept