move miscellaneous python scripts to softcascade module

2013-01-29 17:47:35 +04:00
parent 4ba8b53152
commit 7f80054dfd
6 changed files with 0 additions and 0 deletions
--- a/modules/softcascade/misc/detections2negatives.py
+++ b/modules/softcascade/misc/detections2negatives.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+
+import sys, os, os.path, glob, math, cv2, string, random
+from datetime import datetime
+from optparse import OptionParser
+import re
+import numpy as np
+from xml.dom import minidom
+
+def resize(image, d_w, d_h):
+    """Resize `image` to d_w x d_h pixels.
+
+    When the target is smaller than the source in either dimension, the image
+    is first filtered with a Gaussian kernel derived from the shrink ratio and
+    then resampled with INTER_AREA; otherwise it is resampled directly with
+    INTER_CUBIC.
+    """
+    src_h, src_w = image.shape[0], image.shape[1]
+    shrinking = (d_h < src_h) or (d_w < src_w)
+
+    if not shrinking:
+        return cv2.resize(image, (d_w, d_h), None, 0, 0, cv2.INTER_CUBIC)
+
+    ratio = min(d_h / float(src_h), d_w / float(src_w))
+
+    # kernel size and sigma both grow as the shrink ratio gets smaller
+    # NOTE(review): getGaussianKernel returns a single column, so filter2D
+    # presumably smooths along one axis only -- confirm this is intended.
+    kernel = cv2.getGaussianKernel(int( 5 / (2 * ratio)), 0.5 / ratio)
+    smoothed = cv2.filter2D(image, cv2.CV_8UC3, kernel)
+    return cv2.resize(smoothed, (d_w, d_h), None, 0, 0, cv2.INTER_AREA)
+
+def det2negative(xmldoc, opath):
+    """Crop detector responses out of their source frames as negative samples.
+
+    For every <sample> element of `xmldoc` that holds at least one detection,
+    the image named by its <path> element is loaded.  For every detection the
+    frame is rescaled so that the detection becomes 64 pixels high, and a
+    52x84 window (32x64 plus a 10 pixel margin on every side) is written to
+    the directory `opath` under a random 8-letter name.  Windows that do not
+    fit completely inside the rescaled frame are skipped.
+    """
+    for sample in xmldoc.getElementsByTagName('sample'):
+        detections = sample.getElementsByTagName('detections')
+        detections = minidom.parseString(detections[0].toxml())
+        detections = detections.getElementsByTagName("_")
+        if len(detections) == 0:
+            continue
+
+        path = sample.getElementsByTagName("path")[0].firstChild.nodeValue
+        mat = cv2.imread(path)
+        if mat is None:
+            # cv2.imread returns None for a missing or unreadable file
+            print "can not read", path
+            continue
+        mat_h, mat_w = mat.shape[:2]
+
+        for detection in detections:
+            for each in detection.childNodes:
+                # NOTE: eval() executes text taken from the XML file; only
+                # feed this script files produced by a trusted detector run.
+                rect = eval(re.sub( r"\b\s\b", ",", re.sub(r"\n", "[", each.nodeValue )) + "]")
+                print rect
+
+                ratio = 64.0 / rect[3]
+
+                print rect, ratio
+                # Always rescale the original frame: overwriting `mat` would
+                # make later detections resample an already resampled image.
+                scaled = resize(mat, int(round(mat_w * ratio)), int(round(mat_h * ratio)))
+
+                x0 = int(round(ratio * rect[0])) - 10
+                y0 = int(round(ratio * rect[1])) - 10
+                x1 = x0 + 32 + 20
+                y1 = y0 + 64 + 20
+                if x0 < 0 or y0 < 0:
+                    # negative indices would wrap around in numpy slicing
+                    continue
+
+                cropped = scaled[y0:y1, x0:x1, :]
+                cr_h, cr_w = cropped.shape[:2]
+                # keep only windows that lie completely inside the frame
+                if cr_h == 84 and cr_w == 52:
+                    name = ''.join(random.choice(string.lowercase) for i in range(8)) + ".png"
+                    cv2.imwrite(os.path.join(opath, name), cropped)
+
+if __name__ == "__main__":
+    # Command line: -i <folder with detector xml output> is mandatory,
+    # -d <folder to create the output directory in> defaults to the cwd.
+    parser = OptionParser()
+    parser.add_option("-i", "--input", dest="input", metavar="DIRECTORY", type="string",
+                       help="Path to the xml collection folder.")
+    parser.add_option("-d", "--output-dir", dest="output", metavar="DIRECTORY", type="string",
+                       help="Path to store data", default=".")
+
+    options, args = parser.parse_args()
+    if not options.input:
+        parser.error("Input folder is required.")
+
+    # every run writes into its own time-stamped directory
+    stamp = datetime.now().strftime("negatives" + "-%Y-%m-%d-%H-%M-%S")
+    opath = os.path.join(options.output, stamp)
+    os.mkdir(opath)
+
+    xml_pattern = os.path.join(options.input, "set[0][0]_V0[0][5].seq.xml")
+    for xml_file in glob.iglob(xml_pattern):
+        print xml_file
+        det2negative(minidom.parse(xml_file), opath)
--- a/modules/softcascade/misc/roc_caltech.py
+++ b/modules/softcascade/misc/roc_caltech.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+
+import argparse
+import sft
+
+import sys, os, os.path, glob, math, cv2, re
+from datetime import datetime
+import numpy
+
+if __name__ == "__main__":
+    # Annotation file to dump; an optional first command line argument
+    # overrides the built-in default location.
+    path = "/home/kellan/datasets/caltech/set00/V000.txt"
+    if len(sys.argv) > 1:
+        path = sys.argv[1]
+
+    # open annotation file; the with-block closes it once parsing is done
+    with open(path) as f:
+        annotations = sft.parse_caltech(f)
+
+    for each in annotations:
+        print each
--- a/modules/softcascade/misc/roc_test.py
+++ b/modules/softcascade/misc/roc_test.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+
+import argparse
+import sft
+
+import sys, os, os.path, glob, math, cv2
+from datetime import datetime
+import numpy
+
+# One matplotlib color code per tested cascade; the main loop below picks the
+# entry at the cascade's position on the command line.
+plot_colors = ['b', 'c', 'r', 'g', 'm']
+
+# Named colors as channel triples in blue, green, red order.
+#       "key"   : (  b,   g,   r)
+bgr = { "red"   : (  0,   0, 255),
+        "green" : (  0, 255,   0),
+        "blue"  : (255,   0 ,  0)}
+
+def range(s):
+    """argparse `type` callback: parse "lb,rb" into a tuple of two ints.
+
+    NOTE: the name shadows the builtin range() inside this script; it is kept
+    because the argument parser below refers to it by this name.
+
+    Raises argparse.ArgumentTypeError when `s` is not exactly two
+    comma-separated integers.
+    """
+    try:
+        lb, rb = map(int, s.split(','))
+    except (ValueError, AttributeError):
+        # only conversion/shape problems are user errors; anything else
+        # (e.g. KeyboardInterrupt) must propagate untouched
+        raise argparse.ArgumentTypeError("Must be lb, rb")
+    return lb, rb
+
+def call_parser(f, a):
+    """Call the annotation parser sft.parse_<f> with the single argument `a`.
+
+    `f` is the annotation format name and `a` the value handed to the parser
+    (the annotations path given on the command line).  The parser is looked
+    up by attribute instead of building a source string for eval(), so a
+    path containing quotes can neither break the call nor inject code.
+
+    Raises AttributeError when sft has no parser for the format.
+    """
+    return getattr(sft, "parse_" + f)(a)
+
+if __name__ == "__main__":
+    # Runs every cascade given on the command line over the image sequence,
+    # matches its detections against the annotations and draws one miss-rate
+    # versus false-positives-per-image curve per cascade.
+    parser = argparse.ArgumentParser(description = 'Plot ROC curve using Caltech method of per image detection performance estimation.')
+
+    # positional
+    parser.add_argument("cascade",     help = "Path to the tested detector.",  nargs='+')
+    parser.add_argument("input",       help = "Image sequence pattern.")
+    parser.add_argument("annotations", help = "Path to the annotations.")
+
+    # optional
+    parser.add_argument("-m", "--min_scale", dest = "min_scale", type = float, metavar= "fl",   help = "Minimum scale to be tested.",               default = 0.4)
+    parser.add_argument("-M", "--max_scale", dest = "max_scale", type = float, metavar= "fl",   help = "Maximum scale to be tested.",               default = 5.0)
+    parser.add_argument("-o", "--output",    dest = "output",    type = str,   metavar= "path", help = "Path to store resulting image.",           default = "./roc.png")
+    parser.add_argument("-n", "--nscales",   dest = "nscales",   type = int,   metavar= "n",    help = "Preferred count of scales from min to max.", default = 55)
+
+    parser.add_argument("-r", "--scale-range",          dest = "scale_range", type = range,  default = (128 * 0.4, 128 * 2.4))
+    parser.add_argument("-e", "--extended-range-ratio", dest = "ext_ratio",   type = float,  default = 1.25)
+    parser.add_argument("-t", "--title",                dest = "title",       type = str,    default = "ROC curve Bahnhof")
+
+    # required
+    parser.add_argument("-f", "--anttn-format", dest = "anttn_format", choices = ['inria', 'caltech', "idl"], help = "Annotation file for test sequence.", required = True)
+    parser.add_argument("-l", "--labels", dest = "labels" ,required=True,     help = "Plot labels for legend.",       nargs='+')
+
+    args = parser.parse_args()
+
+    print args.scale_range
+
+    print args.cascade
+    # parse annotations
+    sft.initPlot(args.title)
+    samples = call_parser(args.anttn_format, args.annotations)
+    for idx, each in enumerate(args.cascade):
+        print each
+        cascade = sft.cascade(args.min_scale, args.max_scale, args.nscales, each)
+        pattern = args.input
+        camera =  cv2.VideoCapture(pattern)
+
+        # for plotting over dataset
+        nannotated  = 0
+        nframes     = 0
+
+        confidenses = []
+        tp          = []
+        ignored     = []
+
+        while True:
+            ret, img = camera.read()
+            if not ret:
+                break
+
+            # annotations are keyed by the file name of the current frame
+            name = pattern % (nframes,)
+            _, tail = os.path.split(name)
+
+            boxes = sft.filter_for_range(samples[tail], args.scale_range, args.ext_ratio)
+
+            nannotated = nannotated + len(boxes)
+            nframes = nframes + 1
+            rects, confs = cascade.detect(img, rois = None)
+
+            if confs is None:
+                continue
+
+            dts = sft.convert2detections(rects, confs)
+
+            # descending order, the same order convert2detections gives dts
+            confs = confs.tolist()[0]
+            confs.sort(reverse = True)
+            confidenses = confidenses + confs
+
+            matched, skip_list = sft.match(boxes, dts)
+            tp = tp + matched
+            ignored = ignored + skip_list
+
+            print nframes, nannotated
+
+        fppi, miss_rate = sft.computeROC(confidenses, tp, nannotated, nframes, ignored)
+        # cycle through the palette so more cascades than colors do not
+        # raise IndexError
+        sft.plotLogLog(fppi, miss_rate, plot_colors[idx % len(plot_colors)])
+
+    sft.showPlot(args.output, args.labels)
--- a/modules/softcascade/misc/scale_caltech.py
+++ b/modules/softcascade/misc/scale_caltech.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+
+import sys, os, os.path, glob, math, cv2
+from datetime import datetime
+from optparse import OptionParser
+import re
+import numpy as np
+
+def extractPositive(f, path, opath, octave, min_possible):
+    """Cut non-occluded "person" boxes out of one Caltech sequence.
+
+    f            -- iterable over the lines of the sequence annotation file
+    path         -- folder holding the frames of the sequence (I0NNNN.jpg)
+    opath        -- folder the cropped samples are written to
+    octave       -- scale of the model; the sample size is
+                    int(64 * octave) x int(128 * octave) plus a border of
+                    int(20 * octave) pixels on every side
+    min_possible -- boxes lower than this many pixels are skipped
+
+    Every accepted box is padded to the model aspect ratio, cropped (missing
+    image area is replicated from the border), resized to the sample size and
+    written twice: as is and mirrored.  Each sample is also shown in a window;
+    pressing Esc terminates the script.
+    """
+    # sft.py sits next to this script and provides resize_sample(); the
+    # original code used it without importing it.
+    import sft
+
+    newobj = re.compile("^lbl=\'(\w+)\'\s+str=(\d+)\s+end=(\d+)\s+hide=0$")
+    pos    = re.compile("^pos\s=(\[[((\d+\.+\d*)|\s+|\;)]*\])$")
+    occl   = re.compile("^occl\s*=(\[[0-1|\s]*\])$")
+
+    whole_mod_w = int(64  * octave) + 2 * int(20 * octave)
+    whole_mod_h = int(128 * octave) + 2 * int(20 * octave)
+
+    goNext = 0
+    start  = 0
+    end    = 0
+
+    person_id = -1
+
+    boxes = []
+    occls = []
+
+    for l in f:
+        m = newobj.match(l)
+        if m is not None:
+            # a new object starts: forget the boxes of the previous one so
+            # that they are not processed again for a non-person object
+            boxes = []
+            if m.group(1) == "person":
+                goNext = 1
+                start = int(m.group(2))
+                end   = int(m.group(3))
+                person_id = person_id + 1
+                print m.group(1), person_id, start, end
+            else:
+                goNext = 0
+            continue
+
+        m = pos.match(l)
+        if m is not None:
+            if goNext:
+                # turn "[x y w h; x y w h; ]" into a python list of lists
+                strarr = re.sub(r"\s", ", ", re.sub(r"\;\s+(?=\])", "]", re.sub(r"\;\s+(?!\])", "],[", re.sub(r"(\[)(\d)", "\\1[\\2", m.group(1)))))
+                boxes = eval(strarr)
+            continue
+
+        m = occl.match(l)
+        if m is None:
+            continue
+
+        occls = eval(re.sub(r"\s+(?!\])", ",", m.group(1)))
+        if len(boxes) == 0 or len(boxes) != len(occls):
+            continue
+
+        for idx, box in enumerate(boxes):
+            if occls[idx] == 1:
+                continue
+
+            x = box[0]
+            y = box[1]
+            w = box[2]
+            h = box[3]
+
+            frame_id = int(start) - 1 + idx
+            frame_file = os.path.join(path, "I0%04d.jpg" % frame_id)
+
+            # NOTE(review): `start + frame_id` is kept from the original code;
+            # it looks like it was meant to be `frame_id` alone -- confirm.
+            if (start + frame_id) >= end or w < 10 or h < min_possible:
+                continue
+
+            mat = cv2.imread(frame_file)
+            if mat is None:
+                # cv2.imread returns None for a missing or unreadable file
+                print "can not read", frame_file
+                continue
+            mat_h, mat_w = mat.shape[:2]
+
+            # let default height of person be 96.
+            scale = h / float(96)
+            rel_scale = scale / octave
+
+            d_w = whole_mod_w * rel_scale
+            d_h = whole_mod_h * rel_scale
+
+            # padding needed to reach the model window around the person
+            tb = (d_h - h) / 2.0
+            lr = (d_w - w) / 2.0
+
+            x = int(round(x - lr))
+            y = int(round(y - tb))
+
+            w = int(round(w + lr * 2.0))
+            h = int(round(h + tb * 2.0))
+
+            inner = [max(5, x), max(5, y), min(mat_w - 5, x + w), min(mat_h - 5, y + h) ]
+            cropped = mat[inner[1]:inner[3], inner[0]:inner[2], :]
+
+            # part of the window that falls outside of the image
+            top     = int(max(0, 0 - y))
+            bottom  = int(max(0, y + h - mat_h))
+            left    = int(max(0, 0 - x))
+            right   = int(max(0, x + w - mat_w))
+
+            # Skip samples where more than a quarter of the window would have
+            # to be synthesized.  The original compared top/left (which are
+            # never negative) against a negative bound, so those two checks
+            # could never trigger.
+            if top > d_h / 4.0 or bottom > d_h / 4.0 or left > d_w / 4.0 or right > d_w / 4.0:
+                continue
+
+            cropped = cv2.copyMakeBorder(cropped, top, bottom, left, right, cv2.BORDER_REPLICATE)
+            resized = sft.resize_sample(cropped, whole_mod_w, whole_mod_h)
+            flipped = cv2.flip(resized, 1)
+
+            cv2.imshow("resized", resized)
+
+            c = cv2.waitKey(20)
+            if c == 27:
+                sys.exit(0)
+
+            stem = re.sub(r"^.*\/(set[0-1]\d)\/(V0\d\d)\.(seq)/(I\d+).jpg$", "\\1_\\2_\\4", frame_file)
+            stem = os.path.join(opath, stem + "_%04d" % person_id)
+            fname = stem + ".png"
+            # The original joined `opath` onto the already joined name, which
+            # pointed into a non-existing directory for relative output paths.
+            fname_fl = stem + "_mirror.png"
+            try:
+                cv2.imwrite(fname, resized)
+                cv2.imwrite(fname_fl, flipped)
+            except cv2.error:
+                print "something wrong... go next."
+
+if __name__ == "__main__":
+    # Walks over the annotation files of Caltech set00 and extracts rescaled
+    # positive samples from the matching "<name>.seq" frame folders.
+    parser = OptionParser()
+    parser.add_option("-i", "--input", dest="input", metavar="DIRECTORY", type="string",
+                       help="Path to the Caltech dataset folder.")
+
+    parser.add_option("-d", "--output-dir", dest="output", metavar="DIRECTORY", type="string",
+                       help="Path to store data", default=".")
+
+    parser.add_option("-o", "--octave", dest="octave", type="float",
+                       help="Octave for a dataset to be scaled", default="0.5")
+
+    parser.add_option("-m", "--min-possible", dest="min_possible", type="int",
+                       help="Minimum possible height for positive.", default="64")
+
+    (options, args) = parser.parse_args()
+
+    if not options.input:
+        parser.error("Caltech dataset folder is required.")
+
+    # every run writes into its own time-stamped directory
+    opath = os.path.join(options.output, datetime.now().strftime("raw_ge64_cr_mirr_ts" + "-%Y-%m-%d-%H-%M-%S"))
+    os.mkdir(opath)
+
+    gl = glob.iglob( os.path.join(options.input, "set[0][0]/V0[0-9][0-9].txt"))
+    for each in gl:
+        path, ext = os.path.splitext(each)
+        path = path + ".seq"
+        print path
+        # close every annotation file as soon as it has been processed
+        with open(each) as annotation:
+            extractPositive(annotation, path, opath, options.octave, options.min_possible)
--- a/modules/softcascade/misc/scale_inria.py
+++ b/modules/softcascade/misc/scale_inria.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+
+import sys, os, os.path, glob, math, cv2
+from datetime import datetime
+from optparse import OptionParser
+
+def parse(ipath, f):
+    """Read one annotation file of the Inria dataset.
+
+    `f` is an iterable over the lines of the file.  Returns a tuple
+    (image path, boxes): the image path is the quoted name of the
+    "Image filename" line resolved against the parent of `ipath` (None when
+    there is no such line), and boxes holds one list of four ints per
+    "Bounding box" line, in the order the numbers appear in the file.
+    """
+    image_path = None
+    boxes = []
+    for line in f:
+        if line.startswith("Bounding box"):
+            # "... : (a, b) - (c, d)" -> [a, b, c, d]
+            numbers = []
+            for corner in line.split(":")[1].split("-"):
+                # drop the surrounding parentheses, then split the pair
+                numbers.extend(corner.strip()[1:-1].split(","))
+            boxes.append([int(n) for n in numbers])
+
+        if line.startswith("Image filename"):
+            image_path = os.path.join(ipath, "..", line.split('"')[-2])
+
+    return (image_path, boxes)
+
+def adjust(box, tb, lr):
+    """Grow `box` by `lr` at its first and third and by `tb` at its second
+    and fourth coordinate; the result is rounded to ints and returned as a
+    new list of four values.
+    """
+    grown = (box[0] - lr, box[1] - tb, box[2] + lr, box[3] + tb)
+    return [int(round(value)) for value in grown]
+
+if __name__ == "__main__":
+    # Rescales the Inria person dataset for a set of model octaves: positives
+    # are cropped around every annotated box and written (plus a mirrored
+    # copy) to pos/octave_<n>, negatives are copied to neg/octave_<n> and
+    # enlarged first when they are too small for the model window.
+
+    # sft.py sits next to this script and provides resize_sample(); the
+    # original script used it without importing it.
+    import sft
+
+    parser = OptionParser()
+    parser.add_option("-i", "--input", dest="input", metavar="DIRECTORY", type="string",
+                       help="path to Inria train data folder")
+
+    parser.add_option("-o", "--output", dest="output", metavar="DIRECTORY", type="string",
+                       help="path to store data", default=".")
+
+    parser.add_option("-t", "--target", dest="target", type="string", help="should be train or test", default="train")
+
+    (options, args) = parser.parse_args()
+    if not options.input:
+        parser.error("Inria data folder required")
+
+    if options.target not in ["train", "test"]:
+        parser.error("dataset should contain train or test data")
+
+    octaves = [-1, 0, 1, 2]
+
+    path = os.path.join(options.output, datetime.now().strftime("rescaled-" + options.target + "-%Y-%m-%d-%H-%M-%S"))
+    os.mkdir(path)
+
+    neg_path = os.path.join(path, "neg")
+    os.mkdir(neg_path)
+
+    pos_path = os.path.join(path, "pos")
+    os.mkdir(pos_path)
+
+    print "rescaled Inria training data stored into", path, "\nprocessing",
+    for each in octaves:
+        octave = 2**each
+
+        # model window plus a border of int(20 * octave) pixels on every side
+        whole_mod_w = int(64 * octave) + 2 * int(20 * octave)
+        whole_mod_h = int(128 * octave) + 2 * int(20 * octave)
+
+        cpos_path = os.path.join(pos_path, "octave_%d" % each)
+        os.mkdir(cpos_path)
+        idx = 0
+
+        gl = glob.iglob(os.path.join(options.input, "annotations/*.txt"))
+        for annotation in gl:
+            # close every annotation file as soon as it has been parsed
+            with open(annotation) as f:
+                image, boxes = parse(options.input, f)
+            if not boxes:
+                continue
+
+            # decode the image once per annotation file, not once per box
+            mat = cv2.imread(image)
+            if mat is None:
+                # cv2.imread returns None for a missing or unreadable file
+                print "\ncan not read", image
+                continue
+            mat_h, mat_w = mat.shape[:2]
+
+            for box in boxes:
+                height = box[3] - box[1]
+                scale = height / float(96)
+
+                rel_scale = scale / octave
+
+                d_w = whole_mod_w * rel_scale
+                d_h = whole_mod_h * rel_scale
+
+                top_bottom_border = (d_h - (box[3] - box[1])) / 2.0
+                left_right_border = (d_w - (box[2] - box[0])) / 2.0
+
+                box = adjust(box, top_bottom_border, left_right_border)
+                inner = [max(0, box[0]), max(0, box[1]), min(mat_w, box[2]), min(mat_h, box[3]) ]
+
+                cropped = mat[inner[1]:inner[3], inner[0]:inner[2], :]
+
+                # part of the window that falls outside of the image is
+                # filled by replicating the border pixels
+                top     = int(max(0, 0 - box[1]))
+                bottom  = int(max(0, box[3] - mat_h))
+                left    = int(max(0, 0 - box[0]))
+                right   = int(max(0, box[2] - mat_w))
+                cropped = cv2.copyMakeBorder(cropped, top, bottom, left, right, cv2.BORDER_REPLICATE)
+                resized = sft.resize_sample(cropped, whole_mod_w, whole_mod_h)
+
+                out_name = ".png"
+                if round(math.log(scale)/math.log(2)) < each:
+                    out_name = "_upscaled" + out_name
+
+                cv2.imwrite(os.path.join(cpos_path, "sample_%d" % idx + out_name), resized)
+
+                flipped = cv2.flip(resized, 1)
+                cv2.imwrite(os.path.join(cpos_path, "sample_%d" % idx + "_mirror" + out_name), flipped)
+                idx = idx + 1
+                print "." ,
+                sys.stdout.flush()
+
+        idx = 0
+        cneg_path = os.path.join(neg_path, "octave_%d" % each)
+        os.mkdir(cneg_path)
+
+        # a dedicated loop variable: the original reused `each`, clobbering
+        # the octave of the enclosing loop
+        for neg_file in glob.iglob(os.path.join(options.input, "neg/*.*")):
+            img = cv2.imread(neg_file)
+            if img is None:
+                print "\ncan not read", neg_file
+                continue
+            min_shape = (1.5 * whole_mod_h, 1.5 * whole_mod_w)
+
+            if (img.shape[1] <= min_shape[1]) or (img.shape[0] <= min_shape[0]):
+                out_name = "negative_sample_%i_resized.png" % idx
+
+                ratio = float(img.shape[1]) / img.shape[0]
+
+                if (img.shape[1] <= min_shape[1]):
+                    resized_size = (int(min_shape[1]), int(min_shape[1] / ratio))
+
+                if (img.shape[0] <= min_shape[0]):
+                    resized_size = (int(min_shape[0] * ratio), int(min_shape[0]))
+
+                img = sft.resize_sample(img, resized_size[0], resized_size[1])
+            else:
+                out_name = "negative_sample_%i.png" % idx
+
+            cv2.imwrite(os.path.join(cneg_path, out_name), img)
+            idx = idx + 1
+            print "." ,
+            sys.stdout.flush()
--- a/modules/softcascade/misc/sft.py
+++ b/modules/softcascade/misc/sft.py
@@ -0,0 +1,281 @@
+#!/usr/bin/env python
+
+import cv2, re, glob
+import numpy             as np
+import matplotlib.pyplot as plt
+from itertools import izip
+
+def convert2detections(rects, confs, crop_factor = 0.125):
+    """Convert numPy matrices with rectangles and confidences to a list of
+    detections sorted by descending confidence.
+
+    Only the first row of `rects` and of `confs` is used.  Every detection
+    box is cropped by `crop_factor`.  Returns an empty list when `rects` is
+    None.
+    """
+    if rects is None:
+        return []
+
+    pairs = zip(rects.tolist()[0], confs.tolist()[0])
+    dts = [Detection(r, c) for r, c in pairs]
+
+    # key-based sort instead of a cmp function that never reported equality
+    dts.sort(key = lambda dt : dt.conf, reverse = True)
+
+    for dt in dts:
+        dt.crop(crop_factor)
+
+    return dts
+
+def cascade(min_scale, max_scale, nscales, f):
+    """Create a new soft cascade instance and load it from the file `f`.
+
+    Raises AssertionError when the cascade can not be loaded.
+    """
+    # where we use nms cv::SCascade::DOLLAR == 2
+    c = cv2.SCascade(min_scale, max_scale, nscales, 2)
+    xml = cv2.FileStorage(f, 0)
+    dom = xml.getFirstTopLevelNode()
+    # load() must not live inside an assert statement: `python -O` strips
+    # asserts and the cascade would silently stay unloaded
+    if not c.load(dom):
+        raise AssertionError("can not load cascade from %s" % f)
+    return c
+
+def cumsum(n):
+    """Compute the prefix sums of an array: element i of the result is the
+    sum of n[0] .. n[i]."""
+    running = 0
+    sums = []
+    for value in n:
+        running += value
+        sums.append(running)
+    return sums
+
+def computeROC(confidenses, tp, nannotated, nframes, ignored):
+    """Compute x and y arrays for ROC plot.
+
+    confidenses, tp and ignored are parallel lists with one entry per
+    detection: its confidence, 1 if it matched a ground truth box (else 0)
+    and 1 if it is to be ignored (else 0).  nannotated is the number of
+    ground truth boxes and nframes the number of processed frames.
+
+    Returns (fppi, miss_rate), both ordered by descending confidence; two
+    empty lists when there is no detection at all.
+    """
+    if len(confidenses) == 0:
+        # zip(*[]) below can not be unpacked into three names
+        return [], []
+
+    confidenses, tp, ignored = zip(*sorted(zip(confidenses, tp, ignored), reverse = True))
+
+    # a false positive is an unmatched detection that is not ignored
+    fp = [(1 - t) - ign for t, ign in zip(tp, ignored)]
+
+    fp = cumsum(fp)
+    tp = cumsum(tp)
+    # the small constant guards against division by zero
+    miss_rate = [(1 - x / (nannotated + 0.000001)) for x in tp]
+    fppi = [x / float(nframes) for x in fp]
+
+    return fppi, miss_rate
+
+def crop_rect(rect, factor):
+    """Crop rectangle by factor: the first two entries move forward by
+    factor times the third/fourth entry, which in turn shrink by twice that
+    amount.  All four results are truncated to ints."""
+    dx = factor * float(rect[2])
+    dy = factor * float(rect[3])
+    return [int(rect[0] + dx),
+            int(rect[1] + dy),
+            int(rect[2] - 2.0 * dx),
+            int(rect[3] - 2.0 * dy)]
+
+def initPlot(name):
+    """Initialize plot axes: labels, title `name`, grid and log scales."""
+    plt.xlabel("fppi")
+    plt.ylabel("miss rate")
+    plt.title(name)
+    plt.grid(True)
+    # both axes are logarithmic
+    plt.xscale('log')
+    plt.yscale('log')
+
+def plotLogLog(fppi, miss_rate, c):
+    """Draw one miss rate over fppi curve in color `c`."""
+    style = {"color" : c, "linewidth" : 2}
+    plt.loglog(fppi, miss_rate, **style)
+
+def showPlot(file_name, labels):
+    """Finish the plot: fix the axis limits and ticks, add the legend built
+    from `labels`, save the figure to `file_name` and show it."""
+    y_ticks  = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.64, 0.8, 1]
+    y_labels = ['.05', '.10', '.20', '.30', '.40', '.50', '.64', '.80', '1']
+
+    plt.axis((pow(10, -3), pow(10, 1), .035, 1))
+    plt.yticks(y_ticks, y_labels)
+    plt.legend(labels, loc = "lower left")
+    plt.savefig(file_name)
+    plt.show()
+
+def match(gts, dts):
+    """Filter true positives and ignored detections for cascade detector
+    output.
+
+    gts -- ground truth boxes, dts -- detections (objects providing
+    overlap() and overlapIgnored()).
+
+    Returns two lists with one 0/1 flag per detection: the first marks
+    detections matched to a ground truth box, the second marks unmatched
+    detections that mostly lie inside a ground truth box of ignored height.
+    """
+    matches_gt     = [0]*len(gts)
+    matches_dt     = [0]*len(dts)
+    matches_ignore = [0]*len(dts)
+
+    if len(gts) == 0:
+        return matches_dt, matches_ignore
+
+    # Cartesian product for each detection BB_dt with each BB_gt
+    overlaps = [[dt.overlap(gt) for gt in gts]for dt in dts]
+
+    for idx, row in enumerate(overlaps):
+        imax = row.index(max(row))
+
+        # try to match ground truth; every box can be matched only once
+        if (matches_gt[imax] == 0 and row[imax] > 0.5):
+            matches_gt[imax] = 1
+            matches_dt[idx]  = 1
+
+    # ground truth boxes lower than 53 or higher than 256 pixels; the list
+    # does not depend on the detection, so it is built only once
+    ignorable = [gt for gt in gts if (gt[3] - gt[1]) < 53 or (gt[3] - gt[1]) >  256]
+
+    for idx, dt in enumerate(dts):
+        # try to match ignored
+        if matches_dt[idx] == 0:
+            for each in ignorable:
+                if dt.overlapIgnored(each) > 0.5:
+                    matches_ignore[idx] = 1
+    return matches_dt, matches_ignore
+
+
+def draw_rects(img, rects, color, l = lambda x, y : x + y):
+    """Draw detections or ground truth on image.
+
+    Every rect is a 4-tuple; its first two values are one corner, the other
+    corner is computed by `l` from the first/third and second/fourth value.
+    Nothing is drawn when `rects` is None.
+    """
+    if rects is None:
+        return
+
+    for x1, y1, x2, y2 in rects:
+        corner = (x1, y1)
+        opposite = (l(x1, x2), l(y1, y2))
+        cv2.rectangle(img, corner, opposite, color, 2)
+
+
+def draw_dt(img, dts, color, l = lambda x, y : x + y):
+    """Draw the boxes (`bb` attribute) of the detections `dts` on image.
+
+    The first two box values are one corner, the other corner is computed by
+    `l` from the first/third and second/fourth value.  Nothing is drawn when
+    `dts` is None.
+    """
+    if dts is None:
+        return
+
+    for dt in dts:
+        x1, y1, x2, y2 = dt.bb[0], dt.bb[1], dt.bb[2], dt.bb[3]
+        cv2.rectangle(img, (x1, y1), (l(x1, x2), l(y1, y2)), color, 2)
+
+class Detection:
+    """One detector response: a box `bb`, its confidence `conf` and a
+    `matched` flag that starts as False."""
+
+    def __init__(self, bb, conf):
+        self.bb = bb
+        self.conf = conf
+        self.matched = False
+
+    def crop(self, factor):
+        """Replace the box by crop_rect(box, factor)."""
+        self.bb = crop_rect(self.bb, factor)
+
+    # we use rect-style for dt and box style for gt. ToDo: fix it
+    def _cross_area(self, b):
+        """Area shared by this detection and the box `b` (0.0 if disjoint)."""
+        a = self.bb
+        w = min( a[0] + a[2], b[2]) - max(a[0], b[0])
+        h = min( a[1] + a[3], b[3]) - max(a[1], b[1])
+
+        if w < 0 or h < 0:
+            return 0.0
+        return float(w * h)
+
+    def overlap(self, b):
+        """Intersection over union of this detection with the box `b`."""
+        a = self.bb
+        cross_area = self._cross_area(b)
+        union_area = (a[2] * a[3]) + ((b[2] - b[0]) * (b[3] - b[1])) - cross_area
+
+        return cross_area / union_area
+
+    def overlapIgnored(self, b):
+        """Share of this detection's own area that lies inside the box `b`."""
+        a = self.bb
+        self_area = (a[2] * a[3])
+
+        return self._cross_area(b) / self_area
+
+    def mark_matched(self):
+        """Set the `matched` flag."""
+        self.matched = True
+
+"""Parse INPIA annotation format"""
+def parse_inria(ipath, f):
+    """Parse one annotation file of the INRIA dataset.
+
+    `f` is an iterable over the lines of the file; `ipath` is not used by
+    this function.  Collects one list of four ints per "Bounding box" line
+    and the quoted name of the "Image filename" line, and wraps both into a
+    Sample.
+
+    NOTE(review): no `Sample` is defined or imported in this module, so the
+    return statement presumably raises NameError -- confirm where Sample is
+    supposed to come from.
+    """
+    bbs = []
+    path = None
+    for l in f:
+        box = None
+        if l.startswith("Bounding box"):
+            # "... : (a, b) - (c, d)" -> two "(x, y)" strings
+            b = [x.strip() for x in l.split(":")[1].split("-")]
+            # drop the parentheses and split every pair at the comma
+            c = [x[1:-1].split(",") for x in b]
+            # flatten the two pairs into one list of four ints
+            d = [int(x) for x in sum(c, [])]
+            bbs.append(d)
+
+        if l.startswith("Image filename"):
+            # the file name is the last quoted string of the line
+            path = l.split('"')[-2]
+
+    return Sample(path, bbs)
+
+
+def glob_set(pattern):
+    """Return the list of all paths matching the glob `pattern`."""
+    return list(glob.iglob(pattern))
+
+def parse_idl(f):
+    """Parse ETH idl file.
+
+    `f` is the path of the file.  Every non-blank line is rewritten into a
+    python dict literal (the leading "left/ prefix is dropped, everything
+    after the colon becomes a list) and evaluated.  Returns one dict mapping
+    each image name to its list of boxes.
+    """
+    annotations = {}
+    # the with-block closes the file, the original leaked the handle
+    with open(f) as idl:
+        for l in idl:
+            if not l.strip():
+                # a blank line is not a valid expression for eval()
+                continue
+            l = re.sub(r"^\"left\/", "{\"", l)
+            l = re.sub(r"\:", ":[", l)
+            l = re.sub(r"(\;|\.)$", "]}", l)
+            # NOTE: eval() executes the file content; only use this on
+            # annotation files from a trusted source.
+            annotations.update(eval(l))
+    return annotations
+
+def norm_box(box, ratio):
+    """Normalize detection box to unified aspect ratio.
+
+    The second and fourth value of `box` are kept; the first and third are
+    replaced so that their distance is `ratio` times the distance of the kept
+    values, centered on the old middle.  Returns a 4-tuple.
+    """
+    center = float(box[0] + box[2]) / 2.0
+    half_width = float(box[3] - box[1]) * ratio / 2.0
+
+    left = int(round(center - half_width))
+    right = int(round(center + half_width))
+    return (left, box[1], right, box[3])
+
+def norm_acpect_ratio(boxes, ratio):
+    """Process array of boxes: apply norm_box with `ratio` to every box and
+    return the results as a new list."""
+    normalized = []
+    for box in boxes:
+        normalized.append(norm_box(box, ratio))
+    return normalized
+
+def filter_for_range(boxes, scale_range, ext_ratio):
+    """Filter detections out of extended range.
+
+    The boxes are first normalized to an aspect ratio of 0.5; a box is kept
+    when the distance between its second and fourth value lies strictly
+    between scale_range[0] / ext_ratio and scale_range[1] * ext_ratio.
+    """
+    lower = scale_range[0] / ext_ratio
+    upper = scale_range[1] * ext_ratio
+
+    kept = []
+    for b in norm_acpect_ratio(boxes, 0.5):
+        height = b[3] - b[1]
+        if height > lower and height < upper:
+            kept.append(b)
+    return kept
+
+def resize_sample(image, d_w, d_h):
+    """Resize sample for training to d_w x d_h pixels.
+
+    When the target is smaller than the source in either dimension, the image
+    is first filtered with a Gaussian kernel derived from the shrink ratio and
+    then resampled with INTER_AREA; otherwise it is resampled directly with
+    INTER_CUBIC.  `image` must have three dimensions.
+    """
+    h, w, _ = image.shape
+    shrinking = (d_h < h) or (d_w < w)
+
+    if not shrinking:
+        return cv2.resize(image, (d_w, d_h), None, 0, 0, cv2.INTER_CUBIC)
+
+    ratio = min(d_h / float(h), d_w / float(w))
+
+    # kernel size and sigma both grow as the shrink ratio gets smaller
+    # NOTE(review): getGaussianKernel returns a single column, so filter2D
+    # presumably smooths along one axis only -- confirm this is intended.
+    kernel = cv2.getGaussianKernel(int( 5 / (2 * ratio)), 0.5 / ratio)
+    smoothed = cv2.filter2D(image, cv2.CV_8UC3, kernel)
+    return cv2.resize(smoothed, (d_w, d_h), None, 0, 0, cv2.INTER_AREA)
+
+newobj = re.compile("^lbl=\'(\w+)\'\s+str=(\d+)\s+end=(\d+)\s+hide=0$")
+
+class caltech:
+    """Helpers for reading the text form of Caltech annotation files."""
+
+    @staticmethod
+    def extract_objects(f):
+        """Split the lines of `f` into one list of lines per object.
+
+        An object starts at every line matching `newobj` and runs up to the
+        next such line or the end of the input.  Lines in front of the first
+        object are dropped.
+        """
+        objects = []
+        tmp = []
+        for l in f:
+            if newobj.match(l) is not None:
+                objects.append(tmp)
+                tmp = []
+            tmp.append(l)
+        # flush the object that was still being collected when the input
+        # ended; the original silently lost the last object of every file
+        objects.append(tmp)
+        # the first chunk holds the lines in front of the first object
+        return objects[1:]
+
+    @staticmethod
+    def parse_header(f):
+        """Read the first two lines of `f` and return (nFrame, n) as ints."""
+        _    = f.readline() # skip first line (version string)
+        head = f.readline()
+        (nFrame, nSample) = re.search(r'nFrame=(\d+) n=(\d+)', head).groups()
+        return (int(nFrame), int(nSample))
+
+    @staticmethod
+    def parse_pos(l):
+        """Turn a "pos =[a b c d; e f g h; ]" (or posv) line into a list of
+        lists of numbers."""
+        pos = re.match(r'^posv?\s*=(\[[\d\s\.\;]+\])$', l).group(1)
+        pos = re.sub(r"(\[)(\d)", "\\1[\\2", pos)
+        pos = re.sub(r"\s", ", ", re.sub(r"\;\s+(?=\])", "]", re.sub(r"\;\s+(?!\])", "],[", pos)))
+        # the regular expression above admits only digits, whitespace, dots,
+        # semicolons and brackets into the evaluated text
+        return eval(pos)
+
+    @staticmethod
+    def parse_occl(l):
+        """Turn an "occl=[0 1 0 ]" line into a list of numbers."""
+        occl = re.match(r'^occl\s*=(\[[\d\s\.\;]+\])$', l).group(1)
+        occl = re.sub(r"\s(?!\])", ",", occl)
+        return eval(occl)
+
+def parse_caltech(f):
+    """Parse an open Caltech annotation text file `f`.
+
+    Returns a list with one entry per frame; every entry is a list of
+    (label, pos, occl, posv) tuples, one per object annotated in that frame.
+    """
+    nFrame, nSample = caltech.parse_header(f)
+
+    annotations = [[] for frame in range(nFrame)]
+    for obj in caltech.extract_objects(f):
+        header = re.search(r'^lbl=\'(\w+)\'\s+str=(\d+)\s+end=(\d+)\s+hide=0$', obj[0])
+        label, start, end = header.groups()
+        print label, start, end
+
+        # frames are numbered from one in the file
+        first = int(start) -1
+        end   = int(end)
+
+        # the three lines after the object header: pos, posv, occl
+        pos   = caltech.parse_pos(obj[1])
+        posv  = caltech.parse_pos(obj[2])
+        occl  = caltech.parse_occl(obj[3])
+
+        for offset, (p, pv, oc) in enumerate(zip(pos, posv, occl)):
+            annotations[first + offset].append((label, p, oc, pv))
+
+    return annotations