按日期对照片进行分类整理

发布时间:2019-06-07  栏目:Python  评论:0 Comments

# -*- coding:utf-8 -*-
__author__ = 'Barry'

from PIL import Image
from PIL.ExifTags import TAGS
from datetime import datetime
from hashlib import md5
import time
import os
import shutil
import fnmatch
import random

def IterFindFiles(path, fnexp):
    """Yield the full path of every file under *path* whose name matches *fnexp*.

    The tree rooted at *path* is walked recursively with os.walk, and
    *fnexp* is a shell-style pattern (as understood by fnmatch) applied to
    the bare file name in each directory.
    """
    for current_dir, _subdirs, names in os.walk(path):
        matching_names = fnmatch.filter(names, fnexp)
        for name in matching_names:
            yield os.path.join(current_dir, name)

def GenRandomStr():
    """Return a random 10-character string of upper-case letters and digits.

    The result is only used to make generated file names unique; it is not
    suitable for anything security-sensitive because it relies on `random`.
    """
    # Each digit appears exactly once so every character is equally likely
    # (the alphabet previously listed '0' twice).
    chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    return ''.join(random.choice(chars) for _ in range(10))

def GetPicPath():
    """Return the paths of all entries found one level below `input_path`.

    Reads the module-level `input_path`. For every sub-directory directly
    inside it, each entry of that sub-directory is returned as
    '<input_path>/<sub-directory>/<entry>'. Entries that sit directly in
    `input_path` are not included. Returns an empty list when `input_path`
    is not a directory.
    """
    if not os.path.isdir(input_path):
        return []
    collected = []
    for entry in os.listdir(input_path):
        candidate = input_path + '/' + entry
        if not os.path.isdir(candidate):
            continue
        collected.extend(candidate + '/' + child for child in os.listdir(candidate))
    return collected

def print_all_know_exif_tags():
    """Print every EXIF tag id known to PIL together with its name, sorted by id."""
    for k in sorted(TAGS):
        # Single pre-formatted argument: prints "<id> <name>" identically
        # under both the Python 2 print statement and the Python 3 function.
        print("%s %s" % (k, TAGS[k]))

def FormatTime(date):
    """Convert an EXIF-style 'YYYY:MM:DD HH:MM:SS' string to 'YYYY-MM-DD HH:MM:SS'.

    Returns the reformatted string, or False when *date* is not a string in
    the expected format or cannot be converted to a local timestamp.
    Callers rely on the falsy result to fall back to another date.
    """
    try:
        ts = time.mktime(time.strptime(date, '%Y:%m:%d %H:%M:%S'))
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts))
    except (TypeError, ValueError, OverflowError, OSError):
        # TypeError: non-string input; ValueError: malformed date text;
        # OverflowError/OSError: date outside the platform's time_t range.
        return False

def TrimTime(mtime):
    """Reduce a 'YYYY-MM-DD HH:MM:SS' timestamp string to its 'YYYYMMDD' date.

    The string is parsed, converted to an epoch value with time.mktime and
    back to local time before formatting. Raises ValueError if *mtime* does
    not match the expected format.
    """
    parsed = time.strptime(mtime, '%Y-%m-%d %H:%M:%S')
    epoch_seconds = time.mktime(parsed)
    local_parts = time.localtime(epoch_seconds)
    return time.strftime('%Y%m%d', local_parts)

def CalcDays(mtime,birth_day):
    """Return, as a string, the 1-based day count from *birth_day* to *mtime*.

    Both arguments are 'YYYYMMDD' strings. The birth day itself counts as
    day '1'; dates before the birth day give zero or negative values.
    """
    day_format = '%Y%m%d'
    photo_date = datetime.strptime(mtime, day_format)
    birth_date = datetime.strptime(birth_day, day_format)
    elapsed = photo_date - birth_date
    return str(elapsed.days + 1)

def GenMd5(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is read in fixed-size chunks so a large picture is never held
    in memory in full, and the handle is closed even if reading fails.
    Raises IOError/OSError if the file cannot be opened or read.
    """
    digest = md5()
    with open(filename, 'rb') as file_tmp:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF).
        for chunk in iter(lambda: file_tmp.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()

def _ReadExif(filename):
    """Return the EXIF dictionary of the image at *filename*, or None.

    None is returned (after printing the reason) when the file cannot be
    opened as an image or its EXIF data cannot be read. The image may also
    simply carry no EXIF data, in which case `_getexif()` itself is falsy.
    """
    try:
        img = Image.open(filename)
        return img._getexif()
    except Exception as e:
        # Broad on purpose: the image library can raise many unrelated
        # exception types on damaged files, and one bad picture must not
        # abort the whole scan. "skipping" refers to the EXIF lookup only;
        # the caller still records the file using its modification time.
        print("%s %s %s" % (filename, "skipping due to ", e))
        return None


def GetPicExif():
    """Map every picture found under `input_path` to a date string.

    Reads the module-level `input_path` and `fnexp`. For each matching file
    the date is taken from the first EXIF tag present among 36867
    (DateTimeOriginal), 36868 (DateTimeDigitized) and 306 (DateTime). If
    that tag cannot be parsed, or the file has no usable EXIF data, the
    file's modification time is used instead.

    Returns a dict of {file path: 'YYYY-MM-DD HH:MM:SS'}.
    """
    pic_date = {}
    for filename in IterFindFiles(input_path, fnexp):
        mtime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(os.stat(filename).st_mtime))
        taken = False
        # Fetched fresh for every file so a failure can never reuse the
        # previous picture's EXIF data.
        exif_data = _ReadExif(filename)
        if exif_data:
            for tag in (36867, 36868, 306):
                if tag in exif_data:
                    # Only the first tag present is consulted; FormatTime
                    # returns False when its value is malformed.
                    taken = FormatTime(exif_data[tag])
                    break
        pic_date[filename] = taken or mtime

    return pic_date

def _HashDirFiles(directory):
    """Return the set of MD5 digests of the regular files directly in *directory*."""
    digests = set()
    for entry in os.listdir(directory):
        entry_path = os.path.join(directory, entry)
        # Sub-directories cannot be hashed; only regular files are pictures.
        if os.path.isfile(entry_path):
            digests.add(GenMd5(entry_path))
    return digests


def ArchivePic():
    """Copy every picture found by GetPicExif into a per-day folder.

    Reads the module-level `output_path`, `birth_day`, `pic_prefix` and
    `pic_suffix`. Each picture is copied to
    '<output_path>/<YYYYMMDD>-(<n>days)/<prefix>-<YYYYMMDD>-<random><suffix>',
    where <n> is the day count since `birth_day`. A picture whose MD5 digest
    already exists in its target folder is skipped, so re-running the script
    does not create duplicates. Copy and hashing errors are printed and the
    picture is skipped. Returns None.
    """
    pic_date = GetPicExif()
    # Digests per target folder, computed once and updated after each copy
    # instead of re-hashing the whole folder for every picture.
    dir_digests = {}
    for pic, mtime in pic_date.items():
        day = TrimTime(mtime)
        std_out = "\033[1;33mAdd new picture \033[1;31m%s\033[0m\033[1;33m to dest path:%s\033[0m"%(os.path.split(pic)[1],output_path)
        new_dir = "%s/%s-(%sdays)"%(output_path,day,CalcDays(day,birth_day))
        if not os.path.exists(new_dir):
            os.mkdir(new_dir)
        try:
            if new_dir not in dir_digests:
                dir_digests[new_dir] = _HashDirFiles(new_dir)
            known_digests = dir_digests[new_dir]
            pic_digest = GenMd5(pic)
            if pic_digest in known_digests:
                # Identical content is already archived in this folder.
                continue
            # Draw random names until one is free in the target folder.
            while True:
                new_file_name = '%s-%s-%s%s'%(pic_prefix,day,GenRandomStr(),pic_suffix)
                new_file_path = '%s/%s'%(new_dir,new_file_name)
                if not os.path.exists(new_file_path):
                    break
            shutil.copy2(pic,new_file_path)
        except (IOError, OSError, shutil.Error) as e:
            print(e)
            continue
        known_digests.add(pic_digest)
        print(std_out)
    return

if __name__ == "__main__":
    # Script configuration. These names are read as module globals by
    # GetPicExif / ArchivePic.
    #input_path = r"D:\baby/"
    input_path = r"C:\users\xxr12\onedrive\pictures"
    #input_path = r"E:\MemArchPic"
    output_path = r"E:\Mybaby"
    fnexp = "*.jpg"
    pic_suffix = ".jpg"   # When searching for jpg files, the output suffix must match too
    pic_prefix = "MYBABY"
    birth_day = '20150621'
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    if not os.path.exists(input_path):
        # Parenthesised single argument works under both Python 2 and 3.
        print("some of path not found!")
    else:
        ArchivePic()
    #print_all_know_exif_tags()

用生命调参连串

An Improved SDA Based Defect Prediction Framework for Both Within-Project and Cross-Project Class-Imbalance Problems (TSE, 2017, 可读)

数据集为AEEEM,其中包含若干Java的开源项目。利用SDA方法同时解决项目内和跨项目的缺陷预测问题。

Heterogeneous cross-company defect prediction by unified metric representation and CCA-based transfer learning (FSE/ESEC, 2015)

采用典型关联分析(CCA)方法解决跨公司软件的缺陷预测问题。数据集用了4个公司的14个软件,包括NASA,SOFTLAB(无源码),ReLink,AEEEM。它们有不同的度量矩阵。

Dictionary Learning Based Software Defect Prediction (ICSE, 2014, 已读)

提出使用字典学习来进行缺陷预测,数据集为NASA mdp。

Transfer defect learning (ICSE, 2013)

跨工程的缺陷预测,TCA和TCA+方法,数据集为ReLink和AEEEM

Label propagation based semi-supervised learning for software defect prediction (ASE, 2017)

使用拉普拉斯得分采样策略来进行模型性能的优化,数据集采用NASA mdp矩阵。

Global vs. local models for cross-project defect prediction (ESE, 2017)

比较全局模型和局部模型在性能上的优劣,数据为PROMISE加上NASA mdp。

Data Transformation in Cross-project Defect Prediction (ESE, 2017)

讨论不同数据转换方法对分类性能的影响,算法采用随机森林,数据使用PROMISE,ReLink等。

The Use of Summation to Aggregate Software Metrics Hinders the Performance of Defect Prediction Models (ESE, 2017)

在缺陷预测中经常会将以class为单位和以method为单位的代码聚合为以文件为单位,在聚合的过程中如果把各部分的特征值直接相加,会导致特征间的相关性增大。本文研究了不同的聚合方案对缺陷预测准确性的影响。

The Consolidated Tree Construction algorithm in imbalanced defect prediction datasets (Evolutionary Computation, 2017)

研究各种规则树/决策树在缺陷预测上的效果。数据集为NASA mdp。

Which type of metrics are useful to deal with class imbalance in software defect prediction? (IST, 2017, 可读)

在类不平衡的情况下研究采样算法的优劣,数据为静态&动态(?)的矩阵。

A feature matching and transfer approach for cross-company defect prediction (JSS, 2017)

使用一种特征匹配迁移(FMT)的方法来进行跨公司的缺陷预测,数据用NASA和PROMISE。

Learning from open-source projects: An empirical study on defect prediction (ESEM, 2013)

数据为Apache Project Directory的度量矩阵。研究feature selection对缺陷预测准确性的影响。

Predicting Defect-prone Software Modules Using Support Vector Machines (JSS, 2008)

利用Nasa MDP来试验SVM的性能。

A transfer cost-sensitive boosting approach for cross-project defect prediction (SQJ, 2017)

使用boosting和迁移学习进行跨工程的缺陷预测,数据集用PROMISE。

Applying Feature Selection to Software Defect Prediction Using Multi-objective Optimization (COMPSAC, 2017)

数据集为PROMISE。

FeSCH: A Feature Selection Method using Clusters of Hybrid-data for Cross-Project Defect Prediction (COMPSAC, 2017)

使用基于密度的聚类方法来选择特征,解决跨工程的缺陷预测问题。

Machine learning for finding bugs: An initial report (MaLTeSQuE, 2017)

把llvm操作符类别与Parfait工具设计的复杂度度量指标结合起来进行bug预测。


Android/Binary/意图不在缺陷预测

Graph-based Statistical Language Model for Code (ICSE, 2015)

使用n-gram构建软件图模型,并且用于API code suggestion

ALETHEIA: Improving the Usability of Static Security Analysis (CCS, 2014)

使用分类算法处理静态安全checker生成的false report和警告,达到过滤的目的。

On the Localness of Software (FSE/ESEC, 2014)

改进n-gram方法对软件进行统计表示,并用于code suggestion。

BYTEWEIGHT: Learning to Recognize Functions in Binary Code (CCS, 2013)

用统计学习的方法识别二进制函数边界。

On the Naturalness of Software (ICSE, 2012)

用自然语言处理的方法(n-gram)对程序进行处理,实现更加准确的代码自动完成。可以借鉴程序表示方法。

Predicting faults using the complexity of code changes (ICSE, 2009)

提出一个模型,数据集使用6个开源工程:NetBSD, FreeBSD, OpenBSD, Postgres, KDE, KOffice

Classifying software changes: Clean or buggy? (TSE, 2008)

将开源项目中的change分类为buggy和clean,数据为13个开源项目。

Creating an invalid defect classification model using text mining on server development (JSS, 2017)

利用文本挖掘的方法对开发文档中的bug report进行处理,将defect分类为有效和无效。bug report涉及到硬件,固件,OS以及一些应用。本身跟缺陷检测关系不大,但或许可以借鉴它的方法来收集数据。

A Machine-learning Approach for Classifying and Categorizing Android Sources and Sinks (NDSS, 2014)

利用机器学习的方法直接从Android API中识别出source和sink。

Buffer Overflow Vulnerability Prediction from x86 executables using Static Analysis and Machine Learning (COMPSAC, 2015, 已读)

使用机器学习的分类方法对二进制程序的缓冲区溢出漏洞进行检测。


数据集相关吐槽&方法

It’s Not a Bug, It’s a Feature: How Misclassification Impacts Bug Prediction (ICSE, 2013)

分析了HTTPClient, Jackrabbit, Lucene-Java, Rhino, Tomcat5这5个开源项目,指出其中33.8%的bug report报告的并不是bug,而是引入了新的feature,修改文档,或内部重构,这增加了缺陷预测模型的错误。

Relink: Recovering links between bugs and changes (FSE/ESEC, 2011)

提出传统的bug信息收集方法的不足:由于开发者的原因可能会遗漏很多bug。提出一种能够获取开发者在changelog中未提及bug的方法,并且手工检查了3个开源项目ZXing,OpenIntents和Apache(作为ground truth)

Predicting fault incidence using software change history (TSE, 2000)

文章提到一个90年代的代码管理仓库:IMR和SCCS,文章中获取bug信息的方法就是收集指定日期区间内被标为bug fix的变更。文章也提到,这一方法的问题在于有些“bug fix”其实是对软件功能方面的问题进行修改,而不是软件缺陷。

When Do Changes Induce Fixes? (MSR, 2005)

2005年的论文,主要分为3个步骤:1)在bug数据库中找到fixed条目;2)找到与之相关的代码版本,找到fix的位置;3)找到此位置在bug出现之前做出的改动。


可读

Machine-Learning-Guided Selectively Unsound Static Analysis (ICSE, 2017, 已读)

数据集使用开源软件,其中包含几个严重的buffer overflow漏洞。

Automatically learning semantic features for defect prediction (ICSE, 2016, Java, 已读)

抽取函数调用和谓词作为特征,利用DBN对统计学习模型进行优化。

Bugram: Bug Detection with N-gram Language Models (ICSE, 2016, Java, 已读)

抽取函数调用和谓词作为特征,利用自然语言处理(n-gram)方法进行缺陷预测。

Automatic Inference of Search Patterns for Taint-Style Vulnerabilities (S&P, 2015, 已读)

使用代码属性图,对缓冲区溢出漏洞进行过程间数据流分析,利用图挖掘和模式匹配的方法挖掘漏洞。

Chucky: Exposing Missing Checks in Source Code for Vulnerability Discovery (CCS, 2013)

利用污点分析和机器学习的方法进行漏洞挖掘。

Graph-based Mining of Multiple Object Usage Patterns (FSE/ESEC, 2009, Java)

使用图模型来进行代码使用模式的挖掘,能够检测到代码中的异常模式。利用了Ant, Log4J, AspectJ, Axis, Columba, jEdit, Jigsaw, Struts, Fluid VC这几个java开源软件来进行模式挖掘。并且利用Fluid来进行异常检测,对检测出的六十三个异常,查找出了四个缺陷。

PR-Miner: Automatically Extracting Implicit Programming Rules and Detecting Violations in Large Software Code (FSE/ESEC, 2005, Java)
TLEL: A two-layer ensemble learning approach for just-in-time defect prediction (IST, 2017)

提出一种新型的集成学习算法来进行实时缺陷预测。
用数据挖掘的方法检查程序中的隐式规则,并且检查程序中违反这些规则的地方。数据包括Linux, PostgreSQL Server, Apache HTTP Server等。检查出的前60个违反规则的地方,有16个已经确认是bug。

Predicting buffer overflow vulnerabilities through mining light-weight static code attributes (ISSRE, 2014, 已读)

自己设计特征,利用Lincoln MIT数据集进行缓冲区溢出漏洞的挖掘。

Personalized defect prediction (ASE, 2013, 已读)

单独为每个工程建模,数据采用几个开源项目:Linux kernel, PostgreSQL, Xorg, Eclipse, Lucene和Jackrabbit。其中Lucene的bug信息是手工验证的bug信息,其他项目的bug信息是用关键词搜索方法收集到的。

Alattin: Mining Alternative Patterns for Detecting Neglected Conditions (ASE, 2009)

利用数据挖掘算法来挖掘程序中违反模式的代码。

Explaining software defects using topic model (MSR, 2012)

话题模型。数据集选取Eclipse,Firefox等


有如胡说八道

Dealing with noise in defect prediction (ICSE)

研究怎么去除数据集中的噪音。

留下评论

网站地图xml地图