Home >  > Python常用代码(三)

Python常用代码(三)

一、csv

@@@@ 写入并生成csv文件 @@@@
# coding: utf-8
import csv

# Records to store; each tuple becomes one CSV row (name, age, phone).
data = [
    ('小河', '25', '1234567'),
    ('小芳', '18', '789456')]

# The csv module expects a text-mode file opened with newline='' so that it
# controls line endings itself. The context manager closes the file even when
# a write raises, which the former explicit close() call did not guarantee.
with open('csv_test.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header row first, then all records in one call.
    writer.writerow(['姓名', '年龄', '电话'])
    writer.writerows(data)
 
@@@@ 读取csv文件 @@@@
 
# coding: utf-8
import csv

# Open in text mode with newline='' so the csv module can correctly handle
# line breaks embedded in quoted fields; the context manager guarantees the
# file is closed even if parsing fails part-way through.
with open('csv_test.csv', 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)

    # Each row is yielded as a list of column strings.
    for line in reader:
        print(line)

二、提取中文

import sys,re
 
# Sample text mixing English, Chinese, Japanese and Korean sentences.
s = """
    en: Regular expression is a powerful tool for manipulating text. 
    zh: 汉语是世界上最优美的语言,正则表达式是一个很有用的工具
    jp: 正規表現は非常に役に立つツールテキストを操作することです。 
    jp-char: あアいイうウえエおオ 
    kr:정규 표현식은 매우 유용한 도구 텍스트를 조작하는 것입니다. 
    """

# One or more consecutive characters in the range U+4E00..U+9FA5.
re_words = re.compile(u"[\u4e00-\u9fa5]+")

# First matching run anywhere in the sample.
m = re_words.search(s)

print("unicode 中文")
print("--------")
print(m)
print(m.group())

# Every matching run, in order of appearance.
res = re_words.findall(s)

if len(res) > 0:
    print("There are %d parts:\n" % len(res))
    # One run per output line.
    print("\n".join(res))
print("--------\n")

三、判断当前字符串是否全部为中文

import sys

# Python 2 only: switch the interpreter-wide default codec to UTF-8 so that
# implicit str/unicode conversions do not fail on non-ASCII text.
# Python 3 has no built-in reload() and no sys.setdefaultencoding(), and its
# default encoding is already UTF-8, so the hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - re-exposes setdefaultencoding on Python 2
    sys.setdefaultencoding('utf8')
 
def check_contain_chinese(check_str):
    """Report whether every character of *check_str* is a Chinese character.

    Despite its name, the function checks that the string consists *only* of
    characters in the range U+4E00..U+9FFF, not merely that it contains one.

    Args:
        check_str: Text to inspect, either UTF-8 encoded bytes or text that
            is already decoded.

    Returns:
        1 if no character falls outside U+4E00..U+9FFF (this includes the
        empty string), otherwise 0.
    """
    # Only byte strings need decoding. Calling .decode() on text raises
    # AttributeError on Python 3 and triggers an implicit ASCII encode on
    # Python 2.
    if isinstance(check_str, bytes):
        check_str = check_str.decode('utf-8')

    # all() stops at the first non-Chinese character.
    if all(u'\u4e00' <= ch <= u'\u9fff' for ch in check_str):
        return 1
    return 0
 

四、日期遍历

def date_range(start, end, only_monday=False, input_format='%y%m%d', output_format='%y%m%d'):
    """Return every date from *start* to *end* (both inclusive) as strings.

    Example:
        date_range(140130, 140202)
        returns ['140130', '140131', '140201', '140202']

    Args:
        start: First date; converted with str() and parsed with input_format.
        end: Last date; converted with str() and parsed with input_format.
        only_monday: When true, keep only the dates that fall on a Monday.
        input_format: strptime format used to parse start and end.
        output_format: strftime format used for the returned strings.

    Returns:
        A list of formatted date strings in ascending order; empty when
        start is later than end.

    Raises:
        ValueError: If start or end does not match input_format.
    """
    # Imported locally so the function works on its own.
    import datetime

    first = datetime.datetime.strptime(str(start), input_format)
    last = datetime.datetime.strptime(str(end), input_format)
    one_day = datetime.timedelta(days=1)

    range_ = []
    current = first
    while current <= last:
        # weekday() == 0 is Monday, the same test as strftime('%w') == '1'.
        if not only_monday or current.weekday() == 0:
            range_.append(current.strftime(output_format))
        current += one_day
    return range_

五、计算正文字数

# Count the characters of the body text, ignoring whitespace, punctuation
# and markup tags. `newcontent` is expected to already hold the body as
# text; it is defined outside this snippet.

# Step 1: remove whitespace and punctuation. The character classes are the
# original ones with the HTML entity "&amp;" restored to a plain "&"; left
# as an entity it made the class also delete the letters a, m, p and ";".
# NOTE(review): some ASCII marks in the second class (! , : ? ( )) may be
# stand-ins for full-width punctuation lost in extraction - confirm.
text = re.sub(
    u"[\\s+\\.\\!\\/_,$%^*(+\"']+|[+——!,::。?、~@#¥%……&*()“”《》]+",
    u"",
    newcontent)

# Step 2: remove the remaining markup tags. The pattern previously used
# "&lt;" / "&gt;" entities and therefore never matched a real tag.
text2 = re.sub(u'<[^>]*?>', u'', text)

words_number = len(text2)

六、提取txt文件中的特定一行

# Loading a whole large file just to fetch a single line is wasteful, so the
# file is iterated explicitly and reading stops at the requested line.
def getline(file_name, line_number):
    """Return line *line_number* (1-based) of the file *file_name*.

    Args:
        file_name: Path of the text file to read.
        line_number: 1-based index of the wanted line.

    Returns:
        The line including its trailing newline, or '' when line_number is
        below 1 or beyond the end of the file.
    """
    if line_number < 1:
        return ''
    # The context manager closes the file on every exit path, including the
    # early return from inside the loop.
    with open(file_name, 'r') as handle:
        # Start counting at 1 so the counter matches line_number directly.
        for cur_line_number, line in enumerate(handle, 1):
            if cur_line_number == line_number:
                return line
    return ''

七、利用集合去掉重复内容

# Collect the distinct recipient usernames; a set drops duplicates for us.
# The set used to be created as `tem` but filled as `temp`, which raised a
# NameError on the first message; a single name is used now.
# NOTE(review): `user` is defined elsewhere, and `sent_mesages` looks like a
# misspelling of `sent_messages` - confirm against its definition before
# renaming.
tem = set()

sent_num = user.sent_mesages.all()
for s in sent_num:
    tem.add(s.recipient.username)

十一、通过UA识别wap还是pc来访

ua = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:85.0) Gecko/20100101 Firefox/85.0"

# Both patterns are compiled once at import time rather than on every call.
# They are raw strings, so regex escapes use a SINGLE backslash; the earlier
# doubled form (e.g. r"bb\\d+") demanded a literal backslash in the
# User-Agent, so those alternatives could never match.

# Matched anywhere in the User-Agent string.
_MOBILE_UA_RE = re.compile(r"(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino", re.I)

# Matched only against the first four characters of the User-Agent string.
_MOBILE_PREFIX_RE = re.compile(r"1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-", re.I)


def getUA(ua):
    """Classify a User-Agent string as a mobile ('wap') or desktop ('pc') visitor.

    Args:
        ua: The raw User-Agent header value. An empty or None value is
            treated as a desktop visitor.

    Returns:
        'wap' when the full string matches the mobile pattern or its first
        four characters match the device-prefix pattern, otherwise 'pc'.
    """
    if not ua:
        return 'pc'
    if _MOBILE_UA_RE.search(ua) or _MOBILE_PREFIX_RE.search(ua[0:4]):
        return 'wap'
    return 'pc'


print(getUA(ua))

十二、【nginx】针对PC来访返回404

    # Return 404 unless the visitor looks like a mobile device or a spider/bot.
    # $mobile_rewrite is a flag: it starts as "do_not_perform" and is switched
    # to "perform" by either User-Agent test below.
    set $mobile_rewrite do_not_perform;
 
    # Test 1: pattern may match anywhere in the User-Agent ("~*" is a
    # case-insensitive regex match). Also covers spider/bot agents.
    if ($http_user_agent ~* "(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino|spider|Spider|bot|Bot") {
      set $mobile_rewrite perform;
    }
 
    # Test 2: pattern is anchored with "^", so it only matches at the very
    # start of the User-Agent.
    if ($http_user_agent ~* "^(1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-)") {
      set $mobile_rewrite perform;
    }
 
    # Neither test matched: treat the visitor as a desktop (PC) client.
    if ($mobile_rewrite != perform) {
        return 404;
    }

暧昧帖

本文暂无标签