一、數據處理
1、excel 表轉sql
=CONCATENATE("update data_source10009 set url='"&B1&"' where urlcategory='"&A1&"';")
=CONCATENATE("replace into data_source10001_website_v1(pageid, url) values ('"&A1&"' , '"&B1&"');")
2、將b的表結構和索引都複製
create table data_source10026 like data_source10001;
將b的表結構和索引和數據都複製
create table data_source10026 like data_source10001;
INSERT INTO data_source10026 SELECT * FROM data_source10001;
3、設置表的初始值 導入表的初始數據
insert into data_final(urlcategory,ShortName) select urlcategory,ShortName from couponcategory;
4. 批量替換 MySQL 指定字段中的字符串
update 表名 set 字段名=REPLACE (字段名,'原來的值','要修改的值')
update user_item set addr=REPLACE (addr,'成都','天府') where time<'2013-11-05'
update couponcategory.data_source10009 set moreinfo0 =REPLACE (moreinfo0,'[1]','') where moreinfo0 is not null;
5. 魔法函數:比較string的相似度
php 魔法函數 similar_text($word, $word2compare, $percent)
$percent:兩個str的相似度(百分比)
6. php函數
in_array("Runoob", array)
strpos("Hello world!","world");
str_replace("world","Shanghai","Hello world!");
6. python函數
6.1 list轉字符串
命令:
''.join(list)
其中,引號中是字符之間的分割符,如“,”,“;”,“\t”等等
如:
list = ['1', '2', '3', '4', '5']  (元素必須是字符串;若為數字,先用 map(str, list) 轉換)
''.join(list) 結果即為:12345
','.join(list) 結果即為:1,2,3,4,5
6.2 字符串轉list
print list('12345')
輸出: ['1', '2', '3', '4', '5']
print list(map(int, '12345'))
輸出: [1, 2, 3, 4, 5]
str2 = "123 sjhid dhi"
list2 = str2.split() #or list2 = str2.split(" ")
print list2
['123', 'sjhid', 'dhi']
str3 = "www.google.com"
list3 = str3.split(".")
print list3
['www', 'google', 'com']
7. mysql 把colA 的值複製到colB
update data_source10013 set moreinfo0= moreinfo1;
二、網頁分析 xpath 語法
三、正則表達式
四、服務器執行抓取腳本(nohup 防止網絡中斷)
nohup scrapy crawl getdatawiki > getdatawiki.log 2>&1 &
stu1 = [url, href, urlname, '']
out = open('fix10004.csv', 'a', newline='')
# out = open('d:/data_source10004_v1.csv', 'a', newline='')
# 設定寫入模式
csv_write = csv.writer(out, dialect='excel')
# 寫入具體內容
csv_write.writerow(stu1)
out.close()