提交 f31712e5 编写于 作者: H hathackerwang

ch03

上级 a18e7079
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 20:52:50 2018
@author: Administrator
"""
import re
from classifier_test import *
from Train_bayes import *
from word_bag import *
from numpy import *
def text_parser(string_inp):
    """Split raw text into lower-cased word tokens.

    The text is split on runs of non-word characters (anything that is not a
    letter, digit or underscore). Tokens of two characters or fewer are
    dropped and the remaining tokens are lower-cased.

    Args:
        string_inp: the raw text to tokenize.

    Returns:
        A list of lower-cased tokens, each longer than two characters.
    """
    # Use ``\W+`` rather than ``\W*``: since Python 3.7 ``re.split`` also
    # splits on empty matches, so ``\W*`` chops the text into single
    # characters and the length filter below then discards every token.
    tokens = re.split(r'\W+', string_inp)
    return [tok.lower() for tok in tokens if len(tok) > 2]
def Spam_filter(filename):
    """Train and evaluate a naive Bayes spam filter on a random hold-out split.

    Reads ``<filename>/spam/1.txt`` .. ``25.txt`` (label 1) and
    ``<filename>/ham/1.txt`` .. ``25.txt`` (label 0), tokenizes every message
    with ``text_parser``, holds out 10 randomly chosen messages as a test set,
    trains on the remaining ones and prints the error rate on the test set.

    Args:
        filename: directory that contains the ``spam`` and ``ham``
            sub-directories.
    """
    docList = []
    classList = []
    # Messages are numbered 1..25 in each folder.
    for i in range(1, 26):
        for label, folder in ((1, 'spam'), (0, 'ham')):
            # A context manager closes the file handle deterministically.
            # NOTE(review): the platform default encoding is used; messages
            # containing non-ASCII bytes may need an explicit encoding.
            with open(filename + '/%s/%d.txt' % (folder, i)) as fh:
                wordList = text_parser(fh.read())
            docList.append(wordList)
            classList.append(label)

    # Vocabulary of every distinct token seen in any message.
    vocabList = Create_wordVec(docList)

    # Start with every document index in the training set, then move 10
    # randomly chosen indices into the test set (sampling without replacement).
    trainingSet = list(range(len(docList)))
    testSet = []
    for _ in range(10):
        randIndex = int(random.uniform(0, len(trainingSet)))
        testSet.append(trainingSet[randIndex])
        del trainingSet[randIndex]

    # Turn each training document into a bag-of-words count vector.
    trainMat = []
    trainClasses = []
    for docIndex in trainingSet:
        trainMat.append(Word2Vec_bag(vocabList, docList[docIndex]))
        trainClasses.append(classList[docIndex])

    # Per-class log word probabilities and the prior of class 1.
    p1V, p0V, pSpam = train_bayes(array(trainMat), array(trainClasses))

    # Count the held-out messages whose prediction differs from the label.
    errorCount = 0
    for docIndex in testSet:
        wordVector = Word2Vec_bag(vocabList, docList[docIndex])
        if classify_bayes(array(wordVector), p0V, p1V, pSpam) != classList[docIndex]:
            errorCount += 1
    print('the error rate is:', float(errorCount) / len(testSet))
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 21:56:52 2018
@author: Administrator
"""
#实例:使用朴素贝叶斯分类器从个人广告中获取区域倾向
#RSS源分类器及高频词去除函数
import operator
from numpy import *
from Mail_filter import *
from Train_bayes import *
from word_bag import *
from classifier_test import *
def calMostFreq(vocabList, fullTest, topN=30):
    """Return the most frequent vocabulary words with their counts.

    Args:
        vocabList: the vocabulary words to rank.
        fullTest: every token of every document, repeats included.
        topN: how many of the most frequent words to return (default 30).

    Returns:
        A list of at most ``topN`` ``(word, count)`` tuples sorted by count,
        largest first. Words with equal counts keep their ``vocabList`` order.
    """
    import operator
    from collections import Counter

    # Count every token once instead of calling ``fullTest.count`` for each
    # vocabulary word, which rescans the whole token list every time.
    counts = Counter(fullTest)
    freqDict = {token: counts[token] for token in vocabList}
    sortedFreq = sorted(freqDict.items(), key=operator.itemgetter(1), reverse=True)
    return sortedFreq[:topN]
def localWords(feed1, feed0):
    """Train and evaluate a naive Bayes classifier on two parsed RSS feeds.

    Entry summaries of ``feed1`` are labelled 1 and those of ``feed0`` are
    labelled 0. The 30 most frequent words are removed from the vocabulary,
    a random hold-out set is classified, and the error rate is printed.

    Args:
        feed1: parsed feed whose ``['entries'][i]['summary']`` texts form class 1.
        feed0: parsed feed whose ``['entries'][i]['summary']`` texts form class 0.

    Returns:
        A tuple ``(vocabList, p0V, p1V)``: the pruned vocabulary and the
        per-word log probabilities returned by ``train_bayes`` for class 0
        and class 1.

    Raises:
        ValueError: if either feed has no entries.
    """
    docList = []
    classList = []
    fullTest = []
    # Use the entry count of the shorter feed so both classes contribute the
    # same number of documents and no index runs past the end of a feed.
    minLen = min(len(feed1['entries']), len(feed0['entries']))
    if minLen == 0:
        raise ValueError('both feeds must contain at least one entry')
    for i in range(minLen):
        # Class 1 document.
        wordList = text_parser(feed1['entries'][i]['summary'])
        docList.append(wordList)
        fullTest.extend(wordList)
        classList.append(1)
        # Class 0 document.
        wordList = text_parser(feed0['entries'][i]['summary'])
        docList.append(wordList)
        fullTest.extend(wordList)
        classList.append(0)

    vocabList = Create_wordVec(docList)
    # Drop the most frequent words from the vocabulary before training.
    top30Words = calMostFreq(vocabList, fullTest)
    for pairW in top30Words:
        if pairW[0] in vocabList:
            vocabList.remove(pairW[0])

    # Random hold-out split: up to 20 test documents, always leaving at
    # least one document to train on.
    trainingSet = list(range(2 * minLen))
    testSet = []
    numTest = min(20, len(trainingSet) - 1)
    for _ in range(numTest):
        randIndex = int(random.uniform(0, len(trainingSet)))
        testSet.append(trainingSet[randIndex])
        del trainingSet[randIndex]

    trainMat = []
    trainClasses = []
    for docIndex in trainingSet:
        trainMat.append(Word2Vec_bag(vocabList, docList[docIndex]))
        trainClasses.append(classList[docIndex])
    p1V, p0V, pSpam = train_bayes(array(trainMat), array(trainClasses))

    errorCount = 0
    for docIndex in testSet:
        wordVector = Word2Vec_bag(vocabList, docList[docIndex])
        if classify_bayes(array(wordVector), p0V, p1V, pSpam) != classList[docIndex]:
            errorCount += 1
    print('the error rate is:', float(errorCount) / len(testSet))
    return vocabList, p0V, p1V
def getTopWords(ny, sf):
    """Print the most indicative words for each of the two feeds.

    Trains the RSS classifier via ``localWords(ny, sf)`` (``ny`` is class 1,
    ``sf`` is class 0) and prints, for each class, every vocabulary word whose
    log probability exceeds -6.0, most probable first.

    Args:
        ny: parsed feed used as class 1.
        sf: parsed feed used as class 0.
    """
    vocabList, p0V, p1V = localWords(ny, sf)
    topNY = []
    topSF = []
    # Keep (word, log probability) pairs above the threshold for each class.
    for i in range(len(p0V)):
        if p0V[i] > -6.0:
            topSF.append((vocabList[i], p0V[i]))
        if p1V[i] > -6.0:
            topNY.append((vocabList[i], p1V[i]))

    # ``reverse=True``: the original lower-case ``true`` is an undefined name.
    sortedSF = sorted(topSF, key=lambda pair: pair[1], reverse=True)
    print('SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**SF**')
    for item in sortedSF:
        print(item[0])

    sortedNY = sorted(topNY, key=lambda pair: pair[1], reverse=True)
    # The banner for the second list names NY, not SF.
    print('NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**NY**')
    for item in sortedNY:
        print(item[0])
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 13:35:33 2018
@author: Administrator
"""
import numpy
from numpy import *
def train_bayes(train_matrix, train_class):
    """Estimate naive Bayes parameters from word-count vectors.

    Args:
        train_matrix: one word vector per document, all of equal length.
        train_class: one label per document; a label equal to 1 is the
            positive class, any other label is counted as class 0.

    Returns:
        A tuple ``(p1_vec, p0_vec, prior_positive)``: the natural log of the
        smoothed per-word probabilities for class 1 and class 0, and the sum
        of the labels divided by the number of documents.
    """
    doc_total = len(train_matrix)
    vocab_size = len(train_matrix[0])
    prior_positive = sum(train_class) / float(doc_total)

    # Laplace smoothing: every word count starts at 1 and every class total
    # at 2, so no word ends up with zero probability.
    word_counts = {0: ones(vocab_size), 1: ones(vocab_size)}
    class_totals = {0: 2.0, 1: 2.0}

    for idx, row in enumerate(train_matrix):
        bucket = 1 if train_class[idx] == 1 else 0
        word_counts[bucket] += row
        class_totals[bucket] += sum(row)

    # Work in log space so that products of many small probabilities do not
    # underflow when they are combined later.
    p1_vec = log(word_counts[1] / class_totals[1])
    p0_vec = log(word_counts[0] / class_totals[0])
    return p1_vec, p0_vec, prior_positive
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 13:10:01 2018
@author: Administrator
"""
from word_vector import *
from Train_bayes import *
from classifier_test import *
from Mail_filter import *
import feedparser
from RSS_filter import *
if __name__ == "__main__":
    # Toy data set: build the vocabulary and show the first document's vector.
    posts, labels = loadDataSet()
    vocabulary = Create_wordVec(posts)
    print(vocabulary)
    print(Words2Vec(vocabulary, posts[0]))

    # Vectorize every post and train on the whole toy set.
    train_matrix = [Words2Vec(vocabulary, post) for post in posts]
    print(train_matrix)
    p1_vec, p0_vec, p_pos = train_bayes(train_matrix, labels)
    print(p1_vec, p0_vec, p_pos)

    # Run the canned classification demo and the e-mail spam filter.
    Test_classify()
    Spam_filter('email')

    # Fetch the two RSS feeds over the network and train the feed classifier.
    ny = feedparser.parse('http://newyork.craigslist.org/stp/index.rss')
    sf = feedparser.parse('http://sfbay.craigslist.org/stp/index.rss')
    vocab, p0_feed, p1_feed = localWords(ny, sf)
    print(vocab, p0_feed, p1_feed)
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 17:20:42 2018
@author: Administrator
"""
from word_vector import *
from Train_bayes import *
from numpy import *
def classify_bayes(test_vec, p0, p1, p_pos):
    """Classify a word vector with a two-class naive Bayes model.

    Args:
        test_vec: word vector of the document to classify.
        p0: per-word log probabilities for class 0.
        p1: per-word log probabilities for class 1.
        p_pos: prior probability of class 1.

    Returns:
        1 if the class-1 log score is strictly greater than the class-0 log
        score, otherwise 0.
    """
    # In log space the product P(w|c) * P(c) becomes a sum.
    score_pos = sum(test_vec * p1) + log(p_pos)
    score_neg = sum(test_vec * p0) + log(1.0 - p_pos)
    return 1 if score_pos > score_neg else 0
#分类测试整体函数
def Test_classify():
    """Train on the toy data set and print the class of two sample documents.

    Loads the posts and labels from ``loadDataSet``, builds the vocabulary,
    trains with ``train_bayes`` and prints the predicted class for
    ``['love', 'my', 'dalmation']`` and ``['stupid', 'garbage']``.
    """
    listOPosts, listClasses = loadDataSet()
    myVocabList = Create_wordVec(listOPosts)

    # One presence vector per post. The stray trailing backslash that glued
    # the original append statement to the next comment line is gone.
    trainMat = [Words2Vec(myVocabList, postinDoc) for postinDoc in listOPosts]

    # NumPy arrays make the probability arithmetic in train_bayes element-wise.
    p1V, p0V, pAb = train_bayes(array(trainMat), array(listClasses))

    # Classify each sample document and print the result.
    for testEntry in (['love', 'my', 'dalmation'], ['stupid', 'garbage']):
        thisDoc = array(Words2Vec(myVocabList, testEntry))
        print(testEntry, 'classified as:', classify_bayes(thisDoc, p0V, p1V, pAb))
\ No newline at end of file
Hi Peter,
With Jose out of town, do you want to
meet once in a while to keep things
going and do some interesting stuff?
Let me know
Eugene
\ No newline at end of file
Ryan Whybrew commented on your status.
Ryan wrote:
"turd ferguson or butt horn."
Arvind Thirumalai commented on your status.
Arvind wrote:
""you know""
Reply to this email to comment on this status.
Thanks Peter.
I'll definitely check in on this. How is your book
going? I heard chapter 1 came in and it was in
good shape. ;-)
I hope you are doing well.
Cheers,
Troy
\ No newline at end of file
Jay Stepp commented on your status.
Jay wrote:
""to the" ???"
Reply to this email to comment on this status.
To see the comment thread, follow the link below:
LinkedIn
Kerry Haloney requested to add you as a connection on LinkedIn:
Peter,
I'd like to add you to my professional network on LinkedIn.
- Kerry Haloney
Hi Peter,
The hotels are the ones that rent out the tent. They are all lined up on the hotel grounds : )) So much for being one with nature, more like being one with a couple dozen tour groups and nature.
I have about 100M of pictures from that trip. I can go through them and get you jpgs of my favorite scenic pictures.
Where are you and Jocelyn now? New York? Will you come to Tokyo for Chinese New Year? Perhaps to see the two of you then. I will go to Thailand for winter holiday to see my mom : )
Take care,
D
yeah I am ready. I may not be here because Jar Jar has plane tickets to Germany for me.
\ No newline at end of file
Benoit Mandelbrot 1924-2010
Benoit Mandelbrot 1924-2010
Wilmott Team
Benoit Mandelbrot, the mathematician, the father of fractal mathematics, and advocate of more sophisticated modelling in quantitative finance, died on 14th October 2010 aged 85.
Wilmott magazine has often featured Mandelbrot, his ideas, and the work of others inspired by his fundamental insights.
You must be logged on to view these articles from past issues of Wilmott Magazine.
\ No newline at end of file
Hi Peter,
Sure thing. Sounds good. Let me know what time would be good for you.
I will come prepared with some ideas and we can go from there.
Regards,
-Vivek.
\ No newline at end of file
LinkedIn
Julius O requested to add you as a connection on LinkedIn:
Hi Peter.
Looking forward to the book!
Accept View invitation from Julius O
Yay to you both doing fine!
I'm working on an MBA in Design Strategy at CCA (top art school.) It's a new program focusing on more of a right-brained creative and strategic approach to management. I'm an 1/8 of the way done today!
\ No newline at end of file
I've thought about this and think it's possible. We should get another
lunch. I have a car now and could come pick you up this time. Does
this wednesday work? 11:50?
Can I have a signed copy of you book?
\ No newline at end of file
we saw this on the way to the coast...thought u might like it
hangzhou is huge, one day wasn't enough, but we got a glimpse...
we went inside the china pavilion at expo, it is pretty interesting,
each province has an exhibit...
\ No newline at end of file
Hi Hommies,
Just got a phone call from the roofer, they will come and spaying the foaming today. it will be dusty. pls close all the doors and windows.
Could you help me to close my bathroom window, cat window and the sliding door behind the TV?
I don't know how can those 2 cats survive......
Sorry for any inconvenience!
\ No newline at end of file
SciFinance now automatically generates GPU-enabled pricing & risk model source code that runs up to 50-300x faster than serial code using a new NVIDIA Fermi-class Tesla 20-Series GPU.
SciFinance is a derivatives pricing and risk model development tool that automatically generates C/C++ and GPU-enabled source code from concise, high-level model specifications. No parallel computing or CUDA programming expertise is required.
SciFinance's automatic, GPU-enabled Monte Carlo pricing model source code generation capabilities have been significantly extended in the latest release. This includes:
Ok I will be there by 10:00 at the latest.
\ No newline at end of file
That is cold. Is there going to be a retirement party?
Are the leaves changing color?
\ No newline at end of file
WHat is going on there?
I talked to John on email. We talked about some computer stuff that's it.
I went bike riding in the rain, it was not that cold.
We went to the museum in SF yesterday it was $3 to get in and they had
free food. At the same time was a SF Giants game, when we got done we
had to take the train with all the Giants fans, they are 1/2 drunk.
\ No newline at end of file
Yo. I've been working on my running website. I'm using jquery and the jqplot plugin. I'm not too far away from having a prototype to launch.
You used jqplot right? If not, I think you would like it.
\ No newline at end of file
There was a guy at the gas station who told me that if I knew Mandarin
and Python I could get a job with the FBI.
\ No newline at end of file
Hello,
Since you are an owner of at least one Google Groups group that uses the customized welcome message, pages or files, we are writing to inform you that we will no longer be supporting these features starting February 2011. We made this decision so that we can focus on improving the core functionalities of Google Groups -- mailing lists and forum discussions. Instead of these features, we encourage you to use products that are designed specifically for file storage and page creation, such as Google Docs and Google Sites.
For example, you can easily create your pages on Google Sites and share the site (http://www.google.com/support/sites/bin/answer.py?hl=en&answer=174623) with the members of your group. You can also store your files on the site by attaching files to pages (http://www.google.com/support/sites/bin/answer.py?hl=en&answer=90563) on the site. If you’re just looking for a place to upload your files so that your group members can download them, we suggest you try Google Docs. You can upload files (http://docs.google.com/support/bin/answer.py?hl=en&answer=50092) and share access with either a group (http://docs.google.com/support/bin/answer.py?hl=en&answer=66343) or an individual (http://docs.google.com/support/bin/answer.py?hl=en&answer=86152), assigning either edit or download only access to the files.
you have received this mandatory email service announcement to update you about important changes to Google Groups.
\ No newline at end of file
Zach Hamm commented on your status.
Zach wrote:
"doggy style - enough said, thank you & good night"
This e-mail was sent from a notification-only address that cannot accept incoming e-mail. Please do not reply to this message.
Thank you for your online reservation. The store you selected has located the item you requested and has placed it on hold in your name. Please note that all items are held for 1 day. Please note store prices may differ from those online.
If you have questions or need assistance with your reservation, please contact the store at the phone number listed below. You can also access store information, such as store hours and location, on the web at http://www.borders.com/online/store/StoreDetailView_98.
\ No newline at end of file
Hi Peter,
These are the only good scenic ones and it's too bad there was a girl's back in one of them. Just try to enjoy the blue sky : ))
D
\ No newline at end of file
--- Codeine 15mg -- 30 for $203.70 -- VISA Only!!! --
-- Codeine (Methylmorphine) is a narcotic (opioid) pain reliever
-- We have 15mg & 30mg pills -- 30/15mg for $203.70 - 60/15mg for $385.80 - 90/15mg for $562.50 -- VISA Only!!! ---
\ No newline at end of file
OrderCializViagra Online & Save 75-90%
0nline Pharmacy NoPrescription required
Buy Canadian Drugs at Wholesale Prices and Save 75-90%
FDA-Approved drugs + Superb Quality Drugs only!
Accept all major credit cards
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
The proven NaturalPenisEnhancement that works!
100% MoneyBack Guaranteeed
\ No newline at end of file
Buy Ambiem (Zolpidem) 5mg/10mg @ $2.39/- pill
30 pills x 5 mg - $129.00
60 pills x 5 mg - $199.20
180 pills x 5 mg - $430.20
30 pills x 10 mg - $ 138.00
120 pills x 10 mg - $ 322.80
\ No newline at end of file
OrderCializViagra Online & Save 75-90%
0nline Pharmacy NoPrescription required
Buy Canadian Drugs at Wholesale Prices and Save 75-90%
FDA-Approved drugs + Superb Quality Drugs only!
Accept all major credit cards
Order Today! From $1.38
BuyVIAGRA 25mg, 50mg, 100mg,
BrandViagra, FemaleViagra from $1.15 per pill
ViagraNoPrescription needed - from Certified Canadian Pharmacy
Buy Here... We accept VISA, AMEX, E-Check... Worldwide Delivery
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
\ No newline at end of file
A home based business opportunity is knocking at your door.
Dont be rude and let this chance go by.
You can earn a great income and find
your financial life transformed.
Learn more Here.
To Your Success.
Work From Home Finder Experts
\ No newline at end of file
Codeine (the most competitive price on NET!)
Codeine (WILSON) 30mg x 30 $156.00
Codeine (WILSON) 30mg x 60 $291.00 (+4 FreeViagra pills)
Codeine (WILSON) 30mg x 90 $396.00 (+4 FreeViagra pills)
Codeine (WILSON) 30mg x 120 $492.00 (+10 FreeViagra pills)
\ No newline at end of file
Get Up to 75% OFF at Online WatchesStore
Discount Watches for All Famous Brands
* Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands
* Louis Vuitton Bags & Wallets
* Gucci Bags
* Tiffany & Co Jewerly
Enjoy a full 1 year WARRANTY
Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost
You will 100% recieve your order
Save Up to 75% OFF Quality Watches
\ No newline at end of file
Hydrocodone/Vicodin ES/Brand Watson
Vicodin ES - 7.5/750 mg: 30 - $195 / 120 $570
Brand Watson - 7.5/750 mg: 30 - $195 / 120 $570
Brand Watson - 10/325 mg: 30 - $199 / 120 - $588
NoPrescription Required
FREE Express FedEx (3-5 days Delivery) for over $200 order
Major Credit Cards + E-CHECK
\ No newline at end of file
Get Up to 75% OFF at Online WatchesStore
Discount Watches for All Famous Brands
* Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands
* Louis Vuitton Bags & Wallets
* Gucci Bags
* Tiffany & Co Jewerly
Enjoy a full 1 year WARRANTY
Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost
You will 100% recieve your order
\ No newline at end of file
Percocet 10/625 mg withoutPrescription 30 tabs - $225!
Percocet, a narcotic analgesic, is used to treat moderate to moderately SeverePain
Top Quality, EXPRESS Shipping, 100% Safe & Discreet & Private.
Buy Cheap Percocet Online
\ No newline at end of file
Get Up to 75% OFF at Online WatchesStore
Discount Watches for All Famous Brands
* Watches: aRolexBvlgari, Dior, Hermes, Oris, Cartier, AP and more brands
* Louis Vuitton Bags & Wallets
* Gucci Bags
* Tiffany & Co Jewerly
Enjoy a full 1 year WARRANTY
Shipment via reputable courier: FEDEX, UPS, DHL and EMS Speedpost
You will 100% recieve your order
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
\ No newline at end of file
Experience with BiggerPenis Today! Grow 3-inches more
The Safest & Most Effective Methods Of_PenisEn1argement.
Save your time and money!
BetterErections with effective Ma1eEnhancement products.
#1 Ma1eEnhancement Supplement. Trusted by Millions. Buy Today!
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
The proven NaturalPenisEnhancement that works!
100% MoneyBack Guaranteeed
\ No newline at end of file
Percocet 10/625 mg withoutPrescription 30 tabs - $225!
Percocet, a narcotic analgesic, is used to treat moderate to moderately SeverePain
Top Quality, EXPRESS Shipping, 100% Safe & Discreet & Private.
Buy Cheap Percocet Online
\ No newline at end of file
--- Codeine 15mg -- 30 for $203.70 -- VISA Only!!! --
-- Codeine (Methylmorphine) is a narcotic (opioid) pain reliever
-- We have 15mg & 30mg pills -- 30/15mg for $203.70 - 60/15mg for $385.80 - 90/15mg for $562.50 -- VISA Only!!! ---
\ No newline at end of file
OEM Adobe & Microsoft softwares
Fast order and download
Microsoft Office Professional Plus 2007/2010 $129
Microsoft Windows 7 Ultimate $119
Adobe Photoshop CS5 Extended
Adobe Acrobat 9 Pro Extended
Windows XP Professional & thousand more titles
\ No newline at end of file
Bargains Here! Buy Phentermin 37.5 mg (K-25)
Buy Genuine Phentermin at Low Cost
VISA Accepted
30 - $130.50
60 - $219.00
90 - $292.50
120 - $366.00
180 - $513.00
\ No newline at end of file
You Have Everything To Gain!
Incredib1e gains in length of 3-4 inches to yourPenis, PERMANANTLY
Amazing increase in thickness of yourPenis, up to 30%
BetterEjacu1ation control
Experience Rock-HardErecetions
Explosive, intenseOrgasns
Increase volume ofEjacu1ate
Doctor designed and endorsed
100% herbal, 100% Natural, 100% Safe
\ No newline at end of file
Bargains Here! Buy Phentermin 37.5 mg (K-25)
Buy Genuine Phentermin at Low Cost
VISA Accepted
30 - $130.50
60 - $219.00
90 - $292.50
120 - $366.00
180 - $513.00
\ No newline at end of file
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 20:50:09 2018
@author: Administrator
"""
def Word2Vec_bag(wordList, input_set):
    """Convert a document into a bag-of-words count vector.

    Unlike a presence (0/1) vector, each position holds the number of times
    the corresponding vocabulary word occurs in the document.

    Args:
        wordList: the vocabulary; position ``i`` of the result counts
            ``wordList[i]``.
        input_set: the tokens of the document to convert.

    Returns:
        A list of ints as long as ``wordList``. Tokens that are not in the
        vocabulary are ignored.
    """
    # Map each word to its first position once, instead of scanning the
    # vocabulary with ``in`` and ``.index()`` for every token.
    position = {}
    for idx, word in enumerate(wordList):
        position.setdefault(word, idx)

    return_vec = [0] * len(wordList)
    for word in input_set:
        idx = position.get(word)
        if idx is not None:
            return_vec[idx] += 1  # count every occurrence
    return return_vec
# -*- coding: utf-8 -*-
"""
Created on Fri Jul 6 12:56:38 2018
@author: Administrator
"""
from numpy import *
def loadDataSet():
    """Return the built-in toy corpus of tokenized posts and their labels.

    Returns:
        A tuple ``(postingList, classVec)``: a list of six token lists and a
        list of six labels, where 1 marks an abusive post and 0 a normal one.
    """
    labelled_posts = (
        (0, 'my dog has flea problems help please'),
        (1, 'maybe not take him to dog park stupid'),
        (0, 'my dalmation is so cute I love him'),
        (1, 'stop posting stupid worthless garbage'),
        (0, 'mr licks ate my steak how to stop him'),
        (1, 'quit buying worthless dog food stupid'),
    )
    postingList = [text.split() for _, text in labelled_posts]
    classVec = [label for label, _ in labelled_posts]
    return postingList, classVec
def Create_wordVec(dataset):
    """Build the vocabulary: every distinct word found in any document.

    Args:
        dataset: an iterable of documents, each an iterable of words.

    Returns:
        A list of the unique words, in no particular order.
    """
    # The union of all documents keeps exactly one copy of each word.
    return list(set().union(*dataset))
def Words2Vec(wordList, input_set):
    """Convert a document into a 0/1 word-presence vector.

    Args:
        wordList: the vocabulary; position ``i`` of the result refers to
            ``wordList[i]``.
        input_set: the tokens of the document to convert.

    Returns:
        A list as long as ``wordList`` holding 1 where the vocabulary word
        occurs in the document and 0 elsewhere. A message is printed for each
        token that is not in the vocabulary.
    """
    presence = [0 for _ in wordList]
    for token in input_set:
        try:
            slot = wordList.index(token)
        except ValueError:
            # Token is not part of the vocabulary: report it and move on.
            print("the word %s is not in list" % token)
            continue
        presence[slot] = 1
    return presence
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册