tfidf 計(jì)算
[輸入]:
word_list = []
for i in range(len(corpus)):
word_list.append(corpus[i].split(' '))
print(word_list)
[輸出]:
[['this', 'is', 'the', 'first', 'document'],
['this', 'is', 'the', 'second', 'second', 'document'],
['and', 'the', 'third', 'one'],
['is', 'this', 'the', 'first', 'document']]
[輸入]:
countlist = []
for i in range(len(word_list)):
count = Counter(word_list[i])
countlist.append(count)
countlist
[輸出]:
[Counter({'document': 1, 'first': 1, 'is': 1, 'the': 1, 'this': 1}),
Counter({'document': 1, 'is': 1, 'second': 2, 'the': 1, 'this': 1}),
Counter({'and': 1, 'one': 1, 'the': 1, 'third': 1}),
Counter({'document': 1, 'first': 1, 'is': 1, 'the': 1, 'this': 1})]
# word可以通過count得到腰懂,count可以通過countlist得到
# count[word]可以得到每個(gè)單詞的詞頻梗逮, sum(count.values())得到整個(gè)句子的單詞總數(shù)
def tf(word, count):
return count[word] / sum(count.values())
# 統(tǒng)計(jì)的是含有該單詞的句子數(shù)
def n_containing(word, count_list):
return sum(1 for count in count_list if word in count)
# len(count_list)是指句子的總數(shù),n_containing(word, count_list)是指含有該單詞的句子的總數(shù)绣溜,加1是為了防止分母為0
def idf(word, count_list):
return math.log(len(count_list) / (1 + n_containing(word, count_list)))
# 將tf和idf相乘
def tfidf(word, count, count_list):
return tf(word, count) * idf(word, count_list)
auc計(jì)算
在有M個(gè)正樣本,N個(gè)負(fù)樣本的數(shù)據(jù)集里慷彤。一共有MN對(duì)樣本(一對(duì)樣本即,一個(gè)正樣本與一個(gè)負(fù)樣本)。統(tǒng)計(jì)這MN對(duì)樣本里底哗,正樣本的預(yù)測(cè)概率大于負(fù)樣本的預(yù)測(cè)概率的個(gè)數(shù)贷屎。
ID label pro
A 0 0.1
B 0 0.4
C 1 0.4
D 1 0.8
假設(shè)有4條樣本。2個(gè)正樣本艘虎,2個(gè)負(fù)樣本唉侄,那么MN=4。即總共有4個(gè)樣本對(duì)野建。分別是:
(D,B),(D,A),(C,B),(C,A)属划。
auc=(1+1+1+0.5)/22=0.875
下面這個(gè)代碼不能處理pred相等時(shí)的情況,但是之前又一次筆試這么寫的都過了樣例
#include<iostream>
#include<algorithm>
#include<vector>
using namespace std;
struct node {
int label;
double pred;
}num[1010];
bool cmp(node x, node y)
{
return x.pred < y.pred;
}
int main() {
int n, neg = 0, pos = 0;
cin >> n;
for (int i = 0; i < n; i++)
{
cin >> num[i].label;
if (num[i].label == 1)pos++;
else neg++;
}
for (int i = 0; i < n; i++)
cin >> num[i].pred;
sort(num, num + n, cmp);
int cnt = 0, cntsum = 0;
for (int i = 0; i < n; i++)
{
if (num[i].label == 1)
cntsum += cnt;
else cnt++;
}
cout << cntsum * 1.0 / (pos*neg);
return 0;
}
/*
4
0 0 1 1
0.1 0.4 0.35 0.8
*/
二叉樹:輸出根節(jié)點(diǎn)到葉子的路徑
#include<iostream>
#include<algorithm>
#include<vector>
using namespace std;
struct TreeNode {
int val;
TreeNode*right, *left;
};
void dfs(TreeNode*root, vector<int>&path, vector<vector<int>>&ans)
{
if (root == NULL)return;
path.push_back(root->val);
if (!root->left && !root->right) {
ans.push_back(path);
return;
}
if (root->left)dfs(root, path, ans);
if (root->right)dfs(root, path, ans);
}
int main() {
TreeNode*root;
vector<int>path;
vector<vector<int>>ans;
dfs(root, path, ans);
}
找到字串中合法匹配的連續(xù)對(duì)數(shù)
`#s = input()`
`s ``=` `'(())(()()()'`
`#s = '(())(()'`
`re ``=` `[]`
`dp ``=` `[``0``]`
`for` `i ``in` `s:`
`if` `not` `re:`
`re.append(i)`
`dp.append(``0``)`
`else``:`
`if` `i``=``=``'('``:`
`re.append(i)`
`dp.append(``0``)`
`else``:`
`if` `re[``-``1``]``=``=``'('``:`
`re.pop()`
`dp.append(dp.pop()``+``1``)`
`else``:`
`re.append(``')'``)`
`dp.append(``0``)`
`print``(re)`
`print``(dp)`
`m ``=` `0`
`cur ``=` `0`
`for` `i ``in` `dp:`
`if` `i!``=``0``:`
`cur``+``=``i`
`m ``=` `max``(cur,m)`
`else``:`
`cur ``=` `0`
`print``(m)`
題目:1-N的自然數(shù)中候生,少了一個(gè)同眯,找出這個(gè)數(shù)
思路:
1、求和的思路唯鸭。求出1-N的和须蜗,減去數(shù)列的總和,差即為這個(gè)數(shù)
public int findLost(int[] a,int n){
int sum = 0;
int sum1 = 0;
for(int i=0;i<n;i++){
sum+=a[i];
sum1+=i;
}
int res = sum1+n-sum;
return res;
}
1
2
3
4
5
6
7
8
9
10
2目溉、用異或的方法明肮。任何數(shù)異或自己都等于0,任何數(shù)異或0都等于他自己缭付。異或兩次即可
假如缺的為3柿估。result = 1245N
第二次異或后 result = 1245N 12345N
= 0^3 = 3
public int findLost(int[] a,int n){
int result = 0;
for (int i = 0; i < a.length; i++) {
result = result ^ a[i];
}
for (int i = 0; i < n; i++) {
result=result^(i+1);
}
return result;
}
題目:1-N的自然數(shù)中,少了兩個(gè)陷猫,找出這個(gè)數(shù)
輸入N秫舌,求N的階乘的末尾有多少個(gè)0
思路:n!= 123...n绣檬;我們要分析一下0是怎么來的足陨,0是2*5得來的,那也就是說看有多少個(gè)2,5就可以了娇未,
再分析墨缘,因子2出現(xiàn)的次數(shù),2,4,6,8...忘蟹,因子5出現(xiàn)的次數(shù)飒房,5,10,15,25...
很顯然,2出現(xiàn)的次數(shù)一定是比5出現(xiàn)的次數(shù)多的媚值,那么我們只需要計(jì)算5出現(xiàn)的次數(shù)有多少狠毯,就可以得到會(huì)有多少個(gè)10,也就是會(huì)有多少個(gè)0了褥芒。所以思路就是嚼松,遍歷1-n嫡良,求每個(gè)數(shù)中因子5的個(gè)數(shù),加加加献酗,就ok了寝受。
求完全二叉樹的最后一層的最后一個(gè)節(jié)點(diǎn)
BinaryTreeNode* getLastNode(BinaryTreeNode* root)
{
if(!root || (!root->left && !root->right))return root;
int depth = 0;
BinaryTreeNode* curNode = root;
while(curNode)//計(jì)算二叉樹的深度
{
depth++;
curNode = curNode->left;
}
int level = 0,tmpDepth = 0;
while(root)
{
level++;//當(dāng)前遍歷到哪一層,跟節(jié)點(diǎn)是第一層
if(level == depth)break;//防止右子樹為空
curNode = root;
if(curNode->right)
{
BinaryTreeNode* pCur = curNode;//當(dāng)前節(jié)點(diǎn)的父節(jié)點(diǎn)
curNode = curNode->right;
tmpDepth = level + 1;
while(curNode->left)
{
tmpDepth++;
pCur = curNode;
curNode = curNode->left;
}
if(tmpDepth < depth)root = root -> left;//二分查找左子樹
else if(!pCur->right || pCur->right == curNode)return curNode;
else root = root->right;//二分查找右子樹
}
else root = root->left;
}
return root;
}
判斷是否為完全二叉樹
鏈表快排
ListNode* partition(ListNode*start, ListNode*end)
{
int num = start->val;
ListNode*p = start, *q = start->next;
while (q != end)
{
if (q->val < num) {
p = p->next;
swap(p->val, q->val);
}
q = q->next;
}
swap(p->val, start->val);
return p;
}
void quicksort(ListNode*start, ListNode*end) {
if (start != end)
{
ListNode*index = partition(start, end);
quicksort(start, index);
quicksort(index->next, end);
}
}
不同長(zhǎng)度的繩子有不同的價(jià)值,一根繩子如何切分可以讓總價(jià)值最大
https://www.cnblogs.com/tgycoder/p/4980655.html
int dfs(int p[], int n)
{
if (n == 0)return 0;
int profit = 0;
for (int i = 1; i <= n; i++)
profit = max(profit, p[i] + dfs(p, n - i));
return profit;
}
int dfs(int p[], int r[], int n)
{
if (r[n])return r[n];
int profit = 0;
for (int i = 1; i <= n; i++)
profit = max(profit, p[i] + dfs(p, r, n - i));
r[n] = profit;
return profit;
}
dijkstra
int G[maxn][maxn];
int d[maxn];
bool vis[maxn];
void dijkstra(int s)
{
fill(d, d + maxn, INF);
d[s] = 0;
while (1)
{
int u = -1, mind = INF;
for (int i = 0; i < n; i++)
if (!vis[i] && d[i] < mind)mind = d[i], u = i;
if (u == -1)break;
vis[u] = true;
for (int v = 0; v < n; v++)
{
if (!vis[v] && G[u][v] < INF)
d[v] = min(d[v], d[u] + G[u][v]);
}
}
}
數(shù)軸上的最長(zhǎng)連續(xù)線段
struct Interval {
int start, end;
};
static bool cmp(Interval x, Interval y)
{
return x.start < y.start;
}
int merge(vector<Interval>intervals) {
vector<Interval>ans;
int maxlen = 0;
sort(intervals.begin(), intervals.end(), cmp);
ans.push_back(intervals[0]);
for (int i = 1; i < intervals.size(); i++)
{
if (intervals[i].start <= ans.back().end)
{
ans.back().start = min(ans.back().start, intervals[i].start);
ans.back().end = max(ans.back().end, intervals[i].end);
maxlen = max(maxlen, ans.back().end - ans.back().start - 1);
}
else if (intervals[i].start > ans.back().end)
{
ans.push_back(intervals[i]);
maxlen = max(maxlen, intervals[i].start-intervals[i].end);
}
}
return maxlen;
}
矩陣中的最短路,有門有鑰匙。動(dòng)態(tài)規(guī)劃加狀態(tài)向量
一個(gè)m*n矩陣超埋,只能往左或往右扶叉,矩陣中的數(shù)字代表不同的權(quán)值词身,求一條路徑,該路徑的權(quán)值和與所給的target值最接近。
找子串在原串中第一次出現(xiàn)的位置