标签：index 4.6 word python dic ch int num 字符串

1 字符串排序

有多篇文章输入，每篇文章分为标题行和正文行，每篇文章输入时标题和正文各占一行。需要统计所有文章中出现的热词并输出topN的热词。title中的词权重为3，text中权重为1，所有文章中的热词权重相加即为该词的总权重。排序方法为：

两词权重大的排前面
若权重一样，则在title中最先出现的排前面
若title中出现顺序一样，则在text中最先出现的排前面

测试案例：
3 2
xinguan feiyan xinzeng bendi quezhen anli
ju baodao chengdu xinzeng xinguan feiyan bendi quezhen anli yili shenzhen xinzeng bendi quezhen anli liangli yi***gti kongzhi lianghao
xinguan yimiao linchuang shiyan
wuzhong xinguan yimiao tongguo sanqi linchaung shiyan xiaoguo lianghao

需要统计文本单词词频，然后根据词频、在文章中出现的位置来排序，然后输出频率最高的 K 个单词。

此题非常直观，思路是哈希表+sort排序，但是值得注意的是有多重排序条件，应该如何进行处理？

这道题可以用 topK 算法做到期望复杂度 O(n)，但是在笔试中直接排序就可以 AC。

import sys
from typing import List
readline = sys.stdin.readline


def readstr() -> str:
    return readline().strip()


def readints() -> List[int]:
    return list(map(int, readline().strip().split()))


class Count:
    def __init__(self, t_num, c_num, t_index, c_index):
        self.t_num = t_num
        self.c_num = c_num
        self.t_index = t_index
        self.c_index = c_index

    def __lt__(self, o):
        if self.t_num != o.t_num:
            return self.t_num > o.t_num
        elif self.c_num != o.c_num:
            return self.c_num > o.c_num
        elif self.t_index != o.t_index:
            return self.t_index < o.t_index
        else:
            return self.c_index < o.c_index

def func():
    topN, M = readints()
    dic = {}
    cur_t = 0
    cur_c = 0
    for i in range(M):
        title = readstr()
        words = title.split()
        for j, word in enumerate(words):
            if word not in dic:
                count = Count(3, 3, cur_t + j, float("inf"))
                dic[word] = count
            else:
                dic[word].t_num += 3
                dic[word].c_num += 3
                if dic[word].t_index is float("inf"):
                    dic[word].t_index = cur_t + j
        cur_t += len(words)

        content = readstr()
        words = content.split()
        for j, word in enumerate(words):
            if word not in dic:
                count = Count(1, 0, float("inf"), cur_c + j)
                dic[word] = count
            else:
                dic[word].c_num += 1
                if dic[word].c_index is float("inf"):
                    dic[word].c_index = cur_c + j
        cur_c += len(words)
    sorted_array = sorted(dic.items(), key=lambda x: x[1])
    ans = []
    for i in range(topN):
        ans.append(sorted_array[i][0])
    print(" ".join(ans))


if __name__ == "__main__":
    func()

本题最恶心的是输入输出，对于经常刷leetcode的人很不习惯。本题如果第一行用两个输入输出则会在第一行留下一个 ’\n’，之后循环读取会先读取这个’\n’，再读取之后的文章行。可以用getline处理掉这个换行符，或者用 scanf_s("%d %d\n", &topN,&M)进行格式化读取，注意里面有个’\n’。

本题中还有一个关键的问题就是多重条件下的排序，可以自己建立一个类，写一个类的比较函数进行排序。

2 拓扑排序

某段程序依赖于其他的程序启动，题目提供了 n 个程序（0 ~ n-1）以及他们之间的依赖关系，要求按下标顺序返回 target 程序所有前置依赖项。若依赖关系中有环存在，则该程序不能启动，返回 -1；若该程序没有依赖关系，则不用管它。

其中，第一行为程序个数n，第二行为target程序下标，之后的第 i 行表示第 i 个程序的相关依赖信息。

对于依赖信息的这一行，第一个数表示依赖个数，之后的数用逗号隔开，每个数表示其依赖程序的下标

测试案例：
4
2
0
1，0
1，1
2，0，2
输出：
0 1

需要找到启动服务的前置服务，并且这些前置服务存在传递关系。这是一道很经典的拓扑排序模板题。

解决此类问题可以用 dfs 和 bfs。由于该题只要输出某个节点的前置依赖节点，不妨使用 dfs。

类似力扣207题（课程表）

#include <bits/stdc++.h>

using namespace std;

#define maxn 5050

int m, n;
vector<int> g[maxn];
vector<int> ans;
int vis[maxn];
bool open(int x) {
    if (vis[x] == 1) return true;
    else if (vis[x] == -1) return false;
    vis[x] = -1;
    for (auto to : g[x]) {
        if (!open(to)) return false;
    }

    if (x != m) ans.push_back(x);
    vis[x] = 1;
    return true;
}

int readint() {
    int sn = 1;
    char ch = getchar();
    while (!(isdigit(ch) || ch == '-')) ch = getchar();
    int ret = 0;
    if (ch == '-') sn = -1;
    else ret = ch - '0';
    while (isdigit(ch = getchar())) {
        ret = ret * 10 + ch - '0';
    }
    return sn * ret;
}

int main() {
    n = readint(), m = readint();
    memset(vis, 0, sizeof(vis));
    for (int i = 0; i < n; ++i) {
        int num = readint();
        int rely;
        for (int j = 0; j < num; ++j) {
            rely = readint();
            g[i].push_back(rely);
        }
    }

    if (!open(m)) {
        cout << "-1" << endl;
    } else if (ans.empty()) {
        cout << "null" << endl;
    } else {
        for (auto e : ans) {
            cout << e;
            if (e != ans.back()) cout << ",";
            else cout << endl;
        }
    }

    return 0;
}

来源：https://www.nowcoder.com/discuss/924780?type=post&order=recall&pos=&page=3&ncTraceId=&channel=-1&source_id=search_post_nctrack&gio_id=F12E16E80D0FED8CE4A3EF4E3785DE10-1662527756745

3 单调栈

与接雨水类似

#include <bits/stdc++.h>

using namespace std;

#define maxn 5050

int m, n;
vector<int> height;

int readint() {
    int sn = 1;
    char ch = getchar();
    while (!(isdigit(ch) || ch == '-')) ch = getchar();
    int ret = 0;
    if (ch == '-') sn = -1;
    else ret = ch - '0';
    while (isdigit(ch = getchar())) {
        ret = ret * 10 + ch - '0';
    }
    return sn * ret;
}

int main() {
    n = readint(), m = readint();
    height.resize(m);

    for (int i = 0; i < m; ++i) {
        height[i] = readint();
    }
    height.push_back(0);
    stack<int> st;
    st.push(-1);
    long long ans = 0;
    for (int i = 0; i < m+1; ++i) {
        while (st.top() != -1 && height[st.top()] < height[i]) {
            int h = height[st.top()]; st.pop();
            int pre = st.top();
            h -= min(pre == -1 ? 0 : height[pre], height[i]);
            if (i - pre - 1 >= n && h < 0) {
                int t = (i - pre - 1) / n;
                ans += 1ll * t * (-h);
            }
        }
        st.push(i);
    }
    cout << ans << endl;
    return 0;
}

来源：https://www.nowcoder.com/discuss/924780?type=post&order=recall&pos=&page=3&ncTraceId=&channel=-1&source_id=search_post_nctrack&gio_id=F12E16E80D0FED8CE4A3EF4E3785DE10-1662527756745

标签：index,4.6,word,python,dic,ch,int,num,字符串
From： https://www.cnblogs.com/Jojo-L/p/16665077.html

4.6字符串大战 python

1 字符串排序

2 拓扑排序

3 单调栈

相关文章

赞助商

阅读排行