commit
75d4a90d5f
2
copy
2
copy
|
@ -42,7 +42,7 @@ if (validate_fid(source)) {
|
|||
copy({ source, target, name, min_size, update, not_teamdrive, service_account }).then(folder => {
|
||||
if (!folder) return
|
||||
const link = 'https://drive.google.com/drive/folders/' + folder.id
|
||||
console.log('任务完成,新文件夹链接:\n', link)
|
||||
console.log('\n任务完成,新文件夹链接:\n', link)
|
||||
})
|
||||
} else {
|
||||
console.warn('目录ID缺失或格式错误')
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
{
|
||||
"name": "gd-utils",
|
||||
"version": "1.0.0",
|
||||
"version": "1.0.1",
|
||||
"description": "google drive utils",
|
||||
"repository": "iwestlin/gd-utils",
|
||||
"main": "src/gd.js",
|
||||
"scripts": {
|
||||
"start": "https_proxy='http://127.0.0.1:1086' nodemon server.js",
|
||||
|
|
65
readme.md
65
readme.md
|
@ -2,14 +2,68 @@
|
|||
|
||||
> 不只是最快的 google drive 拷贝工具 [与其他工具的对比](./compare.md)
|
||||
|
||||
## demo
|
||||
[https://drive.google.com/drive/folders/124pjM5LggSuwI1n40bcD5tQ13wS0M6wg](https://drive.google.com/drive/folders/124pjM5LggSuwI1n40bcD5tQ13wS0M6wg)
|
||||
|
||||
## 更新日志
|
||||
[2020-06-30]
|
||||
|
||||
- 命令行操作时,不换行输出进度信息,同时将进度信息输出间隔调整为1秒
|
||||
- 隐藏 timeout exceed 报错信息
|
||||
|
||||
## 重要更新(2020-06-29)
|
||||
如果你遇到了以下几种问题,请务必阅读此节:
|
||||
|
||||
- 任务异常中断
|
||||
- 命令行日志无限循环输出但进度不变
|
||||
- 复制完发现丢文件
|
||||
|
||||
有不少网友遇到这些问题,但是作者一直无法复现,直到有tg网友发了张运行日志截图:
|
||||
![](./static/error-log.png)
|
||||
报错日志的意思是找不到对应的目录ID,这种情况会发生在SA没有对应目录的阅读权限的时候。
|
||||
当进行server side copy时,需要向Google的服务器提交要复制的文件ID,和复制的位置,也就是新创建的目录ID,由于在请求时是随机选取的SA,所以当选中没有权限的SA时,这次拷贝请求没有对应目录的权限,就会发生图中的错误。
|
||||
|
||||
**所以,上述这些问题的源头是,sa目录下,混杂了没有权限的json文件!**
|
||||
|
||||
以下是解决办法:
|
||||
- 在项目目录下,执行 `git pull` 拉取最新代码
|
||||
- 执行 `./validate-sa.js -h` 查看使用说明
|
||||
- 选择一个你的sa拥有阅读权限的目录ID,执行 `./validate-sa.js 你的目录ID`
|
||||
|
||||
程序会读取sa目录下所有json文件,依次检查它们是否拥有对 `你的目录ID` 的阅读权限,如果最后发现了无效的SA,程序会提供选项允许用户将无效的sa json移动到特定目录。
|
||||
|
||||
将无效sa文件移动以后,如果你使用了pm2启动,需要 `pm2 reload server` 重启下进程。
|
||||
|
||||
操作示例: [https://drive.google.com/drive/folders/1iiTAzWF_v9fo_IxrrMYiRGQ7QuPrnxHf](https://drive.google.com/drive/folders/1iiTAzWF_v9fo_IxrrMYiRGQ7QuPrnxHf)
|
||||
|
||||
## 常见问题
|
||||
项目发布一天以内,作者至少收到100种无法配置成功的反馈……忙得焦头烂额,暂时没空做搭建过程的录屏。
|
||||
下面是一些网友的踩坑心得,如果你配置的时候也不小心掉进坑里,可以进去找找有没有解决办法:
|
||||
|
||||
- [ikarosone 基于宝塔的搭建过程](https://www.ikarosone.top/archives/195.html)
|
||||
|
||||
- [@greathappyforest 踩的坑](doc/tgbot-appache2-note.md)
|
||||
|
||||
在命令行操作时如果输出 `timeout exceed` 这样的消息,是正常情况,不会影响最终结果,因为程序对每个请求都有7次重试的机制。
|
||||
如果timeout的消息比较多,可以考虑降低并行请求数,下文有具体方法。
|
||||
|
||||
复制结束后,如果最后输出的消息里有 `未读取完毕的目录ID`,只需要在命令行执行上次同样的拷贝命令,选continue即可继续。
|
||||
|
||||
如果你成功复制完以后,统计新的文件夹链接发现文件数比源文件夹少,说明Google正在更新数据库,请给它一点时间,一般等半小时再统计数据会比较完整。
|
||||
|
||||
如果你使用tg操作时,发送拷贝命令以后,/task 进度始终未开始(在复制文件数超多的文件夹时常会发生),是正常现象。
|
||||
这是因为程序正在获取源文件夹的所有文件信息。它的运行机制严格按照以下顺序:
|
||||
|
||||
1、获取源文件夹所有文件信息
|
||||
2、根据源文件夹的目录结构,在目标文件夹创建目录
|
||||
3、所有目录创建完成后,开始复制文件
|
||||
|
||||
**如果源文件夹的文件数非常多(一百万以上),请一定在命令行进行操作**,因为程序运行的时候会把文件信息保存在内存中,文件数太多的话容易内存占用太多被nodejs干掉。可以像这样执行命令:
|
||||
```
|
||||
node --max-old-space-size=4096 count folder-id -S
|
||||
```
|
||||
这样进程就能最大占用4G内存了。
|
||||
|
||||
|
||||
## 搭建过程
|
||||
[https://drive.google.com/drive/folders/1Lu7Cwh9lIJkfqYDIaJrFpzi8Lgdxr4zT](https://drive.google.com/drive/folders/1Lu7Cwh9lIJkfqYDIaJrFpzi8Lgdxr4zT)
|
||||
|
||||
|
@ -35,9 +89,6 @@
|
|||
|
||||
- 支持 telegram bot,配置完成后,上述功能均可通过 bot 进行操作
|
||||
|
||||
## demo
|
||||
[https://drive.google.com/drive/folders/124pjM5LggSuwI1n40bcD5tQ13wS0M6wg](https://drive.google.com/drive/folders/124pjM5LggSuwI1n40bcD5tQ13wS0M6wg)
|
||||
|
||||
## 环境配置
|
||||
本工具需要安装nodejs,客户端安装请访问[https://nodejs.org/zh-cn/download/](https://nodejs.org/zh-cn/download/),服务器安装可参考[https://github.com/nodesource/distributions/blob/master/README.md#debinstall](https://github.com/nodesource/distributions/blob/master/README.md#debinstall)
|
||||
|
||||
|
@ -159,12 +210,6 @@ const DEFAULT_TARGET = '' // 必填,拷贝默认目的地ID,如果不指定t
|
|||
## 注意事项
|
||||
程序的原理是调用了[google drive官方接口](https://developers.google.com/drive/api/v3/reference/files/list),递归获取目标文件夹下所有文件及其子文件夹信息,粗略来讲,某个目录下包含多少个文件夹,就至少需要这么多次请求才能统计完成。
|
||||
|
||||
如果你要统计的文件数非常多(一百万以上),请一定在命令行进行操作,因为程序运行的时候会把文件信息保存在内存中,文件数太多的话容易内存占用太多被nodejs干掉。可以像这样执行命令:
|
||||
```
|
||||
node --max-old-space-size=4096 count folder-id -S
|
||||
```
|
||||
这样进程就能最大占用4G内存了。
|
||||
|
||||
目前尚不知道google是否会对接口做频率限制,也不知道会不会影响google账号本身的安全。
|
||||
|
||||
**请勿滥用,后果自负**
|
||||
|
|
77
src/gd.js
77
src/gd.js
|
@ -16,8 +16,9 @@ const FOLDER_TYPE = 'application/vnd.google-apps.folder'
|
|||
const { https_proxy } = process.env
|
||||
const axins = axios.create(https_proxy ? { httpsAgent: new HttpsProxyAgent(https_proxy) } : {})
|
||||
|
||||
const sa_files = fs.readdirSync(path.join(__dirname, '../sa')).filter(v => v.endsWith('.json'))
|
||||
let SA_TOKENS = sa_files.map(filename => {
|
||||
const SA_FILES = fs.readdirSync(path.join(__dirname, '../sa')).filter(v => v.endsWith('.json'))
|
||||
|
||||
let SA_TOKENS = SA_FILES.map(filename => {
|
||||
const gtoken = new GoogleToken({
|
||||
keyFile: path.join(__dirname, '../sa', filename),
|
||||
scope: ['https://www.googleapis.com/auth/drive']
|
||||
|
@ -152,11 +153,10 @@ async function walk_and_save ({ fid, not_teamdrive, update, service_account }) {
|
|||
const limit = pLimit(PARALLEL_LIMIT)
|
||||
|
||||
const loop = setInterval(() => {
|
||||
console.log('================')
|
||||
console.log('已获取的对象数量', result.length)
|
||||
console.log('正在进行的网络请求', limit.activeCount)
|
||||
console.log('排队等候的目录数量', limit.pendingCount)
|
||||
}, LOG_DELAY)
|
||||
const now = dayjs().format('HH:mm:ss')
|
||||
const message = `${now} | 已获取对象 ${result.length} | 排队等候的网络请求 ${limit.pendingCount}`
|
||||
print_progress(message)
|
||||
}, 1000)
|
||||
|
||||
async function recur (parent) {
|
||||
let files, should_save
|
||||
|
@ -180,8 +180,12 @@ async function walk_and_save ({ fid, not_teamdrive, update, service_account }) {
|
|||
result.push(...files)
|
||||
return Promise.all(folders.map(v => recur(v.id)))
|
||||
}
|
||||
try {
|
||||
await recur(fid)
|
||||
console.log('信息获取完毕')
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
}
|
||||
console.log('\n信息获取完毕')
|
||||
not_finished.length ? console.log('未读取完毕的目录ID:', JSON.stringify(not_finished)) : console.log('所有目录读取完毕')
|
||||
clearInterval(loop)
|
||||
const smy = summary(result)
|
||||
|
@ -273,7 +277,20 @@ async function get_access_token () {
|
|||
}
|
||||
|
||||
/**
 * Pick a random service-account entry and resolve an access token for it.
 *
 * An entry whose token request fails is logged and removed from the
 * module-level SA_TOKENS pool, then another random entry is tried.
 *
 * @returns {Promise<*>} whatever real_get_sa_token resolves to for the chosen entry
 * @throws {Error} when the pool has been exhausted
 */
async function get_sa_token () {
  while (SA_TOKENS.length) {
    const tk = get_random_element(SA_TOKENS)
    try {
      // `return await` (not a bare return) so a rejection is caught right here
      return await real_get_sa_token(tk)
    } catch (e) {
      console.log(e)
      // Drop the failing account so it is never selected again
      SA_TOKENS = SA_TOKENS.filter(v => v.gtoken !== tk.gtoken)
    }
  }
  throw new Error('没有可用的SA帐号')
}
|
||||
|
||||
function real_get_sa_token (el) {
|
||||
const { value, expires, gtoken } = el
|
||||
// 把gtoken传递出去的原因是当某账号流量用尽时可以依此过滤
|
||||
if (Date.now() < expires) return { access_token: value, gtoken }
|
||||
|
@ -469,23 +486,27 @@ async function real_copy ({ source, target, name, min_size, update, not_teamdriv
|
|||
}
|
||||
|
||||
async function copy_files ({ files, mapping, root, task_id }) {
|
||||
console.log('开始复制文件,总数:', files.length)
|
||||
console.log('\n开始复制文件,总数:', files.length)
|
||||
const limit = pLimit(PARALLEL_LIMIT)
|
||||
let count = 0
|
||||
const loop = setInterval(() => {
|
||||
console.log('================')
|
||||
console.log('已复制的文件数量', count)
|
||||
console.log('正在进行的网络请求', limit.activeCount)
|
||||
console.log('排队等候的文件数量', limit.pendingCount)
|
||||
}, LOG_DELAY)
|
||||
const now = dayjs().format('HH:mm:ss')
|
||||
const message = `${now} | 已复制文件数 ${count} | 排队等候的网络请求 ${limit.pendingCount}`
|
||||
print_progress(message)
|
||||
}, 1000)
|
||||
await Promise.all(files.map(async file => {
|
||||
try {
|
||||
const { id, parent } = file
|
||||
const target = mapping[parent] || root
|
||||
const new_file = await limit(() => copy_file(id, target))
|
||||
if (new_file) {
|
||||
db.prepare('update task set status=?, copied = copied || ? where id=?').run('copying', id + '\n', task_id)
|
||||
db.prepare('update task set status=?, copied = copied || ? where id=?')
|
||||
.run('copying', id + '\n', task_id)
|
||||
}
|
||||
count++
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
}
|
||||
}))
|
||||
clearInterval(loop)
|
||||
}
|
||||
|
@ -538,11 +559,10 @@ async function create_folders ({ source, old_mapping, folders, root, task_id, se
|
|||
let same_levels = folders.filter(v => v.parent === folders[0].parent)
|
||||
|
||||
const loop = setInterval(() => {
|
||||
console.log('================')
|
||||
console.log('已创建的文件夹数量', count)
|
||||
console.log('正在进行的网络请求', limit.activeCount)
|
||||
console.log('排队等候的网络请求', limit.pendingCount)
|
||||
}, LOG_DELAY)
|
||||
const now = dayjs().format('HH:mm:ss')
|
||||
const message = `${now} | 已创建目录数 ${count} | 排队等候的网络请求 ${limit.pendingCount}`
|
||||
print_progress(message)
|
||||
}, 1000)
|
||||
|
||||
while (same_levels.length) {
|
||||
await Promise.all(same_levels.map(async v => {
|
||||
|
@ -674,7 +694,20 @@ async function dedupe ({ fid, update, service_account }) {
|
|||
|
||||
/**
 * Report a request error on stderr.
 *
 * When the error carries a response body (err.response.data) that body is
 * printed as JSON, exactly once. Otherwise the error message is printed,
 * except for messages containing "timeout", which are suppressed.
 *
 * @param {*} err - the caught error; may be nullish or a non-Error value
 */
function handle_error (err) {
  const data = err && err.response && err.response.data
  if (data) {
    console.error(JSON.stringify(data))
    return
  }
  // Fall back to the raw value so a nullish or message-less error cannot crash the handler
  const message = String((err && err.message) || err)
  if (!message.includes('timeout')) console.error(message)
}
|
||||
|
||||
/**
 * Print a progress message, overwriting the current terminal line when possible.
 *
 * If process.stdout exposes cursorTo, the cursor is moved to column 0, the rest
 * of the line is cleared (so a shorter message leaves no residue from the
 * previous one) and the message is written without a trailing newline.
 * Otherwise the message is printed with console.log.
 *
 * @param {string} msg - the progress text to display
 */
function print_progress (msg) {
  const out = process.stdout
  if (typeof out.cursorTo !== 'function') {
    console.log(msg)
    return
  }
  out.cursorTo(0)
  // Erase what is left of the previous message before writing the new one
  if (typeof out.clearLine === 'function') out.clearLine(1)
  out.write(msg)
}
|
||||
|
||||
module.exports = { ls_folder, count, validate_fid, copy, dedupe, copy_file, gen_count_body, real_copy }
|
||||
|
|
Binary file not shown.
After Width: | Height: | Size: 152 KiB |
|
@ -0,0 +1,129 @@
|
|||
#!/usr/bin/env node
|
||||
|
||||
const { argv } = require('yargs')
|
||||
.usage('用法: ./$0 folder-id [options]\nfolder-id 是你想检测SA是否对其有阅读权限的目录ID')
|
||||
.help('h')
|
||||
.alias('h', 'help')
|
||||
|
||||
const fs = require('fs')
|
||||
const path = require('path')
|
||||
const prompts = require('prompts')
|
||||
const { GoogleToken } = require('gtoken')
|
||||
const axios = require('@viegg/axios')
|
||||
const HttpsProxyAgent = require('https-proxy-agent')
|
||||
|
||||
const { https_proxy } = process.env
|
||||
const axins = axios.create(https_proxy ? { httpsAgent: new HttpsProxyAgent(https_proxy) } : {})
|
||||
|
||||
// Every *.json file found in the local "sa" directory is treated as a service-account key
const SA_FILES = fs
  .readdirSync(path.join(__dirname, 'sa'))
  .filter(name => name.endsWith('.json'))

// Pair each key file with a GoogleToken client requesting the Drive scope
const SA_TOKENS = SA_FILES.map(filename => ({
  gtoken: new GoogleToken({
    keyFile: path.join(__dirname, 'sa', filename),
    scope: ['https://www.googleapis.com/auth/drive']
  }),
  filename
}))
|
||||
|
||||
main()
|
||||
/**
 * Command-line entry point: check every service account against the folder ID
 * given as the first positional argument, then offer to move the invalid key
 * files out of the way.
 */
async function main () {
  const [fid] = argv._

  // Guard: nothing to do without a plausible folder ID
  if (!validate_fid(fid)) {
    console.warn('目录ID缺失或格式错误')
    return
  }

  console.log('开始检测', SA_TOKENS.length, '个SA帐号')
  const invalid_sa = await get_invalid_sa(SA_TOKENS, fid)
  if (!invalid_sa.length) {
    console.log('已检测', SA_TOKENS.length, '个SA,未检测到无效帐号')
    return
  }

  // Let the user decide whether the invalid key files are moved
  const choice = await choose(invalid_sa.length)
  if (choice !== 'yes') {
    console.log('成功退出,无效的SA记录:', invalid_sa)
    return
  }
  mv_sa(invalid_sa)
  console.log('成功移动')
}
|
||||
|
||||
/**
 * Move the given service-account key files from sa/ into sa/invalid/.
 *
 * The destination directory is created first when it does not exist, so the
 * rename cannot fail with ENOENT on a fresh checkout.
 *
 * @param {string[]} arr - file names (relative to the sa directory) to move
 */
function mv_sa (arr) {
  const sa_dir = path.join(__dirname, 'sa')
  const invalid_dir = path.join(sa_dir, 'invalid')
  // recursive: true makes this a no-op when the directory already exists
  fs.mkdirSync(invalid_dir, { recursive: true })
  for (const filename of arr) {
    fs.renameSync(path.join(sa_dir, filename), path.join(invalid_dir, filename))
  }
}
|
||||
|
||||
/**
 * Ask the user whether the invalid key files should be moved to sa/invalid.
 *
 * @param {number} count - number of invalid service accounts, shown in the prompt
 * @returns {Promise<*>} the `value` field of the prompts answer ('yes' or 'no' for the listed choices)
 */
async function choose (count) {
  const options = [
    { title: 'Yes', description: '确认移动', value: 'yes' },
    { title: 'No', description: '不做更改,直接退出', value: 'no' }
  ]
  const question = {
    type: 'select',
    name: 'value',
    message: `检测到 ${count} 个无效的SA,是否将它们移动到 sa/invalid 目录下?`,
    choices: options,
    initial: 0 // index of the pre-selected choice ("Yes")
  }
  const { value } = await prompts(question)
  return value
}
|
||||
|
||||
/**
 * Check, one account at a time, which service accounts cannot read a folder.
 *
 * An account is reported as invalid when fetching its access token fails with
 * HTTP status 400, or when the folder lookup answers with error code 404.
 * Each file name is reported at most once. Any other failure is logged and the
 * account is left out of the result.
 *
 * @param {Array<{gtoken: object, filename: string}>} arr - accounts to check
 * @param {string} fid - ID of the folder the accounts should be able to read
 * @returns {Promise<string[]>} file names of the invalid accounts
 * @throws {Error} when fid is missing
 */
async function get_invalid_sa (arr, fid) {
  if (!fid) throw new Error('请指定要检测权限的目录ID')
  const fails = []
  let flag = 0
  let good = 0
  for (const { gtoken, filename } of arr) {
    console.log('检测进度', `${flag++}/${arr.length}`)
    console.log('正常/异常', `${good}/${fails.length}`)
    try {
      const access_token = await get_sa_token(gtoken)
      await get_info(fid, access_token)
      good++
    } catch (e) {
      const response = e && e.response
      const status = response && response.status
      const data = response && response.data
      const code = data && data.error && data.error.code
      const token_failed = Number(status) === 400 // access_token request failed
      const folder_unreadable = Number(code) === 404 // folder lookup failed
      if (token_failed || folder_unreadable) {
        // Single push even if both conditions hold
        fails.push(filename)
      } else {
        // Not a verdict on the account (e.g. network trouble): surface it instead of hiding it
        console.warn(filename, '检测时发生未知错误,已跳过:', e && e.message)
      }
    }
  }
  return fails
}
|
||||
|
||||
/**
 * Fetch the id and name of a Drive file/folder using the given access token.
 *
 * @param {string} fid - file or folder ID, placed in the request path
 * @param {string} access_token - bearer token sent in the authorization header
 * @returns {Promise<*>} the `data` field of the HTTP response
 */
async function get_info (fid, access_token) {
  const query = params_to_query({
    includeItemsFromAllDrives: true,
    supportsAllDrives: true,
    corpora: 'allDrives',
    fields: 'id,name'
  })
  const endpoint = `https://www.googleapis.com/drive/v3/files/${fid}?${query}`
  const response = await axins.get(endpoint, {
    headers: { authorization: 'Bearer ' + access_token }
  })
  return response.data
}
|
||||
|
||||
/**
 * Serialise an object's own enumerable properties as a URL query string.
 *
 * Keys and values are both passed through encodeURIComponent and joined as
 * `key=value` pairs separated by `&`. Inherited properties are ignored.
 *
 * @param {Object} data - parameters to serialise; a nullish value yields ''
 * @returns {string} the query string, without a leading '?'
 */
function params_to_query (data) {
  return Object.entries(data || {})
    .map(([key, value]) => encodeURIComponent(key) + '=' + encodeURIComponent(value))
    .join('&')
}
|
||||
|
||||
/**
 * Promise wrapper around the callback form of gtoken.getToken.
 *
 * @param {{getToken: Function}} gtoken - token client for one service account
 * @returns {Promise<string>} the access_token of the token returned by the client
 * @throws rejects with the client's error, or with an Error when the client
 *   reports success but supplies no access token
 */
async function get_sa_token (gtoken) {
  return new Promise((resolve, reject) => {
    gtoken.getToken((err, tk) => {
      if (err) return reject(err)
      // Guard the dereference: a throw inside this callback would escape the promise
      if (!tk || !tk.access_token) return reject(new Error('未能获取到 access_token'))
      resolve(tk.access_token)
    })
  })
}
|
||||
|
||||
/**
 * Check whether a value looks like a folder ID: after conversion to a string
 * it must be 10 to 100 characters long and consist only of ASCII letters,
 * digits, underscores and hyphens.
 *
 * @param {*} fid - candidate ID (converted with String before checking)
 * @returns {boolean} true when the value has the expected shape
 */
function validate_fid (fid) {
  if (!fid) return false
  const id = String(fid)
  if (id.length < 10 || id.length > 100) return false
  // test() gives a real boolean, where match() would hand back an array or null
  return /^[a-zA-Z0-9_-]+$/.test(id)
}
|
Loading…
Reference in New Issue