Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 140 additions & 97 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,129 +8,172 @@
from requests.cookies import create_cookie
import json


def timestamp():
    """Return the current time in whole milliseconds since the epoch, as a string."""
    millis = int(time.time() * 1000)
    return str(millis)


class YoudaoNoteSession(requests.Session):
user_api_base = 'https://note.youdao.com/yws/api/personal'
doc_download_base = 'https://note.youdao.com/ydoc/api/personal'

def __init__(self):
    """Initialise the underlying requests session and set its default headers.

    The session's ``headers`` attribute is replaced with a fixed set of
    browser-like values that are sent with every request.
    """
    requests.Session.__init__(self)
    # Every header appears exactly once and every entry ends with a comma,
    # so adding a line later cannot silently merge two string literals.
    # NOTE(review): 'br' is advertised in Accept-Encoding; confirm the
    # runtime can actually decode Brotli-compressed responses.
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    }

def login(self, username, password):
self.get('https://note.youdao.com/web/')
def _get_file_type(self, fileName):
extName = os.path.splitext(fileName)[1]
if extName == '.note':
return 'note'
else:
return 'file'

self.headers['Referer'] = 'https://note.youdao.com/web/'
self.get('https://note.youdao.com/signIn/index.html?&callback=https%3A%2F%2Fnote.youdao.com%2Fweb%2F&from=web')
def _save_binary_response_to_file(self, res, destPath):
if res.status_code == 200:
with open(destPath, 'w') as fp:
for chunk in res:
fp.write(chunk)

self.headers['Referer'] = 'https://note.youdao.com/signIn/index.html?&callback=https%3A%2F%2Fnote.youdao.com%2Fweb%2F&from=web'
self.get('https://note.youdao.com/login/acc/pe/getsess?product=YNOTE&_=' + timestamp())
self.get('https://note.youdao.com/auth/cq.json?app=web&_=' + timestamp())
self.get('https://note.youdao.com/auth/urs/login.json?app=web&_=' + timestamp())
data = {
"username": username,
"password": hashlib.md5(password).hexdigest()
}
self.post('https://note.youdao.com/login/acc/urs/verify/check?app=web&product=YNOTE&tp=urstoken&cf=6&fr=1&systemName=&deviceType=&ru=https%3A%2F%2Fnote.youdao.com%2FsignIn%2F%2FloginCallback.html&er=https%3A%2F%2Fnote.youdao.com%2FsignIn%2F%2FloginCallback.html&vcode=&systemName=&deviceType=&timestamp=' + timestamp(), data=data, allow_redirects=True)
self.get('https://note.youdao.com/yws/mapi/user?method=get&multilevelEnable=true&_=' + timestamp())
print(self.cookies)
self.cstk = self.cookies.get('YNOTE_CSTK')
def _download_to_file(self, fileEntry, saveDir):
    """Download one stored file and save it as ``<saveDir>/<name>``.

    fileEntry -- mapping describing the file; its 'id', 'version' and
                 'name' keys are read.
    saveDir   -- directory path the file is written into.
    """
    fileId = fileEntry['id']
    version = fileEntry['version']
    name = fileEntry['name']
    downloadUrl = (
        '{user_api_base}/sync?method=download'
        '&fileId={fileId}'
        '&version={version}'
        '&cstk={cstk}'
        '&keyfrom=web'
    ).format(
        user_api_base=self.user_api_base,
        fileId=fileId,
        version=version,
        cstk=self.cstk,
    )
    response = self.get(downloadUrl, stream=True)
    try:
        self._save_binary_response_to_file(
            res=response, destPath='%s/%s' % (saveDir, name))
    finally:
        # With stream=True the connection is only handed back once the body
        # has been fully read or the response is closed. A non-200 reply is
        # never read by the saver, so close explicitly in every case.
        response.close()

def getRoot(self):
    """Return the id of the entry found at path '/'.

    Posts a getByPath request, prints the raw response body prefixed with
    'getRoot:', and returns ``fileEntry.id`` from the decoded JSON.
    """
    endpoint = 'https://note.youdao.com/yws/api/personal/file?method=getByPath&keyfrom=web&cstk=%s' % self.cstk
    form = {
        'path': '/',
        'entire': 'true',
        'purge': 'false',
        'cstk': self.cstk
    }
    response = self.post(endpoint, data=form)
    body = response.content
    print('getRoot:' + body)
    return json.loads(body)['fileEntry']['id']
def _download_note_to_docx(self, id, saveDir, name):
    """Fetch a note through the download-docx endpoint and save it.

    The file is written to ``saveDir`` under ``name`` with its extension
    replaced by '.docx'.
    """
    urlTemplate = (
        '{doc_download_base}/doc?method=download-docx'
        '&fileId={fileId}'
        '&cstk={cstk}'
        '&keyfrom=web'
    )
    docxDownloadUrl = urlTemplate.format(
        doc_download_base=self.doc_download_base,
        fileId=id,
        cstk=self.cstk,
    )
    response = self.get(docxDownloadUrl)
    stem, _ = os.path.splitext(name)
    target = '%s/%s' % (saveDir, '%s.docx' % stem)
    self._save_binary_response_to_file(res=response, destPath=target)

def getNote(self, id, saveDir):
    """Download the note ``id`` and save the body as ``<saveDir>/<id>.xml``.

    The request is a POST to the sync/download endpoint carrying the form
    fields below; the response body is stored unmodified.
    """
    data = {
        'fileId': id,
        'version': -1,
        'convert': 'true',
        'editorType': 1,
        'cstk': self.cstk
    }
    url = 'https://note.youdao.com/yws/api/personal/sync?method=download&keyfrom=web&cstk=%s' % self.cstk
    response = self.post(url, data=data)
    # response.content is raw bytes, so the file is opened in binary mode to
    # store it verbatim (text mode would rewrite newlines on Windows and
    # rejects bytes on Python 3).
    with open('%s/%s.xml' % (saveDir, id), 'wb') as fp:
        fp.write(response.content)

def getNoteDocx(self, id, saveDir):
    """Download the note ``id`` via the download-docx endpoint.

    The response body is saved unmodified as ``<saveDir>/<id>.docx``.
    """
    url = 'https://note.youdao.com/ydoc/api/personal/doc?method=download-docx&fileId=%s&cstk=%s&keyfrom=web' % (id, self.cstk)
    response = self.get(url)
    # The payload is binary; text mode would corrupt it through newline
    # translation on Windows and fails with bytes on Python 3.
    with open('%s/%s.docx' % (saveDir, id), 'wb') as fp:
        fp.write(response.content)

def getFileRecursively(self, id, saveDir, doc_type):
data = {
'path': '/',
'dirOnly': 'false',
'f': 'false',
'cstk': self.cstk
}
url = 'https://note.youdao.com/yws/api/personal/file/%s?all=true&f=true&len=30&sort=1&isReverse=false&method=listPageByParentId&keyfrom=web&cstk=%s' % (id, self.cstk)
def _download_dir(self, id, dirName, saveDir):
    """Mirror one remote folder into ``saveDir/dirName``.

    The local directory is created when nothing exists at that path yet,
    then the folder's contents are downloaded into it.
    """
    subDir = saveDir + '/' + dirName
    # lexists() is an lstat() probe, so an existing path (even a dangling
    # symlink) is left alone rather than re-created.
    if not os.path.lexists(subDir):
        os.mkdir(subDir)
    self._download_file_recursively(id, subDir)

def _download_object(self, id, name, fileEntry, saveDir):
    """Download a single non-folder entry into ``saveDir``.

    Entries classified as notes are fetched as .docx; anything else is
    downloaded as-is. A 'Processing <name>' line is printed first.
    """
    isNote = self._get_file_type(name) == 'note'
    print('Processing %s' % (name))
    if isNote:
        self._download_note_to_docx(id, saveDir, name)
        return
    self._download_to_file(fileEntry, saveDir)

def _analyse_response(self, jsonObj, count, saveDir, lastId):
for entry in jsonObj['entries']:
fileEntry = entry['fileEntry']
id = fileEntry['id']
name = fileEntry['name']
print('Processing %s' % (name))
if fileEntry['dir']:
self._download_dir(id, name, saveDir)
else:
self._download_object(id, name, fileEntry, saveDir)
count = count + 1
lastId = id
return count, lastId

def _download_file_recursively(self, id, saveDir):
    """Download everything below the remote folder ``id`` into ``saveDir``.

    The folder is listed page by page with listPageByParentId. After the
    first page, each request passes the id of the last entry already handled
    as ``lastId``. The 'count' field of each reply is used as the number of
    entries to expect, and every page is handed to ``_analyse_response``,
    which performs the downloads and recurses into sub-folders.
    """
    fileUrl = (
        '{user_api_base}/file/{fileId}?'
        'all=true'
        '&f=true'
        '&len=30'
        '&sort=1'
        '&isReverse=false'
        '&method=listPageByParentId'
        '&keyfrom=web'
        '&cstk={cstk}'
    ).format(
        user_api_base=self.user_api_base,
        fileId=id,
        cstk=self.cstk,
    )
    lastId = None
    count = 0
    total = 1  # placeholder so the first request is made; replaced by the reply
    while count < total:
        if lastId is None:
            pageUrl = fileUrl
        else:
            pageUrl = fileUrl + '&lastId=%s' % lastId
        response = self.get(pageUrl)
        jsonObj = json.loads(response.content)
        total = jsonObj['count']
        handledBefore = count
        count, lastId = self._analyse_response(
            jsonObj, count, saveDir, lastId)
        if count == handledBefore:
            # The page carried no entries, so lastId did not move either;
            # requesting again would fetch the same page forever.
            break

def getAll(self, saveDir):
    """Download the whole account into ``saveDir``.

    Resolves the id of the root folder and walks it recursively.
    """
    rootId = self.getRoot()
    # Single traversal only; no document-type argument exists in this method.
    self._download_file_recursively(rootId, saveDir)

def getRoot(self):
    """Return the id of the entry found at path '/'.

    Sends a getByPath POST built from ``user_api_base`` and returns
    ``fileEntry.id`` from the decoded JSON reply.
    """
    rootUrl = '{user_api_base}/file?method=getByPath&keyfrom=web&cstk={cstk}'.format(
        user_api_base=self.user_api_base,
        cstk=self.cstk)
    payload = {
        'path': '/',
        'entire': 'true',
        'purge': 'false',
        'cstk': self.cstk
    }
    reply = self.post(rootUrl, data=payload)
    return json.loads(reply.content)['fileEntry']['id']

def login(self):
    """Install the session cookies and the cstk token used by later requests.

    No sign-in request is made: each cookie listed below is set on the
    session with the value given next to it, and ``self.cstk`` is assigned.
    """
    # Fill in the values below with the matching fields from your own
    # cookies. Getting them is easy: open Chrome's F12 developer tools and
    # look at the Cookie header of an XHR request.
    cookieValues = [
        ('OUTFOX_SEARCH_USER_ID_NCOO', ''),
        ('OUTFOX_SEARCH_USER_ID', ''),
        ('__yadk_uid', ''),
        ('_ga', ''),
        ('YNOTE_USER', ''),
        ('Hm_lvt_4566b2fb63e326de8f2b8ceb1ec367f2', ''),
        ('P_INFO', ''),
        ('JSESSIONID', ''),
        ('Hm_lvt_30b679eb2c90c60ff8679ce4ca562fcc', ''),
        ('YNOTE_CSTK', ''),
        ('_gid', ''),
        ('Hm_lpvt_30b679eb2c90c60ff8679ce4ca562fcc', ''),
        ('YNOTE_SESS', ''),
        ('YNOTE_PERS', ''),
        ('YNOTE_LOGIN', ''),
    ]
    for cookieName, cookieValue in cookieValues:
        self.cookies.set(cookieName, cookieValue)
    self.cstk = ''


if __name__ == '__main__':
    # Usage: main.py [saveDir]
    # The only command-line argument is the optional target directory;
    # login() takes no credentials and getAll() takes no document type.
    if len(sys.argv) == 2:
        saveDir = sys.argv[1]
    else:
        saveDir = '.'
    sess = YoudaoNoteSession()
    sess.login()
    sess.getAll(saveDir)