// package.json (same patch): the "prepare" script becomes "husky install && yarn build".

// ---- src/timeline-v1.ts ----
// The same patch adds an optional `article?: ArticleRaw` field to both
// SearchResultRaw and TimelineResultRaw.

/**
 * Envelope around an article attached to a timeline/search result.
 * NOTE(review): field names mirror the upstream API payload — confirm against a live response.
 */
export interface ArticleRaw {
  article_results: {
    result: ArticleResultRaw;
  };
}

/** Raw article payload: identity, title, optional cover, and rich-text body. */
export interface ArticleResultRaw {
  rest_id: string;
  title: string;
  /** Optional reference to the cover image. */
  cover_media?: ArticleCoverMediaRaw;
  /** Rich-text body (blocks plus the entities they reference). */
  content_state: ArticleContentStateRaw;
  /** Media referenced by the article, when provided. */
  media_entities?: Array<ArticleMediaEntityRaw>;
}

/** Cover image reference of an article. */
export interface ArticleCoverMediaRaw {
  media_key: string;
  media_info: {
    original_img_url: string;
  };
}

/**
 * Rich-text body of an article.
 * NOTE(review): the shape (blocks / entityMap / inlineStyleRanges / entityRanges)
 * looks like a Draft.js-style raw content state — confirm.
 */
export interface ArticleContentStateRaw {
  blocks: Array<ArticleBlockRaw>;
  entityMap: Array<ArticleEntityRaw>;
}

/** One block of article text together with its inline style and entity ranges. */
export interface ArticleBlockRaw {
  key: string;
  text: string;
  /** Block kind, e.g. a header, list item, or paragraph. */
  type: string;
  /** Spans of `text` carrying an inline style. */
  inlineStyleRanges: Array<{
    offset: number;
    length: number;
    style: string;
  }>;
  /** Spans of `text` bound to an entry of the content state's `entityMap` via `key`. */
  entityRanges: Array<{
    key: number;
    offset: number;
    length: number;
  }>;
}

/** A media item referenced from an entity's `data.mediaItems`. */
export interface ArticleEntityValueMediaItemRaw {
  localMediaId: string;
  mediaCategory: string;
  mediaId: string;
}

/** Payload of an entity; which `data` fields are set depends on `type`. */
export interface ArticleEntityValueRaw {
  type: string;
  mutability?: string;
  data: {
    url?: string;
    entityKey?: string;
    mediaItems?: Array<ArticleEntityValueMediaItemRaw>;
  };
}

/** Entry of `entityMap`: an entity payload addressed by `key`. */
export interface ArticleEntityRaw {
  key: number;
  value: ArticleEntityValueRaw;
}
// ---- src/timeline-v1.ts ----

/**
 * A media object attached to an article, addressable both by `media_key`
 * and by `media_id`.
 * NOTE(review): `original_img_url` is typed as always present even for
 * 'ApiGif' / 'ApiVideo' — confirm against real payloads.
 */
export interface ArticleMediaEntityRaw {
  media_key: string;
  media_id: string;
  media_info: {
    __typename: 'ApiImage' | 'ApiGif' | 'ApiVideo';
    original_img_url: string;
  };
}

// ---- src/timeline-v2.ts ----

/** A single block of an article's content state. */
type ArticleBlock = ArticleResultRaw['content_state']['blocks'][number];

/** The payload (`value`) of an `entityMap` entry. */
type ArticleEntityValue =
  ArticleResultRaw['content_state']['entityMap'][number]['value'];

/** Markdown markers to place around the code points `[start, end)` of a block. */
interface InlineWrap {
  start: number;
  end: number;
  open: string;
  close: string;
}

/**
 * Renders one block's text with its links and bold/italic ranges as Markdown.
 *
 * Every range is resolved against the ORIGINAL block text and all markers are
 * inserted in a single pass, so a marker inserted for one range can never
 * shift the offsets of another range.
 *
 * Offsets are interpreted as code-point indices (`Array.from`), as the
 * previous implementation did.
 * NOTE(review): confirm whether upstream offsets are UTF-16 code units.
 *
 * @param block - The block to render.
 * @param entitiesByKey - Entity payloads keyed by `String(key)`.
 * @returns The block text with Markdown markers inserted.
 */
function applyInlineMarkdown(
  block: ArticleBlock,
  entitiesByKey: ReadonlyMap<string, ArticleEntityValue>,
): string {
  const chars = Array.from(block.text);
  const wraps: InlineWrap[] = [];

  const addWrap = (
    offset: number,
    length: number,
    open: string,
    close: string,
  ): void => {
    const start = Math.max(0, Math.min(offset, chars.length));
    let end = Math.max(start, Math.min(offset + length, chars.length));
    // Keep one trailing newline outside the markers: "**foo\n**" is not valid emphasis.
    if (end > start && chars[end - 1] === '\n') end -= 1;
    // Empty spans would only produce stray markers such as "****".
    if (end > start) wraps.push({ start, end, open, close });
  };

  for (const range of block.entityRanges) {
    const entity = entitiesByKey.get(String(range.key));
    if (entity?.type === 'LINK' && entity.data.url) {
      addWrap(range.offset, range.length, '[', `](${entity.data.url})`);
    }
  }

  for (const range of block.inlineStyleRanges) {
    const style = range.style.toLowerCase();
    if (style === 'bold') {
      addWrap(range.offset, range.length, '**', '**');
    } else if (style === 'italic') {
      addWrap(range.offset, range.length, '*', '*');
    }
  }

  if (wraps.length === 0) return block.text;

  // Outer ranges first: earlier start wins, then the longer range. The sort is
  // stable, so identical spans keep insertion order (links before styles).
  wraps.sort((a, b) => a.start - b.start || b.end - a.end);

  const opens = new Map<number, string>();
  const closes = new Map<number, string>();
  for (const wrap of wraps) {
    opens.set(wrap.start, (opens.get(wrap.start) ?? '') + wrap.open);
    // Prepend so that the innermost range is closed first.
    closes.set(wrap.end, wrap.close + (closes.get(wrap.end) ?? ''));
  }

  let rendered = '';
  for (let i = 0; i <= chars.length; i++) {
    rendered += (closes.get(i) ?? '') + (opens.get(i) ?? '') + (chars[i] ?? '');
  }
  return rendered;
}

/**
 * Converts a raw article into a Markdown document.
 *
 * Output starts with the title as a level-1 heading, followed by one
 * fragment per block:
 * - `header-one` / `header-two` become `#` / `##` headings,
 * - `unordered-list-item` becomes a `* ` bullet,
 * - `atomic` emits one `![image](url)` per MEDIA item that resolves to an
 *   entry of `media_entities` (matched on `media_id`),
 * - every other type is emitted as a paragraph.
 *
 * `parseResult` calls this when an article with a `content_state` is present
 * and stores the result as the tweet's `text`.
 *
 * @param article - Raw article result from the API.
 * @returns The Markdown text, trimmed of surrounding whitespace.
 */
function parseArticleToMarkdown(article: Readonly<ArticleResultRaw>): string {
  const { blocks, entityMap } = article.content_state;

  // First-match lookup table; keys are compared as strings on both sides.
  const entitiesByKey = new Map<string, ArticleEntityValue>();
  for (const entry of entityMap) {
    const key = String(entry.key);
    if (!entitiesByKey.has(key)) entitiesByKey.set(key, entry.value);
  }

  let markdown = `# ${article.title}\n\n`;

  for (const block of blocks) {
    if (block.type === 'atomic') {
      // Atomic blocks carry no prose; only their MEDIA entities are rendered.
      for (const range of block.entityRanges) {
        const entity = entitiesByKey.get(String(range.key));
        if (entity?.type !== 'MEDIA' || !entity.data.mediaItems) continue;

        for (const mediaItem of entity.data.mediaItems) {
          if (!mediaItem?.mediaId) continue;
          const mediaEntity = article.media_entities?.find(
            (m) => m.media_id === mediaItem.mediaId,
          );
          if (mediaEntity) {
            markdown += `![image](${mediaEntity.media_info.original_img_url})\n\n`;
          }
        }
      }
      continue;
    }

    const text = applyInlineMarkdown(block, entitiesByKey);

    switch (block.type) {
      case 'header-one':
        markdown += `# ${text}\n\n`;
        break;
      case 'header-two':
        markdown += `## ${text}\n\n`;
        break;
      case 'unordered-list-item':
        markdown += `* ${text}\n`;
        break;
      case 'unstyled':
      default:
        markdown += `${text}\n\n`;
        break;
    }
  }

  return markdown.trim();
}

/**
 * Builds the public `Article` from a raw article result.
 *
 * The cover is only populated when `cover_media` is present AND its
 * `media_key` matches an entry of `media_entities`; that entry supplies the
 * cover's id and URL. Alt text is not available from this payload.
 *
 * `parseResult` calls this when an article with a `content_state` is present
 * and stores the result as the tweet's `article`.
 *
 * @param articleRaw - Raw article result from the API.
 * @returns The parsed article; `content_state` is passed through unchanged.
 */
function parseArticle(articleRaw: Readonly<ArticleResultRaw>): Article {
  const article: Article = {
    id: articleRaw.rest_id,
    title: articleRaw.title,
    content_state: articleRaw.content_state,
  };

  if (articleRaw.cover_media) {
    const coverKey = articleRaw.cover_media.media_key;
    const coverMedia = articleRaw.media_entities?.find(
      (m) => m.media_key === coverKey,
    );
    if (coverMedia) {
      article.cover = {
        id: coverMedia.media_id,
        url: coverMedia.media_info.original_img_url,
        alt_text: undefined, // not available
      };
    }
  }

  return article;
}

// ---- src/tweets.ts ----

/** Parsed article attached to a tweet. */
export interface Article {
  id: string;
  title: string;
  /** Cover image, when it could be resolved. */
  cover?: Photo;
  /** Raw rich-text body, passed through unparsed. */
  content_state: ArticleContentStateRaw;
}
boolean; isRetweet?: boolean; isSelfThread?: boolean; + isArticle?: boolean; + article?: Article; likes?: number; name?: string; mentions: Mention[];