downflux / BaseParser
Defined in: packages/base/BaseParser.ts:15
Default HTML parser shared by provider-specific parsers.
Parsers exist to keep extraction rules close to the HTML they understand. The base parser collects common page fields such as anchors, images, meta tags, and source URLs, while provider parsers add the site-specific fields needed by transformers and pipelines.
Extended by: AnalRzParser, ArtStationParser, BeegParser, BehanceParser, BlackPornParser, BlueskyParser, BoKepPornParser, ColliderPornParser, CumLouderParser, DaFreePornParser, DanbooruParser, DaNudeParser, DeviantArtParser, EpicGfsParser, EPornerParser, FlickrParser, GelbooruParser, HqPornParser, ImgurParser, InstagramParser, InterracialParser, ItsPornParser, Lesbian8Parser, MangaDexParser, MastodonParser, MegaTubeParser, MomVidsParser, MyLustParser, NewgroundsParser, OkPornParser, PerfectGirlsParser, PexelsParser, PinterestParser, PixivParser, Porn300Parser, PornDoeParser, PornHubParser, PornIdParser, PornOneParser, PornSevenParser, PornsOkParser, PussySpaceParser, RedditParser, SexVidParser, ShamelessParser, SuperPornParser, SxyPornParser, TheyAreHugeParser, TikTokParser, TnAFlixParser, TubeVSexParser, TumblrParser, TwitterParser, UnsplashParser, WallHavenParser, WikiArtParser, WikimediaParser, XCafeParser, XDeguParser, XGroovyParser, XHamsterParser, XnXXParser, XozillaParser, XVideosParser, ZbPornParser, ZzzTubeParser
new BaseParser(): BaseParser
BaseParser
protected kvsResolver: KvsResolver
Defined in: packages/base/BaseParser.ts:16
transform(html, sourceUrl): Partial<DefaultExecutionResult>
Defined in: packages/base/BaseParser.ts:25
Extracts common metadata from a fetched HTML document.
string
Raw HTML returned by the HTTP engine.
string
Final URL used as the metadata source.
Partial<DefaultExecutionResult>
Common extracted fields used as the base provider result.
protected extractScriptMethodInput(fnName, html): string | null
Defined in: packages/base/BaseParser.ts:54
Extracts the first string argument passed to a named script function.
string
Function name to search for.
string
HTML or script text to inspect.
string | null
The first string argument, or null when the call is absent.
protected getFlashVars(html): FlashVarsOutput
Defined in: packages/base/BaseParser.ts:66
Extracts KVS flashVars video metadata from inline scripts.
string
HTML containing one or more KVS flashVars blocks.
Normalized KVS fields, video sources, previews, and timelines.
protected extractElementText(html, begin, end, fallback?): string
Defined in: packages/base/BaseParser.ts:190
string
string
string
string = ''
string
protected extractElementTextPair(html, begin, end, pos?): [string | null, number]
Defined in: packages/base/BaseParser.ts:199
string
string
string
number = 0
[string | null, number]
protected extractAllPairs(html, begin, end): Generator<string>
Defined in: packages/base/BaseParser.ts:208
string
string
string
Generator<string>
protected extractAll(html, rules, startPos?): [Record<string, string>, number]
Defined in: packages/base/BaseParser.ts:228
string
[string, string, string][]
number = 0
[Record<string, string>, number]
protected extractAnchors(html, sourceUrl?): string[]
Defined in: packages/base/BaseParser.ts:243
string
string
string[]
protected extractAnchorTextsByHref(html, hrefPattern): string[]
Defined in: packages/base/BaseParser.ts:258
string
RegExp
string[]
protected extractImageUrls(html, sourceUrl?): string[]
Defined in: packages/base/BaseParser.ts:277
string
string
string[]
protected extractSourceUrls(html, sourceUrl?): string[]
Defined in: packages/base/BaseParser.ts:316
string
string
string[]
protected getFlashVarsVideo(html, sourceUrl, uploader?, starred?): DefaultFlashVarsVideoOutput
Defined in: packages/base/BaseParser.ts:333
string
string
string
string[]
protected collectElements(html, type, className?): Record<string, string>[]
Defined in: packages/base/BaseParser.ts:356
string
string
string
Record<string, string>[]
protected extractVideoPosters(html, sourceUrl?): string[]
Defined in: packages/base/BaseParser.ts:377
string
string
string[]
protected extractDivHrefs(html, sourceUrl?): string[]
Defined in: packages/base/BaseParser.ts:394
string
string
string[]
protected extractVideoUrls(html): string[]
Defined in: packages/base/BaseParser.ts:405
string
string[]
protected extractAllUrls(html): string[]
Defined in: packages/base/BaseParser.ts:415
string
string[]
protected extractLinks(html): string[]
Defined in: packages/base/BaseParser.ts:419
string
string[]
protected extractMetaDescription(html): string
Defined in: packages/base/BaseParser.ts:431
string
string
protected extractMetaNameContent(html, value): string
Defined in: packages/base/BaseParser.ts:439
string
string
string
protected extractMetaPropertyContent(html, value): string
Defined in: packages/base/BaseParser.ts:447
string
string
string
protected collectAnchors(html, options?): object[]
Defined in: packages/base/BaseParser.ts:455
string
string
string
RegExp
object[]
protected extractMetaKeywords(html): string[]
Defined in: packages/base/BaseParser.ts:507
string
string[]
protected extractTitle(html): string
Defined in: packages/base/BaseParser.ts:517
string
string
protected resolveUrl(raw, base?): string | null
Defined in: packages/base/BaseParser.ts:521
string
string
string | null
protected isHttpUrl(url?): url is string
Defined in: packages/base/BaseParser.ts:530
string | null
url is string
protected decodeHtmlEntities(str): string
Defined in: packages/base/BaseParser.ts:534
string
string
protected extractByTag(html, tag, options?): string[]
Defined in: packages/base/BaseParser.ts:545
string
string
string
string
string[]
protected extractOneByTag(html, tag, options?): string | null
Defined in: packages/base/BaseParser.ts:571
string
string
string
string | null
protected extractScriptsByType(html, type, objectType?): Record<string, any>[]
Defined in: packages/base/BaseParser.ts:575
string
string
string
Record<string, any>[]
protected extractByClass(html, className): string[]
Defined in: packages/base/BaseParser.ts:595
string
string
string[]
protected extractAttributes(html, tag, attr): string[]
Defined in: packages/base/BaseParser.ts:609
string
string
string
string[]
protected extractSpans(html, className?): string[]
Defined in: packages/base/BaseParser.ts:622
string
string
string[]
protected extractDivs(html, className?): string[]
Defined in: packages/base/BaseParser.ts:626
string
string
string[]
protected extractAnchorsContent(html, className?): string[]
Defined in: packages/base/BaseParser.ts:630
string
string
string[]
protected extractH2s(html, className?): string[]
Defined in: packages/base/BaseParser.ts:634
string
string
string[]
protected extractH3s(html, className?): string[]
Defined in: packages/base/BaseParser.ts:638
string
string
string[]
protected extractLists(html, className?): string[]
Defined in: packages/base/BaseParser.ts:642
string
string
string[]
protected extractBlocks(html, tag, className?): string[]
Defined in: packages/base/BaseParser.ts:646
string
string
string
string[]
protected extractKeyValue(html, keyPattern, valuePattern): Record<string, string>
Defined in: packages/base/BaseParser.ts:654
string
RegExp
RegExp
Record<string, string>
protected collectByClassNames(html, classNames, options?): any[]
Defined in: packages/base/BaseParser.ts:669
string
string | string[]
boolean
string[]
string
any[]