From 0b361f53db6fe43be002205ed9fc95399687284e Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 14 Apr 2026 11:33:35 +0200 Subject: [PATCH 1/4] Add default dataset related redispatch endpoints --- .../objects/datasets/dataset-items.yaml | 440 ++++++++++++++++++ .../objects/datasets/dataset-statistics.yaml | 48 ++ .../components/objects/datasets/dataset.yaml | 176 +++++++ apify-api/openapi/components/tags.yaml | 23 +- .../openapi/components/x-tag-groups.yaml | 3 + apify-api/openapi/openapi.yaml | 6 + .../actor-runs@{runId}@dataset.yaml | 6 + .../actor-runs@{runId}@dataset@items.yaml | 6 + ...actor-runs@{runId}@dataset@statistics.yaml | 2 + .../actors/acts@{actorId}@validate-input.yaml | 59 +++ .../paths/datasets/datasets@{datasetId}.yaml | 143 +----- .../datasets/datasets@{datasetId}@items.yaml | 340 +------------- .../datasets@{datasetId}@statistics.yaml | 38 +- 13 files changed, 762 insertions(+), 528 deletions(-) create mode 100644 apify-api/openapi/components/objects/datasets/dataset-items.yaml create mode 100644 apify-api/openapi/components/objects/datasets/dataset-statistics.yaml create mode 100644 apify-api/openapi/components/objects/datasets/dataset.yaml create mode 100644 apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml create mode 100644 apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml create mode 100644 apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml create mode 100644 apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml diff --git a/apify-api/openapi/components/objects/datasets/dataset-items.yaml b/apify-api/openapi/components/objects/datasets/dataset-items.yaml new file mode 100644 index 0000000000..394a33e3c0 --- /dev/null +++ b/apify-api/openapi/components/objects/datasets/dataset-items.yaml @@ -0,0 +1,440 @@ +commonErrors: &commonErrors + "400": + $ref: ../../responses/BadRequest.yaml + "401": + $ref: ../../responses/Unauthorized.yaml + "403": + $ref: 
../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + "405": + $ref: ../../responses/MethodNotAllowed.yaml + "429": + $ref: ../../responses/TooManyRequests.yaml + +sharedGet: &sharedGet + responses: + <<: *commonErrors + "200": + description: "" + headers: + $ref: ../../headers/ApifyPaginationHeaders.yaml + content: + application/json: + schema: + type: array + items: + type: object + example: [foo: bar, foo2: bar2] + application/jsonl: + schema: + type: string + example: '{"foo":"bar"}\n{"foo2":"bar2"}\n' + text/csv: + schema: + type: string + example: 'foo,bar\nfoo2,bar2\n' + text/html: + schema: + type: string + example:
<table><tr><th>foo</th><th>bar</th></tr>
<tr><td>foo</td><td>bar</td></tr>
<tr><td>foo2</td><td>bar2</td></tr></table>
+ application/vnd.openxmlformats-officedocument.spreadsheetml.sheet: + schema: + type: string + application/rss+xml: + schema: + type: string + example: barbar2 + application/xml: + schema: + type: string + example: barbar2 + deprecated: false + +getById: + <<: *sharedGet + tags: + - Storage/Datasets + summary: Get dataset items + description: | + Returns data stored in the dataset in a desired format. + + ### Response format + + The format of the response depends on format query parameter. + + The format parameter can have one of the following values: + json, jsonl, xml, html, + csv, xlsx and rss. + + The following table describes how each format is treated. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
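<table><tr><th>Format</th><th>Items</th></tr>
<tr><td><code>json</code></td><td rowspan="3">The response is a JSON, JSONL or XML array of raw item objects.</td></tr>
<tr><td><code>jsonl</code></td></tr>
<tr><td><code>xml</code></td></tr>
<tr><td><code>html</code></td><td rowspan="3">The response is a HTML, CSV or XLSX table, where columns correspond to the + properties of the item and rows correspond to each dataset item.</td></tr>
<tr><td><code>csv</code></td></tr>
<tr><td><code>xlsx</code></td></tr>
<tr><td><code>rss</code></td><td>The response is a RSS file. Each item is displayed as child elements of one + <code>&lt;item&gt;</code>.</td></tr></table>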
+ + Note that CSV, XLSX and HTML tables are limited to 2000 columns and the column names cannot be longer than 200 characters. + JSON, XML and RSS formats do not have such restrictions. + + ### Hidden fields + + The top-level fields starting with the `#` character are considered hidden. + These are useful to store debugging information and can be omitted from the output by providing the `skipHidden=1` or `clean=1` query parameters. + For example, if you store the following object to the dataset: + + ``` + { + productName: "iPhone Xs", + description: "Welcome to the big screens.", + #debug: { + url: "https://www.apple.com/lae/iphone-xs/", + crawledAt: "2019-01-21T16:06:03.683Z" + } + } + ``` + + The `#debug` field will be considered as hidden and can be omitted from the + results. This is useful to + provide nice cleaned data to end users, while keeping debugging info + available if needed. The Dataset object + returned by the API contains the number of such clean items in the `dataset.cleanItemCount` property. + + ### XML format extension + + When exporting results to XML or RSS formats, the names of object properties become XML tags and the corresponding values become tag's children. For example, the following JavaScript object: + + ``` + { + name: "Paul Newman", + address: [ + { type: "home", street: "21st", city: "Chicago" }, + { type: "office", street: null, city: null } + ] + } + ``` + + will be transformed to the following XML snippet: + + ``` + <name>Paul Newman</name> + <address>
+ <type>home</type> + <street>21st</street> + <city>Chicago</city> + </address>
+ <address>
+ <type>office</type> + <street/> + <city/> + </address>
+ ``` + + If the JavaScript object contains a property named `@` then its sub-properties are exported as attributes of the parent XML + element. + If the parent XML element does not have any child elements then its value is taken from a JavaScript object property named `#`. + + For example, the following JavaScript object: + + ``` + { + "address": [{ + "@": { + "type": "home" + }, + "street": "21st", + "city": "Chicago" + }, + { + "@": { + "type": "office" + }, + "#": 'unknown' + }] + } + ``` + + will be transformed to the following XML snippet: + + ``` +
<address type="home"> + <street>21st</street> + <city>Chicago</city> + </address>
+ <address type="office">unknown</address>
+ ``` + + This feature is also useful to customize your RSS feeds generated for various websites. + + By default the whole result is wrapped in a `<items>` element and each page object is wrapped in a `<item>` element. + You can change this using the `xmlRoot` and `xmlRow` URL parameters. + + ### Pagination + + The generated response supports [pagination](#/introduction/pagination). + The pagination is always performed with the granularity of a single item, regardless of whether the `unwind` parameter was provided. + By default, the **Items** in the response are sorted by the time they were stored to the database, therefore you can use pagination to incrementally fetch the items as they are being added. + No limit exists to how many items can be returned in one response. + + If you specify the `desc=1` query parameter, the results are returned in the reverse of the order in which they were stored (i.e. from newest to oldest items). + Note that only the order of **Items** is reversed, but not the order of the `unwind` array elements.
+ operationId: dataset_items_get + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items + - https://docs.apify.com/api/v2#/reference/datasets/get-items + - https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_get + x-js-parent: DatasetClient + x-js-name: listItems + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#listItems + x-py-parent: DatasetClientAsync + x-py-name: stream_items + x-py-doc-url: 
https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#stream_items + +getDefault: + <<: *sharedGet + tags: + - Default storages + summary: Get default dataset items + description: | + Returns data stored in the default dataset of the Actor run in the desired format. + + This endpoint is a shortcut that resolves the run's `defaultDatasetId` and proxies to the + [Get dataset items](/api/v2/dataset-items-get) endpoint. + operationId: actorRun_dataset_items_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + +sharedHead: &sharedHead + responses: + 
"200": + description: "" + headers: + $ref: ../../headers/ApifyPaginationHeaders.yaml + content: {} + "400": + $ref: ../../responses/BadRequest.yaml + deprecated: false + +headById: + <<: *sharedHead + tags: + - Storage/Datasets + summary: Get dataset items headers + description: | + Returns only the HTTP headers for the dataset items endpoint, without the response body. + This is useful to check pagination metadata or verify access without downloading the full dataset. + operationId: dataset_items_head + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + +headDefault: + 
<<: *sharedHead + tags: + - Default storages + summary: Get default dataset items headers + description: | + Returns only the HTTP headers for the dataset items endpoint of the Actor run's default dataset, + without the response body. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Head dataset items](/api/v2/dataset-items-head) endpoint. + operationId: actorRun_dataset_items_head + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + +sharedPost: &sharedPost + requestBody: + description: "" + content: + application/json: + 
schema: + oneOf: + - $ref: ../../schemas/datasets/PutItemsRequest.yaml + - type: array + items: + $ref: ../../schemas/datasets/PutItemsRequest.yaml + description: "" + required: true + responses: + "201": + description: "" + headers: + Location: + content: + text/plain: + schema: + type: string + example: https://api.apify.com/v2/datasets/WkzbQMuFYuamGv3YF/items + content: + application/json: + schema: + type: object + example: {} + "400": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/PutItemResponseError.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + deprecated: false + +postById: + <<: *sharedPost + tags: + - Storage/Datasets + summary: Store items + description: | + Appends an item or an array of items to the end of the dataset. + The POST payload is a JSON object or a JSON array of objects to save into the dataset. + + If the data you attempt to store in the dataset is invalid (meaning any of the items received by the API fails the validation), the whole request is discarded and the API will return a response with status code 400. + For more information about dataset schema validation, see [Dataset schema](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation). + + **IMPORTANT:** The limit of request payload size for the dataset is 5 MB. If the array exceeds the size, you'll need to split it into a number of smaller arrays. 
+ operationId: dataset_items_post + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/item-collection/put-items + - https://docs.apify.com/api/v2#/reference/datasets/put-items + - https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_post + x-js-parent: DatasetClient + x-js-name: pushItems + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#pushItems + x-py-parent: DatasetClientAsync + x-py-name: push_items + x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#push_items + +postDefault: + <<: *sharedPost + tags: + - Default storages + summary: Store items + description: | + Appends an item or an array of items to the end of the Actor run's default dataset. + + This endpoint is a shortcut that resolves the run's `defaultDatasetId` and proxies to the + [Store items](/api/v2/dataset-items-post) endpoint. 
+ + operationId: actorRun_dataset_items_post + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" diff --git a/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml b/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml new file mode 100644 index 0000000000..7de47e3364 --- /dev/null +++ b/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml @@ -0,0 +1,48 @@ +sharedGet: &sharedGet + responses: + "200": + description: "" + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetStatisticsResponse.yaml + "400": + $ref: ../../responses/BadRequest.yaml + "401": + $ref: ../../responses/Unauthorized.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + "405": + $ref: ../../responses/MethodNotAllowed.yaml + "429": + $ref: ../../responses/TooManyRequests.yaml + +getById: + <<: *sharedGet + tags: + - Storage/Datasets + summary: Get dataset statistics + description: | + Returns statistics for given dataset. + + Provides only [field statistics](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation#dataset-field-statistics). + + operationId: dataset_statistics_get + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + +getDefault: + <<: *sharedGet + tags: + - Default storages + summary: Get default dataset statistics + description: | + Returns statistics for the Actor run's default dataset. + + This endpoint is a shortcut that resolves the run's `defaultDatasetId` and proxies to the + [Get dataset statistics](/api/v2/dataset-statistics-get) endpoint. 
+ operationId: actorRun_dataset_statistics_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" diff --git a/apify-api/openapi/components/objects/datasets/dataset.yaml b/apify-api/openapi/components/objects/datasets/dataset.yaml new file mode 100644 index 0000000000..b9f04b9ee0 --- /dev/null +++ b/apify-api/openapi/components/objects/datasets/dataset.yaml @@ -0,0 +1,176 @@ +commonErrors: &commonErrors + "400": + $ref: ../../responses/BadRequest.yaml + "401": + $ref: ../../responses/Unauthorized.yaml + "403": + $ref: ../../responses/Forbidden.yaml + "404": + $ref: ../../responses/NotFound.yaml + "405": + $ref: ../../responses/MethodNotAllowed.yaml + "429": + $ref: ../../responses/TooManyRequests.yaml + +sharedGet: &sharedGet + responses: + <<: *commonErrors + "200": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetResponse.yaml + deprecated: false + +getById: + <<: *sharedGet + tags: + - Storage/Datasets + summary: Get dataset + description: | + Returns dataset object for given dataset ID. + + This does not return dataset items, only information about the storage itself. + To retrieve dataset items, use the [List dataset items](/api/v2/dataset-items-get) endpoint. + + :::note + + Keep in mind that attributes `itemCount` and `cleanItemCount` are not propagated right away after data are pushed into a dataset. + + ::: + + There is a short period (up to 5 seconds) during which these counters may not match with exact counts in dataset items. + operationId: dataset_get + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - name: token + in: query + description: | + API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`. 
+ style: form + explode: true + schema: + type: string + example: soSkq9ekdmfOslopH + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/dataset/get-dataset + - https://docs.apify.com/api/v2#/reference/datasets/get-dataset + - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_get + x-js-parent: DatasetClient + x-js-name: get + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#get + x-py-parent: DatasetClientAsync + x-py-name: get + x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#get + +getDefault: + <<: *sharedGet + tags: + - Default storages + summary: Get default dataset + description: | + Returns the default dataset associated with an Actor run. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Get dataset](/api/v2/dataset-get) endpoint. + + operationId: actorRun_dataset_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +sharedPut: &sharedPut + requestBody: + description: "" + content: + application/json: + schema: + $ref: ../../schemas/datasets/UpdateDatasetRequest.yaml + required: true + responses: + <<: *commonErrors + "200": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetResponse.yaml + "413": + $ref: ../../responses/PayloadTooLarge.yaml + "415": + $ref: ../../responses/UnsupportedMediaType.yaml + deprecated: false + +putById: + <<: *sharedPut + tags: + - Storage/Datasets + summary: Update dataset + description: | + Updates a dataset's name and general resource access level using a value specified by a JSON object passed in the PUT payload. + The response is the updated dataset object, as returned by the [Get dataset](/api/v2/dataset-get) API endpoint. 
+ operationId: + dataset_put + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/dataset/update-dataset + - https://docs.apify.com/api/v2#/reference/datasets/update-dataset + - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_put + x-js-parent: DatasetClient + x-js-name: update + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#update + x-py-parent: DatasetClientAsync + x-py-name: update + x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#update + +putDefault: + <<: *sharedPut + tags: + - Default storages + summary: Update default dataset + description: | + Updates the default dataset associated with an Actor run. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Put dataset](/api/v2/dataset-put) endpoint. + + operationId: actorRun_dataset_put + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +sharedDelete: &sharedDelete + responses: + <<: *commonErrors + "204": + $ref: ../../responses/NoContent.yaml + deprecated: false + +deleteById: + <<: *sharedDelete + tags: + - Storage/Datasets + summary: Delete dataset + description: Deletes a specific dataset. + operationId: dataset_delete + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + x-legacy-doc-urls: + - https://docs.apify.com/api/v2#/reference/datasets/dataset/delete-dataset + - https://docs.apify.com/api/v2#/reference/datasets/delete-dataset + - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_delete + x-js-parent: DatasetClient + x-js-name: delete + x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#delete + +deleteDefault: + <<: *sharedDelete + tags: + - Default storages + summary: Delete default dataset + description: Deletes default dataset associated with an Actor run. 
+ operationId: actorRun_dataset_delete + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" diff --git a/apify-api/openapi/components/tags.yaml b/apify-api/openapi/components/tags.yaml index e79a87a270..1097fb21bb 100644 --- a/apify-api/openapi/components/tags.yaml +++ b/apify-api/openapi/components/tags.yaml @@ -406,20 +406,6 @@ ::: x-trait: true -- name: Tools - x-displayName: Tools - Introduction - description: | - The API endpoints described in this section provide utility tools for encoding, - signing, and verifying data, as well as inspecting HTTP request details. - - - **Browser info** (`/v2/browser-info`) - Returns details about the incoming HTTP request, - including the client IP address, country code, and headers. Accepts any HTTP method - (GET, POST, PUT, DELETE) so you can use it to test proxy behavior and verify that - client IP addresses are anonymized correctly. - - **Encode and sign** (`/v2/tools/encode-and-sign`) - Encodes and signs a JSON object, - tying it to the authenticated user's identity. - - **Decode and verify** (`/v2/tools/decode-and-verify`) - Decodes and verifies a value - previously created by the encode-and-sign endpoint. - name: Users x-displayName: Users - Introduction x-legacy-doc-urls: @@ -436,3 +422,12 @@ description: The API endpoints described in this section return information about user accounts. x-trait: true +- name: Tools + x-displayName: Tools - Introduction + description: | + The API endpoints described in this section provide utility tools for encoding, + signing, and verifying data, as well as inspecting HTTP request details. +- name: Default storages + x-displayName: Default storages - Introduction + description: | + The API endpoints described in this section are convenience endpoints that provide access to Actor run's default storages (dataset, key-value store, and request queue) without the need to resolve the storage ID first. 
diff --git a/apify-api/openapi/components/x-tag-groups.yaml b/apify-api/openapi/components/x-tag-groups.yaml index 5c1cf2fdf2..46caf29d40 100644 --- a/apify-api/openapi/components/x-tag-groups.yaml +++ b/apify-api/openapi/components/x-tag-groups.yaml @@ -41,3 +41,6 @@ - name: Tools tags: - Tools +- name: Convenience endpoints + tags: + - Default storages diff --git a/apify-api/openapi/openapi.yaml b/apify-api/openapi/openapi.yaml index af3cc58791..028b8fe732 100644 --- a/apify-api/openapi/openapi.yaml +++ b/apify-api/openapi/openapi.yaml @@ -550,6 +550,12 @@ paths: $ref: paths/actor-runs/actor-runs@{runId}@resurrect.yaml "/v2/actor-runs/{runId}/charge": $ref: paths/actor-runs/actor-runs@{runId}@charge.yaml + "/v2/actor-runs/{runId}/dataset": + $ref: paths/actor-runs/actor-runs@{runId}@dataset.yaml + "/v2/actor-runs/{runId}/dataset/items": + $ref: "paths/actor-runs/actor-runs@{runId}@dataset@items.yaml" + "/v2/actor-runs/{runId}/dataset/statistics": + $ref: "paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml" /v2/actor-builds: $ref: paths/actor-builds/actor-builds.yaml "/v2/actor-builds/{buildId}": diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml new file mode 100644 index 0000000000..ee2b1539be --- /dev/null +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset.yaml @@ -0,0 +1,6 @@ +get: + $ref: "../../components/objects/datasets/dataset.yaml#/getDefault" +put: + $ref: "../../components/objects/datasets/dataset.yaml#/putDefault" +delete: + $ref: "../../components/objects/datasets/dataset.yaml#/deleteDefault" diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml new file mode 100644 index 0000000000..1ae818e928 --- /dev/null +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml @@ -0,0 +1,6 @@ +get: + $ref: 
"../../components/objects/datasets/dataset-items.yaml#/getDefault" +head: + $ref: "../../components/objects/datasets/dataset-items.yaml#/headDefault" +post: + $ref: "../../components/objects/datasets/dataset-items.yaml#/postDefault" diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml new file mode 100644 index 0000000000..d6c0cc556d --- /dev/null +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@statistics.yaml @@ -0,0 +1,2 @@ +get: + $ref: "../../components/objects/datasets/dataset-statistics.yaml#/getDefault" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml new file mode 100644 index 0000000000..f2458313e8 --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml @@ -0,0 +1,59 @@ +post: + tags: + - Actors + summary: Validate input + description: | + Validates the provided input against the Actor's input schema for the specified build. + + The endpoint checks whether the JSON payload conforms to the input schema + defined in the Actor's build. If no `build` query parameter is provided, + the `latest` build tag is used by default. + operationId: act_validateInput_post + security: [] + parameters: + - $ref: "../../components/parameters/runAndBuildParameters.yaml#/actorId" + - name: build + in: query + description: | + Optional tag or number of the Actor build to use for input schema validation. + By default, the `latest` build tag is used. + required: false + style: form + explode: true + schema: + type: string + example: latest + requestBody: + description: JSON input to validate against the Actor's input schema. 
+ content: + application/json: + schema: + type: object + required: true + responses: + "200": + description: "" + headers: {} + content: + application/json: + schema: + type: object + required: + - valid + properties: + valid: + type: boolean + description: Whether the input is valid according to the Actor's input schema. + "400": + $ref: ../../components/responses/BadRequest.yaml + "404": + $ref: ../../components/responses/NotFound.yaml + "405": + $ref: ../../components/responses/MethodNotAllowed.yaml + "413": + $ref: ../../components/responses/PayloadTooLarge.yaml + "415": + $ref: ../../components/responses/UnsupportedMediaType.yaml + "429": + $ref: ../../components/responses/TooManyRequests.yaml + deprecated: false diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml index d5dabe15d7..522ca28746 100644 --- a/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml +++ b/apify-api/openapi/paths/datasets/datasets@{datasetId}.yaml @@ -1,143 +1,6 @@ get: - tags: - - Storage/Datasets - summary: Get dataset - description: | - Returns dataset object for given dataset ID. - - This does not return dataset items, only information about the storage itself. - To retrieve dataset items, use the [List dataset items](/api/v2/dataset-items-get) endpoint. - - :::note - - Keep in mind that attributes `itemCount` and `cleanItemCount` are not propagated right away after data are pushed into a dataset. - - ::: - - There is a short period (up to 5 seconds) during which these counters may not match with exact counts in dataset items. - operationId: dataset_get - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - - name: token - in: query - description: | - API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`. 
- style: form - explode: true - schema: - type: string - example: soSkq9ekdmfOslopH - responses: - "200": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/DatasetResponse.yaml - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/dataset/get-dataset - - https://docs.apify.com/api/v2#/reference/datasets/get-dataset - - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_get - x-js-parent: DatasetClient - x-js-name: get - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#get - x-py-parent: DatasetClientAsync - x-py-name: get - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#get + $ref: "../../components/objects/datasets/dataset.yaml#/getById" put: - tags: - - Storage/Datasets - summary: Update dataset - description: | - Updates a dataset's name and general resource access level using a value specified by a JSON object passed in the PUT payload. - The response is the updated dataset object, as returned by the [Get dataset](#/reference/datasets/dataset-collection/get-dataset) API endpoint. 
- operationId: dataset_put - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - requestBody: - description: "" - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/UpdateDatasetRequest.yaml - required: true - responses: - "200": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/DatasetResponse.yaml - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "413": - $ref: ../../components/responses/PayloadTooLarge.yaml - "415": - $ref: ../../components/responses/UnsupportedMediaType.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/dataset/update-dataset - - https://docs.apify.com/api/v2#/reference/datasets/update-dataset - - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_put - x-js-parent: DatasetClient - x-js-name: update - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#update - x-py-parent: DatasetClientAsync - x-py-name: update - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#update + $ref: "../../components/objects/datasets/dataset.yaml#/putById" delete: - tags: - - Storage/Datasets - summary: Delete dataset - description: Deletes a specific dataset. 
- operationId: dataset_delete - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - responses: - "204": - $ref: ../../components/responses/NoContent.yaml - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/dataset/delete-dataset - - https://docs.apify.com/api/v2#/reference/datasets/delete-dataset - - https://docs.apify.com/api/v2#tag/DatasetsDataset/operation/dataset_delete - x-js-parent: DatasetClient - x-js-name: delete - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#delete + $ref: "../../components/objects/datasets/dataset.yaml#/deleteById" diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml index 7eae2a03ae..204a1a6f65 100644 --- a/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml +++ b/apify-api/openapi/paths/datasets/datasets@{datasetId}@items.yaml @@ -1,340 +1,6 @@ get: - tags: - - Storage/Datasets - summary: Get dataset items - description: | - Returns data stored in the dataset in a desired format. - - ### Response format - - The format of the response depends on format query parameter. - - The format parameter can have one of the following values: - json, jsonl, xml, html, - csv, xlsx and rss. - - The following table describes how each format is treated. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
FormatItems
jsonThe response is a JSON, JSONL or XML array of raw item objects.
jsonl
xml
htmlThe response is a HTML, CSV or XLSX table, where columns correspond to the - properties of the item and rows correspond to each dataset item.
csv
xlsx
rssThe response is a RSS file. Each item is displayed as child elements of one - <item>.
- - Note that CSV, XLSX and HTML tables are limited to 2000 columns and the column names cannot be longer than 200 characters. - JSON, XML and RSS formats do not have such restrictions. - - ### Hidden fields - - The top-level fields starting with the `#` character are considered hidden. - These are useful to store debugging information and can be omitted from the output by providing the `skipHidden=1` or `clean=1` query parameters. - For example, if you store the following object to the dataset: - - ``` - { - productName: "iPhone Xs", - description: "Welcome to the big screens." - #debug: { - url: "https://www.apple.com/lae/iphone-xs/", - crawledAt: "2019-01-21T16:06:03.683Z" - } - } - ``` - - The `#debug` field will be considered as hidden and can be omitted from the - results. This is useful to - provide nice cleaned data to end users, while keeping debugging info - available if needed. The Dataset object - returned by the API contains the number of such clean items in the`dataset.cleanItemCount` property. - - ### XML format extension - - When exporting results to XML or RSS formats, the names of object properties become XML tags and the corresponding values become tag's children. For example, the following JavaScript object: - - ``` - { - name: "Paul Newman", - address: [ - { type: "home", street: "21st", city: "Chicago" }, - { type: "office", street: null, city: null } - ] - } - ``` - - will be transformed to the following XML snippet: - - ``` - Paul Newman -
- home - 21st - Chicago -
-
- office - - -
- ``` - - If the JavaScript object contains a property named `@` then its sub-properties are exported as attributes of the parent XML - element. - If the parent XML element does not have any child elements then its value is taken from a JavaScript object property named `#`. - - For example, the following JavaScript object: - - ``` - { - "address": [{ - "@": { - "type": "home" - }, - "street": "21st", - "city": "Chicago" - }, - { - "@": { - "type": "office" - }, - "#": 'unknown' - }] - } - ``` - - will be transformed to the following XML snippet: - - ``` -
- 21st - Chicago -
-
unknown
- ``` - - This feature is also useful to customize your RSS feeds generated for various websites. - - By default the whole result is wrapped in a `` element and each page object is wrapped in a `` element. - You can change this using xmlRoot and xmlRow url parameters. - - ### Pagination - - The generated response supports [pagination](#/introduction/pagination). - The pagination is always performed with the granularity of a single item, regardless whether unwind parameter was provided. - By default, the **Items** in the response are sorted by the time they were stored to the database, therefore you can use pagination to incrementally fetch the items as they are being added. - No limit exists to how many items can be returned in one response. - - If you specify `desc=1` query parameter, the results are returned in the reverse order than they were stored (i.e. from newest to oldest items). - Note that only the order of **Items** is reversed, but not the order of the `unwind` array elements. 
- operationId: dataset_items_get - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/format" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/clean" - - $ref: "../../components/parameters/paginationParameters.yaml#/offset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/limit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/fields" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/omit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/unwind" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/flatten" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/descDataset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/attachment" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/delimiter" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/bom" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRoot" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHeaderRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHidden" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipEmpty" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/simplified" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/view" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipFailedPages" - - $ref: "../../components/parameters/storageParameters.yaml#/signature" - responses: - "200": - description: "" - headers: - $ref: ../../components/headers/ApifyPaginationHeaders.yaml - content: - application/json: - schema: - type: array - items: - type: object - example: [foo: bar, foo2: bar2] - application/jsonl: - 
schema: - type: string - example: '{"foo":"bar"}\n{"foo2":"bar2"}\n' - text/csv: - schema: - type: string - example: 'foo,bar\nfoo2,bar2\n' - text/html: - schema: - type: string - example:
foobar
foobar
foo2bar2
- application/vnd.openxmlformats-officedocument.spreadsheetml.sheet: - schema: - type: string - application/rss+xml: - schema: - type: string - example: barbar2 - application/xml: - schema: - type: string - example: barbar2 - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items - - https://docs.apify.com/api/v2#/reference/datasets/get-items - - https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_get - x-js-parent: DatasetClient - x-js-name: listItems - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#listItems - x-py-parent: DatasetClientAsync - x-py-name: stream_items - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#stream_items + $ref: "../../components/objects/datasets/dataset-items.yaml#/getById" head: - tags: - - Storage/Datasets - summary: Get dataset items headers - description: | - Returns only the HTTP headers for the dataset items endpoint, without the response body. - This is useful to check pagination metadata or verify access without downloading the full dataset. 
- operationId: dataset_items_head - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/format" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/clean" - - $ref: "../../components/parameters/paginationParameters.yaml#/offset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/limit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/fields" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/omit" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/unwind" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/flatten" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/descDataset" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/attachment" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/delimiter" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/bom" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRoot" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/xmlRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHeaderRow" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipHidden" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipEmpty" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/simplified" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/view" - - $ref: "../../components/parameters/datasetItemsParameters.yaml#/skipFailedPages" - - $ref: "../../components/parameters/storageParameters.yaml#/signature" - responses: - "200": - description: "" - headers: - $ref: ../../components/headers/ApifyPaginationHeaders.yaml - content: {} - "400": - $ref: ../../components/responses/BadRequest.yaml - deprecated: false + $ref: 
"../../components/objects/datasets/dataset-items.yaml#/headById" post: - tags: - - Storage/Datasets - summary: Store items - description: | - Appends an item or an array of items to the end of the dataset. - The POST payload is a JSON object or a JSON array of objects to save into the dataset. - - If the data you attempt to store in the dataset is invalid (meaning any of the items received by the API fails the validation), the whole request is discarded and the API will return a response with status code 400. - For more information about dataset schema validation, see [Dataset schema](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation). - - **IMPORTANT:** The limit of request payload size for the dataset is 5 MB. If the array exceeds the size, you'll need to split it into a number of smaller arrays. - operationId: dataset_items_post - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - requestBody: - description: "" - content: - application/json: - schema: - oneOf: - - $ref: ../../components/schemas/datasets/PutItemsRequest.yaml - - type: array - items: - $ref: ../../components/schemas/datasets/PutItemsRequest.yaml - description: "" - required: true - responses: - "201": - description: "" - headers: - Location: - content: - text/plain: - schema: - type: string - example: https://api.apify.com/v2/datasets/WkzbQMuFYuamGv3YF/items - content: - application/json: - schema: - type: object - example: {} - "400": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/PutItemResponseError.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - deprecated: false - x-legacy-doc-urls: - - https://docs.apify.com/api/v2#/reference/datasets/item-collection/put-items - - https://docs.apify.com/api/v2#/reference/datasets/put-items - - 
https://docs.apify.com/api/v2#tag/DatasetsItem-collection/operation/dataset_items_post - x-js-parent: DatasetClient - x-js-name: pushItems - x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#pushItems - x-py-parent: DatasetClientAsync - x-py-name: push_items - x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#push_items + $ref: "../../components/objects/datasets/dataset-items.yaml#/postById" diff --git a/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml b/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml index 6dbc8d0028..af6c3dd054 100644 --- a/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml +++ b/apify-api/openapi/paths/datasets/datasets@{datasetId}@statistics.yaml @@ -1,38 +1,2 @@ get: - tags: - - Storage/Datasets - summary: Get dataset statistics - description: | - Returns statistics for given dataset. - - Provides only [field statistics](https://docs.apify.com/platform/actors/development/actor-definition/dataset-schema/validation#dataset-field-statistics). 
- - operationId: dataset_statistics_get - parameters: - - $ref: "../../components/parameters/storageParameters.yaml#/datasetId" - responses: - "200": - description: "" - content: - application/json: - schema: - $ref: ../../components/schemas/datasets/DatasetStatisticsResponse.yaml - # TODO: add clients methods - # x-js-parent: DatasetClient - # x-js-name: statistics - # x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#statistics - # x-py-parent: DatasetClientAsync - # x-py-name: statistics - # x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#statistics - "400": - $ref: ../../components/responses/BadRequest.yaml - "401": - $ref: ../../components/responses/Unauthorized.yaml - "403": - $ref: ../../components/responses/Forbidden.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml + $ref: "../../components/objects/datasets/dataset-statistics.yaml#/getById" From a5320fd0534228aefb02e2c0e1e6c583b77c78aa Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 14 Apr 2026 16:53:01 +0200 Subject: [PATCH 2/4] Add Last run related dataset endpoints --- .../objects/datasets/dataset-items.yaml | 73 +++++++++++-------- .../objects/datasets/dataset-statistics.yaml | 15 ++++ .../components/objects/datasets/dataset.yaml | 51 ++++++++++++- apify-api/openapi/components/tags.yaml | 4 + .../openapi/components/x-tag-groups.yaml | 1 + apify-api/openapi/openapi.yaml | 6 ++ .../actor-runs@{runId}@dataset@items.yaml | 2 - .../acts@{actorId}@runs@last@dataset.yaml | 6 ++ ...cts@{actorId}@runs@last@dataset@items.yaml | 4 + ...actorId}@runs@last@dataset@statistics.yaml | 2 + .../actors/acts@{actorId}@validate-input.yaml | 59 --------------- 11 files changed, 131 insertions(+), 92 deletions(-) create mode 100644 apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml 
create mode 100644 apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml create mode 100644 apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml delete mode 100644 apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml diff --git a/apify-api/openapi/components/objects/datasets/dataset-items.yaml b/apify-api/openapi/components/objects/datasets/dataset-items.yaml index 394a33e3c0..8ea29df2b3 100644 --- a/apify-api/openapi/components/objects/datasets/dataset-items.yaml +++ b/apify-api/openapi/components/objects/datasets/dataset-items.yaml @@ -276,28 +276,20 @@ getDefault: - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" - $ref: "../../parameters/storageParameters.yaml#/signature" -sharedHead: &sharedHead - responses: - "200": - description: "" - headers: - $ref: ../../headers/ApifyPaginationHeaders.yaml - content: {} - "400": - $ref: ../../responses/BadRequest.yaml - deprecated: false - -headById: - <<: *sharedHead +getLastRun: + <<: *sharedGet tags: - - Storage/Datasets - summary: Get dataset items headers + - Last Actor run + summary: Get last run's dataset items description: | - Returns only the HTTP headers for the dataset items endpoint, without the response body. - This is useful to check pagination metadata or verify access without downloading the full dataset. - operationId: dataset_items_head + Returns data stored in the default dataset of the last Actor run in the desired format. + + This endpoint is a shortcut that resolves the last run's `defaultDatasetId` and proxies to the + [Get dataset items](/api/v2/dataset-items-get) endpoint. 
+ operationId: act_runs_last_dataset_items_get parameters: - - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" - $ref: "../../parameters/datasetItemsParameters.yaml#/format" - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" - $ref: "../../parameters/paginationParameters.yaml#/offset" @@ -320,20 +312,25 @@ headById: - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" - $ref: "../../parameters/storageParameters.yaml#/signature" -headDefault: - <<: *sharedHead +headById: + responses: + "200": + description: "" + headers: + $ref: ../../headers/ApifyPaginationHeaders.yaml + content: {} + "400": + $ref: ../../responses/BadRequest.yaml + deprecated: false tags: - - Default storages - summary: Get default dataset items headers + - Storage/Datasets + summary: Get dataset items headers description: | - Returns only the HTTP headers for the dataset items endpoint of the Actor run's default dataset, - without the response body. - - This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the - [Head dataset items](/api/v2/dataset-items-head) endpoint. - operationId: actorRun_dataset_items_head + Returns only the HTTP headers for the dataset items endpoint, without the response body. + This is useful to check pagination metadata or verify access without downloading the full dataset. 
+ operationId: dataset_items_head parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + - $ref: "../../parameters/storageParameters.yaml#/datasetId" - $ref: "../../parameters/datasetItemsParameters.yaml#/format" - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" - $ref: "../../parameters/paginationParameters.yaml#/offset" @@ -438,3 +435,19 @@ postDefault: operationId: actorRun_dataset_items_post parameters: - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +postLastRun: + <<: *sharedPost + tags: + - Last Actor run + summary: Store items in last run's dataset + description: | + Appends an item or an array of items to the end of the last Actor run's default dataset. + + This endpoint is a shortcut that resolves the last run's `defaultDatasetId` and proxies to the + [Store items](/api/v2/dataset-items-post) endpoint. + + operationId: act_runs_last_dataset_items_post + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml b/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml index 7de47e3364..e808ae2c35 100644 --- a/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml +++ b/apify-api/openapi/components/objects/datasets/dataset-statistics.yaml @@ -46,3 +46,18 @@ getDefault: operationId: actorRun_dataset_statistics_get parameters: - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +getLastRun: + <<: *sharedGet + tags: + - Last Actor run + summary: Get last run's dataset statistics + description: | + Returns statistics for the last Actor run's default dataset. + + This endpoint is a shortcut that resolves the last run's `defaultDatasetId` and proxies to the + [Get dataset statistics](/api/v2/dataset-statistics-get) endpoint. 
+ operationId: act_runs_last_dataset_statistics_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/objects/datasets/dataset.yaml b/apify-api/openapi/components/objects/datasets/dataset.yaml index b9f04b9ee0..a678ddd46c 100644 --- a/apify-api/openapi/components/objects/datasets/dataset.yaml +++ b/apify-api/openapi/components/objects/datasets/dataset.yaml @@ -80,6 +80,21 @@ getDefault: parameters: - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" +getLastRun: + <<: *sharedGet + tags: + - Last Actor run + summary: Get last run's default dataset + description: | + Returns the default dataset associated with the last Actor run. + + This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the + [Get dataset](/api/v2/dataset-get) endpoint. + operationId: act_runs_last_dataset_get + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" + sharedPut: &sharedPut requestBody: description: "" @@ -141,6 +156,21 @@ putDefault: parameters: - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" +putLastRun: + <<: *sharedPut + tags: + - Last Actor run + summary: Update last run's default dataset + description: | + Updates the default dataset associated with the last Actor run. + + This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the + [Update dataset](/api/v2/dataset-put) endpoint. 
+ operationId: act_runs_last_dataset_put + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" + sharedDelete: &sharedDelete responses: <<: *commonErrors @@ -170,7 +200,26 @@ deleteDefault: tags: - Default storages summary: Delete default dataset - description: Deletes default dataset associated with an Actor run. + description: | + Deletes default dataset associated with an Actor run. + + This endpoint is a shortcut for getting the run's `defaultDatasetId` and then using the + [Delete dataset](/api/v2/dataset-delete) endpoint. operationId: actorRun_dataset_delete parameters: - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +deleteLastRun: + <<: *sharedDelete + tags: + - Last Actor run + summary: Delete last run's default dataset + description: | + Deletes the default dataset associated with the last Actor run. + + This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the + [Delete dataset](/api/v2/dataset-delete) endpoint. + operationId: act_runs_last_dataset_delete + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/tags.yaml b/apify-api/openapi/components/tags.yaml index 1097fb21bb..694a433e84 100644 --- a/apify-api/openapi/components/tags.yaml +++ b/apify-api/openapi/components/tags.yaml @@ -431,3 +431,7 @@ x-displayName: Default storages - Introduction description: | The API endpoints described in this section are convenience endpoints that provide access to Actor run's default storages (dataset, key-value store, and request queue) without the need to resolve the storage ID first. 
+- name: Last Actor run + x-displayName: Last Actor run - Introduction + description: | + The API endpoints described in this section are convenience endpoints that provide access to an Actor's last run and all its sub-resources without the need to resolve the run ID first. diff --git a/apify-api/openapi/components/x-tag-groups.yaml b/apify-api/openapi/components/x-tag-groups.yaml index 46caf29d40..e454dbb44b 100644 --- a/apify-api/openapi/components/x-tag-groups.yaml +++ b/apify-api/openapi/components/x-tag-groups.yaml @@ -44,3 +44,4 @@ - name: Convenience endpoints tags: - Default storages + - Last Actor run diff --git a/apify-api/openapi/openapi.yaml b/apify-api/openapi/openapi.yaml index 028b8fe732..8aac76d3ec 100644 --- a/apify-api/openapi/openapi.yaml +++ b/apify-api/openapi/openapi.yaml @@ -514,6 +514,12 @@ paths: $ref: "paths/actors/acts@{actorId}@runs@{runId}@resurrect.yaml" "/v2/acts/{actorId}/runs/last": $ref: "paths/actors/acts@{actorId}@runs@last.yaml" + "/v2/acts/{actorId}/runs/last/dataset": + $ref: "paths/actors/acts@{actorId}@runs@last@dataset.yaml" + "/v2/acts/{actorId}/runs/last/dataset/items": + $ref: "paths/actors/acts@{actorId}@runs@last@dataset@items.yaml" + "/v2/acts/{actorId}/runs/last/dataset/statistics": + $ref: "paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml" "/v2/acts/{actorId}/runs/{runId}": $ref: "paths/actors/acts@{actorId}@runs@{runId}.yaml" "/v2/acts/{actorId}/runs/{runId}/abort": diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml index 1ae818e928..bee4fbf290 100644 --- a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml +++ b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@dataset@items.yaml @@ -1,6 +1,4 @@ get: $ref: "../../components/objects/datasets/dataset-items.yaml#/getDefault" -head: - $ref: "../../components/objects/datasets/dataset-items.yaml#/headDefault" post: $ref: 
"../../components/objects/datasets/dataset-items.yaml#/postDefault" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml new file mode 100644 index 0000000000..7cb247f576 --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset.yaml @@ -0,0 +1,6 @@ +get: + $ref: "../../components/objects/datasets/dataset.yaml#/getLastRun" +put: + $ref: "../../components/objects/datasets/dataset.yaml#/putLastRun" +delete: + $ref: "../../components/objects/datasets/dataset.yaml#/deleteLastRun" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml new file mode 100644 index 0000000000..b3731bb35a --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@items.yaml @@ -0,0 +1,4 @@ +get: + $ref: "../../components/objects/datasets/dataset-items.yaml#/getLastRun" +post: + $ref: "../../components/objects/datasets/dataset-items.yaml#/postLastRun" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml new file mode 100644 index 0000000000..c585c7cd25 --- /dev/null +++ b/apify-api/openapi/paths/actors/acts@{actorId}@runs@last@dataset@statistics.yaml @@ -0,0 +1,2 @@ +get: + $ref: "../../components/objects/datasets/dataset-statistics.yaml#/getLastRun" diff --git a/apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml b/apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml deleted file mode 100644 index f2458313e8..0000000000 --- a/apify-api/openapi/paths/actors/acts@{actorId}@validate-input.yaml +++ /dev/null @@ -1,59 +0,0 @@ -post: - tags: - - Actors - summary: Validate input - description: | - Validates the provided input against the Actor's input schema for the specified build. 
- - The endpoint checks whether the JSON payload conforms to the input schema - defined in the Actor's build. If no `build` query parameter is provided, - the `latest` build tag is used by default. - operationId: act_validateInput_post - security: [] - parameters: - - $ref: "../../components/parameters/runAndBuildParameters.yaml#/actorId" - - name: build - in: query - description: | - Optional tag or number of the Actor build to use for input schema validation. - By default, the `latest` build tag is used. - required: false - style: form - explode: true - schema: - type: string - example: latest - requestBody: - description: JSON input to validate against the Actor's input schema. - content: - application/json: - schema: - type: object - required: true - responses: - "200": - description: "" - headers: {} - content: - application/json: - schema: - type: object - required: - - valid - properties: - valid: - type: boolean - description: Whether the input is valid according to the Actor's input schema. 
- "400": - $ref: ../../components/responses/BadRequest.yaml - "404": - $ref: ../../components/responses/NotFound.yaml - "405": - $ref: ../../components/responses/MethodNotAllowed.yaml - "413": - $ref: ../../components/responses/PayloadTooLarge.yaml - "415": - $ref: ../../components/responses/UnsupportedMediaType.yaml - "429": - $ref: ../../components/responses/TooManyRequests.yaml - deprecated: false From 83e217b12270c2158470df0740b91ed0a3babd28 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Tue, 14 Apr 2026 17:41:05 +0200 Subject: [PATCH 3/4] Fix lint --- .../openapi/components/objects/datasets/dataset-items.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apify-api/openapi/components/objects/datasets/dataset-items.yaml b/apify-api/openapi/components/objects/datasets/dataset-items.yaml index 8ea29df2b3..6f4fdda223 100644 --- a/apify-api/openapi/components/objects/datasets/dataset-items.yaml +++ b/apify-api/openapi/components/objects/datasets/dataset-items.yaml @@ -318,7 +318,7 @@ headById: description: "" headers: $ref: ../../headers/ApifyPaginationHeaders.yaml - content: {} + content: {} "400": $ref: ../../responses/BadRequest.yaml deprecated: false From a3529929b306e629817dec2ef216085d45c179b2 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 15 Apr 2026 15:06:51 +0200 Subject: [PATCH 4/4] More reuse and adding one non obvious error type --- .../objects/datasets/dataset-items.yaml | 124 +++++++----------- .../components/objects/datasets/dataset.yaml | 112 ++++++---------- .../components/schemas/common/ErrorType.yaml | 1 + 3 files changed, 91 insertions(+), 146 deletions(-) diff --git a/apify-api/openapi/components/objects/datasets/dataset-items.yaml b/apify-api/openapi/components/objects/datasets/dataset-items.yaml index 6f4fdda223..7ce9e7e1d8 100644 --- a/apify-api/openapi/components/objects/datasets/dataset-items.yaml +++ b/apify-api/openapi/components/objects/datasets/dataset-items.yaml @@ -1,6 +1,4 @@ commonErrors: 
&commonErrors - "400": - $ref: ../../responses/BadRequest.yaml "401": $ref: ../../responses/Unauthorized.yaml "403": @@ -12,6 +10,43 @@ commonErrors: &commonErrors "429": $ref: ../../responses/TooManyRequests.yaml +sharedTagById: &sharedTagById + tags: + - Storage/Datasets + +sharedTagDefault: &sharedTagDefault + tags: + - Default storages + +sharedTagLastRun: &sharedTagLastRun + tags: + - Last Actor run + +parametersGetHeadById: &parametersGetHeadById + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + - $ref: "../../parameters/datasetItemsParameters.yaml#/format" + - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" + - $ref: "../../parameters/paginationParameters.yaml#/offset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" + - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" + - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" + - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" + - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" + - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" + - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" + - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" + - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" + - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" + - $ref: "../../parameters/datasetItemsParameters.yaml#/view" + - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" + - $ref: "../../parameters/storageParameters.yaml#/signature" + sharedGet: &sharedGet responses: <<: *commonErrors @@ -49,12 +84,12 @@ 
sharedGet: &sharedGet schema: type: string example: barbar2 + "400": + $ref: ../../responses/BadRequest.yaml deprecated: false getById: - <<: *sharedGet - tags: - - Storage/Datasets + <<: [*sharedGet, *sharedTagById, *parametersGetHeadById] summary: Get dataset items description: | Returns data stored in the dataset in a desired format. @@ -207,29 +242,6 @@ getById: If you specify `desc=1` query parameter, the results are returned in the reverse order than they were stored (i.e. from newest to oldest items). Note that only the order of **Items** is reversed, but not the order of the `unwind` array elements. operationId: dataset_items_get - parameters: - - $ref: "../../parameters/storageParameters.yaml#/datasetId" - - $ref: "../../parameters/datasetItemsParameters.yaml#/format" - - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" - - $ref: "../../parameters/paginationParameters.yaml#/offset" - - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" - - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" - - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" - - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" - - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" - - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" - - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" - - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" - - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" - - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" - - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" - - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" - - $ref: "../../parameters/datasetItemsParameters.yaml#/view" - - $ref: 
"../../parameters/datasetItemsParameters.yaml#/skipFailedPages" - - $ref: "../../parameters/storageParameters.yaml#/signature" x-legacy-doc-urls: - https://docs.apify.com/api/v2#/reference/datasets/item-collection/get-items - https://docs.apify.com/api/v2#/reference/datasets/get-items @@ -242,9 +254,7 @@ getById: x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#stream_items getDefault: - <<: *sharedGet - tags: - - Default storages + <<: [*sharedGet, *sharedTagDefault] summary: Get default dataset items description: | Returns data stored in the default dataset of the Actor run in the desired format. @@ -277,9 +287,7 @@ getDefault: - $ref: "../../parameters/storageParameters.yaml#/signature" getLastRun: - <<: *sharedGet - tags: - - Last Actor run + <<: [*sharedGet, *sharedTagLastRun] summary: Get last run's dataset items description: | Returns data stored in the default dataset of the last Actor run in the desired format. @@ -313,7 +321,9 @@ getLastRun: - $ref: "../../parameters/storageParameters.yaml#/signature" headById: + <<: [*parametersGetHeadById, *sharedTagById] responses: + <<: *commonErrors "200": description: "" headers: @@ -322,36 +332,11 @@ headById: "400": $ref: ../../responses/BadRequest.yaml deprecated: false - tags: - - Storage/Datasets summary: Get dataset items headers description: | Returns only the HTTP headers for the dataset items endpoint, without the response body. This is useful to check pagination metadata or verify access without downloading the full dataset. 
operationId: dataset_items_head - parameters: - - $ref: "../../parameters/storageParameters.yaml#/datasetId" - - $ref: "../../parameters/datasetItemsParameters.yaml#/format" - - $ref: "../../parameters/datasetItemsParameters.yaml#/clean" - - $ref: "../../parameters/paginationParameters.yaml#/offset" - - $ref: "../../parameters/datasetItemsParameters.yaml#/limit" - - $ref: "../../parameters/datasetItemsParameters.yaml#/fields" - - $ref: "../../parameters/datasetItemsParameters.yaml#/omit" - - $ref: "../../parameters/datasetItemsParameters.yaml#/unwind" - - $ref: "../../parameters/datasetItemsParameters.yaml#/flatten" - - $ref: "../../parameters/datasetItemsParameters.yaml#/descDataset" - - $ref: "../../parameters/datasetItemsParameters.yaml#/attachment" - - $ref: "../../parameters/datasetItemsParameters.yaml#/delimiter" - - $ref: "../../parameters/datasetItemsParameters.yaml#/bom" - - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRoot" - - $ref: "../../parameters/datasetItemsParameters.yaml#/xmlRow" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHeaderRow" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipHidden" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipEmpty" - - $ref: "../../parameters/datasetItemsParameters.yaml#/simplified" - - $ref: "../../parameters/datasetItemsParameters.yaml#/view" - - $ref: "../../parameters/datasetItemsParameters.yaml#/skipFailedPages" - - $ref: "../../parameters/storageParameters.yaml#/signature" sharedPost: &sharedPost requestBody: @@ -367,6 +352,7 @@ sharedPost: &sharedPost description: "" required: true responses: + <<: *commonErrors "201": description: "" headers: @@ -387,17 +373,13 @@ sharedPost: &sharedPost content: application/json: schema: - $ref: ../../schemas/datasets/PutItemResponseError.yaml - "403": - $ref: ../../responses/Forbidden.yaml - "404": - $ref: ../../responses/NotFound.yaml + anyOf: + - $ref: ../../schemas/datasets/PutItemResponseError.yaml + - $ref: 
../../schemas/common/ErrorResponse.yaml deprecated: false postById: - <<: *sharedPost - tags: - - Storage/Datasets + <<: [*sharedPost, *sharedTagById] summary: Store items description: | Appends an item or an array of items to the end of the dataset. @@ -422,9 +404,7 @@ postById: x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#push_items postDefault: - <<: *sharedPost - tags: - - Default storages + <<: [*sharedPost, *sharedTagDefault] summary: Store items description: | Appends an item or an array of items to the end of the Actor run's default dataset. @@ -437,9 +417,7 @@ postDefault: - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" postLastRun: - <<: *sharedPost - tags: - - Last Actor run + <<: [*sharedPost, *sharedTagLastRun] summary: Store items in last run's dataset description: | Appends an item or an array of items to the end of the last Actor run's default dataset. diff --git a/apify-api/openapi/components/objects/datasets/dataset.yaml b/apify-api/openapi/components/objects/datasets/dataset.yaml index a678ddd46c..6f43b852b9 100644 --- a/apify-api/openapi/components/objects/datasets/dataset.yaml +++ b/apify-api/openapi/components/objects/datasets/dataset.yaml @@ -12,22 +12,41 @@ commonErrors: &commonErrors "429": $ref: ../../responses/TooManyRequests.yaml +common200: &common200 + "200": + description: "" + headers: {} + content: + application/json: + schema: + $ref: ../../schemas/datasets/DatasetResponse.yaml + +sharedById: &sharedById + tags: + - Storage/Datasets + parameters: + - $ref: "../../parameters/storageParameters.yaml#/datasetId" + +sharedDefault: &sharedDefault + tags: + - Default storages + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" + +sharedLastRun: &sharedLastRun + tags: + - Last Actor run + parameters: + - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" + - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" + sharedGet: &sharedGet 
responses: - <<: *commonErrors - "200": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../schemas/datasets/DatasetResponse.yaml + <<: [*common200, *commonErrors] deprecated: false getById: - <<: *sharedGet - tags: - - Storage/Datasets + <<: [*sharedGet, *sharedById] summary: Get dataset description: | Returns dataset object for given dataset ID. @@ -43,17 +62,6 @@ getById: There is a short period (up to 5 seconds) during which these counters may not match with exact counts in dataset items. operationId: dataset_get - parameters: - - $ref: "../../parameters/storageParameters.yaml#/datasetId" - - name: token - in: query - description: | - API authentication token. It is required only when using the `username~dataset-name` format for `datasetId`. - style: form - explode: true - schema: - type: string - example: soSkq9ekdmfOslopH x-legacy-doc-urls: - https://docs.apify.com/api/v2#/reference/datasets/dataset/get-dataset - https://docs.apify.com/api/v2#/reference/datasets/get-dataset @@ -66,9 +74,7 @@ getById: x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#get getDefault: - <<: *sharedGet - tags: - - Default storages + <<: [*sharedGet, *sharedDefault] summary: Get default dataset description: | Returns the default dataset associated with an Actor run. @@ -77,13 +83,9 @@ getDefault: [Get dataset](/api/v2/dataset-get) endpoint. operationId: actorRun_dataset_get - parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" getLastRun: - <<: *sharedGet - tags: - - Last Actor run + <<: [*sharedGet, *sharedLastRun] summary: Get last run's default dataset description: | Returns the default dataset associated with the last Actor run. @@ -91,9 +93,6 @@ getLastRun: This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the [Get dataset](/api/v2/dataset-get) endpoint. 
operationId: act_runs_last_dataset_get - parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" - - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" sharedPut: &sharedPut requestBody: @@ -104,14 +103,7 @@ sharedPut: &sharedPut $ref: ../../schemas/datasets/UpdateDatasetRequest.yaml required: true responses: - <<: *commonErrors - "200": - description: "" - headers: {} - content: - application/json: - schema: - $ref: ../../schemas/datasets/DatasetResponse.yaml + <<: [*common200, *commonErrors] "413": $ref: ../../responses/PayloadTooLarge.yaml "415": @@ -119,17 +111,13 @@ sharedPut: &sharedPut deprecated: false putById: - <<: *sharedPut - tags: - - Storage/Datasets + <<: [*sharedPut, *sharedById] summary: Update dataset description: | Updates a dataset's name and general resource access level using a value specified by a JSON object passed in the PUT payload. The response is the updated dataset object, as returned by the [Get dataset](/api/v2/dataset-get) API endpoint. operationId: dataset_put - parameters: - - $ref: "../../parameters/storageParameters.yaml#/datasetId" x-legacy-doc-urls: - https://docs.apify.com/api/v2#/reference/datasets/dataset/update-dataset - https://docs.apify.com/api/v2#/reference/datasets/update-dataset @@ -142,9 +130,7 @@ putById: x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/DatasetClientAsync#update putDefault: - <<: *sharedPut - tags: - - Default storages + <<: [*sharedPut, *sharedDefault] summary: Update default dataset description: | Updates the default dataset associated with an Actor run. @@ -153,13 +139,9 @@ putDefault: [Put dataset](/api/v2/dataset-put) endpoint. 
operationId: actorRun_dataset_put - parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" putLastRun: - <<: *sharedPut - tags: - - Last Actor run + <<: [*sharedPut, *sharedLastRun] summary: Update last run's default dataset description: | Updates the default dataset associated with the last Actor run. @@ -167,9 +149,6 @@ putLastRun: This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the [Update dataset](/api/v2/dataset-put) endpoint. operationId: act_runs_last_dataset_put - parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" - - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" sharedDelete: &sharedDelete responses: @@ -179,14 +158,10 @@ sharedDelete: &sharedDelete deprecated: false deleteById: - <<: *sharedDelete - tags: - - Storage/Datasets + <<: [*sharedDelete, *sharedById] summary: Delete dataset description: Deletes a specific dataset. operationId: dataset_delete - parameters: - - $ref: "../../parameters/storageParameters.yaml#/datasetId" x-legacy-doc-urls: - https://docs.apify.com/api/v2#/reference/datasets/dataset/delete-dataset - https://docs.apify.com/api/v2#/reference/datasets/delete-dataset @@ -196,9 +171,7 @@ deleteById: x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/DatasetClient#delete deleteDefault: - <<: *sharedDelete - tags: - - Default storages + <<: [*sharedDelete, *sharedDefault] summary: Delete default dataset description: | Deletes default dataset associated with an Actor run. @@ -206,13 +179,9 @@ deleteDefault: This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the [ Delete dataset ](/api/v2/dataset-delete) endpoint. 
operationId: actorRun_dataset_delete - parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/runId" deleteLastRun: - <<: *sharedDelete - tags: - - Last Actor run + <<: [*sharedDelete, *sharedLastRun] summary: Delete last run's default dataset description: | Deletes the default dataset associated with the last Actor run. @@ -220,6 +189,3 @@ deleteLastRun: This endpoint is a shortcut for getting the last run's `defaultDatasetId` and then using the [Delete dataset](/api/v2/dataset-delete) endpoint. operationId: act_runs_last_dataset_delete - parameters: - - $ref: "../../parameters/runAndBuildParameters.yaml#/actorId" - - $ref: "../../parameters/runAndBuildParameters.yaml#/statusLastRun" diff --git a/apify-api/openapi/components/schemas/common/ErrorType.yaml b/apify-api/openapi/components/schemas/common/ErrorType.yaml index c6e517d08d..7d18ec687c 100644 --- a/apify-api/openapi/components/schemas/common/ErrorType.yaml +++ b/apify-api/openapi/components/schemas/common/ErrorType.yaml @@ -304,6 +304,7 @@ enum: - schedule-actor-task-not-found - schedule-name-not-unique - schema-validation + - schema-validation-error - schema-validation-failed - sign-up-method-not-allowed - slack-integration-not-custom