From 8d2fa98f192532eb3090bfb21dd34dca4f014837 Mon Sep 17 00:00:00 2001 From: datalogics-dliang Date: Mon, 22 Sep 2025 12:20:28 -0500 Subject: [PATCH 1/2] Add samples for /summarized-pdf-text --- .../JSON Payload/CMakeLists.txt | 4 + .../JSON Payload/summarized_pdf_text.cpp | 139 ++++++++++++++++++ .../Multipart Payload/CMakeLists.txt | 4 + .../Multipart Payload/summarized_pdf_text.cpp | 80 ++++++++++ .../JSON Payload/summarized-pdf-text.cs | 95 ++++++++++++ .../Multipart Payload/summarized-pdf-text.cs | 74 ++++++++++ DotNET/Program.cs | 8 + .../JSON Payload/SummarizedPDFText.java | 99 +++++++++++++ .../Multipart Payload/SummarizedPDFText.java | 69 +++++++++ .../JSON Payload/summarized-pdf-text.js | 63 ++++++++ .../Multipart Payload/summarized-pdf-text.js | 38 +++++ .../JSON Payload/summarized-pdf-text.php | 40 +++++ .../Multipart Payload/summarized-pdf-text.php | 42 ++++++ .../JSON Payload/summarized-pdf-text.pl | 64 ++++++++ .../Multipart Payload/summarized-pdf-text.pl | 45 ++++++ .../Multipart Payload/summarized-pdf-text.py | 38 +++++ .../JSON Payload/summarized-pdf-text.R | 62 ++++++++ .../Multipart Payload/summarized-pdf-text.R | 46 ++++++ .../JSON Payload/summarized-pdf-text.rb | 55 +++++++ .../Multipart Payload/summarized-pdf-text.rb | 39 +++++ .../JSON Payload/summarized-pdf-text.vb | 108 ++++++++++++++ .../Multipart Payload/summarized-pdf-text.vb | 76 ++++++++++ VB.NET/Program.vb | 6 + .../JSON Payload/summarized-pdf-text.sh | 21 +++ .../Multipart Payload/summarized-pdf-text.sh | 15 ++ 25 files changed, 1330 insertions(+) create mode 100644 CPlusPlus/Endpoint Examples/JSON Payload/summarized_pdf_text.cpp create mode 100644 CPlusPlus/Endpoint Examples/Multipart Payload/summarized_pdf_text.cpp create mode 100644 DotNET/Endpoint Examples/JSON Payload/summarized-pdf-text.cs create mode 100644 DotNET/Endpoint Examples/Multipart Payload/summarized-pdf-text.cs create mode 100644 Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java create mode 100644 
Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java create mode 100644 JavaScript/Endpoint Examples/JSON Payload/summarized-pdf-text.js create mode 100644 JavaScript/Endpoint Examples/Multipart Payload/summarized-pdf-text.js create mode 100644 PHP/Endpoint Examples/JSON Payload/summarized-pdf-text.php create mode 100644 PHP/Endpoint Examples/Multipart Payload/summarized-pdf-text.php create mode 100644 Perl/Endpoint Examples/JSON Payload/summarized-pdf-text.pl create mode 100644 Perl/Endpoint Examples/Multipart Payload/summarized-pdf-text.pl create mode 100644 Python/Endpoint Examples/Multipart Payload/summarized-pdf-text.py create mode 100644 R/Endpoint Examples/JSON Payload/summarized-pdf-text.R create mode 100644 R/Endpoint Examples/Multipart Payload/summarized-pdf-text.R create mode 100644 Ruby/Endpoint Examples/JSON Payload/summarized-pdf-text.rb create mode 100644 Ruby/Endpoint Examples/Multipart Payload/summarized-pdf-text.rb create mode 100644 VB.NET/Endpoint Examples/JSON Payload/summarized-pdf-text.vb create mode 100644 VB.NET/Endpoint Examples/Multipart Payload/summarized-pdf-text.vb create mode 100644 cURL/Endpoint Examples/JSON Payload/summarized-pdf-text.sh create mode 100644 cURL/Endpoint Examples/Multipart Payload/summarized-pdf-text.sh diff --git a/CPlusPlus/Endpoint Examples/JSON Payload/CMakeLists.txt b/CPlusPlus/Endpoint Examples/JSON Payload/CMakeLists.txt index f05a6a57..25e4f1f8 100644 --- a/CPlusPlus/Endpoint Examples/JSON Payload/CMakeLists.txt +++ b/CPlusPlus/Endpoint Examples/JSON Payload/CMakeLists.txt @@ -11,4 +11,8 @@ if (cpr_FOUND AND nlohmann_json_FOUND) add_executable(rasterized_pdf_json rasterized_pdf.cpp) target_link_libraries(rasterized_pdf_json PRIVATE cpr::cpr nlohmann_json::nlohmann_json) target_compile_features(rasterized_pdf_json PRIVATE cxx_std_20) + + add_executable(summarized_pdf_text_json summarized_pdf_text.cpp) + target_link_libraries(summarized_pdf_text_json PRIVATE cpr::cpr nlohmann_json::nlohmann_json) + 
target_compile_features(summarized_pdf_text_json PRIVATE cxx_std_20) endif() diff --git a/CPlusPlus/Endpoint Examples/JSON Payload/summarized_pdf_text.cpp b/CPlusPlus/Endpoint Examples/JSON Payload/summarized_pdf_text.cpp new file mode 100644 index 00000000..a8810660 --- /dev/null +++ b/CPlusPlus/Endpoint Examples/JSON Payload/summarized_pdf_text.cpp @@ -0,0 +1,139 @@ +/* + * What this sample does: + * - Uploads a PDF via /upload, then calls /summarized-pdf-text with a JSON payload + * referencing the uploaded resource id (two-step JSON flow). + * + * Setup (environment): + * - Set PDFREST_API_KEY=your_api_key_here + * - Optional: set PDFREST_URL to override the API region. For EU/GDPR, use: + * PDFREST_URL=https://eu-api.pdfrest.com + * More info: https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + * + * Usage: + * ./summarized_pdf_text_json /path/to/input.pdf + * + * Output: + * - Prints JSON responses to stdout. Non-2xx responses print a concise + * error to stderr and exit non-zero. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; +using json = nlohmann::json; + +static std::string rtrim_slashes(std::string s) { + while (!s.empty() && (s.back() == '/' || s.back() == '\\')) { + s.pop_back(); + } + return s; +} + +static void load_dotenv_if_present(const fs::path &path) { + std::ifstream f(path); + if (!f.is_open()) return; + std::string line; + while (std::getline(f, line)) { + if (line.empty() || line[0] == '#') continue; + auto pos = line.find('='); + if (pos == std::string::npos) continue; + std::string key = line.substr(0, pos); + std::string val = line.substr(pos + 1); + auto trim = [](std::string &s) { + size_t start = s.find_first_not_of(" \t\r\n"); + size_t end = s.find_last_not_of(" \t\r\n"); + if (start == std::string::npos) { s.clear(); return; } + s = s.substr(start, end - start + 1); + }; + trim(key); + trim(val); + if (key.empty()) continue; +#ifdef _WIN32 + _putenv_s(key.c_str(), val.c_str()); +#else + if (std::getenv(key.c_str()) == nullptr) setenv(key.c_str(), val.c_str(), 0); +#endif + } +} + +static void load_env() { + const fs::path here = fs::current_path(); + load_dotenv_if_present(here / ".env"); + if (fs::exists(here.parent_path())) { + load_dotenv_if_present(here.parent_path() / ".env"); + } +} + +static std::optional read_file_to_string(const fs::path &p) { + std::ifstream in(p, std::ios::binary); + if (!in) return std::nullopt; + std::string data((std::istreambuf_iterator(in)), std::istreambuf_iterator()); + return data; +} + +int main(int argc, char *argv[]) { + load_env(); + if (argc < 2) { + std::cerr << "Usage: summarized_pdf_text_json \n"; + return 1; + } + fs::path input_path(argv[1]); + if (!fs::exists(input_path)) { + std::cerr << "File not found: " << input_path << "\n"; + return 1; + } + + const char *api_key_c = std::getenv("PDFREST_API_KEY"); + if (api_key_c == nullptr || std::string(api_key_c).empty()) { + std::cerr << "Missing 
required environment variable: PDFREST_API_KEY\n"; + return 1; + } + std::string api_key = api_key_c; + + const char *base_url_c = std::getenv("PDFREST_URL"); + std::string base_url = base_url_c && std::string(base_url_c).size() ? base_url_c : std::string("https://api.pdfrest.com"); + base_url = rtrim_slashes(base_url); + + auto maybe_data = read_file_to_string(input_path); + if (!maybe_data) { + std::cerr << "Failed to read input file: " << input_path << "\n"; + return 1; + } + std::string body = std::move(*maybe_data); + + cpr::Header headers{{"Api-Key", api_key}, {"Accept", "application/json"}, {"Content-Type", "application/octet-stream"}, {"Content-Filename", input_path.filename().string()}}; + auto res = cpr::Post(cpr::Url{base_url + "/upload"}, headers, cpr::Body{body}); + if (res.error || res.status_code < 200 || res.status_code >= 300) { + std::cerr << "Upload failed (status " << res.status_code << "): " << res.error.message << "\n" << res.text << "\n"; + return 1; + } + std::cout << res.text << "\n"; + + std::string uploaded_id; + try { + auto j = json::parse(res.text); + uploaded_id = j.at("files").at(0).at("id").get(); + } catch (const std::exception &e) { + std::cerr << "Failed to parse upload id: " << e.what() << "\n"; + return 1; + } + + json payload = { {"id", uploaded_id}, {"target_word_count", 100} }; + + cpr::Header sum_headers{{"Api-Key", api_key}, {"Accept", "application/json"}, {"Content-Type", "application/json"}}; + auto sum_res = cpr::Post(cpr::Url{base_url + "/summarized-pdf-text"}, sum_headers, cpr::Body{payload.dump()}); + if (sum_res.error || sum_res.status_code < 200 || sum_res.status_code >= 300) { + std::cerr << "Summarize failed (status " << sum_res.status_code << "): " << sum_res.error.message << "\n" << sum_res.text << "\n"; + return 1; + } + std::cout << sum_res.text << "\n"; + return 0; +} diff --git a/CPlusPlus/Endpoint Examples/Multipart Payload/CMakeLists.txt b/CPlusPlus/Endpoint Examples/Multipart Payload/CMakeLists.txt index 
d110c5d7..b4283ebe 100644 --- a/CPlusPlus/Endpoint Examples/Multipart Payload/CMakeLists.txt +++ b/CPlusPlus/Endpoint Examples/Multipart Payload/CMakeLists.txt @@ -10,4 +10,8 @@ if (cpr_FOUND) add_executable(rasterized_pdf_multipart rasterized_pdf.cpp) target_link_libraries(rasterized_pdf_multipart PRIVATE cpr::cpr) target_compile_features(rasterized_pdf_multipart PRIVATE cxx_std_20) + + add_executable(summarized_pdf_text_multipart summarized_pdf_text.cpp) + target_link_libraries(summarized_pdf_text_multipart PRIVATE cpr::cpr) + target_compile_features(summarized_pdf_text_multipart PRIVATE cxx_std_20) endif() diff --git a/CPlusPlus/Endpoint Examples/Multipart Payload/summarized_pdf_text.cpp b/CPlusPlus/Endpoint Examples/Multipart Payload/summarized_pdf_text.cpp new file mode 100644 index 00000000..adb121a0 --- /dev/null +++ b/CPlusPlus/Endpoint Examples/Multipart Payload/summarized_pdf_text.cpp @@ -0,0 +1,80 @@ +/* + * What this sample does: + * - Summarizes PDF content via multipart/form-data (file + options). + * + * Setup (environment): + * - Copy .env.example to .env + * - Set PDFREST_API_KEY=your_api_key_here + * - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use: + * PDFREST_URL=https://eu-api.pdfrest.com + * For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + * + * Usage: + * ./summarized_pdf_text_multipart /path/to/input.pdf + * + * Output: + * - Prints the JSON response to stdout; non-2xx exits with concise error. 
+ */ + +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + +static std::string rtrim_slashes(std::string s) { + while (!s.empty() && (s.back() == '/' || s.back() == '\\')) s.pop_back(); + return s; +} + +static void load_dotenv_if_present(const fs::path &path) { + std::ifstream f(path); + if (!f.is_open()) return; + std::string line; + while (std::getline(f, line)) { + if (line.empty() || line[0] == '#') continue; + auto p = line.find('='); + if (p == std::string::npos) continue; + std::string k = line.substr(0, p); + std::string v = line.substr(p + 1); + auto trim = [](std::string &s) { size_t b = s.find_first_not_of(" \t\r\n"), e = s.find_last_not_of(" \t\r\n"); if (b == std::string::npos) { s.clear(); return; } s = s.substr(b, e - b + 1); }; + trim(k); trim(v); +#ifdef _WIN32 + _putenv_s(k.c_str(), v.c_str()); +#else + if (!std::getenv(k.c_str())) setenv(k.c_str(), v.c_str(), 0); +#endif + } +} + +static void load_env() { + auto here = fs::current_path(); + load_dotenv_if_present(here / ".env"); + if (fs::exists(here.parent_path())) load_dotenv_if_present(here.parent_path() / ".env"); +} + +int main(int argc, char **argv) { + load_env(); + if (argc < 2) { std::cerr << "Usage: summarized_pdf_text_multipart \n"; return 1; } + fs::path input = argv[1]; + if (!fs::exists(input)) { std::cerr << "File not found: " << input << "\n"; return 1; } + const char *key = getenv("PDFREST_API_KEY"); + if (!key || !*key) { std::cerr << "Missing PDFREST_API_KEY\n"; return 1; } + std::string base = getenv("PDFREST_URL") ? 
getenv("PDFREST_URL") : "https://api.pdfrest.com"; + base = rtrim_slashes(base); + + cpr::Header hdr{{"Api-Key", key}, {"Accept", "application/json"}}; + cpr::Multipart mp{ + {"file", cpr::File{input.string()}}, + {"target_word_count", "100"} + }; + auto res = cpr::Post(cpr::Url{base + "/summarized-pdf-text"}, hdr, mp); + if (res.error || res.status_code < 200 || res.status_code >= 300) { + std::cerr << "Request failed (" << res.status_code << ")\n" << res.error.message << "\n" << res.text << "\n"; + return 1; + } + std::cout << res.text << "\n"; + return 0; +} diff --git a/DotNET/Endpoint Examples/JSON Payload/summarized-pdf-text.cs b/DotNET/Endpoint Examples/JSON Payload/summarized-pdf-text.cs new file mode 100644 index 00000000..97cf36d8 --- /dev/null +++ b/DotNET/Endpoint Examples/JSON Payload/summarized-pdf-text.cs @@ -0,0 +1,95 @@ +/* + * What this sample does: + * - Implements a command callable from Program.cs that uploads a file and + * then summarizes the content in the file via the JSON two-step flow. + * - Routes `dotnet run -- summarized-pdf-text ` to this module. + * + * Setup (environment): + * - Copy .env.example to .env + * - Set PDFREST_API_KEY=your_api_key_here + * - Optional: set PDFREST_URL to override the API region. 
For EU/GDPR compliance and proximity, use: + * PDFREST_URL=https://eu-api.pdfrest.com + * For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + */ + +using Newtonsoft.Json.Linq; +using System.Text; + +namespace Samples.EndpointExamples.JsonPayload +{ + public static class SummarizedPdfText + { + public static async Task Execute(string[] args) + { + if (args == null || args.Length < 1) + { + Console.Error.WriteLine("summarized-pdf-text requires "); + Environment.Exit(1); + return; + } + + var inputPath = args[0]; + if (!File.Exists(inputPath)) + { + Console.Error.WriteLine($"File not found: {inputPath}"); + Environment.Exit(1); + return; + } + + var apiKey = Environment.GetEnvironmentVariable("PDFREST_API_KEY"); + if (string.IsNullOrWhiteSpace(apiKey)) + { + Console.Error.WriteLine("Missing required environment variable: PDFREST_API_KEY"); + Environment.Exit(1); + return; + } + + var baseUrl = Environment.GetEnvironmentVariable("PDFREST_URL") ?? "https://api.pdfrest.com"; + + using (var httpClient = new HttpClient { BaseAddress = new Uri(baseUrl) }) + { + using (var uploadRequest = new HttpRequestMessage(HttpMethod.Post, "upload")) + { + uploadRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey); + uploadRequest.Headers.Accept.Add(new("application/json")); + + var uploadByteArray = File.ReadAllBytes(inputPath); + var uploadByteAryContent = new ByteArrayContent(uploadByteArray); + uploadByteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/octet-stream"); + uploadByteAryContent.Headers.TryAddWithoutValidation("Content-Filename", Path.GetFileName(inputPath)); + + uploadRequest.Content = uploadByteAryContent; + var uploadResponse = await httpClient.SendAsync(uploadRequest); + + var uploadResult = await uploadResponse.Content.ReadAsStringAsync(); + + Console.WriteLine("Upload response received."); + Console.WriteLine(uploadResult); + + JObject uploadResultJson = JObject.Parse(uploadResult); + var uploadedID = 
uploadResultJson["files"][0]["id"]; + using (var summaryRequest = new HttpRequestMessage(HttpMethod.Post, "summarized-pdf-text")) + { + summaryRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey); + summaryRequest.Headers.Accept.Add(new("application/json")); + summaryRequest.Headers.TryAddWithoutValidation("Content-Type", "application/json"); + + JObject parameterJson = new JObject + { + ["id"] = uploadedID, + ["target_word_count"] = 100 + }; + + summaryRequest.Content = new StringContent(parameterJson.ToString(), Encoding.UTF8, "application/json"); + var summaryResponse = await httpClient.SendAsync(summaryRequest); + + var summaryResult = await summaryResponse.Content.ReadAsStringAsync(); + + Console.WriteLine("Processing response received."); + Console.WriteLine(summaryResult); + } + } + } + } + } +} diff --git a/DotNET/Endpoint Examples/Multipart Payload/summarized-pdf-text.cs b/DotNET/Endpoint Examples/Multipart Payload/summarized-pdf-text.cs new file mode 100644 index 00000000..23c73535 --- /dev/null +++ b/DotNET/Endpoint Examples/Multipart Payload/summarized-pdf-text.cs @@ -0,0 +1,74 @@ +/* + * What this sample does: + * - Summarizes PDF content via multipart/form-data. + * - Routed from Program.cs as: `dotnet run -- summarized-pdf-text-multipart `. + * + * Setup (environment): + * - Copy .env.example to .env + * - Set PDFREST_API_KEY=your_api_key_here + * - Optional: set PDFREST_URL to override the API region. For EU/GDPR compliance and proximity, use: + * PDFREST_URL=https://eu-api.pdfrest.com + * For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + * + * Usage: + * dotnet run -- summarized-pdf-text-multipart /path/to/input.pdf + * + * Output: + * - Prints the JSON response. Validation errors (args/env) exit non-zero. 
+ */ + +using System.Text; + +namespace Samples.EndpointExamples.MultipartPayload +{ + public static class SummarizedPdfText + { + public static async Task Execute(string[] args) + { + if (args == null || args.Length < 1) + { + Console.Error.WriteLine("summarized-pdf-text-multipart requires "); + Environment.Exit(1); + return; + } + var inputPath = args[0]; + if (!File.Exists(inputPath)) + { + Console.Error.WriteLine($"File not found: {inputPath}"); + Environment.Exit(1); + return; + } + var apiKey = Environment.GetEnvironmentVariable("PDFREST_API_KEY"); + if (string.IsNullOrWhiteSpace(apiKey)) + { + Console.Error.WriteLine("Missing required environment variable: PDFREST_API_KEY"); + Environment.Exit(1); + return; + } + var baseUrl = Environment.GetEnvironmentVariable("PDFREST_URL") ?? "https://api.pdfrest.com"; + + using (var httpClient = new HttpClient { BaseAddress = new Uri(baseUrl) }) + using (var request = new HttpRequestMessage(HttpMethod.Post, "summarized-pdf-text")) + { + request.Headers.TryAddWithoutValidation("Api-Key", apiKey); + request.Headers.Accept.Add(new("application/json")); + var multipartContent = new MultipartFormDataContent(); + + var byteArray = File.ReadAllBytes(inputPath); + var byteAryContent = new ByteArrayContent(byteArray); + multipartContent.Add(byteAryContent, "file", Path.GetFileName(inputPath)); + byteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/octet-stream"); + + var byteArrayOption = new ByteArrayContent(Encoding.UTF8.GetBytes("100")); + multipartContent.Add(byteArrayOption, "target_word_count"); + + request.Content = multipartContent; + var response = await httpClient.SendAsync(request); + var apiResult = await response.Content.ReadAsStringAsync(); + + Console.WriteLine("API response received."); + Console.WriteLine(apiResult); + } + } + } +} diff --git a/DotNET/Program.cs b/DotNET/Program.cs index ffd729a3..6e7e3c4b 100644 --- a/DotNET/Program.cs +++ b/DotNET/Program.cs @@ -21,6 +21,7 @@ static void 
PrintUsage() Console.Error.WriteLine(" Info / Extract:"); Console.Error.WriteLine(" pdf-info Document properties and stats"); Console.Error.WriteLine(" extracted-text Extract text to JSON"); + Console.Error.WriteLine(" summarized-pdf-text Summarize text"); Console.Error.WriteLine(" extracted-images Extract embedded images"); Console.Error.WriteLine(" exported-form-data Export form data (XML)"); Console.Error.WriteLine(" PDF Transforms:"); @@ -69,6 +70,7 @@ static void PrintUsage() Console.Error.WriteLine(" Info / Extract:"); Console.Error.WriteLine(" pdf-info-multipart Document properties and stats"); Console.Error.WriteLine(" extracted-text-multipart Extract text to JSON"); + Console.Error.WriteLine(" summarized-pdf-text-multipart Summarize text"); Console.Error.WriteLine(" extracted-images-multipart Extract images"); Console.Error.WriteLine(" PDF Transforms:"); Console.Error.WriteLine(" compressed-pdf-multipart Compress PDF"); @@ -133,6 +135,9 @@ static void PrintUsage() switch (cmd) { + case "summarized-pdf-text": + await Samples.EndpointExamples.JsonPayload.SummarizedPdfText.Execute(rest); + break; case "markdown-json": await Samples.EndpointExamples.JsonPayload.Markdown.Execute(rest); break; @@ -169,6 +174,9 @@ static void PrintUsage() case "word-multipart": await Samples.EndpointExamples.MultipartPayload.Word.Execute(rest); break; + case "summarized-pdf-text-multipart": + await Samples.EndpointExamples.MultipartPayload.SummarizedPdfText.Execute(rest); + break; case "merge-different-file-types": case "merge": await Samples.ComplexFlowExamples.MergeDifferentFileTypes.Execute(rest); diff --git a/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java b/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java new file mode 100644 index 00000000..427fa3b6 --- /dev/null +++ b/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java @@ -0,0 +1,99 @@ +import io.github.cdimascio.dotenv.Dotenv; +import java.io.File; +import java.io.IOException; +import 
java.util.concurrent.TimeUnit; +import okhttp3.*; +import org.json.JSONArray; +import org.json.JSONObject; + +public class SummarizedPDFText { + + // By default, we use the US-based API service. This is the primary endpoint for global use. + private static final String API_URL = "https://api.pdfrest.com"; + + // For GDPR compliance and enhanced performance for European users, you can switch to the EU-based + // service by commenting out the URL above and uncommenting the URL below. + // For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + // private static final String API_URL = "https://eu-api.pdfrest.com"; + + // Specify the path to your file here, or as the first argument when running the program. + private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf"; + + // Specify your API key here, or in the environment variable PDFREST_API_KEY. + // You can also put the environment variable in a .env file. + private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; + + public static void main(String[] args) { + File inputFile; + if (args.length > 0) { + inputFile = new File(args[0]); + } else { + inputFile = new File(DEFAULT_FILE_PATH); + } + final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); + + String uploadString = uploadFile(inputFile); + JSONObject uploadJSON = new JSONObject(uploadString); + if (uploadJSON.has("error")) { + System.out.println("Error during upload: " + uploadString); + return; + } + JSONArray fileArray = uploadJSON.getJSONArray("files"); + JSONObject fileObject = fileArray.getJSONObject(0); + String uploadedID = fileObject.get("id").toString(); + + String JSONString = + String.format("{\"id\":\"%s\", \"target_word_count\":100}", uploadedID); + final RequestBody requestBody = + RequestBody.create(JSONString, MediaType.parse("application/json")); + + Request request = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", 
DEFAULT_API_KEY)) + .url(API_URL + "/summarized-pdf-text") + .post(requestBody) + .build(); + try { + OkHttpClient client = + new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build(); + + Response response = client.newCall(request).execute(); + System.out.println("Summarize Result code " + response.code()); + if (response.body() != null) { + System.out.println(prettyJson(response.body().string())); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static String prettyJson(String json) { + return new JSONObject(json).toString(4); + } + + private static String uploadFile(File inputFile) { + final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); + final RequestBody requestBody = + RequestBody.create(inputFile, MediaType.parse("application/pdf")); + + Request request = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .header("Content-Filename", "File.pdf") + .url(API_URL + "/upload") + .post(requestBody) + .build(); + try { + OkHttpClient client = new OkHttpClient().newBuilder().build(); + Response response = client.newCall(request).execute(); + System.out.println("Upload Result code " + response.code()); + if (response.body() != null) { + return response.body().string(); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + return ""; + } +} + diff --git a/Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java b/Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java new file mode 100644 index 00000000..fc78b295 --- /dev/null +++ b/Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java @@ -0,0 +1,69 @@ +import io.github.cdimascio.dotenv.Dotenv; +import java.io.File; +import java.io.IOException; +import okhttp3.MediaType; +import okhttp3.MultipartBody; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; +import org.json.JSONObject; + +public 
class SummarizedPDFText { + + // By default, we use the US-based API service. This is the primary endpoint for global use. + private static final String API_URL = "https://api.pdfrest.com"; + + // For GDPR compliance and enhanced performance for European users, you can switch to the EU-based + // service by commenting out the URL above and uncommenting the URL below. + // For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + // private static final String API_URL = "https://eu-api.pdfrest.com"; + + // Specify the path to your file here, or as the first argument when running the program. + private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf"; + + // Specify your API key here, or in the environment variable PDFREST_API_KEY. + // You can also put the environment variable in a .env file. + private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"; + + public static void main(String[] args) { + File inputFile; + if (args.length > 0) { + inputFile = new File(args[0]); + } else { + inputFile = new File(DEFAULT_FILE_PATH); + } + + final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load(); + + final RequestBody inputFileRequestBody = + RequestBody.create(inputFile, MediaType.parse("application/pdf")); + RequestBody requestBody = + new MultipartBody.Builder() + .setType(MultipartBody.FORM) + .addFormDataPart("file", inputFile.getName(), inputFileRequestBody) + .addFormDataPart("target_word_count", "100") + .build(); + Request request = + new Request.Builder() + .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY)) + .url(API_URL + "/summarized-pdf-text") + .post(requestBody) + .build(); + try { + OkHttpClient client = new OkHttpClient().newBuilder().build(); + Response response = client.newCall(request).execute(); + System.out.println("Result code " + response.code()); + if (response.body() != null) { + System.out.println(prettyJson(response.body().string())); 
+ } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static String prettyJson(String json) { + return new JSONObject(json).toString(4); + } +} + diff --git a/JavaScript/Endpoint Examples/JSON Payload/summarized-pdf-text.js b/JavaScript/Endpoint Examples/JSON Payload/summarized-pdf-text.js new file mode 100644 index 00000000..d27017e8 --- /dev/null +++ b/JavaScript/Endpoint Examples/JSON Payload/summarized-pdf-text.js @@ -0,0 +1,63 @@ +var axios = require("axios"); +var fs = require("fs"); + +// By default, we use the US-based API service. This is the primary endpoint for global use. +var apiUrl = "https://api.pdfrest.com"; + +/* For GDPR compliance and enhanced performance for European users, you can switch to the EU-based service by uncommenting the URL below. + * For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + */ +//var apiUrl = "https://eu-api.pdfrest.com"; + +var upload_data = fs.createReadStream("/path/to/file.pdf"); + +var upload_config = { + method: "post", + maxBodyLength: Infinity, + url: apiUrl + "/upload", + headers: { + "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key + "Content-Filename": "filename.pdf", + "Content-Type": "application/octet-stream", + }, + data: upload_data, +}; + +// Send upload request +axios(upload_config) + .then(function (upload_response) { + console.log("Upload response:"); + console.log(JSON.stringify(upload_response.data, null, 2)); + + var uploaded_id = upload_response.data.files[0].id; + + var summarize_config = { + method: "post", + maxBodyLength: Infinity, + url: apiUrl + "/summarized-pdf-text", + headers: { + "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key + "Content-Type": "application/json", + }, + data: { + id: uploaded_id, + target_word_count: 100, + }, + }; + + // Send summarized-pdf-text request + axios(summarize_config) + .then(function (summary_response) { + console.log("Summarize 
response:"); + console.log(JSON.stringify(summary_response.data, null, 2)); + }) + .catch(function (error) { + console.error("Summarize request error:"); + console.error(error.response?.data || error.message); + }); + }) + .catch(function (error) { + console.error("Upload request error:"); + console.error(error.response?.data || error.message); + }); + diff --git a/JavaScript/Endpoint Examples/Multipart Payload/summarized-pdf-text.js b/JavaScript/Endpoint Examples/Multipart Payload/summarized-pdf-text.js new file mode 100644 index 00000000..74948edd --- /dev/null +++ b/JavaScript/Endpoint Examples/Multipart Payload/summarized-pdf-text.js @@ -0,0 +1,38 @@ +// This request demonstrates how to summarize text from a PDF document. +var axios = require("axios"); +var FormData = require("form-data"); +var fs = require("fs"); + +// By default, we use the US-based API service. This is the primary endpoint for global use. +var apiUrl = "https://api.pdfrest.com"; + +/* For GDPR compliance and enhanced performance for European users, you can switch to the EU-based service by uncommenting the URL below. 
+ * For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work + */ +//var apiUrl = "https://eu-api.pdfrest.com"; + +// Create a new form data instance and append the PDF file and parameters to it +var data = new FormData(); +data.append("file", fs.createReadStream("/path/to/file")); +data.append("target_word_count", "100"); + +// define configuration options for axios request +var config = { + method: "post", + maxBodyLength: Infinity, + url: apiUrl + "/summarized-pdf-text", + headers: { + "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key + ...data.getHeaders(), + }, + data: data, +}; + +axios(config) + .then(function (response) { + console.log(JSON.stringify(response.data)); + }) + .catch(function (error) { + console.log(error.response?.data || error.message); + }); + diff --git a/PHP/Endpoint Examples/JSON Payload/summarized-pdf-text.php b/PHP/Endpoint Examples/JSON Payload/summarized-pdf-text.php new file mode 100644 index 00000000..b4ab9b30 --- /dev/null +++ b/PHP/Endpoint Examples/JSON Payload/summarized-pdf-text.php @@ -0,0 +1,40 @@ + false]); +$upload_headers = [ + 'api-key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx', + 'content-filename' => 'filename.pdf', + 'Content-Type' => 'application/octet-stream' +]; +$upload_body = file_get_contents('/path/to/file'); +$upload_request = new Request('POST', $apiUrl.'/upload', $upload_headers, $upload_body); +$upload_res = $upload_client->sendAsync($upload_request)->wait(); +echo $upload_res->getBody() . PHP_EOL; + +$upload_response_json = json_decode($upload_res->getBody()); +$uploaded_id = $upload_response_json->{'files'}[0]->{'id'}; + +echo "Successfully uploaded with an id of: " . $uploaded_id . 
PHP_EOL; + +$client = new Client(['http_errors' => false]); +$headers = [ + 'api-key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx', + 'Content-Type' => 'application/json' +]; +$body = '{"id":"'.$uploaded_id.'","target_word_count":100}'; +$request = new Request('POST', $apiUrl.'/summarized-pdf-text', $headers, $body); +$res = $client->sendAsync($request)->wait(); +echo $res->getBody() . PHP_EOL; + diff --git a/PHP/Endpoint Examples/Multipart Payload/summarized-pdf-text.php b/PHP/Endpoint Examples/Multipart Payload/summarized-pdf-text.php new file mode 100644 index 00000000..d39556ad --- /dev/null +++ b/PHP/Endpoint Examples/Multipart Payload/summarized-pdf-text.php @@ -0,0 +1,42 @@ + 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' +]; + +$options = [ + 'multipart' => [ + [ + 'name' => 'file', + 'contents' => Utils::tryFopen('/path/to/file', 'r'), + 'filename' => 'filename.pdf', + 'headers' => [ + 'Content-Type' => 'application/pdf' + ] + ], + [ + 'name' => 'target_word_count', + 'contents' => '100' + ] + ] +]; + +$request = new Request('POST', $apiUrl.'/summarized-pdf-text', $headers); +$res = $client->sendAsync($request, $options)->wait(); +echo $res->getBody(); + diff --git a/Perl/Endpoint Examples/JSON Payload/summarized-pdf-text.pl b/Perl/Endpoint Examples/JSON Payload/summarized-pdf-text.pl new file mode 100644 index 00000000..fd3ae96b --- /dev/null +++ b/Perl/Endpoint Examples/JSON Payload/summarized-pdf-text.pl @@ -0,0 +1,64 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; +use FindBin qw($Bin); +use File::Basename qw(basename); +use JSON::PP qw(encode_json decode_json); +use LWP::UserAgent; +use HTTP::Request; +use Encode qw(encode); +use Dotenv; + +binmode STDOUT, ':raw'; +binmode STDERR, ':encoding(UTF-8)'; + +my $env_path = "$Bin/../../.env"; +-e $env_path and Dotenv->load($env_path); + +my $api_key = $ENV{PDFREST_API_KEY} // ''; +if (!$api_key || $api_key =~ /^\s*$/) { print STDERR "Missing PDFREST_API_KEY\n"; exit 1; } + +my $api_base = 
$ENV{PDFREST_URL} // $ENV{PDFREST_API} // 'https://api.pdfrest.com'; +$api_base =~ s{/+$}{}; + +my $pdf_path = shift @ARGV; +if (!$pdf_path || !-f $pdf_path) { print STDERR "Usage: perl summarized-pdf-text.pl /path/to/file.pdf\n"; exit 1; } + +my $filename = basename($pdf_path); +open my $fh, '<:raw', $pdf_path or do { print STDERR "Unable to read $pdf_path: $!\n"; exit 1; }; +my $file_bytes; { local $/; $file_bytes = <$fh>; } +close $fh; + +my $ua = LWP::UserAgent->new( timeout => 60 ); + +eval { + # Upload + my $upload_req = HTTP::Request->new('POST', "$api_base/upload"); + $upload_req->header('api-key' => $api_key); + $upload_req->header('content-filename' => $filename); + $upload_req->header('Content-Type' => 'application/octet-stream'); + $upload_req->content($file_bytes); + my $upload_resp = $ua->request($upload_req); + print STDERR $upload_resp->decoded_content // ''; + if (!$upload_resp->is_success) { print STDERR "\nUpload failed with status " . $upload_resp->code . "\n"; exit 1; } + + my $upload_json = decode_json($upload_resp->decoded_content // '{}'); + my $uploaded_id = $upload_json->{files} && ref $upload_json->{files} eq 'ARRAY' ? $upload_json->{files}[0]{id} : undef; + if (!$uploaded_id) { print STDERR "Unexpected response format: missing files[0].id\n"; exit 1; } + print STDERR "Successfully uploaded with an id of: $uploaded_id\n"; + + # Summarize + my $body = encode_json({ id => $uploaded_id, target_word_count => 100 }); + my $req = HTTP::Request->new('POST', "$api_base/summarized-pdf-text"); + $req->header('api-key' => $api_key); + $req->header('Content-Type' => 'application/json'); + $req->content($body); + my $resp = $ua->request($req); + print STDOUT $resp->decoded_content // ''; + if (!$resp->is_success) { print STDERR "\nSummarize failed with status " . $resp->code . 
"\n"; exit 1; } + 1; +} or do { my $err = $@ || 'Unknown error'; $err =~ s/\s+$//; print STDERR "Error: $err\n"; exit 1; }; + +__END__ + diff --git a/Perl/Endpoint Examples/Multipart Payload/summarized-pdf-text.pl b/Perl/Endpoint Examples/Multipart Payload/summarized-pdf-text.pl new file mode 100644 index 00000000..f4aa14df --- /dev/null +++ b/Perl/Endpoint Examples/Multipart Payload/summarized-pdf-text.pl @@ -0,0 +1,45 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use utf8; +use FindBin qw($Bin); +use File::Basename qw(basename); +use LWP::UserAgent; +use HTTP::Request::Common qw(POST); +use Dotenv; + +binmode STDOUT, ':raw'; +binmode STDERR, ':encoding(UTF-8)'; + +my $env_path = "$Bin/../../.env"; +-e $env_path and Dotenv->load($env_path); + +my $api_key = $ENV{PDFREST_API_KEY} // ''; +if (!$api_key || $api_key =~ /^\s*$/) { print STDERR "Missing PDFREST_API_KEY\n"; exit 1; } + +my $api_base = $ENV{PDFREST_URL} // $ENV{PDFREST_API} // 'https://api.pdfrest.com'; +$api_base =~ s{/+$}{}; + +my $pdf_path = shift @ARGV; +if (!$pdf_path || !-f $pdf_path) { print STDERR "Usage: perl summarized-pdf-text.pl /path/to/file.pdf\n"; exit 1; } + +my $filename = basename($pdf_path); +my $ua = LWP::UserAgent->new( timeout => 60 ); + +eval { + my $req = POST("$api_base/summarized-pdf-text", + 'Content_Type' => 'form-data', + 'Content' => [ + file => [$pdf_path, $filename, 'Content-Type' => 'application/pdf'], + target_word_count => '100', + ] + ); + $req->header('api-key' => $api_key); + my $resp = $ua->request($req); + print STDOUT $resp->decoded_content // ''; + if (!$resp->is_success) { print STDERR "\nSummarize failed with status " . $resp->code . 
"\n"; exit 1; } + 1; +} or do { my $err = $@ || 'Unknown error'; $err =~ s/\s+$//; print STDERR "Error: $err\n"; exit 1; }; + +__END__ + diff --git a/Python/Endpoint Examples/Multipart Payload/summarized-pdf-text.py b/Python/Endpoint Examples/Multipart Payload/summarized-pdf-text.py new file mode 100644 index 00000000..a44bb171 --- /dev/null +++ b/Python/Endpoint Examples/Multipart Payload/summarized-pdf-text.py @@ -0,0 +1,38 @@ +from requests_toolbelt import MultipartEncoder +import requests +import json + +# By default, we use the US-based API service. This is the primary endpoint for global use. +api_url = "https://api.pdfrest.com" + +# For GDPR compliance and enhanced performance for European users, you can switch to the EU-based service by uncommenting the URL below. +# For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work +#api_url = "https://eu-api.pdfrest.com" + +endpoint_url = api_url+'/summarized-pdf-text' + +# The endpoint can take a single PDF file or id as input. +mp_encoder = MultipartEncoder( + fields={ + 'file': ('file_name.pdf', open('/path/to/file', 'rb'), 'application/pdf'), + 'target_word_count': '100', + } +) + +headers = { + 'Accept': 'application/json', + 'Content-Type': mp_encoder.content_type, + 'Api-Key': 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' +} + +print("Sending POST request to summarized-pdf-text endpoint...") +response = requests.post(endpoint_url, data=mp_encoder, headers=headers) + +print("Response status code: " + str(response.status_code)) + +if response.ok: + response_json = response.json() + print(json.dumps(response_json, indent=2)) +else: + print(response.text) + diff --git a/R/Endpoint Examples/JSON Payload/summarized-pdf-text.R b/R/Endpoint Examples/JSON Payload/summarized-pdf-text.R new file mode 100644 index 00000000..734c5c3b --- /dev/null +++ b/R/Endpoint Examples/JSON Payload/summarized-pdf-text.R @@ -0,0 +1,62 @@ +#! 
+# Summarize a PDF using two-step JSON flow: upload then /summarized-pdf-text. + +suppressWarnings(suppressMessages({ + if (!requireNamespace("httr", quietly = TRUE)) stop("Please install 'httr' package") + if (!requireNamespace("jsonlite", quietly = TRUE)) stop("Please install 'jsonlite' package") +})) + +stderrf <- function(...) cat(sprintf(...), file = stderr()) + +api_key <- Sys.getenv("PDFREST_API_KEY", unset = "") +if (identical(api_key, "")) { + stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n") + quit(status = 1) +} + +api_base <- sub("/+$", "", Sys.getenv("PDFREST_URL", unset = "https://api.pdfrest.com")) + +args <- commandArgs(trailingOnly = TRUE) +pdf_path <- args[1] +if (is.na(pdf_path) || !file.exists(pdf_path)) { # args[1] is NA, never NULL, when no path is given + stderrf("Usage: Rscript summarized-pdf-text.R /path/to/file.pdf\n") + quit(status = 1) +} + +filename <- basename(pdf_path) +file_bytes <- readBin(pdf_path, what = "raw", n = file.info(pdf_path)$size) + +tryCatch({ + upload_url <- paste0(api_base, "/upload") + upload_resp <- httr::POST( + upload_url, + httr::add_headers( + "api-key" = api_key, + "content-filename" = filename, + "Content-Type" = "application/octet-stream" + ), + body = file_bytes + ) + upload_text <- httr::content(upload_resp, as = "text", encoding = "UTF-8") + message(upload_text) + if (httr::http_error(upload_resp)) stop(sprintf("Upload failed with status %s", httr::status_code(upload_resp))) + + upload_json <- jsonlite::fromJSON(upload_text) + uploaded_id <- if (is.data.frame(upload_json$files)) upload_json$files$id[[1]] else upload_json$files[[1]]$id + message(sprintf("Successfully uploaded with an id of: %s", uploaded_id)) + + url <- paste0(api_base, "/summarized-pdf-text") + body <- jsonlite::toJSON(list(id = uploaded_id, target_word_count = 100), auto_unbox = TRUE) + resp <- httr::POST( + url, + httr::add_headers("api-key" = api_key, "Content-Type" = "application/json"), + body = body + ) + txt <- httr::content(resp, as = "text", encoding = 
"UTF-8") + cat(txt) + if (httr::http_error(resp)) stop(sprintf("Summarize failed with status %s", httr::status_code(resp))) +}, error = function(e) { + stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e)) + quit(status = 1) +}) + diff --git a/R/Endpoint Examples/Multipart Payload/summarized-pdf-text.R b/R/Endpoint Examples/Multipart Payload/summarized-pdf-text.R new file mode 100644 index 00000000..3be5d656 --- /dev/null +++ b/R/Endpoint Examples/Multipart Payload/summarized-pdf-text.R @@ -0,0 +1,46 @@ +#! +# Summarize a PDF using a single multipart/form-data request to /summarized-pdf-text. + +suppressWarnings(suppressMessages({ + if (!requireNamespace("httr", quietly = TRUE)) stop("Please install 'httr' package") +})) + +stderrf <- function(...) cat(sprintf(...), file = stderr()) + +api_key <- Sys.getenv("PDFREST_API_KEY", unset = "") +if (identical(api_key, "")) { + stderrf("Missing PDFREST_API_KEY in environment (.Renviron or shell)\n") + quit(status = 1) +} + +api_base <- sub("/+$", "", Sys.getenv("PDFREST_URL", unset = "https://api.pdfrest.com")) + +args <- commandArgs(trailingOnly = TRUE) +pdf_path <- args[1] +if (is.na(pdf_path) || !file.exists(pdf_path)) { # args[1] is NA, never NULL, when no path is given + stderrf("Usage: Rscript summarized-pdf-text.R /path/to/file.pdf\n") + quit(status = 1) +} + +filename <- basename(pdf_path) + +tryCatch({ + conn_url <- paste0(api_base, "/summarized-pdf-text") + body <- list( + file = httr::upload_file(pdf_path, type = "application/pdf"), + target_word_count = "100" + ) + resp <- httr::POST( + conn_url, + httr::add_headers("api-key" = api_key), + body = body, + encode = "multipart" + ) + txt <- httr::content(resp, as = "text", encoding = "UTF-8") + cat(txt) + if (httr::http_error(resp)) stop(sprintf("Summarize failed with status %s", httr::status_code(resp))) +}, error = function(e) { + stderrf("Error: %s: %s\n", class(e)[1], conditionMessage(e)) + quit(status = 1) +}) + diff --git a/Ruby/Endpoint Examples/JSON Payload/summarized-pdf-text.rb b/Ruby/Endpoint 
Examples/JSON Payload/summarized-pdf-text.rb new file mode 100644 index 00000000..c0b4df6c --- /dev/null +++ b/Ruby/Endpoint Examples/JSON Payload/summarized-pdf-text.rb @@ -0,0 +1,55 @@ +#! +# Summarize PDF text via two-step JSON flow. + +require "json" +require "faraday" +require "faraday/retry" +require "dotenv" + +Dotenv.load + +API_KEY = ENV["PDFREST_API_KEY"] +abort("Missing PDFREST_API_KEY in .env") if API_KEY.nil? || API_KEY.strip.empty? +API_BASE = (ENV["PDFREST_URL"] || ENV["PDFREST_API"] || "https://api.pdfrest.com").sub(%r{/+$}, "") + +pdf_path = ARGV[0] +abort("Usage: ruby summarized-pdf-text.rb /path/to/file.pdf") unless pdf_path && File.file?(pdf_path) + +filename = File.basename(pdf_path) +file_bytes = File.binread(pdf_path) + +begin + upload_conn = Faraday.new(url: API_BASE) do |f| + f.request :retry, max: 2, interval: 0.2 + f.adapter Faraday.default_adapter + end + upload_resp = upload_conn.post("/upload") do |req| + req.headers["api-key"] = API_KEY + req.headers["content-filename"] = filename + req.headers["Content-Type"] = "application/octet-stream" + req.body = file_bytes + end + STDERR.puts upload_resp.body + abort("Upload failed with status #{upload_resp.status}") unless upload_resp.success? + upload_json = JSON.parse(upload_resp.body) + uploaded_id = upload_json.fetch("files").fetch(0).fetch("id") + STDERR.puts "Successfully uploaded with an id of: #{uploaded_id}" + + conn = Faraday.new(url: API_BASE) do |f| + f.request :retry, max: 2, interval: 0.2 + f.adapter Faraday.default_adapter + end + body = { id: uploaded_id, target_word_count: 100 }.to_json + resp = conn.post("/summarized-pdf-text") do |req| + req.headers["api-key"] = API_KEY + req.headers["Content-Type"] = "application/json" + req.body = body + end + puts resp.body + abort("Summarize failed with status #{resp.status}") unless resp.success? 
+rescue KeyError => e + abort("Unexpected response format: #{e.message}") +rescue => e + abort("Error: #{e.class}: #{e.message}") +end + diff --git a/Ruby/Endpoint Examples/Multipart Payload/summarized-pdf-text.rb b/Ruby/Endpoint Examples/Multipart Payload/summarized-pdf-text.rb new file mode 100644 index 00000000..a5880ed6 --- /dev/null +++ b/Ruby/Endpoint Examples/Multipart Payload/summarized-pdf-text.rb @@ -0,0 +1,39 @@ +#! +# Summarize PDF text via single multipart/form-data request. + +require "faraday" +require "faraday/retry" +require "faraday/multipart" +require "dotenv" + +Dotenv.load + +API_KEY = ENV["PDFREST_API_KEY"] +abort("Missing PDFREST_API_KEY in .env") if API_KEY.nil? || API_KEY.strip.empty? +API_BASE = (ENV["PDFREST_URL"] || ENV["PDFREST_API"] || "https://api.pdfrest.com").sub(%r{/+$}, "") + +pdf_path = ARGV[0] +abort("Usage: ruby summarized-pdf-text.rb /path/to/file.pdf") unless pdf_path && File.file?(pdf_path) + +filename = File.basename(pdf_path) + +begin + conn = Faraday.new(url: API_BASE) do |f| + f.request :retry, max: 2, interval: 0.2 + f.request :multipart + f.adapter Faraday.default_adapter + end + body = { + file: Faraday::Multipart::FilePart.new(pdf_path, "application/pdf", filename), + target_word_count: "100", + } + resp = conn.post("/summarized-pdf-text") do |req| + req.headers["api-key"] = API_KEY + req.body = body + end + puts resp.body + abort("Summarize failed with status #{resp.status}") unless resp.success? +rescue => e + abort("Error: #{e.class}: #{e.message}") +end + diff --git a/VB.NET/Endpoint Examples/JSON Payload/summarized-pdf-text.vb b/VB.NET/Endpoint Examples/JSON Payload/summarized-pdf-text.vb new file mode 100644 index 00000000..261cc400 --- /dev/null +++ b/VB.NET/Endpoint Examples/JSON Payload/summarized-pdf-text.vb @@ -0,0 +1,108 @@ +''' +' Summarize PDF text using pdfRest. +' Two-step JSON flow: upload then call /summarized-pdf-text with the uploaded id. 
+''' + +Option Strict On +Option Explicit On + +Imports System +Imports System.Collections.Generic +Imports System.IO +Imports System.Net.Http +Imports System.Net.Http.Headers +Imports System.Text +Imports System.Text.Json +Imports System.Threading.Tasks + +Namespace VBNetSamples.Endpoint_Examples.JSON_Payload + Module SummarizedPdfText + Public Async Function Execute(args As String()) As Task + If args Is Nothing OrElse args.Length < 1 Then + Console.Error.WriteLine("Usage: dotnet run -- summarized-pdf-text /path/to/input.pdf") + Environment.Exit(1) + End If + + Dim inputPath As String = args(0) + If Not File.Exists(inputPath) Then + Console.Error.WriteLine($"Input file not found: {inputPath}") + Environment.Exit(1) + End If + + Dim apiKey As String = Environment.GetEnvironmentVariable("PDFREST_API_KEY") + If String.IsNullOrWhiteSpace(apiKey) Then + Console.Error.WriteLine("Missing environment variable PDFREST_API_KEY.") + Environment.Exit(1) + End If + + Dim baseUrl As String = Environment.GetEnvironmentVariable("PDFREST_URL") + If String.IsNullOrWhiteSpace(baseUrl) Then baseUrl = "https://api.pdfrest.com" + + Dim baseUri As Uri + Try + baseUri = New Uri(baseUrl) + Catch ex As Exception + Console.Error.WriteLine($"Invalid PDFREST_URL: {baseUrl}") + Environment.Exit(1) + Return + End Try + + Using httpClient As New HttpClient() + httpClient.BaseAddress = baseUri + + ' 1) Upload + Dim uploadRequest As New HttpRequestMessage(HttpMethod.Post, "upload") + uploadRequest.Headers.TryAddWithoutValidation("Api-Key", apiKey) + uploadRequest.Headers.Accept.Add(New MediaTypeWithQualityHeaderValue("application/json")) + + Dim fileBytes As Byte() = File.ReadAllBytes(inputPath) + Dim uploadContent As New ByteArrayContent(fileBytes) + uploadContent.Headers.TryAddWithoutValidation("Content-Type", "application/octet-stream") + uploadContent.Headers.TryAddWithoutValidation("Content-Filename", Path.GetFileName(inputPath)) + uploadRequest.Content = uploadContent + + Dim uploadResponse 
As HttpResponseMessage = Await httpClient.SendAsync(uploadRequest) + Dim uploadBody As String = Await uploadResponse.Content.ReadAsStringAsync() + If Not uploadResponse.IsSuccessStatusCode Then + Console.Error.WriteLine($"Upload failed: {CInt(uploadResponse.StatusCode)} {uploadResponse.ReasonPhrase}") + Console.Error.WriteLine(uploadBody) + Environment.Exit(1) + End If + Console.WriteLine(uploadBody) + + Dim uploadedId As String = Nothing + Try + Using doc As JsonDocument = JsonDocument.Parse(uploadBody) + uploadedId = doc.RootElement.GetProperty("files")(0).GetProperty("id").GetString() + End Using + Catch ex As Exception + Console.Error.WriteLine("Failed to parse upload response JSON for file id.") + Console.Error.WriteLine(ex.Message) + Environment.Exit(1) + End Try + + ' 2) Summarize via JSON payload + Dim req As New HttpRequestMessage(HttpMethod.Post, "summarized-pdf-text") + req.Headers.TryAddWithoutValidation("Api-Key", apiKey) + req.Headers.Accept.Add(New MediaTypeWithQualityHeaderValue("application/json")) + + Dim payload As New Dictionary(Of String, Object) From { + {"id", uploadedId}, + {"target_word_count", 100} + } + Dim payloadJson As String = JsonSerializer.Serialize(payload) + req.Content = New StringContent(payloadJson, Encoding.UTF8, "application/json") + + Dim resp As HttpResponseMessage = Await httpClient.SendAsync(req) + Dim body As String = Await resp.Content.ReadAsStringAsync() + If Not resp.IsSuccessStatusCode Then + Console.Error.WriteLine($"Summarize request failed: {CInt(resp.StatusCode)} {resp.ReasonPhrase}") + Console.Error.WriteLine(body) + Environment.Exit(1) + End If + Console.WriteLine(body) + End Using + End Function + End Module +End Namespace + diff --git a/VB.NET/Endpoint Examples/Multipart Payload/summarized-pdf-text.vb b/VB.NET/Endpoint Examples/Multipart Payload/summarized-pdf-text.vb new file mode 100644 index 00000000..569e0d3b --- /dev/null +++ b/VB.NET/Endpoint Examples/Multipart Payload/summarized-pdf-text.vb @@ -0,0 
+1,76 @@ +''' +' Summarize PDF text using pdfRest. +' Single multipart/form-data request to /summarized-pdf-text with the file. +''' + +Option Strict On +Option Explicit On + +Imports System +Imports System.IO +Imports System.Net.Http +Imports System.Net.Http.Headers +Imports System.Text +Imports System.Threading.Tasks + +Namespace VBNetSamples.Endpoint_Examples.Multipart_Payload + Module SummarizedPdfText + Public Async Function Execute(args As String()) As Task + If args Is Nothing OrElse args.Length < 1 Then + Console.Error.WriteLine("Usage: dotnet run -- summarized-pdf-text-multipart /path/to/input.pdf") + Environment.Exit(1) + End If + + Dim inputPath As String = args(0) + If Not File.Exists(inputPath) Then + Console.Error.WriteLine($"Input file not found: {inputPath}") + Environment.Exit(1) + End If + + Dim apiKey As String = Environment.GetEnvironmentVariable("PDFREST_API_KEY") + If String.IsNullOrWhiteSpace(apiKey) Then + Console.Error.WriteLine("Missing environment variable PDFREST_API_KEY.") + Environment.Exit(1) + End If + + Dim baseUrl As String = Environment.GetEnvironmentVariable("PDFREST_URL") + If String.IsNullOrWhiteSpace(baseUrl) Then baseUrl = "https://api.pdfrest.com" + + Dim baseUri As Uri + Try + baseUri = New Uri(baseUrl) + Catch ex As Exception + Console.Error.WriteLine($"Invalid PDFREST_URL: {baseUrl}") + Environment.Exit(1) + Return + End Try + + Using httpClient As New HttpClient() + httpClient.BaseAddress = baseUri + + Dim multipart As New MultipartFormDataContent() + Dim fileBytes As Byte() = File.ReadAllBytes(inputPath) + Dim fileContent As New ByteArrayContent(fileBytes) + fileContent.Headers.ContentType = New MediaTypeHeaderValue("application/pdf") + multipart.Add(fileContent, "file", Path.GetFileName(inputPath)) + + multipart.Add(New StringContent("100", Encoding.UTF8), "target_word_count") + + Dim req As New HttpRequestMessage(HttpMethod.Post, "summarized-pdf-text") + req.Headers.TryAddWithoutValidation("Api-Key", apiKey) + 
req.Headers.Accept.Add(New MediaTypeWithQualityHeaderValue("application/json")) + req.Content = multipart + + Dim resp As HttpResponseMessage = Await httpClient.SendAsync(req) + Dim body As String = Await resp.Content.ReadAsStringAsync() + If Not resp.IsSuccessStatusCode Then + Console.Error.WriteLine($"Summarize (multipart) failed: {CInt(resp.StatusCode)} {resp.ReasonPhrase}") + Console.Error.WriteLine(body) + Environment.Exit(1) + End If + Console.WriteLine(body) + End Using + End Function + End Module +End Namespace + diff --git a/VB.NET/Program.vb b/VB.NET/Program.vb index 99734dbf..3253cba3 100644 --- a/VB.NET/Program.vb +++ b/VB.NET/Program.vb @@ -52,10 +52,14 @@ Module Program Await VBNetSamples.Endpoint_Examples.JSON_Payload.Markdown.Execute(rest) Case "rasterized-pdf", "rasterize-json" Await VBNetSamples.Endpoint_Examples.JSON_Payload.RasterizedPdf.Execute(rest) + Case "summarized-pdf-text" + Await VBNetSamples.Endpoint_Examples.JSON_Payload.SummarizedPdfText.Execute(rest) Case "markdown-multipart" Await VBNetSamples.Endpoint_Examples.Multipart_Payload.Markdown.Execute(rest) Case "rasterized-pdf-multipart", "rasterize-multipart" Await VBNetSamples.Endpoint_Examples.Multipart_Payload.RasterizedPdf.Execute(rest) + Case "summarized-pdf-text-multipart" + Await VBNetSamples.Endpoint_Examples.Multipart_Payload.SummarizedPdfText.Execute(rest) Case "merge-different-file-types", "merge" Await VBNetSamples.Complex_Flow_Examples.MergeDifferentFileTypes.Execute(rest) Case Else @@ -73,8 +77,10 @@ Module Program Console.Error.WriteLine("Commands:") Console.Error.WriteLine(" markdown | markdown-json Upload then convert to Markdown (JSON two-step)") Console.Error.WriteLine(" rasterized-pdf | rasterize-json Upload then rasterize PDF (JSON two-step)") + Console.Error.WriteLine(" summarized-pdf-text Upload then summarize text (JSON two-step)") Console.Error.WriteLine(" markdown-multipart Convert to Markdown (single multipart request)") Console.Error.WriteLine(" 
rasterized-pdf-multipart Rasterize PDF (single multipart request)") + Console.Error.WriteLine(" summarized-pdf-text-multipart Summarize text (single multipart request)") Console.Error.WriteLine(" merge-different-file-types|merge Merge PDFs and non-PDFs into one PDF") Console.Error.WriteLine("") Console.Error.WriteLine("Examples:") diff --git a/cURL/Endpoint Examples/JSON Payload/summarized-pdf-text.sh b/cURL/Endpoint Examples/JSON Payload/summarized-pdf-text.sh new file mode 100644 index 00000000..1dad0e1b --- /dev/null +++ b/cURL/Endpoint Examples/JSON Payload/summarized-pdf-text.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +# By default, we use the US-based API service. This is the primary endpoint for global use. +API_URL="https://api.pdfrest.com" + +# For GDPR compliance and enhanced performance for European users, you can switch to the EU-based service by uncommenting the URL below. +# For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work +# API_URL="https://eu-api.pdfrest.com" + +UPLOAD_ID=$(curl --location "$API_URL/upload" \ +--header 'Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \ +--header 'content-filename: filename.pdf' \ +--data-binary '@/path/to/file' \ + | jq -r '.files.[0].id') + +echo "File successfully uploaded with an ID of: $UPLOAD_ID" + +curl "$API_URL/summarized-pdf-text" \ +--header 'Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \ +--header 'Content-Type: application/json' \ +--data-raw "{ \"id\": \"$UPLOAD_ID\", \"target_word_count\": 100 }" | jq -r '.' diff --git a/cURL/Endpoint Examples/Multipart Payload/summarized-pdf-text.sh b/cURL/Endpoint Examples/Multipart Payload/summarized-pdf-text.sh new file mode 100644 index 00000000..b562b0e0 --- /dev/null +++ b/cURL/Endpoint Examples/Multipart Payload/summarized-pdf-text.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +# By default, we use the US-based API service. This is the primary endpoint for global use. 
+API_URL="https://api.pdfrest.com" + +# For GDPR compliance and enhanced performance for European users, you can switch to the EU-based service by uncommenting the URL below. +# For more information visit https://pdfrest.com/pricing#how-do-eu-gdpr-api-calls-work +# API_URL="https://eu-api.pdfrest.com" + +curl -X POST "$API_URL/summarized-pdf-text" \ + -H "Accept: application/json" \ + -H "Content-Type: multipart/form-data" \ + -H "Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" \ + -F "file=@/path/to/file" \ + -F "target_word_count=100" From 1dc020dcd6eebff75638e7d2461fa3f46b515fd8 Mon Sep 17 00:00:00 2001 From: datalogics-dliang Date: Mon, 22 Sep 2025 12:25:19 -0500 Subject: [PATCH 2/2] Fix Java samples --- Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java | 4 +--- .../Multipart Payload/SummarizedPDFText.java | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java b/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java index 427fa3b6..40b3342b 100644 --- a/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java +++ b/Java/Endpoint Examples/JSON Payload/SummarizedPDFText.java @@ -42,8 +42,7 @@ public static void main(String[] args) { JSONObject fileObject = fileArray.getJSONObject(0); String uploadedID = fileObject.get("id").toString(); - String JSONString = - String.format("{\"id\":\"%s\", \"target_word_count\":100}", uploadedID); + String JSONString = String.format("{\"id\":\"%s\", \"target_word_count\":100}", uploadedID); final RequestBody requestBody = RequestBody.create(JSONString, MediaType.parse("application/json")); @@ -96,4 +95,3 @@ private static String uploadFile(File inputFile) { return ""; } } - diff --git a/Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java b/Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java index fc78b295..416946a8 100644 --- a/Java/Endpoint Examples/Multipart Payload/SummarizedPDFText.java +++ b/Java/Endpoint 
Examples/Multipart Payload/SummarizedPDFText.java @@ -66,4 +66,3 @@ private static String prettyJson(String json) { return new JSONObject(json).toString(4); } } -