pdfrender/main.cpp

148 lines
3.9 KiB
C++

#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <exception>
#include <future>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include <boost/program_options.hpp>
#include <boost/regex.hpp>
#include <poppler/cpp/poppler-document.h>
#include <poppler/cpp/poppler-page.h>
#include <poppler/cpp/poppler-page-renderer.h>
using namespace std;
using namespace poppler;
namespace po = boost::program_options;
// Program-wide settings. parse_args() binds the command-line options to these
// variables; the rendering code reads them afterwards.

// Value of the --dpi option (default 75).
int dpi = 75;
// Path of the PDF to render (positional "input" argument).
std::string inputpdf;
// printf-style pattern for the output file names (--output option).
std::string output;
// Number of rendering threads (--threads option). hardware_concurrency() is
// allowed to return 0 when the value cannot be determined, so fall back to a
// single thread instead of ending up with no workers at all.
int threads = [] {
    const unsigned detected = std::thread::hardware_concurrency();
    return detected == 0 ? 1 : static_cast<int>(detected);
}();
/**
 * Renders one page of `doc` and saves it as a JPEG file.
 *
 * @param doc     document to read the page from
 * @param pageno  1-based page number; also substituted into `fmt`
 * @param fmt     printf-style pattern for the output file name, expected to
 *                contain a single integer conversion for the page number
 *
 * Failures (missing page, failed render, failed save) are reported on stderr
 * and the function returns without throwing.
 */
void render_page(document *doc, int pageno, const string& fmt) {
    // poppler indexes pages from 0 to pages() - 1, while callers (and the
    // generated file names) count from 1.
    const std::unique_ptr<page> p(doc->create_page(pageno - 1));
    if (!p) {
        std::cerr << "error: could not read page " << pageno << std::endl;
        return;
    }

    // Rasterise at the requested resolution; without explicit arguments
    // poppler renders at its built-in default regardless of `dpi`.
    page_renderer r;
    image img = r.render_page(p.get(), dpi, dpi);
    if (!img.is_valid()) {
        std::cerr << "error: could not render page " << pageno << std::endl;
        return;
    }

    // First pass measures the formatted name, second pass writes it, so the
    // buffer is always large enough (name plus terminating NUL).
    const int needed = std::snprintf(nullptr, 0, fmt.c_str(), pageno);
    if (needed < 0) {
        std::cerr << "error: could not format output name for page " << pageno << std::endl;
        return;
    }
    std::vector<char> buf(static_cast<std::size_t>(needed) + 1);
    std::snprintf(buf.data(), buf.size(), fmt.c_str(), pageno);

    if (!img.save(buf.data(), "JPEG", dpi)) {
        std::cerr << "error: could not write " << buf.data() << std::endl;
    }
}
/**
 * Renders a strided sequence of pages: `first`, `first + step`,
 * `first + 2 * step`, ... for as long as the page number does not exceed
 * `last`. Each page is handed to render_page() together with `fmt`.
 *
 * Note: the loop only advances by `step`, so a `step` of zero or less never
 * terminates when `first <= last`.
 */
void render_pages(document *doc, int first, int last, int step, const string& fmt) {
    int current = first;
    while (current <= last) {
        render_page(doc, current, fmt);
        current += step;
    }
}
/**
 * Checks that `output` is a safe file-name pattern for the page number.
 *
 * The pattern is later passed to snprintf() with a single int argument, so it
 * must contain exactly one integer conversion and nothing else that printf
 * would interpret. Accepted sequences:
 *   - `%d`            the page number
 *   - `%0<padding>d`  the page number, zero-padded; <padding> is at most 10
 *   - `%%`            a literal percent sign
 * Any other use of '%' (for example `%s`, `%5d`, or a trailing '%') is
 * rejected, because it would make snprintf() read arguments that were never
 * supplied.
 *
 * @param output  the pattern given on the command line
 * @return true if the pattern is acceptable; otherwise false, after printing
 *         the reason to stdout
 */
bool validate_output(const std::string& output)
{
    if (output.empty()) {
        std::cout << "Output format must be specified" << std::endl;
        return false;
    }

    const int max_padding = 10;
    const std::size_t len = output.size();
    int conversions = 0;

    for (std::size_t i = 0; i < len; ++i) {
        if (output[i] != '%') {
            continue;
        }
        std::size_t j = i + 1;

        // "%%" is an escaped percent sign and consumes no argument.
        if (j < len && output[j] == '%') {
            i = j;
            continue;
        }

        // Optional zero flag, which must be followed by a width.
        bool zero_flag = false;
        std::string digits;
        if (j < len && output[j] == '0') {
            zero_flag = true;
            ++j;
            while (j < len && output[j] >= '0' && output[j] <= '9') {
                digits += output[j++];
            }
        }

        const bool well_formed =
            j < len && output[j] == 'd' && (!zero_flag || !digits.empty());
        if (!well_formed) {
            std::cout << "error: unsupported conversion in output format; "
                         "only %d, %0<padding>d and %% are allowed" << std::endl;
            return false;
        }

        // Accumulate the width, stopping once it exceeds the limit so that an
        // arbitrarily long digit string cannot overflow.
        int padding = 0;
        for (const char c : digits) {
            padding = padding * 10 + (c - '0');
            if (padding > max_padding) {
                break;
            }
        }
        if (padding > max_padding) {
            std::cout << "error: padding is too large: " << digits << std::endl;
            return false;
        }

        ++conversions;
        i = j;  // resume scanning after the 'd'
    }

    if (conversions < 1) {
        std::cout << "Output format must contain %d or %0<padding>d" << std::endl;
        return false;
    }
    if (conversions > 1) {
        std::cout << "Output format must contain only one %d or %0<padding>d" << std::endl;
        return false;
    }
    return true;
}
void parse_args(int argc, char *argv[])
{
po::options_description opts(string("Usage: ") + argv[0] + " [options] input.pdf\nOptions:");
opts.add_options()
("help,h", "prints this message")
("output,o", po::value<string>(&output),
"output format; must contain exactly one instance of %d or "
"%0<padding>d where <padding> is an integer for the page number; "
"padding, if used, will always be with zeros")
("dpi", po::value<int>(&dpi)->default_value(75),
"render at the specified dpi")
("threads,t", po::value<int>(&threads),
"use this many threads to render; by default, uses "
"the result of std::thread::hardware_concurrency()")
;
po::options_description hidden("Hidden");
hidden.add_options()
("input", po::value<string>(&inputpdf))
;
po::options_description allopts("");
allopts.add(opts).add(hidden);
po::positional_options_description pos;
pos.add("input", 1);
po::variables_map vm;
po::store(
po::command_line_parser(argc, argv)
.options(allopts)
.positional(pos)
.run(),
vm
);
po::notify(vm);
if (inputpdf.empty() || vm.count("help")) {
cout << opts;
exit(1);
}
if (!validate_output(output)) {
cout << "See " << argv[0] << " -h for more info" << endl;
exit(1);
}
}
int main(int argc, char *argv[])
{
parse_args(argc, argv);
document *doc = document::load_from_file(inputpdf);
vector<future<void>> futures;
int pagecount = doc->pages();
for (int i = 1; i <= threads; i++) {
futures.push_back(
async(launch::async,
render_pages, doc, i, pagecount, threads, output
)
);
}
}