Skip to content
Snippets Groups Projects
Commit 509d0467 authored by Carl Philipp Klemm's avatar Carl Philipp Klemm
Browse files

Add better cli utility

parent 5f078e0d
No related branches found
No related tags found
No related merge requests found
cmake_minimum_required(VERSION 3.20)
project(scipaper LANGUAGES C)
project(scipaper LANGUAGES C CXX)
set(SCI_SYSCONF_DIR /usr/share/scipaper)
set(SCI_MODULE_DIR /usr/lib/scipaper/modules)
set(SCI_USERCONF_DIR .config/scipaper)
add_definitions(-D_GNU_SOURCE)
add_definitions(-DSCI_MODULE_DIR=${SCI_MODULE_DIR})
add_definitions(-DSCI_SYSCONF_DIR=${SCI_SYSCONF_DIR})
add_definitions(-DSCI_USERCONF_DIR=${SCI_USERCONF_DIR})
add_definitions(-DSCI_SYSCONF_INI=scipaper.ini)
find_package(Doxygen)
......@@ -17,6 +19,9 @@ pkg_search_module(GLIB REQUIRED glib-2.0)
pkg_search_module(GMODULE REQUIRED gmodule-2.0)
pkg_check_modules(CURL REQUIRED libcurl)
# CMake only reads the CMAKE_-prefixed variables when it initialises the
# per-target <LANG>_STANDARD properties; the unprefixed names were plain
# variables with no effect. The CLI sources use std::filesystem (C++17) and
# designated initializers (C++20), so C++20 is requested.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_C_STANDARD 99)
set(COMMON_INCLUDE_DIRS
${GLIB_INCLUDE_DIRS}
${GMODULE_INCLUDE_DIRS}
......@@ -26,7 +31,7 @@ set(COMMON_INCLUDE_DIRS
${CMAKE_CURRENT_LIST_DIR}/src/modapi)
message(${COMMON_INCLUDE_DIRS})
set(COMMON_LIBRARIES ${GLIB_LIBRARIES} ${GMODULE_LIBRARIES} ${CURL_LIBRARIES})
set(COMMON_FLAGS "-std=c99 -Wall -O2 -march=native -g -fno-strict-aliasing")
set(COMMON_FLAGS "-Wall -O2 -march=native -g -fno-strict-aliasing")
set(CMAKE_INSTALL_PREFIX "/usr")
......
......@@ -16,7 +16,7 @@ set_target_properties(${PROJECT_NAME} PROPERTIES COMPILE_FLAGS ${COMMON_FLAGS})
install(TARGETS ${PROJECT_NAME} DESTINATION lib)
link_directories(${CMAKE_CURRENT_BINARY_DIR})
set(SRC_FILES_TEST_APP main.c)
set(SRC_FILES_TEST_APP main.cpp log.cpp)
set(LIBS_CLI -L. -l${PROJECT_NAME})
add_executable(${PROJECT_NAME}_cli ${SRC_FILES_TEST_APP})
add_dependencies(${PROJECT_NAME}_cli ${PROJECT_NAME})
......
/**
* libscipaper
* Copyright (C) 2022-2023 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include "log.h"
/**
 * @brief Starts a log message with the given level.
 *
 * When Log::headers is enabled, a "[LABEL] " prefix is streamed right away
 * through operator<<, so it is subject to the same level filtering as the
 * message itself.
 *
 * @param type level of this message
 * @param endlineI whether the destructor terminates the message with a newline
 */
Log::Log(Level type, bool endlineI): msglevel(type), endline(endlineI)
{
	if(!headers)
		return;

	*this<<("["+getLabel(type)+"] ");
}
/**
 * @brief Finishes the message: emits the trailing newline if anything was
 * actually printed and line termination was requested.
 */
Log::~Log()
{
	const bool needsNewline = opened && endline;
	if(needsNewline)
		std::cout<<'\n';

	opened = false;
}
/**
 * @brief Maps a log level to the fixed-width label used in message headers.
 *
 * SUPERDEBUG intentionally shares the "DEBUG" label. Every other level has
 * its own label; previously EXTRA fell through into INFO because of a
 * missing break and was labelled "INFO ".
 *
 * @param level level to translate
 * @return the label, or an empty string for a value outside the enum
 */
std::string Log::getLabel(Level level)
{
	switch(level)
	{
		case SUPERDEBUG: // shares the DEBUG label on purpose
		case DEBUG:
			return "DEBUG";
		case EXTRA:
			return "EXTRA";
		case INFO:
			return "INFO ";
		case WARN:
			return "WARN ";
		case ERROR:
			return "ERROR";
	}
	return std::string();
}
// The "[LABEL] " prefix written by the Log constructor is off by default.
bool Log::headers = false;
// Default threshold: operator<< only prints messages whose level is >= WARN.
Log::Level Log::level = WARN;
/**
* autobiblatex
* Copyright (C) 2022 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <iostream>
#include <string>
/**
 * @brief Minimal stream-style logger writing to std::cout.
 *
 * A temporary Log object represents one message: pieces are appended with
 * operator<< and the destructor finishes the line. Messages whose level is
 * below the global Log::level threshold are dropped.
 */
class Log
{
public:
	/// Message levels; a message is printed when its level compares >= Log::level.
	enum Level
	{
		SUPERDEBUG,
		DEBUG,
		EXTRA,
		INFO,
		WARN,
		ERROR
	};

private:
	bool opened{false};      ///< set once something has been written to std::cout
	Level msglevel{DEBUG};   ///< level of this particular message
	bool endline{true};      ///< whether the destructor appends '\n'

	/// Returns the header label for @p level.
	std::string getLabel(Level level);

public:
	static bool headers;     ///< when true, the constructor emits a "[LABEL] " prefix
	static Level level;      ///< global threshold below which messages are dropped

	Log() {}
	Log(Level type, bool endlineI = true);
	~Log();

	/// Streams @p msg to std::cout if this message passes the level threshold.
	template<class T> Log &operator<<(const T &msg)
	{
		if(msglevel < level)
			return *this;

		std::cout<<msg;
		opened = true;
		return *this;
	}
};
/*
* main.c
* Copyright (C) Carl Philipp Klemm 2021 <carl@uvos.xyz>
*
* main.c is free software: you can redistribute it and/or modify it
* under the terms of the lesser GNU General Public License as published by the
* Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* main.c is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the lesser GNU General Public License along
* with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdbool.h>
#include <glib.h>
#include "scipaper.h"
static void print_documents(const RequestReturn* documents)
{
printf("Found %zu documents:\n", documents->count);
for(size_t i = 0; i < documents->count; ++i)
{
if(!documents->documents[i])
continue;
char* documentString = document_meta_get_string(documents->documents[i]);
printf("Document found by %s:\n%s", sci_get_backend_info(documents->documents[i]->backendId)->name, documentString);
free(documentString);
}
}
/*
 * Searches the "core" backend for full-text documents by the author
 * "Wallauer", fetches the text of the first hit and saves the pdf of every
 * hit as ./<index>.pdf.
 *
 * Entries of the result array may be NULL (print_documents skips them too),
 * so every access is guarded, and each PdfData is released after saving.
 */
static void search_and_grab_wallauer_via_core(void)
{
	printf("Starting %s\n", __func__);

	int id = sci_backend_get_id_by_name("core");
	if(id == 0)
	{
		puts("core backend not available");
		return;
	}

	DocumentMeta* queryMeta = document_meta_new();
	queryMeta->author = g_strdup("Wallauer");
	queryMeta->hasFullText = true;
	queryMeta->backendId = id;
	RequestReturn* documents = sci_fill_meta(queryMeta, NULL, 20, 0);
	document_meta_free(queryMeta);

	if(!documents)
	{
		puts("Could not find any documents that matched query");
		return;
	}

	print_documents(documents);

	/* Only touch the first document if there actually is one. */
	if(documents->count > 0 && documents->documents[0])
	{
		printf("Getting text for first document from: %s (%i)\n",
			sci_get_backend_name(documents->documents[0]->backendId), documents->documents[0]->backendId);
		char* text = sci_get_document_text(documents->documents[0]);
		if(text)
			puts("got text!");
		free(text);
	}

	for(size_t i = 0; i < documents->count; ++i)
	{
		if(!documents->documents[i])
			continue;

		PdfData* data = sci_get_document_pdf_data(documents->documents[i]);
		if(!data)
			continue;

		puts("got got data! saveing..");
		char* fileName = g_strdup_printf("./%zu.pdf", i);
		bool ret = sci_save_pdf_to_file(data, fileName);
		g_free(fileName);
		if(ret)
			puts("saved");
		else
			puts("not saved");

		/* The pdf data belongs to us and was previously leaked. */
		pdf_data_free(data);
	}

	request_return_free(documents);
}
/*
 * Queries all backends (backendId is left untouched) for up to 20 documents
 * by the author "Wallauer" and prints whatever was found.
 */
static void search_wallauer(void)
{
	printf("Starting %s\n", __func__);

	DocumentMeta* query = document_meta_new();
	query->author = g_strdup("Wallauer");
	RequestReturn* found = sci_fill_meta(query, NULL, 20, 0);
	document_meta_free(query);

	if(!found)
	{
		puts("Could not find any documents that matched query");
	}
	else
	{
		print_documents(found);
		request_return_free(found);
	}

	puts("");
}
/*
 * Looks up a fixed DOI, prints the resulting metadata and then tries to
 * download the pdf to out.pdf with the backend restriction cleared
 * (backendId = 0).
 *
 * The metadata is only freed when the lookup succeeded, and a failed save
 * is now reported instead of being silently ignored.
 */
static void fill_meta_by_doi(void)
{
	static const char doi[] = "10.1002/ange.19410544309";

	printf("Starting %s\n", __func__);

	DocumentMeta* meta = sci_find_by_doi(doi, 0);
	if(!meta)
	{
		puts("Could not find any documents that matched doi");
		return;
	}

	char* documentString = document_meta_get_string(meta);
	printf("Found document for %s:\n%s", doi, documentString);
	free(documentString);

	printf("Trying grab scihub\n");
	meta->backendId = 0;
	PdfData* pdfData = sci_get_document_pdf_data(meta);
	if(pdfData)
	{
		printf("Found pdf for %s\n", doi);
		if(!sci_save_pdf_to_file(pdfData, "out.pdf"))
			puts("could not save pdf to out.pdf");
		pdf_data_free(pdfData);
	}
	else
	{
		puts("unable to grab pdf");
	}

	document_meta_free(meta);
}
/*
 * Test driver: initialises libscipaper (optionally with the config file given
 * as first argument), lists the available backends with their capabilities
 * and runs the DOI lookup test.
 */
int main(int argc, char** argv)
{
	const char* configFileName = argc > 1 ? argv[1] : NULL;
	if(configFileName)
		printf("%s using config file %s\n", __func__, configFileName);

	sci_log_set_verbosity(LL_DEBUG);

	bool initialised = sci_paper_init(configFileName, NULL, 0);
	if(!initialised)
	{
		printf("Coult not init libscipaper");
		return 1;
	}

	puts("Backends available:");
	const BackendInfo** backends = sci_get_all_backends();
	size_t idx = 0;
	while(backends[idx])
	{
		char* capabilities = capability_flags_get_str(backends[idx]->capabilities);
		printf("%zu:\t%s\tCapabilities: %s\n", idx, backends[idx]->name, capabilities);
		free(capabilities);
		++idx;
	}

	//search_wallauer();
	fill_meta_by_doi();
	//search_and_grab_wallauer_via_core();

	sci_paper_exit();
	return 0;
}
/**
* libscipaper
* Copyright (C) 2023 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#include <iostream>
#include <scipaper/scipaper.h>
#include <algorithm>
#include <cassert>
#include "log.h"
#include "options.h"
/// Number of results requested from the backend per page.
static constexpr size_t resultsPerPage = 200;

/**
 * @brief Runs a query and stores metadata (and optionally pdf / full text)
 * for every result in @p outDir.
 *
 * Files are named after the global result index: "<index>.json" for the
 * metadata and "<index>.pdf" for the pdf.
 *
 * @param meta query to pass to sci_fill_meta()
 * @param dryRun only report how many results exist, save nothing
 * @param savePdf also try to save the pdf of every document
 * @param saveText also fetch the full text and store it with the metadata
 * @param outDir directory the files are written to
 * @param maxCount maximum number of results to process, 0 means no limit
 * @return false if the first request yielded no result set, true otherwise
 */
bool grabPapers(const DocumentMeta* meta, bool dryRun, bool savePdf, bool saveText, const std::filesystem::path& outDir, size_t maxCount)
{
	// The loop below treats maxCount == 0 as "no limit"; the first request
	// must do the same instead of asking the backend for zero results.
	const bool limited = maxCount > 0;
	const size_t firstPageSize = limited ? std::min(maxCount, resultsPerPage) : resultsPerPage;

	Log(Log::INFO)<<"Downloading results";
	RequestReturn* req = sci_fill_meta(meta, nullptr, firstPageSize, 0);
	if(!req)
	{
		Log(Log::WARN)<<"The backend found no results for your query";
		return false;
	}

	const size_t totalCount = req->totalCount;
	// Round up so a partially filled last page is counted, but never request
	// an extra empty page when totalCount is a multiple of the page size.
	// Page 0 is already in hand, so it is always processed.
	const size_t pages = std::max<size_t>((totalCount + resultsPerPage - 1)/resultsPerPage, 1);
	const size_t wanted = limited ? std::min(maxCount, totalCount) : totalCount;
	Log(Log::INFO)<<"Got "<<totalCount<<" results in "<<pages<<" pages";

	if(dryRun)
	{
		request_return_free(req);
		return true;
	}

	size_t processed = 0;
	for(size_t page = 0; page < pages; ++page)
	{
		if(page != 0)
		{
			// Every page gets exactly one retry before it is skipped.
			req = sci_fill_meta(meta, nullptr, resultsPerPage, page);
			if(!req)
				req = sci_fill_meta(meta, nullptr, resultsPerPage, page);
			if(!req)
			{
				Log(Log::WARN)<<"Could not get page "<<page<<", skipping it";
				continue;
			}
		}

		Log(Log::INFO)<<"Processing page "<<page<<": "<<processed<<" of "<<wanted<<
			", got "<<req->count<<" results this page";

		bool limitReached = false;
		for(size_t i = 0; i < req->count; ++i)
		{
			if(req->documents[i])
			{
				const std::string index = std::to_string(page*resultsPerPage+i);
				const std::filesystem::path jsonpath = outDir/(index + ".json");

				if(savePdf)
				{
					const std::filesystem::path pdfpath = outDir/(index + ".pdf");
					if(!sci_save_document_to_file(req->documents[i], pdfpath.c_str()))
						Log(Log::WARN)<<"Could not get pdf for document "<<jsonpath;
				}

				char* text = nullptr;
				if(saveText)
				{
					text = sci_get_document_text(req->documents[i]);
					if(!text)
						Log(Log::WARN)<<"Could not get text for document "<<jsonpath;
				}

				if(!document_meta_save(jsonpath.c_str(), req->documents[i], text))
					Log(Log::WARN)<<"Could not save document metadata"<<jsonpath;

				// The text is owned by the caller and was previously leaked.
				free(text);
			}

			++processed;
			if(limited && processed >= maxCount)
			{
				limitReached = true;
				break;
			}
		}

		request_return_free(req);
		if(limitReached)
			break;
	}

	return true;
}
/**
 * @brief Makes sure @p outDir exists and is a directory, creating it
 * (including missing parent directories) when necessary.
 *
 * The non-throwing std::filesystem overloads are used so that a failure ends
 * up in the error message below instead of an uncaught filesystem_error.
 *
 * @param outDir directory that should exist afterwards
 * @return true if the directory exists or was created, false otherwise
 */
bool checkDir(const std::filesystem::path& outDir)
{
	std::error_code ec;
	if(std::filesystem::is_directory(outDir, ec))
		return true;

	std::filesystem::create_directories(outDir, ec);

	// Re-check instead of trusting the return value: it is also false when
	// the path already exists but is not a directory.
	if(ec || !std::filesystem::is_directory(outDir, ec))
	{
		std::cerr<<outDir<<" dose not exist and can not be created\n";
		return false;
	}
	return true;
}
int main(int argc, char** argv)
{
Log::level = Log::INFO;
Config config;
argp_parse(&argp, argc, argv, 0, 0, &config);
if(Log::level == Log::DEBUG)
sci_log_set_verbosity(LL_DEBUG);
if(!sci_paper_init(nullptr, nullptr, 0))
{
Log(Log::ERROR)<<"could not init scipaper";
return 1;
}
bool ret = checkDir(config.outDir);
if(!ret)
return 1;
DocumentMeta queryMeta = {
.doi = const_cast<char*>(config.doi.empty() ? nullptr : config.doi.c_str()),
.title = const_cast<char*>(config.title.empty() ? nullptr : config.title.c_str()),
.journal = const_cast<char*>(config.journal.empty() ? nullptr : config.journal.c_str()),
.keywords = const_cast<char*>(config.keywords.empty() ? nullptr : config.keywords.c_str()),
.abstract = const_cast<char*>(config.abstract.empty() ? nullptr : config.abstract.c_str()),
.searchText = const_cast<char*>(config.text.empty() ? nullptr : config.text.c_str()),
.hasFullText = true
};
size_t length;
char* json = document_meta_get_json(&queryMeta, nullptr, &length);
Log(Log::DEBUG)<<"Using document meta: "<<json;
free(json);
ret = grabPapers(&queryMeta, config.dryRun, config.savePdf, config.fullText, config.outDir, config.maxNumber);
if(!ret)
return 1;
return 0;
}
......@@ -235,6 +235,12 @@ static RequestReturn* core_fill_meta_impl(int *code, const DocumentMeta* meta, s
}
g_free(tokens);
}
if(meta->abstract)
{
g_string_append(searchString, "abstract:\"");
g_string_append(searchString, meta->abstract);
g_string_append(searchString, "\"+");
}
if(meta->searchText)
{
g_string_append_c(searchString, '\"');
......@@ -315,7 +321,7 @@ static RequestReturn* core_fill_meta(const DocumentMeta* meta, size_t maxCount,
return NULL;
}
if(meta->author || meta->title || meta->keywords || meta->searchText)
if(meta->author || meta->title || meta->keywords || meta->searchText || meta->abstract)
{
int code = -1;
for(int i = 0; i < priv->retry && code != 0; ++i)
......@@ -325,6 +331,10 @@ static RequestReturn* core_fill_meta(const DocumentMeta* meta, size_t maxCount,
results = core_fill_meta_impl(&code, meta, maxCount, page, priv);
}
}
else
{
sci_module_log(LL_DEBUG, "Can not fill meta that dose not contain author, title, keywords, abstract or searchText");
}
return results;
}
......
/**
* papergrabber
* Copyright (C) 2023 Carl Klemm
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 3 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#pragma once
#include <argp.h>

#include <filesystem>
#include <iostream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>

#include "log.h"
// Globals consumed by glibc's argp: version string and bug report address.
// NOTE(review): these are non-static definitions in a header, so including
// this header from more than one translation unit would break linking — confirm
// it is only ever included by main.cpp.
const char *argp_program_version = "1.0";
const char *argp_program_bug_address = "<carl@uvos.xyz>";
// Program description; passed to argp as the doc member of the parser below.
static char doc[] = "Application that grabs text or pdf files for documents from online resources using libscipaper";
// The program takes no positional arguments, hence the empty argument doc.
static char args_doc[] = "";
/**
 * Command line options understood by parse_opt().
 *
 * Every option whose handler reads `arg` must name an argument here,
 * otherwise argp passes a null pointer to the handler. "--backend" was
 * missing its argument. "--journal" is now spelled correctly; the old
 * misspelling "--jornal" is kept as a hidden alias so existing invocations
 * keep working.
 */
static struct argp_option options[] =
{
  {"verbose",   'v', nullptr,       0, "Show debug messages" },
  {"question",  'q', "[FILE]",      0, "Question you want the system to answer" },
  {"key-words", 'k', "[STRING]",    0, "Search in key words" },
  {"title",     't', "[STRING]",    0, "Search in title"},
  {"journal",   'j', "[STRING]",    0, "Search in journal"},
  {"jornal",    0,   nullptr,       OPTION_ALIAS | OPTION_HIDDEN, nullptr},
  {"abstract",  'a', "[STRING]",    0, "Search in abstract"},
  {"text",      'e', "[STRING]",    0, "Freeform text search"},
  {"doi",       'i', "[STRING]",    0, "Search for a specific doi" },
  {"dry-run",   'd', nullptr,       0, "Just show how many results there are"},
  {"out-dir",   'o', "[DIRECTORY]", 0, "Place to save output" },
  {"limit",     'l', "[NUMBER]",    0, "Maximum number of results to process" },
  {"pdf",       'p', nullptr,       0, "Save pdf"},
  {"full-text", 'f', nullptr,       0, "Save full text"},
  {"backend",   'b', "[STRING]",    0, "Ask scipaper to use a specific backend"},
  { 0 }
};
/**
 * @brief Settings collected from the command line by parse_opt().
 *
 * Empty strings mean that the corresponding option was not given.
 */
struct Config
{
	// search fields
	std::string keywords;
	std::string title;
	std::string journal;
	std::string abstract;
	std::string text;
	std::string question;
	std::string doi;

	// backend name given on the command line
	std::string backend;

	// output handling
	std::filesystem::path outDir{"./out"};
	size_t maxNumber{100};
	bool dryRun{false};
	bool fullText{false};
	bool savePdf{false};
};
/**
 * @brief argp callback that stores one parsed option in the Config passed as
 * parser input.
 *
 * Invalid values are reported through argp_error() instead of escaping as an
 * uncaught exception ("--limit") or dereferencing a null argument
 * ("--backend" when the option table declares no argument for it).
 *
 * @param key short option key
 * @param arg option argument, null for options without one
 * @param state argp parser state; state->input points to the Config
 * @return 0 on success, EINVAL for a bad value, ARGP_ERR_UNKNOWN for keys
 *         this parser does not handle
 */
static error_t parse_opt (int key, char *arg, struct argp_state *state)
{
	Config *config = static_cast<Config*>(state->input);

	switch (key)
	{
	case 'v':
		Log::level = Log::DEBUG;
		break;
	case 'q':
		config->question.assign(arg);
		break;
	case 'k':
		config->keywords.assign(arg);
		break;
	case 't':
		config->title.assign(arg);
		break;
	case 'o':
		config->outDir.assign(arg);
		break;
	case 'j':
		config->journal.assign(arg);
		break;
	case 'a':
		config->abstract.assign(arg);
		break;
	case 'e':
		config->text.assign(arg);
		break;
	case 'd':
		config->dryRun = true;
		break;
	case 'l':
	{
		// Accept only a complete, non-negative integer; stoll throws on
		// garbage or overflow and a negative value would wrap around when
		// stored in a size_t.
		long long limit = -1;
		bool valid = false;
		try
		{
			const std::string text(arg);
			size_t consumed = 0;
			limit = std::stoll(text, &consumed);
			valid = consumed == text.size() && limit >= 0;
		}
		catch(const std::logic_error&)
		{
			valid = false;
		}
		if(!valid)
		{
			argp_error(state, "invalid value for --limit: %s", arg);
			return EINVAL;
		}
		config->maxNumber = static_cast<size_t>(limit);
		break;
	}
	case 'b':
		// arg is null when the option table declares no argument for 'b'.
		if(!arg)
		{
			argp_error(state, "option --backend requires a backend name");
			return EINVAL;
		}
		config->backend.assign(arg);
		break;
	case 'i':
		config->doi.assign(arg);
		break;
	case 'f':
		config->fullText = true;
		break;
	case 'p':
		config->savePdf = true;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}
	return 0;
}
// Parser description handed to argp_parse() in main(): option table, option
// callback, positional-argument doc and program doc.
static struct argp argp = {options, parse_opt, args_doc, doc};
......@@ -213,6 +213,7 @@ RequestReturn* sci_fill_meta(const DocumentMeta* meta, const FillReqest* fill, s
struct SciBackend* backend = element->data;
if(backend->fill_meta && (meta->backendId == backend->id || meta->backendId == 0))
{
sci_log(LL_DEBUG, "%s: Trying to fill using %s", __func__, backend->backend_info->name);
RequestReturn* newMetas = backend->fill_meta(meta, maxCount, page, backend->user_data);
if(newMetas)
{
......
......@@ -324,10 +324,14 @@ static bool sci_conf_is_ini_file(const char *filename)
bool sci_conf_init(const char* fileName, const char* data, size_t length)
{
sci_conf_file_count = 1;
char* home = getenv("HOME");
if(fileName)
++sci_conf_file_count;
if(data)
++sci_conf_file_count;
if(home)
++sci_conf_file_count;
size_t index = 0;
conf_files = calloc(sci_conf_file_count, sizeof(*conf_files));
......@@ -349,6 +353,24 @@ bool sci_conf_init(const char* fileName, const char* data, size_t length)
++index;
}
if(home)
{
conf_files[index].filename = g_strdup(G_STRINGIFY(SCI_SYSCONF_INI));
conf_files[index].path = g_strconcat(home, "/", G_STRINGIFY(SCI_USERCONF_DIR), "/", G_STRINGIFY(SCI_SYSCONF_INI), NULL);
gpointer conf_file = sci_conf_read_conf_file(conf_files[index].path);
if(!conf_file)
{
g_free(conf_files[index].filename);
g_free(conf_files[index].path);
--sci_conf_file_count;
}
else
{
conf_files[index].keyfile = conf_file;
++index;
}
}
if(fileName)
{
if(sci_conf_is_ini_file(fileName))
......@@ -395,7 +417,7 @@ bool sci_conf_init(const char* fileName, const char* data, size_t length)
return FALSE;
for (size_t i = 0; i < sci_conf_file_count; ++i)
sci_log(LL_DEBUG, "sci-conf: using conf file %lu: %s", (unsigned long)i, conf_files[i].filename);
sci_log(LL_DEBUG, "sci-conf: using conf file %lu: %s", (unsigned long)i, conf_files[i].path);
return TRUE;
}
......
......@@ -69,7 +69,7 @@ static void sci_modules_load(gchar **modlist)
{
struct sci_module *module = g_malloc(sizeof(*module));
module->name = g_strdup(modlist[i]);
gchar *tmp = g_module_build_path(path, modlist[i]);
gchar *tmp = g_strconcat(path, "/", modlist[i], NULL);//g_module_build_path(path, modlist[i]);
sci_log(LL_DEBUG, "Loading module: %s from %s", modlist[i], path);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment