-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Lots of work done in this new version. See release notes.
- Loading branch information
1 parent
c8db5f0
commit 4af3d5f
Showing
27 changed files
with
895 additions
and
337 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
*.db | ||
*.db-journal | ||
|
||
__pycache__ | ||
env/ | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
import graphene | ||
from graphene_sqlalchemy import SQLAlchemyObjectType | ||
|
||
from database.answer import Answer as AnswerModel | ||
from database.consultee_list import ConsulteeList as ConsulteeListModel | ||
from database.consultee import Consultee as ConsulteeModel | ||
from database.document import Document as DocumentModel | ||
from database.file import File as FileModel | ||
from database.remiss import Remiss as RemissModel | ||
|
||
from service.database import Database | ||
|
||
# Reusable GraphQL field declarations describing a file.
class FileAttribute:
    name = graphene.String(
        description="Name of the file.",
    )
    url = graphene.String(
        description="URL of the file.",
    )
|
||
|
||
# GraphQL object type backed by the File SQLAlchemy model (FileModel).
class File(SQLAlchemyObjectType):

    class Meta:
        model = FileModel
|
||
|
||
# Reusable GraphQL field declarations describing a document; the answer and
# consultee-list attribute holders below reference these same field objects.
class DocumentAttribute:
    remiss_id = graphene.Int(
        description="Id of the answer's remiss.",
    )
    type = graphene.String(
        description="Type of the document.",
    )
    files = graphene.List(
        File,
        description="Files of the document.",
    )
|
||
|
||
# GraphQL object type backed by the Document SQLAlchemy model (DocumentModel).
class Document(SQLAlchemyObjectType):

    class Meta:
        model = DocumentModel
|
||
|
||
# GraphQL field declarations for an answer: its own ``organisation`` field
# plus the generic document fields taken from DocumentAttribute.
class AnswerAttribute:
    organisation = graphene.String(
        description="Organisation or individual which authored the answer.",
    )

    # Shared with DocumentAttribute (same field objects, not copies).
    remiss_id = DocumentAttribute.remiss_id
    type = DocumentAttribute.type
    files = DocumentAttribute.files
|
||
|
||
# GraphQL object type backed by the Answer SQLAlchemy model (AnswerModel).
class Answer(SQLAlchemyObjectType):

    class Meta:
        model = AnswerModel
|
||
|
||
# Reusable GraphQL field declarations describing a consultee.
class ConsulteeAttribute:
    name = graphene.String(
        description="Name of the consultee.",
    )
|
||
|
||
# GraphQL object type backed by the Consultee SQLAlchemy model (ConsulteeModel).
class Consultee(SQLAlchemyObjectType):

    class Meta:
        model = ConsulteeModel
|
||
|
||
# GraphQL field declarations for a consultee list: the list of consultees
# plus the generic document fields taken from DocumentAttribute.
class ConsulteeListAttribute:
    consultee_list = graphene.List(
        Consultee,
        description="List of all the consultees in the document.",
    )

    # Shared with DocumentAttribute (same field objects, not copies).
    remiss_id = DocumentAttribute.remiss_id
    type = DocumentAttribute.type
    files = DocumentAttribute.files
|
||
|
||
# GraphQL object type backed by the ConsulteeList SQLAlchemy model
# (ConsulteeListModel).
class ConsulteeList(SQLAlchemyObjectType):

    class Meta:
        model = ConsulteeListModel
|
||
|
||
# GraphQL object type backed by the Remiss SQLAlchemy model (RemissModel).
class Remiss(SQLAlchemyObjectType):

    class Meta:
        model = RemissModel
|
||
|
||
# Root query type. For every model there is one field returning the first
# stored row and one field returning all stored rows.
class Query(graphene.ObjectType):
    answer = graphene.Field(Answer)
    answers = graphene.List(Answer)

    def resolve_answer(self, *args, **kwargs):
        # First stored answer, or None when the table is empty.
        return Database.query(AnswerModel).first()

    def resolve_answers(self, *args, **kwargs):
        # Every stored answer.
        return Database.query(AnswerModel).all()

    consultee_list = graphene.Field(ConsulteeList)
    consultee_lists = graphene.List(ConsulteeList)

    # The resolvers below were missing: without them these fields fall back
    # to graphene's default resolver and always come back as null.
    def resolve_consultee_list(self, *args, **kwargs):
        return ConsulteeListModel.query.first()

    def resolve_consultee_lists(self, *args, **kwargs):
        return ConsulteeListModel.query.all()

    consultee = graphene.Field(Consultee)
    consultees = graphene.List(Consultee)

    def resolve_consultee(self, *args, **kwargs):
        return ConsulteeModel.query.first()

    def resolve_consultees(self, *args, **kwargs):
        return ConsulteeModel.query.all()

    document = graphene.Field(Document)
    documents = graphene.List(Document)

    def resolve_document(self, *args, **kwargs):
        return DocumentModel.query.first()

    def resolve_documents(self, *args, **kwargs):
        return DocumentModel.query.all()

    file = graphene.Field(File)
    files = graphene.List(File)

    def resolve_file(self, *args, **kwargs):
        return FileModel.query.first()

    def resolve_files(self, *args, **kwargs):
        return FileModel.query.all()

    remiss = graphene.Field(Remiss)
    remisser = graphene.List(Remiss)

    def resolve_remiss(self, *args, **kwargs):
        # First stored remiss, or None when the table is empty.
        return RemissModel.query.first()

    def resolve_remisser(self, *args, **kwargs):
        # Every stored remiss.
        return RemissModel.query.all()


# Schema object handed to the GraphQL view.
schema = graphene.Schema(query=Query)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from flask import Flask | ||
from flask_graphql import GraphQLView | ||
|
||
from service.database import Database | ||
from api.schema import schema | ||
|
||
# Flask application that serves the GraphQL schema over HTTP.
app = Flask(__name__)
app.debug = True

# Expose the schema at /graphql with the GraphiQL browser UI switched on.
_graphql_view = GraphQLView.as_view('graphql', schema=schema, graphiql=True)
app.add_url_rule('/graphql', view_func=_graphql_view)


@app.teardown_appcontext
def shutdown_session(exception=None):
    """Call ``Database.remove()`` whenever the application context ends.

    ``exception`` is supplied by Flask's teardown hook and is ignored here.
    """
    Database.remove()


if __name__ == '__main__':
    app.run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import urllib.request | ||
import os | ||
|
||
from pdfminer.pdfpage import PDFTextExtractionNotAllowed | ||
from pdfminer.pdfparser import PDFSyntaxError | ||
|
||
from database.file import File | ||
from database.document import Document | ||
from database.consultee_list import ConsulteeList | ||
from database.consultee import Consultee | ||
from database.answer import Answer | ||
from database.remiss import Remiss | ||
|
||
from service.downloader import Downloader | ||
from service.document_parser import DocumentParser | ||
from service.database import Database | ||
from service.file_manager import FileManager | ||
from io import BytesIO | ||
|
||
# Script: for every document of type 'consultee_list', make sure its PDF is
# available under tmp/, parse the list of consultees out of it and store the
# result on the document.
#
# RESET_DB    -- when True, wipe stored consultees and re-extract everything.
# RESET_FILES -- when True, download the PDF again even if a copy exists.
RESET_DB = False
RESET_FILES = False

if RESET_DB:
    Database.delete_all(Consultee)
    Database.commit()
    print('Emptied the consultee table.\n')

saved_consultee_lists = ConsulteeList.query.all()
saved_consultees = Consultee.query.all()
saved_remisser = Remiss.query.all()
saved_documents = Document.query.filter(Document.type == 'consultee_list')

nb_of_consultees = len(saved_consultees)
print(f'Found {nb_of_consultees} consultees in the database.')

for document in saved_documents:

    if not RESET_DB and document.consultee_list:
        print(f'Consultees for remiss {document.remiss_id} already in database.')
        continue
    elif RESET_DB:
        Consultee.query.filter(Consultee.consultee_list_id == document.id).delete()

    filepath = f'tmp/{document.remiss_id}/{document.id}.pdf'

    if RESET_FILES or not FileManager.filepath_exists(filepath):
        # Reset on every iteration: previously a failed download left ``f``
        # unbound (NameError on the first document) or still holding the
        # payload of the previous document, which was then written to the
        # wrong path.
        f = None
        if document.files:  # a document without files has nothing to fetch
            try:
                f = Downloader.get(document.files[0].url)
            except urllib.error.HTTPError:
                print(f'404: Remissinstans {document.remiss_id} not found.')

        if f is not None:
            FileManager.write_to_filepath(filepath, f)

    if not FileManager.filepath_exists(filepath):
        continue

    # NOTE(review): get_filepath presumably returns an open file object (the
    # original code called .close() on it) -- confirm against FileManager.
    fp = FileManager.get_filepath(filepath)
    try:
        try:
            consultees = DocumentParser.extract_list(fp)
        except (PDFTextExtractionNotAllowed, PDFSyntaxError):
            print(f'Document {document.remiss_id} could not be extracted.')
            continue

        if not consultees:
            print(f'Document {document.remiss_id} could not be extracted.')
            continue

        document.consultee_list = consultees

        Database.commit()

        print(f'Saved {len(consultees)} organisations for remiss {document.remiss_id}')
    finally:
        # Runs on the ``continue`` paths too, so the handle is never leaked.
        fp.close()

Database.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from service.database import Database | ||
from service.cleaner import Cleaner | ||
from database.answer import Answer | ||
|
||
# Script: derive the organisation name of every answer from the name of its
# first file and store it on the answer.
saved_answers = Answer.query.all()

# When True, recompute the organisation even for answers that already have one.
RESET_DB = False

print('II-2 Cleaning filenames to get organisation name...')
total = len(saved_answers)
# Report progress about once per percent. The step is clamped to 1 because
# ``total // 100`` is 0 for fewer than 100 answers, which used to raise
# ZeroDivisionError in the modulo below.
step = max(total // 100, 1)
for index, answer in enumerate(saved_answers, start=1):
    if index % step == 0:
        # ``index`` already counts processed answers (enumerate starts at 1),
        # so no extra +1 is needed.
        print(
            f'{index * 100 // total} % cleaned'
        )

    # Answers without any file have no filename to derive a name from.
    if (RESET_DB or answer.organisation is None) and answer.files:
        organisation_name = Cleaner.get_organisation_name(answer.files[0].name)
        if organisation_name != answer.organisation:
            answer.organisation = organisation_name
            Database.commit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from database.document import Document | ||
from sqlalchemy import ForeignKey, Column, Integer, String | ||
from sqlalchemy.orm import relationship | ||
|
||
|
||
class Answer(Document):
    """Answer model."""

    __tablename__ = 'answer'

    # The primary key doubles as a foreign key to the parent document row.
    id = Column(
        Integer,
        ForeignKey('document.id'),
        primary_key=True,
    )
    remiss = relationship('Remiss', back_populates='answers')
    organisation = Column(String)

    # Value stored in the polymorphic discriminator for this subclass.
    __mapper_args__ = dict(polymorphic_identity='answer')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from sqlalchemy import create_engine | ||
from sqlalchemy.ext.declarative import declarative_base | ||
from sqlalchemy.orm import scoped_session, sessionmaker | ||
import os | ||
|
||
|
||
# Create the database engine for the SQLite file stored next to this module.
db_name = 'database.db'
db_path = os.path.join(os.path.dirname(__file__), db_name)
db_uri = f'sqlite:///{db_path}'
# ``convert_unicode`` is intentionally not passed: it is deprecated since
# SQLAlchemy 1.3 and rejected with a TypeError from 1.4 on.
engine = create_engine(db_uri)

# Declarative base model to create database tables and classes
Base = declarative_base()
Base.metadata.bind = engine  # Bind engine to metadata of the base class

# Create database session object
db_session = scoped_session(sessionmaker(bind=engine, expire_on_commit=False))
Base.query = db_session.query_property()  # Used by graphql to execute queries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from .base import Base | ||
from sqlalchemy import ForeignKey, Column, Integer, String | ||
from sqlalchemy.orm import relationship | ||
|
||
|
||
class Consultee(Base):
    """Consultee model."""

    __tablename__ = 'consultee'

    id = Column(Integer, primary_key=True)

    # Link to the owning consultee list; the relationship is paired with the
    # attribute of the same name on ConsulteeList.
    consultee_list_id = Column(Integer, ForeignKey('consultee_list.id'))
    consultee_list = relationship(
        'ConsulteeList',
        back_populates='consultee_list',
    )

    name = Column(String)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from database.document import Document | ||
from sqlalchemy import ForeignKey, Column, Integer | ||
from sqlalchemy.orm import relationship | ||
|
||
|
||
class ConsulteeList(Document):
    """ConsulteeList model."""

    __tablename__ = 'consultee_list'

    # The primary key doubles as a foreign key to the parent document row.
    id = Column(
        Integer,
        ForeignKey('document.id'),
        primary_key=True,
    )
    remiss = relationship('Remiss', back_populates='consultees')
    # Consultees of this list; paired with Consultee.consultee_list.
    consultee_list = relationship(
        'Consultee',
        back_populates='consultee_list',
    )

    # Value stored in the polymorphic discriminator for this subclass.
    __mapper_args__ = dict(polymorphic_identity='consultee_list')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from .base import Base | ||
from sqlalchemy import Column, Integer, String, Date | ||
|
||
|
||
class Content(Base):
    """Content model."""

    __tablename__ = 'content'

    id = Column(Integer, primary_key=True)
    issuer = Column(String)
    published_on = Column(Date)
    title = Column(String)
    url = Column(String)
    # Discriminator column used for polymorphic loading (see mapper args).
    type = Column(String)

    # ``type`` here is the column declared just above, not the builtin.
    __mapper_args__ = dict(
        polymorphic_identity='content',
        polymorphic_on=type,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from .base import Base | ||
from sqlalchemy import ForeignKey, Column, Integer, String | ||
from sqlalchemy.orm import relationship | ||
|
||
|
||
class Document(Base):
    """Document model."""

    __tablename__ = 'document'

    id = Column(Integer, primary_key=True)

    # Owning remiss.
    remiss_id = Column(Integer, ForeignKey('remiss.id'))
    remiss = relationship('Remiss', back_populates='other_documents')

    # Files attached to the document; paired with File.document.
    files = relationship('File', back_populates='document')

    # Discriminator column used for polymorphic loading (see mapper args).
    type = Column(String)

    # ``type`` here is the column declared just above, not the builtin.
    __mapper_args__ = dict(
        polymorphic_identity='document',
        polymorphic_on=type,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from .base import Base | ||
from sqlalchemy import ForeignKey, Column, Integer, String | ||
from sqlalchemy.orm import relationship | ||
|
||
|
||
class File(Base):
    """File model."""

    __tablename__ = 'file'

    id = Column(Integer, primary_key=True)

    # Owning document; paired with Document.files.
    document_id = Column(Integer, ForeignKey('document.id'))
    document = relationship(
        'Document',
        back_populates='files',
    )

    name = Column(String)
    url = Column(String)
Oops, something went wrong.