Skip to content

Commit

Permalink
Merge pull request #97 from NAL-i5K/testpy3_rebase
Browse files Browse the repository at this point in the history
Testpy3 rebase
  • Loading branch information
mpoelchau authored Nov 1, 2019
2 parents 28f126e + 8ab71a6 commit 80ea9ab
Show file tree
Hide file tree
Showing 23 changed files with 244 additions and 252 deletions.
19 changes: 9 additions & 10 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ matrix:
include:
- os: linux
language: python
python: 2.7
python: 3.7
install:
- pip install .
- pip install coveralls coverage codecov
- pip install .

script:
- coverage run -a ./gff3tool/bin/gff3_QC.py -g example_file/example.gff3 -f example_file/reference.fa -o error.txt
- coverage run -a ./gff3tool/bin/gff3_fix.py -qc_r error.txt -g example_file/example.gff3 -og corrected.gff3
Expand All @@ -21,19 +22,18 @@ matrix:

- os: linux
language: python
python: 2.7
install:
- pip install --no-cache-dir gff3tool
- os: osx
language: generic
python: 3.7
install:
- pip2 install .
- pip3 install --no-cache-dir gff3tool
- os: osx
language: generic
python: 3.7
install:
- pip2 install --no-cache-dir gff3tool
- pip3 install .
- pip3 install --no-cache-dir gff3tool
- os: linux
language: ruby
python: 3.7
rvm: 2.2
before_script: gem install awesome_bot
script: awesome_bot --allow-redirect --allow-dupe README.md docs/*.md # only this instance will check the documents
Expand All @@ -53,4 +53,3 @@ after_success:
- coverage report
- coveralls
- codecov

2 changes: 1 addition & 1 deletion docs/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ In addition, if your input gff file contains a feature that has two or more pare

## Q: Which codons are considered for translation? (Generate biological sequences from a GFF3 file: gff3_to_fasta.py)

Translation from 64 combinations of [standard codons]( https://en.wikipedia.org/wiki/DNA_codon_table) (Only standard codons and universal stop codons are considered.)
Translation from 64 combinations of [standard codons](https://en.wikipedia.org/wiki/DNA_codon_table) (Only standard codons and universal stop codons are considered.)

## Q: Why does gff3_merge.py sometimes reject auto-assigned replace tags when the reference model has multiple isoforms? (Merge 2 GFF3 files: gff3_merge.py)

Expand Down
4 changes: 2 additions & 2 deletions docs/gff3_to_fasta.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ Extract sequences from specific regions of genome based on gff file.
- **`pep`**: Translated peptide sequences (translation based on cds regions) for each record in the [`FASTA`] output. CDS features need to be included in the gff file.
- **`user_defined`**: Specify parent and child features for fasta extraction via the -u argument, format [parent feature type] [child feature type].(e.g. `-st user_defined -u miRNA exon`)
* **`translator` method for universal translation**: The `translator` method is feasible for
- translation from 64 combitions of [standard codons](https://en.wikipedia.org/wiki/DNA_codon_table) (Only standard codons and universal stop condons are considered.)
- translation from [codons with IUB Depiction](https://en.wikipedia.org/wiki/DNA_codon_table)
- translation from 64 combinations of [standard codons](https://web.archive.org/web/20161201123943/https://www-bimas.cit.nih.gov/molbio/translate/codes.html) (Only standard codons and universal stop codons are considered.)
- translation from [codons with IUB Depiction](https://web.archive.org/web/20161201123943/https://www-bimas.cit.nih.gov/molbio/translate/codes.html)
- translation from mRNA (U contained) or CDS (T, instead of U contained)

## Usage
Expand Down
14 changes: 7 additions & 7 deletions gff3tool/bin/gff3_QC.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/local/bin/python2.7
#! /usr/local/bin/python3
import sys
import logging
from gff3tool.lib.gff3 import Gff3
Expand Down Expand Up @@ -110,26 +110,26 @@ def script_main():
error_set.extend(cmd)
if args.output:
logger_stderr.info('Print QC report at {0:s}'.format(args.output))
report_fh = open(args.output, 'wb')
report_fh = open(args.output, 'w')
else:
logger_stderr.info('Print QC report at {0:s}'.format('report.txt'))
report_fh = open('report.txt', 'wb')
report_fh = open('report.txt', 'w')

if args.statistic:
logger_stderr.info('Print QC statistic report at {0:s}'.format(args.statistic))
statistic_fh = open(args.statistic, 'wb')
statistic_fh = open(args.statistic, 'w')
else:
logger_stderr.info('Print QC statistic report at {0:s}'.format('statistic.txt'))
statistic_fh = open('statistic.txt', 'wb')
statistic_fh = open('statistic.txt', 'w')
report_fh.write('Line_num\tError_code\tError_tag\n')
for e in sorted(error_set):
for e in sorted(error_set, key=lambda x: sorted(x.keys())):
tag = '[{0:s}]'.format(e['eTag'])
report_fh.write('{0:s}\t{1:s}\t{2:s}\n'.format(str(e['line_num']), str(e['eCode']), str(tag)))
#statistic_file
error_counts = dict()
ERROR_INFO=ERROR.INFO
statistic_fh.write('Error_code\tNumber_of_problematic_models\tError_tag\n')
for s in sorted(error_set):
for s in sorted(error_set, key=lambda x: sorted(x.keys())):
if s['eCode'] not in error_counts:
error_counts[s['eCode']]= {'count':0,'etag':ERROR_INFO[s['eCode']]}
error_counts[s['eCode']]['count'] += 1
Expand Down
4 changes: 2 additions & 2 deletions gff3tool/bin/gff3_fix.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/local/bin/python2.7
#! /usr/local/bin/python3
import sys
import re
import logging
Expand All @@ -23,7 +23,7 @@ def script_main():
parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=dedent("""\
Testing environment:
1. Python 2.7
1. Python 3.*
Input:
1. Error report: Error report from gff3_QC.py. Specify the file name with the -qc_r or --qc_report argument;
Expand Down
17 changes: 8 additions & 9 deletions gff3tool/bin/gff3_merge.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/local/bin/python2.7
#! /usr/local/bin/python3
import sys
import re
import logging
Expand All @@ -19,24 +19,24 @@ def check_replace(gff, user_defined1=None):
for line in gff.lines:
if not user_defined1:
try:
if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent'):
if line['line_type'] == 'feature' and 'Parent' not in line['attributes']:
roots.append(line)
except KeyError:
print('WARNING [Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
else:
if line['type'] in u_type:
try:
if not line['attributes'].has_key('replace'):
if 'replace' not in line['attributes']:
error_lines.append(line)
except KeyError:
print('WARNING [Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))

#roots = [line for line in gff.lines if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent')]
#roots = [line for line in gff.lines if line['line_type'] == 'feature' and 'Parent' not in line['attributes']]

for root in roots:
children = root['children']
for child in children:
if not child['attributes'].has_key('replace'):
if 'replace' not in child['attributes']:
error_lines.append(child)

if len(error_lines):
Expand Down Expand Up @@ -115,7 +115,7 @@ def script_main():
Merge two gff files of the same genome into one.
Testing enviroment:
1. Python 2.7
1. Python 3.*
Inputs:
1. GFF3 file 1: Gff with annotations modified relative to the original gff (e.g. output from the Apollo program), specify the file name with the -g1 argument
Expand Down Expand Up @@ -146,7 +146,6 @@ def script_main():
parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + __version__)

args = parser.parse_args()

if args.gff_file1:
logger_stderr.info('Checking Update GFF3 file (%s)...', args.gff_file1)
elif not sys.stdin.isatty(): # if STDIN connected to pipe or file
Expand Down Expand Up @@ -220,9 +219,9 @@ def script_main():
sys.exit(0)
if args.report_file:
logger_stderr.info('Writing validation report (%s)...\n', args.report_file)
report_fh = open(args.report_file, 'wb')
report_fh = open(args.report_file, 'w')
else:
report_fh = open('merge_report.txt', 'wb')
report_fh = open('merge_report.txt', 'w')

if not args.output_gff:
args.output_gff='merged.gff'
Expand Down
32 changes: 16 additions & 16 deletions gff3tool/bin/gff3_sort.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/local/bin/python2.7
#! /usr/local/bin/python3
"""
Changelog:
* v0.0.2
Expand Down Expand Up @@ -37,7 +37,7 @@ def PositionSort(linelist,reference):
print ('Error')
sys.exit(1)
# 'seq2id': a dictionary mapping sequence number to their features
if seq2id.has_key(seqnum):
if seqnum in seq2id:
seq2id[seqnum].append(str(line['line_raw']))
else:
seq2id[seqnum] = [str(line['line_raw'])]
Expand All @@ -53,7 +53,7 @@ def PositionSort(linelist,reference):
print('ERROR [SeqID] SeqID does not end with a number. \n\t\t- Line {0:s}: {1:s} \n Adding argument -r like " gff3_sort -g example_file/example.gff3 -og example-sorted.gff3 -r " can handle this situation.'.format(str(line['line_index']+1),line['line_raw']))
sys.exit(1)
# 'seq2id': a dictionary mapping sequence number to their features
if seq2id.has_key(seqnum):
if seqnum in seq2id:
seq2id[seqnum].append(str(line['line_raw']))
else:
seq2id[seqnum] = [str(line['line_raw'])]
Expand Down Expand Up @@ -165,7 +165,7 @@ def TypeSort(line_list, sorting_order, reverse=False):
lineindex = line['start'] if reverse==False else line['end']
id2line[str(line['line_raw'])] = line
try:
if sorting_order.has_key(line['type']):
if line['type'] in sorting_order:
id2index[str(line['line_raw'])] = [lineindex, sorting_order[line['type']] if reverse==False else (-sorting_order[line['type']])]
else:
id2index[str(line['line_raw'])] = [lineindex, 99 if reverse==False else -99]
Expand All @@ -184,7 +184,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
gff3 = Gff3(gff_file=gff, logger=logger_null)

if output:
report = open(output, 'wb')
report = open(output, 'w')
else:
report = sys.stdout

Expand All @@ -196,13 +196,13 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
gff3_linenum_Set = set()

for line in gff3.lines:
if line['line_type'] == 'feature':
gff3_linenum_Set.add(line['line_index'])
try:
if line['line_type'] == 'feature' and not line['attributes'].has_key('Parent') and len(line['attributes']) != 0:
roots.append(line)
except:
logger.warning('[Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
if line['line_type'] == 'feature':
gff3_linenum_Set.add(line['line_index'])
try:
if line['line_type'] == 'feature' and not 'Parent' in line['attributes'] and len(line['attributes']) != 0:
roots.append(line)
except:
logger.warning('[Missing Attributes] Program failed.\n\t\t- Line {0:s}: {1:s}'.format(str(line['line_index']+1), line['line_raw']))
#roots = [line for line in gff3.lines if line['line_type'] == 'feature' and 'Parent' not in line['attributes']]

# Sort the root-level features based on the order of the genomic sequences
Expand Down Expand Up @@ -249,7 +249,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
gchildgroup = {}
# Visit every third-level feature, and collect a dictionary of 'type' to 'features'
for grandchild in grandchildren: # Visit each third-level feature
if gchildgroup.has_key(str(grandchild['type'])):
if str(grandchild['type']) in gchildgroup:
gchildgroup[str(grandchild['type'])].append(grandchild)
else:
gchildgroup[str(grandchild['type'])] = []
Expand All @@ -273,7 +273,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
if StrandSort(exons):
exons_sorted = StrandSort(exons)
for exon in exons_sorted:
if exon['attributes'].has_key('Parent'):
if 'Parent' in exon['attributes']:
if isinstance(exon['attributes']['Parent'], list) and len(exon['attributes']['Parent']) > 1:
gff3_linenum_Set.discard(exon['line_index'])
report.write(TwoParent(child['attributes']['ID'],exon))
Expand All @@ -289,7 +289,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
if StrandSort(cdss):
cdss_sorted = StrandSort(cdss)
for cds in cdss_sorted:
if cds['attributes'].has_key('Parent'):
if 'Parent' in cds['attributes']:
if isinstance(cds['attributes']['Parent'], list) and len(cds['attributes']['Parent']) > 1:
gff3_linenum_Set.discard(cds['line_index'])
report.write(TwoParent(child['attributes']['ID'],cds))
Expand All @@ -303,7 +303,7 @@ def main(gff, output=None, sorting_order=None, isoform_sort=False, logger=None,
if len(others):
if PositionSort(others,reference):
for other in others:
if other['attributes'].has_key('Parent'):
if 'Parent' in other['attributes']:
if isinstance(other['attributes']['Parent'], list) and len(other['attributes']['Parent']) > 1:
gff3_linenum_Set.discard(other['line_index'])
report.write(TwoParent(child['attributes']['ID'],other))
Expand Down
Loading

0 comments on commit 80ea9ab

Please sign in to comment.